//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/SemaCUDA.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>

using namespace clang;

SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void SemaCUDA::PushForceHostDevice() {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  ForceHostDeviceDepth++;
}

bool SemaCUDA::PopForceHostDevice() {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  if (ForceHostDeviceDepth == 0)
    return false;
  ForceHostDeviceDepth--;
  return true;
}

ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
      getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                               /*IsExecConfig=*/true);
}

CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CUDAFunctionTarget::InvalidTarget;

  if (HasGlobalAttr)
    return CUDAFunctionTarget::Global;

  if (HasHostAttr && HasDeviceAttr)
    return CUDAFunctionTarget::HostDevice;

  if (HasDeviceAttr)
    return CUDAFunctionTarget::Device;

  return CUDAFunctionTarget::Host;
}

template <typename A>
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}
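// Example (illustrative source, not part of this file): Push/PopForceHostDevice
// above implement the force-HD pragma pair; every function declared between
// the two pragmas is treated as __host__ __device__, roughly:
//
//   #pragma clang force_cuda_host_device begin
//   int implicitly_host_device(int x) { return x + 1; }
//   #pragma clang force_cuda_host_device end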
SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
    SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
    : S(S_) {
  SavedCtx = S.CurCUDATargetCtx;
  assert(K == SemaCUDA::CTCK_InitGlobalVar);
  auto *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
    auto Target = CUDAFunctionTarget::Host;
    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
      Target = CUDAFunctionTarget::Device;
    S.CurCUDATargetCtx = {Target, K, VD};
  }
}

/// IdentifyTarget - Determine the CUDA compilation target for this function.
CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                            bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function gets the target from CurCUDATargetCtx.
  if (D == nullptr)
    return CurCUDATargetCtx.Target;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CUDAFunctionTarget::InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CUDAFunctionTarget::Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CUDAFunctionTarget::HostDevice;
    return CUDAFunctionTarget::Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CUDAFunctionTarget::Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CUDAFunctionTarget::HostDevice;
  }

  return CUDAFunctionTarget::Host;
}
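// Example (illustrative): how the rules above classify a few declarations:
//
//   __global__ void kernel();              // CUDAFunctionTarget::Global
//   __host__ __device__ int clamp(int);    // CUDAFunctionTarget::HostDevice
//   __device__ float devOnly();            // CUDAFunctionTarget::Device
//   void plainHost();                      // CUDAFunctionTarget::Host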
/// IdentifyTarget - Determine the CUDA compilation target for this variable.
SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with an implicit constant attribute
  // are emitted on both sides. Such variables are promoted to the device side
  // only if they have static constant initializers on the device side.
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
  // - on both sides in host device functions
  // - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyTarget(FD)) {
    case CUDAFunctionTarget::HostDevice:
      return CVT_Both;
    case CUDAFunctionTarget::Device:
    case CUDAFunctionTarget::Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |
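// Example (illustrative): reading the "hd -> h" row above. In host
// compilation the call below is CFP_SameSide and always allowed; in device
// compilation it is CFP_WrongSide, accepted by Sema but rejected later if
// hd_fn() is ever emitted for the device:
//
//   void host_fn();
//   __host__ __device__ void hd_fn() { host_fn(); }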
SemaCUDA::CUDAFunctionPreference
SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");

  // Treat ctor/dtor as host device function in device var initializer to allow
  // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
  // will be diagnosed by checkAllowedInitializer.
  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
    return CFP_HostDevice;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
      CalleeTarget == CUDAFunctionTarget::InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CUDAFunctionTarget::Global &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CUDAFunctionTarget::HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Global) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Device))
    return CFP_Native;

  // HipStdPar mode is special, in that assessing whether a device-side call
  // to a host target is valid is deferred to a subsequent pass and cannot be
  // unambiguously adjudicated in the AST; hence we optimistically allow such
  // calls to pass here.
  if (getLangOpts().HIPStdPar &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device ||
       CallerTarget == CUDAFunctionTarget::HostDevice) &&
      CalleeTarget == CUDAFunctionTarget::Host)
    return CFP_HostDevice;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CUDAFunctionTarget::HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice &&
         CalleeTarget == CUDAFunctionTarget::Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CUDAFunctionTarget::Host ||
          CalleeTarget == CUDAFunctionTarget::Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Device) ||
      (CallerTarget == CUDAFunctionTarget::Device &&
       CalleeTarget == CUDAFunctionTarget::Host) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void SemaCUDA::EraseUnwantedMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP =
      GetCFP(*llvm::max_element(Matches, [&](const Pair &M1, const Pair &M2) {
        return GetCFP(M1) < GetCFP(M2);
      }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}
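// Example (illustrative): for an overload set
//
//   __device__ void f(int);  // CFP_Native from a __device__ caller
//   __host__ void f(int);    // CFP_Never from a __device__ caller
//
// resolving &f inside a __device__ function keeps only the __device__
// overload: EraseUnwantedMatches drops every match whose preference is below
// BestCFP.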
/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
                                CUDAFunctionTarget Target2,
                                CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != CUDAFunctionTarget::Global);
  assert(Target2 != CUDAFunctionTarget::Global);

  if (Target1 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMemberKind CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If MemberDecl is a virtual destructor of an explicit template class
  // instantiation, it must be emitted, therefore its target needs to be
  // inferred conservatively by ignoring implicit host/device attrs of the
  // member and parent dtors called by it. Also, it needs to be checked by the
  // deferred diag visitor.
  bool IsExpVDtor = false;
  if (isa<CXXDestructorDecl>(MemberDecl) && MemberDecl->isVirtual()) {
    if (auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(ClassDecl)) {
      TemplateSpecializationKind TSK = Spec->getTemplateSpecializationKind();
      IsExpVDtor = TSK == TSK_ExplicitInstantiationDeclaration ||
                   TSK == TSK_ExplicitInstantiationDefinition;
    }
  }
  if (IsExpVDtor)
    SemaRef.DeclsToCheckForDeferredDiags.insert(MemberDecl);

  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  std::optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  Sema::ContextRAII MethodContext(SemaRef, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
                                    /* ConstArg */ ConstRHS,
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget =
        IdentifyTarget(SMOR.getMethod(), IsExpVDtor);

    if (!InferredTarget) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << BaseMethodTarget;
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
                                    /* ConstArg */ ConstRHS && !F->isMutable(),
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyTarget(SMOR.getMethod(), IsExpVDtor);

    if (!InferredTarget) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, FieldMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << FieldMethodTarget;
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget) {
    if (*InferredTarget == CUDAFunctionTarget::Device)
      NeedsH = false;
    else if (*InferredTarget == CUDAFunctionTarget::Host)
      NeedsD = false;
  }

  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));

  return false;
}

bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}
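// Example (illustrative): what the empty-constructor rules above accept:
//
//   struct Empty { Empty() {} int x; };                // empty: no params,
//                                                      // empty body, no inits
//   struct NonEmpty { NonEmpty() : x(42) {} int x; };  // not empty: has a
//                                                      // member initializer
//
// A global "__device__ Empty e;" is therefore allowed, while a global of
// type NonEmpty would need dynamic initialization and is rejected.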
bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined,
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Union does not have base class and union dtor does not call dtors of its
  // data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

namespace {
enum CUDAInitializerCheckKind {
  CICK_DeviceOrConstant, // Check initializer for device/constant variable
  CICK_Shared,           // Check initializer for shared variable
};

bool IsDependentVar(VarDecl *VD) {
  if (VD->getType()->isDependentType())
    return true;
  if (const auto *Init = VD->getInit())
    return Init->isValueDependent();
  return false;
}

// Check whether a variable has an allowed initializer for a CUDA device side
// variable with global storage. \p VD may be a host variable to be checked for
// potential promotion to device side variable.
//
// CUDA/HIP allows only empty constructors as initializers for global
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
// __shared__ variables whether they are local or not (they all are implicitly
// static in CUDA). One exception is that CUDA allows constant initializers
// for __constant__ and __device__ variables.
bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
                                           CUDAInitializerCheckKind CheckKind) {
  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
  assert(!IsDependentVar(VD) && "do not check dependent var");
  const Expr *Init = VD->getInit();
  auto IsEmptyInit = [&](const Expr *Init) {
    if (!Init)
      return true;
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
      return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
    }
    return false;
  };
  auto IsConstantInit = [&](const Expr *Init) {
    assert(Init);
    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
                                                    /*NoWrongSidedVars=*/true);
    return Init->isConstantInitializer(S.getASTContext(),
                                       VD->getType()->isReferenceType());
  };
  auto HasEmptyDtor = [&](VarDecl *VD) {
    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
      return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
    return true;
  };
  if (CheckKind == CICK_Shared)
    return IsEmptyInit(Init) && HasEmptyDtor(VD);
  return S.getLangOpts().GPUAllowDeviceInit ||
         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
}
} // namespace
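// Example (illustrative): initializers the check above accepts and rejects
// (absent -fgpu-allow-device-init):
//
//   __device__ int a = 42;         // OK: constant initializer
//   __shared__ int b;              // OK: no initializer
//   __shared__ int c = 1;          // rejected: __shared__ variables may not
//                                  //           be initialized (CICK_Shared)
//   int hostRand();
//   __device__ int d = hostRand(); // rejected: dynamic initialization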
void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
  // Return early if VD is inside a non-instantiated template function since
  // the implicit constructor is not defined yet.
  if (const FunctionDecl *FD =
          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext());
      FD && FD->isDependentContext())
    return;

  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
  bool IsDeviceOrConstantVar =
      !IsSharedVar &&
      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
  if ((IsSharedVar || IsDeviceOrConstantVar) &&
      VD->getType().getQualifiers().getAddressSpace() != LangAS::Default) {
    Diag(VD->getLocation(), diag::err_cuda_address_space_gpuvar);
    VD->setInvalidDecl();
    return;
  }
  // Do not check dependent variables since the ctor/dtor/initializer are not
  // determined. Do it after instantiation.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
      IsDependentVar(VD))
    return;
  const Expr *Init = VD->getInit();
  if (IsDeviceOrConstantVar || IsSharedVar) {
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
      return;
    Diag(VD->getLocation(),
         IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
        << Init->getSourceRange();
    VD->setInvalidDecl();
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
      if (InitFnTarget != CUDAFunctionTarget::Host &&
          InitFnTarget != CUDAFunctionTarget::HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}
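// Example (illustrative): the host-side branch above rejects a host global
// whose initializer could only run on the device:
//
//   __device__ int devInit();
//   int hostGlobal = devInit();   // err_ref_bad_target_global_initializer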
void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
    const FunctionDecl *Callee) {
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  if (!isImplicitHostDeviceFunction(Callee))
    return;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);

  // Record whether an implicit host device function is used on device side.
  if (CallerTarget != CUDAFunctionTarget::Device &&
      CallerTarget != CUDAFunctionTarget::Global &&
      (CallerTarget != CUDAFunctionTarget::HostDevice ||
       (isImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
    return;

  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
//   allowed), or
// * a __device__ function with this signature was already declared, in which
//   case we output an error, unless the __device__ decl is in a system
//   header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");

  if (ForceHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  // If a template function has no host/device/global attributes,
  // make it an implicit host device function.
  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
      !NewD->hasAttr<CUDAGlobalAttr>() &&
      (NewD->getDescribedFunctionTemplate() ||
       NewD->isFunctionTemplateSpecialization())) {
    NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !SemaRef.IsOverload(NewD, OldD,
                               /* UseMemberUsingDeclRules = */ false,
                               /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
}

// TODO: `__constant__` memory may be a limited resource for certain targets.
// A safeguard may be needed at the end of the compilation pipeline if
// `__constant__` memory usage goes beyond the limit.
void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
      !VD->hasAttr<CUDASharedAttr>() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !IsDependentVar(VD) &&
      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
       HasAllowedCUDADeviceStaticInitializer(*this, VD,
                                             CICK_DeviceOrConstant))) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}
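// Example (illustrative): with -fcuda-host-device-constexpr, the unattributed
// constexpr function below becomes implicitly __host__ __device__ and is
// callable from kernels:
//
//   constexpr int square(int x) { return x * x; }
//
// If a plain "__device__ int square(int);" had already been declared outside
// a system header, maybeAddHostDeviceAttrs would instead report
// err_cuda_unattributed_constexpr_cannot_overload_device.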
SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
                                                           unsigned DiagID) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Global:
    case CUDAFunctionTarget::Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in host
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}
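// Example (illustrative): callers elsewhere in Sema use these helpers to
// restrict a construct to one side, along the lines of
//
//   CUDA().DiagIfDeviceCode(Loc, diag::err_thread_unsupported);
//
// which fires immediately inside __device__/__global__ code, is a no-op in
// pure host code, and is deferred inside __host__ __device__ functions until
// they are known to be emitted for the device.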
bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
                            Sema::FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
    // For -fgpu-rdc, keep track of external kernels used by host functions.
    if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
        (!Caller || (!Caller->getDescribedFunctionTemplate() &&
                     getASTContext().GetGVALinkageForFunction(Caller) ==
                         GVA_StrongExternal)))
      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
    return true;
  }

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
                        SemaRef)
      << IdentifyTarget(Callee) << /*function*/ 0 << Callee
      << IdentifyTarget(Caller);
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, SemaRef)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}
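// Example (illustrative): a cross-side call that CheckCall diagnoses with
// err_ref_bad_target:
//
//   __device__ int devOnly();
//   void hostFn() { devOnly(); }   // CFP_Never: host code cannot call a
//                                  // __device__ function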
// Check the wrong-sided reference capture of lambda for CUDA/HIP.
// A lambda function may capture a stack variable by reference when it is
// defined and use the capture by reference when the lambda is called. When
// the capture and the use happen on different sides, the capture is invalid
// and should be diagnosed.
void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on the
  // host side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel and
  // then passed to and called by a host function. However, that is impossible:
  // a device function or kernel can only call a device function, and a kernel
  // cannot pass a lambda back to a host function since we cannot define a
  // kernel argument type which can hold the lambda before the lambda itself
  // is defined.
  if (!getLangOpts().CUDAIsDevice)
    return;

  // File-scope lambdas can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on the device side. For such lambdas, a reference capture is
  // invalid only if the lambda structure is populated by a host function then
  // passed to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, SemaRef)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    // Capture of the this pointer is allowed since the this pointer may be
    // pointing to managed memory which is accessible on both device and host
    // sides. It only results in an invalid memory access if the this pointer
    // points to memory not accessible on the device side.
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
                          SemaRef);
  }
}

void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
}
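// Example (illustrative, using the extended-lambda syntax): a wrong-sided
// reference capture that is diagnosed when the lambda is emitted for the
// device, since `x` lives on the host stack:
//
//   void hostFn() {
//     int x = 0;
//     auto l = [&x] __device__ () { return x; };  // err_capture_bad_target
//   }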
void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
    // Don't allow HD and global functions to overload other functions with
    // the same signature. We allow overloading based on CUDA attributes so
    // that functions can have different implementations on the host and
    // device, but HD/global functions "exist" in some sense on both the host
    // and device, so should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        !SemaRef.IsOverload(NewFD, OldFD,
                            /* UseMemberUsingDeclRules = */ false,
                            /* ConsiderCudaAttrs = */ false)) {
      if ((NewTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(NewFD) &&
             OldTarget == CUDAFunctionTarget::Device)) ||
          (OldTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(OldFD) &&
             NewTarget == CUDAFunctionTarget::Device)) ||
          (NewTarget == CUDAFunctionTarget::Global) ||
          (OldTarget == CUDAFunctionTarget::Global)) {
        Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
            << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
        NewFD->setInvalidDecl();
        break;
      }
      if ((NewTarget == CUDAFunctionTarget::Host &&
           OldTarget == CUDAFunctionTarget::Device) ||
          (NewTarget == CUDAFunctionTarget::Device &&
           OldTarget == CUDAFunctionTarget::Host)) {
        Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare)
            << NewTarget << OldTarget;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
      }
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
}

std::string SemaCUDA::getConfigureFuncName() const {
  if (getLangOpts().OffloadViaLLVM)
    return "__llvmPushCallConfiguration";

  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}
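// Example (illustrative): a triple-angle-bracket launch such as
//
//   kernel<<<grid, block, sharedBytes, stream>>>(args...);
//
// is turned by ActOnExecConfigExpr into a call to the configuration function
// named above (e.g. __cudaPushCallConfiguration for the new CUDA launch
// sequence), followed by the call to the kernel stub itself.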
// Record any local constexpr variables that are passed one way on the host
// and another on the device.
void SemaCUDA::recordPotentialODRUsedVariable(
    MultiExprArg Arguments, OverloadCandidateSet &Candidates) {
  sema::LambdaScopeInfo *LambdaInfo = SemaRef.getCurLambda();
  if (!LambdaInfo)
    return;

  for (unsigned I = 0; I < Arguments.size(); ++I) {
    auto *DeclRef = dyn_cast<DeclRefExpr>(Arguments[I]);
    if (!DeclRef)
      continue;
    auto *Variable = dyn_cast<VarDecl>(DeclRef->getDecl());
    if (!Variable || !Variable->isLocalVarDecl() || !Variable->isConstexpr())
      continue;

    bool HostByValue = false, HostByRef = false;
    bool DeviceByValue = false, DeviceByRef = false;

    for (OverloadCandidate &Candidate : Candidates) {
      FunctionDecl *Callee = Candidate.Function;
      if (!Callee || I >= Callee->getNumParams())
        continue;

      CUDAFunctionTarget Target = IdentifyTarget(Callee);
      if (Target == CUDAFunctionTarget::InvalidTarget ||
          Target == CUDAFunctionTarget::Global)
        continue;

      bool CoversHost = (Target == CUDAFunctionTarget::Host ||
                         Target == CUDAFunctionTarget::HostDevice);
      bool CoversDevice = (Target == CUDAFunctionTarget::Device ||
                           Target == CUDAFunctionTarget::HostDevice);

      bool IsRef = Callee->getParamDecl(I)->getType()->isReferenceType();
      HostByValue |= CoversHost && !IsRef;
      HostByRef |= CoversHost && IsRef;
      DeviceByValue |= CoversDevice && !IsRef;
      DeviceByRef |= CoversDevice && IsRef;
    }

    if ((HostByValue && DeviceByRef) || (HostByRef && DeviceByValue))
      LambdaInfo->CUDAPotentialODRUsedVars.insert(Variable);
  }
}
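// Example (illustrative): a local constexpr variable that the host overload
// takes by value but the device overload takes by reference; the reference
// parameter would ODR-use `n` on the device, so it is recorded:
//
//   void take(int);                     // host overload: by value
//   __device__ void take(const int &);  // device overload: by reference
//   void hostFn() {
//     constexpr int n = 3;
//     auto l = [=] __host__ __device__ () { take(n); };
//   }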