//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function is run on the host.
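  // For illustration only (not part of the original file), the mapping this
  // routine computes looks roughly like:
  //   __global__ void kernel();          // -> CFT_Global
  //   __device__ int devFn();            // -> CFT_Device
  //   __host__ __device__ int hdFn();    // -> CFT_HostDevice
  //   int plainFn();                     // -> CFT_Host (no CUDA attributes)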
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
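  // For example (illustrative only, not part of the original file), case (e)
  // rejects direct cross-boundary calls such as:
  //   __device__ int devFn();
  //   __host__ void hostFn() { devFn(); }       // CFP_Never: h -> d
  //   __host__ int hostOnly();
  //   __global__ void kernel() { hostOnly(); }  // CFP_Never: g -> h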
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl*>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
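  // Illustrative example (not part of the original file): given
  //   struct DevOnly { __device__ DevOnly(); };
  //   struct S : DevOnly {};
  // the implicitly-declared S::S() must call DevOnly::DevOnly(), so its target
  // is inferred as __device__. If another base or field required a __host__
  // constructor instead, the conflict would mark the member with
  // CUDAInvalidTargetAttr below.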
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We're either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
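  // Illustrative example (not part of the original file) of what the E.2.3.1
  // "empty" rules above and below accept for a device-side global:
  //   struct Empty { Empty() {} ~Empty() {} };          // empty ctor/dtor: OK
  //   struct NonEmpty { NonEmpty() { x = 1; } int x; }; // non-empty body: rejected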
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // A union has no base classes, and a union dtor does not call dtors of its
  // data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    return;
  const Expr *Init = VD->getInit();
  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
      VD->hasAttr<CUDASharedAttr>()) {
    if (LangOpts.GPUAllowDeviceInit)
      return;
    assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
    bool AllowedInit = false;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
      AllowedInit =
          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    // We'll allow constant initializers even if it's a non-empty
    // constructor according to CUDA rules. This deviates from NVCC,
    // but allows us to handle things like constexpr constructors.
    if (!AllowedInit &&
        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) {
      auto *Init = VD->getInit();
      AllowedInit =
          ((VD->getType()->isDependentType() || Init->isValueDependent()) &&
           VD->isConstexpr()) ||
          Init->isConstantInitializer(Context,
                                      VD->getType()->isReferenceType());
    }

    // Also make sure that destructor, if there is one, is empty.
    if (AllowedInit)
      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
        AllowedInit =
            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());

    if (!AllowedInit) {
      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
                                  ? diag::err_shared_var_init
                                  : diag::err_dynamic_var_init)
          << Init->getSourceRange();
      VD->setInvalidDecl();
    }
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
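    // For example (illustrative only, not part of the original file):
    //   __device__ int makeOnDevice();
    //   int HostGlobal = makeOnDevice();  // error: initializer is __device__
    // An initializer that is __host__ or __host__ __device__ is accepted.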
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
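    // Illustrative conflict (not part of the original file), with
    // -fcuda-host-device-constexpr enabled:
    //   __device__ int twice(int x);                   // device-only decl
    //   constexpr int twice(int x) { return 2 * x; }   // would become HD
    // The unattributed constexpr redeclaration is diagnosed unless the
    // __device__ declaration came from a system header.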
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
  if (getLangOpts().CUDAIsDevice && VD->isConstexpr() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember())) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                   unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice) {
        return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
                FunctionEmissionStatus::Emitted)
                   ? DeviceDiagBuilder::K_ImmediateWithCallStack
                   : DeviceDiagBuilder::K_Deferred;
      }
      return DeviceDiagBuilder::K_Nop;

    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                 unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return DeviceDiagBuilder::K_Nop;

      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? DeviceDiagBuilder::K_ImmediateWithCallStack
                 : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  auto &ExprEvalCtx = ExprEvalContexts.back();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
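  // Illustrative example (not part of the original file) of the wrong-side
  // handling below:
  //   __host__ int hostFn();
  //   __host__ __device__ int hd() { return hostFn(); }  // CFP_WrongSide
  // In device compilation this is an immediate error only if hd() is already
  // known to be emitted for the device; otherwise the diagnostic is deferred.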
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  DeviceDiagBuilder::Kind DiagKind = [this, Caller, Callee,
                                      CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
      return DeviceDiagBuilder::K_Immediate;
    case CFP_WrongSide:
      assert(Caller && "WrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted ? DeviceDiagBuilder::K_ImmediateWithCallStack
                                : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();

  if (DiagKind == DeviceDiagBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
  DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
                    Caller, *this)
      << Callee;
  return DiagKind != DeviceDiagBuilder::K_Immediate &&
         DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
}

// Check for a wrong-sided reference capture in a lambda for CUDA/HIP.
// A lambda may capture a stack variable by reference when it is defined and
// then use that capture when the lambda is called. When the capture and the
// use happen on different sides, the capture is invalid and should be
// diagnosed.
void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on the
  // host side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel and
  // then passed to and called by a host function. However, that is impossible:
  // a device function or kernel can only call a device function, and a kernel
  // cannot pass a lambda back to a host function, since we cannot define a
  // kernel argument type which can hold the lambda before the lambda itself
  // is defined.
  if (!LangOpts.CUDAIsDevice)
    return;

  // A file-scope lambda can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on the device side. For such lambdas, a reference capture is
  // invalid only if the lambda structure is populated by a host function and
  // then passed to and called in a device function or kernel.
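  // Illustrative example (not part of the original file): a host function that
  // captures a local by reference and hands the lambda to device code, e.g.
  //   int x = 42;
  //   auto lam = [&x] __device__ () { return x; };  // x lives on the host stack
  // is diagnosed below, since the device-side call would dereference a host
  // address. The diagnostic is deferred until the lambda is known-emitted.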
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = DeviceDiagBuilder::K_Deferred;
  if (Capture.isVariableCapture()) {
    DeviceDiagBuilder(DiagKind, Capture.getLocation(),
                      diag::err_capture_bad_target, Callee, *this)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    DeviceDiagBuilder(DiagKind, Capture.getLocation(),
                      diag::err_capture_bad_target_this_ptr, Callee, *this);
  }
  return;
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
}

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device, so
    // should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call
  return "cudaConfigureCall";
}
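
// Illustrative note (not part of the original file): the name returned above
// is what Sema::ActOnCUDAExecConfigExpr builds a call to for an execution
// configuration, so a launch such as
//   kernel<<<blocks, threads>>>(args);
// is checked as if it first invoked, e.g., cudaConfigureCall(blocks, threads)
// (or __cudaPushCallConfiguration / the HIP equivalents, depending on mode)
// before the kernel call itself.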