//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// function.
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function is run on the host.
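  // (Added illustrative note: for example, the initializer of a
  // namespace-scope variable has no enclosing FunctionDecl, so D is null here
  // and the code is treated as host code.)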
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
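  // Added illustrative sketch (hypothetical user code, not from this file):
  //
  //   __device__ int devFn();
  //   __host__ int hostFn() { return devFn(); }  // CFP_Never: host -> device
  //
  // Such a call falls into case (e) below.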
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
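  // Added illustrative sketch (hypothetical types, not from this file): given
  //
  //   struct A { __device__ A(); };
  //   struct B { __host__ B(); };
  //   struct C : A, B {};
  //
  // the implicit C::C() would have to call both a __device__ and a __host__
  // base constructor, so the conflict handling further below marks it with
  // CUDAInvalidTargetAttr.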
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Union does not have base classes and union dtor does not call dtors of its
  // data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    return;
  const Expr *Init = VD->getInit();
  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
      VD->hasAttr<CUDASharedAttr>()) {
    if (LangOpts.GPUAllowDeviceInit)
      return;
    bool AllowedInit = false;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
      AllowedInit =
          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    // We'll allow constant initializers even if it's a non-empty
    // constructor according to CUDA rules. This deviates from NVCC,
    // but allows us to handle things like constexpr constructors.
    if (!AllowedInit &&
        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) {
      auto *Init = VD->getInit();
      AllowedInit =
          ((VD->getType()->isDependentType() || Init->isValueDependent()) &&
           VD->isConstexpr()) ||
          Init->isConstantInitializer(Context,
                                      VD->getType()->isReferenceType());
    }

    // Also make sure that destructor, if there is one, is empty.
    if (AllowedInit)
      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
        AllowedInit =
            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());

    if (!AllowedInit) {
      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
                                  ? diag::err_shared_var_init
                                  : diag::err_dynamic_var_init)
          << Init->getSourceRange();
      VD->setInvalidDecl();
    }
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
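    // Added illustrative sketch (hypothetical user code, not from this file):
    //
    //   __device__ int devInit();
    //   int X = devInit();  // rejected: initializer not callable on the host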
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
//   allowed), or
// * a __device__ function with this signature was already declared, in which
//   case we output an error, unless the __device__ decl is in a system header,
//   in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
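    // Added illustrative sketch (hypothetical user code, not from this file),
    // with -fcuda-host-device-constexpr:
    //
    //   __device__ int f(int);                // device-only decl seen first
    //   constexpr int f(int x) { return x; }  // error, unless the __device__
    //                                         // decl is in a system header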
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
  if (getLangOpts().CUDAIsDevice && VD->isConstexpr() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember())) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                       unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!isa<FunctionDecl>(CurContext))
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (IsLastErrorImmediate &&
          Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
                               dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!isa<FunctionDecl>(CurContext))
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (IsLastErrorImmediate &&
          Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
                               dyn_cast<FunctionDecl>(CurContext), *this);
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  auto &ExprEvalCtx = ExprEvalContexts.back();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee
      << IdentifyCUDATarget(Caller);
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, *this)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}

// Check the wrong-sided reference capture of lambda for CUDA/HIP.
// A lambda function may capture a stack variable by reference when it is
// defined and uses the capture by reference when the lambda is called. When
// the capture and use happen on different sides, the capture is invalid and
// should be diagnosed.
void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on host
  // side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel then
  // is passed to and called by a host function.
  // However, that is impossible, since a device function or kernel can only
  // call a device function, and a kernel cannot pass a lambda back to a host
  // function, since we cannot define a kernel argument type which can hold
  // the lambda before the lambda itself is defined.
  if (!LangOpts.CUDAIsDevice)
    return;

  // File-scope lambda can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on device side. For such lambdas, a reference capture is invalid
  // only if the lambda structure is populated by a host function then passed
  // to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture()) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, *this)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target_this_ptr, Callee, *this);
  }
  return;
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
}

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so should have the same implementation on both sides.
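    // Added illustrative sketch (hypothetical user code, not from this file):
    //
    //   __host__ void g();             // host-only declaration
    //   __device__ void g();           // OK: overloading on CUDA attributes
    //   __host__ __device__ void g();  // error: HD g() collides with both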
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}
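
// Added illustrative note (hypothetical user code, not from this file): for a
// kernel launch such as
//
//   kernel<<<Grid, Block>>>(Args...);
//
// ActOnCUDAExecConfigExpr above builds the execution-configuration expression
// as a call to the function named by getCudaConfigureFuncName(), e.g.
// cudaConfigureCall or __cudaPushCallConfiguration for CUDA, and
// hipConfigureCall or __hipPushCallConfiguration for HIP.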