//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// function.
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function is run on the host.
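  // (E.g. a namespace-scope variable initializer has no enclosing
  // FunctionDecl.) For the attributed cases below, hypothetical user code
  // maps roughly as follows:
  //   __global__ void kernel();        // -> CFT_Global
  //   __device__ int dev();            // -> CFT_Device
  //   __host__ __device__ int hd();    // -> CFT_HostDevice
  //   int host_only();                 // -> CFT_Host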
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
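  // For instance (hypothetical user code), a host-only function calling a
  // device-only one:
  //   __device__ int dev();
  //   void host_fn() { dev(); }   // caller h, callee d -> CFP_Never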
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
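  // (An abstract class can never be the most-derived object, and virtual
  // bases are constructed/destroyed only by the most-derived class's special
  // members, so those calls never happen from here.)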
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We're either setting attributes for the first time, or the inferred ones
  // must match previously set ones.
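  // For example (hypothetically), if every base/field special member found
  // above was __device__, only the implicit __device__ attribute is added
  // here; mixed host/device targets were already rejected above via
  // CUDAInvalidTargetAttr.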
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined,
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
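  // (E.g., hypothetically, a field of type `struct S { ~S() {} };` still
  // counts as empty, while a field whose destructor has a non-empty body
  // does not.)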
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    return;
  const Expr *Init = VD->getInit();
  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
      VD->hasAttr<CUDASharedAttr>()) {
    if (LangOpts.GPUAllowDeviceInit)
      return;
    assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
    bool AllowedInit = false;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
      AllowedInit =
          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    // We'll allow constant initializers even if it's a non-empty
    // constructor according to CUDA rules. This deviates from NVCC,
    // but allows us to handle things like constexpr constructors.
    if (!AllowedInit &&
        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
      AllowedInit = VD->getInit()->isConstantInitializer(
          Context, VD->getType()->isReferenceType());

    // Also make sure that destructor, if there is one, is empty.
    if (AllowedInit)
      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
        AllowedInit =
            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());

    if (!AllowedInit) {
      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
                                  ? diag::err_shared_var_init
                                  : diag::err_dynamic_var_init)
          << Init->getSourceRange();
      VD->setInvalidDecl();
    }
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
//   allowed), or
// * a __device__ function with this signature was already declared, in which
//   case we output an error, unless the __device__ decl is in a system
//   header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
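//
// For illustration (hypothetical user code), with -fcuda-host-device-constexpr:
//   constexpr int square(int x) { return x * x; }  // becomes implicitly HD
//   __device__ int square(int x);                  // same signature: error,
//                                                  // unless declared in a
//                                                  // system header.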
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                   unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice) {
        return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
                FunctionEmissionStatus::Emitted)
                   ? DeviceDiagBuilder::K_ImmediateWithCallStack
                   : DeviceDiagBuilder::K_Deferred;
      }
      return DeviceDiagBuilder::K_Nop;

    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                 unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in host
      // mode until the function is known-emitted.
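      // (In device mode the HD body is treated as device code, so host-only
      // diagnostics simply do not apply and we return K_Nop below.)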
      if (getLangOpts().CUDAIsDevice)
        return DeviceDiagBuilder::K_Nop;

      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? DeviceDiagBuilder::K_ImmediateWithCallStack
                 : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  auto &ExprEvalCtx = ExprEvalContexts.back();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  if (CallerKnownEmitted) {
    // Host-side references to a __global__ function refer to the stub, so the
    // function itself is never emitted and therefore should not be marked.
    if (!shouldIgnoreInHostDeviceCheck(Callee))
      markKnownEmitted(
          *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) {
            return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted;
          });
  } else {
    // If we have
    //   host fn calls kernel fn calls host+device,
    // the HD function does not get instantiated on the host. We model this by
    // omitting the call to the kernel from the call graph. This ensures that,
    // when compiling for host, only HD functions actually called from the
    // host get marked as known-emitted.
    if (!shouldIgnoreInHostDeviceCheck(Callee))
      DeviceCallGraph[Caller].insert({Callee, Loc});
  }

  DeviceDiagBuilder::Kind DiagKind = [this, Caller, Callee,
                                      CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
      return DeviceDiagBuilder::K_Immediate;
    case CFP_WrongSide:
      assert(Caller && "WrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted ? DeviceDiagBuilder::K_ImmediateWithCallStack
                                : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();

  if (DiagKind == DeviceDiagBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
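  // (insert() returns an {iterator, bool} pair; a false .second means this
  // caller/location pair has already been diagnosed, so we bail out.)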
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
  DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
                    Caller, *this)
      << Callee;
  return DiagKind != DeviceDiagBuilder::K_Immediate &&
         DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
  if (!CurFn)
    return;
  CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
  if (Target == CFT_Global || Target == CFT_Device) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  } else if (Target == CFT_HostDevice) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }
}

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device, so
    // should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
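  // (Roughly, a launch such as `kernel<<<grid, block>>>(args)` is lowered to
  // a call to this configuration function followed by a call to the kernel's
  // host-side stub.)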
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}