//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>
using namespace clang;

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
                                                   CUDATargetContextKind K,
                                                   Decl *D)
    : S(S_) {
  SavedCtx = S.CurCUDATargetCtx;
  assert(K == CTCK_InitGlobalVar);
  auto *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
    auto Target = CFT_Host;
    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
      Target = CFT_Device;
    S.CurCUDATargetCtx = {Target, K, VD};
  }
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// function.
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function gets the target from CurCUDATargetCtx.
  if (D == nullptr)
    return CurCUDATargetCtx.Target;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// variable.
Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with an implicit constant attribute
  // are emitted on both sides. Such variables are promoted to the device side
  // only if they have static constant initializers on the device side.
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
  // - on both sides in host device functions
  // - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyCUDATarget(FD)) {
    case CFT_HostDevice:
      return CVT_Both;
    case CFT_Device:
    case CFT_Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");

  // Treat ctor/dtor as host device function in device var initializer to allow
  // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
  // will be diagnosed by checkAllowedCUDAInitializer.
  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
      CurCUDATargetCtx.Target == CFT_Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
    return CFP_HostDevice;

  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // HipStdPar mode is special: whether a device-side call to a host function
  // is allowed is assessed by a later pass and cannot be unambiguously
  // adjudicated in the AST, so we optimistically let such calls through here.
  if (getLangOpts().HIPStdPar &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
       CallerTarget == CFT_HostDevice) &&
      CalleeTarget == CFT_Host)
    return CFP_HostDevice;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode-matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across the device/host boundary is not something you should do.
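  //
  // Illustrative example (not part of the original source; dev_fn and
  // host_fn are hypothetical):
  //   __device__ int dev_fn();
  //   __host__ int host_fn() { return dev_fn(); } // h -> d: CFP_Never, (e)
  // whereas a __host__ __device__ caller of host_fn() would get CFP_SameSide
  // in host mode and CFP_WrongSide in device mode, case (d) above.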
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
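  //
  // Illustrative example of a conflict detected below (not part of the
  // original source; A, B and C are hypothetical):
  //   struct A { __device__ A(const A &); };
  //   struct B { __host__ B(const B &); };
  //   struct C : A, B {}; // implicit copy ctor must call both base ctors
  // The base targets conflict (__device__ vs. __host__), so C's implicit copy
  // constructor gets CUDAInvalidTargetAttr and, if Diagnose is set, a note is
  // emitted.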
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  std::optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from
  // here. Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, FieldMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget) {
    if (*InferredTarget == CFT_Device)
      NeedsH = false;
    else if (*InferredTarget == CFT_Host)
      NeedsD = false;
  }

  // We're either setting the attributes for the first time, or the inferred
  // ones must match the ones previously set.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor,
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor,
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined,
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // A union has no base classes, and a union dtor does not call dtors of its
  // data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

namespace {
enum CUDAInitializerCheckKind {
  CICK_DeviceOrConstant, // Check initializer for device/constant variable
  CICK_Shared,           // Check initializer for shared variable
};

bool IsDependentVar(VarDecl *VD) {
  if (VD->getType()->isDependentType())
    return true;
  if (const auto *Init = VD->getInit())
    return Init->isValueDependent();
  return false;
}

// Check whether a variable has an allowed initializer for a CUDA device-side
// variable with global storage. \p VD may be a host variable to be checked
// for potential promotion to a device-side variable.
//
// CUDA/HIP allows only empty constructors as initializers for global
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
// __shared__ variables whether they are local or not (they all are implicitly
// static in CUDA). One exception is that CUDA allows constant initializers
// for __constant__ and __device__ variables.
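//
// Illustrative example (not part of the original source; Empty and host_rand
// are hypothetical):
//   __device__ int A = 42;          // OK: constant initializer
//   struct Empty { __device__ Empty() {} };
//   __device__ Empty B;             // OK: empty constructor, empty dtor
//   __device__ int C = host_rand(); // rejected: dynamic initializer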
bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
                                           CUDAInitializerCheckKind CheckKind) {
  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
  assert(!IsDependentVar(VD) && "do not check dependent var");
  const Expr *Init = VD->getInit();
  auto IsEmptyInit = [&](const Expr *Init) {
    if (!Init)
      return true;
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
      return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    }
    return false;
  };
  auto IsConstantInit = [&](const Expr *Init) {
    assert(Init);
    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context,
                                                    /*NoWronSidedVars=*/true);
    return Init->isConstantInitializer(S.Context,
                                       VD->getType()->isReferenceType());
  };
  auto HasEmptyDtor = [&](VarDecl *VD) {
    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
      return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
    return true;
  };
  if (CheckKind == CICK_Shared)
    return IsEmptyInit(Init) && HasEmptyDtor(VD);
  return S.LangOpts.GPUAllowDeviceInit ||
         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
}
} // namespace

void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  // Return early if VD is inside a non-instantiated template function since
  // the implicit constructor is not defined yet.
  if (const FunctionDecl *FD =
          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext()))
    if (FD->isDependentContext())
      return;

  // Do not check dependent variables since the ctor/dtor/initializer are not
  // determined. Do it after instantiation.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
      IsDependentVar(VD))
    return;
  const Expr *Init = VD->getInit();
  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
  bool IsDeviceOrConstantVar =
      !IsSharedVar &&
      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
  if (IsDeviceOrConstantVar || IsSharedVar) {
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
      return;
    Diag(VD->getLocation(),
         IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
        << Init->getSourceRange();
    VD->setInvalidDecl();
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

void Sema::CUDARecordImplicitHostDeviceFuncUsedByDevice(
    const FunctionDecl *Callee) {
  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  if (!isCUDAImplicitHostDeviceFunction(Callee))
    return;

  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);

  // Record whether an implicit host device function is used on device side.
  if (CallerTarget != CFT_Device && CallerTarget != CFT_Global &&
      (CallerTarget != CFT_HostDevice ||
       (isCUDAImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
    return;

  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
//   allowed), or
// * a __device__ function with this signature was already declared, in which
//   case we output an error, unless the __device__ decl is in a system
//   header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  // If a template function has no host/device/global attributes,
  // make it an implicit host device function.
  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
      !NewD->hasAttr<CUDAGlobalAttr>() &&
      (NewD->getDescribedFunctionTemplate() ||
       NewD->isFunctionTemplateSpecialization())) {
    NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
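    //
    // Illustrative example (not part of the original source; bar is
    // hypothetical):
    //   __device__ float bar(float); // pre-existing __device__ overload
    //   constexpr float bar(float);  // would otherwise become implicitly
    //                                // __host__ __device__ and clash with
    //                                // the __device__ declaration above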
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

// TODO: `__constant__` memory may be a limited resource for certain targets.
// A safeguard may be needed at the end of the compilation pipeline if
// `__constant__` memory usage goes beyond the limit.
void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
      !VD->hasAttr<CUDASharedAttr>() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !IsDependentVar(VD) &&
      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
       HasAllowedCUDADeviceStaticInitializer(*this, VD,
                                             CICK_DeviceOrConstant))) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                       unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (IsLastErrorImmediate &&
          Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (getEmissionStatus(CurFunContext) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this);
}

Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in host
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (IsLastErrorImmediate &&
          Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (getEmissionStatus(CurFunContext) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this);
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  const auto &ExprEvalCtx = currentEvaluationContext();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
    // For -fgpu-rdc, keep track of external kernels used by host functions.
    if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
    return true;
  }

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee
      << IdentifyCUDATarget(Caller);
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, *this)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}

// Check the wrong-sided reference capture of lambda for CUDA/HIP.
// A lambda function may capture a stack variable by reference when it is
// defined and use the capture by reference when the lambda is called. When
// the capture and use happen on different sides, the capture is invalid and
// should be diagnosed.
void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on the
  // host side.
  // In such lambda functions, a reference capture is invalid only if the
  // lambda structure is populated by a device function or kernel and then
  // passed to and called by a host function. However, that is impossible,
  // since a device function or kernel can only call a device function, and a
  // kernel cannot pass a lambda back to a host function since we cannot
  // define a kernel argument type which can hold the lambda before the lambda
  // itself is defined.
  if (!LangOpts.CUDAIsDevice)
    return;

  // File-scope lambdas can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on device side. For such lambdas, a reference capture is invalid
  // only if the lambda structure is populated by a host function and then
  // passed to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, *this)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    // Capture of the this pointer is allowed since the this pointer may point
    // to managed memory which is accessible on both device and host sides. It
    // only results in an invalid memory access if the this pointer points to
    // memory not accessible on the device side.
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
                          *this);
  }
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
}

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so they should have the same implementation on both sides.
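    //
    // Illustrative example (not part of the original source; f and g are
    // hypothetical):
    //   __host__ void f(int);
    //   __device__ void f(int);          // OK: overloading by target
    //   __host__ __device__ void g(int);
    //   __device__ void g(int);          // error: HD vs. same-signature
    //                                    // device overload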
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice &&
          !(LangOpts.OffloadImplicitHostDeviceTemplates &&
            isCUDAImplicitHostDeviceFunction(NewFD) &&
            OldTarget == CFT_Device)) ||
         (OldTarget == CFT_HostDevice &&
          !(LangOpts.OffloadImplicitHostDeviceTemplates &&
            isCUDAImplicitHostDeviceFunction(OldFD) &&
            NewTarget == CFT_Device)) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}