1 //===- CalledOnceCheck.cpp - Check 'called once' parameters ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/Analyses/CalledOnceCheck.h" 10 #include "clang/AST/ASTContext.h" 11 #include "clang/AST/Attr.h" 12 #include "clang/AST/Decl.h" 13 #include "clang/AST/DeclBase.h" 14 #include "clang/AST/Expr.h" 15 #include "clang/AST/ExprObjC.h" 16 #include "clang/AST/OperationKinds.h" 17 #include "clang/AST/ParentMap.h" 18 #include "clang/AST/RecursiveASTVisitor.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtObjC.h" 21 #include "clang/AST/StmtVisitor.h" 22 #include "clang/AST/Type.h" 23 #include "clang/Analysis/AnalysisDeclContext.h" 24 #include "clang/Analysis/CFG.h" 25 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" 26 #include "clang/Basic/Builtins.h" 27 #include "clang/Basic/IdentifierTable.h" 28 #include "clang/Basic/LLVM.h" 29 #include "llvm/ADT/BitVector.h" 30 #include "llvm/ADT/BitmaskEnum.h" 31 #include "llvm/ADT/PointerIntPair.h" 32 #include "llvm/ADT/STLExtras.h" 33 #include "llvm/ADT/Sequence.h" 34 #include "llvm/ADT/SmallVector.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/Compiler.h" 38 #include "llvm/Support/ErrorHandling.h" 39 #include <memory> 40 #include <optional> 41 42 using namespace clang; 43 44 namespace { 45 static constexpr unsigned EXPECTED_MAX_NUMBER_OF_PARAMS = 2; 46 template <class T> 47 using ParamSizedVector = llvm::SmallVector<T, EXPECTED_MAX_NUMBER_OF_PARAMS>; 48 static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8; 49 template <class T> 50 using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>; 51 constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = { 52 "completionHandler", "completion", "withCompletionHandler", 53 "withCompletion", "completionBlock", "withCompletionBlock", 54 "replyTo", "reply", "withReplyTo"}; 55 constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = { 56 "WithCompletionHandler", "WithCompletion", "WithCompletionBlock", 57 "WithReplyTo", "WithReply"}; 58 constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = { 59 "error", "cancel", "shouldCall", "done", "OK", "success"}; 60 61 struct KnownCalledOnceParameter { 62 llvm::StringLiteral FunctionName; 63 unsigned ParamIndex; 64 }; 65 constexpr KnownCalledOnceParameter KNOWN_CALLED_ONCE_PARAMETERS[] = { 66 {llvm::StringLiteral{"dispatch_async"}, 1}, 67 {llvm::StringLiteral{"dispatch_async_and_wait"}, 1}, 68 {llvm::StringLiteral{"dispatch_after"}, 2}, 69 {llvm::StringLiteral{"dispatch_sync"}, 1}, 70 {llvm::StringLiteral{"dispatch_once"}, 1}, 71 {llvm::StringLiteral{"dispatch_barrier_async"}, 1}, 72 {llvm::StringLiteral{"dispatch_barrier_async_and_wait"}, 1}, 73 {llvm::StringLiteral{"dispatch_barrier_sync"}, 1}}; 74 75 class ParameterStatus { 76 public: 77 // Status kind is basically the main part of parameter's status. 78 // The kind represents our knowledge (so far) about a tracked parameter 79 // in the context of this analysis. 80 // 81 // Since we want to report on missing and extraneous calls, we need to 82 // track the fact whether paramater was called or not. This automatically 83 // decides two kinds: `NotCalled` and `Called`. 84 // 85 // One of the erroneous situations is the case when parameter is called only 86 // on some of the paths. We could've considered it `NotCalled`, but we want 87 // to report double call warnings even if these two calls are not guaranteed 88 // to happen in every execution. We also don't want to have it as `Called` 89 // because not calling tracked parameter on all of the paths is an error 90 // on its own. For these reasons, we need to have a separate kind, 91 // `MaybeCalled`, and change `Called` to `DefinitelyCalled` to avoid 92 // confusion. 93 // 94 // Two violations of calling parameter more than once and not calling it on 95 // every path are not, however, mutually exclusive. In situations where both 96 // violations take place, we prefer to report ONLY double call. It's always 97 // harder to pinpoint a bug that has arisen when a user neglects to take the 98 // right action (and therefore, no action is taken), than when a user takes 99 // the wrong action. And, in order to remember that we already reported 100 // a double call, we need another kind: `Reported`. 101 // 102 // Our analysis is intra-procedural and, while in the perfect world, 103 // developers only use tracked parameters to call them, in the real world, 104 // the picture might be different. Parameters can be stored in global 105 // variables or leaked into other functions that we know nothing about. 106 // We try to be lenient and trust users. Another kind `Escaped` reflects 107 // such situations. We don't know if it gets called there or not, but we 108 // should always think of `Escaped` as the best possible option. 109 // 110 // Some of the paths in the analyzed functions might end with a call 111 // to noreturn functions. Such paths are not required to have parameter 112 // calls and we want to track that. For the purposes of better diagnostics, 113 // we don't want to reuse `Escaped` and, thus, have another kind `NoReturn`. 114 // 115 // Additionally, we have `NotVisited` kind that tells us nothing about 116 // a tracked parameter, but is used for tracking analyzed (aka visited) 117 // basic blocks. 118 // 119 // If we consider `|` to be a JOIN operation of two kinds coming from 120 // two different paths, the following properties must hold: 121 // 122 // 1. for any Kind K: K | K == K 123 // Joining two identical kinds should result in the same kind. 124 // 125 // 2. for any Kind K: Reported | K == Reported 126 // Doesn't matter on which path it was reported, it still is. 127 // 128 // 3. for any Kind K: NoReturn | K == K 129 // We can totally ignore noreturn paths during merges. 130 // 131 // 4. DefinitelyCalled | NotCalled == MaybeCalled 132 // Called on one path, not called on another - that's simply 133 // a definition for MaybeCalled. 134 // 135 // 5. for any Kind K in [DefinitelyCalled, NotCalled, MaybeCalled]: 136 // Escaped | K == K 137 // Escaped mirrors other statuses after joins. 138 // Every situation, when we join any of the listed kinds K, 139 // is a violation. For this reason, in order to assume the 140 // best outcome for this escape, we consider it to be the 141 // same as the other path. 142 // 143 // 6. for any Kind K in [DefinitelyCalled, NotCalled]: 144 // MaybeCalled | K == MaybeCalled 145 // MaybeCalled should basically stay after almost every join. 146 enum Kind { 147 // No-return paths should be absolutely transparent for the analysis. 148 // 0x0 is the identity element for selected join operation (binary or). 149 NoReturn = 0x0, /* 0000 */ 150 // Escaped marks situations when marked parameter escaped into 151 // another function (so we can assume that it was possibly called there). 152 Escaped = 0x1, /* 0001 */ 153 // Parameter was definitely called once at this point. 154 DefinitelyCalled = 0x3, /* 0011 */ 155 // Kinds less or equal to NON_ERROR_STATUS are not considered errors. 156 NON_ERROR_STATUS = DefinitelyCalled, 157 // Parameter was not yet called. 158 NotCalled = 0x5, /* 0101 */ 159 // Parameter was not called at least on one path leading to this point, 160 // while there is also at least one path that it gets called. 161 MaybeCalled = 0x7, /* 0111 */ 162 // Parameter was not yet analyzed. 163 NotVisited = 0x8, /* 1000 */ 164 // We already reported a violation and stopped tracking calls for this 165 // parameter. 166 Reported = 0x15, /* 1111 */ 167 LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported) 168 }; 169 170 constexpr ParameterStatus() = default; 171 /* implicit */ ParameterStatus(Kind K) : StatusKind(K) { 172 assert(!seenAnyCalls(K) && "Can't initialize status without a call"); 173 } 174 ParameterStatus(Kind K, const Expr *Call) : StatusKind(K), Call(Call) { 175 assert(seenAnyCalls(K) && "This kind is not supposed to have a call"); 176 } 177 178 const Expr &getCall() const { 179 assert(seenAnyCalls(getKind()) && "ParameterStatus doesn't have a call"); 180 return *Call; 181 } 182 static bool seenAnyCalls(Kind K) { 183 return (K & DefinitelyCalled) == DefinitelyCalled && K != Reported; 184 } 185 bool seenAnyCalls() const { return seenAnyCalls(getKind()); } 186 187 static bool isErrorStatus(Kind K) { return K > NON_ERROR_STATUS; } 188 bool isErrorStatus() const { return isErrorStatus(getKind()); } 189 190 Kind getKind() const { return StatusKind; } 191 192 void join(const ParameterStatus &Other) { 193 // If we have a pointer already, let's keep it. 194 // For the purposes of the analysis, it doesn't really matter 195 // which call we report. 196 // 197 // If we don't have a pointer, let's take whatever gets joined. 198 if (!Call) { 199 Call = Other.Call; 200 } 201 // Join kinds. 202 StatusKind |= Other.getKind(); 203 } 204 205 bool operator==(const ParameterStatus &Other) const { 206 // We compare only kinds, pointers on their own is only additional 207 // information. 208 return getKind() == Other.getKind(); 209 } 210 211 private: 212 // It would've been a perfect place to use llvm::PointerIntPair, but 213 // unfortunately NumLowBitsAvailable for clang::Expr had been reduced to 2. 214 Kind StatusKind = NotVisited; 215 const Expr *Call = nullptr; 216 }; 217 218 /// State aggregates statuses of all tracked parameters. 219 class State { 220 public: 221 State(unsigned Size, ParameterStatus::Kind K = ParameterStatus::NotVisited) 222 : ParamData(Size, K) {} 223 224 /// Return status of a parameter with the given index. 225 /// \{ 226 ParameterStatus &getStatusFor(unsigned Index) { return ParamData[Index]; } 227 const ParameterStatus &getStatusFor(unsigned Index) const { 228 return ParamData[Index]; 229 } 230 /// \} 231 232 /// Return true if parameter with the given index can be called. 233 bool seenAnyCalls(unsigned Index) const { 234 return getStatusFor(Index).seenAnyCalls(); 235 } 236 /// Return a reference that we consider a call. 237 /// 238 /// Should only be used for parameters that can be called. 239 const Expr &getCallFor(unsigned Index) const { 240 return getStatusFor(Index).getCall(); 241 } 242 /// Return status kind of parameter with the given index. 243 ParameterStatus::Kind getKindFor(unsigned Index) const { 244 return getStatusFor(Index).getKind(); 245 } 246 247 bool isVisited() const { 248 return llvm::all_of(ParamData, [](const ParameterStatus &S) { 249 return S.getKind() != ParameterStatus::NotVisited; 250 }); 251 } 252 253 // Join other state into the current state. 254 void join(const State &Other) { 255 assert(ParamData.size() == Other.ParamData.size() && 256 "Couldn't join statuses with different sizes"); 257 for (auto Pair : llvm::zip(ParamData, Other.ParamData)) { 258 std::get<0>(Pair).join(std::get<1>(Pair)); 259 } 260 } 261 262 using iterator = ParamSizedVector<ParameterStatus>::iterator; 263 using const_iterator = ParamSizedVector<ParameterStatus>::const_iterator; 264 265 iterator begin() { return ParamData.begin(); } 266 iterator end() { return ParamData.end(); } 267 268 const_iterator begin() const { return ParamData.begin(); } 269 const_iterator end() const { return ParamData.end(); } 270 271 bool operator==(const State &Other) const { 272 return ParamData == Other.ParamData; 273 } 274 275 private: 276 ParamSizedVector<ParameterStatus> ParamData; 277 }; 278 279 /// A simple class that finds DeclRefExpr in the given expression. 280 /// 281 /// However, we don't want to find ANY nested DeclRefExpr skipping whatever 282 /// expressions on our way. Only certain expressions considered "no-op" 283 /// for our task are indeed skipped. 284 class DeclRefFinder 285 : public ConstStmtVisitor<DeclRefFinder, const DeclRefExpr *> { 286 public: 287 /// Find a DeclRefExpr in the given expression. 288 /// 289 /// In its most basic form (ShouldRetrieveFromComparisons == false), 290 /// this function can be simply reduced to the following question: 291 /// 292 /// - If expression E is used as a function argument, could we say 293 /// that DeclRefExpr nested in E is used as an argument? 294 /// 295 /// According to this rule, we can say that parens, casts and dereferencing 296 /// (dereferencing only applied to function pointers, but this is our case) 297 /// can be skipped. 298 /// 299 /// When we should look into comparisons the question changes to: 300 /// 301 /// - If expression E is used as a condition, could we say that 302 /// DeclRefExpr is being checked? 303 /// 304 /// And even though, these are two different questions, they have quite a lot 305 /// in common. Actually, we can say that whatever expression answers 306 /// positively the first question also fits the second question as well. 307 /// 308 /// In addition, we skip binary operators == and !=, and unary opeartor !. 309 static const DeclRefExpr *find(const Expr *E, 310 bool ShouldRetrieveFromComparisons = false) { 311 return DeclRefFinder(ShouldRetrieveFromComparisons).Visit(E); 312 } 313 314 const DeclRefExpr *VisitDeclRefExpr(const DeclRefExpr *DR) { return DR; } 315 316 const DeclRefExpr *VisitUnaryOperator(const UnaryOperator *UO) { 317 switch (UO->getOpcode()) { 318 case UO_LNot: 319 // We care about logical not only if we care about comparisons. 320 if (!ShouldRetrieveFromComparisons) 321 return nullptr; 322 [[fallthrough]]; 323 // Function pointer/references can be dereferenced before a call. 324 // That doesn't make it, however, any different from a regular call. 325 // For this reason, dereference operation is a "no-op". 326 case UO_Deref: 327 return Visit(UO->getSubExpr()); 328 default: 329 return nullptr; 330 } 331 } 332 333 const DeclRefExpr *VisitBinaryOperator(const BinaryOperator *BO) { 334 if (!ShouldRetrieveFromComparisons) 335 return nullptr; 336 337 switch (BO->getOpcode()) { 338 case BO_EQ: 339 case BO_NE: { 340 const DeclRefExpr *LHS = Visit(BO->getLHS()); 341 return LHS ? LHS : Visit(BO->getRHS()); 342 } 343 default: 344 return nullptr; 345 } 346 } 347 348 const DeclRefExpr *VisitOpaqueValueExpr(const OpaqueValueExpr *OVE) { 349 return Visit(OVE->getSourceExpr()); 350 } 351 352 const DeclRefExpr *VisitCallExpr(const CallExpr *CE) { 353 if (!ShouldRetrieveFromComparisons) 354 return nullptr; 355 356 // We want to see through some of the boolean builtin functions 357 // that we are likely to see in conditions. 358 switch (CE->getBuiltinCallee()) { 359 case Builtin::BI__builtin_expect: 360 case Builtin::BI__builtin_expect_with_probability: { 361 assert(CE->getNumArgs() >= 2); 362 363 const DeclRefExpr *Candidate = Visit(CE->getArg(0)); 364 return Candidate != nullptr ? Candidate : Visit(CE->getArg(1)); 365 } 366 367 case Builtin::BI__builtin_unpredictable: 368 return Visit(CE->getArg(0)); 369 370 default: 371 return nullptr; 372 } 373 } 374 375 const DeclRefExpr *VisitExpr(const Expr *E) { 376 // It is a fallback method that gets called whenever the actual type 377 // of the given expression is not covered. 378 // 379 // We first check if we have anything to skip. And then repeat the whole 380 // procedure for a nested expression instead. 381 const Expr *DeclutteredExpr = E->IgnoreParenCasts(); 382 return E != DeclutteredExpr ? Visit(DeclutteredExpr) : nullptr; 383 } 384 385 private: 386 DeclRefFinder(bool ShouldRetrieveFromComparisons) 387 : ShouldRetrieveFromComparisons(ShouldRetrieveFromComparisons) {} 388 389 bool ShouldRetrieveFromComparisons; 390 }; 391 392 const DeclRefExpr *findDeclRefExpr(const Expr *In, 393 bool ShouldRetrieveFromComparisons = false) { 394 return DeclRefFinder::find(In, ShouldRetrieveFromComparisons); 395 } 396 397 const ParmVarDecl * 398 findReferencedParmVarDecl(const Expr *In, 399 bool ShouldRetrieveFromComparisons = false) { 400 if (const DeclRefExpr *DR = 401 findDeclRefExpr(In, ShouldRetrieveFromComparisons)) { 402 return dyn_cast<ParmVarDecl>(DR->getDecl()); 403 } 404 405 return nullptr; 406 } 407 408 /// Return conditions expression of a statement if it has one. 409 const Expr *getCondition(const Stmt *S) { 410 if (!S) { 411 return nullptr; 412 } 413 414 if (const auto *If = dyn_cast<IfStmt>(S)) { 415 return If->getCond(); 416 } 417 if (const auto *Ternary = dyn_cast<AbstractConditionalOperator>(S)) { 418 return Ternary->getCond(); 419 } 420 421 return nullptr; 422 } 423 424 /// A small helper class that collects all named identifiers in the given 425 /// expression. It traverses it recursively, so names from deeper levels 426 /// of the AST will end up in the results. 427 /// Results might have duplicate names, if this is a problem, convert to 428 /// string sets afterwards. 429 class NamesCollector : public RecursiveASTVisitor<NamesCollector> { 430 public: 431 static constexpr unsigned EXPECTED_NUMBER_OF_NAMES = 5; 432 using NameCollection = 433 llvm::SmallVector<llvm::StringRef, EXPECTED_NUMBER_OF_NAMES>; 434 435 static NameCollection collect(const Expr *From) { 436 NamesCollector Impl; 437 Impl.TraverseStmt(const_cast<Expr *>(From)); 438 return Impl.Result; 439 } 440 441 bool VisitDeclRefExpr(const DeclRefExpr *E) { 442 Result.push_back(E->getDecl()->getName()); 443 return true; 444 } 445 446 bool VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *E) { 447 llvm::StringRef Name; 448 449 if (E->isImplicitProperty()) { 450 ObjCMethodDecl *PropertyMethodDecl = nullptr; 451 if (E->isMessagingGetter()) { 452 PropertyMethodDecl = E->getImplicitPropertyGetter(); 453 } else { 454 PropertyMethodDecl = E->getImplicitPropertySetter(); 455 } 456 assert(PropertyMethodDecl && 457 "Implicit property must have associated declaration"); 458 Name = PropertyMethodDecl->getSelector().getNameForSlot(0); 459 } else { 460 assert(E->isExplicitProperty()); 461 Name = E->getExplicitProperty()->getName(); 462 } 463 464 Result.push_back(Name); 465 return true; 466 } 467 468 private: 469 NamesCollector() = default; 470 NameCollection Result; 471 }; 472 473 /// Check whether the given expression mentions any of conventional names. 474 bool mentionsAnyOfConventionalNames(const Expr *E) { 475 NamesCollector::NameCollection MentionedNames = NamesCollector::collect(E); 476 477 return llvm::any_of(MentionedNames, [](llvm::StringRef ConditionName) { 478 return llvm::any_of( 479 CONVENTIONAL_CONDITIONS, 480 [ConditionName](const llvm::StringLiteral &Conventional) { 481 return ConditionName.contains_insensitive(Conventional); 482 }); 483 }); 484 } 485 486 /// Clarification is a simple pair of a reason why parameter is not called 487 /// on every path and a statement to blame. 488 struct Clarification { 489 NeverCalledReason Reason; 490 const Stmt *Location; 491 }; 492 493 /// A helper class that can produce a clarification based on the given pair 494 /// of basic blocks. 495 class NotCalledClarifier 496 : public ConstStmtVisitor<NotCalledClarifier, 497 std::optional<Clarification>> { 498 public: 499 /// The main entrypoint for the class, the function that tries to find the 500 /// clarification of how to explain which sub-path starts with a CFG edge 501 /// from Conditional to SuccWithoutCall. 502 /// 503 /// This means that this function has one precondition: 504 /// SuccWithoutCall should be a successor block for Conditional. 505 /// 506 /// Because clarification is not needed for non-trivial pairs of blocks 507 /// (i.e. SuccWithoutCall is not the only successor), it returns meaningful 508 /// results only for such cases. For this very reason, the parent basic 509 /// block, Conditional, is named that way, so it is clear what kind of 510 /// block is expected. 511 static std::optional<Clarification> clarify(const CFGBlock *Conditional, 512 const CFGBlock *SuccWithoutCall) { 513 if (const Stmt *Terminator = Conditional->getTerminatorStmt()) { 514 return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator); 515 } 516 return std::nullopt; 517 } 518 519 std::optional<Clarification> VisitIfStmt(const IfStmt *If) { 520 return VisitBranchingBlock(If, NeverCalledReason::IfThen); 521 } 522 523 std::optional<Clarification> 524 VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) { 525 return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen); 526 } 527 528 std::optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) { 529 const Stmt *CaseToBlame = SuccInQuestion->getLabel(); 530 if (!CaseToBlame) { 531 // If interesting basic block is not labeled, it means that this 532 // basic block does not represent any of the cases. 533 return Clarification{NeverCalledReason::SwitchSkipped, Switch}; 534 } 535 536 for (const SwitchCase *Case = Switch->getSwitchCaseList(); Case; 537 Case = Case->getNextSwitchCase()) { 538 if (Case == CaseToBlame) { 539 return Clarification{NeverCalledReason::Switch, Case}; 540 } 541 } 542 543 llvm_unreachable("Found unexpected switch structure"); 544 } 545 546 std::optional<Clarification> VisitForStmt(const ForStmt *For) { 547 return VisitBranchingBlock(For, NeverCalledReason::LoopEntered); 548 } 549 550 std::optional<Clarification> VisitWhileStmt(const WhileStmt *While) { 551 return VisitBranchingBlock(While, NeverCalledReason::LoopEntered); 552 } 553 554 std::optional<Clarification> 555 VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) { 556 assert(Parent->succ_size() == 2 && 557 "Branching block should have exactly two successors"); 558 unsigned SuccessorIndex = getSuccessorIndex(Parent, SuccInQuestion); 559 NeverCalledReason ActualReason = 560 updateForSuccessor(DefaultReason, SuccessorIndex); 561 return Clarification{ActualReason, Terminator}; 562 } 563 564 std::optional<Clarification> VisitBinaryOperator(const BinaryOperator *) { 565 // We don't want to report on short-curcuit logical operations. 566 return std::nullopt; 567 } 568 569 std::optional<Clarification> VisitStmt(const Stmt *Terminator) { 570 // If we got here, we didn't have a visit function for more derived 571 // classes of statement that this terminator actually belongs to. 572 // 573 // This is not a good scenario and should not happen in practice, but 574 // at least we'll warn the user. 575 return Clarification{NeverCalledReason::FallbackReason, Terminator}; 576 } 577 578 static unsigned getSuccessorIndex(const CFGBlock *Parent, 579 const CFGBlock *Child) { 580 CFGBlock::const_succ_iterator It = llvm::find(Parent->succs(), Child); 581 assert(It != Parent->succ_end() && 582 "Given blocks should be in parent-child relationship"); 583 return It - Parent->succ_begin(); 584 } 585 586 static NeverCalledReason 587 updateForSuccessor(NeverCalledReason ReasonForTrueBranch, 588 unsigned SuccessorIndex) { 589 assert(SuccessorIndex <= 1); 590 unsigned RawReason = 591 static_cast<unsigned>(ReasonForTrueBranch) + SuccessorIndex; 592 assert(RawReason <= 593 static_cast<unsigned>(NeverCalledReason::LARGEST_VALUE)); 594 return static_cast<NeverCalledReason>(RawReason); 595 } 596 597 private: 598 NotCalledClarifier(const CFGBlock *Parent, const CFGBlock *SuccInQuestion) 599 : Parent(Parent), SuccInQuestion(SuccInQuestion) {} 600 601 const CFGBlock *Parent, *SuccInQuestion; 602 }; 603 604 class CalledOnceChecker : public ConstStmtVisitor<CalledOnceChecker> { 605 public: 606 static void check(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, 607 bool CheckConventionalParameters) { 608 CalledOnceChecker(AC, Handler, CheckConventionalParameters).check(); 609 } 610 611 private: 612 CalledOnceChecker(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, 613 bool CheckConventionalParameters) 614 : FunctionCFG(*AC.getCFG()), AC(AC), Handler(Handler), 615 CheckConventionalParameters(CheckConventionalParameters), 616 CurrentState(0) { 617 initDataStructures(); 618 assert((size() == 0 || !States.empty()) && 619 "Data structures are inconsistent"); 620 } 621 622 //===----------------------------------------------------------------------===// 623 // Initializing functions 624 //===----------------------------------------------------------------------===// 625 626 void initDataStructures() { 627 const Decl *AnalyzedDecl = AC.getDecl(); 628 629 if (const auto *Function = dyn_cast<FunctionDecl>(AnalyzedDecl)) { 630 findParamsToTrack(Function); 631 } else if (const auto *Method = dyn_cast<ObjCMethodDecl>(AnalyzedDecl)) { 632 findParamsToTrack(Method); 633 } else if (const auto *Block = dyn_cast<BlockDecl>(AnalyzedDecl)) { 634 findCapturesToTrack(Block); 635 findParamsToTrack(Block); 636 } 637 638 // Have something to track, let's init states for every block from the CFG. 639 if (size() != 0) { 640 States = 641 CFGSizedVector<State>(FunctionCFG.getNumBlockIDs(), State(size())); 642 } 643 } 644 645 void findCapturesToTrack(const BlockDecl *Block) { 646 for (const auto &Capture : Block->captures()) { 647 if (const auto *P = dyn_cast<ParmVarDecl>(Capture.getVariable())) { 648 // Parameter DeclContext is its owning function or method. 649 const DeclContext *ParamContext = P->getDeclContext(); 650 if (shouldBeCalledOnce(ParamContext, P)) { 651 TrackedParams.push_back(P); 652 } 653 } 654 } 655 } 656 657 template <class FunctionLikeDecl> 658 void findParamsToTrack(const FunctionLikeDecl *Function) { 659 for (unsigned Index : llvm::seq<unsigned>(0u, Function->param_size())) { 660 if (shouldBeCalledOnce(Function, Index)) { 661 TrackedParams.push_back(Function->getParamDecl(Index)); 662 } 663 } 664 } 665 666 //===----------------------------------------------------------------------===// 667 // Main logic 'check' functions 668 //===----------------------------------------------------------------------===// 669 670 void check() { 671 // Nothing to check here: we don't have marked parameters. 672 if (size() == 0 || isPossiblyEmptyImpl()) 673 return; 674 675 assert( 676 llvm::none_of(States, [](const State &S) { return S.isVisited(); }) && 677 "None of the blocks should be 'visited' before the analysis"); 678 679 // For our task, both backward and forward approaches suite well. 680 // However, in order to report better diagnostics, we decided to go with 681 // backward analysis. 682 // 683 // Let's consider the following CFG and how forward and backward analyses 684 // will work for it. 685 // 686 // FORWARD: | BACKWARD: 687 // #1 | #1 688 // +---------+ | +-----------+ 689 // | if | | |MaybeCalled| 690 // +---------+ | +-----------+ 691 // |NotCalled| | | if | 692 // +---------+ | +-----------+ 693 // / \ | / \ 694 // #2 / \ #3 | #2 / \ #3 695 // +----------------+ +---------+ | +----------------+ +---------+ 696 // | foo() | | ... | | |DefinitelyCalled| |NotCalled| 697 // +----------------+ +---------+ | +----------------+ +---------+ 698 // |DefinitelyCalled| |NotCalled| | | foo() | | ... | 699 // +----------------+ +---------+ | +----------------+ +---------+ 700 // \ / | \ / 701 // \ #4 / | \ #4 / 702 // +-----------+ | +---------+ 703 // | ... | | |NotCalled| 704 // +-----------+ | +---------+ 705 // |MaybeCalled| | | ... | 706 // +-----------+ | +---------+ 707 // 708 // The most natural way to report lacking call in the block #3 would be to 709 // message that the false branch of the if statement in the block #1 doesn't 710 // have a call. And while with the forward approach we'll need to find a 711 // least common ancestor or something like that to find the 'if' to blame, 712 // backward analysis gives it to us out of the box. 713 BackwardDataflowWorklist Worklist(FunctionCFG, AC); 714 715 // Let's visit EXIT. 716 const CFGBlock *Exit = &FunctionCFG.getExit(); 717 assignState(Exit, State(size(), ParameterStatus::NotCalled)); 718 Worklist.enqueuePredecessors(Exit); 719 720 while (const CFGBlock *BB = Worklist.dequeue()) { 721 assert(BB && "Worklist should filter out null blocks"); 722 check(BB); 723 assert(CurrentState.isVisited() && 724 "After the check, basic block should be visited"); 725 726 // Traverse successor basic blocks if the status of this block 727 // has changed. 728 if (assignState(BB, CurrentState)) { 729 Worklist.enqueuePredecessors(BB); 730 } 731 } 732 733 // Check that we have all tracked parameters at the last block. 734 // As we are performing a backward version of the analysis, 735 // it should be the ENTRY block. 736 checkEntry(&FunctionCFG.getEntry()); 737 } 738 739 void check(const CFGBlock *BB) { 740 // We start with a state 'inherited' from all the successors. 741 CurrentState = joinSuccessors(BB); 742 assert(CurrentState.isVisited() && 743 "Shouldn't start with a 'not visited' state"); 744 745 // This is the 'exit' situation, broken promises are probably OK 746 // in such scenarios. 747 if (BB->hasNoReturnElement()) { 748 markNoReturn(); 749 // This block still can have calls (even multiple calls) and 750 // for this reason there is no early return here. 751 } 752 753 // We use a backward dataflow propagation and for this reason we 754 // should traverse basic blocks bottom-up. 755 for (const CFGElement &Element : llvm::reverse(*BB)) { 756 if (std::optional<CFGStmt> S = Element.getAs<CFGStmt>()) { 757 check(S->getStmt()); 758 } 759 } 760 } 761 void check(const Stmt *S) { Visit(S); } 762 763 void checkEntry(const CFGBlock *Entry) { 764 // We finalize this algorithm with the ENTRY block because 765 // we use a backward version of the analysis. This is where 766 // we can judge that some of the tracked parameters are not called on 767 // every path from ENTRY to EXIT. 768 769 const State &EntryStatus = getState(Entry); 770 llvm::BitVector NotCalledOnEveryPath(size(), false); 771 llvm::BitVector NotUsedOnEveryPath(size(), false); 772 773 // Check if there are no calls of the marked parameter at all 774 for (const auto &IndexedStatus : llvm::enumerate(EntryStatus)) { 775 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); 776 777 switch (IndexedStatus.value().getKind()) { 778 case ParameterStatus::NotCalled: 779 // If there were places where this parameter escapes (aka being used), 780 // we can provide a more useful diagnostic by pointing at the exact 781 // branches where it is not even mentioned. 782 if (!hasEverEscaped(IndexedStatus.index())) { 783 // This parameter is was not used at all, so we should report the 784 // most generic version of the warning. 785 if (isCaptured(Parameter)) { 786 // We want to specify that it was captured by the block. 787 Handler.handleCapturedNeverCalled(Parameter, AC.getDecl(), 788 !isExplicitlyMarked(Parameter)); 789 } else { 790 Handler.handleNeverCalled(Parameter, 791 !isExplicitlyMarked(Parameter)); 792 } 793 } else { 794 // Mark it as 'interesting' to figure out which paths don't even 795 // have escapes. 796 NotUsedOnEveryPath[IndexedStatus.index()] = true; 797 } 798 799 break; 800 case ParameterStatus::MaybeCalled: 801 // If we have 'maybe called' at this point, we have an error 802 // that there is at least one path where this parameter 803 // is not called. 804 // 805 // However, reporting the warning with only that information can be 806 // too vague for the users. For this reason, we mark such parameters 807 // as "interesting" for further analysis. 808 NotCalledOnEveryPath[IndexedStatus.index()] = true; 809 break; 810 default: 811 break; 812 } 813 } 814 815 // Early exit if we don't have parameters for extra analysis... 816 if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none() && 817 // ... or if we've seen variables with cleanup functions. 818 // We can't reason that we've seen every path in this case, 819 // and thus abandon reporting any warnings that imply that. 820 !FunctionHasCleanupVars) 821 return; 822 823 // We are looking for a pair of blocks A, B so that the following is true: 824 // * A is a predecessor of B 825 // * B is marked as NotCalled 826 // * A has at least one successor marked as either 827 // Escaped or DefinitelyCalled 828 // 829 // In that situation, it is guaranteed that B is the first block of the path 830 // where the user doesn't call or use parameter in question. 831 // 832 // For this reason, branch A -> B can be used for reporting. 833 // 834 // This part of the algorithm is guarded by a condition that the function 835 // does indeed have a violation of contract. For this reason, we can 836 // spend more time to find a good spot to place the warning. 837 // 838 // The following algorithm has the worst case complexity of O(V + E), 839 // where V is the number of basic blocks in FunctionCFG, 840 // E is the number of edges between blocks in FunctionCFG. 841 for (const CFGBlock *BB : FunctionCFG) { 842 if (!BB) 843 continue; 844 845 const State &BlockState = getState(BB); 846 847 for (unsigned Index : llvm::seq(0u, size())) { 848 // We don't want to use 'isLosingCall' here because we want to report 849 // the following situation as well: 850 // 851 // MaybeCalled 852 // | ... | 853 // MaybeCalled NotCalled 854 // 855 // Even though successor is not 'DefinitelyCalled', it is still useful 856 // to report it, it is still a path without a call. 857 if (NotCalledOnEveryPath[Index] && 858 BlockState.getKindFor(Index) == ParameterStatus::MaybeCalled) { 859 860 findAndReportNotCalledBranches(BB, Index); 861 } else if (NotUsedOnEveryPath[Index] && 862 isLosingEscape(BlockState, BB, Index)) { 863 864 findAndReportNotCalledBranches(BB, Index, /* IsEscape = */ true); 865 } 866 } 867 } 868 } 869 870 /// Check potential call of a tracked parameter. 871 void checkDirectCall(const CallExpr *Call) { 872 if (auto Index = getIndexOfCallee(Call)) { 873 processCallFor(*Index, Call); 874 } 875 } 876 877 /// Check the call expression for being an indirect call of one of the tracked 878 /// parameters. It is indirect in the sense that this particular call is not 879 /// calling the parameter itself, but rather uses it as the argument. 880 template <class CallLikeExpr> 881 void checkIndirectCall(const CallLikeExpr *CallOrMessage) { 882 // CallExpr::arguments does not interact nicely with llvm::enumerate. 883 llvm::ArrayRef<const Expr *> Arguments = 884 llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); 885 886 // Let's check if any of the call arguments is a point of interest. 887 for (const auto &Argument : llvm::enumerate(Arguments)) { 888 if (auto Index = getIndexOfExpression(Argument.value())) { 889 if (shouldBeCalledOnce(CallOrMessage, Argument.index())) { 890 // If the corresponding parameter is marked as 'called_once' we should 891 // consider it as a call. 892 processCallFor(*Index, CallOrMessage); 893 } else { 894 // Otherwise, we mark this parameter as escaped, which can be 895 // interpreted both as called or not called depending on the context. 896 processEscapeFor(*Index); 897 } 898 // Otherwise, let's keep the state as it is. 899 } 900 } 901 } 902 903 /// Process call of the parameter with the given index 904 void processCallFor(unsigned Index, const Expr *Call) { 905 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); 906 907 if (CurrentParamStatus.seenAnyCalls()) { 908 909 // At this point, this parameter was called, so this is a second call. 910 const ParmVarDecl *Parameter = getParameter(Index); 911 Handler.handleDoubleCall( 912 Parameter, &CurrentState.getCallFor(Index), Call, 913 !isExplicitlyMarked(Parameter), 914 // We are sure that the second call is definitely 915 // going to happen if the status is 'DefinitelyCalled'. 916 CurrentParamStatus.getKind() == ParameterStatus::DefinitelyCalled); 917 918 // Mark this parameter as already reported on, so we don't repeat 919 // warnings. 920 CurrentParamStatus = ParameterStatus::Reported; 921 922 } else if (CurrentParamStatus.getKind() != ParameterStatus::Reported) { 923 // If we didn't report anything yet, let's mark this parameter 924 // as called. 925 ParameterStatus Called(ParameterStatus::DefinitelyCalled, Call); 926 CurrentParamStatus = Called; 927 } 928 } 929 930 /// Process escape of the parameter with the given index 931 void processEscapeFor(unsigned Index) { 932 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); 933 934 // Escape overrides whatever error we think happened. 935 if (CurrentParamStatus.isErrorStatus()) { 936 CurrentParamStatus = ParameterStatus::Escaped; 937 } 938 } 939 940 void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index, 941 bool IsEscape = false) { 942 for (const CFGBlock *Succ : Parent->succs()) { 943 if (!Succ) 944 continue; 945 946 if (getState(Succ).getKindFor(Index) == ParameterStatus::NotCalled) { 947 assert(Parent->succ_size() >= 2 && 948 "Block should have at least two successors at this point"); 949 if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) { 950 const ParmVarDecl *Parameter = getParameter(Index); 951 Handler.handleNeverCalled( 952 Parameter, AC.getDecl(), Clarification->Location, 953 Clarification->Reason, !IsEscape, !isExplicitlyMarked(Parameter)); 954 } 955 } 956 } 957 } 958 959 //===----------------------------------------------------------------------===// 960 // Predicate functions to check parameters 961 //===----------------------------------------------------------------------===// 962 963 /// Return true if parameter is explicitly marked as 'called_once'. 964 static bool isExplicitlyMarked(const ParmVarDecl *Parameter) { 965 return Parameter->hasAttr<CalledOnceAttr>(); 966 } 967 968 /// Return true if the given name matches conventional pattens. 969 static bool isConventional(llvm::StringRef Name) { 970 return llvm::count(CONVENTIONAL_NAMES, Name) != 0; 971 } 972 973 /// Return true if the given name has conventional suffixes. 974 static bool hasConventionalSuffix(llvm::StringRef Name) { 975 return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) { 976 return Name.endswith(Suffix); 977 }); 978 } 979 980 /// Return true if the given type can be used for conventional parameters. 981 static bool isConventional(QualType Ty) { 982 if (!Ty->isBlockPointerType()) { 983 return false; 984 } 985 986 QualType BlockType = Ty->castAs<BlockPointerType>()->getPointeeType(); 987 // Completion handlers should have a block type with void return type. 988 return BlockType->castAs<FunctionType>()->getReturnType()->isVoidType(); 989 } 990 991 /// Return true if the only parameter of the function is conventional. 992 static bool isOnlyParameterConventional(const FunctionDecl *Function) { 993 IdentifierInfo *II = Function->getIdentifier(); 994 return Function->getNumParams() == 1 && II && 995 hasConventionalSuffix(II->getName()); 996 } 997 998 /// Return true/false if 'swift_async' attribute states that the given 999 /// parameter is conventionally called once. 1000 /// Return std::nullopt if the given declaration doesn't have 'swift_async' 1001 /// attribute. 1002 static std::optional<bool> isConventionalSwiftAsync(const Decl *D, 1003 unsigned ParamIndex) { 1004 if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) { 1005 if (A->getKind() == SwiftAsyncAttr::None) { 1006 return false; 1007 } 1008 1009 return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex; 1010 } 1011 return std::nullopt; 1012 } 1013 1014 /// Return true if the specified selector represents init method. 1015 static bool isInitMethod(Selector MethodSelector) { 1016 return MethodSelector.getMethodFamily() == OMF_init; 1017 } 1018 1019 /// Return true if the specified selector piece matches conventions. 1020 static bool isConventionalSelectorPiece(Selector MethodSelector, 1021 unsigned PieceIndex, 1022 QualType PieceType) { 1023 if (!isConventional(PieceType) || isInitMethod(MethodSelector)) { 1024 return false; 1025 } 1026 1027 if (MethodSelector.getNumArgs() == 1) { 1028 assert(PieceIndex == 0); 1029 return hasConventionalSuffix(MethodSelector.getNameForSlot(0)); 1030 } 1031 1032 llvm::StringRef PieceName = MethodSelector.getNameForSlot(PieceIndex); 1033 return isConventional(PieceName) || hasConventionalSuffix(PieceName); 1034 } 1035 1036 bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const { 1037 return isExplicitlyMarked(Parameter) || 1038 (CheckConventionalParameters && 1039 (isConventional(Parameter->getName()) || 1040 hasConventionalSuffix(Parameter->getName())) && 1041 isConventional(Parameter->getType())); 1042 } 1043 1044 bool shouldBeCalledOnce(const DeclContext *ParamContext, 1045 const ParmVarDecl *Param) { 1046 unsigned ParamIndex = Param->getFunctionScopeIndex(); 1047 if (const auto *Function = dyn_cast<FunctionDecl>(ParamContext)) { 1048 return shouldBeCalledOnce(Function, ParamIndex); 1049 } 1050 if (const auto *Method = dyn_cast<ObjCMethodDecl>(ParamContext)) { 1051 return shouldBeCalledOnce(Method, ParamIndex); 1052 } 1053 return shouldBeCalledOnce(Param); 1054 } 1055 1056 bool shouldBeCalledOnce(const BlockDecl *Block, unsigned ParamIndex) const { 1057 return shouldBeCalledOnce(Block->getParamDecl(ParamIndex)); 1058 } 1059 1060 bool shouldBeCalledOnce(const FunctionDecl *Function, 1061 unsigned ParamIndex) const { 1062 if (ParamIndex >= Function->getNumParams()) { 1063 return false; 1064 } 1065 // 'swift_async' goes first and overrides anything else. 1066 if (auto ConventionalAsync = 1067 isConventionalSwiftAsync(Function, ParamIndex)) { 1068 return *ConventionalAsync; 1069 } 1070 1071 return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) || 1072 (CheckConventionalParameters && 1073 isOnlyParameterConventional(Function)); 1074 } 1075 1076 bool shouldBeCalledOnce(const ObjCMethodDecl *Method, 1077 unsigned ParamIndex) const { 1078 Selector MethodSelector = Method->getSelector(); 1079 if (ParamIndex >= MethodSelector.getNumArgs()) { 1080 return false; 1081 } 1082 1083 // 'swift_async' goes first and overrides anything else. 1084 if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) { 1085 return *ConventionalAsync; 1086 } 1087 1088 const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex); 1089 return shouldBeCalledOnce(Parameter) || 1090 (CheckConventionalParameters && 1091 isConventionalSelectorPiece(MethodSelector, ParamIndex, 1092 Parameter->getType())); 1093 } 1094 1095 bool shouldBeCalledOnce(const CallExpr *Call, unsigned ParamIndex) const { 1096 const FunctionDecl *Function = Call->getDirectCallee(); 1097 return Function && shouldBeCalledOnce(Function, ParamIndex); 1098 } 1099 1100 bool shouldBeCalledOnce(const ObjCMessageExpr *Message, 1101 unsigned ParamIndex) const { 1102 const ObjCMethodDecl *Method = Message->getMethodDecl(); 1103 return Method && ParamIndex < Method->param_size() && 1104 shouldBeCalledOnce(Method, ParamIndex); 1105 } 1106 1107 //===----------------------------------------------------------------------===// 1108 // Utility methods 1109 //===----------------------------------------------------------------------===// 1110 1111 bool isCaptured(const ParmVarDecl *Parameter) const { 1112 if (const BlockDecl *Block = dyn_cast<BlockDecl>(AC.getDecl())) { 1113 return Block->capturesVariable(Parameter); 1114 } 1115 return false; 1116 } 1117 1118 // Return a call site where the block is called exactly once or null otherwise 1119 const Expr *getBlockGuaraneedCallSite(const BlockExpr *Block) const { 1120 ParentMap &PM = AC.getParentMap(); 1121 1122 // We don't want to track the block through assignments and so on, instead 1123 // we simply see how the block used and if it's used directly in a call, 1124 // we decide based on call to what it is. 1125 // 1126 // In order to do this, we go up the parents of the block looking for 1127 // a call or a message expressions. These might not be immediate parents 1128 // of the actual block expression due to casts and parens, so we skip them. 1129 for (const Stmt *Prev = Block, *Current = PM.getParent(Block); 1130 Current != nullptr; Prev = Current, Current = PM.getParent(Current)) { 1131 // Skip no-op (for our case) operations. 1132 if (isa<CastExpr>(Current) || isa<ParenExpr>(Current)) 1133 continue; 1134 1135 // At this point, Prev represents our block as an immediate child of the 1136 // call. 1137 if (const auto *Call = dyn_cast<CallExpr>(Current)) { 1138 // It might be the call of the Block itself... 1139 if (Call->getCallee() == Prev) 1140 return Call; 1141 1142 // ...or it can be an indirect call of the block. 1143 return shouldBlockArgumentBeCalledOnce(Call, Prev) ? Call : nullptr; 1144 } 1145 if (const auto *Message = dyn_cast<ObjCMessageExpr>(Current)) { 1146 return shouldBlockArgumentBeCalledOnce(Message, Prev) ? Message 1147 : nullptr; 1148 } 1149 1150 break; 1151 } 1152 1153 return nullptr; 1154 } 1155 1156 template <class CallLikeExpr> 1157 bool shouldBlockArgumentBeCalledOnce(const CallLikeExpr *CallOrMessage, 1158 const Stmt *BlockArgument) const { 1159 // CallExpr::arguments does not interact nicely with llvm::enumerate. 1160 llvm::ArrayRef<const Expr *> Arguments = 1161 llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); 1162 1163 for (const auto &Argument : llvm::enumerate(Arguments)) { 1164 if (Argument.value() == BlockArgument) { 1165 return shouldBlockArgumentBeCalledOnce(CallOrMessage, Argument.index()); 1166 } 1167 } 1168 1169 return false; 1170 } 1171 1172 bool shouldBlockArgumentBeCalledOnce(const CallExpr *Call, 1173 unsigned ParamIndex) const { 1174 const FunctionDecl *Function = Call->getDirectCallee(); 1175 return shouldBlockArgumentBeCalledOnce(Function, ParamIndex) || 1176 shouldBeCalledOnce(Call, ParamIndex); 1177 } 1178 1179 bool shouldBlockArgumentBeCalledOnce(const ObjCMessageExpr *Message, 1180 unsigned ParamIndex) const { 1181 // At the moment, we don't have any Obj-C methods we want to specifically 1182 // check in here. 1183 return shouldBeCalledOnce(Message, ParamIndex); 1184 } 1185 1186 static bool shouldBlockArgumentBeCalledOnce(const FunctionDecl *Function, 1187 unsigned ParamIndex) { 1188 // There is a list of important API functions that while not following 1189 // conventions nor being directly annotated, still guarantee that the 1190 // callback parameter will be called exactly once. 1191 // 1192 // Here we check if this is the case. 1193 return Function && 1194 llvm::any_of(KNOWN_CALLED_ONCE_PARAMETERS, 1195 [Function, ParamIndex]( 1196 const KnownCalledOnceParameter &Reference) { 1197 return Reference.FunctionName == 1198 Function->getName() && 1199 Reference.ParamIndex == ParamIndex; 1200 }); 1201 } 1202 1203 /// Return true if the analyzed function is actually a default implementation 1204 /// of the method that has to be overriden. 1205 /// 1206 /// These functions can have tracked parameters, but wouldn't call them 1207 /// because they are not designed to perform any meaningful actions. 1208 /// 1209 /// There are a couple of flavors of such default implementations: 1210 /// 1. Empty methods or methods with a single return statement 1211 /// 2. Methods that have one block with a call to no return function 1212 /// 3. Methods with only assertion-like operations 1213 bool isPossiblyEmptyImpl() const { 1214 if (!isa<ObjCMethodDecl>(AC.getDecl())) { 1215 // We care only about functions that are not supposed to be called. 1216 // Only methods can be overriden. 1217 return false; 1218 } 1219 1220 // Case #1 (without return statements) 1221 if (FunctionCFG.size() == 2) { 1222 // Method has only two blocks: ENTRY and EXIT. 1223 // This is equivalent to empty function. 1224 return true; 1225 } 1226 1227 // Case #2 1228 if (FunctionCFG.size() == 3) { 1229 const CFGBlock &Entry = FunctionCFG.getEntry(); 1230 if (Entry.succ_empty()) { 1231 return false; 1232 } 1233 1234 const CFGBlock *OnlyBlock = *Entry.succ_begin(); 1235 // Method has only one block, let's see if it has a no-return 1236 // element. 1237 if (OnlyBlock && OnlyBlock->hasNoReturnElement()) { 1238 return true; 1239 } 1240 // Fallthrough, CFGs with only one block can fall into #1 and #3 as well. 1241 } 1242 1243 // Cases #1 (return statements) and #3. 1244 // 1245 // It is hard to detect that something is an assertion or came 1246 // from assertion. Here we use a simple heuristic: 1247 // 1248 // - If it came from a macro, it can be an assertion. 1249 // 1250 // Additionally, we can't assume a number of basic blocks or the CFG's 1251 // structure because assertions might include loops and conditions. 1252 return llvm::all_of(FunctionCFG, [](const CFGBlock *BB) { 1253 if (!BB) { 1254 // Unreachable blocks are totally fine. 1255 return true; 1256 } 1257 1258 // Return statements can have sub-expressions that are represented as 1259 // separate statements of a basic block. We should allow this. 1260 // This parent map will be initialized with a parent tree for all 1261 // subexpressions of the block's return statement (if it has one). 1262 std::unique_ptr<ParentMap> ReturnChildren; 1263 1264 return llvm::all_of( 1265 llvm::reverse(*BB), // we should start with return statements, if we 1266 // have any, i.e. from the bottom of the block 1267 [&ReturnChildren](const CFGElement &Element) { 1268 if (std::optional<CFGStmt> S = Element.getAs<CFGStmt>()) { 1269 const Stmt *SuspiciousStmt = S->getStmt(); 1270 1271 if (isa<ReturnStmt>(SuspiciousStmt)) { 1272 // Let's initialize this structure to test whether 1273 // some further statement is a part of this return. 1274 ReturnChildren = std::make_unique<ParentMap>( 1275 const_cast<Stmt *>(SuspiciousStmt)); 1276 // Return statements are allowed as part of #1. 1277 return true; 1278 } 1279 1280 return SuspiciousStmt->getBeginLoc().isMacroID() || 1281 (ReturnChildren && 1282 ReturnChildren->hasParent(SuspiciousStmt)); 1283 } 1284 return true; 1285 }); 1286 }); 1287 } 1288 1289 /// Check if parameter with the given index has ever escaped. 1290 bool hasEverEscaped(unsigned Index) const { 1291 return llvm::any_of(States, [Index](const State &StateForOneBB) { 1292 return StateForOneBB.getKindFor(Index) == ParameterStatus::Escaped; 1293 }); 1294 } 1295 1296 /// Return status stored for the given basic block. 1297 /// \{ 1298 State &getState(const CFGBlock *BB) { 1299 assert(BB); 1300 return States[BB->getBlockID()]; 1301 } 1302 const State &getState(const CFGBlock *BB) const { 1303 assert(BB); 1304 return States[BB->getBlockID()]; 1305 } 1306 /// \} 1307 1308 /// Assign status to the given basic block. 1309 /// 1310 /// Returns true when the stored status changed. 1311 bool assignState(const CFGBlock *BB, const State &ToAssign) { 1312 State &Current = getState(BB); 1313 if (Current == ToAssign) { 1314 return false; 1315 } 1316 1317 Current = ToAssign; 1318 return true; 1319 } 1320 1321 /// Join all incoming statuses for the given basic block. 1322 State joinSuccessors(const CFGBlock *BB) const { 1323 auto Succs = 1324 llvm::make_filter_range(BB->succs(), [this](const CFGBlock *Succ) { 1325 return Succ && this->getState(Succ).isVisited(); 1326 }); 1327 // We came to this block from somewhere after all. 1328 assert(!Succs.empty() && 1329 "Basic block should have at least one visited successor"); 1330 1331 State Result = getState(*Succs.begin()); 1332 1333 for (const CFGBlock *Succ : llvm::drop_begin(Succs, 1)) { 1334 Result.join(getState(Succ)); 1335 } 1336 1337 if (const Expr *Condition = getCondition(BB->getTerminatorStmt())) { 1338 handleConditional(BB, Condition, Result); 1339 } 1340 1341 return Result; 1342 } 1343 1344 void handleConditional(const CFGBlock *BB, const Expr *Condition, 1345 State &ToAlter) const { 1346 handleParameterCheck(BB, Condition, ToAlter); 1347 if (SuppressOnConventionalErrorPaths) { 1348 handleConventionalCheck(BB, Condition, ToAlter); 1349 } 1350 } 1351 1352 void handleParameterCheck(const CFGBlock *BB, const Expr *Condition, 1353 State &ToAlter) const { 1354 // In this function, we try to deal with the following pattern: 1355 // 1356 // if (parameter) 1357 // parameter(...); 1358 // 1359 // It's not good to show a warning here because clearly 'parameter' 1360 // couldn't and shouldn't be called on the 'else' path. 1361 // 1362 // Let's check if this if statement has a check involving one of 1363 // the tracked parameters. 1364 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl( 1365 Condition, 1366 /* ShouldRetrieveFromComparisons = */ true)) { 1367 if (const auto Index = getIndex(*Parameter)) { 1368 ParameterStatus &CurrentStatus = ToAlter.getStatusFor(*Index); 1369 1370 // We don't want to deep dive into semantics of the check and 1371 // figure out if that check was for null or something else. 1372 // We simply trust the user that they know what they are doing. 1373 // 1374 // For this reason, in the following loop we look for the 1375 // best-looking option. 1376 for (const CFGBlock *Succ : BB->succs()) { 1377 if (!Succ) 1378 continue; 1379 1380 const ParameterStatus &StatusInSucc = 1381 getState(Succ).getStatusFor(*Index); 1382 1383 if (StatusInSucc.isErrorStatus()) { 1384 continue; 1385 } 1386 1387 // Let's use this status instead. 1388 CurrentStatus = StatusInSucc; 1389 1390 if (StatusInSucc.getKind() == ParameterStatus::DefinitelyCalled) { 1391 // This is the best option to have and we already found it. 1392 break; 1393 } 1394 1395 // If we found 'Escaped' first, we still might find 'DefinitelyCalled' 1396 // on the other branch. And we prefer the latter. 1397 } 1398 } 1399 } 1400 } 1401 1402 void handleConventionalCheck(const CFGBlock *BB, const Expr *Condition, 1403 State &ToAlter) const { 1404 // Even when the analysis is technically correct, it is a widespread pattern 1405 // not to call completion handlers in some scenarios. These usually have 1406 // typical conditional names, such as 'error' or 'cancel'. 1407 if (!mentionsAnyOfConventionalNames(Condition)) { 1408 return; 1409 } 1410 1411 for (const auto &IndexedStatus : llvm::enumerate(ToAlter)) { 1412 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); 1413 // Conventions do not apply to explicitly marked parameters. 1414 if (isExplicitlyMarked(Parameter)) { 1415 continue; 1416 } 1417 1418 ParameterStatus &CurrentStatus = IndexedStatus.value(); 1419 // If we did find that on one of the branches the user uses the callback 1420 // and doesn't on the other path, we believe that they know what they are 1421 // doing and trust them. 1422 // 1423 // There are two possible scenarios for that: 1424 // 1. Current status is 'MaybeCalled' and one of the branches is 1425 // 'DefinitelyCalled' 1426 // 2. Current status is 'NotCalled' and one of the branches is 'Escaped' 1427 if (isLosingCall(ToAlter, BB, IndexedStatus.index()) || 1428 isLosingEscape(ToAlter, BB, IndexedStatus.index())) { 1429 CurrentStatus = ParameterStatus::Escaped; 1430 } 1431 } 1432 } 1433 1434 bool isLosingCall(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1435 unsigned ParameterIndex) const { 1436 // Let's check if the block represents DefinitelyCalled -> MaybeCalled 1437 // transition. 1438 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, 1439 ParameterStatus::MaybeCalled, 1440 ParameterStatus::DefinitelyCalled); 1441 } 1442 1443 bool isLosingEscape(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1444 unsigned ParameterIndex) const { 1445 // Let's check if the block represents Escaped -> NotCalled transition. 1446 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, 1447 ParameterStatus::NotCalled, ParameterStatus::Escaped); 1448 } 1449 1450 bool isLosingJoin(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1451 unsigned ParameterIndex, ParameterStatus::Kind AfterJoin, 1452 ParameterStatus::Kind BeforeJoin) const { 1453 assert(!ParameterStatus::isErrorStatus(BeforeJoin) && 1454 ParameterStatus::isErrorStatus(AfterJoin) && 1455 "It's not a losing join if statuses do not represent " 1456 "correct-to-error transition"); 1457 1458 const ParameterStatus &CurrentStatus = 1459 StateAfterJoin.getStatusFor(ParameterIndex); 1460 1461 return CurrentStatus.getKind() == AfterJoin && 1462 anySuccessorHasStatus(JoinBlock, ParameterIndex, BeforeJoin); 1463 } 1464 1465 /// Return true if any of the successors of the given basic block has 1466 /// a specified status for the given parameter. 1467 bool anySuccessorHasStatus(const CFGBlock *Parent, unsigned ParameterIndex, 1468 ParameterStatus::Kind ToFind) const { 1469 return llvm::any_of( 1470 Parent->succs(), [this, ParameterIndex, ToFind](const CFGBlock *Succ) { 1471 return Succ && getState(Succ).getKindFor(ParameterIndex) == ToFind; 1472 }); 1473 } 1474 1475 /// Check given expression that was discovered to escape. 1476 void checkEscapee(const Expr *E) { 1477 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { 1478 checkEscapee(*Parameter); 1479 } 1480 } 1481 1482 /// Check given parameter that was discovered to escape. 1483 void checkEscapee(const ParmVarDecl &Parameter) { 1484 if (auto Index = getIndex(Parameter)) { 1485 processEscapeFor(*Index); 1486 } 1487 } 1488 1489 /// Mark all parameters in the current state as 'no-return'. 1490 void markNoReturn() { 1491 for (ParameterStatus &PS : CurrentState) { 1492 PS = ParameterStatus::NoReturn; 1493 } 1494 } 1495 1496 /// Check if the given assignment represents suppression and act on it. 1497 void checkSuppression(const BinaryOperator *Assignment) { 1498 // Suppression has the following form: 1499 // parameter = 0; 1500 // 0 can be of any form (NULL, nil, etc.) 1501 if (auto Index = getIndexOfExpression(Assignment->getLHS())) { 1502 1503 // We don't care what is written in the RHS, it could be whatever 1504 // we can interpret as 0. 1505 if (auto Constant = 1506 Assignment->getRHS()->IgnoreParenCasts()->getIntegerConstantExpr( 1507 AC.getASTContext())) { 1508 1509 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); 1510 1511 if (0 == *Constant && CurrentParamStatus.seenAnyCalls()) { 1512 // Even though this suppression mechanism is introduced to tackle 1513 // false positives for multiple calls, the fact that the user has 1514 // to use suppression can also tell us that we couldn't figure out 1515 // how different paths cancel each other out. And if that is true, 1516 // we will most certainly have false positives about parameters not 1517 // being called on certain paths. 1518 // 1519 // For this reason, we abandon tracking this parameter altogether. 1520 CurrentParamStatus = ParameterStatus::Reported; 1521 } 1522 } 1523 } 1524 } 1525 1526 public: 1527 //===----------------------------------------------------------------------===// 1528 // Tree traversal methods 1529 //===----------------------------------------------------------------------===// 1530 1531 void VisitCallExpr(const CallExpr *Call) { 1532 // This call might be a direct call, i.e. a parameter call... 1533 checkDirectCall(Call); 1534 // ... or an indirect call, i.e. when parameter is an argument. 1535 checkIndirectCall(Call); 1536 } 1537 1538 void VisitObjCMessageExpr(const ObjCMessageExpr *Message) { 1539 // The most common situation that we are defending against here is 1540 // copying a tracked parameter. 1541 if (const Expr *Receiver = Message->getInstanceReceiver()) { 1542 checkEscapee(Receiver); 1543 } 1544 // Message expressions unlike calls, could not be direct. 1545 checkIndirectCall(Message); 1546 } 1547 1548 void VisitBlockExpr(const BlockExpr *Block) { 1549 // Block expressions are tricky. It is a very common practice to capture 1550 // completion handlers by blocks and use them there. 1551 // For this reason, it is important to analyze blocks and report warnings 1552 // for completion handler misuse in blocks. 1553 // 1554 // However, it can be quite difficult to track how the block itself is being 1555 // used. The full precise anlysis of that will be similar to alias analysis 1556 // for completion handlers and can be too heavyweight for a compile-time 1557 // diagnostic. Instead, we judge about the immediate use of the block. 1558 // 1559 // Here, we try to find a call expression where we know due to conventions, 1560 // annotations, or other reasons that the block is called once and only 1561 // once. 1562 const Expr *CalledOnceCallSite = getBlockGuaraneedCallSite(Block); 1563 1564 // We need to report this information to the handler because in the 1565 // situation when we know that the block is called exactly once, we can be 1566 // stricter in terms of reported diagnostics. 1567 if (CalledOnceCallSite) { 1568 Handler.handleBlockThatIsGuaranteedToBeCalledOnce(Block->getBlockDecl()); 1569 } else { 1570 Handler.handleBlockWithNoGuarantees(Block->getBlockDecl()); 1571 } 1572 1573 for (const auto &Capture : Block->getBlockDecl()->captures()) { 1574 if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) { 1575 if (auto Index = getIndex(*Param)) { 1576 if (CalledOnceCallSite) { 1577 // The call site of a block can be considered a call site of the 1578 // captured parameter we track. 1579 processCallFor(*Index, CalledOnceCallSite); 1580 } else { 1581 // We still should consider this block as an escape for parameter, 1582 // if we don't know about its call site or the number of time it 1583 // can be invoked. 1584 processEscapeFor(*Index); 1585 } 1586 } 1587 } 1588 } 1589 } 1590 1591 void VisitBinaryOperator(const BinaryOperator *Op) { 1592 if (Op->getOpcode() == clang::BO_Assign) { 1593 // Let's check if one of the tracked parameters is assigned into 1594 // something, and if it is we don't want to track extra variables, so we 1595 // consider it as an escapee. 1596 checkEscapee(Op->getRHS()); 1597 1598 // Let's check whether this assignment is a suppression. 1599 checkSuppression(Op); 1600 } 1601 } 1602 1603 void VisitDeclStmt(const DeclStmt *DS) { 1604 // Variable initialization is not assignment and should be handled 1605 // separately. 1606 // 1607 // Multiple declarations can be a part of declaration statement. 1608 for (const auto *Declaration : DS->getDeclGroup()) { 1609 if (const auto *Var = dyn_cast<VarDecl>(Declaration)) { 1610 if (Var->getInit()) { 1611 checkEscapee(Var->getInit()); 1612 } 1613 1614 if (Var->hasAttr<CleanupAttr>()) { 1615 FunctionHasCleanupVars = true; 1616 } 1617 } 1618 } 1619 } 1620 1621 void VisitCStyleCastExpr(const CStyleCastExpr *Cast) { 1622 // We consider '(void)parameter' as a manual no-op escape. 1623 // It should be used to explicitly tell the analysis that this parameter 1624 // is intentionally not called on this path. 1625 if (Cast->getType().getCanonicalType()->isVoidType()) { 1626 checkEscapee(Cast->getSubExpr()); 1627 } 1628 } 1629 1630 void VisitObjCAtThrowStmt(const ObjCAtThrowStmt *) { 1631 // It is OK not to call marked parameters on exceptional paths. 1632 markNoReturn(); 1633 } 1634 1635 private: 1636 unsigned size() const { return TrackedParams.size(); } 1637 1638 std::optional<unsigned> getIndexOfCallee(const CallExpr *Call) const { 1639 return getIndexOfExpression(Call->getCallee()); 1640 } 1641 1642 std::optional<unsigned> getIndexOfExpression(const Expr *E) const { 1643 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { 1644 return getIndex(*Parameter); 1645 } 1646 1647 return std::nullopt; 1648 } 1649 1650 std::optional<unsigned> getIndex(const ParmVarDecl &Parameter) const { 1651 // Expected number of parameters that we actually track is 1. 1652 // 1653 // Also, the maximum number of declared parameters could not be on a scale 1654 // of hundreds of thousands. 1655 // 1656 // In this setting, linear search seems reasonable and even performs better 1657 // than bisection. 1658 ParamSizedVector<const ParmVarDecl *>::const_iterator It = 1659 llvm::find(TrackedParams, &Parameter); 1660 1661 if (It != TrackedParams.end()) { 1662 return It - TrackedParams.begin(); 1663 } 1664 1665 return std::nullopt; 1666 } 1667 1668 const ParmVarDecl *getParameter(unsigned Index) const { 1669 assert(Index < TrackedParams.size()); 1670 return TrackedParams[Index]; 1671 } 1672 1673 const CFG &FunctionCFG; 1674 AnalysisDeclContext &AC; 1675 CalledOnceCheckHandler &Handler; 1676 bool CheckConventionalParameters; 1677 // As of now, we turn this behavior off. So, we still are going to report 1678 // missing calls on paths that look like it was intentional. 1679 // Technically such reports are true positives, but they can make some users 1680 // grumpy because of the sheer number of warnings. 1681 // It can be turned back on if we decide that we want to have the other way 1682 // around. 1683 bool SuppressOnConventionalErrorPaths = false; 1684 1685 // The user can annotate variable declarations with cleanup functions, which 1686 // essentially imposes a custom destructor logic on that variable. 1687 // It is possible to use it, however, to call tracked parameters on all exits 1688 // from the function. For this reason, we track the fact that the function 1689 // actually has these. 1690 bool FunctionHasCleanupVars = false; 1691 1692 State CurrentState; 1693 ParamSizedVector<const ParmVarDecl *> TrackedParams; 1694 CFGSizedVector<State> States; 1695 }; 1696 1697 } // end anonymous namespace 1698 1699 namespace clang { 1700 void checkCalledOnceParameters(AnalysisDeclContext &AC, 1701 CalledOnceCheckHandler &Handler, 1702 bool CheckConventionalParameters) { 1703 CalledOnceChecker::check(AC, Handler, CheckConventionalParameters); 1704 } 1705 } // end namespace clang 1706