1 //===- CalledOnceCheck.cpp - Check 'called once' parameters ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/Analyses/CalledOnceCheck.h" 10 #include "clang/AST/ASTContext.h" 11 #include "clang/AST/Attr.h" 12 #include "clang/AST/Decl.h" 13 #include "clang/AST/DeclBase.h" 14 #include "clang/AST/DynamicRecursiveASTVisitor.h" 15 #include "clang/AST/Expr.h" 16 #include "clang/AST/ExprObjC.h" 17 #include "clang/AST/OperationKinds.h" 18 #include "clang/AST/ParentMap.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtObjC.h" 21 #include "clang/AST/StmtVisitor.h" 22 #include "clang/AST/Type.h" 23 #include "clang/Analysis/AnalysisDeclContext.h" 24 #include "clang/Analysis/CFG.h" 25 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" 26 #include "clang/Basic/Builtins.h" 27 #include "clang/Basic/IdentifierTable.h" 28 #include "clang/Basic/LLVM.h" 29 #include "llvm/ADT/BitVector.h" 30 #include "llvm/ADT/BitmaskEnum.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/ADT/Sequence.h" 33 #include "llvm/ADT/SmallVector.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include <memory> 38 #include <optional> 39 40 using namespace clang; 41 42 namespace { 43 static constexpr unsigned EXPECTED_MAX_NUMBER_OF_PARAMS = 2; 44 template <class T> 45 using ParamSizedVector = llvm::SmallVector<T, EXPECTED_MAX_NUMBER_OF_PARAMS>; 46 static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8; 47 template <class T> 48 using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>; 49 constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = { 50 "completionHandler", "completion", "withCompletionHandler", 51 "withCompletion", "completionBlock", "withCompletionBlock", 52 "replyTo", "reply", "withReplyTo"}; 53 constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = { 54 "WithCompletionHandler", "WithCompletion", "WithCompletionBlock", 55 "WithReplyTo", "WithReply"}; 56 constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = { 57 "error", "cancel", "shouldCall", "done", "OK", "success"}; 58 59 struct KnownCalledOnceParameter { 60 llvm::StringLiteral FunctionName; 61 unsigned ParamIndex; 62 }; 63 constexpr KnownCalledOnceParameter KNOWN_CALLED_ONCE_PARAMETERS[] = { 64 {llvm::StringLiteral{"dispatch_async"}, 1}, 65 {llvm::StringLiteral{"dispatch_async_and_wait"}, 1}, 66 {llvm::StringLiteral{"dispatch_after"}, 2}, 67 {llvm::StringLiteral{"dispatch_sync"}, 1}, 68 {llvm::StringLiteral{"dispatch_once"}, 1}, 69 {llvm::StringLiteral{"dispatch_barrier_async"}, 1}, 70 {llvm::StringLiteral{"dispatch_barrier_async_and_wait"}, 1}, 71 {llvm::StringLiteral{"dispatch_barrier_sync"}, 1}}; 72 73 class ParameterStatus { 74 public: 75 // Status kind is basically the main part of parameter's status. 76 // The kind represents our knowledge (so far) about a tracked parameter 77 // in the context of this analysis. 78 // 79 // Since we want to report on missing and extraneous calls, we need to 80 // track the fact whether paramater was called or not. This automatically 81 // decides two kinds: `NotCalled` and `Called`. 82 // 83 // One of the erroneous situations is the case when parameter is called only 84 // on some of the paths. We could've considered it `NotCalled`, but we want 85 // to report double call warnings even if these two calls are not guaranteed 86 // to happen in every execution. We also don't want to have it as `Called` 87 // because not calling tracked parameter on all of the paths is an error 88 // on its own. For these reasons, we need to have a separate kind, 89 // `MaybeCalled`, and change `Called` to `DefinitelyCalled` to avoid 90 // confusion. 91 // 92 // Two violations of calling parameter more than once and not calling it on 93 // every path are not, however, mutually exclusive. In situations where both 94 // violations take place, we prefer to report ONLY double call. It's always 95 // harder to pinpoint a bug that has arisen when a user neglects to take the 96 // right action (and therefore, no action is taken), than when a user takes 97 // the wrong action. And, in order to remember that we already reported 98 // a double call, we need another kind: `Reported`. 99 // 100 // Our analysis is intra-procedural and, while in the perfect world, 101 // developers only use tracked parameters to call them, in the real world, 102 // the picture might be different. Parameters can be stored in global 103 // variables or leaked into other functions that we know nothing about. 104 // We try to be lenient and trust users. Another kind `Escaped` reflects 105 // such situations. We don't know if it gets called there or not, but we 106 // should always think of `Escaped` as the best possible option. 107 // 108 // Some of the paths in the analyzed functions might end with a call 109 // to noreturn functions. Such paths are not required to have parameter 110 // calls and we want to track that. For the purposes of better diagnostics, 111 // we don't want to reuse `Escaped` and, thus, have another kind `NoReturn`. 112 // 113 // Additionally, we have `NotVisited` kind that tells us nothing about 114 // a tracked parameter, but is used for tracking analyzed (aka visited) 115 // basic blocks. 116 // 117 // If we consider `|` to be a JOIN operation of two kinds coming from 118 // two different paths, the following properties must hold: 119 // 120 // 1. for any Kind K: K | K == K 121 // Joining two identical kinds should result in the same kind. 122 // 123 // 2. for any Kind K: Reported | K == Reported 124 // Doesn't matter on which path it was reported, it still is. 125 // 126 // 3. for any Kind K: NoReturn | K == K 127 // We can totally ignore noreturn paths during merges. 128 // 129 // 4. DefinitelyCalled | NotCalled == MaybeCalled 130 // Called on one path, not called on another - that's simply 131 // a definition for MaybeCalled. 132 // 133 // 5. for any Kind K in [DefinitelyCalled, NotCalled, MaybeCalled]: 134 // Escaped | K == K 135 // Escaped mirrors other statuses after joins. 136 // Every situation, when we join any of the listed kinds K, 137 // is a violation. For this reason, in order to assume the 138 // best outcome for this escape, we consider it to be the 139 // same as the other path. 140 // 141 // 6. for any Kind K in [DefinitelyCalled, NotCalled]: 142 // MaybeCalled | K == MaybeCalled 143 // MaybeCalled should basically stay after almost every join. 144 enum Kind { 145 // No-return paths should be absolutely transparent for the analysis. 146 // 0x0 is the identity element for selected join operation (binary or). 147 NoReturn = 0x0, /* 0000 */ 148 // Escaped marks situations when marked parameter escaped into 149 // another function (so we can assume that it was possibly called there). 150 Escaped = 0x1, /* 0001 */ 151 // Parameter was definitely called once at this point. 152 DefinitelyCalled = 0x3, /* 0011 */ 153 // Kinds less or equal to NON_ERROR_STATUS are not considered errors. 154 NON_ERROR_STATUS = DefinitelyCalled, 155 // Parameter was not yet called. 156 NotCalled = 0x5, /* 0101 */ 157 // Parameter was not called at least on one path leading to this point, 158 // while there is also at least one path that it gets called. 159 MaybeCalled = 0x7, /* 0111 */ 160 // Parameter was not yet analyzed. 161 NotVisited = 0x8, /* 1000 */ 162 // We already reported a violation and stopped tracking calls for this 163 // parameter. 164 Reported = 0xF, /* 1111 */ 165 LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported) 166 }; 167 168 constexpr ParameterStatus() = default; 169 /* implicit */ ParameterStatus(Kind K) : StatusKind(K) { 170 assert(!seenAnyCalls(K) && "Can't initialize status without a call"); 171 } 172 ParameterStatus(Kind K, const Expr *Call) : StatusKind(K), Call(Call) { 173 assert(seenAnyCalls(K) && "This kind is not supposed to have a call"); 174 } 175 176 const Expr &getCall() const { 177 assert(seenAnyCalls(getKind()) && "ParameterStatus doesn't have a call"); 178 return *Call; 179 } 180 static bool seenAnyCalls(Kind K) { 181 return (K & DefinitelyCalled) == DefinitelyCalled && K != Reported; 182 } 183 bool seenAnyCalls() const { return seenAnyCalls(getKind()); } 184 185 static bool isErrorStatus(Kind K) { return K > NON_ERROR_STATUS; } 186 bool isErrorStatus() const { return isErrorStatus(getKind()); } 187 188 Kind getKind() const { return StatusKind; } 189 190 void join(const ParameterStatus &Other) { 191 // If we have a pointer already, let's keep it. 192 // For the purposes of the analysis, it doesn't really matter 193 // which call we report. 194 // 195 // If we don't have a pointer, let's take whatever gets joined. 196 if (!Call) { 197 Call = Other.Call; 198 } 199 // Join kinds. 200 StatusKind |= Other.getKind(); 201 } 202 203 bool operator==(const ParameterStatus &Other) const { 204 // We compare only kinds, pointers on their own is only additional 205 // information. 206 return getKind() == Other.getKind(); 207 } 208 209 private: 210 // It would've been a perfect place to use llvm::PointerIntPair, but 211 // unfortunately NumLowBitsAvailable for clang::Expr had been reduced to 2. 212 Kind StatusKind = NotVisited; 213 const Expr *Call = nullptr; 214 }; 215 216 /// State aggregates statuses of all tracked parameters. 217 class State { 218 public: 219 State(unsigned Size, ParameterStatus::Kind K = ParameterStatus::NotVisited) 220 : ParamData(Size, K) {} 221 222 /// Return status of a parameter with the given index. 223 /// \{ 224 ParameterStatus &getStatusFor(unsigned Index) { return ParamData[Index]; } 225 const ParameterStatus &getStatusFor(unsigned Index) const { 226 return ParamData[Index]; 227 } 228 /// \} 229 230 /// Return true if parameter with the given index can be called. 231 bool seenAnyCalls(unsigned Index) const { 232 return getStatusFor(Index).seenAnyCalls(); 233 } 234 /// Return a reference that we consider a call. 235 /// 236 /// Should only be used for parameters that can be called. 237 const Expr &getCallFor(unsigned Index) const { 238 return getStatusFor(Index).getCall(); 239 } 240 /// Return status kind of parameter with the given index. 241 ParameterStatus::Kind getKindFor(unsigned Index) const { 242 return getStatusFor(Index).getKind(); 243 } 244 245 bool isVisited() const { 246 return llvm::all_of(ParamData, [](const ParameterStatus &S) { 247 return S.getKind() != ParameterStatus::NotVisited; 248 }); 249 } 250 251 // Join other state into the current state. 252 void join(const State &Other) { 253 assert(ParamData.size() == Other.ParamData.size() && 254 "Couldn't join statuses with different sizes"); 255 for (auto Pair : llvm::zip(ParamData, Other.ParamData)) { 256 std::get<0>(Pair).join(std::get<1>(Pair)); 257 } 258 } 259 260 using iterator = ParamSizedVector<ParameterStatus>::iterator; 261 using const_iterator = ParamSizedVector<ParameterStatus>::const_iterator; 262 263 iterator begin() { return ParamData.begin(); } 264 iterator end() { return ParamData.end(); } 265 266 const_iterator begin() const { return ParamData.begin(); } 267 const_iterator end() const { return ParamData.end(); } 268 269 bool operator==(const State &Other) const { 270 return ParamData == Other.ParamData; 271 } 272 273 private: 274 ParamSizedVector<ParameterStatus> ParamData; 275 }; 276 277 /// A simple class that finds DeclRefExpr in the given expression. 278 /// 279 /// However, we don't want to find ANY nested DeclRefExpr skipping whatever 280 /// expressions on our way. Only certain expressions considered "no-op" 281 /// for our task are indeed skipped. 282 class DeclRefFinder 283 : public ConstStmtVisitor<DeclRefFinder, const DeclRefExpr *> { 284 public: 285 /// Find a DeclRefExpr in the given expression. 286 /// 287 /// In its most basic form (ShouldRetrieveFromComparisons == false), 288 /// this function can be simply reduced to the following question: 289 /// 290 /// - If expression E is used as a function argument, could we say 291 /// that DeclRefExpr nested in E is used as an argument? 292 /// 293 /// According to this rule, we can say that parens, casts and dereferencing 294 /// (dereferencing only applied to function pointers, but this is our case) 295 /// can be skipped. 296 /// 297 /// When we should look into comparisons the question changes to: 298 /// 299 /// - If expression E is used as a condition, could we say that 300 /// DeclRefExpr is being checked? 301 /// 302 /// And even though, these are two different questions, they have quite a lot 303 /// in common. Actually, we can say that whatever expression answers 304 /// positively the first question also fits the second question as well. 305 /// 306 /// In addition, we skip binary operators == and !=, and unary opeartor !. 307 static const DeclRefExpr *find(const Expr *E, 308 bool ShouldRetrieveFromComparisons = false) { 309 return DeclRefFinder(ShouldRetrieveFromComparisons).Visit(E); 310 } 311 312 const DeclRefExpr *VisitDeclRefExpr(const DeclRefExpr *DR) { return DR; } 313 314 const DeclRefExpr *VisitUnaryOperator(const UnaryOperator *UO) { 315 switch (UO->getOpcode()) { 316 case UO_LNot: 317 // We care about logical not only if we care about comparisons. 318 if (!ShouldRetrieveFromComparisons) 319 return nullptr; 320 [[fallthrough]]; 321 // Function pointer/references can be dereferenced before a call. 322 // That doesn't make it, however, any different from a regular call. 323 // For this reason, dereference operation is a "no-op". 324 case UO_Deref: 325 return Visit(UO->getSubExpr()); 326 default: 327 return nullptr; 328 } 329 } 330 331 const DeclRefExpr *VisitBinaryOperator(const BinaryOperator *BO) { 332 if (!ShouldRetrieveFromComparisons) 333 return nullptr; 334 335 switch (BO->getOpcode()) { 336 case BO_EQ: 337 case BO_NE: { 338 const DeclRefExpr *LHS = Visit(BO->getLHS()); 339 return LHS ? LHS : Visit(BO->getRHS()); 340 } 341 default: 342 return nullptr; 343 } 344 } 345 346 const DeclRefExpr *VisitOpaqueValueExpr(const OpaqueValueExpr *OVE) { 347 return Visit(OVE->getSourceExpr()); 348 } 349 350 const DeclRefExpr *VisitCallExpr(const CallExpr *CE) { 351 if (!ShouldRetrieveFromComparisons) 352 return nullptr; 353 354 // We want to see through some of the boolean builtin functions 355 // that we are likely to see in conditions. 356 switch (CE->getBuiltinCallee()) { 357 case Builtin::BI__builtin_expect: 358 case Builtin::BI__builtin_expect_with_probability: { 359 assert(CE->getNumArgs() >= 2); 360 361 const DeclRefExpr *Candidate = Visit(CE->getArg(0)); 362 return Candidate != nullptr ? Candidate : Visit(CE->getArg(1)); 363 } 364 365 case Builtin::BI__builtin_unpredictable: 366 return Visit(CE->getArg(0)); 367 368 default: 369 return nullptr; 370 } 371 } 372 373 const DeclRefExpr *VisitExpr(const Expr *E) { 374 // It is a fallback method that gets called whenever the actual type 375 // of the given expression is not covered. 376 // 377 // We first check if we have anything to skip. And then repeat the whole 378 // procedure for a nested expression instead. 379 const Expr *DeclutteredExpr = E->IgnoreParenCasts(); 380 return E != DeclutteredExpr ? Visit(DeclutteredExpr) : nullptr; 381 } 382 383 private: 384 DeclRefFinder(bool ShouldRetrieveFromComparisons) 385 : ShouldRetrieveFromComparisons(ShouldRetrieveFromComparisons) {} 386 387 bool ShouldRetrieveFromComparisons; 388 }; 389 390 const DeclRefExpr *findDeclRefExpr(const Expr *In, 391 bool ShouldRetrieveFromComparisons = false) { 392 return DeclRefFinder::find(In, ShouldRetrieveFromComparisons); 393 } 394 395 const ParmVarDecl * 396 findReferencedParmVarDecl(const Expr *In, 397 bool ShouldRetrieveFromComparisons = false) { 398 if (const DeclRefExpr *DR = 399 findDeclRefExpr(In, ShouldRetrieveFromComparisons)) { 400 return dyn_cast<ParmVarDecl>(DR->getDecl()); 401 } 402 403 return nullptr; 404 } 405 406 /// Return conditions expression of a statement if it has one. 407 const Expr *getCondition(const Stmt *S) { 408 if (!S) { 409 return nullptr; 410 } 411 412 if (const auto *If = dyn_cast<IfStmt>(S)) { 413 return If->getCond(); 414 } 415 if (const auto *Ternary = dyn_cast<AbstractConditionalOperator>(S)) { 416 return Ternary->getCond(); 417 } 418 419 return nullptr; 420 } 421 422 /// A small helper class that collects all named identifiers in the given 423 /// expression. It traverses it recursively, so names from deeper levels 424 /// of the AST will end up in the results. 425 /// Results might have duplicate names, if this is a problem, convert to 426 /// string sets afterwards. 427 class NamesCollector : public DynamicRecursiveASTVisitor { 428 public: 429 static constexpr unsigned EXPECTED_NUMBER_OF_NAMES = 5; 430 using NameCollection = 431 llvm::SmallVector<llvm::StringRef, EXPECTED_NUMBER_OF_NAMES>; 432 433 static NameCollection collect(const Expr *From) { 434 NamesCollector Impl; 435 Impl.TraverseStmt(const_cast<Expr *>(From)); 436 return Impl.Result; 437 } 438 439 bool VisitDeclRefExpr(DeclRefExpr *E) override { 440 Result.push_back(E->getDecl()->getName()); 441 return true; 442 } 443 444 bool VisitObjCPropertyRefExpr(ObjCPropertyRefExpr *E) override { 445 llvm::StringRef Name; 446 447 if (E->isImplicitProperty()) { 448 ObjCMethodDecl *PropertyMethodDecl = nullptr; 449 if (E->isMessagingGetter()) { 450 PropertyMethodDecl = E->getImplicitPropertyGetter(); 451 } else { 452 PropertyMethodDecl = E->getImplicitPropertySetter(); 453 } 454 assert(PropertyMethodDecl && 455 "Implicit property must have associated declaration"); 456 Name = PropertyMethodDecl->getSelector().getNameForSlot(0); 457 } else { 458 assert(E->isExplicitProperty()); 459 Name = E->getExplicitProperty()->getName(); 460 } 461 462 Result.push_back(Name); 463 return true; 464 } 465 466 private: 467 NamesCollector() = default; 468 NameCollection Result; 469 }; 470 471 /// Check whether the given expression mentions any of conventional names. 472 bool mentionsAnyOfConventionalNames(const Expr *E) { 473 NamesCollector::NameCollection MentionedNames = NamesCollector::collect(E); 474 475 return llvm::any_of(MentionedNames, [](llvm::StringRef ConditionName) { 476 return llvm::any_of( 477 CONVENTIONAL_CONDITIONS, 478 [ConditionName](const llvm::StringLiteral &Conventional) { 479 return ConditionName.contains_insensitive(Conventional); 480 }); 481 }); 482 } 483 484 /// Clarification is a simple pair of a reason why parameter is not called 485 /// on every path and a statement to blame. 486 struct Clarification { 487 NeverCalledReason Reason; 488 const Stmt *Location; 489 }; 490 491 /// A helper class that can produce a clarification based on the given pair 492 /// of basic blocks. 493 class NotCalledClarifier 494 : public ConstStmtVisitor<NotCalledClarifier, 495 std::optional<Clarification>> { 496 public: 497 /// The main entrypoint for the class, the function that tries to find the 498 /// clarification of how to explain which sub-path starts with a CFG edge 499 /// from Conditional to SuccWithoutCall. 500 /// 501 /// This means that this function has one precondition: 502 /// SuccWithoutCall should be a successor block for Conditional. 503 /// 504 /// Because clarification is not needed for non-trivial pairs of blocks 505 /// (i.e. SuccWithoutCall is not the only successor), it returns meaningful 506 /// results only for such cases. For this very reason, the parent basic 507 /// block, Conditional, is named that way, so it is clear what kind of 508 /// block is expected. 509 static std::optional<Clarification> clarify(const CFGBlock *Conditional, 510 const CFGBlock *SuccWithoutCall) { 511 if (const Stmt *Terminator = Conditional->getTerminatorStmt()) { 512 return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator); 513 } 514 return std::nullopt; 515 } 516 517 std::optional<Clarification> VisitIfStmt(const IfStmt *If) { 518 return VisitBranchingBlock(If, NeverCalledReason::IfThen); 519 } 520 521 std::optional<Clarification> 522 VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) { 523 return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen); 524 } 525 526 std::optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) { 527 const Stmt *CaseToBlame = SuccInQuestion->getLabel(); 528 if (!CaseToBlame) { 529 // If interesting basic block is not labeled, it means that this 530 // basic block does not represent any of the cases. 531 return Clarification{NeverCalledReason::SwitchSkipped, Switch}; 532 } 533 534 for (const SwitchCase *Case = Switch->getSwitchCaseList(); Case; 535 Case = Case->getNextSwitchCase()) { 536 if (Case == CaseToBlame) { 537 return Clarification{NeverCalledReason::Switch, Case}; 538 } 539 } 540 541 llvm_unreachable("Found unexpected switch structure"); 542 } 543 544 std::optional<Clarification> VisitForStmt(const ForStmt *For) { 545 return VisitBranchingBlock(For, NeverCalledReason::LoopEntered); 546 } 547 548 std::optional<Clarification> VisitWhileStmt(const WhileStmt *While) { 549 return VisitBranchingBlock(While, NeverCalledReason::LoopEntered); 550 } 551 552 std::optional<Clarification> 553 VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) { 554 assert(Parent->succ_size() == 2 && 555 "Branching block should have exactly two successors"); 556 unsigned SuccessorIndex = getSuccessorIndex(Parent, SuccInQuestion); 557 NeverCalledReason ActualReason = 558 updateForSuccessor(DefaultReason, SuccessorIndex); 559 return Clarification{ActualReason, Terminator}; 560 } 561 562 std::optional<Clarification> VisitBinaryOperator(const BinaryOperator *) { 563 // We don't want to report on short-curcuit logical operations. 564 return std::nullopt; 565 } 566 567 std::optional<Clarification> VisitStmt(const Stmt *Terminator) { 568 // If we got here, we didn't have a visit function for more derived 569 // classes of statement that this terminator actually belongs to. 570 // 571 // This is not a good scenario and should not happen in practice, but 572 // at least we'll warn the user. 573 return Clarification{NeverCalledReason::FallbackReason, Terminator}; 574 } 575 576 static unsigned getSuccessorIndex(const CFGBlock *Parent, 577 const CFGBlock *Child) { 578 CFGBlock::const_succ_iterator It = llvm::find(Parent->succs(), Child); 579 assert(It != Parent->succ_end() && 580 "Given blocks should be in parent-child relationship"); 581 return It - Parent->succ_begin(); 582 } 583 584 static NeverCalledReason 585 updateForSuccessor(NeverCalledReason ReasonForTrueBranch, 586 unsigned SuccessorIndex) { 587 assert(SuccessorIndex <= 1); 588 unsigned RawReason = 589 static_cast<unsigned>(ReasonForTrueBranch) + SuccessorIndex; 590 assert(RawReason <= 591 static_cast<unsigned>(NeverCalledReason::LARGEST_VALUE)); 592 return static_cast<NeverCalledReason>(RawReason); 593 } 594 595 private: 596 NotCalledClarifier(const CFGBlock *Parent, const CFGBlock *SuccInQuestion) 597 : Parent(Parent), SuccInQuestion(SuccInQuestion) {} 598 599 const CFGBlock *Parent, *SuccInQuestion; 600 }; 601 602 class CalledOnceChecker : public ConstStmtVisitor<CalledOnceChecker> { 603 public: 604 static void check(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, 605 bool CheckConventionalParameters) { 606 CalledOnceChecker(AC, Handler, CheckConventionalParameters).check(); 607 } 608 609 private: 610 CalledOnceChecker(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, 611 bool CheckConventionalParameters) 612 : FunctionCFG(*AC.getCFG()), AC(AC), Handler(Handler), 613 CheckConventionalParameters(CheckConventionalParameters), 614 CurrentState(0) { 615 initDataStructures(); 616 assert((size() == 0 || !States.empty()) && 617 "Data structures are inconsistent"); 618 } 619 620 //===----------------------------------------------------------------------===// 621 // Initializing functions 622 //===----------------------------------------------------------------------===// 623 624 void initDataStructures() { 625 const Decl *AnalyzedDecl = AC.getDecl(); 626 627 if (const auto *Function = dyn_cast<FunctionDecl>(AnalyzedDecl)) { 628 findParamsToTrack(Function); 629 } else if (const auto *Method = dyn_cast<ObjCMethodDecl>(AnalyzedDecl)) { 630 findParamsToTrack(Method); 631 } else if (const auto *Block = dyn_cast<BlockDecl>(AnalyzedDecl)) { 632 findCapturesToTrack(Block); 633 findParamsToTrack(Block); 634 } 635 636 // Have something to track, let's init states for every block from the CFG. 637 if (size() != 0) { 638 States = 639 CFGSizedVector<State>(FunctionCFG.getNumBlockIDs(), State(size())); 640 } 641 } 642 643 void findCapturesToTrack(const BlockDecl *Block) { 644 for (const auto &Capture : Block->captures()) { 645 if (const auto *P = dyn_cast<ParmVarDecl>(Capture.getVariable())) { 646 // Parameter DeclContext is its owning function or method. 647 const DeclContext *ParamContext = P->getDeclContext(); 648 if (shouldBeCalledOnce(ParamContext, P)) { 649 TrackedParams.push_back(P); 650 } 651 } 652 } 653 } 654 655 template <class FunctionLikeDecl> 656 void findParamsToTrack(const FunctionLikeDecl *Function) { 657 for (unsigned Index : llvm::seq<unsigned>(0u, Function->param_size())) { 658 if (shouldBeCalledOnce(Function, Index)) { 659 TrackedParams.push_back(Function->getParamDecl(Index)); 660 } 661 } 662 } 663 664 //===----------------------------------------------------------------------===// 665 // Main logic 'check' functions 666 //===----------------------------------------------------------------------===// 667 668 void check() { 669 // Nothing to check here: we don't have marked parameters. 670 if (size() == 0 || isPossiblyEmptyImpl()) 671 return; 672 673 assert( 674 llvm::none_of(States, [](const State &S) { return S.isVisited(); }) && 675 "None of the blocks should be 'visited' before the analysis"); 676 677 // For our task, both backward and forward approaches suite well. 678 // However, in order to report better diagnostics, we decided to go with 679 // backward analysis. 680 // 681 // Let's consider the following CFG and how forward and backward analyses 682 // will work for it. 683 // 684 // FORWARD: | BACKWARD: 685 // #1 | #1 686 // +---------+ | +-----------+ 687 // | if | | |MaybeCalled| 688 // +---------+ | +-----------+ 689 // |NotCalled| | | if | 690 // +---------+ | +-----------+ 691 // / \ | / \ 692 // #2 / \ #3 | #2 / \ #3 693 // +----------------+ +---------+ | +----------------+ +---------+ 694 // | foo() | | ... | | |DefinitelyCalled| |NotCalled| 695 // +----------------+ +---------+ | +----------------+ +---------+ 696 // |DefinitelyCalled| |NotCalled| | | foo() | | ... | 697 // +----------------+ +---------+ | +----------------+ +---------+ 698 // \ / | \ / 699 // \ #4 / | \ #4 / 700 // +-----------+ | +---------+ 701 // | ... | | |NotCalled| 702 // +-----------+ | +---------+ 703 // |MaybeCalled| | | ... | 704 // +-----------+ | +---------+ 705 // 706 // The most natural way to report lacking call in the block #3 would be to 707 // message that the false branch of the if statement in the block #1 doesn't 708 // have a call. And while with the forward approach we'll need to find a 709 // least common ancestor or something like that to find the 'if' to blame, 710 // backward analysis gives it to us out of the box. 711 BackwardDataflowWorklist Worklist(FunctionCFG, AC); 712 713 // Let's visit EXIT. 714 const CFGBlock *Exit = &FunctionCFG.getExit(); 715 assignState(Exit, State(size(), ParameterStatus::NotCalled)); 716 Worklist.enqueuePredecessors(Exit); 717 718 while (const CFGBlock *BB = Worklist.dequeue()) { 719 assert(BB && "Worklist should filter out null blocks"); 720 check(BB); 721 assert(CurrentState.isVisited() && 722 "After the check, basic block should be visited"); 723 724 // Traverse successor basic blocks if the status of this block 725 // has changed. 726 if (assignState(BB, CurrentState)) { 727 Worklist.enqueuePredecessors(BB); 728 } 729 } 730 731 // Check that we have all tracked parameters at the last block. 732 // As we are performing a backward version of the analysis, 733 // it should be the ENTRY block. 734 checkEntry(&FunctionCFG.getEntry()); 735 } 736 737 void check(const CFGBlock *BB) { 738 // We start with a state 'inherited' from all the successors. 739 CurrentState = joinSuccessors(BB); 740 assert(CurrentState.isVisited() && 741 "Shouldn't start with a 'not visited' state"); 742 743 // This is the 'exit' situation, broken promises are probably OK 744 // in such scenarios. 745 if (BB->hasNoReturnElement()) { 746 markNoReturn(); 747 // This block still can have calls (even multiple calls) and 748 // for this reason there is no early return here. 749 } 750 751 // We use a backward dataflow propagation and for this reason we 752 // should traverse basic blocks bottom-up. 753 for (const CFGElement &Element : llvm::reverse(*BB)) { 754 if (std::optional<CFGStmt> S = Element.getAs<CFGStmt>()) { 755 check(S->getStmt()); 756 } 757 } 758 } 759 void check(const Stmt *S) { Visit(S); } 760 761 void checkEntry(const CFGBlock *Entry) { 762 // We finalize this algorithm with the ENTRY block because 763 // we use a backward version of the analysis. This is where 764 // we can judge that some of the tracked parameters are not called on 765 // every path from ENTRY to EXIT. 766 767 const State &EntryStatus = getState(Entry); 768 llvm::BitVector NotCalledOnEveryPath(size(), false); 769 llvm::BitVector NotUsedOnEveryPath(size(), false); 770 771 // Check if there are no calls of the marked parameter at all 772 for (const auto &IndexedStatus : llvm::enumerate(EntryStatus)) { 773 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); 774 775 switch (IndexedStatus.value().getKind()) { 776 case ParameterStatus::NotCalled: 777 // If there were places where this parameter escapes (aka being used), 778 // we can provide a more useful diagnostic by pointing at the exact 779 // branches where it is not even mentioned. 780 if (!hasEverEscaped(IndexedStatus.index())) { 781 // This parameter is was not used at all, so we should report the 782 // most generic version of the warning. 783 if (isCaptured(Parameter)) { 784 // We want to specify that it was captured by the block. 785 Handler.handleCapturedNeverCalled(Parameter, AC.getDecl(), 786 !isExplicitlyMarked(Parameter)); 787 } else { 788 Handler.handleNeverCalled(Parameter, 789 !isExplicitlyMarked(Parameter)); 790 } 791 } else { 792 // Mark it as 'interesting' to figure out which paths don't even 793 // have escapes. 794 NotUsedOnEveryPath[IndexedStatus.index()] = true; 795 } 796 797 break; 798 case ParameterStatus::MaybeCalled: 799 // If we have 'maybe called' at this point, we have an error 800 // that there is at least one path where this parameter 801 // is not called. 802 // 803 // However, reporting the warning with only that information can be 804 // too vague for the users. For this reason, we mark such parameters 805 // as "interesting" for further analysis. 806 NotCalledOnEveryPath[IndexedStatus.index()] = true; 807 break; 808 default: 809 break; 810 } 811 } 812 813 // Early exit if we don't have parameters for extra analysis... 814 if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none() && 815 // ... or if we've seen variables with cleanup functions. 816 // We can't reason that we've seen every path in this case, 817 // and thus abandon reporting any warnings that imply that. 818 !FunctionHasCleanupVars) 819 return; 820 821 // We are looking for a pair of blocks A, B so that the following is true: 822 // * A is a predecessor of B 823 // * B is marked as NotCalled 824 // * A has at least one successor marked as either 825 // Escaped or DefinitelyCalled 826 // 827 // In that situation, it is guaranteed that B is the first block of the path 828 // where the user doesn't call or use parameter in question. 829 // 830 // For this reason, branch A -> B can be used for reporting. 831 // 832 // This part of the algorithm is guarded by a condition that the function 833 // does indeed have a violation of contract. For this reason, we can 834 // spend more time to find a good spot to place the warning. 835 // 836 // The following algorithm has the worst case complexity of O(V + E), 837 // where V is the number of basic blocks in FunctionCFG, 838 // E is the number of edges between blocks in FunctionCFG. 839 for (const CFGBlock *BB : FunctionCFG) { 840 if (!BB) 841 continue; 842 843 const State &BlockState = getState(BB); 844 845 for (unsigned Index : llvm::seq(0u, size())) { 846 // We don't want to use 'isLosingCall' here because we want to report 847 // the following situation as well: 848 // 849 // MaybeCalled 850 // | ... | 851 // MaybeCalled NotCalled 852 // 853 // Even though successor is not 'DefinitelyCalled', it is still useful 854 // to report it, it is still a path without a call. 855 if (NotCalledOnEveryPath[Index] && 856 BlockState.getKindFor(Index) == ParameterStatus::MaybeCalled) { 857 858 findAndReportNotCalledBranches(BB, Index); 859 } else if (NotUsedOnEveryPath[Index] && 860 isLosingEscape(BlockState, BB, Index)) { 861 862 findAndReportNotCalledBranches(BB, Index, /* IsEscape = */ true); 863 } 864 } 865 } 866 } 867 868 /// Check potential call of a tracked parameter. 869 void checkDirectCall(const CallExpr *Call) { 870 if (auto Index = getIndexOfCallee(Call)) { 871 processCallFor(*Index, Call); 872 } 873 } 874 875 /// Check the call expression for being an indirect call of one of the tracked 876 /// parameters. It is indirect in the sense that this particular call is not 877 /// calling the parameter itself, but rather uses it as the argument. 878 template <class CallLikeExpr> 879 void checkIndirectCall(const CallLikeExpr *CallOrMessage) { 880 // CallExpr::arguments does not interact nicely with llvm::enumerate. 881 llvm::ArrayRef<const Expr *> Arguments = 882 llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); 883 884 // Let's check if any of the call arguments is a point of interest. 885 for (const auto &Argument : llvm::enumerate(Arguments)) { 886 if (auto Index = getIndexOfExpression(Argument.value())) { 887 if (shouldBeCalledOnce(CallOrMessage, Argument.index())) { 888 // If the corresponding parameter is marked as 'called_once' we should 889 // consider it as a call. 890 processCallFor(*Index, CallOrMessage); 891 } else { 892 // Otherwise, we mark this parameter as escaped, which can be 893 // interpreted both as called or not called depending on the context. 894 processEscapeFor(*Index); 895 } 896 // Otherwise, let's keep the state as it is. 897 } 898 } 899 } 900 901 /// Process call of the parameter with the given index 902 void processCallFor(unsigned Index, const Expr *Call) { 903 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); 904 905 if (CurrentParamStatus.seenAnyCalls()) { 906 907 // At this point, this parameter was called, so this is a second call. 908 const ParmVarDecl *Parameter = getParameter(Index); 909 Handler.handleDoubleCall( 910 Parameter, &CurrentState.getCallFor(Index), Call, 911 !isExplicitlyMarked(Parameter), 912 // We are sure that the second call is definitely 913 // going to happen if the status is 'DefinitelyCalled'. 914 CurrentParamStatus.getKind() == ParameterStatus::DefinitelyCalled); 915 916 // Mark this parameter as already reported on, so we don't repeat 917 // warnings. 918 CurrentParamStatus = ParameterStatus::Reported; 919 920 } else if (CurrentParamStatus.getKind() != ParameterStatus::Reported) { 921 // If we didn't report anything yet, let's mark this parameter 922 // as called. 923 ParameterStatus Called(ParameterStatus::DefinitelyCalled, Call); 924 CurrentParamStatus = Called; 925 } 926 } 927 928 /// Process escape of the parameter with the given index 929 void processEscapeFor(unsigned Index) { 930 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); 931 932 // Escape overrides whatever error we think happened. 933 if (CurrentParamStatus.isErrorStatus() && 934 CurrentParamStatus.getKind() != ParameterStatus::Kind::Reported) { 935 CurrentParamStatus = ParameterStatus::Escaped; 936 } 937 } 938 939 void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index, 940 bool IsEscape = false) { 941 for (const CFGBlock *Succ : Parent->succs()) { 942 if (!Succ) 943 continue; 944 945 if (getState(Succ).getKindFor(Index) == ParameterStatus::NotCalled) { 946 assert(Parent->succ_size() >= 2 && 947 "Block should have at least two successors at this point"); 948 if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) { 949 const ParmVarDecl *Parameter = getParameter(Index); 950 Handler.handleNeverCalled( 951 Parameter, AC.getDecl(), Clarification->Location, 952 Clarification->Reason, !IsEscape, !isExplicitlyMarked(Parameter)); 953 } 954 } 955 } 956 } 957 958 //===----------------------------------------------------------------------===// 959 // Predicate functions to check parameters 960 //===----------------------------------------------------------------------===// 961 962 /// Return true if parameter is explicitly marked as 'called_once'. 963 static bool isExplicitlyMarked(const ParmVarDecl *Parameter) { 964 return Parameter->hasAttr<CalledOnceAttr>(); 965 } 966 967 /// Return true if the given name matches conventional pattens. 968 static bool isConventional(llvm::StringRef Name) { 969 return llvm::count(CONVENTIONAL_NAMES, Name) != 0; 970 } 971 972 /// Return true if the given name has conventional suffixes. 973 static bool hasConventionalSuffix(llvm::StringRef Name) { 974 return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) { 975 return Name.ends_with(Suffix); 976 }); 977 } 978 979 /// Return true if the given type can be used for conventional parameters. 980 static bool isConventional(QualType Ty) { 981 if (!Ty->isBlockPointerType()) { 982 return false; 983 } 984 985 QualType BlockType = Ty->castAs<BlockPointerType>()->getPointeeType(); 986 // Completion handlers should have a block type with void return type. 987 return BlockType->castAs<FunctionType>()->getReturnType()->isVoidType(); 988 } 989 990 /// Return true if the only parameter of the function is conventional. 991 static bool isOnlyParameterConventional(const FunctionDecl *Function) { 992 IdentifierInfo *II = Function->getIdentifier(); 993 return Function->getNumParams() == 1 && II && 994 hasConventionalSuffix(II->getName()); 995 } 996 997 /// Return true/false if 'swift_async' attribute states that the given 998 /// parameter is conventionally called once. 999 /// Return std::nullopt if the given declaration doesn't have 'swift_async' 1000 /// attribute. 1001 static std::optional<bool> isConventionalSwiftAsync(const Decl *D, 1002 unsigned ParamIndex) { 1003 if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) { 1004 if (A->getKind() == SwiftAsyncAttr::None) { 1005 return false; 1006 } 1007 1008 return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex; 1009 } 1010 return std::nullopt; 1011 } 1012 1013 /// Return true if the specified selector represents init method. 1014 static bool isInitMethod(Selector MethodSelector) { 1015 return MethodSelector.getMethodFamily() == OMF_init; 1016 } 1017 1018 /// Return true if the specified selector piece matches conventions. 1019 static bool isConventionalSelectorPiece(Selector MethodSelector, 1020 unsigned PieceIndex, 1021 QualType PieceType) { 1022 if (!isConventional(PieceType) || isInitMethod(MethodSelector)) { 1023 return false; 1024 } 1025 1026 if (MethodSelector.getNumArgs() == 1) { 1027 assert(PieceIndex == 0); 1028 return hasConventionalSuffix(MethodSelector.getNameForSlot(0)); 1029 } 1030 1031 llvm::StringRef PieceName = MethodSelector.getNameForSlot(PieceIndex); 1032 return isConventional(PieceName) || hasConventionalSuffix(PieceName); 1033 } 1034 1035 bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const { 1036 return isExplicitlyMarked(Parameter) || 1037 (CheckConventionalParameters && 1038 (isConventional(Parameter->getName()) || 1039 hasConventionalSuffix(Parameter->getName())) && 1040 isConventional(Parameter->getType())); 1041 } 1042 1043 bool shouldBeCalledOnce(const DeclContext *ParamContext, 1044 const ParmVarDecl *Param) { 1045 unsigned ParamIndex = Param->getFunctionScopeIndex(); 1046 if (const auto *Function = dyn_cast<FunctionDecl>(ParamContext)) { 1047 return shouldBeCalledOnce(Function, ParamIndex); 1048 } 1049 if (const auto *Method = dyn_cast<ObjCMethodDecl>(ParamContext)) { 1050 return shouldBeCalledOnce(Method, ParamIndex); 1051 } 1052 return shouldBeCalledOnce(Param); 1053 } 1054 1055 bool shouldBeCalledOnce(const BlockDecl *Block, unsigned ParamIndex) const { 1056 return shouldBeCalledOnce(Block->getParamDecl(ParamIndex)); 1057 } 1058 1059 bool shouldBeCalledOnce(const FunctionDecl *Function, 1060 unsigned ParamIndex) const { 1061 if (ParamIndex >= Function->getNumParams()) { 1062 return false; 1063 } 1064 // 'swift_async' goes first and overrides anything else. 1065 if (auto ConventionalAsync = 1066 isConventionalSwiftAsync(Function, ParamIndex)) { 1067 return *ConventionalAsync; 1068 } 1069 1070 return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) || 1071 (CheckConventionalParameters && 1072 isOnlyParameterConventional(Function)); 1073 } 1074 1075 bool shouldBeCalledOnce(const ObjCMethodDecl *Method, 1076 unsigned ParamIndex) const { 1077 Selector MethodSelector = Method->getSelector(); 1078 if (ParamIndex >= MethodSelector.getNumArgs()) { 1079 return false; 1080 } 1081 1082 // 'swift_async' goes first and overrides anything else. 1083 if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) { 1084 return *ConventionalAsync; 1085 } 1086 1087 const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex); 1088 return shouldBeCalledOnce(Parameter) || 1089 (CheckConventionalParameters && 1090 isConventionalSelectorPiece(MethodSelector, ParamIndex, 1091 Parameter->getType())); 1092 } 1093 1094 bool shouldBeCalledOnce(const CallExpr *Call, unsigned ParamIndex) const { 1095 const FunctionDecl *Function = Call->getDirectCallee(); 1096 return Function && shouldBeCalledOnce(Function, ParamIndex); 1097 } 1098 1099 bool shouldBeCalledOnce(const ObjCMessageExpr *Message, 1100 unsigned ParamIndex) const { 1101 const ObjCMethodDecl *Method = Message->getMethodDecl(); 1102 return Method && ParamIndex < Method->param_size() && 1103 shouldBeCalledOnce(Method, ParamIndex); 1104 } 1105 1106 //===----------------------------------------------------------------------===// 1107 // Utility methods 1108 //===----------------------------------------------------------------------===// 1109 1110 bool isCaptured(const ParmVarDecl *Parameter) const { 1111 if (const BlockDecl *Block = dyn_cast<BlockDecl>(AC.getDecl())) { 1112 return Block->capturesVariable(Parameter); 1113 } 1114 return false; 1115 } 1116 1117 // Return a call site where the block is called exactly once or null otherwise 1118 const Expr *getBlockGuaraneedCallSite(const BlockExpr *Block) const { 1119 ParentMap &PM = AC.getParentMap(); 1120 1121 // We don't want to track the block through assignments and so on, instead 1122 // we simply see how the block used and if it's used directly in a call, 1123 // we decide based on call to what it is. 1124 // 1125 // In order to do this, we go up the parents of the block looking for 1126 // a call or a message expressions. These might not be immediate parents 1127 // of the actual block expression due to casts and parens, so we skip them. 1128 for (const Stmt *Prev = Block, *Current = PM.getParent(Block); 1129 Current != nullptr; Prev = Current, Current = PM.getParent(Current)) { 1130 // Skip no-op (for our case) operations. 1131 if (isa<CastExpr>(Current) || isa<ParenExpr>(Current)) 1132 continue; 1133 1134 // At this point, Prev represents our block as an immediate child of the 1135 // call. 1136 if (const auto *Call = dyn_cast<CallExpr>(Current)) { 1137 // It might be the call of the Block itself... 1138 if (Call->getCallee() == Prev) 1139 return Call; 1140 1141 // ...or it can be an indirect call of the block. 1142 return shouldBlockArgumentBeCalledOnce(Call, Prev) ? Call : nullptr; 1143 } 1144 if (const auto *Message = dyn_cast<ObjCMessageExpr>(Current)) { 1145 return shouldBlockArgumentBeCalledOnce(Message, Prev) ? Message 1146 : nullptr; 1147 } 1148 1149 break; 1150 } 1151 1152 return nullptr; 1153 } 1154 1155 template <class CallLikeExpr> 1156 bool shouldBlockArgumentBeCalledOnce(const CallLikeExpr *CallOrMessage, 1157 const Stmt *BlockArgument) const { 1158 // CallExpr::arguments does not interact nicely with llvm::enumerate. 1159 llvm::ArrayRef<const Expr *> Arguments = 1160 llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); 1161 1162 for (const auto &Argument : llvm::enumerate(Arguments)) { 1163 if (Argument.value() == BlockArgument) { 1164 return shouldBlockArgumentBeCalledOnce(CallOrMessage, Argument.index()); 1165 } 1166 } 1167 1168 return false; 1169 } 1170 1171 bool shouldBlockArgumentBeCalledOnce(const CallExpr *Call, 1172 unsigned ParamIndex) const { 1173 const FunctionDecl *Function = Call->getDirectCallee(); 1174 return shouldBlockArgumentBeCalledOnce(Function, ParamIndex) || 1175 shouldBeCalledOnce(Call, ParamIndex); 1176 } 1177 1178 bool shouldBlockArgumentBeCalledOnce(const ObjCMessageExpr *Message, 1179 unsigned ParamIndex) const { 1180 // At the moment, we don't have any Obj-C methods we want to specifically 1181 // check in here. 1182 return shouldBeCalledOnce(Message, ParamIndex); 1183 } 1184 1185 static bool shouldBlockArgumentBeCalledOnce(const FunctionDecl *Function, 1186 unsigned ParamIndex) { 1187 // There is a list of important API functions that while not following 1188 // conventions nor being directly annotated, still guarantee that the 1189 // callback parameter will be called exactly once. 1190 // 1191 // Here we check if this is the case. 1192 return Function && 1193 llvm::any_of(KNOWN_CALLED_ONCE_PARAMETERS, 1194 [Function, ParamIndex]( 1195 const KnownCalledOnceParameter &Reference) { 1196 return Reference.FunctionName == 1197 Function->getName() && 1198 Reference.ParamIndex == ParamIndex; 1199 }); 1200 } 1201 1202 /// Return true if the analyzed function is actually a default implementation 1203 /// of the method that has to be overriden. 1204 /// 1205 /// These functions can have tracked parameters, but wouldn't call them 1206 /// because they are not designed to perform any meaningful actions. 1207 /// 1208 /// There are a couple of flavors of such default implementations: 1209 /// 1. Empty methods or methods with a single return statement 1210 /// 2. Methods that have one block with a call to no return function 1211 /// 3. Methods with only assertion-like operations 1212 bool isPossiblyEmptyImpl() const { 1213 if (!isa<ObjCMethodDecl>(AC.getDecl())) { 1214 // We care only about functions that are not supposed to be called. 1215 // Only methods can be overriden. 1216 return false; 1217 } 1218 1219 // Case #1 (without return statements) 1220 if (FunctionCFG.size() == 2) { 1221 // Method has only two blocks: ENTRY and EXIT. 1222 // This is equivalent to empty function. 1223 return true; 1224 } 1225 1226 // Case #2 1227 if (FunctionCFG.size() == 3) { 1228 const CFGBlock &Entry = FunctionCFG.getEntry(); 1229 if (Entry.succ_empty()) { 1230 return false; 1231 } 1232 1233 const CFGBlock *OnlyBlock = *Entry.succ_begin(); 1234 // Method has only one block, let's see if it has a no-return 1235 // element. 1236 if (OnlyBlock && OnlyBlock->hasNoReturnElement()) { 1237 return true; 1238 } 1239 // Fallthrough, CFGs with only one block can fall into #1 and #3 as well. 1240 } 1241 1242 // Cases #1 (return statements) and #3. 1243 // 1244 // It is hard to detect that something is an assertion or came 1245 // from assertion. Here we use a simple heuristic: 1246 // 1247 // - If it came from a macro, it can be an assertion. 1248 // 1249 // Additionally, we can't assume a number of basic blocks or the CFG's 1250 // structure because assertions might include loops and conditions. 1251 return llvm::all_of(FunctionCFG, [](const CFGBlock *BB) { 1252 if (!BB) { 1253 // Unreachable blocks are totally fine. 1254 return true; 1255 } 1256 1257 // Return statements can have sub-expressions that are represented as 1258 // separate statements of a basic block. We should allow this. 1259 // This parent map will be initialized with a parent tree for all 1260 // subexpressions of the block's return statement (if it has one). 1261 std::unique_ptr<ParentMap> ReturnChildren; 1262 1263 return llvm::all_of( 1264 llvm::reverse(*BB), // we should start with return statements, if we 1265 // have any, i.e. from the bottom of the block 1266 [&ReturnChildren](const CFGElement &Element) { 1267 if (std::optional<CFGStmt> S = Element.getAs<CFGStmt>()) { 1268 const Stmt *SuspiciousStmt = S->getStmt(); 1269 1270 if (isa<ReturnStmt>(SuspiciousStmt)) { 1271 // Let's initialize this structure to test whether 1272 // some further statement is a part of this return. 1273 ReturnChildren = std::make_unique<ParentMap>( 1274 const_cast<Stmt *>(SuspiciousStmt)); 1275 // Return statements are allowed as part of #1. 1276 return true; 1277 } 1278 1279 return SuspiciousStmt->getBeginLoc().isMacroID() || 1280 (ReturnChildren && 1281 ReturnChildren->hasParent(SuspiciousStmt)); 1282 } 1283 return true; 1284 }); 1285 }); 1286 } 1287 1288 /// Check if parameter with the given index has ever escaped. 1289 bool hasEverEscaped(unsigned Index) const { 1290 return llvm::any_of(States, [Index](const State &StateForOneBB) { 1291 return StateForOneBB.getKindFor(Index) == ParameterStatus::Escaped; 1292 }); 1293 } 1294 1295 /// Return status stored for the given basic block. 1296 /// \{ 1297 State &getState(const CFGBlock *BB) { 1298 assert(BB); 1299 return States[BB->getBlockID()]; 1300 } 1301 const State &getState(const CFGBlock *BB) const { 1302 assert(BB); 1303 return States[BB->getBlockID()]; 1304 } 1305 /// \} 1306 1307 /// Assign status to the given basic block. 1308 /// 1309 /// Returns true when the stored status changed. 1310 bool assignState(const CFGBlock *BB, const State &ToAssign) { 1311 State &Current = getState(BB); 1312 if (Current == ToAssign) { 1313 return false; 1314 } 1315 1316 Current = ToAssign; 1317 return true; 1318 } 1319 1320 /// Join all incoming statuses for the given basic block. 1321 State joinSuccessors(const CFGBlock *BB) const { 1322 auto Succs = 1323 llvm::make_filter_range(BB->succs(), [this](const CFGBlock *Succ) { 1324 return Succ && this->getState(Succ).isVisited(); 1325 }); 1326 // We came to this block from somewhere after all. 1327 assert(!Succs.empty() && 1328 "Basic block should have at least one visited successor"); 1329 1330 State Result = getState(*Succs.begin()); 1331 1332 for (const CFGBlock *Succ : llvm::drop_begin(Succs, 1)) { 1333 Result.join(getState(Succ)); 1334 } 1335 1336 if (const Expr *Condition = getCondition(BB->getTerminatorStmt())) { 1337 handleConditional(BB, Condition, Result); 1338 } 1339 1340 return Result; 1341 } 1342 1343 void handleConditional(const CFGBlock *BB, const Expr *Condition, 1344 State &ToAlter) const { 1345 handleParameterCheck(BB, Condition, ToAlter); 1346 if (SuppressOnConventionalErrorPaths) { 1347 handleConventionalCheck(BB, Condition, ToAlter); 1348 } 1349 } 1350 1351 void handleParameterCheck(const CFGBlock *BB, const Expr *Condition, 1352 State &ToAlter) const { 1353 // In this function, we try to deal with the following pattern: 1354 // 1355 // if (parameter) 1356 // parameter(...); 1357 // 1358 // It's not good to show a warning here because clearly 'parameter' 1359 // couldn't and shouldn't be called on the 'else' path. 1360 // 1361 // Let's check if this if statement has a check involving one of 1362 // the tracked parameters. 1363 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl( 1364 Condition, 1365 /* ShouldRetrieveFromComparisons = */ true)) { 1366 if (const auto Index = getIndex(*Parameter)) { 1367 ParameterStatus &CurrentStatus = ToAlter.getStatusFor(*Index); 1368 1369 // We don't want to deep dive into semantics of the check and 1370 // figure out if that check was for null or something else. 1371 // We simply trust the user that they know what they are doing. 1372 // 1373 // For this reason, in the following loop we look for the 1374 // best-looking option. 1375 for (const CFGBlock *Succ : BB->succs()) { 1376 if (!Succ) 1377 continue; 1378 1379 const ParameterStatus &StatusInSucc = 1380 getState(Succ).getStatusFor(*Index); 1381 1382 if (StatusInSucc.isErrorStatus()) { 1383 continue; 1384 } 1385 1386 // Let's use this status instead. 1387 CurrentStatus = StatusInSucc; 1388 1389 if (StatusInSucc.getKind() == ParameterStatus::DefinitelyCalled) { 1390 // This is the best option to have and we already found it. 1391 break; 1392 } 1393 1394 // If we found 'Escaped' first, we still might find 'DefinitelyCalled' 1395 // on the other branch. And we prefer the latter. 1396 } 1397 } 1398 } 1399 } 1400 1401 void handleConventionalCheck(const CFGBlock *BB, const Expr *Condition, 1402 State &ToAlter) const { 1403 // Even when the analysis is technically correct, it is a widespread pattern 1404 // not to call completion handlers in some scenarios. These usually have 1405 // typical conditional names, such as 'error' or 'cancel'. 1406 if (!mentionsAnyOfConventionalNames(Condition)) { 1407 return; 1408 } 1409 1410 for (const auto &IndexedStatus : llvm::enumerate(ToAlter)) { 1411 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); 1412 // Conventions do not apply to explicitly marked parameters. 1413 if (isExplicitlyMarked(Parameter)) { 1414 continue; 1415 } 1416 1417 ParameterStatus &CurrentStatus = IndexedStatus.value(); 1418 // If we did find that on one of the branches the user uses the callback 1419 // and doesn't on the other path, we believe that they know what they are 1420 // doing and trust them. 1421 // 1422 // There are two possible scenarios for that: 1423 // 1. Current status is 'MaybeCalled' and one of the branches is 1424 // 'DefinitelyCalled' 1425 // 2. Current status is 'NotCalled' and one of the branches is 'Escaped' 1426 if (isLosingCall(ToAlter, BB, IndexedStatus.index()) || 1427 isLosingEscape(ToAlter, BB, IndexedStatus.index())) { 1428 CurrentStatus = ParameterStatus::Escaped; 1429 } 1430 } 1431 } 1432 1433 bool isLosingCall(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1434 unsigned ParameterIndex) const { 1435 // Let's check if the block represents DefinitelyCalled -> MaybeCalled 1436 // transition. 1437 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, 1438 ParameterStatus::MaybeCalled, 1439 ParameterStatus::DefinitelyCalled); 1440 } 1441 1442 bool isLosingEscape(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1443 unsigned ParameterIndex) const { 1444 // Let's check if the block represents Escaped -> NotCalled transition. 1445 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, 1446 ParameterStatus::NotCalled, ParameterStatus::Escaped); 1447 } 1448 1449 bool isLosingJoin(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1450 unsigned ParameterIndex, ParameterStatus::Kind AfterJoin, 1451 ParameterStatus::Kind BeforeJoin) const { 1452 assert(!ParameterStatus::isErrorStatus(BeforeJoin) && 1453 ParameterStatus::isErrorStatus(AfterJoin) && 1454 "It's not a losing join if statuses do not represent " 1455 "correct-to-error transition"); 1456 1457 const ParameterStatus &CurrentStatus = 1458 StateAfterJoin.getStatusFor(ParameterIndex); 1459 1460 return CurrentStatus.getKind() == AfterJoin && 1461 anySuccessorHasStatus(JoinBlock, ParameterIndex, BeforeJoin); 1462 } 1463 1464 /// Return true if any of the successors of the given basic block has 1465 /// a specified status for the given parameter. 1466 bool anySuccessorHasStatus(const CFGBlock *Parent, unsigned ParameterIndex, 1467 ParameterStatus::Kind ToFind) const { 1468 return llvm::any_of( 1469 Parent->succs(), [this, ParameterIndex, ToFind](const CFGBlock *Succ) { 1470 return Succ && getState(Succ).getKindFor(ParameterIndex) == ToFind; 1471 }); 1472 } 1473 1474 /// Check given expression that was discovered to escape. 1475 void checkEscapee(const Expr *E) { 1476 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { 1477 checkEscapee(*Parameter); 1478 } 1479 } 1480 1481 /// Check given parameter that was discovered to escape. 1482 void checkEscapee(const ParmVarDecl &Parameter) { 1483 if (auto Index = getIndex(Parameter)) { 1484 processEscapeFor(*Index); 1485 } 1486 } 1487 1488 /// Mark all parameters in the current state as 'no-return'. 1489 void markNoReturn() { 1490 for (ParameterStatus &PS : CurrentState) { 1491 PS = ParameterStatus::NoReturn; 1492 } 1493 } 1494 1495 /// Check if the given assignment represents suppression and act on it. 1496 void checkSuppression(const BinaryOperator *Assignment) { 1497 // Suppression has the following form: 1498 // parameter = 0; 1499 // 0 can be of any form (NULL, nil, etc.) 1500 if (auto Index = getIndexOfExpression(Assignment->getLHS())) { 1501 1502 // We don't care what is written in the RHS, it could be whatever 1503 // we can interpret as 0. 1504 if (auto Constant = 1505 Assignment->getRHS()->IgnoreParenCasts()->getIntegerConstantExpr( 1506 AC.getASTContext())) { 1507 1508 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); 1509 1510 if (0 == *Constant && CurrentParamStatus.seenAnyCalls()) { 1511 // Even though this suppression mechanism is introduced to tackle 1512 // false positives for multiple calls, the fact that the user has 1513 // to use suppression can also tell us that we couldn't figure out 1514 // how different paths cancel each other out. And if that is true, 1515 // we will most certainly have false positives about parameters not 1516 // being called on certain paths. 1517 // 1518 // For this reason, we abandon tracking this parameter altogether. 1519 CurrentParamStatus = ParameterStatus::Reported; 1520 } 1521 } 1522 } 1523 } 1524 1525 public: 1526 //===----------------------------------------------------------------------===// 1527 // Tree traversal methods 1528 //===----------------------------------------------------------------------===// 1529 1530 void VisitCallExpr(const CallExpr *Call) { 1531 // This call might be a direct call, i.e. a parameter call... 1532 checkDirectCall(Call); 1533 // ... or an indirect call, i.e. when parameter is an argument. 1534 checkIndirectCall(Call); 1535 } 1536 1537 void VisitObjCMessageExpr(const ObjCMessageExpr *Message) { 1538 // The most common situation that we are defending against here is 1539 // copying a tracked parameter. 1540 if (const Expr *Receiver = Message->getInstanceReceiver()) { 1541 checkEscapee(Receiver); 1542 } 1543 // Message expressions unlike calls, could not be direct. 1544 checkIndirectCall(Message); 1545 } 1546 1547 void VisitBlockExpr(const BlockExpr *Block) { 1548 // Block expressions are tricky. It is a very common practice to capture 1549 // completion handlers by blocks and use them there. 1550 // For this reason, it is important to analyze blocks and report warnings 1551 // for completion handler misuse in blocks. 1552 // 1553 // However, it can be quite difficult to track how the block itself is being 1554 // used. The full precise anlysis of that will be similar to alias analysis 1555 // for completion handlers and can be too heavyweight for a compile-time 1556 // diagnostic. Instead, we judge about the immediate use of the block. 1557 // 1558 // Here, we try to find a call expression where we know due to conventions, 1559 // annotations, or other reasons that the block is called once and only 1560 // once. 1561 const Expr *CalledOnceCallSite = getBlockGuaraneedCallSite(Block); 1562 1563 // We need to report this information to the handler because in the 1564 // situation when we know that the block is called exactly once, we can be 1565 // stricter in terms of reported diagnostics. 1566 if (CalledOnceCallSite) { 1567 Handler.handleBlockThatIsGuaranteedToBeCalledOnce(Block->getBlockDecl()); 1568 } else { 1569 Handler.handleBlockWithNoGuarantees(Block->getBlockDecl()); 1570 } 1571 1572 for (const auto &Capture : Block->getBlockDecl()->captures()) { 1573 if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) { 1574 if (auto Index = getIndex(*Param)) { 1575 if (CalledOnceCallSite) { 1576 // The call site of a block can be considered a call site of the 1577 // captured parameter we track. 1578 processCallFor(*Index, CalledOnceCallSite); 1579 } else { 1580 // We still should consider this block as an escape for parameter, 1581 // if we don't know about its call site or the number of time it 1582 // can be invoked. 1583 processEscapeFor(*Index); 1584 } 1585 } 1586 } 1587 } 1588 } 1589 1590 void VisitBinaryOperator(const BinaryOperator *Op) { 1591 if (Op->getOpcode() == clang::BO_Assign) { 1592 // Let's check if one of the tracked parameters is assigned into 1593 // something, and if it is we don't want to track extra variables, so we 1594 // consider it as an escapee. 1595 checkEscapee(Op->getRHS()); 1596 1597 // Let's check whether this assignment is a suppression. 1598 checkSuppression(Op); 1599 } 1600 } 1601 1602 void VisitDeclStmt(const DeclStmt *DS) { 1603 // Variable initialization is not assignment and should be handled 1604 // separately. 1605 // 1606 // Multiple declarations can be a part of declaration statement. 1607 for (const auto *Declaration : DS->getDeclGroup()) { 1608 if (const auto *Var = dyn_cast<VarDecl>(Declaration)) { 1609 if (Var->getInit()) { 1610 checkEscapee(Var->getInit()); 1611 } 1612 1613 if (Var->hasAttr<CleanupAttr>()) { 1614 FunctionHasCleanupVars = true; 1615 } 1616 } 1617 } 1618 } 1619 1620 void VisitCStyleCastExpr(const CStyleCastExpr *Cast) { 1621 // We consider '(void)parameter' as a manual no-op escape. 1622 // It should be used to explicitly tell the analysis that this parameter 1623 // is intentionally not called on this path. 1624 if (Cast->getType().getCanonicalType()->isVoidType()) { 1625 checkEscapee(Cast->getSubExpr()); 1626 } 1627 } 1628 1629 void VisitObjCAtThrowStmt(const ObjCAtThrowStmt *) { 1630 // It is OK not to call marked parameters on exceptional paths. 1631 markNoReturn(); 1632 } 1633 1634 private: 1635 unsigned size() const { return TrackedParams.size(); } 1636 1637 std::optional<unsigned> getIndexOfCallee(const CallExpr *Call) const { 1638 return getIndexOfExpression(Call->getCallee()); 1639 } 1640 1641 std::optional<unsigned> getIndexOfExpression(const Expr *E) const { 1642 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { 1643 return getIndex(*Parameter); 1644 } 1645 1646 return std::nullopt; 1647 } 1648 1649 std::optional<unsigned> getIndex(const ParmVarDecl &Parameter) const { 1650 // Expected number of parameters that we actually track is 1. 1651 // 1652 // Also, the maximum number of declared parameters could not be on a scale 1653 // of hundreds of thousands. 1654 // 1655 // In this setting, linear search seems reasonable and even performs better 1656 // than bisection. 1657 ParamSizedVector<const ParmVarDecl *>::const_iterator It = 1658 llvm::find(TrackedParams, &Parameter); 1659 1660 if (It != TrackedParams.end()) { 1661 return It - TrackedParams.begin(); 1662 } 1663 1664 return std::nullopt; 1665 } 1666 1667 const ParmVarDecl *getParameter(unsigned Index) const { 1668 assert(Index < TrackedParams.size()); 1669 return TrackedParams[Index]; 1670 } 1671 1672 const CFG &FunctionCFG; 1673 AnalysisDeclContext &AC; 1674 CalledOnceCheckHandler &Handler; 1675 bool CheckConventionalParameters; 1676 // As of now, we turn this behavior off. So, we still are going to report 1677 // missing calls on paths that look like it was intentional. 1678 // Technically such reports are true positives, but they can make some users 1679 // grumpy because of the sheer number of warnings. 1680 // It can be turned back on if we decide that we want to have the other way 1681 // around. 1682 bool SuppressOnConventionalErrorPaths = false; 1683 1684 // The user can annotate variable declarations with cleanup functions, which 1685 // essentially imposes a custom destructor logic on that variable. 1686 // It is possible to use it, however, to call tracked parameters on all exits 1687 // from the function. For this reason, we track the fact that the function 1688 // actually has these. 1689 bool FunctionHasCleanupVars = false; 1690 1691 State CurrentState; 1692 ParamSizedVector<const ParmVarDecl *> TrackedParams; 1693 CFGSizedVector<State> States; 1694 }; 1695 1696 } // end anonymous namespace 1697 1698 namespace clang { 1699 void checkCalledOnceParameters(AnalysisDeclContext &AC, 1700 CalledOnceCheckHandler &Handler, 1701 bool CheckConventionalParameters) { 1702 CalledOnceChecker::check(AC, Handler, CheckConventionalParameters); 1703 } 1704 } // end namespace clang 1705