xref: /freebsd/contrib/llvm-project/clang/lib/Analysis/CalledOnceCheck.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===- CalledOnceCheck.cpp - Check 'called once' parameters ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Analysis/Analyses/CalledOnceCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/AST/Attr.h"
12 #include "clang/AST/Decl.h"
13 #include "clang/AST/DeclBase.h"
14 #include "clang/AST/Expr.h"
15 #include "clang/AST/ExprObjC.h"
16 #include "clang/AST/OperationKinds.h"
17 #include "clang/AST/ParentMap.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtObjC.h"
21 #include "clang/AST/StmtVisitor.h"
22 #include "clang/AST/Type.h"
23 #include "clang/Analysis/AnalysisDeclContext.h"
24 #include "clang/Analysis/CFG.h"
25 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
26 #include "clang/Basic/Builtins.h"
27 #include "clang/Basic/IdentifierTable.h"
28 #include "clang/Basic/LLVM.h"
29 #include "llvm/ADT/BitVector.h"
30 #include "llvm/ADT/BitmaskEnum.h"
31 #include "llvm/ADT/PointerIntPair.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/Sequence.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include <memory>
40 #include <optional>
41 
42 using namespace clang;
43 
44 namespace {
45 static constexpr unsigned EXPECTED_MAX_NUMBER_OF_PARAMS = 2;
46 template <class T>
47 using ParamSizedVector = llvm::SmallVector<T, EXPECTED_MAX_NUMBER_OF_PARAMS>;
48 static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8;
49 template <class T>
50 using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>;
51 constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = {
52     "completionHandler", "completion",      "withCompletionHandler",
53     "withCompletion",    "completionBlock", "withCompletionBlock",
54     "replyTo",           "reply",           "withReplyTo"};
55 constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = {
56     "WithCompletionHandler", "WithCompletion", "WithCompletionBlock",
57     "WithReplyTo", "WithReply"};
58 constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = {
59     "error", "cancel", "shouldCall", "done", "OK", "success"};
60 
61 struct KnownCalledOnceParameter {
62   llvm::StringLiteral FunctionName;
63   unsigned ParamIndex;
64 };
65 constexpr KnownCalledOnceParameter KNOWN_CALLED_ONCE_PARAMETERS[] = {
66     {llvm::StringLiteral{"dispatch_async"}, 1},
67     {llvm::StringLiteral{"dispatch_async_and_wait"}, 1},
68     {llvm::StringLiteral{"dispatch_after"}, 2},
69     {llvm::StringLiteral{"dispatch_sync"}, 1},
70     {llvm::StringLiteral{"dispatch_once"}, 1},
71     {llvm::StringLiteral{"dispatch_barrier_async"}, 1},
72     {llvm::StringLiteral{"dispatch_barrier_async_and_wait"}, 1},
73     {llvm::StringLiteral{"dispatch_barrier_sync"}, 1}};
74 
75 class ParameterStatus {
76 public:
77   // Status kind is basically the main part of parameter's status.
78   // The kind represents our knowledge (so far) about a tracked parameter
79   // in the context of this analysis.
80   //
81   // Since we want to report on missing and extraneous calls, we need to
82   // track the fact whether paramater was called or not.  This automatically
83   // decides two kinds: `NotCalled` and `Called`.
84   //
85   // One of the erroneous situations is the case when parameter is called only
86   // on some of the paths.  We could've considered it `NotCalled`, but we want
87   // to report double call warnings even if these two calls are not guaranteed
88   // to happen in every execution.  We also don't want to have it as `Called`
89   // because not calling tracked parameter on all of the paths is an error
90   // on its own.  For these reasons, we need to have a separate kind,
91   // `MaybeCalled`, and change `Called` to `DefinitelyCalled` to avoid
92   // confusion.
93   //
94   // Two violations of calling parameter more than once and not calling it on
95   // every path are not, however, mutually exclusive.  In situations where both
96   // violations take place, we prefer to report ONLY double call.  It's always
97   // harder to pinpoint a bug that has arisen when a user neglects to take the
98   // right action (and therefore, no action is taken), than when a user takes
99   // the wrong action.  And, in order to remember that we already reported
100   // a double call, we need another kind: `Reported`.
101   //
102   // Our analysis is intra-procedural and, while in the perfect world,
103   // developers only use tracked parameters to call them, in the real world,
104   // the picture might be different.  Parameters can be stored in global
105   // variables or leaked into other functions that we know nothing about.
106   // We try to be lenient and trust users.  Another kind `Escaped` reflects
107   // such situations.  We don't know if it gets called there or not, but we
108   // should always think of `Escaped` as the best possible option.
109   //
110   // Some of the paths in the analyzed functions might end with a call
111   // to noreturn functions.  Such paths are not required to have parameter
112   // calls and we want to track that.  For the purposes of better diagnostics,
113   // we don't want to reuse `Escaped` and, thus, have another kind `NoReturn`.
114   //
115   // Additionally, we have `NotVisited` kind that tells us nothing about
116   // a tracked parameter, but is used for tracking analyzed (aka visited)
117   // basic blocks.
118   //
119   // If we consider `|` to be a JOIN operation of two kinds coming from
120   // two different paths, the following properties must hold:
121   //
122   //   1. for any Kind K: K | K == K
123   //      Joining two identical kinds should result in the same kind.
124   //
125   //   2. for any Kind K: Reported | K == Reported
126   //      Doesn't matter on which path it was reported, it still is.
127   //
128   //   3. for any Kind K: NoReturn | K == K
129   //      We can totally ignore noreturn paths during merges.
130   //
131   //   4. DefinitelyCalled | NotCalled == MaybeCalled
132   //      Called on one path, not called on another - that's simply
133   //      a definition for MaybeCalled.
134   //
135   //   5. for any Kind K in [DefinitelyCalled, NotCalled, MaybeCalled]:
136   //      Escaped | K == K
137   //      Escaped mirrors other statuses after joins.
138   //      Every situation, when we join any of the listed kinds K,
139   //      is a violation.  For this reason, in order to assume the
140   //      best outcome for this escape, we consider it to be the
141   //      same as the other path.
142   //
143   //   6. for any Kind K in [DefinitelyCalled, NotCalled]:
144   //      MaybeCalled | K == MaybeCalled
145   //      MaybeCalled should basically stay after almost every join.
146   enum Kind {
147     // No-return paths should be absolutely transparent for the analysis.
148     // 0x0 is the identity element for selected join operation (binary or).
149     NoReturn = 0x0, /* 0000 */
150     // Escaped marks situations when marked parameter escaped into
151     // another function (so we can assume that it was possibly called there).
152     Escaped = 0x1, /* 0001 */
153     // Parameter was definitely called once at this point.
154     DefinitelyCalled = 0x3, /* 0011 */
155     // Kinds less or equal to NON_ERROR_STATUS are not considered errors.
156     NON_ERROR_STATUS = DefinitelyCalled,
157     // Parameter was not yet called.
158     NotCalled = 0x5, /* 0101 */
159     // Parameter was not called at least on one path leading to this point,
160     // while there is also at least one path that it gets called.
161     MaybeCalled = 0x7, /* 0111 */
162     // Parameter was not yet analyzed.
163     NotVisited = 0x8, /* 1000 */
164     // We already reported a violation and stopped tracking calls for this
165     // parameter.
166     Reported = 0xF, /* 1111 */
167     LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported)
168   };
169 
170   constexpr ParameterStatus() = default;
171   /* implicit */ ParameterStatus(Kind K) : StatusKind(K) {
172     assert(!seenAnyCalls(K) && "Can't initialize status without a call");
173   }
174   ParameterStatus(Kind K, const Expr *Call) : StatusKind(K), Call(Call) {
175     assert(seenAnyCalls(K) && "This kind is not supposed to have a call");
176   }
177 
178   const Expr &getCall() const {
179     assert(seenAnyCalls(getKind()) && "ParameterStatus doesn't have a call");
180     return *Call;
181   }
182   static bool seenAnyCalls(Kind K) {
183     return (K & DefinitelyCalled) == DefinitelyCalled && K != Reported;
184   }
185   bool seenAnyCalls() const { return seenAnyCalls(getKind()); }
186 
187   static bool isErrorStatus(Kind K) { return K > NON_ERROR_STATUS; }
188   bool isErrorStatus() const { return isErrorStatus(getKind()); }
189 
190   Kind getKind() const { return StatusKind; }
191 
192   void join(const ParameterStatus &Other) {
193     // If we have a pointer already, let's keep it.
194     // For the purposes of the analysis, it doesn't really matter
195     // which call we report.
196     //
197     // If we don't have a pointer, let's take whatever gets joined.
198     if (!Call) {
199       Call = Other.Call;
200     }
201     // Join kinds.
202     StatusKind |= Other.getKind();
203   }
204 
205   bool operator==(const ParameterStatus &Other) const {
206     // We compare only kinds, pointers on their own is only additional
207     // information.
208     return getKind() == Other.getKind();
209   }
210 
211 private:
212   // It would've been a perfect place to use llvm::PointerIntPair, but
213   // unfortunately NumLowBitsAvailable for clang::Expr had been reduced to 2.
214   Kind StatusKind = NotVisited;
215   const Expr *Call = nullptr;
216 };
217 
218 /// State aggregates statuses of all tracked parameters.
219 class State {
220 public:
221   State(unsigned Size, ParameterStatus::Kind K = ParameterStatus::NotVisited)
222       : ParamData(Size, K) {}
223 
224   /// Return status of a parameter with the given index.
225   /// \{
226   ParameterStatus &getStatusFor(unsigned Index) { return ParamData[Index]; }
227   const ParameterStatus &getStatusFor(unsigned Index) const {
228     return ParamData[Index];
229   }
230   /// \}
231 
232   /// Return true if parameter with the given index can be called.
233   bool seenAnyCalls(unsigned Index) const {
234     return getStatusFor(Index).seenAnyCalls();
235   }
236   /// Return a reference that we consider a call.
237   ///
238   /// Should only be used for parameters that can be called.
239   const Expr &getCallFor(unsigned Index) const {
240     return getStatusFor(Index).getCall();
241   }
242   /// Return status kind of parameter with the given index.
243   ParameterStatus::Kind getKindFor(unsigned Index) const {
244     return getStatusFor(Index).getKind();
245   }
246 
247   bool isVisited() const {
248     return llvm::all_of(ParamData, [](const ParameterStatus &S) {
249       return S.getKind() != ParameterStatus::NotVisited;
250     });
251   }
252 
253   // Join other state into the current state.
254   void join(const State &Other) {
255     assert(ParamData.size() == Other.ParamData.size() &&
256            "Couldn't join statuses with different sizes");
257     for (auto Pair : llvm::zip(ParamData, Other.ParamData)) {
258       std::get<0>(Pair).join(std::get<1>(Pair));
259     }
260   }
261 
262   using iterator = ParamSizedVector<ParameterStatus>::iterator;
263   using const_iterator = ParamSizedVector<ParameterStatus>::const_iterator;
264 
265   iterator begin() { return ParamData.begin(); }
266   iterator end() { return ParamData.end(); }
267 
268   const_iterator begin() const { return ParamData.begin(); }
269   const_iterator end() const { return ParamData.end(); }
270 
271   bool operator==(const State &Other) const {
272     return ParamData == Other.ParamData;
273   }
274 
275 private:
276   ParamSizedVector<ParameterStatus> ParamData;
277 };
278 
279 /// A simple class that finds DeclRefExpr in the given expression.
280 ///
281 /// However, we don't want to find ANY nested DeclRefExpr skipping whatever
282 /// expressions on our way.  Only certain expressions considered "no-op"
283 /// for our task are indeed skipped.
284 class DeclRefFinder
285     : public ConstStmtVisitor<DeclRefFinder, const DeclRefExpr *> {
286 public:
287   /// Find a DeclRefExpr in the given expression.
288   ///
289   /// In its most basic form (ShouldRetrieveFromComparisons == false),
290   /// this function can be simply reduced to the following question:
291   ///
292   ///   - If expression E is used as a function argument, could we say
293   ///     that DeclRefExpr nested in E is used as an argument?
294   ///
295   /// According to this rule, we can say that parens, casts and dereferencing
296   /// (dereferencing only applied to function pointers, but this is our case)
297   /// can be skipped.
298   ///
299   /// When we should look into comparisons the question changes to:
300   ///
301   ///   - If expression E is used as a condition, could we say that
302   ///     DeclRefExpr is being checked?
303   ///
304   /// And even though, these are two different questions, they have quite a lot
305   /// in common.  Actually, we can say that whatever expression answers
306   /// positively the first question also fits the second question as well.
307   ///
308   /// In addition, we skip binary operators == and !=, and unary opeartor !.
309   static const DeclRefExpr *find(const Expr *E,
310                                  bool ShouldRetrieveFromComparisons = false) {
311     return DeclRefFinder(ShouldRetrieveFromComparisons).Visit(E);
312   }
313 
314   const DeclRefExpr *VisitDeclRefExpr(const DeclRefExpr *DR) { return DR; }
315 
316   const DeclRefExpr *VisitUnaryOperator(const UnaryOperator *UO) {
317     switch (UO->getOpcode()) {
318     case UO_LNot:
319       // We care about logical not only if we care about comparisons.
320       if (!ShouldRetrieveFromComparisons)
321         return nullptr;
322       [[fallthrough]];
323     // Function pointer/references can be dereferenced before a call.
324     // That doesn't make it, however, any different from a regular call.
325     // For this reason, dereference operation is a "no-op".
326     case UO_Deref:
327       return Visit(UO->getSubExpr());
328     default:
329       return nullptr;
330     }
331   }
332 
333   const DeclRefExpr *VisitBinaryOperator(const BinaryOperator *BO) {
334     if (!ShouldRetrieveFromComparisons)
335       return nullptr;
336 
337     switch (BO->getOpcode()) {
338     case BO_EQ:
339     case BO_NE: {
340       const DeclRefExpr *LHS = Visit(BO->getLHS());
341       return LHS ? LHS : Visit(BO->getRHS());
342     }
343     default:
344       return nullptr;
345     }
346   }
347 
348   const DeclRefExpr *VisitOpaqueValueExpr(const OpaqueValueExpr *OVE) {
349     return Visit(OVE->getSourceExpr());
350   }
351 
352   const DeclRefExpr *VisitCallExpr(const CallExpr *CE) {
353     if (!ShouldRetrieveFromComparisons)
354       return nullptr;
355 
356     // We want to see through some of the boolean builtin functions
357     // that we are likely to see in conditions.
358     switch (CE->getBuiltinCallee()) {
359     case Builtin::BI__builtin_expect:
360     case Builtin::BI__builtin_expect_with_probability: {
361       assert(CE->getNumArgs() >= 2);
362 
363       const DeclRefExpr *Candidate = Visit(CE->getArg(0));
364       return Candidate != nullptr ? Candidate : Visit(CE->getArg(1));
365     }
366 
367     case Builtin::BI__builtin_unpredictable:
368       return Visit(CE->getArg(0));
369 
370     default:
371       return nullptr;
372     }
373   }
374 
375   const DeclRefExpr *VisitExpr(const Expr *E) {
376     // It is a fallback method that gets called whenever the actual type
377     // of the given expression is not covered.
378     //
379     // We first check if we have anything to skip.  And then repeat the whole
380     // procedure for a nested expression instead.
381     const Expr *DeclutteredExpr = E->IgnoreParenCasts();
382     return E != DeclutteredExpr ? Visit(DeclutteredExpr) : nullptr;
383   }
384 
385 private:
386   DeclRefFinder(bool ShouldRetrieveFromComparisons)
387       : ShouldRetrieveFromComparisons(ShouldRetrieveFromComparisons) {}
388 
389   bool ShouldRetrieveFromComparisons;
390 };
391 
392 const DeclRefExpr *findDeclRefExpr(const Expr *In,
393                                    bool ShouldRetrieveFromComparisons = false) {
394   return DeclRefFinder::find(In, ShouldRetrieveFromComparisons);
395 }
396 
397 const ParmVarDecl *
398 findReferencedParmVarDecl(const Expr *In,
399                           bool ShouldRetrieveFromComparisons = false) {
400   if (const DeclRefExpr *DR =
401           findDeclRefExpr(In, ShouldRetrieveFromComparisons)) {
402     return dyn_cast<ParmVarDecl>(DR->getDecl());
403   }
404 
405   return nullptr;
406 }
407 
408 /// Return conditions expression of a statement if it has one.
409 const Expr *getCondition(const Stmt *S) {
410   if (!S) {
411     return nullptr;
412   }
413 
414   if (const auto *If = dyn_cast<IfStmt>(S)) {
415     return If->getCond();
416   }
417   if (const auto *Ternary = dyn_cast<AbstractConditionalOperator>(S)) {
418     return Ternary->getCond();
419   }
420 
421   return nullptr;
422 }
423 
424 /// A small helper class that collects all named identifiers in the given
425 /// expression.  It traverses it recursively, so names from deeper levels
426 /// of the AST will end up in the results.
427 /// Results might have duplicate names, if this is a problem, convert to
428 /// string sets afterwards.
429 class NamesCollector : public RecursiveASTVisitor<NamesCollector> {
430 public:
431   static constexpr unsigned EXPECTED_NUMBER_OF_NAMES = 5;
432   using NameCollection =
433       llvm::SmallVector<llvm::StringRef, EXPECTED_NUMBER_OF_NAMES>;
434 
435   static NameCollection collect(const Expr *From) {
436     NamesCollector Impl;
437     Impl.TraverseStmt(const_cast<Expr *>(From));
438     return Impl.Result;
439   }
440 
441   bool VisitDeclRefExpr(const DeclRefExpr *E) {
442     Result.push_back(E->getDecl()->getName());
443     return true;
444   }
445 
446   bool VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *E) {
447     llvm::StringRef Name;
448 
449     if (E->isImplicitProperty()) {
450       ObjCMethodDecl *PropertyMethodDecl = nullptr;
451       if (E->isMessagingGetter()) {
452         PropertyMethodDecl = E->getImplicitPropertyGetter();
453       } else {
454         PropertyMethodDecl = E->getImplicitPropertySetter();
455       }
456       assert(PropertyMethodDecl &&
457              "Implicit property must have associated declaration");
458       Name = PropertyMethodDecl->getSelector().getNameForSlot(0);
459     } else {
460       assert(E->isExplicitProperty());
461       Name = E->getExplicitProperty()->getName();
462     }
463 
464     Result.push_back(Name);
465     return true;
466   }
467 
468 private:
469   NamesCollector() = default;
470   NameCollection Result;
471 };
472 
473 /// Check whether the given expression mentions any of conventional names.
474 bool mentionsAnyOfConventionalNames(const Expr *E) {
475   NamesCollector::NameCollection MentionedNames = NamesCollector::collect(E);
476 
477   return llvm::any_of(MentionedNames, [](llvm::StringRef ConditionName) {
478     return llvm::any_of(
479         CONVENTIONAL_CONDITIONS,
480         [ConditionName](const llvm::StringLiteral &Conventional) {
481           return ConditionName.contains_insensitive(Conventional);
482         });
483   });
484 }
485 
486 /// Clarification is a simple pair of a reason why parameter is not called
487 /// on every path and a statement to blame.
488 struct Clarification {
489   NeverCalledReason Reason;
490   const Stmt *Location;
491 };
492 
493 /// A helper class that can produce a clarification based on the given pair
494 /// of basic blocks.
495 class NotCalledClarifier
496     : public ConstStmtVisitor<NotCalledClarifier,
497                               std::optional<Clarification>> {
498 public:
499   /// The main entrypoint for the class, the function that tries to find the
500   /// clarification of how to explain which sub-path starts with a CFG edge
501   /// from Conditional to SuccWithoutCall.
502   ///
503   /// This means that this function has one precondition:
504   ///   SuccWithoutCall should be a successor block for Conditional.
505   ///
506   /// Because clarification is not needed for non-trivial pairs of blocks
507   /// (i.e. SuccWithoutCall is not the only successor), it returns meaningful
508   /// results only for such cases.  For this very reason, the parent basic
509   /// block, Conditional, is named that way, so it is clear what kind of
510   /// block is expected.
511   static std::optional<Clarification> clarify(const CFGBlock *Conditional,
512                                               const CFGBlock *SuccWithoutCall) {
513     if (const Stmt *Terminator = Conditional->getTerminatorStmt()) {
514       return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator);
515     }
516     return std::nullopt;
517   }
518 
519   std::optional<Clarification> VisitIfStmt(const IfStmt *If) {
520     return VisitBranchingBlock(If, NeverCalledReason::IfThen);
521   }
522 
523   std::optional<Clarification>
524   VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) {
525     return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen);
526   }
527 
528   std::optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) {
529     const Stmt *CaseToBlame = SuccInQuestion->getLabel();
530     if (!CaseToBlame) {
531       // If interesting basic block is not labeled, it means that this
532       // basic block does not represent any of the cases.
533       return Clarification{NeverCalledReason::SwitchSkipped, Switch};
534     }
535 
536     for (const SwitchCase *Case = Switch->getSwitchCaseList(); Case;
537          Case = Case->getNextSwitchCase()) {
538       if (Case == CaseToBlame) {
539         return Clarification{NeverCalledReason::Switch, Case};
540       }
541     }
542 
543     llvm_unreachable("Found unexpected switch structure");
544   }
545 
546   std::optional<Clarification> VisitForStmt(const ForStmt *For) {
547     return VisitBranchingBlock(For, NeverCalledReason::LoopEntered);
548   }
549 
550   std::optional<Clarification> VisitWhileStmt(const WhileStmt *While) {
551     return VisitBranchingBlock(While, NeverCalledReason::LoopEntered);
552   }
553 
554   std::optional<Clarification>
555   VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) {
556     assert(Parent->succ_size() == 2 &&
557            "Branching block should have exactly two successors");
558     unsigned SuccessorIndex = getSuccessorIndex(Parent, SuccInQuestion);
559     NeverCalledReason ActualReason =
560         updateForSuccessor(DefaultReason, SuccessorIndex);
561     return Clarification{ActualReason, Terminator};
562   }
563 
564   std::optional<Clarification> VisitBinaryOperator(const BinaryOperator *) {
565     // We don't want to report on short-curcuit logical operations.
566     return std::nullopt;
567   }
568 
569   std::optional<Clarification> VisitStmt(const Stmt *Terminator) {
570     // If we got here, we didn't have a visit function for more derived
571     // classes of statement that this terminator actually belongs to.
572     //
573     // This is not a good scenario and should not happen in practice, but
574     // at least we'll warn the user.
575     return Clarification{NeverCalledReason::FallbackReason, Terminator};
576   }
577 
578   static unsigned getSuccessorIndex(const CFGBlock *Parent,
579                                     const CFGBlock *Child) {
580     CFGBlock::const_succ_iterator It = llvm::find(Parent->succs(), Child);
581     assert(It != Parent->succ_end() &&
582            "Given blocks should be in parent-child relationship");
583     return It - Parent->succ_begin();
584   }
585 
586   static NeverCalledReason
587   updateForSuccessor(NeverCalledReason ReasonForTrueBranch,
588                      unsigned SuccessorIndex) {
589     assert(SuccessorIndex <= 1);
590     unsigned RawReason =
591         static_cast<unsigned>(ReasonForTrueBranch) + SuccessorIndex;
592     assert(RawReason <=
593            static_cast<unsigned>(NeverCalledReason::LARGEST_VALUE));
594     return static_cast<NeverCalledReason>(RawReason);
595   }
596 
597 private:
598   NotCalledClarifier(const CFGBlock *Parent, const CFGBlock *SuccInQuestion)
599       : Parent(Parent), SuccInQuestion(SuccInQuestion) {}
600 
601   const CFGBlock *Parent, *SuccInQuestion;
602 };
603 
604 class CalledOnceChecker : public ConstStmtVisitor<CalledOnceChecker> {
605 public:
606   static void check(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler,
607                     bool CheckConventionalParameters) {
608     CalledOnceChecker(AC, Handler, CheckConventionalParameters).check();
609   }
610 
611 private:
612   CalledOnceChecker(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler,
613                     bool CheckConventionalParameters)
614       : FunctionCFG(*AC.getCFG()), AC(AC), Handler(Handler),
615         CheckConventionalParameters(CheckConventionalParameters),
616         CurrentState(0) {
617     initDataStructures();
618     assert((size() == 0 || !States.empty()) &&
619            "Data structures are inconsistent");
620   }
621 
622   //===----------------------------------------------------------------------===//
623   //                            Initializing functions
624   //===----------------------------------------------------------------------===//
625 
626   void initDataStructures() {
627     const Decl *AnalyzedDecl = AC.getDecl();
628 
629     if (const auto *Function = dyn_cast<FunctionDecl>(AnalyzedDecl)) {
630       findParamsToTrack(Function);
631     } else if (const auto *Method = dyn_cast<ObjCMethodDecl>(AnalyzedDecl)) {
632       findParamsToTrack(Method);
633     } else if (const auto *Block = dyn_cast<BlockDecl>(AnalyzedDecl)) {
634       findCapturesToTrack(Block);
635       findParamsToTrack(Block);
636     }
637 
638     // Have something to track, let's init states for every block from the CFG.
639     if (size() != 0) {
640       States =
641           CFGSizedVector<State>(FunctionCFG.getNumBlockIDs(), State(size()));
642     }
643   }
644 
645   void findCapturesToTrack(const BlockDecl *Block) {
646     for (const auto &Capture : Block->captures()) {
647       if (const auto *P = dyn_cast<ParmVarDecl>(Capture.getVariable())) {
648         // Parameter DeclContext is its owning function or method.
649         const DeclContext *ParamContext = P->getDeclContext();
650         if (shouldBeCalledOnce(ParamContext, P)) {
651           TrackedParams.push_back(P);
652         }
653       }
654     }
655   }
656 
657   template <class FunctionLikeDecl>
658   void findParamsToTrack(const FunctionLikeDecl *Function) {
659     for (unsigned Index : llvm::seq<unsigned>(0u, Function->param_size())) {
660       if (shouldBeCalledOnce(Function, Index)) {
661         TrackedParams.push_back(Function->getParamDecl(Index));
662       }
663     }
664   }
665 
666   //===----------------------------------------------------------------------===//
667   //                         Main logic 'check' functions
668   //===----------------------------------------------------------------------===//
669 
670   void check() {
671     // Nothing to check here: we don't have marked parameters.
672     if (size() == 0 || isPossiblyEmptyImpl())
673       return;
674 
675     assert(
676         llvm::none_of(States, [](const State &S) { return S.isVisited(); }) &&
677         "None of the blocks should be 'visited' before the analysis");
678 
679     // For our task, both backward and forward approaches suite well.
680     // However, in order to report better diagnostics, we decided to go with
681     // backward analysis.
682     //
683     // Let's consider the following CFG and how forward and backward analyses
684     // will work for it.
685     //
686     //                  FORWARD:           |                 BACKWARD:
687     //                    #1               |                     #1
688     //                +---------+          |               +-----------+
689     //                |   if    |          |               |MaybeCalled|
690     //                +---------+          |               +-----------+
691     //                |NotCalled|          |               |    if     |
692     //                +---------+          |               +-----------+
693     //                 /       \           |                 /       \
694     //         #2     /         \  #3      |         #2     /         \  #3
695     // +----------------+      +---------+ | +----------------+      +---------+
696     // |     foo()      |      |   ...   | | |DefinitelyCalled|      |NotCalled|
697     // +----------------+      +---------+ | +----------------+      +---------+
698     // |DefinitelyCalled|      |NotCalled| | |     foo()      |      |   ...   |
699     // +----------------+      +---------+ | +----------------+      +---------+
700     //                \         /          |                \         /
701     //                 \  #4   /           |                 \  #4   /
702     //               +-----------+         |                +---------+
703     //               |    ...    |         |                |NotCalled|
704     //               +-----------+         |                +---------+
705     //               |MaybeCalled|         |                |   ...   |
706     //               +-----------+         |                +---------+
707     //
708     // The most natural way to report lacking call in the block #3 would be to
709     // message that the false branch of the if statement in the block #1 doesn't
710     // have a call.  And while with the forward approach we'll need to find a
711     // least common ancestor or something like that to find the 'if' to blame,
712     // backward analysis gives it to us out of the box.
713     BackwardDataflowWorklist Worklist(FunctionCFG, AC);
714 
715     // Let's visit EXIT.
716     const CFGBlock *Exit = &FunctionCFG.getExit();
717     assignState(Exit, State(size(), ParameterStatus::NotCalled));
718     Worklist.enqueuePredecessors(Exit);
719 
720     while (const CFGBlock *BB = Worklist.dequeue()) {
721       assert(BB && "Worklist should filter out null blocks");
722       check(BB);
723       assert(CurrentState.isVisited() &&
724              "After the check, basic block should be visited");
725 
726       // Traverse successor basic blocks if the status of this block
727       // has changed.
728       if (assignState(BB, CurrentState)) {
729         Worklist.enqueuePredecessors(BB);
730       }
731     }
732 
733     // Check that we have all tracked parameters at the last block.
734     // As we are performing a backward version of the analysis,
735     // it should be the ENTRY block.
736     checkEntry(&FunctionCFG.getEntry());
737   }
738 
739   void check(const CFGBlock *BB) {
740     // We start with a state 'inherited' from all the successors.
741     CurrentState = joinSuccessors(BB);
742     assert(CurrentState.isVisited() &&
743            "Shouldn't start with a 'not visited' state");
744 
745     // This is the 'exit' situation, broken promises are probably OK
746     // in such scenarios.
747     if (BB->hasNoReturnElement()) {
748       markNoReturn();
749       // This block still can have calls (even multiple calls) and
750       // for this reason there is no early return here.
751     }
752 
753     // We use a backward dataflow propagation and for this reason we
754     // should traverse basic blocks bottom-up.
755     for (const CFGElement &Element : llvm::reverse(*BB)) {
756       if (std::optional<CFGStmt> S = Element.getAs<CFGStmt>()) {
757         check(S->getStmt());
758       }
759     }
760   }
761   void check(const Stmt *S) { Visit(S); }
762 
763   void checkEntry(const CFGBlock *Entry) {
764     // We finalize this algorithm with the ENTRY block because
765     // we use a backward version of the analysis.  This is where
766     // we can judge that some of the tracked parameters are not called on
767     // every path from ENTRY to EXIT.
768 
769     const State &EntryStatus = getState(Entry);
770     llvm::BitVector NotCalledOnEveryPath(size(), false);
771     llvm::BitVector NotUsedOnEveryPath(size(), false);
772 
773     // Check if there are no calls of the marked parameter at all
774     for (const auto &IndexedStatus : llvm::enumerate(EntryStatus)) {
775       const ParmVarDecl *Parameter = getParameter(IndexedStatus.index());
776 
777       switch (IndexedStatus.value().getKind()) {
778       case ParameterStatus::NotCalled:
779         // If there were places where this parameter escapes (aka being used),
780         // we can provide a more useful diagnostic by pointing at the exact
781         // branches where it is not even mentioned.
782         if (!hasEverEscaped(IndexedStatus.index())) {
783           // This parameter is was not used at all, so we should report the
784           // most generic version of the warning.
785           if (isCaptured(Parameter)) {
786             // We want to specify that it was captured by the block.
787             Handler.handleCapturedNeverCalled(Parameter, AC.getDecl(),
788                                               !isExplicitlyMarked(Parameter));
789           } else {
790             Handler.handleNeverCalled(Parameter,
791                                       !isExplicitlyMarked(Parameter));
792           }
793         } else {
794           // Mark it as 'interesting' to figure out which paths don't even
795           // have escapes.
796           NotUsedOnEveryPath[IndexedStatus.index()] = true;
797         }
798 
799         break;
800       case ParameterStatus::MaybeCalled:
801         // If we have 'maybe called' at this point, we have an error
802         // that there is at least one path where this parameter
803         // is not called.
804         //
805         // However, reporting the warning with only that information can be
806         // too vague for the users.  For this reason, we mark such parameters
807         // as "interesting" for further analysis.
808         NotCalledOnEveryPath[IndexedStatus.index()] = true;
809         break;
810       default:
811         break;
812       }
813     }
814 
815     // Early exit if we don't have parameters for extra analysis...
816     if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none() &&
817         // ... or if we've seen variables with cleanup functions.
818         // We can't reason that we've seen every path in this case,
819         // and thus abandon reporting any warnings that imply that.
820         !FunctionHasCleanupVars)
821       return;
822 
823     // We are looking for a pair of blocks A, B so that the following is true:
824     //   * A is a predecessor of B
825     //   * B is marked as NotCalled
826     //   * A has at least one successor marked as either
827     //     Escaped or DefinitelyCalled
828     //
829     // In that situation, it is guaranteed that B is the first block of the path
830     // where the user doesn't call or use parameter in question.
831     //
832     // For this reason, branch A -> B can be used for reporting.
833     //
834     // This part of the algorithm is guarded by a condition that the function
835     // does indeed have a violation of contract.  For this reason, we can
836     // spend more time to find a good spot to place the warning.
837     //
838     // The following algorithm has the worst case complexity of O(V + E),
839     // where V is the number of basic blocks in FunctionCFG,
840     //       E is the number of edges between blocks in FunctionCFG.
841     for (const CFGBlock *BB : FunctionCFG) {
842       if (!BB)
843         continue;
844 
845       const State &BlockState = getState(BB);
846 
847       for (unsigned Index : llvm::seq(0u, size())) {
848         // We don't want to use 'isLosingCall' here because we want to report
849         // the following situation as well:
850         //
851         //           MaybeCalled
852         //            |  ...  |
853         //    MaybeCalled   NotCalled
854         //
855         // Even though successor is not 'DefinitelyCalled', it is still useful
856         // to report it, it is still a path without a call.
857         if (NotCalledOnEveryPath[Index] &&
858             BlockState.getKindFor(Index) == ParameterStatus::MaybeCalled) {
859 
860           findAndReportNotCalledBranches(BB, Index);
861         } else if (NotUsedOnEveryPath[Index] &&
862                    isLosingEscape(BlockState, BB, Index)) {
863 
864           findAndReportNotCalledBranches(BB, Index, /* IsEscape = */ true);
865         }
866       }
867     }
868   }
869 
870   /// Check potential call of a tracked parameter.
871   void checkDirectCall(const CallExpr *Call) {
872     if (auto Index = getIndexOfCallee(Call)) {
873       processCallFor(*Index, Call);
874     }
875   }
876 
877   /// Check the call expression for being an indirect call of one of the tracked
878   /// parameters.  It is indirect in the sense that this particular call is not
879   /// calling the parameter itself, but rather uses it as the argument.
880   template <class CallLikeExpr>
881   void checkIndirectCall(const CallLikeExpr *CallOrMessage) {
882     // CallExpr::arguments does not interact nicely with llvm::enumerate.
883     llvm::ArrayRef<const Expr *> Arguments =
884         llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs());
885 
886     // Let's check if any of the call arguments is a point of interest.
887     for (const auto &Argument : llvm::enumerate(Arguments)) {
888       if (auto Index = getIndexOfExpression(Argument.value())) {
889         if (shouldBeCalledOnce(CallOrMessage, Argument.index())) {
890           // If the corresponding parameter is marked as 'called_once' we should
891           // consider it as a call.
892           processCallFor(*Index, CallOrMessage);
893         } else {
894           // Otherwise, we mark this parameter as escaped, which can be
895           // interpreted both as called or not called depending on the context.
896           processEscapeFor(*Index);
897         }
898         // Otherwise, let's keep the state as it is.
899       }
900     }
901   }
902 
903   /// Process call of the parameter with the given index
904   void processCallFor(unsigned Index, const Expr *Call) {
905     ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index);
906 
907     if (CurrentParamStatus.seenAnyCalls()) {
908 
909       // At this point, this parameter was called, so this is a second call.
910       const ParmVarDecl *Parameter = getParameter(Index);
911       Handler.handleDoubleCall(
912           Parameter, &CurrentState.getCallFor(Index), Call,
913           !isExplicitlyMarked(Parameter),
914           // We are sure that the second call is definitely
915           // going to happen if the status is 'DefinitelyCalled'.
916           CurrentParamStatus.getKind() == ParameterStatus::DefinitelyCalled);
917 
918       // Mark this parameter as already reported on, so we don't repeat
919       // warnings.
920       CurrentParamStatus = ParameterStatus::Reported;
921 
922     } else if (CurrentParamStatus.getKind() != ParameterStatus::Reported) {
923       // If we didn't report anything yet, let's mark this parameter
924       // as called.
925       ParameterStatus Called(ParameterStatus::DefinitelyCalled, Call);
926       CurrentParamStatus = Called;
927     }
928   }
929 
930   /// Process escape of the parameter with the given index
931   void processEscapeFor(unsigned Index) {
932     ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index);
933 
934     // Escape overrides whatever error we think happened.
935     if (CurrentParamStatus.isErrorStatus() &&
936         CurrentParamStatus.getKind() != ParameterStatus::Kind::Reported) {
937       CurrentParamStatus = ParameterStatus::Escaped;
938     }
939   }
940 
941   void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index,
942                                       bool IsEscape = false) {
943     for (const CFGBlock *Succ : Parent->succs()) {
944       if (!Succ)
945         continue;
946 
947       if (getState(Succ).getKindFor(Index) == ParameterStatus::NotCalled) {
948         assert(Parent->succ_size() >= 2 &&
949                "Block should have at least two successors at this point");
950         if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) {
951           const ParmVarDecl *Parameter = getParameter(Index);
952           Handler.handleNeverCalled(
953               Parameter, AC.getDecl(), Clarification->Location,
954               Clarification->Reason, !IsEscape, !isExplicitlyMarked(Parameter));
955         }
956       }
957     }
958   }
959 
960   //===----------------------------------------------------------------------===//
961   //                   Predicate functions to check parameters
962   //===----------------------------------------------------------------------===//
963 
964   /// Return true if parameter is explicitly marked as 'called_once'.
965   static bool isExplicitlyMarked(const ParmVarDecl *Parameter) {
966     return Parameter->hasAttr<CalledOnceAttr>();
967   }
968 
969   /// Return true if the given name matches conventional pattens.
970   static bool isConventional(llvm::StringRef Name) {
971     return llvm::count(CONVENTIONAL_NAMES, Name) != 0;
972   }
973 
974   /// Return true if the given name has conventional suffixes.
975   static bool hasConventionalSuffix(llvm::StringRef Name) {
976     return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) {
977       return Name.ends_with(Suffix);
978     });
979   }
980 
981   /// Return true if the given type can be used for conventional parameters.
982   static bool isConventional(QualType Ty) {
983     if (!Ty->isBlockPointerType()) {
984       return false;
985     }
986 
987     QualType BlockType = Ty->castAs<BlockPointerType>()->getPointeeType();
988     // Completion handlers should have a block type with void return type.
989     return BlockType->castAs<FunctionType>()->getReturnType()->isVoidType();
990   }
991 
992   /// Return true if the only parameter of the function is conventional.
993   static bool isOnlyParameterConventional(const FunctionDecl *Function) {
994     IdentifierInfo *II = Function->getIdentifier();
995     return Function->getNumParams() == 1 && II &&
996            hasConventionalSuffix(II->getName());
997   }
998 
999   /// Return true/false if 'swift_async' attribute states that the given
1000   /// parameter is conventionally called once.
1001   /// Return std::nullopt if the given declaration doesn't have 'swift_async'
1002   /// attribute.
1003   static std::optional<bool> isConventionalSwiftAsync(const Decl *D,
1004                                                       unsigned ParamIndex) {
1005     if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) {
1006       if (A->getKind() == SwiftAsyncAttr::None) {
1007         return false;
1008       }
1009 
1010       return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex;
1011     }
1012     return std::nullopt;
1013   }
1014 
1015   /// Return true if the specified selector represents init method.
1016   static bool isInitMethod(Selector MethodSelector) {
1017     return MethodSelector.getMethodFamily() == OMF_init;
1018   }
1019 
1020   /// Return true if the specified selector piece matches conventions.
1021   static bool isConventionalSelectorPiece(Selector MethodSelector,
1022                                           unsigned PieceIndex,
1023                                           QualType PieceType) {
1024     if (!isConventional(PieceType) || isInitMethod(MethodSelector)) {
1025       return false;
1026     }
1027 
1028     if (MethodSelector.getNumArgs() == 1) {
1029       assert(PieceIndex == 0);
1030       return hasConventionalSuffix(MethodSelector.getNameForSlot(0));
1031     }
1032 
1033     llvm::StringRef PieceName = MethodSelector.getNameForSlot(PieceIndex);
1034     return isConventional(PieceName) || hasConventionalSuffix(PieceName);
1035   }
1036 
1037   bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const {
1038     return isExplicitlyMarked(Parameter) ||
1039            (CheckConventionalParameters &&
1040             (isConventional(Parameter->getName()) ||
1041              hasConventionalSuffix(Parameter->getName())) &&
1042             isConventional(Parameter->getType()));
1043   }
1044 
1045   bool shouldBeCalledOnce(const DeclContext *ParamContext,
1046                           const ParmVarDecl *Param) {
1047     unsigned ParamIndex = Param->getFunctionScopeIndex();
1048     if (const auto *Function = dyn_cast<FunctionDecl>(ParamContext)) {
1049       return shouldBeCalledOnce(Function, ParamIndex);
1050     }
1051     if (const auto *Method = dyn_cast<ObjCMethodDecl>(ParamContext)) {
1052       return shouldBeCalledOnce(Method, ParamIndex);
1053     }
1054     return shouldBeCalledOnce(Param);
1055   }
1056 
1057   bool shouldBeCalledOnce(const BlockDecl *Block, unsigned ParamIndex) const {
1058     return shouldBeCalledOnce(Block->getParamDecl(ParamIndex));
1059   }
1060 
1061   bool shouldBeCalledOnce(const FunctionDecl *Function,
1062                           unsigned ParamIndex) const {
1063     if (ParamIndex >= Function->getNumParams()) {
1064       return false;
1065     }
1066     // 'swift_async' goes first and overrides anything else.
1067     if (auto ConventionalAsync =
1068             isConventionalSwiftAsync(Function, ParamIndex)) {
1069       return *ConventionalAsync;
1070     }
1071 
1072     return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) ||
1073            (CheckConventionalParameters &&
1074             isOnlyParameterConventional(Function));
1075   }
1076 
1077   bool shouldBeCalledOnce(const ObjCMethodDecl *Method,
1078                           unsigned ParamIndex) const {
1079     Selector MethodSelector = Method->getSelector();
1080     if (ParamIndex >= MethodSelector.getNumArgs()) {
1081       return false;
1082     }
1083 
1084     // 'swift_async' goes first and overrides anything else.
1085     if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) {
1086       return *ConventionalAsync;
1087     }
1088 
1089     const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex);
1090     return shouldBeCalledOnce(Parameter) ||
1091            (CheckConventionalParameters &&
1092             isConventionalSelectorPiece(MethodSelector, ParamIndex,
1093                                         Parameter->getType()));
1094   }
1095 
1096   bool shouldBeCalledOnce(const CallExpr *Call, unsigned ParamIndex) const {
1097     const FunctionDecl *Function = Call->getDirectCallee();
1098     return Function && shouldBeCalledOnce(Function, ParamIndex);
1099   }
1100 
1101   bool shouldBeCalledOnce(const ObjCMessageExpr *Message,
1102                           unsigned ParamIndex) const {
1103     const ObjCMethodDecl *Method = Message->getMethodDecl();
1104     return Method && ParamIndex < Method->param_size() &&
1105            shouldBeCalledOnce(Method, ParamIndex);
1106   }
1107 
1108   //===----------------------------------------------------------------------===//
1109   //                               Utility methods
1110   //===----------------------------------------------------------------------===//
1111 
1112   bool isCaptured(const ParmVarDecl *Parameter) const {
1113     if (const BlockDecl *Block = dyn_cast<BlockDecl>(AC.getDecl())) {
1114       return Block->capturesVariable(Parameter);
1115     }
1116     return false;
1117   }
1118 
1119   // Return a call site where the block is called exactly once or null otherwise
1120   const Expr *getBlockGuaraneedCallSite(const BlockExpr *Block) const {
1121     ParentMap &PM = AC.getParentMap();
1122 
1123     // We don't want to track the block through assignments and so on, instead
1124     // we simply see how the block used and if it's used directly in a call,
1125     // we decide based on call to what it is.
1126     //
1127     // In order to do this, we go up the parents of the block looking for
1128     // a call or a message expressions.  These might not be immediate parents
1129     // of the actual block expression due to casts and parens, so we skip them.
1130     for (const Stmt *Prev = Block, *Current = PM.getParent(Block);
1131          Current != nullptr; Prev = Current, Current = PM.getParent(Current)) {
1132       // Skip no-op (for our case) operations.
1133       if (isa<CastExpr>(Current) || isa<ParenExpr>(Current))
1134         continue;
1135 
1136       // At this point, Prev represents our block as an immediate child of the
1137       // call.
1138       if (const auto *Call = dyn_cast<CallExpr>(Current)) {
1139         // It might be the call of the Block itself...
1140         if (Call->getCallee() == Prev)
1141           return Call;
1142 
1143         // ...or it can be an indirect call of the block.
1144         return shouldBlockArgumentBeCalledOnce(Call, Prev) ? Call : nullptr;
1145       }
1146       if (const auto *Message = dyn_cast<ObjCMessageExpr>(Current)) {
1147         return shouldBlockArgumentBeCalledOnce(Message, Prev) ? Message
1148                                                               : nullptr;
1149       }
1150 
1151       break;
1152     }
1153 
1154     return nullptr;
1155   }
1156 
1157   template <class CallLikeExpr>
1158   bool shouldBlockArgumentBeCalledOnce(const CallLikeExpr *CallOrMessage,
1159                                        const Stmt *BlockArgument) const {
1160     // CallExpr::arguments does not interact nicely with llvm::enumerate.
1161     llvm::ArrayRef<const Expr *> Arguments =
1162         llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs());
1163 
1164     for (const auto &Argument : llvm::enumerate(Arguments)) {
1165       if (Argument.value() == BlockArgument) {
1166         return shouldBlockArgumentBeCalledOnce(CallOrMessage, Argument.index());
1167       }
1168     }
1169 
1170     return false;
1171   }
1172 
1173   bool shouldBlockArgumentBeCalledOnce(const CallExpr *Call,
1174                                        unsigned ParamIndex) const {
1175     const FunctionDecl *Function = Call->getDirectCallee();
1176     return shouldBlockArgumentBeCalledOnce(Function, ParamIndex) ||
1177            shouldBeCalledOnce(Call, ParamIndex);
1178   }
1179 
1180   bool shouldBlockArgumentBeCalledOnce(const ObjCMessageExpr *Message,
1181                                        unsigned ParamIndex) const {
1182     // At the moment, we don't have any Obj-C methods we want to specifically
1183     // check in here.
1184     return shouldBeCalledOnce(Message, ParamIndex);
1185   }
1186 
1187   static bool shouldBlockArgumentBeCalledOnce(const FunctionDecl *Function,
1188                                               unsigned ParamIndex) {
1189     // There is a list of important API functions that while not following
1190     // conventions nor being directly annotated, still guarantee that the
1191     // callback parameter will be called exactly once.
1192     //
1193     // Here we check if this is the case.
1194     return Function &&
1195            llvm::any_of(KNOWN_CALLED_ONCE_PARAMETERS,
1196                         [Function, ParamIndex](
1197                             const KnownCalledOnceParameter &Reference) {
1198                           return Reference.FunctionName ==
1199                                      Function->getName() &&
1200                                  Reference.ParamIndex == ParamIndex;
1201                         });
1202   }
1203 
1204   /// Return true if the analyzed function is actually a default implementation
1205   /// of the method that has to be overriden.
1206   ///
1207   /// These functions can have tracked parameters, but wouldn't call them
1208   /// because they are not designed to perform any meaningful actions.
1209   ///
1210   /// There are a couple of flavors of such default implementations:
1211   ///   1. Empty methods or methods with a single return statement
1212   ///   2. Methods that have one block with a call to no return function
1213   ///   3. Methods with only assertion-like operations
1214   bool isPossiblyEmptyImpl() const {
1215     if (!isa<ObjCMethodDecl>(AC.getDecl())) {
1216       // We care only about functions that are not supposed to be called.
1217       // Only methods can be overriden.
1218       return false;
1219     }
1220 
1221     // Case #1 (without return statements)
1222     if (FunctionCFG.size() == 2) {
1223       // Method has only two blocks: ENTRY and EXIT.
1224       // This is equivalent to empty function.
1225       return true;
1226     }
1227 
1228     // Case #2
1229     if (FunctionCFG.size() == 3) {
1230       const CFGBlock &Entry = FunctionCFG.getEntry();
1231       if (Entry.succ_empty()) {
1232         return false;
1233       }
1234 
1235       const CFGBlock *OnlyBlock = *Entry.succ_begin();
1236       // Method has only one block, let's see if it has a no-return
1237       // element.
1238       if (OnlyBlock && OnlyBlock->hasNoReturnElement()) {
1239         return true;
1240       }
1241       // Fallthrough, CFGs with only one block can fall into #1 and #3 as well.
1242     }
1243 
1244     // Cases #1 (return statements) and #3.
1245     //
1246     // It is hard to detect that something is an assertion or came
1247     // from assertion.  Here we use a simple heuristic:
1248     //
1249     //   - If it came from a macro, it can be an assertion.
1250     //
1251     // Additionally, we can't assume a number of basic blocks or the CFG's
1252     // structure because assertions might include loops and conditions.
1253     return llvm::all_of(FunctionCFG, [](const CFGBlock *BB) {
1254       if (!BB) {
1255         // Unreachable blocks are totally fine.
1256         return true;
1257       }
1258 
1259       // Return statements can have sub-expressions that are represented as
1260       // separate statements of a basic block.  We should allow this.
1261       // This parent map will be initialized with a parent tree for all
1262       // subexpressions of the block's return statement (if it has one).
1263       std::unique_ptr<ParentMap> ReturnChildren;
1264 
1265       return llvm::all_of(
1266           llvm::reverse(*BB), // we should start with return statements, if we
1267                               // have any, i.e. from the bottom of the block
1268           [&ReturnChildren](const CFGElement &Element) {
1269             if (std::optional<CFGStmt> S = Element.getAs<CFGStmt>()) {
1270               const Stmt *SuspiciousStmt = S->getStmt();
1271 
1272               if (isa<ReturnStmt>(SuspiciousStmt)) {
1273                 // Let's initialize this structure to test whether
1274                 // some further statement is a part of this return.
1275                 ReturnChildren = std::make_unique<ParentMap>(
1276                     const_cast<Stmt *>(SuspiciousStmt));
1277                 // Return statements are allowed as part of #1.
1278                 return true;
1279               }
1280 
1281               return SuspiciousStmt->getBeginLoc().isMacroID() ||
1282                      (ReturnChildren &&
1283                       ReturnChildren->hasParent(SuspiciousStmt));
1284             }
1285             return true;
1286           });
1287     });
1288   }
1289 
1290   /// Check if parameter with the given index has ever escaped.
1291   bool hasEverEscaped(unsigned Index) const {
1292     return llvm::any_of(States, [Index](const State &StateForOneBB) {
1293       return StateForOneBB.getKindFor(Index) == ParameterStatus::Escaped;
1294     });
1295   }
1296 
1297   /// Return status stored for the given basic block.
1298   /// \{
1299   State &getState(const CFGBlock *BB) {
1300     assert(BB);
1301     return States[BB->getBlockID()];
1302   }
1303   const State &getState(const CFGBlock *BB) const {
1304     assert(BB);
1305     return States[BB->getBlockID()];
1306   }
1307   /// \}
1308 
1309   /// Assign status to the given basic block.
1310   ///
1311   /// Returns true when the stored status changed.
1312   bool assignState(const CFGBlock *BB, const State &ToAssign) {
1313     State &Current = getState(BB);
1314     if (Current == ToAssign) {
1315       return false;
1316     }
1317 
1318     Current = ToAssign;
1319     return true;
1320   }
1321 
1322   /// Join all incoming statuses for the given basic block.
1323   State joinSuccessors(const CFGBlock *BB) const {
1324     auto Succs =
1325         llvm::make_filter_range(BB->succs(), [this](const CFGBlock *Succ) {
1326           return Succ && this->getState(Succ).isVisited();
1327         });
1328     // We came to this block from somewhere after all.
1329     assert(!Succs.empty() &&
1330            "Basic block should have at least one visited successor");
1331 
1332     State Result = getState(*Succs.begin());
1333 
1334     for (const CFGBlock *Succ : llvm::drop_begin(Succs, 1)) {
1335       Result.join(getState(Succ));
1336     }
1337 
1338     if (const Expr *Condition = getCondition(BB->getTerminatorStmt())) {
1339       handleConditional(BB, Condition, Result);
1340     }
1341 
1342     return Result;
1343   }
1344 
1345   void handleConditional(const CFGBlock *BB, const Expr *Condition,
1346                          State &ToAlter) const {
1347     handleParameterCheck(BB, Condition, ToAlter);
1348     if (SuppressOnConventionalErrorPaths) {
1349       handleConventionalCheck(BB, Condition, ToAlter);
1350     }
1351   }
1352 
1353   void handleParameterCheck(const CFGBlock *BB, const Expr *Condition,
1354                             State &ToAlter) const {
1355     // In this function, we try to deal with the following pattern:
1356     //
1357     //   if (parameter)
1358     //     parameter(...);
1359     //
1360     // It's not good to show a warning here because clearly 'parameter'
1361     // couldn't and shouldn't be called on the 'else' path.
1362     //
1363     // Let's check if this if statement has a check involving one of
1364     // the tracked parameters.
1365     if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(
1366             Condition,
1367             /* ShouldRetrieveFromComparisons = */ true)) {
1368       if (const auto Index = getIndex(*Parameter)) {
1369         ParameterStatus &CurrentStatus = ToAlter.getStatusFor(*Index);
1370 
1371         // We don't want to deep dive into semantics of the check and
1372         // figure out if that check was for null or something else.
1373         // We simply trust the user that they know what they are doing.
1374         //
1375         // For this reason, in the following loop we look for the
1376         // best-looking option.
1377         for (const CFGBlock *Succ : BB->succs()) {
1378           if (!Succ)
1379             continue;
1380 
1381           const ParameterStatus &StatusInSucc =
1382               getState(Succ).getStatusFor(*Index);
1383 
1384           if (StatusInSucc.isErrorStatus()) {
1385             continue;
1386           }
1387 
1388           // Let's use this status instead.
1389           CurrentStatus = StatusInSucc;
1390 
1391           if (StatusInSucc.getKind() == ParameterStatus::DefinitelyCalled) {
1392             // This is the best option to have and we already found it.
1393             break;
1394           }
1395 
1396           // If we found 'Escaped' first, we still might find 'DefinitelyCalled'
1397           // on the other branch.  And we prefer the latter.
1398         }
1399       }
1400     }
1401   }
1402 
1403   void handleConventionalCheck(const CFGBlock *BB, const Expr *Condition,
1404                                State &ToAlter) const {
1405     // Even when the analysis is technically correct, it is a widespread pattern
1406     // not to call completion handlers in some scenarios.  These usually have
1407     // typical conditional names, such as 'error' or 'cancel'.
1408     if (!mentionsAnyOfConventionalNames(Condition)) {
1409       return;
1410     }
1411 
1412     for (const auto &IndexedStatus : llvm::enumerate(ToAlter)) {
1413       const ParmVarDecl *Parameter = getParameter(IndexedStatus.index());
1414       // Conventions do not apply to explicitly marked parameters.
1415       if (isExplicitlyMarked(Parameter)) {
1416         continue;
1417       }
1418 
1419       ParameterStatus &CurrentStatus = IndexedStatus.value();
1420       // If we did find that on one of the branches the user uses the callback
1421       // and doesn't on the other path, we believe that they know what they are
1422       // doing and trust them.
1423       //
1424       // There are two possible scenarios for that:
1425       //   1. Current status is 'MaybeCalled' and one of the branches is
1426       //      'DefinitelyCalled'
1427       //   2. Current status is 'NotCalled' and one of the branches is 'Escaped'
1428       if (isLosingCall(ToAlter, BB, IndexedStatus.index()) ||
1429           isLosingEscape(ToAlter, BB, IndexedStatus.index())) {
1430         CurrentStatus = ParameterStatus::Escaped;
1431       }
1432     }
1433   }
1434 
1435   bool isLosingCall(const State &StateAfterJoin, const CFGBlock *JoinBlock,
1436                     unsigned ParameterIndex) const {
1437     // Let's check if the block represents DefinitelyCalled -> MaybeCalled
1438     // transition.
1439     return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex,
1440                         ParameterStatus::MaybeCalled,
1441                         ParameterStatus::DefinitelyCalled);
1442   }
1443 
1444   bool isLosingEscape(const State &StateAfterJoin, const CFGBlock *JoinBlock,
1445                       unsigned ParameterIndex) const {
1446     // Let's check if the block represents Escaped -> NotCalled transition.
1447     return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex,
1448                         ParameterStatus::NotCalled, ParameterStatus::Escaped);
1449   }
1450 
1451   bool isLosingJoin(const State &StateAfterJoin, const CFGBlock *JoinBlock,
1452                     unsigned ParameterIndex, ParameterStatus::Kind AfterJoin,
1453                     ParameterStatus::Kind BeforeJoin) const {
1454     assert(!ParameterStatus::isErrorStatus(BeforeJoin) &&
1455            ParameterStatus::isErrorStatus(AfterJoin) &&
1456            "It's not a losing join if statuses do not represent "
1457            "correct-to-error transition");
1458 
1459     const ParameterStatus &CurrentStatus =
1460         StateAfterJoin.getStatusFor(ParameterIndex);
1461 
1462     return CurrentStatus.getKind() == AfterJoin &&
1463            anySuccessorHasStatus(JoinBlock, ParameterIndex, BeforeJoin);
1464   }
1465 
1466   /// Return true if any of the successors of the given basic block has
1467   /// a specified status for the given parameter.
1468   bool anySuccessorHasStatus(const CFGBlock *Parent, unsigned ParameterIndex,
1469                              ParameterStatus::Kind ToFind) const {
1470     return llvm::any_of(
1471         Parent->succs(), [this, ParameterIndex, ToFind](const CFGBlock *Succ) {
1472           return Succ && getState(Succ).getKindFor(ParameterIndex) == ToFind;
1473         });
1474   }
1475 
1476   /// Check given expression that was discovered to escape.
1477   void checkEscapee(const Expr *E) {
1478     if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) {
1479       checkEscapee(*Parameter);
1480     }
1481   }
1482 
1483   /// Check given parameter that was discovered to escape.
1484   void checkEscapee(const ParmVarDecl &Parameter) {
1485     if (auto Index = getIndex(Parameter)) {
1486       processEscapeFor(*Index);
1487     }
1488   }
1489 
1490   /// Mark all parameters in the current state as 'no-return'.
1491   void markNoReturn() {
1492     for (ParameterStatus &PS : CurrentState) {
1493       PS = ParameterStatus::NoReturn;
1494     }
1495   }
1496 
1497   /// Check if the given assignment represents suppression and act on it.
1498   void checkSuppression(const BinaryOperator *Assignment) {
1499     // Suppression has the following form:
1500     //    parameter = 0;
1501     // 0 can be of any form (NULL, nil, etc.)
1502     if (auto Index = getIndexOfExpression(Assignment->getLHS())) {
1503 
1504       // We don't care what is written in the RHS, it could be whatever
1505       // we can interpret as 0.
1506       if (auto Constant =
1507               Assignment->getRHS()->IgnoreParenCasts()->getIntegerConstantExpr(
1508                   AC.getASTContext())) {
1509 
1510         ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index);
1511 
1512         if (0 == *Constant && CurrentParamStatus.seenAnyCalls()) {
1513           // Even though this suppression mechanism is introduced to tackle
1514           // false positives for multiple calls, the fact that the user has
1515           // to use suppression can also tell us that we couldn't figure out
1516           // how different paths cancel each other out.  And if that is true,
1517           // we will most certainly have false positives about parameters not
1518           // being called on certain paths.
1519           //
1520           // For this reason, we abandon tracking this parameter altogether.
1521           CurrentParamStatus = ParameterStatus::Reported;
1522         }
1523       }
1524     }
1525   }
1526 
1527 public:
1528   //===----------------------------------------------------------------------===//
1529   //                            Tree traversal methods
1530   //===----------------------------------------------------------------------===//
1531 
1532   void VisitCallExpr(const CallExpr *Call) {
1533     // This call might be a direct call, i.e. a parameter call...
1534     checkDirectCall(Call);
1535     // ... or an indirect call, i.e. when parameter is an argument.
1536     checkIndirectCall(Call);
1537   }
1538 
1539   void VisitObjCMessageExpr(const ObjCMessageExpr *Message) {
1540     // The most common situation that we are defending against here is
1541     // copying a tracked parameter.
1542     if (const Expr *Receiver = Message->getInstanceReceiver()) {
1543       checkEscapee(Receiver);
1544     }
1545     // Message expressions unlike calls, could not be direct.
1546     checkIndirectCall(Message);
1547   }
1548 
1549   void VisitBlockExpr(const BlockExpr *Block) {
1550     // Block expressions are tricky.  It is a very common practice to capture
1551     // completion handlers by blocks and use them there.
1552     // For this reason, it is important to analyze blocks and report warnings
1553     // for completion handler misuse in blocks.
1554     //
1555     // However, it can be quite difficult to track how the block itself is being
1556     // used.  The full precise anlysis of that will be similar to alias analysis
1557     // for completion handlers and can be too heavyweight for a compile-time
1558     // diagnostic.  Instead, we judge about the immediate use of the block.
1559     //
1560     // Here, we try to find a call expression where we know due to conventions,
1561     // annotations, or other reasons that the block is called once and only
1562     // once.
1563     const Expr *CalledOnceCallSite = getBlockGuaraneedCallSite(Block);
1564 
1565     // We need to report this information to the handler because in the
1566     // situation when we know that the block is called exactly once, we can be
1567     // stricter in terms of reported diagnostics.
1568     if (CalledOnceCallSite) {
1569       Handler.handleBlockThatIsGuaranteedToBeCalledOnce(Block->getBlockDecl());
1570     } else {
1571       Handler.handleBlockWithNoGuarantees(Block->getBlockDecl());
1572     }
1573 
1574     for (const auto &Capture : Block->getBlockDecl()->captures()) {
1575       if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) {
1576         if (auto Index = getIndex(*Param)) {
1577           if (CalledOnceCallSite) {
1578             // The call site of a block can be considered a call site of the
1579             // captured parameter we track.
1580             processCallFor(*Index, CalledOnceCallSite);
1581           } else {
1582             // We still should consider this block as an escape for parameter,
1583             // if we don't know about its call site or the number of time it
1584             // can be invoked.
1585             processEscapeFor(*Index);
1586           }
1587         }
1588       }
1589     }
1590   }
1591 
1592   void VisitBinaryOperator(const BinaryOperator *Op) {
1593     if (Op->getOpcode() == clang::BO_Assign) {
1594       // Let's check if one of the tracked parameters is assigned into
1595       // something, and if it is we don't want to track extra variables, so we
1596       // consider it as an escapee.
1597       checkEscapee(Op->getRHS());
1598 
1599       // Let's check whether this assignment is a suppression.
1600       checkSuppression(Op);
1601     }
1602   }
1603 
1604   void VisitDeclStmt(const DeclStmt *DS) {
1605     // Variable initialization is not assignment and should be handled
1606     // separately.
1607     //
1608     // Multiple declarations can be a part of declaration statement.
1609     for (const auto *Declaration : DS->getDeclGroup()) {
1610       if (const auto *Var = dyn_cast<VarDecl>(Declaration)) {
1611         if (Var->getInit()) {
1612           checkEscapee(Var->getInit());
1613         }
1614 
1615         if (Var->hasAttr<CleanupAttr>()) {
1616           FunctionHasCleanupVars = true;
1617         }
1618       }
1619     }
1620   }
1621 
1622   void VisitCStyleCastExpr(const CStyleCastExpr *Cast) {
1623     // We consider '(void)parameter' as a manual no-op escape.
1624     // It should be used to explicitly tell the analysis that this parameter
1625     // is intentionally not called on this path.
1626     if (Cast->getType().getCanonicalType()->isVoidType()) {
1627       checkEscapee(Cast->getSubExpr());
1628     }
1629   }
1630 
1631   void VisitObjCAtThrowStmt(const ObjCAtThrowStmt *) {
1632     // It is OK not to call marked parameters on exceptional paths.
1633     markNoReturn();
1634   }
1635 
1636 private:
1637   unsigned size() const { return TrackedParams.size(); }
1638 
1639   std::optional<unsigned> getIndexOfCallee(const CallExpr *Call) const {
1640     return getIndexOfExpression(Call->getCallee());
1641   }
1642 
1643   std::optional<unsigned> getIndexOfExpression(const Expr *E) const {
1644     if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) {
1645       return getIndex(*Parameter);
1646     }
1647 
1648     return std::nullopt;
1649   }
1650 
1651   std::optional<unsigned> getIndex(const ParmVarDecl &Parameter) const {
1652     // Expected number of parameters that we actually track is 1.
1653     //
1654     // Also, the maximum number of declared parameters could not be on a scale
1655     // of hundreds of thousands.
1656     //
1657     // In this setting, linear search seems reasonable and even performs better
1658     // than bisection.
1659     ParamSizedVector<const ParmVarDecl *>::const_iterator It =
1660         llvm::find(TrackedParams, &Parameter);
1661 
1662     if (It != TrackedParams.end()) {
1663       return It - TrackedParams.begin();
1664     }
1665 
1666     return std::nullopt;
1667   }
1668 
1669   const ParmVarDecl *getParameter(unsigned Index) const {
1670     assert(Index < TrackedParams.size());
1671     return TrackedParams[Index];
1672   }
1673 
1674   const CFG &FunctionCFG;
1675   AnalysisDeclContext &AC;
1676   CalledOnceCheckHandler &Handler;
1677   bool CheckConventionalParameters;
1678   // As of now, we turn this behavior off.  So, we still are going to report
1679   // missing calls on paths that look like it was intentional.
1680   // Technically such reports are true positives, but they can make some users
1681   // grumpy because of the sheer number of warnings.
1682   // It can be turned back on if we decide that we want to have the other way
1683   // around.
1684   bool SuppressOnConventionalErrorPaths = false;
1685 
1686   // The user can annotate variable declarations with cleanup functions, which
1687   // essentially imposes a custom destructor logic on that variable.
1688   // It is possible to use it, however, to call tracked parameters on all exits
1689   // from the function.  For this reason, we track the fact that the function
1690   // actually has these.
1691   bool FunctionHasCleanupVars = false;
1692 
1693   State CurrentState;
1694   ParamSizedVector<const ParmVarDecl *> TrackedParams;
1695   CFGSizedVector<State> States;
1696 };
1697 
1698 } // end anonymous namespace
1699 
1700 namespace clang {
1701 void checkCalledOnceParameters(AnalysisDeclContext &AC,
1702                                CalledOnceCheckHandler &Handler,
1703                                bool CheckConventionalParameters) {
1704   CalledOnceChecker::check(AC, Handler, CheckConventionalParameters);
1705 }
1706 } // end namespace clang
1707