xref: /freebsd/contrib/llvm-project/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- CallDescription.h - function/method call matching       --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
/// \file This file defines a generic mechanism for matching function and
/// method calls in the C, C++, and Objective-C languages. Instances of these
/// classes are frequently used together with the CallEvent classes.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H
16 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H
17 
18 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/Support/Compiler.h"
21 #include <optional>
22 #include <vector>
23 
24 namespace clang {
25 class IdentifierInfo;
26 } // namespace clang
27 
28 namespace clang {
29 namespace ento {
30 /// A `CallDescription` is a pattern that can be used to _match_ calls
31 /// based on the qualified name and the argument/parameter counts.
32 class CallDescription {
33 public:
34   enum class Mode {
35     /// Match calls to functions from the C standard library. This also
36     /// recognizes builtin variants whose name is derived by adding
37     /// "__builtin", "__inline" or similar prefixes or suffixes; but only
38     /// matches functions than are externally visible and are declared either
39     /// directly within a TU or in the namespace 'std'.
40     /// For the exact heuristics, see CheckerContext::isCLibraryFunction().
41     CLibrary,
42 
43     /// An extended version of the `CLibrary` mode that also matches the
44     /// hardened variants like __FOO_chk() and __builtin__FOO_chk() that take
45     /// additional arguments compared to the "regular" function FOO().
46     /// This is not the default behavior of `CLibrary` because in this case the
47     /// checker code must be prepared to handle the different parametrization.
48     /// For the exact heuristics, see CheckerContext::isHardenedVariantOf().
49     CLibraryMaybeHardened,
50 
51     /// Matches "simple" functions that are not methods. (Static methods are
52     /// methods.)
53     SimpleFunc,
54 
55     /// Matches a C++ method (may be static, may be virtual, may be an
56     /// overloaded operator, a constructor or a destructor).
57     CXXMethod,
58 
59     /// Match any CallEvent that is not an ObjCMethodCall. This should not be
60     /// used when the checker looks for a concrete function (and knows whether
61     /// it is a method); but GenericTaintChecker uses this mode to match
62     /// functions whose name was configured by the user.
63     Unspecified,
64 
65     /// FIXME: Add support for ObjCMethodCall events (I'm not adding it because
66     /// I'm not familiar with Objective-C). Note that currently an early return
67     /// in `bool matches(const CallEvent &Call) const;` discards all
68     /// Objective-C method calls.
69   };
70 
71 private:
72   friend class CallEvent;
73   using MaybeCount = std::optional<unsigned>;
74 
75   mutable std::optional<const IdentifierInfo *> II;
76   // The list of the qualified names used to identify the specified CallEvent,
77   // e.g. "{a, b}" represent the qualified names, like "a::b".
78   std::vector<std::string> QualifiedName;
79   MaybeCount RequiredArgs;
80   MaybeCount RequiredParams;
81   Mode MatchAs;
82 
83 public:
84   /// Constructs a CallDescription object.
85   ///
86   /// @param MatchAs Specifies the kind of the call that should be matched.
87   ///
88   /// @param QualifiedName The list of the name qualifiers of the function that
89   /// will be matched. The user is allowed to skip any of the qualifiers.
90   /// For example, {"std", "basic_string", "c_str"} would match both
91   /// std::basic_string<...>::c_str() and std::__1::basic_string<...>::c_str().
92   ///
93   /// @param RequiredArgs The expected number of arguments that are passed to
94   /// the function. Omit this parameter (or pass std::nullopt) to match every
95   /// occurrence without checking the argument count in the call.
96   ///
97   /// @param RequiredParams The expected number of parameters in the function
98   /// definition that is called. Omit this parameter to match every occurrence
99   /// without checking the parameter count in the definition.
100   CallDescription(Mode MatchAs, ArrayRef<StringRef> QualifiedName,
101                   MaybeCount RequiredArgs = std::nullopt,
102                   MaybeCount RequiredParams = std::nullopt);
103 
104   /// Get the name of the function that this object matches.
getFunctionName()105   StringRef getFunctionName() const { return QualifiedName.back(); }
106 
107   /// Get the qualified name parts in reversed order.
108   /// E.g. { "std", "vector", "data" } -> "vector", "std"
begin_qualified_name_parts()109   auto begin_qualified_name_parts() const {
110     return std::next(QualifiedName.rbegin());
111   }
end_qualified_name_parts()112   auto end_qualified_name_parts() const { return QualifiedName.rend(); }
113 
114   /// It's false, if and only if we expect a single identifier, such as
115   /// `getenv`. It's true for `std::swap`, or `my::detail::container::data`.
hasQualifiedNameParts()116   bool hasQualifiedNameParts() const { return QualifiedName.size() > 1; }
117 
118   /// @name Matching CallDescriptions against a CallEvent
119   /// @{
120 
121   /// Returns true if the CallEvent is a call to a function that matches
122   /// the CallDescription.
123   ///
124   /// \note This function is not intended to be used to match Obj-C method
125   /// calls.
126   bool matches(const CallEvent &Call) const;
127 
128   /// Returns true whether the CallEvent matches on any of the CallDescriptions
129   /// supplied.
130   ///
131   /// \note This function is not intended to be used to match Obj-C method
132   /// calls.
matchesAny(const CallEvent & Call,const CallDescription & CD1)133   friend bool matchesAny(const CallEvent &Call, const CallDescription &CD1) {
134     return CD1.matches(Call);
135   }
136 
137   /// \copydoc clang::ento::CallDescription::matchesAny(const CallEvent &, const CallDescription &)
138   template <typename... Ts>
matchesAny(const CallEvent & Call,const CallDescription & CD1,const Ts &...CDs)139   friend bool matchesAny(const CallEvent &Call, const CallDescription &CD1,
140                          const Ts &...CDs) {
141     return CD1.matches(Call) || matchesAny(Call, CDs...);
142   }
143   /// @}
144 
145   /// @name Matching CallDescriptions against a CallExpr
146   /// @{
147 
148   /// Returns true if the CallExpr is a call to a function that matches the
149   /// CallDescription.
150   ///
151   /// When available, always prefer matching with a CallEvent! This function
152   /// exists only when that is not available, for example, when _only_
153   /// syntactic check is done on a piece of code.
154   ///
155   /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade
156   /// for syntactic only matching if you are writing a new checker. This is
157   /// handy if a CallDescriptionMap is already there.
158   ///
159   /// The function is imprecise because CallEvent may know path sensitive
160   /// information, such as the precise argument count (see comments for
161   /// CallEvent::getNumArgs), the called function if it was called through a
162   /// function pointer, and other information not available syntactically.
163   bool matchesAsWritten(const CallExpr &CE) const;
164 
165   /// Returns true whether the CallExpr matches on any of the CallDescriptions
166   /// supplied.
167   ///
168   /// \note This function is not intended to be used to match Obj-C method
169   /// calls.
matchesAnyAsWritten(const CallExpr & CE,const CallDescription & CD1)170   friend bool matchesAnyAsWritten(const CallExpr &CE,
171                                   const CallDescription &CD1) {
172     return CD1.matchesAsWritten(CE);
173   }
174 
175   /// \copydoc clang::ento::CallDescription::matchesAnyAsWritten(const CallExpr &, const CallDescription &)
176   template <typename... Ts>
matchesAnyAsWritten(const CallExpr & CE,const CallDescription & CD1,const Ts &...CDs)177   friend bool matchesAnyAsWritten(const CallExpr &CE,
178                                   const CallDescription &CD1,
179                                   const Ts &...CDs) {
180     return CD1.matchesAsWritten(CE) || matchesAnyAsWritten(CE, CDs...);
181   }
182   /// @}
183 
184 private:
185   bool matchesImpl(const FunctionDecl *Callee, size_t ArgCount,
186                    size_t ParamCount) const;
187 
188   bool matchNameOnly(const NamedDecl *ND) const;
189   bool matchQualifiedNameParts(const Decl *D) const;
190 };
191 
192 /// An immutable map from CallDescriptions to arbitrary data. Provides a unified
193 /// way for checkers to react on function calls.
194 template <typename T> class CallDescriptionMap {
195   friend class CallDescriptionSet;
196 
197   // Some call descriptions aren't easily hashable (eg., the ones with qualified
198   // names in which some sections are omitted), so let's put them
199   // in a simple vector and use linear lookup.
200   // TODO: Implement an actual map for fast lookup for "hashable" call
201   // descriptions (eg., the ones for C functions that just match the name).
202   std::vector<std::pair<CallDescription, T>> LinearMap;
203 
204 public:
CallDescriptionMap(std::initializer_list<std::pair<CallDescription,T>> && List)205   CallDescriptionMap(
206       std::initializer_list<std::pair<CallDescription, T>> &&List)
207       : LinearMap(List) {}
208 
209   template <typename InputIt>
CallDescriptionMap(InputIt First,InputIt Last)210   CallDescriptionMap(InputIt First, InputIt Last) : LinearMap(First, Last) {}
211 
212   ~CallDescriptionMap() = default;
213 
214   // These maps are usually stored once per checker, so let's make sure
215   // we don't do redundant copies.
216   CallDescriptionMap(const CallDescriptionMap &) = delete;
217   CallDescriptionMap &operator=(const CallDescription &) = delete;
218 
219   CallDescriptionMap(CallDescriptionMap &&) = default;
220   CallDescriptionMap &operator=(CallDescriptionMap &&) = default;
221 
lookup(const CallEvent & Call)222   [[nodiscard]] const T *lookup(const CallEvent &Call) const {
223     // Slow path: linear lookup.
224     // TODO: Implement some sort of fast path.
225     for (const std::pair<CallDescription, T> &I : LinearMap)
226       if (I.first.matches(Call))
227         return &I.second;
228 
229     return nullptr;
230   }
231 
232   /// When available, always prefer lookup with a CallEvent! This function
233   /// exists only when that is not available, for example, when _only_
234   /// syntactic check is done on a piece of code.
235   ///
236   /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade
237   /// for syntactic only matching if you are writing a new checker. This is
238   /// handy if a CallDescriptionMap is already there.
239   ///
240   /// The function is imprecise because CallEvent may know path sensitive
241   /// information, such as the precise argument count (see comments for
242   /// CallEvent::getNumArgs), the called function if it was called through a
243   /// function pointer, and other information not available syntactically.
lookupAsWritten(const CallExpr & Call)244   [[nodiscard]] const T *lookupAsWritten(const CallExpr &Call) const {
245     // Slow path: linear lookup.
246     // TODO: Implement some sort of fast path.
247     for (const std::pair<CallDescription, T> &I : LinearMap)
248       if (I.first.matchesAsWritten(Call))
249         return &I.second;
250 
251     return nullptr;
252   }
253 };
254 
255 /// Enumerators of this enum class are used to construct CallDescription
256 /// objects; in that context the fully qualified name is needlessly verbose.
257 using CDM = CallDescription::Mode;
258 
259 /// An immutable set of CallDescriptions.
260 /// Checkers can efficiently decide if a given CallEvent matches any
261 /// CallDescription in the set.
262 class CallDescriptionSet {
263   CallDescriptionMap<bool /*unused*/> Impl = {};
264 
265 public:
266   CallDescriptionSet(std::initializer_list<CallDescription> &&List);
267 
268   CallDescriptionSet(const CallDescriptionSet &) = delete;
269   CallDescriptionSet &operator=(const CallDescription &) = delete;
270 
271   [[nodiscard]] bool contains(const CallEvent &Call) const;
272 
273   /// When available, always prefer lookup with a CallEvent! This function
274   /// exists only when that is not available, for example, when _only_
275   /// syntactic check is done on a piece of code.
276   ///
277   /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade
278   /// for syntactic only matching if you are writing a new checker. This is
279   /// handy if a CallDescriptionMap is already there.
280   ///
281   /// The function is imprecise because CallEvent may know path sensitive
282   /// information, such as the precise argument count (see comments for
283   /// CallEvent::getNumArgs), the called function if it was called through a
284   /// function pointer, and other information not available syntactically.
285   [[nodiscard]] bool containsAsWritten(const CallExpr &CE) const;
286 };
287 
288 } // namespace ento
289 } // namespace clang
290 
291 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H
292