1 //===- CallDescription.h - function/method call matching --*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This file defines a generic mechanism for matching for function and 10 /// method calls of C, C++, and Objective-C languages. Instances of these 11 /// classes are frequently used together with the CallEvent classes. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H 16 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H 17 18 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/Support/Compiler.h" 21 #include <optional> 22 #include <vector> 23 24 namespace clang { 25 class IdentifierInfo; 26 } // namespace clang 27 28 namespace clang { 29 namespace ento { 30 /// A `CallDescription` is a pattern that can be used to _match_ calls 31 /// based on the qualified name and the argument/parameter counts. 32 class CallDescription { 33 public: 34 enum class Mode { 35 /// Match calls to functions from the C standard library. This also 36 /// recognizes builtin variants whose name is derived by adding 37 /// "__builtin", "__inline" or similar prefixes or suffixes; but only 38 /// matches functions than are externally visible and are declared either 39 /// directly within a TU or in the namespace 'std'. 40 /// For the exact heuristics, see CheckerContext::isCLibraryFunction(). 41 CLibrary, 42 43 /// An extended version of the `CLibrary` mode that also matches the 44 /// hardened variants like __FOO_chk() and __builtin__FOO_chk() that take 45 /// additional arguments compared to the "regular" function FOO(). 46 /// This is not the default behavior of `CLibrary` because in this case the 47 /// checker code must be prepared to handle the different parametrization. 48 /// For the exact heuristics, see CheckerContext::isHardenedVariantOf(). 49 CLibraryMaybeHardened, 50 51 /// Matches "simple" functions that are not methods. (Static methods are 52 /// methods.) 53 SimpleFunc, 54 55 /// Matches a C++ method (may be static, may be virtual, may be an 56 /// overloaded operator, a constructor or a destructor). 57 CXXMethod, 58 59 /// Match any CallEvent that is not an ObjCMethodCall. This should not be 60 /// used when the checker looks for a concrete function (and knows whether 61 /// it is a method); but GenericTaintChecker uses this mode to match 62 /// functions whose name was configured by the user. 63 Unspecified, 64 65 /// FIXME: Add support for ObjCMethodCall events (I'm not adding it because 66 /// I'm not familiar with Objective-C). Note that currently an early return 67 /// in `bool matches(const CallEvent &Call) const;` discards all 68 /// Objective-C method calls. 69 }; 70 71 private: 72 friend class CallEvent; 73 using MaybeCount = std::optional<unsigned>; 74 75 mutable std::optional<const IdentifierInfo *> II; 76 // The list of the qualified names used to identify the specified CallEvent, 77 // e.g. "{a, b}" represent the qualified names, like "a::b". 78 std::vector<std::string> QualifiedName; 79 MaybeCount RequiredArgs; 80 MaybeCount RequiredParams; 81 Mode MatchAs; 82 83 public: 84 /// Constructs a CallDescription object. 85 /// 86 /// @param MatchAs Specifies the kind of the call that should be matched. 87 /// 88 /// @param QualifiedName The list of the name qualifiers of the function that 89 /// will be matched. The user is allowed to skip any of the qualifiers. 90 /// For example, {"std", "basic_string", "c_str"} would match both 91 /// std::basic_string<...>::c_str() and std::__1::basic_string<...>::c_str(). 92 /// 93 /// @param RequiredArgs The expected number of arguments that are passed to 94 /// the function. Omit this parameter (or pass std::nullopt) to match every 95 /// occurrence without checking the argument count in the call. 96 /// 97 /// @param RequiredParams The expected number of parameters in the function 98 /// definition that is called. Omit this parameter to match every occurrence 99 /// without checking the parameter count in the definition. 100 CallDescription(Mode MatchAs, ArrayRef<StringRef> QualifiedName, 101 MaybeCount RequiredArgs = std::nullopt, 102 MaybeCount RequiredParams = std::nullopt); 103 104 /// Get the name of the function that this object matches. getFunctionName()105 StringRef getFunctionName() const { return QualifiedName.back(); } 106 107 /// Get the qualified name parts in reversed order. 108 /// E.g. { "std", "vector", "data" } -> "vector", "std" begin_qualified_name_parts()109 auto begin_qualified_name_parts() const { 110 return std::next(QualifiedName.rbegin()); 111 } end_qualified_name_parts()112 auto end_qualified_name_parts() const { return QualifiedName.rend(); } 113 114 /// It's false, if and only if we expect a single identifier, such as 115 /// `getenv`. It's true for `std::swap`, or `my::detail::container::data`. hasQualifiedNameParts()116 bool hasQualifiedNameParts() const { return QualifiedName.size() > 1; } 117 118 /// @name Matching CallDescriptions against a CallEvent 119 /// @{ 120 121 /// Returns true if the CallEvent is a call to a function that matches 122 /// the CallDescription. 123 /// 124 /// \note This function is not intended to be used to match Obj-C method 125 /// calls. 126 bool matches(const CallEvent &Call) const; 127 128 /// Returns true whether the CallEvent matches on any of the CallDescriptions 129 /// supplied. 130 /// 131 /// \note This function is not intended to be used to match Obj-C method 132 /// calls. matchesAny(const CallEvent & Call,const CallDescription & CD1)133 friend bool matchesAny(const CallEvent &Call, const CallDescription &CD1) { 134 return CD1.matches(Call); 135 } 136 137 /// \copydoc clang::ento::CallDescription::matchesAny(const CallEvent &, const CallDescription &) 138 template <typename... Ts> matchesAny(const CallEvent & Call,const CallDescription & CD1,const Ts &...CDs)139 friend bool matchesAny(const CallEvent &Call, const CallDescription &CD1, 140 const Ts &...CDs) { 141 return CD1.matches(Call) || matchesAny(Call, CDs...); 142 } 143 /// @} 144 145 /// @name Matching CallDescriptions against a CallExpr 146 /// @{ 147 148 /// Returns true if the CallExpr is a call to a function that matches the 149 /// CallDescription. 150 /// 151 /// When available, always prefer matching with a CallEvent! This function 152 /// exists only when that is not available, for example, when _only_ 153 /// syntactic check is done on a piece of code. 154 /// 155 /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade 156 /// for syntactic only matching if you are writing a new checker. This is 157 /// handy if a CallDescriptionMap is already there. 158 /// 159 /// The function is imprecise because CallEvent may know path sensitive 160 /// information, such as the precise argument count (see comments for 161 /// CallEvent::getNumArgs), the called function if it was called through a 162 /// function pointer, and other information not available syntactically. 163 bool matchesAsWritten(const CallExpr &CE) const; 164 165 /// Returns true whether the CallExpr matches on any of the CallDescriptions 166 /// supplied. 167 /// 168 /// \note This function is not intended to be used to match Obj-C method 169 /// calls. matchesAnyAsWritten(const CallExpr & CE,const CallDescription & CD1)170 friend bool matchesAnyAsWritten(const CallExpr &CE, 171 const CallDescription &CD1) { 172 return CD1.matchesAsWritten(CE); 173 } 174 175 /// \copydoc clang::ento::CallDescription::matchesAnyAsWritten(const CallExpr &, const CallDescription &) 176 template <typename... Ts> matchesAnyAsWritten(const CallExpr & CE,const CallDescription & CD1,const Ts &...CDs)177 friend bool matchesAnyAsWritten(const CallExpr &CE, 178 const CallDescription &CD1, 179 const Ts &...CDs) { 180 return CD1.matchesAsWritten(CE) || matchesAnyAsWritten(CE, CDs...); 181 } 182 /// @} 183 184 private: 185 bool matchesImpl(const FunctionDecl *Callee, size_t ArgCount, 186 size_t ParamCount) const; 187 188 bool matchNameOnly(const NamedDecl *ND) const; 189 bool matchQualifiedNameParts(const Decl *D) const; 190 }; 191 192 /// An immutable map from CallDescriptions to arbitrary data. Provides a unified 193 /// way for checkers to react on function calls. 194 template <typename T> class CallDescriptionMap { 195 friend class CallDescriptionSet; 196 197 // Some call descriptions aren't easily hashable (eg., the ones with qualified 198 // names in which some sections are omitted), so let's put them 199 // in a simple vector and use linear lookup. 200 // TODO: Implement an actual map for fast lookup for "hashable" call 201 // descriptions (eg., the ones for C functions that just match the name). 202 std::vector<std::pair<CallDescription, T>> LinearMap; 203 204 public: CallDescriptionMap(std::initializer_list<std::pair<CallDescription,T>> && List)205 CallDescriptionMap( 206 std::initializer_list<std::pair<CallDescription, T>> &&List) 207 : LinearMap(List) {} 208 209 template <typename InputIt> CallDescriptionMap(InputIt First,InputIt Last)210 CallDescriptionMap(InputIt First, InputIt Last) : LinearMap(First, Last) {} 211 212 ~CallDescriptionMap() = default; 213 214 // These maps are usually stored once per checker, so let's make sure 215 // we don't do redundant copies. 216 CallDescriptionMap(const CallDescriptionMap &) = delete; 217 CallDescriptionMap &operator=(const CallDescription &) = delete; 218 219 CallDescriptionMap(CallDescriptionMap &&) = default; 220 CallDescriptionMap &operator=(CallDescriptionMap &&) = default; 221 lookup(const CallEvent & Call)222 [[nodiscard]] const T *lookup(const CallEvent &Call) const { 223 // Slow path: linear lookup. 224 // TODO: Implement some sort of fast path. 225 for (const std::pair<CallDescription, T> &I : LinearMap) 226 if (I.first.matches(Call)) 227 return &I.second; 228 229 return nullptr; 230 } 231 232 /// When available, always prefer lookup with a CallEvent! This function 233 /// exists only when that is not available, for example, when _only_ 234 /// syntactic check is done on a piece of code. 235 /// 236 /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade 237 /// for syntactic only matching if you are writing a new checker. This is 238 /// handy if a CallDescriptionMap is already there. 239 /// 240 /// The function is imprecise because CallEvent may know path sensitive 241 /// information, such as the precise argument count (see comments for 242 /// CallEvent::getNumArgs), the called function if it was called through a 243 /// function pointer, and other information not available syntactically. lookupAsWritten(const CallExpr & Call)244 [[nodiscard]] const T *lookupAsWritten(const CallExpr &Call) const { 245 // Slow path: linear lookup. 246 // TODO: Implement some sort of fast path. 247 for (const std::pair<CallDescription, T> &I : LinearMap) 248 if (I.first.matchesAsWritten(Call)) 249 return &I.second; 250 251 return nullptr; 252 } 253 }; 254 255 /// Enumerators of this enum class are used to construct CallDescription 256 /// objects; in that context the fully qualified name is needlessly verbose. 257 using CDM = CallDescription::Mode; 258 259 /// An immutable set of CallDescriptions. 260 /// Checkers can efficiently decide if a given CallEvent matches any 261 /// CallDescription in the set. 262 class CallDescriptionSet { 263 CallDescriptionMap<bool /*unused*/> Impl = {}; 264 265 public: 266 CallDescriptionSet(std::initializer_list<CallDescription> &&List); 267 268 CallDescriptionSet(const CallDescriptionSet &) = delete; 269 CallDescriptionSet &operator=(const CallDescription &) = delete; 270 271 [[nodiscard]] bool contains(const CallEvent &Call) const; 272 273 /// When available, always prefer lookup with a CallEvent! This function 274 /// exists only when that is not available, for example, when _only_ 275 /// syntactic check is done on a piece of code. 276 /// 277 /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade 278 /// for syntactic only matching if you are writing a new checker. This is 279 /// handy if a CallDescriptionMap is already there. 280 /// 281 /// The function is imprecise because CallEvent may know path sensitive 282 /// information, such as the precise argument count (see comments for 283 /// CallEvent::getNumArgs), the called function if it was called through a 284 /// function pointer, and other information not available syntactically. 285 [[nodiscard]] bool containsAsWritten(const CallExpr &CE) const; 286 }; 287 288 } // namespace ento 289 } // namespace clang 290 291 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H 292