xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
10b57cec5SDimitry Andric //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This checker defines the attack surface for generic taint propagation.
100b57cec5SDimitry Andric //
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #include "Taint.h"
18a7dea167SDimitry Andric #include "Yaml.h"
190b57cec5SDimitry Andric #include "clang/AST/Attr.h"
200b57cec5SDimitry Andric #include "clang/Basic/Builtins.h"
21a7dea167SDimitry Andric #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
220b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
230b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h"
240b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/CheckerManager.h"
255ffd83dbSDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
260b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
270b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28a7dea167SDimitry Andric #include "llvm/Support/YAMLTraits.h"
295ffd83dbSDimitry Andric 
30480093f4SDimitry Andric #include <algorithm>
31a7dea167SDimitry Andric #include <limits>
325ffd83dbSDimitry Andric #include <memory>
33480093f4SDimitry Andric #include <unordered_map>
340b57cec5SDimitry Andric #include <utility>
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric using namespace clang;
370b57cec5SDimitry Andric using namespace ento;
380b57cec5SDimitry Andric using namespace taint;
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric namespace {
415ffd83dbSDimitry Andric class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
420b57cec5SDimitry Andric public:
430b57cec5SDimitry Andric   static void *getTag() {
440b57cec5SDimitry Andric     static int Tag;
450b57cec5SDimitry Andric     return &Tag;
460b57cec5SDimitry Andric   }
470b57cec5SDimitry Andric 
485ffd83dbSDimitry Andric   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
495ffd83dbSDimitry Andric   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
500b57cec5SDimitry Andric 
51a7dea167SDimitry Andric   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52a7dea167SDimitry Andric                   const char *Sep) const override;
53a7dea167SDimitry Andric 
54a7dea167SDimitry Andric   using ArgVector = SmallVector<unsigned, 2>;
55a7dea167SDimitry Andric   using SignedArgVector = SmallVector<int, 2>;
56a7dea167SDimitry Andric 
57a7dea167SDimitry Andric   enum class VariadicType { None, Src, Dst };
58a7dea167SDimitry Andric 
59a7dea167SDimitry Andric   /// Used to parse the configuration file.
60a7dea167SDimitry Andric   struct TaintConfiguration {
61480093f4SDimitry Andric     using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
62a7dea167SDimitry Andric 
63a7dea167SDimitry Andric     struct Propagation {
64a7dea167SDimitry Andric       std::string Name;
65480093f4SDimitry Andric       std::string Scope;
66a7dea167SDimitry Andric       ArgVector SrcArgs;
67a7dea167SDimitry Andric       SignedArgVector DstArgs;
68a7dea167SDimitry Andric       VariadicType VarType;
69a7dea167SDimitry Andric       unsigned VarIndex;
70a7dea167SDimitry Andric     };
71a7dea167SDimitry Andric 
72a7dea167SDimitry Andric     std::vector<Propagation> Propagations;
73480093f4SDimitry Andric     std::vector<NameScopeArgs> Filters;
74480093f4SDimitry Andric     std::vector<NameScopeArgs> Sinks;
75a7dea167SDimitry Andric 
76a7dea167SDimitry Andric     TaintConfiguration() = default;
77a7dea167SDimitry Andric     TaintConfiguration(const TaintConfiguration &) = default;
78a7dea167SDimitry Andric     TaintConfiguration(TaintConfiguration &&) = default;
79a7dea167SDimitry Andric     TaintConfiguration &operator=(const TaintConfiguration &) = default;
80a7dea167SDimitry Andric     TaintConfiguration &operator=(TaintConfiguration &&) = default;
81a7dea167SDimitry Andric   };
82a7dea167SDimitry Andric 
83a7dea167SDimitry Andric   /// Convert SignedArgVector to ArgVector.
84a7dea167SDimitry Andric   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
855ffd83dbSDimitry Andric                                const SignedArgVector &Args);
86a7dea167SDimitry Andric 
87a7dea167SDimitry Andric   /// Parse the config.
88a7dea167SDimitry Andric   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89a7dea167SDimitry Andric                           TaintConfiguration &&Config);
90a7dea167SDimitry Andric 
91a7dea167SDimitry Andric   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92a7dea167SDimitry Andric   /// Denotes the return vale.
93a7dea167SDimitry Andric   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
94a7dea167SDimitry Andric                                          1};
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric private:
970b57cec5SDimitry Andric   mutable std::unique_ptr<BugType> BT;
980b57cec5SDimitry Andric   void initBugType() const {
990b57cec5SDimitry Andric     if (!BT)
1005ffd83dbSDimitry Andric       BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
1015ffd83dbSDimitry Andric                                      "Untrusted Data");
1020b57cec5SDimitry Andric   }
1030b57cec5SDimitry Andric 
104480093f4SDimitry Andric   struct FunctionData {
105480093f4SDimitry Andric     FunctionData() = delete;
106e8d8bef9SDimitry Andric     FunctionData(const FunctionDecl *FDecl, StringRef Name,
107e8d8bef9SDimitry Andric                  std::string FullName)
108e8d8bef9SDimitry Andric         : FDecl(FDecl), Name(Name), FullName(std::move(FullName)) {}
109480093f4SDimitry Andric     FunctionData(const FunctionData &) = default;
110480093f4SDimitry Andric     FunctionData(FunctionData &&) = default;
111480093f4SDimitry Andric     FunctionData &operator=(const FunctionData &) = delete;
112480093f4SDimitry Andric     FunctionData &operator=(FunctionData &&) = delete;
113480093f4SDimitry Andric 
1145ffd83dbSDimitry Andric     static Optional<FunctionData> create(const CallEvent &Call,
115480093f4SDimitry Andric                                          const CheckerContext &C) {
1165ffd83dbSDimitry Andric       if (!Call.getDecl())
1175ffd83dbSDimitry Andric         return None;
1185ffd83dbSDimitry Andric 
1195ffd83dbSDimitry Andric       const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
120480093f4SDimitry Andric       if (!FDecl || (FDecl->getKind() != Decl::Function &&
121480093f4SDimitry Andric                      FDecl->getKind() != Decl::CXXMethod))
122480093f4SDimitry Andric         return None;
123480093f4SDimitry Andric 
124480093f4SDimitry Andric       StringRef Name = C.getCalleeName(FDecl);
125480093f4SDimitry Andric       std::string FullName = FDecl->getQualifiedNameAsString();
126480093f4SDimitry Andric       if (Name.empty() || FullName.empty())
127480093f4SDimitry Andric         return None;
128480093f4SDimitry Andric 
129e8d8bef9SDimitry Andric       return FunctionData{FDecl, Name, std::move(FullName)};
130480093f4SDimitry Andric     }
131480093f4SDimitry Andric 
132480093f4SDimitry Andric     bool isInScope(StringRef Scope) const {
133480093f4SDimitry Andric       return StringRef(FullName).startswith(Scope);
134480093f4SDimitry Andric     }
135480093f4SDimitry Andric 
136480093f4SDimitry Andric     const FunctionDecl *const FDecl;
137480093f4SDimitry Andric     const StringRef Name;
138480093f4SDimitry Andric     const std::string FullName;
139480093f4SDimitry Andric   };
140480093f4SDimitry Andric 
1410b57cec5SDimitry Andric   /// Catch taint related bugs. Check if tainted data is passed to a
142480093f4SDimitry Andric   /// system call etc. Returns true on matching.
1435ffd83dbSDimitry Andric   bool checkPre(const CallEvent &Call, const FunctionData &FData,
144480093f4SDimitry Andric                 CheckerContext &C) const;
1450b57cec5SDimitry Andric 
146480093f4SDimitry Andric   /// Add taint sources on a pre-visit. Returns true on matching.
1475ffd83dbSDimitry Andric   bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
148480093f4SDimitry Andric                      CheckerContext &C) const;
1490b57cec5SDimitry Andric 
150480093f4SDimitry Andric   /// Mark filter's arguments not tainted on a pre-visit. Returns true on
151480093f4SDimitry Andric   /// matching.
1525ffd83dbSDimitry Andric   bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
153480093f4SDimitry Andric                      CheckerContext &C) const;
154480093f4SDimitry Andric 
155480093f4SDimitry Andric   /// Propagate taint generated at pre-visit. Returns true on matching.
1565ffd83dbSDimitry Andric   static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric   /// Check if the region the expression evaluates to is the standard input,
1590b57cec5SDimitry Andric   /// and thus, is tainted.
1600b57cec5SDimitry Andric   static bool isStdin(const Expr *E, CheckerContext &C);
1610b57cec5SDimitry Andric 
1620b57cec5SDimitry Andric   /// Given a pointer argument, return the value it points to.
1635ffd83dbSDimitry Andric   static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
1640b57cec5SDimitry Andric 
1650b57cec5SDimitry Andric   /// Check for CWE-134: Uncontrolled Format String.
166a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
167a7dea167SDimitry Andric       "Untrusted data is used as a format string "
168a7dea167SDimitry Andric       "(CWE-134: Uncontrolled Format String)";
1695ffd83dbSDimitry Andric   bool checkUncontrolledFormatString(const CallEvent &Call,
1700b57cec5SDimitry Andric                                      CheckerContext &C) const;
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   /// Check for:
1730b57cec5SDimitry Andric   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
1740b57cec5SDimitry Andric   /// CWE-78, "Failure to Sanitize Data into an OS Command"
175a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
176a7dea167SDimitry Andric       "Untrusted data is passed to a system call "
177a7dea167SDimitry Andric       "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
1785ffd83dbSDimitry Andric   bool checkSystemCall(const CallEvent &Call, StringRef Name,
1790b57cec5SDimitry Andric                        CheckerContext &C) const;
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric   /// Check if tainted data is used as a buffer size ins strn.. functions,
1820b57cec5SDimitry Andric   /// and allocators.
183a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgTaintedBufferSize =
184a7dea167SDimitry Andric       "Untrusted data is used to specify the buffer size "
185a7dea167SDimitry Andric       "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
186a7dea167SDimitry Andric       "for character data and the null terminator)";
1875ffd83dbSDimitry Andric   bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
1880b57cec5SDimitry Andric 
189a7dea167SDimitry Andric   /// Check if tainted data is used as a custom sink's parameter.
190a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgCustomSink =
191a7dea167SDimitry Andric       "Untrusted data is passed to a user-defined sink";
1925ffd83dbSDimitry Andric   bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
1930b57cec5SDimitry Andric                         CheckerContext &C) const;
1940b57cec5SDimitry Andric 
195a7dea167SDimitry Andric   /// Generate a report if the expression is tainted or points to tainted data.
196a7dea167SDimitry Andric   bool generateReportIfTainted(const Expr *E, StringRef Msg,
197a7dea167SDimitry Andric                                CheckerContext &C) const;
198a7dea167SDimitry Andric 
199a7dea167SDimitry Andric   struct TaintPropagationRule;
200480093f4SDimitry Andric   template <typename T>
201480093f4SDimitry Andric   using ConfigDataMap =
202480093f4SDimitry Andric       std::unordered_multimap<std::string, std::pair<std::string, T>>;
203480093f4SDimitry Andric   using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
204480093f4SDimitry Andric   using NameArgMap = ConfigDataMap<ArgVector>;
205480093f4SDimitry Andric 
206480093f4SDimitry Andric   /// Find a function with the given name and scope. Returns the first match
207480093f4SDimitry Andric   /// or the end of the map.
208480093f4SDimitry Andric   template <typename T>
209480093f4SDimitry Andric   static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
210480093f4SDimitry Andric                                    const FunctionData &FData);
2110b57cec5SDimitry Andric 
2120b57cec5SDimitry Andric   /// A struct used to specify taint propagation rules for a function.
2130b57cec5SDimitry Andric   ///
2140b57cec5SDimitry Andric   /// If any of the possible taint source arguments is tainted, all of the
2150b57cec5SDimitry Andric   /// destination arguments should also be tainted. Use InvalidArgIndex in the
2160b57cec5SDimitry Andric   /// src list to specify that all of the arguments can introduce taint. Use
2170b57cec5SDimitry Andric   /// InvalidArgIndex in the dst arguments to signify that all the non-const
2180b57cec5SDimitry Andric   /// pointer and reference arguments might be tainted on return. If
2190b57cec5SDimitry Andric   /// ReturnValueIndex is added to the dst list, the return value will be
2200b57cec5SDimitry Andric   /// tainted.
2210b57cec5SDimitry Andric   struct TaintPropagationRule {
2225ffd83dbSDimitry Andric     using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
2230b57cec5SDimitry Andric                                          CheckerContext &C);
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric     /// List of arguments which can be taint sources and should be checked.
2260b57cec5SDimitry Andric     ArgVector SrcArgs;
2270b57cec5SDimitry Andric     /// List of arguments which should be tainted on function return.
2280b57cec5SDimitry Andric     ArgVector DstArgs;
2290b57cec5SDimitry Andric     /// Index for the first variadic parameter if exist.
2300b57cec5SDimitry Andric     unsigned VariadicIndex;
2310b57cec5SDimitry Andric     /// Show when a function has variadic parameters. If it has, it marks all
2320b57cec5SDimitry Andric     /// of them as source or destination.
2330b57cec5SDimitry Andric     VariadicType VarType;
2340b57cec5SDimitry Andric     /// Special function for tainted source determination. If defined, it can
2350b57cec5SDimitry Andric     /// override the default behavior.
2360b57cec5SDimitry Andric     PropagationFuncType PropagationFunc;
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric     TaintPropagationRule()
2390b57cec5SDimitry Andric         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
2400b57cec5SDimitry Andric           PropagationFunc(nullptr) {}
2410b57cec5SDimitry Andric 
242a7dea167SDimitry Andric     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
2430b57cec5SDimitry Andric                          VariadicType Var = VariadicType::None,
2440b57cec5SDimitry Andric                          unsigned VarIndex = InvalidArgIndex,
2450b57cec5SDimitry Andric                          PropagationFuncType Func = nullptr)
2460b57cec5SDimitry Andric         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
2470b57cec5SDimitry Andric           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric     /// Get the propagation rule for a given function.
2500b57cec5SDimitry Andric     static TaintPropagationRule
251a7dea167SDimitry Andric     getTaintPropagationRule(const NameRuleMap &CustomPropagations,
252480093f4SDimitry Andric                             const FunctionData &FData, CheckerContext &C);
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
2550b57cec5SDimitry Andric     void addDstArg(unsigned A) { DstArgs.push_back(A); }
2560b57cec5SDimitry Andric 
2570b57cec5SDimitry Andric     bool isNull() const {
2580b57cec5SDimitry Andric       return SrcArgs.empty() && DstArgs.empty() &&
2590b57cec5SDimitry Andric              VariadicType::None == VarType;
2600b57cec5SDimitry Andric     }
2610b57cec5SDimitry Andric 
2620b57cec5SDimitry Andric     bool isDestinationArgument(unsigned ArgNum) const {
263*349cc55cSDimitry Andric       return llvm::is_contained(DstArgs, ArgNum);
2640b57cec5SDimitry Andric     }
2650b57cec5SDimitry Andric 
2665ffd83dbSDimitry Andric     static bool isTaintedOrPointsToTainted(const Expr *E,
2675ffd83dbSDimitry Andric                                            const ProgramStateRef &State,
2680b57cec5SDimitry Andric                                            CheckerContext &C) {
2690b57cec5SDimitry Andric       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
2700b57cec5SDimitry Andric         return true;
2710b57cec5SDimitry Andric 
2720b57cec5SDimitry Andric       if (!E->getType().getTypePtr()->isPointerType())
2730b57cec5SDimitry Andric         return false;
2740b57cec5SDimitry Andric 
2755ffd83dbSDimitry Andric       Optional<SVal> V = getPointeeOf(C, E);
2760b57cec5SDimitry Andric       return (V && isTainted(State, *V));
2770b57cec5SDimitry Andric     }
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric     /// Pre-process a function which propagates taint according to the
2800b57cec5SDimitry Andric     /// taint rule.
2815ffd83dbSDimitry Andric     ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric     // Functions for custom taintedness propagation.
2845ffd83dbSDimitry Andric     static bool postSocket(bool IsTainted, const CallEvent &Call,
2850b57cec5SDimitry Andric                            CheckerContext &C);
2860b57cec5SDimitry Andric   };
287a7dea167SDimitry Andric 
288480093f4SDimitry Andric   /// Defines a map between the propagation function's name, scope
289480093f4SDimitry Andric   /// and TaintPropagationRule.
290a7dea167SDimitry Andric   NameRuleMap CustomPropagations;
291a7dea167SDimitry Andric 
292480093f4SDimitry Andric   /// Defines a map between the filter function's name, scope and filtering
293480093f4SDimitry Andric   /// args.
294a7dea167SDimitry Andric   NameArgMap CustomFilters;
295a7dea167SDimitry Andric 
296480093f4SDimitry Andric   /// Defines a map between the sink function's name, scope and sinking args.
297a7dea167SDimitry Andric   NameArgMap CustomSinks;
2980b57cec5SDimitry Andric };
2990b57cec5SDimitry Andric 
3000b57cec5SDimitry Andric const unsigned GenericTaintChecker::ReturnValueIndex;
3010b57cec5SDimitry Andric const unsigned GenericTaintChecker::InvalidArgIndex;
3020b57cec5SDimitry Andric 
303a7dea167SDimitry Andric // FIXME: these lines can be removed in C++17
304a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
305a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
306a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
307a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
3080b57cec5SDimitry Andric } // end of anonymous namespace
3090b57cec5SDimitry Andric 
310a7dea167SDimitry Andric using TaintConfig = GenericTaintChecker::TaintConfiguration;
311a7dea167SDimitry Andric 
312a7dea167SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
313480093f4SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
314a7dea167SDimitry Andric 
315a7dea167SDimitry Andric namespace llvm {
316a7dea167SDimitry Andric namespace yaml {
317a7dea167SDimitry Andric template <> struct MappingTraits<TaintConfig> {
318a7dea167SDimitry Andric   static void mapping(IO &IO, TaintConfig &Config) {
319a7dea167SDimitry Andric     IO.mapOptional("Propagations", Config.Propagations);
320a7dea167SDimitry Andric     IO.mapOptional("Filters", Config.Filters);
321a7dea167SDimitry Andric     IO.mapOptional("Sinks", Config.Sinks);
322a7dea167SDimitry Andric   }
323a7dea167SDimitry Andric };
324a7dea167SDimitry Andric 
325a7dea167SDimitry Andric template <> struct MappingTraits<TaintConfig::Propagation> {
326a7dea167SDimitry Andric   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
327a7dea167SDimitry Andric     IO.mapRequired("Name", Propagation.Name);
328480093f4SDimitry Andric     IO.mapOptional("Scope", Propagation.Scope);
329a7dea167SDimitry Andric     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
330a7dea167SDimitry Andric     IO.mapOptional("DstArgs", Propagation.DstArgs);
331a7dea167SDimitry Andric     IO.mapOptional("VariadicType", Propagation.VarType,
332a7dea167SDimitry Andric                    GenericTaintChecker::VariadicType::None);
333a7dea167SDimitry Andric     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
334a7dea167SDimitry Andric                    GenericTaintChecker::InvalidArgIndex);
335a7dea167SDimitry Andric   }
336a7dea167SDimitry Andric };
337a7dea167SDimitry Andric 
338a7dea167SDimitry Andric template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
339a7dea167SDimitry Andric   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
340a7dea167SDimitry Andric     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
341a7dea167SDimitry Andric     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
342a7dea167SDimitry Andric     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
343a7dea167SDimitry Andric   }
344a7dea167SDimitry Andric };
345a7dea167SDimitry Andric 
346480093f4SDimitry Andric template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
347480093f4SDimitry Andric   static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
348480093f4SDimitry Andric     IO.mapRequired("Name", std::get<0>(NSA));
349480093f4SDimitry Andric     IO.mapOptional("Scope", std::get<1>(NSA));
350480093f4SDimitry Andric     IO.mapRequired("Args", std::get<2>(NSA));
351a7dea167SDimitry Andric   }
352a7dea167SDimitry Andric };
353a7dea167SDimitry Andric } // namespace yaml
354a7dea167SDimitry Andric } // namespace llvm
355a7dea167SDimitry Andric 
3560b57cec5SDimitry Andric /// A set which is used to pass information from call pre-visit instruction
3570b57cec5SDimitry Andric /// to the call post-visit. The values are unsigned integers, which are either
3580b57cec5SDimitry Andric /// ReturnValueIndex, or indexes of the pointer/reference argument, which
3590b57cec5SDimitry Andric /// points to data, which should be tainted on return.
3600b57cec5SDimitry Andric REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
3610b57cec5SDimitry Andric 
3625ffd83dbSDimitry Andric GenericTaintChecker::ArgVector
3635ffd83dbSDimitry Andric GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
3645ffd83dbSDimitry Andric                                         const std::string &Option,
3655ffd83dbSDimitry Andric                                         const SignedArgVector &Args) {
366a7dea167SDimitry Andric   ArgVector Result;
367a7dea167SDimitry Andric   for (int Arg : Args) {
368a7dea167SDimitry Andric     if (Arg == -1)
369a7dea167SDimitry Andric       Result.push_back(ReturnValueIndex);
370a7dea167SDimitry Andric     else if (Arg < -1) {
371a7dea167SDimitry Andric       Result.push_back(InvalidArgIndex);
372a7dea167SDimitry Andric       Mgr.reportInvalidCheckerOptionValue(
373a7dea167SDimitry Andric           this, Option,
374a7dea167SDimitry Andric           "an argument number for propagation rules greater or equal to -1");
375a7dea167SDimitry Andric     } else
376a7dea167SDimitry Andric       Result.push_back(static_cast<unsigned>(Arg));
377a7dea167SDimitry Andric   }
378a7dea167SDimitry Andric   return Result;
379a7dea167SDimitry Andric }
380a7dea167SDimitry Andric 
381a7dea167SDimitry Andric void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
382a7dea167SDimitry Andric                                              const std::string &Option,
383a7dea167SDimitry Andric                                              TaintConfiguration &&Config) {
384a7dea167SDimitry Andric   for (auto &P : Config.Propagations) {
385480093f4SDimitry Andric     GenericTaintChecker::CustomPropagations.emplace(
386480093f4SDimitry Andric         P.Name,
387480093f4SDimitry Andric         std::make_pair(P.Scope, TaintPropagationRule{
388480093f4SDimitry Andric                                     std::move(P.SrcArgs),
389480093f4SDimitry Andric                                     convertToArgVector(Mgr, Option, P.DstArgs),
390480093f4SDimitry Andric                                     P.VarType, P.VarIndex}));
391a7dea167SDimitry Andric   }
392a7dea167SDimitry Andric 
393a7dea167SDimitry Andric   for (auto &F : Config.Filters) {
394480093f4SDimitry Andric     GenericTaintChecker::CustomFilters.emplace(
395480093f4SDimitry Andric         std::get<0>(F),
396480093f4SDimitry Andric         std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
397a7dea167SDimitry Andric   }
398a7dea167SDimitry Andric 
399a7dea167SDimitry Andric   for (auto &S : Config.Sinks) {
400480093f4SDimitry Andric     GenericTaintChecker::CustomSinks.emplace(
401480093f4SDimitry Andric         std::get<0>(S),
402480093f4SDimitry Andric         std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
403a7dea167SDimitry Andric   }
404a7dea167SDimitry Andric }
405a7dea167SDimitry Andric 
406480093f4SDimitry Andric template <typename T>
407480093f4SDimitry Andric auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
408480093f4SDimitry Andric                                                const FunctionData &FData) {
4095ffd83dbSDimitry Andric   auto Range = Map.equal_range(std::string(FData.Name));
410480093f4SDimitry Andric   auto It =
411480093f4SDimitry Andric       std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
412480093f4SDimitry Andric         const auto &Value = Entry.second;
413480093f4SDimitry Andric         StringRef Scope = Value.first;
414480093f4SDimitry Andric         return Scope.empty() || FData.isInScope(Scope);
415480093f4SDimitry Andric       });
416480093f4SDimitry Andric   return It != Range.second ? It : Map.end();
417480093f4SDimitry Andric }
418480093f4SDimitry Andric 
4190b57cec5SDimitry Andric GenericTaintChecker::TaintPropagationRule
4200b57cec5SDimitry Andric GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
421480093f4SDimitry Andric     const NameRuleMap &CustomPropagations, const FunctionData &FData,
422480093f4SDimitry Andric     CheckerContext &C) {
4230b57cec5SDimitry Andric   // TODO: Currently, we might lose precision here: we always mark a return
4240b57cec5SDimitry Andric   // value as tainted even if it's just a pointer, pointing to tainted data.
4250b57cec5SDimitry Andric 
4260b57cec5SDimitry Andric   // Check for exact name match for functions without builtin substitutes.
427480093f4SDimitry Andric   // Use qualified name, because these are C functions without namespace.
4280b57cec5SDimitry Andric   TaintPropagationRule Rule =
429480093f4SDimitry Andric       llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
4300b57cec5SDimitry Andric           // Source functions
4310b57cec5SDimitry Andric           // TODO: Add support for vfscanf & family.
4325ffd83dbSDimitry Andric           .Case("fdopen", {{}, {ReturnValueIndex}})
4335ffd83dbSDimitry Andric           .Case("fopen", {{}, {ReturnValueIndex}})
4345ffd83dbSDimitry Andric           .Case("freopen", {{}, {ReturnValueIndex}})
4355ffd83dbSDimitry Andric           .Case("getch", {{}, {ReturnValueIndex}})
4365ffd83dbSDimitry Andric           .Case("getchar", {{}, {ReturnValueIndex}})
4375ffd83dbSDimitry Andric           .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
4385ffd83dbSDimitry Andric           .Case("gets", {{}, {0, ReturnValueIndex}})
4395ffd83dbSDimitry Andric           .Case("scanf", {{}, {}, VariadicType::Dst, 1})
4405ffd83dbSDimitry Andric           .Case("socket", {{},
4415ffd83dbSDimitry Andric                            {ReturnValueIndex},
4425ffd83dbSDimitry Andric                            VariadicType::None,
4430b57cec5SDimitry Andric                            InvalidArgIndex,
4445ffd83dbSDimitry Andric                            &TaintPropagationRule::postSocket})
4455ffd83dbSDimitry Andric           .Case("wgetch", {{}, {ReturnValueIndex}})
4460b57cec5SDimitry Andric           // Propagating functions
4475ffd83dbSDimitry Andric           .Case("atoi", {{0}, {ReturnValueIndex}})
4485ffd83dbSDimitry Andric           .Case("atol", {{0}, {ReturnValueIndex}})
4495ffd83dbSDimitry Andric           .Case("atoll", {{0}, {ReturnValueIndex}})
4505ffd83dbSDimitry Andric           .Case("fgetc", {{0}, {ReturnValueIndex}})
4515ffd83dbSDimitry Andric           .Case("fgetln", {{0}, {ReturnValueIndex}})
4525ffd83dbSDimitry Andric           .Case("fgets", {{2}, {0, ReturnValueIndex}})
4535ffd83dbSDimitry Andric           .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
4545ffd83dbSDimitry Andric           .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
4555ffd83dbSDimitry Andric           .Case("getc", {{0}, {ReturnValueIndex}})
4565ffd83dbSDimitry Andric           .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
4575ffd83dbSDimitry Andric           .Case("getdelim", {{3}, {0}})
4585ffd83dbSDimitry Andric           .Case("getline", {{2}, {0}})
4595ffd83dbSDimitry Andric           .Case("getw", {{0}, {ReturnValueIndex}})
4605ffd83dbSDimitry Andric           .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
4615ffd83dbSDimitry Andric           .Case("read", {{0, 2}, {1, ReturnValueIndex}})
4625ffd83dbSDimitry Andric           .Case("strchr", {{0}, {ReturnValueIndex}})
4635ffd83dbSDimitry Andric           .Case("strrchr", {{0}, {ReturnValueIndex}})
4645ffd83dbSDimitry Andric           .Case("tolower", {{0}, {ReturnValueIndex}})
4655ffd83dbSDimitry Andric           .Case("toupper", {{0}, {ReturnValueIndex}})
4665ffd83dbSDimitry Andric           .Default({});
4670b57cec5SDimitry Andric 
4680b57cec5SDimitry Andric   if (!Rule.isNull())
4690b57cec5SDimitry Andric     return Rule;
470*349cc55cSDimitry Andric 
471*349cc55cSDimitry Andric   // `getenv` returns taint only in untrusted environments.
472*349cc55cSDimitry Andric   if (FData.FullName == "getenv") {
473*349cc55cSDimitry Andric     if (C.getAnalysisManager()
474*349cc55cSDimitry Andric             .getAnalyzerOptions()
475*349cc55cSDimitry Andric             .ShouldAssumeControlledEnvironment)
476*349cc55cSDimitry Andric       return {};
477*349cc55cSDimitry Andric     return {{}, {ReturnValueIndex}};
478*349cc55cSDimitry Andric   }
479*349cc55cSDimitry Andric 
4805ffd83dbSDimitry Andric   assert(FData.FDecl);
4810b57cec5SDimitry Andric 
4820b57cec5SDimitry Andric   // Check if it's one of the memory setting/copying functions.
4830b57cec5SDimitry Andric   // This check is specialized but faster then calling isCLibraryFunction.
484480093f4SDimitry Andric   const FunctionDecl *FDecl = FData.FDecl;
4850b57cec5SDimitry Andric   unsigned BId = 0;
4865ffd83dbSDimitry Andric   if ((BId = FDecl->getMemoryFunctionKind())) {
4870b57cec5SDimitry Andric     switch (BId) {
4880b57cec5SDimitry Andric     case Builtin::BImemcpy:
4890b57cec5SDimitry Andric     case Builtin::BImemmove:
4900b57cec5SDimitry Andric     case Builtin::BIstrncpy:
4910b57cec5SDimitry Andric     case Builtin::BIstrncat:
4925ffd83dbSDimitry Andric       return {{1, 2}, {0, ReturnValueIndex}};
4930b57cec5SDimitry Andric     case Builtin::BIstrlcpy:
4940b57cec5SDimitry Andric     case Builtin::BIstrlcat:
4955ffd83dbSDimitry Andric       return {{1, 2}, {0}};
4960b57cec5SDimitry Andric     case Builtin::BIstrndup:
4975ffd83dbSDimitry Andric       return {{0, 1}, {ReturnValueIndex}};
4980b57cec5SDimitry Andric 
4990b57cec5SDimitry Andric     default:
5000b57cec5SDimitry Andric       break;
5015ffd83dbSDimitry Andric     }
5025ffd83dbSDimitry Andric   }
5030b57cec5SDimitry Andric 
5040b57cec5SDimitry Andric   // Process all other functions which could be defined as builtins.
5050b57cec5SDimitry Andric   if (Rule.isNull()) {
5065ffd83dbSDimitry Andric     const auto OneOf = [FDecl](const auto &... Name) {
5075ffd83dbSDimitry Andric       // FIXME: use fold expression in C++17
5085ffd83dbSDimitry Andric       using unused = int[];
5095ffd83dbSDimitry Andric       bool ret = false;
5105ffd83dbSDimitry Andric       static_cast<void>(unused{
5115ffd83dbSDimitry Andric           0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
5125ffd83dbSDimitry Andric       return ret;
5135ffd83dbSDimitry Andric     };
5145ffd83dbSDimitry Andric     if (OneOf("snprintf"))
5155ffd83dbSDimitry Andric       return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
5165ffd83dbSDimitry Andric     if (OneOf("sprintf"))
517*349cc55cSDimitry Andric       return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 2};
5185ffd83dbSDimitry Andric     if (OneOf("strcpy", "stpcpy", "strcat"))
5195ffd83dbSDimitry Andric       return {{1}, {0, ReturnValueIndex}};
5205ffd83dbSDimitry Andric     if (OneOf("bcopy"))
5215ffd83dbSDimitry Andric       return {{0, 2}, {1}};
5225ffd83dbSDimitry Andric     if (OneOf("strdup", "strdupa", "wcsdup"))
5235ffd83dbSDimitry Andric       return {{0}, {ReturnValueIndex}};
5240b57cec5SDimitry Andric   }
5250b57cec5SDimitry Andric 
5265ffd83dbSDimitry Andric   // Skipping the following functions, since they might be used for cleansing or
5275ffd83dbSDimitry Andric   // smart memory copy:
5280b57cec5SDimitry Andric   // - memccpy - copying until hitting a special character.
5290b57cec5SDimitry Andric 
530480093f4SDimitry Andric   auto It = findFunctionInConfig(CustomPropagations, FData);
5315ffd83dbSDimitry Andric   if (It != CustomPropagations.end())
5325ffd83dbSDimitry Andric     return It->second.second;
5335ffd83dbSDimitry Andric   return {};
534480093f4SDimitry Andric }
535a7dea167SDimitry Andric 
5365ffd83dbSDimitry Andric void GenericTaintChecker::checkPreCall(const CallEvent &Call,
5370b57cec5SDimitry Andric                                        CheckerContext &C) const {
5385ffd83dbSDimitry Andric   Optional<FunctionData> FData = FunctionData::create(Call, C);
539480093f4SDimitry Andric   if (!FData)
540480093f4SDimitry Andric     return;
541480093f4SDimitry Andric 
5420b57cec5SDimitry Andric   // Check for taintedness related errors first: system call, uncontrolled
5430b57cec5SDimitry Andric   // format string, tainted buffer size.
5445ffd83dbSDimitry Andric   if (checkPre(Call, *FData, C))
5450b57cec5SDimitry Andric     return;
5460b57cec5SDimitry Andric 
5470b57cec5SDimitry Andric   // Marks the function's arguments and/or return value tainted if it present in
5480b57cec5SDimitry Andric   // the list.
5495ffd83dbSDimitry Andric   if (addSourcesPre(Call, *FData, C))
550480093f4SDimitry Andric     return;
551480093f4SDimitry Andric 
5525ffd83dbSDimitry Andric   addFiltersPre(Call, *FData, C);
5530b57cec5SDimitry Andric }
5540b57cec5SDimitry Andric 
5555ffd83dbSDimitry Andric void GenericTaintChecker::checkPostCall(const CallEvent &Call,
5560b57cec5SDimitry Andric                                         CheckerContext &C) const {
5570b57cec5SDimitry Andric   // Set the marked values as tainted. The return value only accessible from
5580b57cec5SDimitry Andric   // checkPostStmt.
5595ffd83dbSDimitry Andric   propagateFromPre(Call, C);
5600b57cec5SDimitry Andric }
5610b57cec5SDimitry Andric 
5620b57cec5SDimitry Andric void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
5630b57cec5SDimitry Andric                                      const char *NL, const char *Sep) const {
5640b57cec5SDimitry Andric   printTaint(State, Out, NL, Sep);
5650b57cec5SDimitry Andric }
5660b57cec5SDimitry Andric 
5675ffd83dbSDimitry Andric bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
568480093f4SDimitry Andric                                         const FunctionData &FData,
5690b57cec5SDimitry Andric                                         CheckerContext &C) const {
5700b57cec5SDimitry Andric   // First, try generating a propagation rule for this function.
571a7dea167SDimitry Andric   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
572480093f4SDimitry Andric       this->CustomPropagations, FData, C);
5730b57cec5SDimitry Andric   if (!Rule.isNull()) {
5745ffd83dbSDimitry Andric     ProgramStateRef State = Rule.process(Call, C);
575480093f4SDimitry Andric     if (State) {
5760b57cec5SDimitry Andric       C.addTransition(State);
577480093f4SDimitry Andric       return true;
578480093f4SDimitry Andric     }
579480093f4SDimitry Andric   }
580480093f4SDimitry Andric   return false;
5810b57cec5SDimitry Andric }
5820b57cec5SDimitry Andric 
5835ffd83dbSDimitry Andric bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
584480093f4SDimitry Andric                                         const FunctionData &FData,
585480093f4SDimitry Andric                                         CheckerContext &C) const {
586480093f4SDimitry Andric   auto It = findFunctionInConfig(CustomFilters, FData);
587480093f4SDimitry Andric   if (It == CustomFilters.end())
588480093f4SDimitry Andric     return false;
589480093f4SDimitry Andric 
590480093f4SDimitry Andric   ProgramStateRef State = C.getState();
591480093f4SDimitry Andric   const auto &Value = It->second;
592480093f4SDimitry Andric   const ArgVector &Args = Value.second;
593480093f4SDimitry Andric   for (unsigned ArgNum : Args) {
5945ffd83dbSDimitry Andric     if (ArgNum >= Call.getNumArgs())
595480093f4SDimitry Andric       continue;
596480093f4SDimitry Andric 
5975ffd83dbSDimitry Andric     const Expr *Arg = Call.getArgExpr(ArgNum);
5985ffd83dbSDimitry Andric     Optional<SVal> V = getPointeeOf(C, Arg);
599480093f4SDimitry Andric     if (V)
600480093f4SDimitry Andric       State = removeTaint(State, *V);
601480093f4SDimitry Andric   }
602480093f4SDimitry Andric 
603480093f4SDimitry Andric   if (State != C.getState()) {
6040b57cec5SDimitry Andric     C.addTransition(State);
605480093f4SDimitry Andric     return true;
606480093f4SDimitry Andric   }
607480093f4SDimitry Andric   return false;
6080b57cec5SDimitry Andric }
6090b57cec5SDimitry Andric 
6105ffd83dbSDimitry Andric bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
6115ffd83dbSDimitry Andric                                            CheckerContext &C) {
6120b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
6130b57cec5SDimitry Andric 
6140b57cec5SDimitry Andric   // Depending on what was tainted at pre-visit, we determined a set of
6150b57cec5SDimitry Andric   // arguments which should be tainted after the function returns. These are
6160b57cec5SDimitry Andric   // stored in the state as TaintArgsOnPostVisit set.
6170b57cec5SDimitry Andric   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
6180b57cec5SDimitry Andric   if (TaintArgs.isEmpty())
6190b57cec5SDimitry Andric     return false;
6200b57cec5SDimitry Andric 
6210b57cec5SDimitry Andric   for (unsigned ArgNum : TaintArgs) {
6220b57cec5SDimitry Andric     // Special handling for the tainted return value.
6230b57cec5SDimitry Andric     if (ArgNum == ReturnValueIndex) {
6245ffd83dbSDimitry Andric       State = addTaint(State, Call.getReturnValue());
6250b57cec5SDimitry Andric       continue;
6260b57cec5SDimitry Andric     }
6270b57cec5SDimitry Andric 
6280b57cec5SDimitry Andric     // The arguments are pointer arguments. The data they are pointing at is
6290b57cec5SDimitry Andric     // tainted after the call.
6305ffd83dbSDimitry Andric     if (Call.getNumArgs() < (ArgNum + 1))
6310b57cec5SDimitry Andric       return false;
6325ffd83dbSDimitry Andric     const Expr *Arg = Call.getArgExpr(ArgNum);
6335ffd83dbSDimitry Andric     Optional<SVal> V = getPointeeOf(C, Arg);
6340b57cec5SDimitry Andric     if (V)
6350b57cec5SDimitry Andric       State = addTaint(State, *V);
6360b57cec5SDimitry Andric   }
6370b57cec5SDimitry Andric 
6380b57cec5SDimitry Andric   // Clear up the taint info from the state.
6390b57cec5SDimitry Andric   State = State->remove<TaintArgsOnPostVisit>();
6400b57cec5SDimitry Andric 
6410b57cec5SDimitry Andric   if (State != C.getState()) {
6420b57cec5SDimitry Andric     C.addTransition(State);
6430b57cec5SDimitry Andric     return true;
6440b57cec5SDimitry Andric   }
6450b57cec5SDimitry Andric   return false;
6460b57cec5SDimitry Andric }
6470b57cec5SDimitry Andric 
6485ffd83dbSDimitry Andric bool GenericTaintChecker::checkPre(const CallEvent &Call,
649480093f4SDimitry Andric                                    const FunctionData &FData,
6500b57cec5SDimitry Andric                                    CheckerContext &C) const {
6515ffd83dbSDimitry Andric   if (checkUncontrolledFormatString(Call, C))
6520b57cec5SDimitry Andric     return true;
6530b57cec5SDimitry Andric 
6545ffd83dbSDimitry Andric   if (checkSystemCall(Call, FData.Name, C))
6550b57cec5SDimitry Andric     return true;
6560b57cec5SDimitry Andric 
6575ffd83dbSDimitry Andric   if (checkTaintedBufferSize(Call, C))
6580b57cec5SDimitry Andric     return true;
6590b57cec5SDimitry Andric 
6605ffd83dbSDimitry Andric   return checkCustomSinks(Call, FData, C);
6610b57cec5SDimitry Andric }
6620b57cec5SDimitry Andric 
6635ffd83dbSDimitry Andric Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
6640b57cec5SDimitry Andric                                                  const Expr *Arg) {
6650b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
6660b57cec5SDimitry Andric   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
6670b57cec5SDimitry Andric   if (AddrVal.isUnknownOrUndef())
6680b57cec5SDimitry Andric     return None;
6690b57cec5SDimitry Andric 
6700b57cec5SDimitry Andric   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
6710b57cec5SDimitry Andric   if (!AddrLoc)
6720b57cec5SDimitry Andric     return None;
6730b57cec5SDimitry Andric 
6740b57cec5SDimitry Andric   QualType ArgTy = Arg->getType().getCanonicalType();
6750b57cec5SDimitry Andric   if (!ArgTy->isPointerType())
676480093f4SDimitry Andric     return State->getSVal(*AddrLoc);
6770b57cec5SDimitry Andric 
6780b57cec5SDimitry Andric   QualType ValTy = ArgTy->getPointeeType();
6790b57cec5SDimitry Andric 
6800b57cec5SDimitry Andric   // Do not dereference void pointers. Treat them as byte pointers instead.
6810b57cec5SDimitry Andric   // FIXME: we might want to consider more than just the first byte.
6820b57cec5SDimitry Andric   if (ValTy->isVoidType())
6830b57cec5SDimitry Andric     ValTy = C.getASTContext().CharTy;
6840b57cec5SDimitry Andric 
6850b57cec5SDimitry Andric   return State->getSVal(*AddrLoc, ValTy);
6860b57cec5SDimitry Andric }
6870b57cec5SDimitry Andric 
6880b57cec5SDimitry Andric ProgramStateRef
6895ffd83dbSDimitry Andric GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
6900b57cec5SDimitry Andric                                                    CheckerContext &C) const {
6910b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
6920b57cec5SDimitry Andric 
6930b57cec5SDimitry Andric   // Check for taint in arguments.
6940b57cec5SDimitry Andric   bool IsTainted = true;
6950b57cec5SDimitry Andric   for (unsigned ArgNum : SrcArgs) {
6965ffd83dbSDimitry Andric     if (ArgNum >= Call.getNumArgs())
697a7dea167SDimitry Andric       continue;
698a7dea167SDimitry Andric 
6995ffd83dbSDimitry Andric     if ((IsTainted =
7005ffd83dbSDimitry Andric              isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
7010b57cec5SDimitry Andric       break;
7020b57cec5SDimitry Andric   }
7030b57cec5SDimitry Andric 
7040b57cec5SDimitry Andric   // Check for taint in variadic arguments.
7050b57cec5SDimitry Andric   if (!IsTainted && VariadicType::Src == VarType) {
7060b57cec5SDimitry Andric     // Check if any of the arguments is tainted
7075ffd83dbSDimitry Andric     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
7085ffd83dbSDimitry Andric       if ((IsTainted =
7095ffd83dbSDimitry Andric                isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
7100b57cec5SDimitry Andric         break;
7110b57cec5SDimitry Andric     }
7120b57cec5SDimitry Andric   }
7130b57cec5SDimitry Andric 
7140b57cec5SDimitry Andric   if (PropagationFunc)
7155ffd83dbSDimitry Andric     IsTainted = PropagationFunc(IsTainted, Call, C);
7160b57cec5SDimitry Andric 
7170b57cec5SDimitry Andric   if (!IsTainted)
7180b57cec5SDimitry Andric     return State;
7190b57cec5SDimitry Andric 
7200b57cec5SDimitry Andric   // Mark the arguments which should be tainted after the function returns.
7210b57cec5SDimitry Andric   for (unsigned ArgNum : DstArgs) {
7220b57cec5SDimitry Andric     // Should mark the return value?
7230b57cec5SDimitry Andric     if (ArgNum == ReturnValueIndex) {
7240b57cec5SDimitry Andric       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
7250b57cec5SDimitry Andric       continue;
7260b57cec5SDimitry Andric     }
7270b57cec5SDimitry Andric 
7285ffd83dbSDimitry Andric     if (ArgNum >= Call.getNumArgs())
729a7dea167SDimitry Andric       continue;
730a7dea167SDimitry Andric 
7310b57cec5SDimitry Andric     // Mark the given argument.
7320b57cec5SDimitry Andric     State = State->add<TaintArgsOnPostVisit>(ArgNum);
7330b57cec5SDimitry Andric   }
7340b57cec5SDimitry Andric 
7350b57cec5SDimitry Andric   // Mark all variadic arguments tainted if present.
7360b57cec5SDimitry Andric   if (VariadicType::Dst == VarType) {
7370b57cec5SDimitry Andric     // For all pointer and references that were passed in:
7380b57cec5SDimitry Andric     //   If they are not pointing to const data, mark data as tainted.
7390b57cec5SDimitry Andric     //   TODO: So far we are just going one level down; ideally we'd need to
7400b57cec5SDimitry Andric     //         recurse here.
7415ffd83dbSDimitry Andric     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
7425ffd83dbSDimitry Andric       const Expr *Arg = Call.getArgExpr(i);
7430b57cec5SDimitry Andric       // Process pointer argument.
7440b57cec5SDimitry Andric       const Type *ArgTy = Arg->getType().getTypePtr();
7450b57cec5SDimitry Andric       QualType PType = ArgTy->getPointeeType();
7460b57cec5SDimitry Andric       if ((!PType.isNull() && !PType.isConstQualified()) ||
7475ffd83dbSDimitry Andric           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
7480b57cec5SDimitry Andric         State = State->add<TaintArgsOnPostVisit>(i);
7490b57cec5SDimitry Andric       }
7500b57cec5SDimitry Andric     }
7515ffd83dbSDimitry Andric   }
7520b57cec5SDimitry Andric 
7530b57cec5SDimitry Andric   return State;
7540b57cec5SDimitry Andric }
7550b57cec5SDimitry Andric 
7560b57cec5SDimitry Andric // If argument 0(protocol domain) is network, the return value should get taint.
7575ffd83dbSDimitry Andric bool GenericTaintChecker::TaintPropagationRule::postSocket(
7585ffd83dbSDimitry Andric     bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
7595ffd83dbSDimitry Andric   SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
7600b57cec5SDimitry Andric   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
7610b57cec5SDimitry Andric   // White list the internal communication protocols.
7620b57cec5SDimitry Andric   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
7630b57cec5SDimitry Andric       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
7640b57cec5SDimitry Andric     return false;
7650b57cec5SDimitry Andric   return true;
7660b57cec5SDimitry Andric }
7670b57cec5SDimitry Andric 
7680b57cec5SDimitry Andric bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
7690b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
7700b57cec5SDimitry Andric   SVal Val = C.getSVal(E);
7710b57cec5SDimitry Andric 
7720b57cec5SDimitry Andric   // stdin is a pointer, so it would be a region.
7730b57cec5SDimitry Andric   const MemRegion *MemReg = Val.getAsRegion();
7740b57cec5SDimitry Andric 
7750b57cec5SDimitry Andric   // The region should be symbolic, we do not know it's value.
7765ffd83dbSDimitry Andric   const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
7770b57cec5SDimitry Andric   if (!SymReg)
7780b57cec5SDimitry Andric     return false;
7790b57cec5SDimitry Andric 
7800b57cec5SDimitry Andric   // Get it's symbol and find the declaration region it's pointing to.
7815ffd83dbSDimitry Andric   const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
7820b57cec5SDimitry Andric   if (!Sm)
7830b57cec5SDimitry Andric     return false;
7845ffd83dbSDimitry Andric   const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
7850b57cec5SDimitry Andric   if (!DeclReg)
7860b57cec5SDimitry Andric     return false;
7870b57cec5SDimitry Andric 
7880b57cec5SDimitry Andric   // This region corresponds to a declaration, find out if it's a global/extern
7890b57cec5SDimitry Andric   // variable named stdin with the proper type.
7900b57cec5SDimitry Andric   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
7910b57cec5SDimitry Andric     D = D->getCanonicalDecl();
792*349cc55cSDimitry Andric     if (D->getName().contains("stdin") && D->isExternC()) {
7930b57cec5SDimitry Andric       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
7940b57cec5SDimitry Andric       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
7950b57cec5SDimitry Andric                        C.getASTContext().getFILEType().getCanonicalType())
7960b57cec5SDimitry Andric         return true;
7970b57cec5SDimitry Andric     }
7980b57cec5SDimitry Andric   }
7990b57cec5SDimitry Andric   return false;
8000b57cec5SDimitry Andric }
8010b57cec5SDimitry Andric 
8025ffd83dbSDimitry Andric static bool getPrintfFormatArgumentNum(const CallEvent &Call,
8030b57cec5SDimitry Andric                                        const CheckerContext &C,
804a7dea167SDimitry Andric                                        unsigned &ArgNum) {
8050b57cec5SDimitry Andric   // Find if the function contains a format string argument.
8060b57cec5SDimitry Andric   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
8070b57cec5SDimitry Andric   // vsnprintf, syslog, custom annotated functions.
8085ffd83dbSDimitry Andric   const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
8090b57cec5SDimitry Andric   if (!FDecl)
8100b57cec5SDimitry Andric     return false;
8110b57cec5SDimitry Andric   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
8120b57cec5SDimitry Andric     ArgNum = Format->getFormatIdx() - 1;
8135ffd83dbSDimitry Andric     if ((Format->getType()->getName() == "printf") &&
8145ffd83dbSDimitry Andric         Call.getNumArgs() > ArgNum)
8150b57cec5SDimitry Andric       return true;
8160b57cec5SDimitry Andric   }
8170b57cec5SDimitry Andric 
8180b57cec5SDimitry Andric   // Or if a function is named setproctitle (this is a heuristic).
819*349cc55cSDimitry Andric   if (C.getCalleeName(FDecl).contains("setproctitle")) {
8200b57cec5SDimitry Andric     ArgNum = 0;
8210b57cec5SDimitry Andric     return true;
8220b57cec5SDimitry Andric   }
8230b57cec5SDimitry Andric 
8240b57cec5SDimitry Andric   return false;
8250b57cec5SDimitry Andric }
8260b57cec5SDimitry Andric 
827a7dea167SDimitry Andric bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
8280b57cec5SDimitry Andric                                                   CheckerContext &C) const {
8290b57cec5SDimitry Andric   assert(E);
8300b57cec5SDimitry Andric 
8310b57cec5SDimitry Andric   // Check for taint.
8320b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
8335ffd83dbSDimitry Andric   Optional<SVal> PointedToSVal = getPointeeOf(C, E);
8340b57cec5SDimitry Andric   SVal TaintedSVal;
8350b57cec5SDimitry Andric   if (PointedToSVal && isTainted(State, *PointedToSVal))
8360b57cec5SDimitry Andric     TaintedSVal = *PointedToSVal;
8370b57cec5SDimitry Andric   else if (isTainted(State, E, C.getLocationContext()))
8380b57cec5SDimitry Andric     TaintedSVal = C.getSVal(E);
8390b57cec5SDimitry Andric   else
8400b57cec5SDimitry Andric     return false;
8410b57cec5SDimitry Andric 
8420b57cec5SDimitry Andric   // Generate diagnostic.
8430b57cec5SDimitry Andric   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
8440b57cec5SDimitry Andric     initBugType();
845a7dea167SDimitry Andric     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
8460b57cec5SDimitry Andric     report->addRange(E->getSourceRange());
847a7dea167SDimitry Andric     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
8480b57cec5SDimitry Andric     C.emitReport(std::move(report));
8490b57cec5SDimitry Andric     return true;
8500b57cec5SDimitry Andric   }
8510b57cec5SDimitry Andric   return false;
8520b57cec5SDimitry Andric }
8530b57cec5SDimitry Andric 
8540b57cec5SDimitry Andric bool GenericTaintChecker::checkUncontrolledFormatString(
8555ffd83dbSDimitry Andric     const CallEvent &Call, CheckerContext &C) const {
8560b57cec5SDimitry Andric   // Check if the function contains a format string argument.
857a7dea167SDimitry Andric   unsigned ArgNum = 0;
8585ffd83dbSDimitry Andric   if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
8590b57cec5SDimitry Andric     return false;
8600b57cec5SDimitry Andric 
8610b57cec5SDimitry Andric   // If either the format string content or the pointer itself are tainted,
8620b57cec5SDimitry Andric   // warn.
8635ffd83dbSDimitry Andric   return generateReportIfTainted(Call.getArgExpr(ArgNum),
8640b57cec5SDimitry Andric                                  MsgUncontrolledFormatString, C);
8650b57cec5SDimitry Andric }
8660b57cec5SDimitry Andric 
8675ffd83dbSDimitry Andric bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
8680b57cec5SDimitry Andric                                           CheckerContext &C) const {
8690b57cec5SDimitry Andric   // TODO: It might make sense to run this check on demand. In some cases,
8700b57cec5SDimitry Andric   // we should check if the environment has been cleansed here. We also might
8710b57cec5SDimitry Andric   // need to know if the user was reset before these calls(seteuid).
8720b57cec5SDimitry Andric   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
8730b57cec5SDimitry Andric                         .Case("system", 0)
8740b57cec5SDimitry Andric                         .Case("popen", 0)
8750b57cec5SDimitry Andric                         .Case("execl", 0)
8760b57cec5SDimitry Andric                         .Case("execle", 0)
8770b57cec5SDimitry Andric                         .Case("execlp", 0)
8780b57cec5SDimitry Andric                         .Case("execv", 0)
8790b57cec5SDimitry Andric                         .Case("execvp", 0)
8800b57cec5SDimitry Andric                         .Case("execvP", 0)
8810b57cec5SDimitry Andric                         .Case("execve", 0)
8820b57cec5SDimitry Andric                         .Case("dlopen", 0)
883a7dea167SDimitry Andric                         .Default(InvalidArgIndex);
8840b57cec5SDimitry Andric 
8855ffd83dbSDimitry Andric   if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
8860b57cec5SDimitry Andric     return false;
8870b57cec5SDimitry Andric 
8885ffd83dbSDimitry Andric   return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
8895ffd83dbSDimitry Andric                                  C);
8900b57cec5SDimitry Andric }
8910b57cec5SDimitry Andric 
8920b57cec5SDimitry Andric // TODO: Should this check be a part of the CString checker?
8930b57cec5SDimitry Andric // If yes, should taint be a global setting?
8945ffd83dbSDimitry Andric bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
8950b57cec5SDimitry Andric                                                  CheckerContext &C) const {
8965ffd83dbSDimitry Andric   const auto *FDecl = Call.getDecl()->getAsFunction();
8970b57cec5SDimitry Andric   // If the function has a buffer size argument, set ArgNum.
8980b57cec5SDimitry Andric   unsigned ArgNum = InvalidArgIndex;
8990b57cec5SDimitry Andric   unsigned BId = 0;
9005ffd83dbSDimitry Andric   if ((BId = FDecl->getMemoryFunctionKind())) {
9010b57cec5SDimitry Andric     switch (BId) {
9020b57cec5SDimitry Andric     case Builtin::BImemcpy:
9030b57cec5SDimitry Andric     case Builtin::BImemmove:
9040b57cec5SDimitry Andric     case Builtin::BIstrncpy:
9050b57cec5SDimitry Andric       ArgNum = 2;
9060b57cec5SDimitry Andric       break;
9070b57cec5SDimitry Andric     case Builtin::BIstrndup:
9080b57cec5SDimitry Andric       ArgNum = 1;
9090b57cec5SDimitry Andric       break;
9100b57cec5SDimitry Andric     default:
9110b57cec5SDimitry Andric       break;
9125ffd83dbSDimitry Andric     }
9135ffd83dbSDimitry Andric   }
9140b57cec5SDimitry Andric 
9150b57cec5SDimitry Andric   if (ArgNum == InvalidArgIndex) {
9165ffd83dbSDimitry Andric     using CCtx = CheckerContext;
9175ffd83dbSDimitry Andric     if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
9185ffd83dbSDimitry Andric         CCtx::isCLibraryFunction(FDecl, "calloc") ||
9195ffd83dbSDimitry Andric         CCtx::isCLibraryFunction(FDecl, "alloca"))
9200b57cec5SDimitry Andric       ArgNum = 0;
9215ffd83dbSDimitry Andric     else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
9220b57cec5SDimitry Andric       ArgNum = 3;
9235ffd83dbSDimitry Andric     else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
9240b57cec5SDimitry Andric       ArgNum = 1;
9255ffd83dbSDimitry Andric     else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
9260b57cec5SDimitry Andric       ArgNum = 2;
9270b57cec5SDimitry Andric   }
9280b57cec5SDimitry Andric 
9295ffd83dbSDimitry Andric   return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
9305ffd83dbSDimitry Andric          generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
9315ffd83dbSDimitry Andric                                  C);
9320b57cec5SDimitry Andric }
9330b57cec5SDimitry Andric 
9345ffd83dbSDimitry Andric bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
935480093f4SDimitry Andric                                            const FunctionData &FData,
936a7dea167SDimitry Andric                                            CheckerContext &C) const {
937480093f4SDimitry Andric   auto It = findFunctionInConfig(CustomSinks, FData);
938a7dea167SDimitry Andric   if (It == CustomSinks.end())
939a7dea167SDimitry Andric     return false;
940a7dea167SDimitry Andric 
941480093f4SDimitry Andric   const auto &Value = It->second;
942480093f4SDimitry Andric   const GenericTaintChecker::ArgVector &Args = Value.second;
943a7dea167SDimitry Andric   for (unsigned ArgNum : Args) {
9445ffd83dbSDimitry Andric     if (ArgNum >= Call.getNumArgs())
945a7dea167SDimitry Andric       continue;
946a7dea167SDimitry Andric 
9475ffd83dbSDimitry Andric     if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
948a7dea167SDimitry Andric       return true;
949a7dea167SDimitry Andric   }
950a7dea167SDimitry Andric 
951a7dea167SDimitry Andric   return false;
952a7dea167SDimitry Andric }
953a7dea167SDimitry Andric 
954a7dea167SDimitry Andric void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
955a7dea167SDimitry Andric   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
956a7dea167SDimitry Andric   std::string Option{"Config"};
957a7dea167SDimitry Andric   StringRef ConfigFile =
958a7dea167SDimitry Andric       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
959a7dea167SDimitry Andric   llvm::Optional<TaintConfig> Config =
960a7dea167SDimitry Andric       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
961a7dea167SDimitry Andric   if (Config)
962a7dea167SDimitry Andric     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
9630b57cec5SDimitry Andric }
9640b57cec5SDimitry Andric 
9655ffd83dbSDimitry Andric bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
9660b57cec5SDimitry Andric   return true;
9670b57cec5SDimitry Andric }
968