xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (revision 480093f4440d54b30b3025afeac24b48f2ba7a2e)
10b57cec5SDimitry Andric //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This checker defines the attack surface for generic taint propagation.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric // The taint information produced by it might be useful to other checkers. For
120b57cec5SDimitry Andric // example, checkers should report errors which involve tainted data more
130b57cec5SDimitry Andric // aggressively, even if the involved symbols are under constrained.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #include "Taint.h"
18a7dea167SDimitry Andric #include "Yaml.h"
190b57cec5SDimitry Andric #include "clang/AST/Attr.h"
200b57cec5SDimitry Andric #include "clang/Basic/Builtins.h"
21a7dea167SDimitry Andric #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
220b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
230b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h"
240b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/CheckerManager.h"
250b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
260b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
27a7dea167SDimitry Andric #include "llvm/Support/YAMLTraits.h"
28*480093f4SDimitry Andric #include <algorithm>
29a7dea167SDimitry Andric #include <limits>
30*480093f4SDimitry Andric #include <unordered_map>
310b57cec5SDimitry Andric #include <utility>
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric using namespace clang;
340b57cec5SDimitry Andric using namespace ento;
350b57cec5SDimitry Andric using namespace taint;
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric namespace {
380b57cec5SDimitry Andric class GenericTaintChecker
390b57cec5SDimitry Andric     : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
400b57cec5SDimitry Andric public:
410b57cec5SDimitry Andric   static void *getTag() {
420b57cec5SDimitry Andric     static int Tag;
430b57cec5SDimitry Andric     return &Tag;
440b57cec5SDimitry Andric   }
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
490b57cec5SDimitry Andric 
50a7dea167SDimitry Andric   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
51a7dea167SDimitry Andric                   const char *Sep) const override;
52a7dea167SDimitry Andric 
53a7dea167SDimitry Andric   using ArgVector = SmallVector<unsigned, 2>;
54a7dea167SDimitry Andric   using SignedArgVector = SmallVector<int, 2>;
55a7dea167SDimitry Andric 
56a7dea167SDimitry Andric   enum class VariadicType { None, Src, Dst };
57a7dea167SDimitry Andric 
58a7dea167SDimitry Andric   /// Used to parse the configuration file.
59a7dea167SDimitry Andric   struct TaintConfiguration {
60*480093f4SDimitry Andric     using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
61a7dea167SDimitry Andric 
62a7dea167SDimitry Andric     struct Propagation {
63a7dea167SDimitry Andric       std::string Name;
64*480093f4SDimitry Andric       std::string Scope;
65a7dea167SDimitry Andric       ArgVector SrcArgs;
66a7dea167SDimitry Andric       SignedArgVector DstArgs;
67a7dea167SDimitry Andric       VariadicType VarType;
68a7dea167SDimitry Andric       unsigned VarIndex;
69a7dea167SDimitry Andric     };
70a7dea167SDimitry Andric 
71a7dea167SDimitry Andric     std::vector<Propagation> Propagations;
72*480093f4SDimitry Andric     std::vector<NameScopeArgs> Filters;
73*480093f4SDimitry Andric     std::vector<NameScopeArgs> Sinks;
74a7dea167SDimitry Andric 
75a7dea167SDimitry Andric     TaintConfiguration() = default;
76a7dea167SDimitry Andric     TaintConfiguration(const TaintConfiguration &) = default;
77a7dea167SDimitry Andric     TaintConfiguration(TaintConfiguration &&) = default;
78a7dea167SDimitry Andric     TaintConfiguration &operator=(const TaintConfiguration &) = default;
79a7dea167SDimitry Andric     TaintConfiguration &operator=(TaintConfiguration &&) = default;
80a7dea167SDimitry Andric   };
81a7dea167SDimitry Andric 
82a7dea167SDimitry Andric   /// Convert SignedArgVector to ArgVector.
83a7dea167SDimitry Andric   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
84a7dea167SDimitry Andric                                SignedArgVector Args);
85a7dea167SDimitry Andric 
86a7dea167SDimitry Andric   /// Parse the config.
87a7dea167SDimitry Andric   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
88a7dea167SDimitry Andric                           TaintConfiguration &&Config);
89a7dea167SDimitry Andric 
90a7dea167SDimitry Andric   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
91a7dea167SDimitry Andric   /// Denotes the return vale.
92a7dea167SDimitry Andric   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
93a7dea167SDimitry Andric                                          1};
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric private:
960b57cec5SDimitry Andric   mutable std::unique_ptr<BugType> BT;
970b57cec5SDimitry Andric   void initBugType() const {
980b57cec5SDimitry Andric     if (!BT)
990b57cec5SDimitry Andric       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
1000b57cec5SDimitry Andric   }
1010b57cec5SDimitry Andric 
102*480093f4SDimitry Andric   struct FunctionData {
103*480093f4SDimitry Andric     FunctionData() = delete;
104*480093f4SDimitry Andric     FunctionData(const FunctionData &) = default;
105*480093f4SDimitry Andric     FunctionData(FunctionData &&) = default;
106*480093f4SDimitry Andric     FunctionData &operator=(const FunctionData &) = delete;
107*480093f4SDimitry Andric     FunctionData &operator=(FunctionData &&) = delete;
108*480093f4SDimitry Andric 
109*480093f4SDimitry Andric     static Optional<FunctionData> create(const CallExpr *CE,
110*480093f4SDimitry Andric                                          const CheckerContext &C) {
111*480093f4SDimitry Andric       const FunctionDecl *FDecl = C.getCalleeDecl(CE);
112*480093f4SDimitry Andric       if (!FDecl || (FDecl->getKind() != Decl::Function &&
113*480093f4SDimitry Andric                      FDecl->getKind() != Decl::CXXMethod))
114*480093f4SDimitry Andric         return None;
115*480093f4SDimitry Andric 
116*480093f4SDimitry Andric       StringRef Name = C.getCalleeName(FDecl);
117*480093f4SDimitry Andric       std::string FullName = FDecl->getQualifiedNameAsString();
118*480093f4SDimitry Andric       if (Name.empty() || FullName.empty())
119*480093f4SDimitry Andric         return None;
120*480093f4SDimitry Andric 
121*480093f4SDimitry Andric       return FunctionData{FDecl, Name, FullName};
122*480093f4SDimitry Andric     }
123*480093f4SDimitry Andric 
124*480093f4SDimitry Andric     bool isInScope(StringRef Scope) const {
125*480093f4SDimitry Andric       return StringRef(FullName).startswith(Scope);
126*480093f4SDimitry Andric     }
127*480093f4SDimitry Andric 
128*480093f4SDimitry Andric     const FunctionDecl *const FDecl;
129*480093f4SDimitry Andric     const StringRef Name;
130*480093f4SDimitry Andric     const std::string FullName;
131*480093f4SDimitry Andric   };
132*480093f4SDimitry Andric 
1330b57cec5SDimitry Andric   /// Catch taint related bugs. Check if tainted data is passed to a
134*480093f4SDimitry Andric   /// system call etc. Returns true on matching.
135*480093f4SDimitry Andric   bool checkPre(const CallExpr *CE, const FunctionData &FData,
136*480093f4SDimitry Andric                 CheckerContext &C) const;
1370b57cec5SDimitry Andric 
138*480093f4SDimitry Andric   /// Add taint sources on a pre-visit. Returns true on matching.
139*480093f4SDimitry Andric   bool addSourcesPre(const CallExpr *CE, const FunctionData &FData,
140*480093f4SDimitry Andric                      CheckerContext &C) const;
1410b57cec5SDimitry Andric 
142*480093f4SDimitry Andric   /// Mark filter's arguments not tainted on a pre-visit. Returns true on
143*480093f4SDimitry Andric   /// matching.
144*480093f4SDimitry Andric   bool addFiltersPre(const CallExpr *CE, const FunctionData &FData,
145*480093f4SDimitry Andric                      CheckerContext &C) const;
146*480093f4SDimitry Andric 
147*480093f4SDimitry Andric   /// Propagate taint generated at pre-visit. Returns true on matching.
1480b57cec5SDimitry Andric   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric   /// Check if the region the expression evaluates to is the standard input,
1510b57cec5SDimitry Andric   /// and thus, is tainted.
1520b57cec5SDimitry Andric   static bool isStdin(const Expr *E, CheckerContext &C);
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric   /// Given a pointer argument, return the value it points to.
1550b57cec5SDimitry Andric   static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric   /// Check for CWE-134: Uncontrolled Format String.
158a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
159a7dea167SDimitry Andric       "Untrusted data is used as a format string "
160a7dea167SDimitry Andric       "(CWE-134: Uncontrolled Format String)";
1610b57cec5SDimitry Andric   bool checkUncontrolledFormatString(const CallExpr *CE,
1620b57cec5SDimitry Andric                                      CheckerContext &C) const;
1630b57cec5SDimitry Andric 
1640b57cec5SDimitry Andric   /// Check for:
1650b57cec5SDimitry Andric   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
1660b57cec5SDimitry Andric   /// CWE-78, "Failure to Sanitize Data into an OS Command"
167a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
168a7dea167SDimitry Andric       "Untrusted data is passed to a system call "
169a7dea167SDimitry Andric       "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
1700b57cec5SDimitry Andric   bool checkSystemCall(const CallExpr *CE, StringRef Name,
1710b57cec5SDimitry Andric                        CheckerContext &C) const;
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric   /// Check if tainted data is used as a buffer size ins strn.. functions,
1740b57cec5SDimitry Andric   /// and allocators.
175a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgTaintedBufferSize =
176a7dea167SDimitry Andric       "Untrusted data is used to specify the buffer size "
177a7dea167SDimitry Andric       "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
178a7dea167SDimitry Andric       "for character data and the null terminator)";
1790b57cec5SDimitry Andric   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
1800b57cec5SDimitry Andric                               CheckerContext &C) const;
1810b57cec5SDimitry Andric 
182a7dea167SDimitry Andric   /// Check if tainted data is used as a custom sink's parameter.
183a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgCustomSink =
184a7dea167SDimitry Andric       "Untrusted data is passed to a user-defined sink";
185*480093f4SDimitry Andric   bool checkCustomSinks(const CallExpr *CE, const FunctionData &FData,
1860b57cec5SDimitry Andric                         CheckerContext &C) const;
1870b57cec5SDimitry Andric 
188a7dea167SDimitry Andric   /// Generate a report if the expression is tainted or points to tainted data.
189a7dea167SDimitry Andric   bool generateReportIfTainted(const Expr *E, StringRef Msg,
190a7dea167SDimitry Andric                                CheckerContext &C) const;
191a7dea167SDimitry Andric 
192a7dea167SDimitry Andric   struct TaintPropagationRule;
193*480093f4SDimitry Andric   template <typename T>
194*480093f4SDimitry Andric   using ConfigDataMap =
195*480093f4SDimitry Andric       std::unordered_multimap<std::string, std::pair<std::string, T>>;
196*480093f4SDimitry Andric   using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
197*480093f4SDimitry Andric   using NameArgMap = ConfigDataMap<ArgVector>;
198*480093f4SDimitry Andric 
199*480093f4SDimitry Andric   /// Find a function with the given name and scope. Returns the first match
200*480093f4SDimitry Andric   /// or the end of the map.
201*480093f4SDimitry Andric   template <typename T>
202*480093f4SDimitry Andric   static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
203*480093f4SDimitry Andric                                    const FunctionData &FData);
2040b57cec5SDimitry Andric 
2050b57cec5SDimitry Andric   /// A struct used to specify taint propagation rules for a function.
2060b57cec5SDimitry Andric   ///
2070b57cec5SDimitry Andric   /// If any of the possible taint source arguments is tainted, all of the
2080b57cec5SDimitry Andric   /// destination arguments should also be tainted. Use InvalidArgIndex in the
2090b57cec5SDimitry Andric   /// src list to specify that all of the arguments can introduce taint. Use
2100b57cec5SDimitry Andric   /// InvalidArgIndex in the dst arguments to signify that all the non-const
2110b57cec5SDimitry Andric   /// pointer and reference arguments might be tainted on return. If
2120b57cec5SDimitry Andric   /// ReturnValueIndex is added to the dst list, the return value will be
2130b57cec5SDimitry Andric   /// tainted.
2140b57cec5SDimitry Andric   struct TaintPropagationRule {
2150b57cec5SDimitry Andric     using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
2160b57cec5SDimitry Andric                                          CheckerContext &C);
2170b57cec5SDimitry Andric 
2180b57cec5SDimitry Andric     /// List of arguments which can be taint sources and should be checked.
2190b57cec5SDimitry Andric     ArgVector SrcArgs;
2200b57cec5SDimitry Andric     /// List of arguments which should be tainted on function return.
2210b57cec5SDimitry Andric     ArgVector DstArgs;
2220b57cec5SDimitry Andric     /// Index for the first variadic parameter if exist.
2230b57cec5SDimitry Andric     unsigned VariadicIndex;
2240b57cec5SDimitry Andric     /// Show when a function has variadic parameters. If it has, it marks all
2250b57cec5SDimitry Andric     /// of them as source or destination.
2260b57cec5SDimitry Andric     VariadicType VarType;
2270b57cec5SDimitry Andric     /// Special function for tainted source determination. If defined, it can
2280b57cec5SDimitry Andric     /// override the default behavior.
2290b57cec5SDimitry Andric     PropagationFuncType PropagationFunc;
2300b57cec5SDimitry Andric 
2310b57cec5SDimitry Andric     TaintPropagationRule()
2320b57cec5SDimitry Andric         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
2330b57cec5SDimitry Andric           PropagationFunc(nullptr) {}
2340b57cec5SDimitry Andric 
235a7dea167SDimitry Andric     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
2360b57cec5SDimitry Andric                          VariadicType Var = VariadicType::None,
2370b57cec5SDimitry Andric                          unsigned VarIndex = InvalidArgIndex,
2380b57cec5SDimitry Andric                          PropagationFuncType Func = nullptr)
2390b57cec5SDimitry Andric         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
2400b57cec5SDimitry Andric           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric     /// Get the propagation rule for a given function.
2430b57cec5SDimitry Andric     static TaintPropagationRule
244a7dea167SDimitry Andric     getTaintPropagationRule(const NameRuleMap &CustomPropagations,
245*480093f4SDimitry Andric                             const FunctionData &FData, CheckerContext &C);
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
2480b57cec5SDimitry Andric     void addDstArg(unsigned A) { DstArgs.push_back(A); }
2490b57cec5SDimitry Andric 
2500b57cec5SDimitry Andric     bool isNull() const {
2510b57cec5SDimitry Andric       return SrcArgs.empty() && DstArgs.empty() &&
2520b57cec5SDimitry Andric              VariadicType::None == VarType;
2530b57cec5SDimitry Andric     }
2540b57cec5SDimitry Andric 
2550b57cec5SDimitry Andric     bool isDestinationArgument(unsigned ArgNum) const {
2560b57cec5SDimitry Andric       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
2570b57cec5SDimitry Andric     }
2580b57cec5SDimitry Andric 
2590b57cec5SDimitry Andric     static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
2600b57cec5SDimitry Andric                                            CheckerContext &C) {
2610b57cec5SDimitry Andric       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
2620b57cec5SDimitry Andric         return true;
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric       if (!E->getType().getTypePtr()->isPointerType())
2650b57cec5SDimitry Andric         return false;
2660b57cec5SDimitry Andric 
2670b57cec5SDimitry Andric       Optional<SVal> V = getPointedToSVal(C, E);
2680b57cec5SDimitry Andric       return (V && isTainted(State, *V));
2690b57cec5SDimitry Andric     }
2700b57cec5SDimitry Andric 
2710b57cec5SDimitry Andric     /// Pre-process a function which propagates taint according to the
2720b57cec5SDimitry Andric     /// taint rule.
2730b57cec5SDimitry Andric     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric     // Functions for custom taintedness propagation.
2760b57cec5SDimitry Andric     static bool postSocket(bool IsTainted, const CallExpr *CE,
2770b57cec5SDimitry Andric                            CheckerContext &C);
2780b57cec5SDimitry Andric   };
279a7dea167SDimitry Andric 
280*480093f4SDimitry Andric   /// Defines a map between the propagation function's name, scope
281*480093f4SDimitry Andric   /// and TaintPropagationRule.
282a7dea167SDimitry Andric   NameRuleMap CustomPropagations;
283a7dea167SDimitry Andric 
284*480093f4SDimitry Andric   /// Defines a map between the filter function's name, scope and filtering
285*480093f4SDimitry Andric   /// args.
286a7dea167SDimitry Andric   NameArgMap CustomFilters;
287a7dea167SDimitry Andric 
288*480093f4SDimitry Andric   /// Defines a map between the sink function's name, scope and sinking args.
289a7dea167SDimitry Andric   NameArgMap CustomSinks;
2900b57cec5SDimitry Andric };
2910b57cec5SDimitry Andric 
2920b57cec5SDimitry Andric const unsigned GenericTaintChecker::ReturnValueIndex;
2930b57cec5SDimitry Andric const unsigned GenericTaintChecker::InvalidArgIndex;
2940b57cec5SDimitry Andric 
295a7dea167SDimitry Andric // FIXME: these lines can be removed in C++17
296a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
297a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
298a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
299a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
3000b57cec5SDimitry Andric } // end of anonymous namespace
3010b57cec5SDimitry Andric 
302a7dea167SDimitry Andric using TaintConfig = GenericTaintChecker::TaintConfiguration;
303a7dea167SDimitry Andric 
304a7dea167SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
305*480093f4SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
306a7dea167SDimitry Andric 
307a7dea167SDimitry Andric namespace llvm {
308a7dea167SDimitry Andric namespace yaml {
309a7dea167SDimitry Andric template <> struct MappingTraits<TaintConfig> {
310a7dea167SDimitry Andric   static void mapping(IO &IO, TaintConfig &Config) {
311a7dea167SDimitry Andric     IO.mapOptional("Propagations", Config.Propagations);
312a7dea167SDimitry Andric     IO.mapOptional("Filters", Config.Filters);
313a7dea167SDimitry Andric     IO.mapOptional("Sinks", Config.Sinks);
314a7dea167SDimitry Andric   }
315a7dea167SDimitry Andric };
316a7dea167SDimitry Andric 
317a7dea167SDimitry Andric template <> struct MappingTraits<TaintConfig::Propagation> {
318a7dea167SDimitry Andric   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
319a7dea167SDimitry Andric     IO.mapRequired("Name", Propagation.Name);
320*480093f4SDimitry Andric     IO.mapOptional("Scope", Propagation.Scope);
321a7dea167SDimitry Andric     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
322a7dea167SDimitry Andric     IO.mapOptional("DstArgs", Propagation.DstArgs);
323a7dea167SDimitry Andric     IO.mapOptional("VariadicType", Propagation.VarType,
324a7dea167SDimitry Andric                    GenericTaintChecker::VariadicType::None);
325a7dea167SDimitry Andric     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
326a7dea167SDimitry Andric                    GenericTaintChecker::InvalidArgIndex);
327a7dea167SDimitry Andric   }
328a7dea167SDimitry Andric };
329a7dea167SDimitry Andric 
330a7dea167SDimitry Andric template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
331a7dea167SDimitry Andric   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
332a7dea167SDimitry Andric     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
333a7dea167SDimitry Andric     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
334a7dea167SDimitry Andric     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
335a7dea167SDimitry Andric   }
336a7dea167SDimitry Andric };
337a7dea167SDimitry Andric 
338*480093f4SDimitry Andric template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
339*480093f4SDimitry Andric   static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
340*480093f4SDimitry Andric     IO.mapRequired("Name", std::get<0>(NSA));
341*480093f4SDimitry Andric     IO.mapOptional("Scope", std::get<1>(NSA));
342*480093f4SDimitry Andric     IO.mapRequired("Args", std::get<2>(NSA));
343a7dea167SDimitry Andric   }
344a7dea167SDimitry Andric };
345a7dea167SDimitry Andric } // namespace yaml
346a7dea167SDimitry Andric } // namespace llvm
347a7dea167SDimitry Andric 
3480b57cec5SDimitry Andric /// A set which is used to pass information from call pre-visit instruction
3490b57cec5SDimitry Andric /// to the call post-visit. The values are unsigned integers, which are either
3500b57cec5SDimitry Andric /// ReturnValueIndex, or indexes of the pointer/reference argument, which
3510b57cec5SDimitry Andric /// points to data, which should be tainted on return.
3520b57cec5SDimitry Andric REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
3530b57cec5SDimitry Andric 
354a7dea167SDimitry Andric GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
355a7dea167SDimitry Andric     CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
356a7dea167SDimitry Andric   ArgVector Result;
357a7dea167SDimitry Andric   for (int Arg : Args) {
358a7dea167SDimitry Andric     if (Arg == -1)
359a7dea167SDimitry Andric       Result.push_back(ReturnValueIndex);
360a7dea167SDimitry Andric     else if (Arg < -1) {
361a7dea167SDimitry Andric       Result.push_back(InvalidArgIndex);
362a7dea167SDimitry Andric       Mgr.reportInvalidCheckerOptionValue(
363a7dea167SDimitry Andric           this, Option,
364a7dea167SDimitry Andric           "an argument number for propagation rules greater or equal to -1");
365a7dea167SDimitry Andric     } else
366a7dea167SDimitry Andric       Result.push_back(static_cast<unsigned>(Arg));
367a7dea167SDimitry Andric   }
368a7dea167SDimitry Andric   return Result;
369a7dea167SDimitry Andric }
370a7dea167SDimitry Andric 
371a7dea167SDimitry Andric void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
372a7dea167SDimitry Andric                                              const std::string &Option,
373a7dea167SDimitry Andric                                              TaintConfiguration &&Config) {
374a7dea167SDimitry Andric   for (auto &P : Config.Propagations) {
375*480093f4SDimitry Andric     GenericTaintChecker::CustomPropagations.emplace(
376*480093f4SDimitry Andric         P.Name,
377*480093f4SDimitry Andric         std::make_pair(P.Scope, TaintPropagationRule{
378*480093f4SDimitry Andric                                     std::move(P.SrcArgs),
379*480093f4SDimitry Andric                                     convertToArgVector(Mgr, Option, P.DstArgs),
380*480093f4SDimitry Andric                                     P.VarType, P.VarIndex}));
381a7dea167SDimitry Andric   }
382a7dea167SDimitry Andric 
383a7dea167SDimitry Andric   for (auto &F : Config.Filters) {
384*480093f4SDimitry Andric     GenericTaintChecker::CustomFilters.emplace(
385*480093f4SDimitry Andric         std::get<0>(F),
386*480093f4SDimitry Andric         std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
387a7dea167SDimitry Andric   }
388a7dea167SDimitry Andric 
389a7dea167SDimitry Andric   for (auto &S : Config.Sinks) {
390*480093f4SDimitry Andric     GenericTaintChecker::CustomSinks.emplace(
391*480093f4SDimitry Andric         std::get<0>(S),
392*480093f4SDimitry Andric         std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
393a7dea167SDimitry Andric   }
394a7dea167SDimitry Andric }
395a7dea167SDimitry Andric 
396*480093f4SDimitry Andric template <typename T>
397*480093f4SDimitry Andric auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
398*480093f4SDimitry Andric                                                const FunctionData &FData) {
399*480093f4SDimitry Andric   auto Range = Map.equal_range(FData.Name);
400*480093f4SDimitry Andric   auto It =
401*480093f4SDimitry Andric       std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
402*480093f4SDimitry Andric         const auto &Value = Entry.second;
403*480093f4SDimitry Andric         StringRef Scope = Value.first;
404*480093f4SDimitry Andric         return Scope.empty() || FData.isInScope(Scope);
405*480093f4SDimitry Andric       });
406*480093f4SDimitry Andric   return It != Range.second ? It : Map.end();
407*480093f4SDimitry Andric }
408*480093f4SDimitry Andric 
4090b57cec5SDimitry Andric GenericTaintChecker::TaintPropagationRule
4100b57cec5SDimitry Andric GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
411*480093f4SDimitry Andric     const NameRuleMap &CustomPropagations, const FunctionData &FData,
412*480093f4SDimitry Andric     CheckerContext &C) {
4130b57cec5SDimitry Andric   // TODO: Currently, we might lose precision here: we always mark a return
4140b57cec5SDimitry Andric   // value as tainted even if it's just a pointer, pointing to tainted data.
4150b57cec5SDimitry Andric 
4160b57cec5SDimitry Andric   // Check for exact name match for functions without builtin substitutes.
417*480093f4SDimitry Andric   // Use qualified name, because these are C functions without namespace.
4180b57cec5SDimitry Andric   TaintPropagationRule Rule =
419*480093f4SDimitry Andric       llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
4200b57cec5SDimitry Andric           // Source functions
4210b57cec5SDimitry Andric           // TODO: Add support for vfscanf & family.
4220b57cec5SDimitry Andric           .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
4230b57cec5SDimitry Andric           .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
4240b57cec5SDimitry Andric           .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
4250b57cec5SDimitry Andric           .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
4260b57cec5SDimitry Andric           .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
427a7dea167SDimitry Andric           .Case("getchar_unlocked",
428a7dea167SDimitry Andric                 TaintPropagationRule({}, {ReturnValueIndex}))
4290b57cec5SDimitry Andric           .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
4300b57cec5SDimitry Andric           .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
4310b57cec5SDimitry Andric           .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
4320b57cec5SDimitry Andric           .Case("socket",
4330b57cec5SDimitry Andric                 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
4340b57cec5SDimitry Andric                                      InvalidArgIndex,
4350b57cec5SDimitry Andric                                      &TaintPropagationRule::postSocket))
4360b57cec5SDimitry Andric           .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
4370b57cec5SDimitry Andric           // Propagating functions
4380b57cec5SDimitry Andric           .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
4390b57cec5SDimitry Andric           .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
4400b57cec5SDimitry Andric           .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
4410b57cec5SDimitry Andric           .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
4420b57cec5SDimitry Andric           .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
4430b57cec5SDimitry Andric           .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
4440b57cec5SDimitry Andric           .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
4450b57cec5SDimitry Andric           .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
4460b57cec5SDimitry Andric           .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
4470b57cec5SDimitry Andric           .Case("getdelim", TaintPropagationRule({3}, {0}))
4480b57cec5SDimitry Andric           .Case("getline", TaintPropagationRule({2}, {0}))
4490b57cec5SDimitry Andric           .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
4500b57cec5SDimitry Andric           .Case("pread",
4510b57cec5SDimitry Andric                 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
4520b57cec5SDimitry Andric           .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
4530b57cec5SDimitry Andric           .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
4540b57cec5SDimitry Andric           .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
4550b57cec5SDimitry Andric           .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
4560b57cec5SDimitry Andric           .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
4570b57cec5SDimitry Andric           .Default(TaintPropagationRule());
4580b57cec5SDimitry Andric 
4590b57cec5SDimitry Andric   if (!Rule.isNull())
4600b57cec5SDimitry Andric     return Rule;
4610b57cec5SDimitry Andric 
4620b57cec5SDimitry Andric   // Check if it's one of the memory setting/copying functions.
4630b57cec5SDimitry Andric   // This check is specialized but faster then calling isCLibraryFunction.
464*480093f4SDimitry Andric   const FunctionDecl *FDecl = FData.FDecl;
4650b57cec5SDimitry Andric   unsigned BId = 0;
4660b57cec5SDimitry Andric   if ((BId = FDecl->getMemoryFunctionKind()))
4670b57cec5SDimitry Andric     switch (BId) {
4680b57cec5SDimitry Andric     case Builtin::BImemcpy:
4690b57cec5SDimitry Andric     case Builtin::BImemmove:
4700b57cec5SDimitry Andric     case Builtin::BIstrncpy:
4710b57cec5SDimitry Andric     case Builtin::BIstrncat:
4720b57cec5SDimitry Andric       return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
4730b57cec5SDimitry Andric     case Builtin::BIstrlcpy:
4740b57cec5SDimitry Andric     case Builtin::BIstrlcat:
4750b57cec5SDimitry Andric       return TaintPropagationRule({1, 2}, {0});
4760b57cec5SDimitry Andric     case Builtin::BIstrndup:
4770b57cec5SDimitry Andric       return TaintPropagationRule({0, 1}, {ReturnValueIndex});
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric     default:
4800b57cec5SDimitry Andric       break;
4810b57cec5SDimitry Andric     };
4820b57cec5SDimitry Andric 
4830b57cec5SDimitry Andric   // Process all other functions which could be defined as builtins.
4840b57cec5SDimitry Andric   if (Rule.isNull()) {
4850b57cec5SDimitry Andric     if (C.isCLibraryFunction(FDecl, "snprintf"))
4860b57cec5SDimitry Andric       return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
4870b57cec5SDimitry Andric                                   3);
4880b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "sprintf"))
4890b57cec5SDimitry Andric       return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
4900b57cec5SDimitry Andric                                   2);
4910b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
4920b57cec5SDimitry Andric              C.isCLibraryFunction(FDecl, "stpcpy") ||
4930b57cec5SDimitry Andric              C.isCLibraryFunction(FDecl, "strcat"))
4940b57cec5SDimitry Andric       return TaintPropagationRule({1}, {0, ReturnValueIndex});
4950b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "bcopy"))
4960b57cec5SDimitry Andric       return TaintPropagationRule({0, 2}, {1});
4970b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "strdup") ||
4980b57cec5SDimitry Andric              C.isCLibraryFunction(FDecl, "strdupa"))
4990b57cec5SDimitry Andric       return TaintPropagationRule({0}, {ReturnValueIndex});
5000b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
5010b57cec5SDimitry Andric       return TaintPropagationRule({0}, {ReturnValueIndex});
5020b57cec5SDimitry Andric   }
5030b57cec5SDimitry Andric 
5040b57cec5SDimitry Andric   // Skipping the following functions, since they might be used for cleansing
5050b57cec5SDimitry Andric   // or smart memory copy:
5060b57cec5SDimitry Andric   // - memccpy - copying until hitting a special character.
5070b57cec5SDimitry Andric 
508*480093f4SDimitry Andric   auto It = findFunctionInConfig(CustomPropagations, FData);
509*480093f4SDimitry Andric   if (It != CustomPropagations.end()) {
510*480093f4SDimitry Andric     const auto &Value = It->second;
511*480093f4SDimitry Andric     return Value.second;
512*480093f4SDimitry Andric   }
513a7dea167SDimitry Andric 
5140b57cec5SDimitry Andric   return TaintPropagationRule();
5150b57cec5SDimitry Andric }
5160b57cec5SDimitry Andric 
5170b57cec5SDimitry Andric void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
5180b57cec5SDimitry Andric                                        CheckerContext &C) const {
519*480093f4SDimitry Andric   Optional<FunctionData> FData = FunctionData::create(CE, C);
520*480093f4SDimitry Andric   if (!FData)
521*480093f4SDimitry Andric     return;
522*480093f4SDimitry Andric 
5230b57cec5SDimitry Andric   // Check for taintedness related errors first: system call, uncontrolled
5240b57cec5SDimitry Andric   // format string, tainted buffer size.
525*480093f4SDimitry Andric   if (checkPre(CE, *FData, C))
5260b57cec5SDimitry Andric     return;
5270b57cec5SDimitry Andric 
5280b57cec5SDimitry Andric   // Marks the function's arguments and/or return value tainted if it present in
5290b57cec5SDimitry Andric   // the list.
530*480093f4SDimitry Andric   if (addSourcesPre(CE, *FData, C))
531*480093f4SDimitry Andric     return;
532*480093f4SDimitry Andric 
533*480093f4SDimitry Andric   addFiltersPre(CE, *FData, C);
5340b57cec5SDimitry Andric }
5350b57cec5SDimitry Andric 
5360b57cec5SDimitry Andric void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
5370b57cec5SDimitry Andric                                         CheckerContext &C) const {
5380b57cec5SDimitry Andric   // Set the marked values as tainted. The return value only accessible from
5390b57cec5SDimitry Andric   // checkPostStmt.
5400b57cec5SDimitry Andric   propagateFromPre(CE, C);
5410b57cec5SDimitry Andric }
5420b57cec5SDimitry Andric 
5430b57cec5SDimitry Andric void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
5440b57cec5SDimitry Andric                                      const char *NL, const char *Sep) const {
5450b57cec5SDimitry Andric   printTaint(State, Out, NL, Sep);
5460b57cec5SDimitry Andric }
5470b57cec5SDimitry Andric 
548*480093f4SDimitry Andric bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
549*480093f4SDimitry Andric                                         const FunctionData &FData,
5500b57cec5SDimitry Andric                                         CheckerContext &C) const {
5510b57cec5SDimitry Andric   // First, try generating a propagation rule for this function.
552a7dea167SDimitry Andric   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
553*480093f4SDimitry Andric       this->CustomPropagations, FData, C);
5540b57cec5SDimitry Andric   if (!Rule.isNull()) {
555*480093f4SDimitry Andric     ProgramStateRef State = Rule.process(CE, C);
556*480093f4SDimitry Andric     if (State) {
5570b57cec5SDimitry Andric       C.addTransition(State);
558*480093f4SDimitry Andric       return true;
559*480093f4SDimitry Andric     }
560*480093f4SDimitry Andric   }
561*480093f4SDimitry Andric   return false;
5620b57cec5SDimitry Andric }
5630b57cec5SDimitry Andric 
564*480093f4SDimitry Andric bool GenericTaintChecker::addFiltersPre(const CallExpr *CE,
565*480093f4SDimitry Andric                                         const FunctionData &FData,
566*480093f4SDimitry Andric                                         CheckerContext &C) const {
567*480093f4SDimitry Andric   auto It = findFunctionInConfig(CustomFilters, FData);
568*480093f4SDimitry Andric   if (It == CustomFilters.end())
569*480093f4SDimitry Andric     return false;
570*480093f4SDimitry Andric 
571*480093f4SDimitry Andric   ProgramStateRef State = C.getState();
572*480093f4SDimitry Andric   const auto &Value = It->second;
573*480093f4SDimitry Andric   const ArgVector &Args = Value.second;
574*480093f4SDimitry Andric   for (unsigned ArgNum : Args) {
575*480093f4SDimitry Andric     if (ArgNum >= CE->getNumArgs())
576*480093f4SDimitry Andric       continue;
577*480093f4SDimitry Andric 
578*480093f4SDimitry Andric     const Expr *Arg = CE->getArg(ArgNum);
579*480093f4SDimitry Andric     Optional<SVal> V = getPointedToSVal(C, Arg);
580*480093f4SDimitry Andric     if (V)
581*480093f4SDimitry Andric       State = removeTaint(State, *V);
582*480093f4SDimitry Andric   }
583*480093f4SDimitry Andric 
584*480093f4SDimitry Andric   if (State != C.getState()) {
5850b57cec5SDimitry Andric     C.addTransition(State);
586*480093f4SDimitry Andric     return true;
587*480093f4SDimitry Andric   }
588*480093f4SDimitry Andric   return false;
5890b57cec5SDimitry Andric }
5900b57cec5SDimitry Andric 
5910b57cec5SDimitry Andric bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
5920b57cec5SDimitry Andric                                            CheckerContext &C) const {
5930b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
5940b57cec5SDimitry Andric 
5950b57cec5SDimitry Andric   // Depending on what was tainted at pre-visit, we determined a set of
5960b57cec5SDimitry Andric   // arguments which should be tainted after the function returns. These are
5970b57cec5SDimitry Andric   // stored in the state as TaintArgsOnPostVisit set.
5980b57cec5SDimitry Andric   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
5990b57cec5SDimitry Andric   if (TaintArgs.isEmpty())
6000b57cec5SDimitry Andric     return false;
6010b57cec5SDimitry Andric 
6020b57cec5SDimitry Andric   for (unsigned ArgNum : TaintArgs) {
6030b57cec5SDimitry Andric     // Special handling for the tainted return value.
6040b57cec5SDimitry Andric     if (ArgNum == ReturnValueIndex) {
6050b57cec5SDimitry Andric       State = addTaint(State, CE, C.getLocationContext());
6060b57cec5SDimitry Andric       continue;
6070b57cec5SDimitry Andric     }
6080b57cec5SDimitry Andric 
6090b57cec5SDimitry Andric     // The arguments are pointer arguments. The data they are pointing at is
6100b57cec5SDimitry Andric     // tainted after the call.
6110b57cec5SDimitry Andric     if (CE->getNumArgs() < (ArgNum + 1))
6120b57cec5SDimitry Andric       return false;
6130b57cec5SDimitry Andric     const Expr *Arg = CE->getArg(ArgNum);
6140b57cec5SDimitry Andric     Optional<SVal> V = getPointedToSVal(C, Arg);
6150b57cec5SDimitry Andric     if (V)
6160b57cec5SDimitry Andric       State = addTaint(State, *V);
6170b57cec5SDimitry Andric   }
6180b57cec5SDimitry Andric 
6190b57cec5SDimitry Andric   // Clear up the taint info from the state.
6200b57cec5SDimitry Andric   State = State->remove<TaintArgsOnPostVisit>();
6210b57cec5SDimitry Andric 
6220b57cec5SDimitry Andric   if (State != C.getState()) {
6230b57cec5SDimitry Andric     C.addTransition(State);
6240b57cec5SDimitry Andric     return true;
6250b57cec5SDimitry Andric   }
6260b57cec5SDimitry Andric   return false;
6270b57cec5SDimitry Andric }
6280b57cec5SDimitry Andric 
6290b57cec5SDimitry Andric bool GenericTaintChecker::checkPre(const CallExpr *CE,
630*480093f4SDimitry Andric                                    const FunctionData &FData,
6310b57cec5SDimitry Andric                                    CheckerContext &C) const {
6320b57cec5SDimitry Andric 
6330b57cec5SDimitry Andric   if (checkUncontrolledFormatString(CE, C))
6340b57cec5SDimitry Andric     return true;
6350b57cec5SDimitry Andric 
636*480093f4SDimitry Andric   if (checkSystemCall(CE, FData.Name, C))
6370b57cec5SDimitry Andric     return true;
6380b57cec5SDimitry Andric 
639*480093f4SDimitry Andric   if (checkTaintedBufferSize(CE, FData.FDecl, C))
6400b57cec5SDimitry Andric     return true;
6410b57cec5SDimitry Andric 
642*480093f4SDimitry Andric   if (checkCustomSinks(CE, FData, C))
643a7dea167SDimitry Andric     return true;
644a7dea167SDimitry Andric 
6450b57cec5SDimitry Andric   return false;
6460b57cec5SDimitry Andric }
6470b57cec5SDimitry Andric 
6480b57cec5SDimitry Andric Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
6490b57cec5SDimitry Andric                                                      const Expr *Arg) {
6500b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
6510b57cec5SDimitry Andric   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
6520b57cec5SDimitry Andric   if (AddrVal.isUnknownOrUndef())
6530b57cec5SDimitry Andric     return None;
6540b57cec5SDimitry Andric 
6550b57cec5SDimitry Andric   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
6560b57cec5SDimitry Andric   if (!AddrLoc)
6570b57cec5SDimitry Andric     return None;
6580b57cec5SDimitry Andric 
6590b57cec5SDimitry Andric   QualType ArgTy = Arg->getType().getCanonicalType();
6600b57cec5SDimitry Andric   if (!ArgTy->isPointerType())
661*480093f4SDimitry Andric     return State->getSVal(*AddrLoc);
6620b57cec5SDimitry Andric 
6630b57cec5SDimitry Andric   QualType ValTy = ArgTy->getPointeeType();
6640b57cec5SDimitry Andric 
6650b57cec5SDimitry Andric   // Do not dereference void pointers. Treat them as byte pointers instead.
6660b57cec5SDimitry Andric   // FIXME: we might want to consider more than just the first byte.
6670b57cec5SDimitry Andric   if (ValTy->isVoidType())
6680b57cec5SDimitry Andric     ValTy = C.getASTContext().CharTy;
6690b57cec5SDimitry Andric 
6700b57cec5SDimitry Andric   return State->getSVal(*AddrLoc, ValTy);
6710b57cec5SDimitry Andric }
6720b57cec5SDimitry Andric 
6730b57cec5SDimitry Andric ProgramStateRef
6740b57cec5SDimitry Andric GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
6750b57cec5SDimitry Andric                                                    CheckerContext &C) const {
6760b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
6770b57cec5SDimitry Andric 
6780b57cec5SDimitry Andric   // Check for taint in arguments.
6790b57cec5SDimitry Andric   bool IsTainted = true;
6800b57cec5SDimitry Andric   for (unsigned ArgNum : SrcArgs) {
6810b57cec5SDimitry Andric     if (ArgNum >= CE->getNumArgs())
682a7dea167SDimitry Andric       continue;
683a7dea167SDimitry Andric 
6840b57cec5SDimitry Andric     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
6850b57cec5SDimitry Andric       break;
6860b57cec5SDimitry Andric   }
6870b57cec5SDimitry Andric 
6880b57cec5SDimitry Andric   // Check for taint in variadic arguments.
6890b57cec5SDimitry Andric   if (!IsTainted && VariadicType::Src == VarType) {
6900b57cec5SDimitry Andric     // Check if any of the arguments is tainted
691a7dea167SDimitry Andric     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
6920b57cec5SDimitry Andric       if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
6930b57cec5SDimitry Andric         break;
6940b57cec5SDimitry Andric     }
6950b57cec5SDimitry Andric   }
6960b57cec5SDimitry Andric 
6970b57cec5SDimitry Andric   if (PropagationFunc)
6980b57cec5SDimitry Andric     IsTainted = PropagationFunc(IsTainted, CE, C);
6990b57cec5SDimitry Andric 
7000b57cec5SDimitry Andric   if (!IsTainted)
7010b57cec5SDimitry Andric     return State;
7020b57cec5SDimitry Andric 
7030b57cec5SDimitry Andric   // Mark the arguments which should be tainted after the function returns.
7040b57cec5SDimitry Andric   for (unsigned ArgNum : DstArgs) {
7050b57cec5SDimitry Andric     // Should mark the return value?
7060b57cec5SDimitry Andric     if (ArgNum == ReturnValueIndex) {
7070b57cec5SDimitry Andric       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
7080b57cec5SDimitry Andric       continue;
7090b57cec5SDimitry Andric     }
7100b57cec5SDimitry Andric 
711a7dea167SDimitry Andric     if (ArgNum >= CE->getNumArgs())
712a7dea167SDimitry Andric       continue;
713a7dea167SDimitry Andric 
7140b57cec5SDimitry Andric     // Mark the given argument.
7150b57cec5SDimitry Andric     State = State->add<TaintArgsOnPostVisit>(ArgNum);
7160b57cec5SDimitry Andric   }
7170b57cec5SDimitry Andric 
7180b57cec5SDimitry Andric   // Mark all variadic arguments tainted if present.
7190b57cec5SDimitry Andric   if (VariadicType::Dst == VarType) {
7200b57cec5SDimitry Andric     // For all pointer and references that were passed in:
7210b57cec5SDimitry Andric     //   If they are not pointing to const data, mark data as tainted.
7220b57cec5SDimitry Andric     //   TODO: So far we are just going one level down; ideally we'd need to
7230b57cec5SDimitry Andric     //         recurse here.
724a7dea167SDimitry Andric     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
7250b57cec5SDimitry Andric       const Expr *Arg = CE->getArg(i);
7260b57cec5SDimitry Andric       // Process pointer argument.
7270b57cec5SDimitry Andric       const Type *ArgTy = Arg->getType().getTypePtr();
7280b57cec5SDimitry Andric       QualType PType = ArgTy->getPointeeType();
7290b57cec5SDimitry Andric       if ((!PType.isNull() && !PType.isConstQualified()) ||
7300b57cec5SDimitry Andric           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
7310b57cec5SDimitry Andric         State = State->add<TaintArgsOnPostVisit>(i);
7320b57cec5SDimitry Andric     }
7330b57cec5SDimitry Andric   }
7340b57cec5SDimitry Andric 
7350b57cec5SDimitry Andric   return State;
7360b57cec5SDimitry Andric }
7370b57cec5SDimitry Andric 
7380b57cec5SDimitry Andric // If argument 0(protocol domain) is network, the return value should get taint.
7390b57cec5SDimitry Andric bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
7400b57cec5SDimitry Andric                                                            const CallExpr *CE,
7410b57cec5SDimitry Andric                                                            CheckerContext &C) {
7420b57cec5SDimitry Andric   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
7430b57cec5SDimitry Andric   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
7440b57cec5SDimitry Andric   // White list the internal communication protocols.
7450b57cec5SDimitry Andric   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
7460b57cec5SDimitry Andric       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
7470b57cec5SDimitry Andric     return false;
7480b57cec5SDimitry Andric 
7490b57cec5SDimitry Andric   return true;
7500b57cec5SDimitry Andric }
7510b57cec5SDimitry Andric 
7520b57cec5SDimitry Andric bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
7530b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
7540b57cec5SDimitry Andric   SVal Val = C.getSVal(E);
7550b57cec5SDimitry Andric 
7560b57cec5SDimitry Andric   // stdin is a pointer, so it would be a region.
7570b57cec5SDimitry Andric   const MemRegion *MemReg = Val.getAsRegion();
7580b57cec5SDimitry Andric 
7590b57cec5SDimitry Andric   // The region should be symbolic, we do not know it's value.
7600b57cec5SDimitry Andric   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
7610b57cec5SDimitry Andric   if (!SymReg)
7620b57cec5SDimitry Andric     return false;
7630b57cec5SDimitry Andric 
7640b57cec5SDimitry Andric   // Get it's symbol and find the declaration region it's pointing to.
7650b57cec5SDimitry Andric   const SymbolRegionValue *Sm =
7660b57cec5SDimitry Andric       dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
7670b57cec5SDimitry Andric   if (!Sm)
7680b57cec5SDimitry Andric     return false;
7690b57cec5SDimitry Andric   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
7700b57cec5SDimitry Andric   if (!DeclReg)
7710b57cec5SDimitry Andric     return false;
7720b57cec5SDimitry Andric 
7730b57cec5SDimitry Andric   // This region corresponds to a declaration, find out if it's a global/extern
7740b57cec5SDimitry Andric   // variable named stdin with the proper type.
7750b57cec5SDimitry Andric   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
7760b57cec5SDimitry Andric     D = D->getCanonicalDecl();
7770b57cec5SDimitry Andric     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
7780b57cec5SDimitry Andric       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
7790b57cec5SDimitry Andric       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
7800b57cec5SDimitry Andric                        C.getASTContext().getFILEType().getCanonicalType())
7810b57cec5SDimitry Andric         return true;
7820b57cec5SDimitry Andric     }
7830b57cec5SDimitry Andric   }
7840b57cec5SDimitry Andric   return false;
7850b57cec5SDimitry Andric }
7860b57cec5SDimitry Andric 
7870b57cec5SDimitry Andric static bool getPrintfFormatArgumentNum(const CallExpr *CE,
7880b57cec5SDimitry Andric                                        const CheckerContext &C,
789a7dea167SDimitry Andric                                        unsigned &ArgNum) {
7900b57cec5SDimitry Andric   // Find if the function contains a format string argument.
7910b57cec5SDimitry Andric   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
7920b57cec5SDimitry Andric   // vsnprintf, syslog, custom annotated functions.
7930b57cec5SDimitry Andric   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
7940b57cec5SDimitry Andric   if (!FDecl)
7950b57cec5SDimitry Andric     return false;
7960b57cec5SDimitry Andric   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
7970b57cec5SDimitry Andric     ArgNum = Format->getFormatIdx() - 1;
7980b57cec5SDimitry Andric     if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
7990b57cec5SDimitry Andric       return true;
8000b57cec5SDimitry Andric   }
8010b57cec5SDimitry Andric 
8020b57cec5SDimitry Andric   // Or if a function is named setproctitle (this is a heuristic).
8030b57cec5SDimitry Andric   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
8040b57cec5SDimitry Andric     ArgNum = 0;
8050b57cec5SDimitry Andric     return true;
8060b57cec5SDimitry Andric   }
8070b57cec5SDimitry Andric 
8080b57cec5SDimitry Andric   return false;
8090b57cec5SDimitry Andric }
8100b57cec5SDimitry Andric 
811a7dea167SDimitry Andric bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
8120b57cec5SDimitry Andric                                                   CheckerContext &C) const {
8130b57cec5SDimitry Andric   assert(E);
8140b57cec5SDimitry Andric 
8150b57cec5SDimitry Andric   // Check for taint.
8160b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
8170b57cec5SDimitry Andric   Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
8180b57cec5SDimitry Andric   SVal TaintedSVal;
8190b57cec5SDimitry Andric   if (PointedToSVal && isTainted(State, *PointedToSVal))
8200b57cec5SDimitry Andric     TaintedSVal = *PointedToSVal;
8210b57cec5SDimitry Andric   else if (isTainted(State, E, C.getLocationContext()))
8220b57cec5SDimitry Andric     TaintedSVal = C.getSVal(E);
8230b57cec5SDimitry Andric   else
8240b57cec5SDimitry Andric     return false;
8250b57cec5SDimitry Andric 
8260b57cec5SDimitry Andric   // Generate diagnostic.
8270b57cec5SDimitry Andric   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
8280b57cec5SDimitry Andric     initBugType();
829a7dea167SDimitry Andric     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
8300b57cec5SDimitry Andric     report->addRange(E->getSourceRange());
831a7dea167SDimitry Andric     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
8320b57cec5SDimitry Andric     C.emitReport(std::move(report));
8330b57cec5SDimitry Andric     return true;
8340b57cec5SDimitry Andric   }
8350b57cec5SDimitry Andric   return false;
8360b57cec5SDimitry Andric }
8370b57cec5SDimitry Andric 
8380b57cec5SDimitry Andric bool GenericTaintChecker::checkUncontrolledFormatString(
8390b57cec5SDimitry Andric     const CallExpr *CE, CheckerContext &C) const {
8400b57cec5SDimitry Andric   // Check if the function contains a format string argument.
841a7dea167SDimitry Andric   unsigned ArgNum = 0;
8420b57cec5SDimitry Andric   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
8430b57cec5SDimitry Andric     return false;
8440b57cec5SDimitry Andric 
8450b57cec5SDimitry Andric   // If either the format string content or the pointer itself are tainted,
8460b57cec5SDimitry Andric   // warn.
8470b57cec5SDimitry Andric   return generateReportIfTainted(CE->getArg(ArgNum),
8480b57cec5SDimitry Andric                                  MsgUncontrolledFormatString, C);
8490b57cec5SDimitry Andric }
8500b57cec5SDimitry Andric 
8510b57cec5SDimitry Andric bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
8520b57cec5SDimitry Andric                                           CheckerContext &C) const {
8530b57cec5SDimitry Andric   // TODO: It might make sense to run this check on demand. In some cases,
8540b57cec5SDimitry Andric   // we should check if the environment has been cleansed here. We also might
8550b57cec5SDimitry Andric   // need to know if the user was reset before these calls(seteuid).
8560b57cec5SDimitry Andric   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
8570b57cec5SDimitry Andric                         .Case("system", 0)
8580b57cec5SDimitry Andric                         .Case("popen", 0)
8590b57cec5SDimitry Andric                         .Case("execl", 0)
8600b57cec5SDimitry Andric                         .Case("execle", 0)
8610b57cec5SDimitry Andric                         .Case("execlp", 0)
8620b57cec5SDimitry Andric                         .Case("execv", 0)
8630b57cec5SDimitry Andric                         .Case("execvp", 0)
8640b57cec5SDimitry Andric                         .Case("execvP", 0)
8650b57cec5SDimitry Andric                         .Case("execve", 0)
8660b57cec5SDimitry Andric                         .Case("dlopen", 0)
867a7dea167SDimitry Andric                         .Default(InvalidArgIndex);
8680b57cec5SDimitry Andric 
869a7dea167SDimitry Andric   if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
8700b57cec5SDimitry Andric     return false;
8710b57cec5SDimitry Andric 
8720b57cec5SDimitry Andric   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
8730b57cec5SDimitry Andric }
8740b57cec5SDimitry Andric 
8750b57cec5SDimitry Andric // TODO: Should this check be a part of the CString checker?
8760b57cec5SDimitry Andric // If yes, should taint be a global setting?
8770b57cec5SDimitry Andric bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
8780b57cec5SDimitry Andric                                                  const FunctionDecl *FDecl,
8790b57cec5SDimitry Andric                                                  CheckerContext &C) const {
8800b57cec5SDimitry Andric   // If the function has a buffer size argument, set ArgNum.
8810b57cec5SDimitry Andric   unsigned ArgNum = InvalidArgIndex;
8820b57cec5SDimitry Andric   unsigned BId = 0;
8830b57cec5SDimitry Andric   if ((BId = FDecl->getMemoryFunctionKind()))
8840b57cec5SDimitry Andric     switch (BId) {
8850b57cec5SDimitry Andric     case Builtin::BImemcpy:
8860b57cec5SDimitry Andric     case Builtin::BImemmove:
8870b57cec5SDimitry Andric     case Builtin::BIstrncpy:
8880b57cec5SDimitry Andric       ArgNum = 2;
8890b57cec5SDimitry Andric       break;
8900b57cec5SDimitry Andric     case Builtin::BIstrndup:
8910b57cec5SDimitry Andric       ArgNum = 1;
8920b57cec5SDimitry Andric       break;
8930b57cec5SDimitry Andric     default:
8940b57cec5SDimitry Andric       break;
8950b57cec5SDimitry Andric     };
8960b57cec5SDimitry Andric 
8970b57cec5SDimitry Andric   if (ArgNum == InvalidArgIndex) {
8980b57cec5SDimitry Andric     if (C.isCLibraryFunction(FDecl, "malloc") ||
8990b57cec5SDimitry Andric         C.isCLibraryFunction(FDecl, "calloc") ||
9000b57cec5SDimitry Andric         C.isCLibraryFunction(FDecl, "alloca"))
9010b57cec5SDimitry Andric       ArgNum = 0;
9020b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "memccpy"))
9030b57cec5SDimitry Andric       ArgNum = 3;
9040b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "realloc"))
9050b57cec5SDimitry Andric       ArgNum = 1;
9060b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "bcopy"))
9070b57cec5SDimitry Andric       ArgNum = 2;
9080b57cec5SDimitry Andric   }
9090b57cec5SDimitry Andric 
9100b57cec5SDimitry Andric   return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
9110b57cec5SDimitry Andric          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
9120b57cec5SDimitry Andric }
9130b57cec5SDimitry Andric 
914*480093f4SDimitry Andric bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE,
915*480093f4SDimitry Andric                                            const FunctionData &FData,
916a7dea167SDimitry Andric                                            CheckerContext &C) const {
917*480093f4SDimitry Andric   auto It = findFunctionInConfig(CustomSinks, FData);
918a7dea167SDimitry Andric   if (It == CustomSinks.end())
919a7dea167SDimitry Andric     return false;
920a7dea167SDimitry Andric 
921*480093f4SDimitry Andric   const auto &Value = It->second;
922*480093f4SDimitry Andric   const GenericTaintChecker::ArgVector &Args = Value.second;
923a7dea167SDimitry Andric   for (unsigned ArgNum : Args) {
924a7dea167SDimitry Andric     if (ArgNum >= CE->getNumArgs())
925a7dea167SDimitry Andric       continue;
926a7dea167SDimitry Andric 
927a7dea167SDimitry Andric     if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
928a7dea167SDimitry Andric       return true;
929a7dea167SDimitry Andric   }
930a7dea167SDimitry Andric 
931a7dea167SDimitry Andric   return false;
932a7dea167SDimitry Andric }
933a7dea167SDimitry Andric 
934a7dea167SDimitry Andric void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
935a7dea167SDimitry Andric   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
936a7dea167SDimitry Andric   std::string Option{"Config"};
937a7dea167SDimitry Andric   StringRef ConfigFile =
938a7dea167SDimitry Andric       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
939a7dea167SDimitry Andric   llvm::Optional<TaintConfig> Config =
940a7dea167SDimitry Andric       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
941a7dea167SDimitry Andric   if (Config)
942a7dea167SDimitry Andric     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
9430b57cec5SDimitry Andric }
9440b57cec5SDimitry Andric 
9450b57cec5SDimitry Andric bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
9460b57cec5SDimitry Andric   return true;
9470b57cec5SDimitry Andric }
948