xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (revision a7dea1671b87c07d2d266f836bfa8b58efc7c134)
//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This checker defines the attack surface for generic taint propagation.
//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
//
//===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #include "Taint.h"
18*a7dea167SDimitry Andric #include "Yaml.h"
190b57cec5SDimitry Andric #include "clang/AST/Attr.h"
200b57cec5SDimitry Andric #include "clang/Basic/Builtins.h"
21*a7dea167SDimitry Andric #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
220b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
230b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h"
240b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/CheckerManager.h"
250b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
260b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
27*a7dea167SDimitry Andric #include "llvm/ADT/StringMap.h"
28*a7dea167SDimitry Andric #include "llvm/Support/YAMLTraits.h"
29*a7dea167SDimitry Andric #include <limits>
300b57cec5SDimitry Andric #include <utility>
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric using namespace clang;
330b57cec5SDimitry Andric using namespace ento;
340b57cec5SDimitry Andric using namespace taint;
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric namespace {
370b57cec5SDimitry Andric class GenericTaintChecker
380b57cec5SDimitry Andric     : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
390b57cec5SDimitry Andric public:
400b57cec5SDimitry Andric   static void *getTag() {
410b57cec5SDimitry Andric     static int Tag;
420b57cec5SDimitry Andric     return &Tag;
430b57cec5SDimitry Andric   }
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
480b57cec5SDimitry Andric 
49*a7dea167SDimitry Andric   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
50*a7dea167SDimitry Andric                   const char *Sep) const override;
51*a7dea167SDimitry Andric 
52*a7dea167SDimitry Andric   using ArgVector = SmallVector<unsigned, 2>;
53*a7dea167SDimitry Andric   using SignedArgVector = SmallVector<int, 2>;
54*a7dea167SDimitry Andric 
55*a7dea167SDimitry Andric   enum class VariadicType { None, Src, Dst };
56*a7dea167SDimitry Andric 
57*a7dea167SDimitry Andric   /// Used to parse the configuration file.
58*a7dea167SDimitry Andric   struct TaintConfiguration {
59*a7dea167SDimitry Andric     using NameArgsPair = std::pair<std::string, ArgVector>;
60*a7dea167SDimitry Andric 
61*a7dea167SDimitry Andric     struct Propagation {
62*a7dea167SDimitry Andric       std::string Name;
63*a7dea167SDimitry Andric       ArgVector SrcArgs;
64*a7dea167SDimitry Andric       SignedArgVector DstArgs;
65*a7dea167SDimitry Andric       VariadicType VarType;
66*a7dea167SDimitry Andric       unsigned VarIndex;
67*a7dea167SDimitry Andric     };
68*a7dea167SDimitry Andric 
69*a7dea167SDimitry Andric     std::vector<Propagation> Propagations;
70*a7dea167SDimitry Andric     std::vector<NameArgsPair> Filters;
71*a7dea167SDimitry Andric     std::vector<NameArgsPair> Sinks;
72*a7dea167SDimitry Andric 
73*a7dea167SDimitry Andric     TaintConfiguration() = default;
74*a7dea167SDimitry Andric     TaintConfiguration(const TaintConfiguration &) = default;
75*a7dea167SDimitry Andric     TaintConfiguration(TaintConfiguration &&) = default;
76*a7dea167SDimitry Andric     TaintConfiguration &operator=(const TaintConfiguration &) = default;
77*a7dea167SDimitry Andric     TaintConfiguration &operator=(TaintConfiguration &&) = default;
78*a7dea167SDimitry Andric   };
79*a7dea167SDimitry Andric 
80*a7dea167SDimitry Andric   /// Convert SignedArgVector to ArgVector.
81*a7dea167SDimitry Andric   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
82*a7dea167SDimitry Andric                                SignedArgVector Args);
83*a7dea167SDimitry Andric 
84*a7dea167SDimitry Andric   /// Parse the config.
85*a7dea167SDimitry Andric   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
86*a7dea167SDimitry Andric                           TaintConfiguration &&Config);
87*a7dea167SDimitry Andric 
88*a7dea167SDimitry Andric   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
89*a7dea167SDimitry Andric   /// Denotes the return vale.
90*a7dea167SDimitry Andric   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
91*a7dea167SDimitry Andric                                          1};
920b57cec5SDimitry Andric 
930b57cec5SDimitry Andric private:
940b57cec5SDimitry Andric   mutable std::unique_ptr<BugType> BT;
950b57cec5SDimitry Andric   void initBugType() const {
960b57cec5SDimitry Andric     if (!BT)
970b57cec5SDimitry Andric       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
980b57cec5SDimitry Andric   }
990b57cec5SDimitry Andric 
1000b57cec5SDimitry Andric   /// Catch taint related bugs. Check if tainted data is passed to a
1010b57cec5SDimitry Andric   /// system call etc.
1020b57cec5SDimitry Andric   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric   /// Add taint sources on a pre-visit.
1050b57cec5SDimitry Andric   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
1060b57cec5SDimitry Andric 
1070b57cec5SDimitry Andric   /// Propagate taint generated at pre-visit.
1080b57cec5SDimitry Andric   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric   /// Check if the region the expression evaluates to is the standard input,
1110b57cec5SDimitry Andric   /// and thus, is tainted.
1120b57cec5SDimitry Andric   static bool isStdin(const Expr *E, CheckerContext &C);
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric   /// Given a pointer argument, return the value it points to.
1150b57cec5SDimitry Andric   static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
1160b57cec5SDimitry Andric 
1170b57cec5SDimitry Andric   /// Check for CWE-134: Uncontrolled Format String.
118*a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
119*a7dea167SDimitry Andric       "Untrusted data is used as a format string "
120*a7dea167SDimitry Andric       "(CWE-134: Uncontrolled Format String)";
1210b57cec5SDimitry Andric   bool checkUncontrolledFormatString(const CallExpr *CE,
1220b57cec5SDimitry Andric                                      CheckerContext &C) const;
1230b57cec5SDimitry Andric 
1240b57cec5SDimitry Andric   /// Check for:
1250b57cec5SDimitry Andric   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
1260b57cec5SDimitry Andric   /// CWE-78, "Failure to Sanitize Data into an OS Command"
127*a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
128*a7dea167SDimitry Andric       "Untrusted data is passed to a system call "
129*a7dea167SDimitry Andric       "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
1300b57cec5SDimitry Andric   bool checkSystemCall(const CallExpr *CE, StringRef Name,
1310b57cec5SDimitry Andric                        CheckerContext &C) const;
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric   /// Check if tainted data is used as a buffer size ins strn.. functions,
1340b57cec5SDimitry Andric   /// and allocators.
135*a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgTaintedBufferSize =
136*a7dea167SDimitry Andric       "Untrusted data is used to specify the buffer size "
137*a7dea167SDimitry Andric       "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
138*a7dea167SDimitry Andric       "for character data and the null terminator)";
1390b57cec5SDimitry Andric   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
1400b57cec5SDimitry Andric                               CheckerContext &C) const;
1410b57cec5SDimitry Andric 
142*a7dea167SDimitry Andric   /// Check if tainted data is used as a custom sink's parameter.
143*a7dea167SDimitry Andric   static constexpr llvm::StringLiteral MsgCustomSink =
144*a7dea167SDimitry Andric       "Untrusted data is passed to a user-defined sink";
145*a7dea167SDimitry Andric   bool checkCustomSinks(const CallExpr *CE, StringRef Name,
1460b57cec5SDimitry Andric                         CheckerContext &C) const;
1470b57cec5SDimitry Andric 
148*a7dea167SDimitry Andric   /// Generate a report if the expression is tainted or points to tainted data.
149*a7dea167SDimitry Andric   bool generateReportIfTainted(const Expr *E, StringRef Msg,
150*a7dea167SDimitry Andric                                CheckerContext &C) const;
151*a7dea167SDimitry Andric 
152*a7dea167SDimitry Andric   struct TaintPropagationRule;
153*a7dea167SDimitry Andric   using NameRuleMap = llvm::StringMap<TaintPropagationRule>;
154*a7dea167SDimitry Andric   using NameArgMap = llvm::StringMap<ArgVector>;
1550b57cec5SDimitry Andric 
1560b57cec5SDimitry Andric   /// A struct used to specify taint propagation rules for a function.
1570b57cec5SDimitry Andric   ///
1580b57cec5SDimitry Andric   /// If any of the possible taint source arguments is tainted, all of the
1590b57cec5SDimitry Andric   /// destination arguments should also be tainted. Use InvalidArgIndex in the
1600b57cec5SDimitry Andric   /// src list to specify that all of the arguments can introduce taint. Use
1610b57cec5SDimitry Andric   /// InvalidArgIndex in the dst arguments to signify that all the non-const
1620b57cec5SDimitry Andric   /// pointer and reference arguments might be tainted on return. If
1630b57cec5SDimitry Andric   /// ReturnValueIndex is added to the dst list, the return value will be
1640b57cec5SDimitry Andric   /// tainted.
1650b57cec5SDimitry Andric   struct TaintPropagationRule {
1660b57cec5SDimitry Andric     using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
1670b57cec5SDimitry Andric                                          CheckerContext &C);
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric     /// List of arguments which can be taint sources and should be checked.
1700b57cec5SDimitry Andric     ArgVector SrcArgs;
1710b57cec5SDimitry Andric     /// List of arguments which should be tainted on function return.
1720b57cec5SDimitry Andric     ArgVector DstArgs;
1730b57cec5SDimitry Andric     /// Index for the first variadic parameter if exist.
1740b57cec5SDimitry Andric     unsigned VariadicIndex;
1750b57cec5SDimitry Andric     /// Show when a function has variadic parameters. If it has, it marks all
1760b57cec5SDimitry Andric     /// of them as source or destination.
1770b57cec5SDimitry Andric     VariadicType VarType;
1780b57cec5SDimitry Andric     /// Special function for tainted source determination. If defined, it can
1790b57cec5SDimitry Andric     /// override the default behavior.
1800b57cec5SDimitry Andric     PropagationFuncType PropagationFunc;
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric     TaintPropagationRule()
1830b57cec5SDimitry Andric         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
1840b57cec5SDimitry Andric           PropagationFunc(nullptr) {}
1850b57cec5SDimitry Andric 
186*a7dea167SDimitry Andric     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
1870b57cec5SDimitry Andric                          VariadicType Var = VariadicType::None,
1880b57cec5SDimitry Andric                          unsigned VarIndex = InvalidArgIndex,
1890b57cec5SDimitry Andric                          PropagationFuncType Func = nullptr)
1900b57cec5SDimitry Andric         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
1910b57cec5SDimitry Andric           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric     /// Get the propagation rule for a given function.
1940b57cec5SDimitry Andric     static TaintPropagationRule
195*a7dea167SDimitry Andric     getTaintPropagationRule(const NameRuleMap &CustomPropagations,
196*a7dea167SDimitry Andric                             const FunctionDecl *FDecl, StringRef Name,
1970b57cec5SDimitry Andric                             CheckerContext &C);
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
2000b57cec5SDimitry Andric     void addDstArg(unsigned A) { DstArgs.push_back(A); }
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric     bool isNull() const {
2030b57cec5SDimitry Andric       return SrcArgs.empty() && DstArgs.empty() &&
2040b57cec5SDimitry Andric              VariadicType::None == VarType;
2050b57cec5SDimitry Andric     }
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric     bool isDestinationArgument(unsigned ArgNum) const {
2080b57cec5SDimitry Andric       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
2090b57cec5SDimitry Andric     }
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric     static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
2120b57cec5SDimitry Andric                                            CheckerContext &C) {
2130b57cec5SDimitry Andric       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
2140b57cec5SDimitry Andric         return true;
2150b57cec5SDimitry Andric 
2160b57cec5SDimitry Andric       if (!E->getType().getTypePtr()->isPointerType())
2170b57cec5SDimitry Andric         return false;
2180b57cec5SDimitry Andric 
2190b57cec5SDimitry Andric       Optional<SVal> V = getPointedToSVal(C, E);
2200b57cec5SDimitry Andric       return (V && isTainted(State, *V));
2210b57cec5SDimitry Andric     }
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric     /// Pre-process a function which propagates taint according to the
2240b57cec5SDimitry Andric     /// taint rule.
2250b57cec5SDimitry Andric     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric     // Functions for custom taintedness propagation.
2280b57cec5SDimitry Andric     static bool postSocket(bool IsTainted, const CallExpr *CE,
2290b57cec5SDimitry Andric                            CheckerContext &C);
2300b57cec5SDimitry Andric   };
231*a7dea167SDimitry Andric 
232*a7dea167SDimitry Andric   /// Defines a map between the propagation function's name and
233*a7dea167SDimitry Andric   /// TaintPropagationRule.
234*a7dea167SDimitry Andric   NameRuleMap CustomPropagations;
235*a7dea167SDimitry Andric 
236*a7dea167SDimitry Andric   /// Defines a map between the filter function's name and filtering args.
237*a7dea167SDimitry Andric   NameArgMap CustomFilters;
238*a7dea167SDimitry Andric 
239*a7dea167SDimitry Andric   /// Defines a map between the sink function's name and sinking args.
240*a7dea167SDimitry Andric   NameArgMap CustomSinks;
2410b57cec5SDimitry Andric };
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric const unsigned GenericTaintChecker::ReturnValueIndex;
2440b57cec5SDimitry Andric const unsigned GenericTaintChecker::InvalidArgIndex;
2450b57cec5SDimitry Andric 
246*a7dea167SDimitry Andric // FIXME: these lines can be removed in C++17
247*a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
248*a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
249*a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
250*a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
2510b57cec5SDimitry Andric } // end of anonymous namespace
2520b57cec5SDimitry Andric 
253*a7dea167SDimitry Andric using TaintConfig = GenericTaintChecker::TaintConfiguration;
254*a7dea167SDimitry Andric 
255*a7dea167SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
256*a7dea167SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)
257*a7dea167SDimitry Andric 
258*a7dea167SDimitry Andric namespace llvm {
259*a7dea167SDimitry Andric namespace yaml {
260*a7dea167SDimitry Andric template <> struct MappingTraits<TaintConfig> {
261*a7dea167SDimitry Andric   static void mapping(IO &IO, TaintConfig &Config) {
262*a7dea167SDimitry Andric     IO.mapOptional("Propagations", Config.Propagations);
263*a7dea167SDimitry Andric     IO.mapOptional("Filters", Config.Filters);
264*a7dea167SDimitry Andric     IO.mapOptional("Sinks", Config.Sinks);
265*a7dea167SDimitry Andric   }
266*a7dea167SDimitry Andric };
267*a7dea167SDimitry Andric 
268*a7dea167SDimitry Andric template <> struct MappingTraits<TaintConfig::Propagation> {
269*a7dea167SDimitry Andric   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
270*a7dea167SDimitry Andric     IO.mapRequired("Name", Propagation.Name);
271*a7dea167SDimitry Andric     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
272*a7dea167SDimitry Andric     IO.mapOptional("DstArgs", Propagation.DstArgs);
273*a7dea167SDimitry Andric     IO.mapOptional("VariadicType", Propagation.VarType,
274*a7dea167SDimitry Andric                    GenericTaintChecker::VariadicType::None);
275*a7dea167SDimitry Andric     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
276*a7dea167SDimitry Andric                    GenericTaintChecker::InvalidArgIndex);
277*a7dea167SDimitry Andric   }
278*a7dea167SDimitry Andric };
279*a7dea167SDimitry Andric 
280*a7dea167SDimitry Andric template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
281*a7dea167SDimitry Andric   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
282*a7dea167SDimitry Andric     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
283*a7dea167SDimitry Andric     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
284*a7dea167SDimitry Andric     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
285*a7dea167SDimitry Andric   }
286*a7dea167SDimitry Andric };
287*a7dea167SDimitry Andric 
288*a7dea167SDimitry Andric template <> struct MappingTraits<TaintConfig::NameArgsPair> {
289*a7dea167SDimitry Andric   static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) {
290*a7dea167SDimitry Andric     IO.mapRequired("Name", NameArg.first);
291*a7dea167SDimitry Andric     IO.mapRequired("Args", NameArg.second);
292*a7dea167SDimitry Andric   }
293*a7dea167SDimitry Andric };
294*a7dea167SDimitry Andric } // namespace yaml
295*a7dea167SDimitry Andric } // namespace llvm
296*a7dea167SDimitry Andric 
2970b57cec5SDimitry Andric /// A set which is used to pass information from call pre-visit instruction
2980b57cec5SDimitry Andric /// to the call post-visit. The values are unsigned integers, which are either
2990b57cec5SDimitry Andric /// ReturnValueIndex, or indexes of the pointer/reference argument, which
3000b57cec5SDimitry Andric /// points to data, which should be tainted on return.
3010b57cec5SDimitry Andric REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
3020b57cec5SDimitry Andric 
303*a7dea167SDimitry Andric GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
304*a7dea167SDimitry Andric     CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
305*a7dea167SDimitry Andric   ArgVector Result;
306*a7dea167SDimitry Andric   for (int Arg : Args) {
307*a7dea167SDimitry Andric     if (Arg == -1)
308*a7dea167SDimitry Andric       Result.push_back(ReturnValueIndex);
309*a7dea167SDimitry Andric     else if (Arg < -1) {
310*a7dea167SDimitry Andric       Result.push_back(InvalidArgIndex);
311*a7dea167SDimitry Andric       Mgr.reportInvalidCheckerOptionValue(
312*a7dea167SDimitry Andric           this, Option,
313*a7dea167SDimitry Andric           "an argument number for propagation rules greater or equal to -1");
314*a7dea167SDimitry Andric     } else
315*a7dea167SDimitry Andric       Result.push_back(static_cast<unsigned>(Arg));
316*a7dea167SDimitry Andric   }
317*a7dea167SDimitry Andric   return Result;
318*a7dea167SDimitry Andric }
319*a7dea167SDimitry Andric 
320*a7dea167SDimitry Andric void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
321*a7dea167SDimitry Andric                                              const std::string &Option,
322*a7dea167SDimitry Andric                                              TaintConfiguration &&Config) {
323*a7dea167SDimitry Andric   for (auto &P : Config.Propagations) {
324*a7dea167SDimitry Andric     GenericTaintChecker::CustomPropagations.try_emplace(
325*a7dea167SDimitry Andric         P.Name, std::move(P.SrcArgs),
326*a7dea167SDimitry Andric         convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex);
327*a7dea167SDimitry Andric   }
328*a7dea167SDimitry Andric 
329*a7dea167SDimitry Andric   for (auto &F : Config.Filters) {
330*a7dea167SDimitry Andric     GenericTaintChecker::CustomFilters.try_emplace(F.first,
331*a7dea167SDimitry Andric                                                    std::move(F.second));
332*a7dea167SDimitry Andric   }
333*a7dea167SDimitry Andric 
334*a7dea167SDimitry Andric   for (auto &S : Config.Sinks) {
335*a7dea167SDimitry Andric     GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second));
336*a7dea167SDimitry Andric   }
337*a7dea167SDimitry Andric }
338*a7dea167SDimitry Andric 
3390b57cec5SDimitry Andric GenericTaintChecker::TaintPropagationRule
3400b57cec5SDimitry Andric GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
341*a7dea167SDimitry Andric     const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl,
342*a7dea167SDimitry Andric     StringRef Name, CheckerContext &C) {
3430b57cec5SDimitry Andric   // TODO: Currently, we might lose precision here: we always mark a return
3440b57cec5SDimitry Andric   // value as tainted even if it's just a pointer, pointing to tainted data.
3450b57cec5SDimitry Andric 
3460b57cec5SDimitry Andric   // Check for exact name match for functions without builtin substitutes.
3470b57cec5SDimitry Andric   TaintPropagationRule Rule =
3480b57cec5SDimitry Andric       llvm::StringSwitch<TaintPropagationRule>(Name)
3490b57cec5SDimitry Andric           // Source functions
3500b57cec5SDimitry Andric           // TODO: Add support for vfscanf & family.
3510b57cec5SDimitry Andric           .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
3520b57cec5SDimitry Andric           .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
3530b57cec5SDimitry Andric           .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
3540b57cec5SDimitry Andric           .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
3550b57cec5SDimitry Andric           .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
356*a7dea167SDimitry Andric           .Case("getchar_unlocked",
357*a7dea167SDimitry Andric                 TaintPropagationRule({}, {ReturnValueIndex}))
3580b57cec5SDimitry Andric           .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
3590b57cec5SDimitry Andric           .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
3600b57cec5SDimitry Andric           .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
3610b57cec5SDimitry Andric           .Case("socket",
3620b57cec5SDimitry Andric                 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
3630b57cec5SDimitry Andric                                      InvalidArgIndex,
3640b57cec5SDimitry Andric                                      &TaintPropagationRule::postSocket))
3650b57cec5SDimitry Andric           .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
3660b57cec5SDimitry Andric           // Propagating functions
3670b57cec5SDimitry Andric           .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
3680b57cec5SDimitry Andric           .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
3690b57cec5SDimitry Andric           .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
3700b57cec5SDimitry Andric           .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
3710b57cec5SDimitry Andric           .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
3720b57cec5SDimitry Andric           .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
3730b57cec5SDimitry Andric           .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
3740b57cec5SDimitry Andric           .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
3750b57cec5SDimitry Andric           .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
3760b57cec5SDimitry Andric           .Case("getdelim", TaintPropagationRule({3}, {0}))
3770b57cec5SDimitry Andric           .Case("getline", TaintPropagationRule({2}, {0}))
3780b57cec5SDimitry Andric           .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
3790b57cec5SDimitry Andric           .Case("pread",
3800b57cec5SDimitry Andric                 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
3810b57cec5SDimitry Andric           .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
3820b57cec5SDimitry Andric           .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
3830b57cec5SDimitry Andric           .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
3840b57cec5SDimitry Andric           .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
3850b57cec5SDimitry Andric           .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
3860b57cec5SDimitry Andric           .Default(TaintPropagationRule());
3870b57cec5SDimitry Andric 
3880b57cec5SDimitry Andric   if (!Rule.isNull())
3890b57cec5SDimitry Andric     return Rule;
3900b57cec5SDimitry Andric 
3910b57cec5SDimitry Andric   // Check if it's one of the memory setting/copying functions.
3920b57cec5SDimitry Andric   // This check is specialized but faster then calling isCLibraryFunction.
3930b57cec5SDimitry Andric   unsigned BId = 0;
3940b57cec5SDimitry Andric   if ((BId = FDecl->getMemoryFunctionKind()))
3950b57cec5SDimitry Andric     switch (BId) {
3960b57cec5SDimitry Andric     case Builtin::BImemcpy:
3970b57cec5SDimitry Andric     case Builtin::BImemmove:
3980b57cec5SDimitry Andric     case Builtin::BIstrncpy:
3990b57cec5SDimitry Andric     case Builtin::BIstrncat:
4000b57cec5SDimitry Andric       return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
4010b57cec5SDimitry Andric     case Builtin::BIstrlcpy:
4020b57cec5SDimitry Andric     case Builtin::BIstrlcat:
4030b57cec5SDimitry Andric       return TaintPropagationRule({1, 2}, {0});
4040b57cec5SDimitry Andric     case Builtin::BIstrndup:
4050b57cec5SDimitry Andric       return TaintPropagationRule({0, 1}, {ReturnValueIndex});
4060b57cec5SDimitry Andric 
4070b57cec5SDimitry Andric     default:
4080b57cec5SDimitry Andric       break;
4090b57cec5SDimitry Andric     };
4100b57cec5SDimitry Andric 
4110b57cec5SDimitry Andric   // Process all other functions which could be defined as builtins.
4120b57cec5SDimitry Andric   if (Rule.isNull()) {
4130b57cec5SDimitry Andric     if (C.isCLibraryFunction(FDecl, "snprintf"))
4140b57cec5SDimitry Andric       return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
4150b57cec5SDimitry Andric                                   3);
4160b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "sprintf"))
4170b57cec5SDimitry Andric       return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
4180b57cec5SDimitry Andric                                   2);
4190b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
4200b57cec5SDimitry Andric              C.isCLibraryFunction(FDecl, "stpcpy") ||
4210b57cec5SDimitry Andric              C.isCLibraryFunction(FDecl, "strcat"))
4220b57cec5SDimitry Andric       return TaintPropagationRule({1}, {0, ReturnValueIndex});
4230b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "bcopy"))
4240b57cec5SDimitry Andric       return TaintPropagationRule({0, 2}, {1});
4250b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "strdup") ||
4260b57cec5SDimitry Andric              C.isCLibraryFunction(FDecl, "strdupa"))
4270b57cec5SDimitry Andric       return TaintPropagationRule({0}, {ReturnValueIndex});
4280b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
4290b57cec5SDimitry Andric       return TaintPropagationRule({0}, {ReturnValueIndex});
4300b57cec5SDimitry Andric   }
4310b57cec5SDimitry Andric 
4320b57cec5SDimitry Andric   // Skipping the following functions, since they might be used for cleansing
4330b57cec5SDimitry Andric   // or smart memory copy:
4340b57cec5SDimitry Andric   // - memccpy - copying until hitting a special character.
4350b57cec5SDimitry Andric 
436*a7dea167SDimitry Andric   auto It = CustomPropagations.find(Name);
437*a7dea167SDimitry Andric   if (It != CustomPropagations.end())
438*a7dea167SDimitry Andric     return It->getValue();
439*a7dea167SDimitry Andric 
4400b57cec5SDimitry Andric   return TaintPropagationRule();
4410b57cec5SDimitry Andric }
4420b57cec5SDimitry Andric 
4430b57cec5SDimitry Andric void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
4440b57cec5SDimitry Andric                                        CheckerContext &C) const {
4450b57cec5SDimitry Andric   // Check for taintedness related errors first: system call, uncontrolled
4460b57cec5SDimitry Andric   // format string, tainted buffer size.
4470b57cec5SDimitry Andric   if (checkPre(CE, C))
4480b57cec5SDimitry Andric     return;
4490b57cec5SDimitry Andric 
4500b57cec5SDimitry Andric   // Marks the function's arguments and/or return value tainted if it present in
4510b57cec5SDimitry Andric   // the list.
4520b57cec5SDimitry Andric   addSourcesPre(CE, C);
4530b57cec5SDimitry Andric }
4540b57cec5SDimitry Andric 
4550b57cec5SDimitry Andric void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
4560b57cec5SDimitry Andric                                         CheckerContext &C) const {
4570b57cec5SDimitry Andric   // Set the marked values as tainted. The return value only accessible from
4580b57cec5SDimitry Andric   // checkPostStmt.
4590b57cec5SDimitry Andric   propagateFromPre(CE, C);
4600b57cec5SDimitry Andric }
4610b57cec5SDimitry Andric 
4620b57cec5SDimitry Andric void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
4630b57cec5SDimitry Andric                                      const char *NL, const char *Sep) const {
4640b57cec5SDimitry Andric   printTaint(State, Out, NL, Sep);
4650b57cec5SDimitry Andric }
4660b57cec5SDimitry Andric 
4670b57cec5SDimitry Andric void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
4680b57cec5SDimitry Andric                                         CheckerContext &C) const {
4690b57cec5SDimitry Andric   ProgramStateRef State = nullptr;
4700b57cec5SDimitry Andric   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
4710b57cec5SDimitry Andric   if (!FDecl || FDecl->getKind() != Decl::Function)
4720b57cec5SDimitry Andric     return;
4730b57cec5SDimitry Andric 
4740b57cec5SDimitry Andric   StringRef Name = C.getCalleeName(FDecl);
4750b57cec5SDimitry Andric   if (Name.empty())
4760b57cec5SDimitry Andric     return;
4770b57cec5SDimitry Andric 
4780b57cec5SDimitry Andric   // First, try generating a propagation rule for this function.
479*a7dea167SDimitry Andric   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
480*a7dea167SDimitry Andric       this->CustomPropagations, FDecl, Name, C);
4810b57cec5SDimitry Andric   if (!Rule.isNull()) {
4820b57cec5SDimitry Andric     State = Rule.process(CE, C);
4830b57cec5SDimitry Andric     if (!State)
4840b57cec5SDimitry Andric       return;
4850b57cec5SDimitry Andric     C.addTransition(State);
4860b57cec5SDimitry Andric     return;
4870b57cec5SDimitry Andric   }
4880b57cec5SDimitry Andric 
4890b57cec5SDimitry Andric   if (!State)
4900b57cec5SDimitry Andric     return;
4910b57cec5SDimitry Andric   C.addTransition(State);
4920b57cec5SDimitry Andric }
4930b57cec5SDimitry Andric 
4940b57cec5SDimitry Andric bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
4950b57cec5SDimitry Andric                                            CheckerContext &C) const {
4960b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
4970b57cec5SDimitry Andric 
4980b57cec5SDimitry Andric   // Depending on what was tainted at pre-visit, we determined a set of
4990b57cec5SDimitry Andric   // arguments which should be tainted after the function returns. These are
5000b57cec5SDimitry Andric   // stored in the state as TaintArgsOnPostVisit set.
5010b57cec5SDimitry Andric   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
5020b57cec5SDimitry Andric   if (TaintArgs.isEmpty())
5030b57cec5SDimitry Andric     return false;
5040b57cec5SDimitry Andric 
5050b57cec5SDimitry Andric   for (unsigned ArgNum : TaintArgs) {
5060b57cec5SDimitry Andric     // Special handling for the tainted return value.
5070b57cec5SDimitry Andric     if (ArgNum == ReturnValueIndex) {
5080b57cec5SDimitry Andric       State = addTaint(State, CE, C.getLocationContext());
5090b57cec5SDimitry Andric       continue;
5100b57cec5SDimitry Andric     }
5110b57cec5SDimitry Andric 
5120b57cec5SDimitry Andric     // The arguments are pointer arguments. The data they are pointing at is
5130b57cec5SDimitry Andric     // tainted after the call.
5140b57cec5SDimitry Andric     if (CE->getNumArgs() < (ArgNum + 1))
5150b57cec5SDimitry Andric       return false;
5160b57cec5SDimitry Andric     const Expr *Arg = CE->getArg(ArgNum);
5170b57cec5SDimitry Andric     Optional<SVal> V = getPointedToSVal(C, Arg);
5180b57cec5SDimitry Andric     if (V)
5190b57cec5SDimitry Andric       State = addTaint(State, *V);
5200b57cec5SDimitry Andric   }
5210b57cec5SDimitry Andric 
5220b57cec5SDimitry Andric   // Clear up the taint info from the state.
5230b57cec5SDimitry Andric   State = State->remove<TaintArgsOnPostVisit>();
5240b57cec5SDimitry Andric 
5250b57cec5SDimitry Andric   if (State != C.getState()) {
5260b57cec5SDimitry Andric     C.addTransition(State);
5270b57cec5SDimitry Andric     return true;
5280b57cec5SDimitry Andric   }
5290b57cec5SDimitry Andric   return false;
5300b57cec5SDimitry Andric }
5310b57cec5SDimitry Andric 
5320b57cec5SDimitry Andric bool GenericTaintChecker::checkPre(const CallExpr *CE,
5330b57cec5SDimitry Andric                                    CheckerContext &C) const {
5340b57cec5SDimitry Andric 
5350b57cec5SDimitry Andric   if (checkUncontrolledFormatString(CE, C))
5360b57cec5SDimitry Andric     return true;
5370b57cec5SDimitry Andric 
5380b57cec5SDimitry Andric   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
5390b57cec5SDimitry Andric   if (!FDecl || FDecl->getKind() != Decl::Function)
5400b57cec5SDimitry Andric     return false;
5410b57cec5SDimitry Andric 
5420b57cec5SDimitry Andric   StringRef Name = C.getCalleeName(FDecl);
5430b57cec5SDimitry Andric   if (Name.empty())
5440b57cec5SDimitry Andric     return false;
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric   if (checkSystemCall(CE, Name, C))
5470b57cec5SDimitry Andric     return true;
5480b57cec5SDimitry Andric 
5490b57cec5SDimitry Andric   if (checkTaintedBufferSize(CE, FDecl, C))
5500b57cec5SDimitry Andric     return true;
5510b57cec5SDimitry Andric 
552*a7dea167SDimitry Andric   if (checkCustomSinks(CE, Name, C))
553*a7dea167SDimitry Andric     return true;
554*a7dea167SDimitry Andric 
5550b57cec5SDimitry Andric   return false;
5560b57cec5SDimitry Andric }
5570b57cec5SDimitry Andric 
5580b57cec5SDimitry Andric Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
5590b57cec5SDimitry Andric                                                      const Expr *Arg) {
5600b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
5610b57cec5SDimitry Andric   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
5620b57cec5SDimitry Andric   if (AddrVal.isUnknownOrUndef())
5630b57cec5SDimitry Andric     return None;
5640b57cec5SDimitry Andric 
5650b57cec5SDimitry Andric   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
5660b57cec5SDimitry Andric   if (!AddrLoc)
5670b57cec5SDimitry Andric     return None;
5680b57cec5SDimitry Andric 
5690b57cec5SDimitry Andric   QualType ArgTy = Arg->getType().getCanonicalType();
5700b57cec5SDimitry Andric   if (!ArgTy->isPointerType())
5710b57cec5SDimitry Andric     return None;
5720b57cec5SDimitry Andric 
5730b57cec5SDimitry Andric   QualType ValTy = ArgTy->getPointeeType();
5740b57cec5SDimitry Andric 
5750b57cec5SDimitry Andric   // Do not dereference void pointers. Treat them as byte pointers instead.
5760b57cec5SDimitry Andric   // FIXME: we might want to consider more than just the first byte.
5770b57cec5SDimitry Andric   if (ValTy->isVoidType())
5780b57cec5SDimitry Andric     ValTy = C.getASTContext().CharTy;
5790b57cec5SDimitry Andric 
5800b57cec5SDimitry Andric   return State->getSVal(*AddrLoc, ValTy);
5810b57cec5SDimitry Andric }
5820b57cec5SDimitry Andric 
5830b57cec5SDimitry Andric ProgramStateRef
5840b57cec5SDimitry Andric GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
5850b57cec5SDimitry Andric                                                    CheckerContext &C) const {
5860b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
5870b57cec5SDimitry Andric 
5880b57cec5SDimitry Andric   // Check for taint in arguments.
5890b57cec5SDimitry Andric   bool IsTainted = true;
5900b57cec5SDimitry Andric   for (unsigned ArgNum : SrcArgs) {
5910b57cec5SDimitry Andric     if (ArgNum >= CE->getNumArgs())
592*a7dea167SDimitry Andric       continue;
593*a7dea167SDimitry Andric 
5940b57cec5SDimitry Andric     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
5950b57cec5SDimitry Andric       break;
5960b57cec5SDimitry Andric   }
5970b57cec5SDimitry Andric 
5980b57cec5SDimitry Andric   // Check for taint in variadic arguments.
5990b57cec5SDimitry Andric   if (!IsTainted && VariadicType::Src == VarType) {
6000b57cec5SDimitry Andric     // Check if any of the arguments is tainted
601*a7dea167SDimitry Andric     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
6020b57cec5SDimitry Andric       if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
6030b57cec5SDimitry Andric         break;
6040b57cec5SDimitry Andric     }
6050b57cec5SDimitry Andric   }
6060b57cec5SDimitry Andric 
6070b57cec5SDimitry Andric   if (PropagationFunc)
6080b57cec5SDimitry Andric     IsTainted = PropagationFunc(IsTainted, CE, C);
6090b57cec5SDimitry Andric 
6100b57cec5SDimitry Andric   if (!IsTainted)
6110b57cec5SDimitry Andric     return State;
6120b57cec5SDimitry Andric 
6130b57cec5SDimitry Andric   // Mark the arguments which should be tainted after the function returns.
6140b57cec5SDimitry Andric   for (unsigned ArgNum : DstArgs) {
6150b57cec5SDimitry Andric     // Should mark the return value?
6160b57cec5SDimitry Andric     if (ArgNum == ReturnValueIndex) {
6170b57cec5SDimitry Andric       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
6180b57cec5SDimitry Andric       continue;
6190b57cec5SDimitry Andric     }
6200b57cec5SDimitry Andric 
621*a7dea167SDimitry Andric     if (ArgNum >= CE->getNumArgs())
622*a7dea167SDimitry Andric       continue;
623*a7dea167SDimitry Andric 
6240b57cec5SDimitry Andric     // Mark the given argument.
6250b57cec5SDimitry Andric     State = State->add<TaintArgsOnPostVisit>(ArgNum);
6260b57cec5SDimitry Andric   }
6270b57cec5SDimitry Andric 
6280b57cec5SDimitry Andric   // Mark all variadic arguments tainted if present.
6290b57cec5SDimitry Andric   if (VariadicType::Dst == VarType) {
6300b57cec5SDimitry Andric     // For all pointer and references that were passed in:
6310b57cec5SDimitry Andric     //   If they are not pointing to const data, mark data as tainted.
6320b57cec5SDimitry Andric     //   TODO: So far we are just going one level down; ideally we'd need to
6330b57cec5SDimitry Andric     //         recurse here.
634*a7dea167SDimitry Andric     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
6350b57cec5SDimitry Andric       const Expr *Arg = CE->getArg(i);
6360b57cec5SDimitry Andric       // Process pointer argument.
6370b57cec5SDimitry Andric       const Type *ArgTy = Arg->getType().getTypePtr();
6380b57cec5SDimitry Andric       QualType PType = ArgTy->getPointeeType();
6390b57cec5SDimitry Andric       if ((!PType.isNull() && !PType.isConstQualified()) ||
6400b57cec5SDimitry Andric           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
6410b57cec5SDimitry Andric         State = State->add<TaintArgsOnPostVisit>(i);
6420b57cec5SDimitry Andric     }
6430b57cec5SDimitry Andric   }
6440b57cec5SDimitry Andric 
6450b57cec5SDimitry Andric   return State;
6460b57cec5SDimitry Andric }
6470b57cec5SDimitry Andric 
6480b57cec5SDimitry Andric // If argument 0(protocol domain) is network, the return value should get taint.
6490b57cec5SDimitry Andric bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
6500b57cec5SDimitry Andric                                                            const CallExpr *CE,
6510b57cec5SDimitry Andric                                                            CheckerContext &C) {
6520b57cec5SDimitry Andric   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
6530b57cec5SDimitry Andric   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
6540b57cec5SDimitry Andric   // White list the internal communication protocols.
6550b57cec5SDimitry Andric   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
6560b57cec5SDimitry Andric       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
6570b57cec5SDimitry Andric     return false;
6580b57cec5SDimitry Andric 
6590b57cec5SDimitry Andric   return true;
6600b57cec5SDimitry Andric }
6610b57cec5SDimitry Andric 
6620b57cec5SDimitry Andric bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
6630b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
6640b57cec5SDimitry Andric   SVal Val = C.getSVal(E);
6650b57cec5SDimitry Andric 
6660b57cec5SDimitry Andric   // stdin is a pointer, so it would be a region.
6670b57cec5SDimitry Andric   const MemRegion *MemReg = Val.getAsRegion();
6680b57cec5SDimitry Andric 
6690b57cec5SDimitry Andric   // The region should be symbolic, we do not know it's value.
6700b57cec5SDimitry Andric   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
6710b57cec5SDimitry Andric   if (!SymReg)
6720b57cec5SDimitry Andric     return false;
6730b57cec5SDimitry Andric 
6740b57cec5SDimitry Andric   // Get it's symbol and find the declaration region it's pointing to.
6750b57cec5SDimitry Andric   const SymbolRegionValue *Sm =
6760b57cec5SDimitry Andric       dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
6770b57cec5SDimitry Andric   if (!Sm)
6780b57cec5SDimitry Andric     return false;
6790b57cec5SDimitry Andric   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
6800b57cec5SDimitry Andric   if (!DeclReg)
6810b57cec5SDimitry Andric     return false;
6820b57cec5SDimitry Andric 
6830b57cec5SDimitry Andric   // This region corresponds to a declaration, find out if it's a global/extern
6840b57cec5SDimitry Andric   // variable named stdin with the proper type.
6850b57cec5SDimitry Andric   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
6860b57cec5SDimitry Andric     D = D->getCanonicalDecl();
6870b57cec5SDimitry Andric     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
6880b57cec5SDimitry Andric       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
6890b57cec5SDimitry Andric       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
6900b57cec5SDimitry Andric                        C.getASTContext().getFILEType().getCanonicalType())
6910b57cec5SDimitry Andric         return true;
6920b57cec5SDimitry Andric     }
6930b57cec5SDimitry Andric   }
6940b57cec5SDimitry Andric   return false;
6950b57cec5SDimitry Andric }
6960b57cec5SDimitry Andric 
6970b57cec5SDimitry Andric static bool getPrintfFormatArgumentNum(const CallExpr *CE,
6980b57cec5SDimitry Andric                                        const CheckerContext &C,
699*a7dea167SDimitry Andric                                        unsigned &ArgNum) {
7000b57cec5SDimitry Andric   // Find if the function contains a format string argument.
7010b57cec5SDimitry Andric   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
7020b57cec5SDimitry Andric   // vsnprintf, syslog, custom annotated functions.
7030b57cec5SDimitry Andric   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
7040b57cec5SDimitry Andric   if (!FDecl)
7050b57cec5SDimitry Andric     return false;
7060b57cec5SDimitry Andric   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
7070b57cec5SDimitry Andric     ArgNum = Format->getFormatIdx() - 1;
7080b57cec5SDimitry Andric     if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
7090b57cec5SDimitry Andric       return true;
7100b57cec5SDimitry Andric   }
7110b57cec5SDimitry Andric 
7120b57cec5SDimitry Andric   // Or if a function is named setproctitle (this is a heuristic).
7130b57cec5SDimitry Andric   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
7140b57cec5SDimitry Andric     ArgNum = 0;
7150b57cec5SDimitry Andric     return true;
7160b57cec5SDimitry Andric   }
7170b57cec5SDimitry Andric 
7180b57cec5SDimitry Andric   return false;
7190b57cec5SDimitry Andric }
7200b57cec5SDimitry Andric 
721*a7dea167SDimitry Andric bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
7220b57cec5SDimitry Andric                                                   CheckerContext &C) const {
7230b57cec5SDimitry Andric   assert(E);
7240b57cec5SDimitry Andric 
7250b57cec5SDimitry Andric   // Check for taint.
7260b57cec5SDimitry Andric   ProgramStateRef State = C.getState();
7270b57cec5SDimitry Andric   Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
7280b57cec5SDimitry Andric   SVal TaintedSVal;
7290b57cec5SDimitry Andric   if (PointedToSVal && isTainted(State, *PointedToSVal))
7300b57cec5SDimitry Andric     TaintedSVal = *PointedToSVal;
7310b57cec5SDimitry Andric   else if (isTainted(State, E, C.getLocationContext()))
7320b57cec5SDimitry Andric     TaintedSVal = C.getSVal(E);
7330b57cec5SDimitry Andric   else
7340b57cec5SDimitry Andric     return false;
7350b57cec5SDimitry Andric 
7360b57cec5SDimitry Andric   // Generate diagnostic.
7370b57cec5SDimitry Andric   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
7380b57cec5SDimitry Andric     initBugType();
739*a7dea167SDimitry Andric     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
7400b57cec5SDimitry Andric     report->addRange(E->getSourceRange());
741*a7dea167SDimitry Andric     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
7420b57cec5SDimitry Andric     C.emitReport(std::move(report));
7430b57cec5SDimitry Andric     return true;
7440b57cec5SDimitry Andric   }
7450b57cec5SDimitry Andric   return false;
7460b57cec5SDimitry Andric }
7470b57cec5SDimitry Andric 
7480b57cec5SDimitry Andric bool GenericTaintChecker::checkUncontrolledFormatString(
7490b57cec5SDimitry Andric     const CallExpr *CE, CheckerContext &C) const {
7500b57cec5SDimitry Andric   // Check if the function contains a format string argument.
751*a7dea167SDimitry Andric   unsigned ArgNum = 0;
7520b57cec5SDimitry Andric   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
7530b57cec5SDimitry Andric     return false;
7540b57cec5SDimitry Andric 
7550b57cec5SDimitry Andric   // If either the format string content or the pointer itself are tainted,
7560b57cec5SDimitry Andric   // warn.
7570b57cec5SDimitry Andric   return generateReportIfTainted(CE->getArg(ArgNum),
7580b57cec5SDimitry Andric                                  MsgUncontrolledFormatString, C);
7590b57cec5SDimitry Andric }
7600b57cec5SDimitry Andric 
7610b57cec5SDimitry Andric bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
7620b57cec5SDimitry Andric                                           CheckerContext &C) const {
7630b57cec5SDimitry Andric   // TODO: It might make sense to run this check on demand. In some cases,
7640b57cec5SDimitry Andric   // we should check if the environment has been cleansed here. We also might
7650b57cec5SDimitry Andric   // need to know if the user was reset before these calls(seteuid).
7660b57cec5SDimitry Andric   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
7670b57cec5SDimitry Andric                         .Case("system", 0)
7680b57cec5SDimitry Andric                         .Case("popen", 0)
7690b57cec5SDimitry Andric                         .Case("execl", 0)
7700b57cec5SDimitry Andric                         .Case("execle", 0)
7710b57cec5SDimitry Andric                         .Case("execlp", 0)
7720b57cec5SDimitry Andric                         .Case("execv", 0)
7730b57cec5SDimitry Andric                         .Case("execvp", 0)
7740b57cec5SDimitry Andric                         .Case("execvP", 0)
7750b57cec5SDimitry Andric                         .Case("execve", 0)
7760b57cec5SDimitry Andric                         .Case("dlopen", 0)
777*a7dea167SDimitry Andric                         .Default(InvalidArgIndex);
7780b57cec5SDimitry Andric 
779*a7dea167SDimitry Andric   if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
7800b57cec5SDimitry Andric     return false;
7810b57cec5SDimitry Andric 
7820b57cec5SDimitry Andric   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
7830b57cec5SDimitry Andric }
7840b57cec5SDimitry Andric 
7850b57cec5SDimitry Andric // TODO: Should this check be a part of the CString checker?
7860b57cec5SDimitry Andric // If yes, should taint be a global setting?
7870b57cec5SDimitry Andric bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
7880b57cec5SDimitry Andric                                                  const FunctionDecl *FDecl,
7890b57cec5SDimitry Andric                                                  CheckerContext &C) const {
7900b57cec5SDimitry Andric   // If the function has a buffer size argument, set ArgNum.
7910b57cec5SDimitry Andric   unsigned ArgNum = InvalidArgIndex;
7920b57cec5SDimitry Andric   unsigned BId = 0;
7930b57cec5SDimitry Andric   if ((BId = FDecl->getMemoryFunctionKind()))
7940b57cec5SDimitry Andric     switch (BId) {
7950b57cec5SDimitry Andric     case Builtin::BImemcpy:
7960b57cec5SDimitry Andric     case Builtin::BImemmove:
7970b57cec5SDimitry Andric     case Builtin::BIstrncpy:
7980b57cec5SDimitry Andric       ArgNum = 2;
7990b57cec5SDimitry Andric       break;
8000b57cec5SDimitry Andric     case Builtin::BIstrndup:
8010b57cec5SDimitry Andric       ArgNum = 1;
8020b57cec5SDimitry Andric       break;
8030b57cec5SDimitry Andric     default:
8040b57cec5SDimitry Andric       break;
8050b57cec5SDimitry Andric     };
8060b57cec5SDimitry Andric 
8070b57cec5SDimitry Andric   if (ArgNum == InvalidArgIndex) {
8080b57cec5SDimitry Andric     if (C.isCLibraryFunction(FDecl, "malloc") ||
8090b57cec5SDimitry Andric         C.isCLibraryFunction(FDecl, "calloc") ||
8100b57cec5SDimitry Andric         C.isCLibraryFunction(FDecl, "alloca"))
8110b57cec5SDimitry Andric       ArgNum = 0;
8120b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "memccpy"))
8130b57cec5SDimitry Andric       ArgNum = 3;
8140b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "realloc"))
8150b57cec5SDimitry Andric       ArgNum = 1;
8160b57cec5SDimitry Andric     else if (C.isCLibraryFunction(FDecl, "bcopy"))
8170b57cec5SDimitry Andric       ArgNum = 2;
8180b57cec5SDimitry Andric   }
8190b57cec5SDimitry Andric 
8200b57cec5SDimitry Andric   return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
8210b57cec5SDimitry Andric          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
8220b57cec5SDimitry Andric }
8230b57cec5SDimitry Andric 
824*a7dea167SDimitry Andric bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name,
825*a7dea167SDimitry Andric                                            CheckerContext &C) const {
826*a7dea167SDimitry Andric   auto It = CustomSinks.find(Name);
827*a7dea167SDimitry Andric   if (It == CustomSinks.end())
828*a7dea167SDimitry Andric     return false;
829*a7dea167SDimitry Andric 
830*a7dea167SDimitry Andric   const GenericTaintChecker::ArgVector &Args = It->getValue();
831*a7dea167SDimitry Andric   for (unsigned ArgNum : Args) {
832*a7dea167SDimitry Andric     if (ArgNum >= CE->getNumArgs())
833*a7dea167SDimitry Andric       continue;
834*a7dea167SDimitry Andric 
835*a7dea167SDimitry Andric     if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
836*a7dea167SDimitry Andric       return true;
837*a7dea167SDimitry Andric   }
838*a7dea167SDimitry Andric 
839*a7dea167SDimitry Andric   return false;
840*a7dea167SDimitry Andric }
841*a7dea167SDimitry Andric 
842*a7dea167SDimitry Andric void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
843*a7dea167SDimitry Andric   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
844*a7dea167SDimitry Andric   std::string Option{"Config"};
845*a7dea167SDimitry Andric   StringRef ConfigFile =
846*a7dea167SDimitry Andric       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
847*a7dea167SDimitry Andric   llvm::Optional<TaintConfig> Config =
848*a7dea167SDimitry Andric       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
849*a7dea167SDimitry Andric   if (Config)
850*a7dea167SDimitry Andric     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
8510b57cec5SDimitry Andric }
8520b57cec5SDimitry Andric 
8530b57cec5SDimitry Andric bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
8540b57cec5SDimitry Andric   return true;
8550b57cec5SDimitry Andric }
856