10b57cec5SDimitry Andric //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This checker defines the attack surface for generic taint propagation. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric // The taint information produced by it might be useful to other checkers. For 120b57cec5SDimitry Andric // example, checkers should report errors which involve tainted data more 130b57cec5SDimitry Andric // aggressively, even if the involved symbols are under constrained. 140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric #include "Taint.h" 18a7dea167SDimitry Andric #include "Yaml.h" 190b57cec5SDimitry Andric #include "clang/AST/Attr.h" 200b57cec5SDimitry Andric #include "clang/Basic/Builtins.h" 21a7dea167SDimitry Andric #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 220b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 230b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h" 240b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/CheckerManager.h" 255ffd83dbSDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 260b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 270b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 28a7dea167SDimitry Andric #include "llvm/Support/YAMLTraits.h" 295ffd83dbSDimitry Andric 30480093f4SDimitry Andric #include <algorithm> 31a7dea167SDimitry Andric #include <limits> 325ffd83dbSDimitry Andric #include <memory> 33480093f4SDimitry Andric #include <unordered_map> 340b57cec5SDimitry Andric #include <utility> 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric using namespace clang; 370b57cec5SDimitry Andric using namespace ento; 380b57cec5SDimitry Andric using namespace taint; 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric namespace { 415ffd83dbSDimitry Andric class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { 420b57cec5SDimitry Andric public: 430b57cec5SDimitry Andric static void *getTag() { 440b57cec5SDimitry Andric static int Tag; 450b57cec5SDimitry Andric return &Tag; 460b57cec5SDimitry Andric } 470b57cec5SDimitry Andric 485ffd83dbSDimitry Andric void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 495ffd83dbSDimitry Andric void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 500b57cec5SDimitry Andric 51a7dea167SDimitry Andric void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 52a7dea167SDimitry Andric const char *Sep) const override; 53a7dea167SDimitry Andric 54a7dea167SDimitry Andric using ArgVector = SmallVector<unsigned, 2>; 55a7dea167SDimitry Andric using SignedArgVector = SmallVector<int, 2>; 56a7dea167SDimitry Andric 57a7dea167SDimitry Andric enum class VariadicType { None, Src, Dst }; 58a7dea167SDimitry Andric 59a7dea167SDimitry Andric /// Used to parse the configuration file. 60a7dea167SDimitry Andric struct TaintConfiguration { 61480093f4SDimitry Andric using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>; 62a7dea167SDimitry Andric 63a7dea167SDimitry Andric struct Propagation { 64a7dea167SDimitry Andric std::string Name; 65480093f4SDimitry Andric std::string Scope; 66a7dea167SDimitry Andric ArgVector SrcArgs; 67a7dea167SDimitry Andric SignedArgVector DstArgs; 68a7dea167SDimitry Andric VariadicType VarType; 69a7dea167SDimitry Andric unsigned VarIndex; 70a7dea167SDimitry Andric }; 71a7dea167SDimitry Andric 72a7dea167SDimitry Andric std::vector<Propagation> Propagations; 73480093f4SDimitry Andric std::vector<NameScopeArgs> Filters; 74480093f4SDimitry Andric std::vector<NameScopeArgs> Sinks; 75a7dea167SDimitry Andric 76a7dea167SDimitry Andric TaintConfiguration() = default; 77a7dea167SDimitry Andric TaintConfiguration(const TaintConfiguration &) = default; 78a7dea167SDimitry Andric TaintConfiguration(TaintConfiguration &&) = default; 79a7dea167SDimitry Andric TaintConfiguration &operator=(const TaintConfiguration &) = default; 80a7dea167SDimitry Andric TaintConfiguration &operator=(TaintConfiguration &&) = default; 81a7dea167SDimitry Andric }; 82a7dea167SDimitry Andric 83a7dea167SDimitry Andric /// Convert SignedArgVector to ArgVector. 84a7dea167SDimitry Andric ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, 855ffd83dbSDimitry Andric const SignedArgVector &Args); 86a7dea167SDimitry Andric 87a7dea167SDimitry Andric /// Parse the config. 88a7dea167SDimitry Andric void parseConfiguration(CheckerManager &Mgr, const std::string &Option, 89a7dea167SDimitry Andric TaintConfiguration &&Config); 90a7dea167SDimitry Andric 91a7dea167SDimitry Andric static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; 92a7dea167SDimitry Andric /// Denotes the return vale. 93a7dea167SDimitry Andric static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - 94a7dea167SDimitry Andric 1}; 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric private: 970b57cec5SDimitry Andric mutable std::unique_ptr<BugType> BT; 980b57cec5SDimitry Andric void initBugType() const { 990b57cec5SDimitry Andric if (!BT) 1005ffd83dbSDimitry Andric BT = std::make_unique<BugType>(this, "Use of Untrusted Data", 1015ffd83dbSDimitry Andric "Untrusted Data"); 1020b57cec5SDimitry Andric } 1030b57cec5SDimitry Andric 104480093f4SDimitry Andric struct FunctionData { 105480093f4SDimitry Andric FunctionData() = delete; 106*e8d8bef9SDimitry Andric FunctionData(const FunctionDecl *FDecl, StringRef Name, 107*e8d8bef9SDimitry Andric std::string FullName) 108*e8d8bef9SDimitry Andric : FDecl(FDecl), Name(Name), FullName(std::move(FullName)) {} 109480093f4SDimitry Andric FunctionData(const FunctionData &) = default; 110480093f4SDimitry Andric FunctionData(FunctionData &&) = default; 111480093f4SDimitry Andric FunctionData &operator=(const FunctionData &) = delete; 112480093f4SDimitry Andric FunctionData &operator=(FunctionData &&) = delete; 113480093f4SDimitry Andric 1145ffd83dbSDimitry Andric static Optional<FunctionData> create(const CallEvent &Call, 115480093f4SDimitry Andric const CheckerContext &C) { 1165ffd83dbSDimitry Andric if (!Call.getDecl()) 1175ffd83dbSDimitry Andric return None; 1185ffd83dbSDimitry Andric 1195ffd83dbSDimitry Andric const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 120480093f4SDimitry Andric if (!FDecl || (FDecl->getKind() != Decl::Function && 121480093f4SDimitry Andric FDecl->getKind() != Decl::CXXMethod)) 122480093f4SDimitry Andric return None; 123480093f4SDimitry Andric 124480093f4SDimitry Andric StringRef Name = C.getCalleeName(FDecl); 125480093f4SDimitry Andric std::string FullName = FDecl->getQualifiedNameAsString(); 126480093f4SDimitry Andric if (Name.empty() || FullName.empty()) 127480093f4SDimitry Andric return None; 128480093f4SDimitry Andric 129*e8d8bef9SDimitry Andric return FunctionData{FDecl, Name, std::move(FullName)}; 130480093f4SDimitry Andric } 131480093f4SDimitry Andric 132480093f4SDimitry Andric bool isInScope(StringRef Scope) const { 133480093f4SDimitry Andric return StringRef(FullName).startswith(Scope); 134480093f4SDimitry Andric } 135480093f4SDimitry Andric 136480093f4SDimitry Andric const FunctionDecl *const FDecl; 137480093f4SDimitry Andric const StringRef Name; 138480093f4SDimitry Andric const std::string FullName; 139480093f4SDimitry Andric }; 140480093f4SDimitry Andric 1410b57cec5SDimitry Andric /// Catch taint related bugs. Check if tainted data is passed to a 142480093f4SDimitry Andric /// system call etc. Returns true on matching. 1435ffd83dbSDimitry Andric bool checkPre(const CallEvent &Call, const FunctionData &FData, 144480093f4SDimitry Andric CheckerContext &C) const; 1450b57cec5SDimitry Andric 146480093f4SDimitry Andric /// Add taint sources on a pre-visit. Returns true on matching. 1475ffd83dbSDimitry Andric bool addSourcesPre(const CallEvent &Call, const FunctionData &FData, 148480093f4SDimitry Andric CheckerContext &C) const; 1490b57cec5SDimitry Andric 150480093f4SDimitry Andric /// Mark filter's arguments not tainted on a pre-visit. Returns true on 151480093f4SDimitry Andric /// matching. 1525ffd83dbSDimitry Andric bool addFiltersPre(const CallEvent &Call, const FunctionData &FData, 153480093f4SDimitry Andric CheckerContext &C) const; 154480093f4SDimitry Andric 155480093f4SDimitry Andric /// Propagate taint generated at pre-visit. Returns true on matching. 1565ffd83dbSDimitry Andric static bool propagateFromPre(const CallEvent &Call, CheckerContext &C); 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric /// Check if the region the expression evaluates to is the standard input, 1590b57cec5SDimitry Andric /// and thus, is tainted. 1600b57cec5SDimitry Andric static bool isStdin(const Expr *E, CheckerContext &C); 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric /// Given a pointer argument, return the value it points to. 1635ffd83dbSDimitry Andric static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg); 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric /// Check for CWE-134: Uncontrolled Format String. 166a7dea167SDimitry Andric static constexpr llvm::StringLiteral MsgUncontrolledFormatString = 167a7dea167SDimitry Andric "Untrusted data is used as a format string " 168a7dea167SDimitry Andric "(CWE-134: Uncontrolled Format String)"; 1695ffd83dbSDimitry Andric bool checkUncontrolledFormatString(const CallEvent &Call, 1700b57cec5SDimitry Andric CheckerContext &C) const; 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric /// Check for: 1730b57cec5SDimitry Andric /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 1740b57cec5SDimitry Andric /// CWE-78, "Failure to Sanitize Data into an OS Command" 175a7dea167SDimitry Andric static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 176a7dea167SDimitry Andric "Untrusted data is passed to a system call " 177a7dea167SDimitry Andric "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 1785ffd83dbSDimitry Andric bool checkSystemCall(const CallEvent &Call, StringRef Name, 1790b57cec5SDimitry Andric CheckerContext &C) const; 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric /// Check if tainted data is used as a buffer size ins strn.. functions, 1820b57cec5SDimitry Andric /// and allocators. 183a7dea167SDimitry Andric static constexpr llvm::StringLiteral MsgTaintedBufferSize = 184a7dea167SDimitry Andric "Untrusted data is used to specify the buffer size " 185a7dea167SDimitry Andric "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 186a7dea167SDimitry Andric "for character data and the null terminator)"; 1875ffd83dbSDimitry Andric bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const; 1880b57cec5SDimitry Andric 189a7dea167SDimitry Andric /// Check if tainted data is used as a custom sink's parameter. 190a7dea167SDimitry Andric static constexpr llvm::StringLiteral MsgCustomSink = 191a7dea167SDimitry Andric "Untrusted data is passed to a user-defined sink"; 1925ffd83dbSDimitry Andric bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData, 1930b57cec5SDimitry Andric CheckerContext &C) const; 1940b57cec5SDimitry Andric 195a7dea167SDimitry Andric /// Generate a report if the expression is tainted or points to tainted data. 196a7dea167SDimitry Andric bool generateReportIfTainted(const Expr *E, StringRef Msg, 197a7dea167SDimitry Andric CheckerContext &C) const; 198a7dea167SDimitry Andric 199a7dea167SDimitry Andric struct TaintPropagationRule; 200480093f4SDimitry Andric template <typename T> 201480093f4SDimitry Andric using ConfigDataMap = 202480093f4SDimitry Andric std::unordered_multimap<std::string, std::pair<std::string, T>>; 203480093f4SDimitry Andric using NameRuleMap = ConfigDataMap<TaintPropagationRule>; 204480093f4SDimitry Andric using NameArgMap = ConfigDataMap<ArgVector>; 205480093f4SDimitry Andric 206480093f4SDimitry Andric /// Find a function with the given name and scope. Returns the first match 207480093f4SDimitry Andric /// or the end of the map. 208480093f4SDimitry Andric template <typename T> 209480093f4SDimitry Andric static auto findFunctionInConfig(const ConfigDataMap<T> &Map, 210480093f4SDimitry Andric const FunctionData &FData); 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric /// A struct used to specify taint propagation rules for a function. 2130b57cec5SDimitry Andric /// 2140b57cec5SDimitry Andric /// If any of the possible taint source arguments is tainted, all of the 2150b57cec5SDimitry Andric /// destination arguments should also be tainted. Use InvalidArgIndex in the 2160b57cec5SDimitry Andric /// src list to specify that all of the arguments can introduce taint. Use 2170b57cec5SDimitry Andric /// InvalidArgIndex in the dst arguments to signify that all the non-const 2180b57cec5SDimitry Andric /// pointer and reference arguments might be tainted on return. If 2190b57cec5SDimitry Andric /// ReturnValueIndex is added to the dst list, the return value will be 2200b57cec5SDimitry Andric /// tainted. 2210b57cec5SDimitry Andric struct TaintPropagationRule { 2225ffd83dbSDimitry Andric using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call, 2230b57cec5SDimitry Andric CheckerContext &C); 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric /// List of arguments which can be taint sources and should be checked. 2260b57cec5SDimitry Andric ArgVector SrcArgs; 2270b57cec5SDimitry Andric /// List of arguments which should be tainted on function return. 2280b57cec5SDimitry Andric ArgVector DstArgs; 2290b57cec5SDimitry Andric /// Index for the first variadic parameter if exist. 2300b57cec5SDimitry Andric unsigned VariadicIndex; 2310b57cec5SDimitry Andric /// Show when a function has variadic parameters. If it has, it marks all 2320b57cec5SDimitry Andric /// of them as source or destination. 2330b57cec5SDimitry Andric VariadicType VarType; 2340b57cec5SDimitry Andric /// Special function for tainted source determination. If defined, it can 2350b57cec5SDimitry Andric /// override the default behavior. 2360b57cec5SDimitry Andric PropagationFuncType PropagationFunc; 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric TaintPropagationRule() 2390b57cec5SDimitry Andric : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), 2400b57cec5SDimitry Andric PropagationFunc(nullptr) {} 2410b57cec5SDimitry Andric 242a7dea167SDimitry Andric TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, 2430b57cec5SDimitry Andric VariadicType Var = VariadicType::None, 2440b57cec5SDimitry Andric unsigned VarIndex = InvalidArgIndex, 2450b57cec5SDimitry Andric PropagationFuncType Func = nullptr) 2460b57cec5SDimitry Andric : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), 2470b57cec5SDimitry Andric VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric /// Get the propagation rule for a given function. 2500b57cec5SDimitry Andric static TaintPropagationRule 251a7dea167SDimitry Andric getTaintPropagationRule(const NameRuleMap &CustomPropagations, 252480093f4SDimitry Andric const FunctionData &FData, CheckerContext &C); 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 2550b57cec5SDimitry Andric void addDstArg(unsigned A) { DstArgs.push_back(A); } 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric bool isNull() const { 2580b57cec5SDimitry Andric return SrcArgs.empty() && DstArgs.empty() && 2590b57cec5SDimitry Andric VariadicType::None == VarType; 2600b57cec5SDimitry Andric } 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric bool isDestinationArgument(unsigned ArgNum) const { 2630b57cec5SDimitry Andric return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); 2640b57cec5SDimitry Andric } 2650b57cec5SDimitry Andric 2665ffd83dbSDimitry Andric static bool isTaintedOrPointsToTainted(const Expr *E, 2675ffd83dbSDimitry Andric const ProgramStateRef &State, 2680b57cec5SDimitry Andric CheckerContext &C) { 2690b57cec5SDimitry Andric if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) 2700b57cec5SDimitry Andric return true; 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric if (!E->getType().getTypePtr()->isPointerType()) 2730b57cec5SDimitry Andric return false; 2740b57cec5SDimitry Andric 2755ffd83dbSDimitry Andric Optional<SVal> V = getPointeeOf(C, E); 2760b57cec5SDimitry Andric return (V && isTainted(State, *V)); 2770b57cec5SDimitry Andric } 2780b57cec5SDimitry Andric 2790b57cec5SDimitry Andric /// Pre-process a function which propagates taint according to the 2800b57cec5SDimitry Andric /// taint rule. 2815ffd83dbSDimitry Andric ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const; 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric // Functions for custom taintedness propagation. 2845ffd83dbSDimitry Andric static bool postSocket(bool IsTainted, const CallEvent &Call, 2850b57cec5SDimitry Andric CheckerContext &C); 2860b57cec5SDimitry Andric }; 287a7dea167SDimitry Andric 288480093f4SDimitry Andric /// Defines a map between the propagation function's name, scope 289480093f4SDimitry Andric /// and TaintPropagationRule. 290a7dea167SDimitry Andric NameRuleMap CustomPropagations; 291a7dea167SDimitry Andric 292480093f4SDimitry Andric /// Defines a map between the filter function's name, scope and filtering 293480093f4SDimitry Andric /// args. 294a7dea167SDimitry Andric NameArgMap CustomFilters; 295a7dea167SDimitry Andric 296480093f4SDimitry Andric /// Defines a map between the sink function's name, scope and sinking args. 297a7dea167SDimitry Andric NameArgMap CustomSinks; 2980b57cec5SDimitry Andric }; 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric const unsigned GenericTaintChecker::ReturnValueIndex; 3010b57cec5SDimitry Andric const unsigned GenericTaintChecker::InvalidArgIndex; 3020b57cec5SDimitry Andric 303a7dea167SDimitry Andric // FIXME: these lines can be removed in C++17 304a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; 305a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; 306a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; 307a7dea167SDimitry Andric constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; 3080b57cec5SDimitry Andric } // end of anonymous namespace 3090b57cec5SDimitry Andric 310a7dea167SDimitry Andric using TaintConfig = GenericTaintChecker::TaintConfiguration; 311a7dea167SDimitry Andric 312a7dea167SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) 313480093f4SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs) 314a7dea167SDimitry Andric 315a7dea167SDimitry Andric namespace llvm { 316a7dea167SDimitry Andric namespace yaml { 317a7dea167SDimitry Andric template <> struct MappingTraits<TaintConfig> { 318a7dea167SDimitry Andric static void mapping(IO &IO, TaintConfig &Config) { 319a7dea167SDimitry Andric IO.mapOptional("Propagations", Config.Propagations); 320a7dea167SDimitry Andric IO.mapOptional("Filters", Config.Filters); 321a7dea167SDimitry Andric IO.mapOptional("Sinks", Config.Sinks); 322a7dea167SDimitry Andric } 323a7dea167SDimitry Andric }; 324a7dea167SDimitry Andric 325a7dea167SDimitry Andric template <> struct MappingTraits<TaintConfig::Propagation> { 326a7dea167SDimitry Andric static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { 327a7dea167SDimitry Andric IO.mapRequired("Name", Propagation.Name); 328480093f4SDimitry Andric IO.mapOptional("Scope", Propagation.Scope); 329a7dea167SDimitry Andric IO.mapOptional("SrcArgs", Propagation.SrcArgs); 330a7dea167SDimitry Andric IO.mapOptional("DstArgs", Propagation.DstArgs); 331a7dea167SDimitry Andric IO.mapOptional("VariadicType", Propagation.VarType, 332a7dea167SDimitry Andric GenericTaintChecker::VariadicType::None); 333a7dea167SDimitry Andric IO.mapOptional("VariadicIndex", Propagation.VarIndex, 334a7dea167SDimitry Andric GenericTaintChecker::InvalidArgIndex); 335a7dea167SDimitry Andric } 336a7dea167SDimitry Andric }; 337a7dea167SDimitry Andric 338a7dea167SDimitry Andric template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { 339a7dea167SDimitry Andric static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { 340a7dea167SDimitry Andric IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); 341a7dea167SDimitry Andric IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); 342a7dea167SDimitry Andric IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); 343a7dea167SDimitry Andric } 344a7dea167SDimitry Andric }; 345a7dea167SDimitry Andric 346480093f4SDimitry Andric template <> struct MappingTraits<TaintConfig::NameScopeArgs> { 347480093f4SDimitry Andric static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) { 348480093f4SDimitry Andric IO.mapRequired("Name", std::get<0>(NSA)); 349480093f4SDimitry Andric IO.mapOptional("Scope", std::get<1>(NSA)); 350480093f4SDimitry Andric IO.mapRequired("Args", std::get<2>(NSA)); 351a7dea167SDimitry Andric } 352a7dea167SDimitry Andric }; 353a7dea167SDimitry Andric } // namespace yaml 354a7dea167SDimitry Andric } // namespace llvm 355a7dea167SDimitry Andric 3560b57cec5SDimitry Andric /// A set which is used to pass information from call pre-visit instruction 3570b57cec5SDimitry Andric /// to the call post-visit. The values are unsigned integers, which are either 3580b57cec5SDimitry Andric /// ReturnValueIndex, or indexes of the pointer/reference argument, which 3590b57cec5SDimitry Andric /// points to data, which should be tainted on return. 3600b57cec5SDimitry Andric REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 3610b57cec5SDimitry Andric 3625ffd83dbSDimitry Andric GenericTaintChecker::ArgVector 3635ffd83dbSDimitry Andric GenericTaintChecker::convertToArgVector(CheckerManager &Mgr, 3645ffd83dbSDimitry Andric const std::string &Option, 3655ffd83dbSDimitry Andric const SignedArgVector &Args) { 366a7dea167SDimitry Andric ArgVector Result; 367a7dea167SDimitry Andric for (int Arg : Args) { 368a7dea167SDimitry Andric if (Arg == -1) 369a7dea167SDimitry Andric Result.push_back(ReturnValueIndex); 370a7dea167SDimitry Andric else if (Arg < -1) { 371a7dea167SDimitry Andric Result.push_back(InvalidArgIndex); 372a7dea167SDimitry Andric Mgr.reportInvalidCheckerOptionValue( 373a7dea167SDimitry Andric this, Option, 374a7dea167SDimitry Andric "an argument number for propagation rules greater or equal to -1"); 375a7dea167SDimitry Andric } else 376a7dea167SDimitry Andric Result.push_back(static_cast<unsigned>(Arg)); 377a7dea167SDimitry Andric } 378a7dea167SDimitry Andric return Result; 379a7dea167SDimitry Andric } 380a7dea167SDimitry Andric 381a7dea167SDimitry Andric void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, 382a7dea167SDimitry Andric const std::string &Option, 383a7dea167SDimitry Andric TaintConfiguration &&Config) { 384a7dea167SDimitry Andric for (auto &P : Config.Propagations) { 385480093f4SDimitry Andric GenericTaintChecker::CustomPropagations.emplace( 386480093f4SDimitry Andric P.Name, 387480093f4SDimitry Andric std::make_pair(P.Scope, TaintPropagationRule{ 388480093f4SDimitry Andric std::move(P.SrcArgs), 389480093f4SDimitry Andric convertToArgVector(Mgr, Option, P.DstArgs), 390480093f4SDimitry Andric P.VarType, P.VarIndex})); 391a7dea167SDimitry Andric } 392a7dea167SDimitry Andric 393a7dea167SDimitry Andric for (auto &F : Config.Filters) { 394480093f4SDimitry Andric GenericTaintChecker::CustomFilters.emplace( 395480093f4SDimitry Andric std::get<0>(F), 396480093f4SDimitry Andric std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F)))); 397a7dea167SDimitry Andric } 398a7dea167SDimitry Andric 399a7dea167SDimitry Andric for (auto &S : Config.Sinks) { 400480093f4SDimitry Andric GenericTaintChecker::CustomSinks.emplace( 401480093f4SDimitry Andric std::get<0>(S), 402480093f4SDimitry Andric std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S)))); 403a7dea167SDimitry Andric } 404a7dea167SDimitry Andric } 405a7dea167SDimitry Andric 406480093f4SDimitry Andric template <typename T> 407480093f4SDimitry Andric auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map, 408480093f4SDimitry Andric const FunctionData &FData) { 4095ffd83dbSDimitry Andric auto Range = Map.equal_range(std::string(FData.Name)); 410480093f4SDimitry Andric auto It = 411480093f4SDimitry Andric std::find_if(Range.first, Range.second, [&FData](const auto &Entry) { 412480093f4SDimitry Andric const auto &Value = Entry.second; 413480093f4SDimitry Andric StringRef Scope = Value.first; 414480093f4SDimitry Andric return Scope.empty() || FData.isInScope(Scope); 415480093f4SDimitry Andric }); 416480093f4SDimitry Andric return It != Range.second ? It : Map.end(); 417480093f4SDimitry Andric } 418480093f4SDimitry Andric 4190b57cec5SDimitry Andric GenericTaintChecker::TaintPropagationRule 4200b57cec5SDimitry Andric GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 421480093f4SDimitry Andric const NameRuleMap &CustomPropagations, const FunctionData &FData, 422480093f4SDimitry Andric CheckerContext &C) { 4230b57cec5SDimitry Andric // TODO: Currently, we might lose precision here: we always mark a return 4240b57cec5SDimitry Andric // value as tainted even if it's just a pointer, pointing to tainted data. 4250b57cec5SDimitry Andric 4260b57cec5SDimitry Andric // Check for exact name match for functions without builtin substitutes. 427480093f4SDimitry Andric // Use qualified name, because these are C functions without namespace. 4280b57cec5SDimitry Andric TaintPropagationRule Rule = 429480093f4SDimitry Andric llvm::StringSwitch<TaintPropagationRule>(FData.FullName) 4300b57cec5SDimitry Andric // Source functions 4310b57cec5SDimitry Andric // TODO: Add support for vfscanf & family. 4325ffd83dbSDimitry Andric .Case("fdopen", {{}, {ReturnValueIndex}}) 4335ffd83dbSDimitry Andric .Case("fopen", {{}, {ReturnValueIndex}}) 4345ffd83dbSDimitry Andric .Case("freopen", {{}, {ReturnValueIndex}}) 4355ffd83dbSDimitry Andric .Case("getch", {{}, {ReturnValueIndex}}) 4365ffd83dbSDimitry Andric .Case("getchar", {{}, {ReturnValueIndex}}) 4375ffd83dbSDimitry Andric .Case("getchar_unlocked", {{}, {ReturnValueIndex}}) 4385ffd83dbSDimitry Andric .Case("getenv", {{}, {ReturnValueIndex}}) 4395ffd83dbSDimitry Andric .Case("gets", {{}, {0, ReturnValueIndex}}) 4405ffd83dbSDimitry Andric .Case("scanf", {{}, {}, VariadicType::Dst, 1}) 4415ffd83dbSDimitry Andric .Case("socket", {{}, 4425ffd83dbSDimitry Andric {ReturnValueIndex}, 4435ffd83dbSDimitry Andric VariadicType::None, 4440b57cec5SDimitry Andric InvalidArgIndex, 4455ffd83dbSDimitry Andric &TaintPropagationRule::postSocket}) 4465ffd83dbSDimitry Andric .Case("wgetch", {{}, {ReturnValueIndex}}) 4470b57cec5SDimitry Andric // Propagating functions 4485ffd83dbSDimitry Andric .Case("atoi", {{0}, {ReturnValueIndex}}) 4495ffd83dbSDimitry Andric .Case("atol", {{0}, {ReturnValueIndex}}) 4505ffd83dbSDimitry Andric .Case("atoll", {{0}, {ReturnValueIndex}}) 4515ffd83dbSDimitry Andric .Case("fgetc", {{0}, {ReturnValueIndex}}) 4525ffd83dbSDimitry Andric .Case("fgetln", {{0}, {ReturnValueIndex}}) 4535ffd83dbSDimitry Andric .Case("fgets", {{2}, {0, ReturnValueIndex}}) 4545ffd83dbSDimitry Andric .Case("fscanf", {{0}, {}, VariadicType::Dst, 2}) 4555ffd83dbSDimitry Andric .Case("sscanf", {{0}, {}, VariadicType::Dst, 2}) 4565ffd83dbSDimitry Andric .Case("getc", {{0}, {ReturnValueIndex}}) 4575ffd83dbSDimitry Andric .Case("getc_unlocked", {{0}, {ReturnValueIndex}}) 4585ffd83dbSDimitry Andric .Case("getdelim", {{3}, {0}}) 4595ffd83dbSDimitry Andric .Case("getline", {{2}, {0}}) 4605ffd83dbSDimitry Andric .Case("getw", {{0}, {ReturnValueIndex}}) 4615ffd83dbSDimitry Andric .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}}) 4625ffd83dbSDimitry Andric .Case("read", {{0, 2}, {1, ReturnValueIndex}}) 4635ffd83dbSDimitry Andric .Case("strchr", {{0}, {ReturnValueIndex}}) 4645ffd83dbSDimitry Andric .Case("strrchr", {{0}, {ReturnValueIndex}}) 4655ffd83dbSDimitry Andric .Case("tolower", {{0}, {ReturnValueIndex}}) 4665ffd83dbSDimitry Andric .Case("toupper", {{0}, {ReturnValueIndex}}) 4675ffd83dbSDimitry Andric .Default({}); 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric if (!Rule.isNull()) 4700b57cec5SDimitry Andric return Rule; 4715ffd83dbSDimitry Andric assert(FData.FDecl); 4720b57cec5SDimitry Andric 4730b57cec5SDimitry Andric // Check if it's one of the memory setting/copying functions. 4740b57cec5SDimitry Andric // This check is specialized but faster then calling isCLibraryFunction. 475480093f4SDimitry Andric const FunctionDecl *FDecl = FData.FDecl; 4760b57cec5SDimitry Andric unsigned BId = 0; 4775ffd83dbSDimitry Andric if ((BId = FDecl->getMemoryFunctionKind())) { 4780b57cec5SDimitry Andric switch (BId) { 4790b57cec5SDimitry Andric case Builtin::BImemcpy: 4800b57cec5SDimitry Andric case Builtin::BImemmove: 4810b57cec5SDimitry Andric case Builtin::BIstrncpy: 4820b57cec5SDimitry Andric case Builtin::BIstrncat: 4835ffd83dbSDimitry Andric return {{1, 2}, {0, ReturnValueIndex}}; 4840b57cec5SDimitry Andric case Builtin::BIstrlcpy: 4850b57cec5SDimitry Andric case Builtin::BIstrlcat: 4865ffd83dbSDimitry Andric return {{1, 2}, {0}}; 4870b57cec5SDimitry Andric case Builtin::BIstrndup: 4885ffd83dbSDimitry Andric return {{0, 1}, {ReturnValueIndex}}; 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric default: 4910b57cec5SDimitry Andric break; 4925ffd83dbSDimitry Andric } 4935ffd83dbSDimitry Andric } 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric // Process all other functions which could be defined as builtins. 4960b57cec5SDimitry Andric if (Rule.isNull()) { 4975ffd83dbSDimitry Andric const auto OneOf = [FDecl](const auto &... Name) { 4985ffd83dbSDimitry Andric // FIXME: use fold expression in C++17 4995ffd83dbSDimitry Andric using unused = int[]; 5005ffd83dbSDimitry Andric bool ret = false; 5015ffd83dbSDimitry Andric static_cast<void>(unused{ 5025ffd83dbSDimitry Andric 0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...}); 5035ffd83dbSDimitry Andric return ret; 5045ffd83dbSDimitry Andric }; 5055ffd83dbSDimitry Andric if (OneOf("snprintf")) 5065ffd83dbSDimitry Andric return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3}; 5075ffd83dbSDimitry Andric if (OneOf("sprintf")) 5085ffd83dbSDimitry Andric return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2}; 5095ffd83dbSDimitry Andric if (OneOf("strcpy", "stpcpy", "strcat")) 5105ffd83dbSDimitry Andric return {{1}, {0, ReturnValueIndex}}; 5115ffd83dbSDimitry Andric if (OneOf("bcopy")) 5125ffd83dbSDimitry Andric return {{0, 2}, {1}}; 5135ffd83dbSDimitry Andric if (OneOf("strdup", "strdupa", "wcsdup")) 5145ffd83dbSDimitry Andric return {{0}, {ReturnValueIndex}}; 5150b57cec5SDimitry Andric } 5160b57cec5SDimitry Andric 5175ffd83dbSDimitry Andric // Skipping the following functions, since they might be used for cleansing or 5185ffd83dbSDimitry Andric // smart memory copy: 5190b57cec5SDimitry Andric // - memccpy - copying until hitting a special character. 5200b57cec5SDimitry Andric 521480093f4SDimitry Andric auto It = findFunctionInConfig(CustomPropagations, FData); 5225ffd83dbSDimitry Andric if (It != CustomPropagations.end()) 5235ffd83dbSDimitry Andric return It->second.second; 5245ffd83dbSDimitry Andric return {}; 525480093f4SDimitry Andric } 526a7dea167SDimitry Andric 5275ffd83dbSDimitry Andric void GenericTaintChecker::checkPreCall(const CallEvent &Call, 5280b57cec5SDimitry Andric CheckerContext &C) const { 5295ffd83dbSDimitry Andric Optional<FunctionData> FData = FunctionData::create(Call, C); 530480093f4SDimitry Andric if (!FData) 531480093f4SDimitry Andric return; 532480093f4SDimitry Andric 5330b57cec5SDimitry Andric // Check for taintedness related errors first: system call, uncontrolled 5340b57cec5SDimitry Andric // format string, tainted buffer size. 5355ffd83dbSDimitry Andric if (checkPre(Call, *FData, C)) 5360b57cec5SDimitry Andric return; 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric // Marks the function's arguments and/or return value tainted if it present in 5390b57cec5SDimitry Andric // the list. 5405ffd83dbSDimitry Andric if (addSourcesPre(Call, *FData, C)) 541480093f4SDimitry Andric return; 542480093f4SDimitry Andric 5435ffd83dbSDimitry Andric addFiltersPre(Call, *FData, C); 5440b57cec5SDimitry Andric } 5450b57cec5SDimitry Andric 5465ffd83dbSDimitry Andric void GenericTaintChecker::checkPostCall(const CallEvent &Call, 5470b57cec5SDimitry Andric CheckerContext &C) const { 5480b57cec5SDimitry Andric // Set the marked values as tainted. The return value only accessible from 5490b57cec5SDimitry Andric // checkPostStmt. 5505ffd83dbSDimitry Andric propagateFromPre(Call, C); 5510b57cec5SDimitry Andric } 5520b57cec5SDimitry Andric 5530b57cec5SDimitry Andric void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 5540b57cec5SDimitry Andric const char *NL, const char *Sep) const { 5550b57cec5SDimitry Andric printTaint(State, Out, NL, Sep); 5560b57cec5SDimitry Andric } 5570b57cec5SDimitry Andric 5585ffd83dbSDimitry Andric bool GenericTaintChecker::addSourcesPre(const CallEvent &Call, 559480093f4SDimitry Andric const FunctionData &FData, 5600b57cec5SDimitry Andric CheckerContext &C) const { 5610b57cec5SDimitry Andric // First, try generating a propagation rule for this function. 562a7dea167SDimitry Andric TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( 563480093f4SDimitry Andric this->CustomPropagations, FData, C); 5640b57cec5SDimitry Andric if (!Rule.isNull()) { 5655ffd83dbSDimitry Andric ProgramStateRef State = Rule.process(Call, C); 566480093f4SDimitry Andric if (State) { 5670b57cec5SDimitry Andric C.addTransition(State); 568480093f4SDimitry Andric return true; 569480093f4SDimitry Andric } 570480093f4SDimitry Andric } 571480093f4SDimitry Andric return false; 5720b57cec5SDimitry Andric } 5730b57cec5SDimitry Andric 5745ffd83dbSDimitry Andric bool GenericTaintChecker::addFiltersPre(const CallEvent &Call, 575480093f4SDimitry Andric const FunctionData &FData, 576480093f4SDimitry Andric CheckerContext &C) const { 577480093f4SDimitry Andric auto It = findFunctionInConfig(CustomFilters, FData); 578480093f4SDimitry Andric if (It == CustomFilters.end()) 579480093f4SDimitry Andric return false; 580480093f4SDimitry Andric 581480093f4SDimitry Andric ProgramStateRef State = C.getState(); 582480093f4SDimitry Andric const auto &Value = It->second; 583480093f4SDimitry Andric const ArgVector &Args = Value.second; 584480093f4SDimitry Andric for (unsigned ArgNum : Args) { 5855ffd83dbSDimitry Andric if (ArgNum >= Call.getNumArgs()) 586480093f4SDimitry Andric continue; 587480093f4SDimitry Andric 5885ffd83dbSDimitry Andric const Expr *Arg = Call.getArgExpr(ArgNum); 5895ffd83dbSDimitry Andric Optional<SVal> V = getPointeeOf(C, Arg); 590480093f4SDimitry Andric if (V) 591480093f4SDimitry Andric State = removeTaint(State, *V); 592480093f4SDimitry Andric } 593480093f4SDimitry Andric 594480093f4SDimitry Andric if (State != C.getState()) { 5950b57cec5SDimitry Andric C.addTransition(State); 596480093f4SDimitry Andric return true; 597480093f4SDimitry Andric } 598480093f4SDimitry Andric return false; 5990b57cec5SDimitry Andric } 6000b57cec5SDimitry Andric 6015ffd83dbSDimitry Andric bool GenericTaintChecker::propagateFromPre(const CallEvent &Call, 6025ffd83dbSDimitry Andric CheckerContext &C) { 6030b57cec5SDimitry Andric ProgramStateRef State = C.getState(); 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric // Depending on what was tainted at pre-visit, we determined a set of 6060b57cec5SDimitry Andric // arguments which should be tainted after the function returns. These are 6070b57cec5SDimitry Andric // stored in the state as TaintArgsOnPostVisit set. 6080b57cec5SDimitry Andric TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 6090b57cec5SDimitry Andric if (TaintArgs.isEmpty()) 6100b57cec5SDimitry Andric return false; 6110b57cec5SDimitry Andric 6120b57cec5SDimitry Andric for (unsigned ArgNum : TaintArgs) { 6130b57cec5SDimitry Andric // Special handling for the tainted return value. 6140b57cec5SDimitry Andric if (ArgNum == ReturnValueIndex) { 6155ffd83dbSDimitry Andric State = addTaint(State, Call.getReturnValue()); 6160b57cec5SDimitry Andric continue; 6170b57cec5SDimitry Andric } 6180b57cec5SDimitry Andric 6190b57cec5SDimitry Andric // The arguments are pointer arguments. The data they are pointing at is 6200b57cec5SDimitry Andric // tainted after the call. 6215ffd83dbSDimitry Andric if (Call.getNumArgs() < (ArgNum + 1)) 6220b57cec5SDimitry Andric return false; 6235ffd83dbSDimitry Andric const Expr *Arg = Call.getArgExpr(ArgNum); 6245ffd83dbSDimitry Andric Optional<SVal> V = getPointeeOf(C, Arg); 6250b57cec5SDimitry Andric if (V) 6260b57cec5SDimitry Andric State = addTaint(State, *V); 6270b57cec5SDimitry Andric } 6280b57cec5SDimitry Andric 6290b57cec5SDimitry Andric // Clear up the taint info from the state. 6300b57cec5SDimitry Andric State = State->remove<TaintArgsOnPostVisit>(); 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric if (State != C.getState()) { 6330b57cec5SDimitry Andric C.addTransition(State); 6340b57cec5SDimitry Andric return true; 6350b57cec5SDimitry Andric } 6360b57cec5SDimitry Andric return false; 6370b57cec5SDimitry Andric } 6380b57cec5SDimitry Andric 6395ffd83dbSDimitry Andric bool GenericTaintChecker::checkPre(const CallEvent &Call, 640480093f4SDimitry Andric const FunctionData &FData, 6410b57cec5SDimitry Andric CheckerContext &C) const { 6425ffd83dbSDimitry Andric if (checkUncontrolledFormatString(Call, C)) 6430b57cec5SDimitry Andric return true; 6440b57cec5SDimitry Andric 6455ffd83dbSDimitry Andric if (checkSystemCall(Call, FData.Name, C)) 6460b57cec5SDimitry Andric return true; 6470b57cec5SDimitry Andric 6485ffd83dbSDimitry Andric if (checkTaintedBufferSize(Call, C)) 6490b57cec5SDimitry Andric return true; 6500b57cec5SDimitry Andric 6515ffd83dbSDimitry Andric return checkCustomSinks(Call, FData, C); 6520b57cec5SDimitry Andric } 6530b57cec5SDimitry Andric 6545ffd83dbSDimitry Andric Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C, 6550b57cec5SDimitry Andric const Expr *Arg) { 6560b57cec5SDimitry Andric ProgramStateRef State = C.getState(); 6570b57cec5SDimitry Andric SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 6580b57cec5SDimitry Andric if (AddrVal.isUnknownOrUndef()) 6590b57cec5SDimitry Andric return None; 6600b57cec5SDimitry Andric 6610b57cec5SDimitry Andric Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 6620b57cec5SDimitry Andric if (!AddrLoc) 6630b57cec5SDimitry Andric return None; 6640b57cec5SDimitry Andric 6650b57cec5SDimitry Andric QualType ArgTy = Arg->getType().getCanonicalType(); 6660b57cec5SDimitry Andric if (!ArgTy->isPointerType()) 667480093f4SDimitry Andric return State->getSVal(*AddrLoc); 6680b57cec5SDimitry Andric 6690b57cec5SDimitry Andric QualType ValTy = ArgTy->getPointeeType(); 6700b57cec5SDimitry Andric 6710b57cec5SDimitry Andric // Do not dereference void pointers. Treat them as byte pointers instead. 6720b57cec5SDimitry Andric // FIXME: we might want to consider more than just the first byte. 6730b57cec5SDimitry Andric if (ValTy->isVoidType()) 6740b57cec5SDimitry Andric ValTy = C.getASTContext().CharTy; 6750b57cec5SDimitry Andric 6760b57cec5SDimitry Andric return State->getSVal(*AddrLoc, ValTy); 6770b57cec5SDimitry Andric } 6780b57cec5SDimitry Andric 6790b57cec5SDimitry Andric ProgramStateRef 6805ffd83dbSDimitry Andric GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call, 6810b57cec5SDimitry Andric CheckerContext &C) const { 6820b57cec5SDimitry Andric ProgramStateRef State = C.getState(); 6830b57cec5SDimitry Andric 6840b57cec5SDimitry Andric // Check for taint in arguments. 6850b57cec5SDimitry Andric bool IsTainted = true; 6860b57cec5SDimitry Andric for (unsigned ArgNum : SrcArgs) { 6875ffd83dbSDimitry Andric if (ArgNum >= Call.getNumArgs()) 688a7dea167SDimitry Andric continue; 689a7dea167SDimitry Andric 6905ffd83dbSDimitry Andric if ((IsTainted = 6915ffd83dbSDimitry Andric isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C))) 6920b57cec5SDimitry Andric break; 6930b57cec5SDimitry Andric } 6940b57cec5SDimitry Andric 6950b57cec5SDimitry Andric // Check for taint in variadic arguments. 6960b57cec5SDimitry Andric if (!IsTainted && VariadicType::Src == VarType) { 6970b57cec5SDimitry Andric // Check if any of the arguments is tainted 6985ffd83dbSDimitry Andric for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { 6995ffd83dbSDimitry Andric if ((IsTainted = 7005ffd83dbSDimitry Andric isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C))) 7010b57cec5SDimitry Andric break; 7020b57cec5SDimitry Andric } 7030b57cec5SDimitry Andric } 7040b57cec5SDimitry Andric 7050b57cec5SDimitry Andric if (PropagationFunc) 7065ffd83dbSDimitry Andric IsTainted = PropagationFunc(IsTainted, Call, C); 7070b57cec5SDimitry Andric 7080b57cec5SDimitry Andric if (!IsTainted) 7090b57cec5SDimitry Andric return State; 7100b57cec5SDimitry Andric 7110b57cec5SDimitry Andric // Mark the arguments which should be tainted after the function returns. 7120b57cec5SDimitry Andric for (unsigned ArgNum : DstArgs) { 7130b57cec5SDimitry Andric // Should mark the return value? 7140b57cec5SDimitry Andric if (ArgNum == ReturnValueIndex) { 7150b57cec5SDimitry Andric State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 7160b57cec5SDimitry Andric continue; 7170b57cec5SDimitry Andric } 7180b57cec5SDimitry Andric 7195ffd83dbSDimitry Andric if (ArgNum >= Call.getNumArgs()) 720a7dea167SDimitry Andric continue; 721a7dea167SDimitry Andric 7220b57cec5SDimitry Andric // Mark the given argument. 7230b57cec5SDimitry Andric State = State->add<TaintArgsOnPostVisit>(ArgNum); 7240b57cec5SDimitry Andric } 7250b57cec5SDimitry Andric 7260b57cec5SDimitry Andric // Mark all variadic arguments tainted if present. 7270b57cec5SDimitry Andric if (VariadicType::Dst == VarType) { 7280b57cec5SDimitry Andric // For all pointer and references that were passed in: 7290b57cec5SDimitry Andric // If they are not pointing to const data, mark data as tainted. 7300b57cec5SDimitry Andric // TODO: So far we are just going one level down; ideally we'd need to 7310b57cec5SDimitry Andric // recurse here. 7325ffd83dbSDimitry Andric for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { 7335ffd83dbSDimitry Andric const Expr *Arg = Call.getArgExpr(i); 7340b57cec5SDimitry Andric // Process pointer argument. 7350b57cec5SDimitry Andric const Type *ArgTy = Arg->getType().getTypePtr(); 7360b57cec5SDimitry Andric QualType PType = ArgTy->getPointeeType(); 7370b57cec5SDimitry Andric if ((!PType.isNull() && !PType.isConstQualified()) || 7385ffd83dbSDimitry Andric (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) { 7390b57cec5SDimitry Andric State = State->add<TaintArgsOnPostVisit>(i); 7400b57cec5SDimitry Andric } 7410b57cec5SDimitry Andric } 7425ffd83dbSDimitry Andric } 7430b57cec5SDimitry Andric 7440b57cec5SDimitry Andric return State; 7450b57cec5SDimitry Andric } 7460b57cec5SDimitry Andric 7470b57cec5SDimitry Andric // If argument 0(protocol domain) is network, the return value should get taint. 7485ffd83dbSDimitry Andric bool GenericTaintChecker::TaintPropagationRule::postSocket( 7495ffd83dbSDimitry Andric bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) { 7505ffd83dbSDimitry Andric SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); 7510b57cec5SDimitry Andric StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 7520b57cec5SDimitry Andric // White list the internal communication protocols. 7530b57cec5SDimitry Andric if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 7540b57cec5SDimitry Andric DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 7550b57cec5SDimitry Andric return false; 7560b57cec5SDimitry Andric return true; 7570b57cec5SDimitry Andric } 7580b57cec5SDimitry Andric 7590b57cec5SDimitry Andric bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 7600b57cec5SDimitry Andric ProgramStateRef State = C.getState(); 7610b57cec5SDimitry Andric SVal Val = C.getSVal(E); 7620b57cec5SDimitry Andric 7630b57cec5SDimitry Andric // stdin is a pointer, so it would be a region. 7640b57cec5SDimitry Andric const MemRegion *MemReg = Val.getAsRegion(); 7650b57cec5SDimitry Andric 7660b57cec5SDimitry Andric // The region should be symbolic, we do not know it's value. 7675ffd83dbSDimitry Andric const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 7680b57cec5SDimitry Andric if (!SymReg) 7690b57cec5SDimitry Andric return false; 7700b57cec5SDimitry Andric 7710b57cec5SDimitry Andric // Get it's symbol and find the declaration region it's pointing to. 7725ffd83dbSDimitry Andric const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 7730b57cec5SDimitry Andric if (!Sm) 7740b57cec5SDimitry Andric return false; 7755ffd83dbSDimitry Andric const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 7760b57cec5SDimitry Andric if (!DeclReg) 7770b57cec5SDimitry Andric return false; 7780b57cec5SDimitry Andric 7790b57cec5SDimitry Andric // This region corresponds to a declaration, find out if it's a global/extern 7800b57cec5SDimitry Andric // variable named stdin with the proper type. 7810b57cec5SDimitry Andric if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 7820b57cec5SDimitry Andric D = D->getCanonicalDecl(); 7830b57cec5SDimitry Andric if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { 7840b57cec5SDimitry Andric const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 7850b57cec5SDimitry Andric if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 7860b57cec5SDimitry Andric C.getASTContext().getFILEType().getCanonicalType()) 7870b57cec5SDimitry Andric return true; 7880b57cec5SDimitry Andric } 7890b57cec5SDimitry Andric } 7900b57cec5SDimitry Andric return false; 7910b57cec5SDimitry Andric } 7920b57cec5SDimitry Andric 7935ffd83dbSDimitry Andric static bool getPrintfFormatArgumentNum(const CallEvent &Call, 7940b57cec5SDimitry Andric const CheckerContext &C, 795a7dea167SDimitry Andric unsigned &ArgNum) { 7960b57cec5SDimitry Andric // Find if the function contains a format string argument. 7970b57cec5SDimitry Andric // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 7980b57cec5SDimitry Andric // vsnprintf, syslog, custom annotated functions. 7995ffd83dbSDimitry Andric const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 8000b57cec5SDimitry Andric if (!FDecl) 8010b57cec5SDimitry Andric return false; 8020b57cec5SDimitry Andric for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 8030b57cec5SDimitry Andric ArgNum = Format->getFormatIdx() - 1; 8045ffd83dbSDimitry Andric if ((Format->getType()->getName() == "printf") && 8055ffd83dbSDimitry Andric Call.getNumArgs() > ArgNum) 8060b57cec5SDimitry Andric return true; 8070b57cec5SDimitry Andric } 8080b57cec5SDimitry Andric 8090b57cec5SDimitry Andric // Or if a function is named setproctitle (this is a heuristic). 8105ffd83dbSDimitry Andric if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) { 8110b57cec5SDimitry Andric ArgNum = 0; 8120b57cec5SDimitry Andric return true; 8130b57cec5SDimitry Andric } 8140b57cec5SDimitry Andric 8150b57cec5SDimitry Andric return false; 8160b57cec5SDimitry Andric } 8170b57cec5SDimitry Andric 818a7dea167SDimitry Andric bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 8190b57cec5SDimitry Andric CheckerContext &C) const { 8200b57cec5SDimitry Andric assert(E); 8210b57cec5SDimitry Andric 8220b57cec5SDimitry Andric // Check for taint. 8230b57cec5SDimitry Andric ProgramStateRef State = C.getState(); 8245ffd83dbSDimitry Andric Optional<SVal> PointedToSVal = getPointeeOf(C, E); 8250b57cec5SDimitry Andric SVal TaintedSVal; 8260b57cec5SDimitry Andric if (PointedToSVal && isTainted(State, *PointedToSVal)) 8270b57cec5SDimitry Andric TaintedSVal = *PointedToSVal; 8280b57cec5SDimitry Andric else if (isTainted(State, E, C.getLocationContext())) 8290b57cec5SDimitry Andric TaintedSVal = C.getSVal(E); 8300b57cec5SDimitry Andric else 8310b57cec5SDimitry Andric return false; 8320b57cec5SDimitry Andric 8330b57cec5SDimitry Andric // Generate diagnostic. 8340b57cec5SDimitry Andric if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 8350b57cec5SDimitry Andric initBugType(); 836a7dea167SDimitry Andric auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 8370b57cec5SDimitry Andric report->addRange(E->getSourceRange()); 838a7dea167SDimitry Andric report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); 8390b57cec5SDimitry Andric C.emitReport(std::move(report)); 8400b57cec5SDimitry Andric return true; 8410b57cec5SDimitry Andric } 8420b57cec5SDimitry Andric return false; 8430b57cec5SDimitry Andric } 8440b57cec5SDimitry Andric 8450b57cec5SDimitry Andric bool GenericTaintChecker::checkUncontrolledFormatString( 8465ffd83dbSDimitry Andric const CallEvent &Call, CheckerContext &C) const { 8470b57cec5SDimitry Andric // Check if the function contains a format string argument. 848a7dea167SDimitry Andric unsigned ArgNum = 0; 8495ffd83dbSDimitry Andric if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) 8500b57cec5SDimitry Andric return false; 8510b57cec5SDimitry Andric 8520b57cec5SDimitry Andric // If either the format string content or the pointer itself are tainted, 8530b57cec5SDimitry Andric // warn. 8545ffd83dbSDimitry Andric return generateReportIfTainted(Call.getArgExpr(ArgNum), 8550b57cec5SDimitry Andric MsgUncontrolledFormatString, C); 8560b57cec5SDimitry Andric } 8570b57cec5SDimitry Andric 8585ffd83dbSDimitry Andric bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name, 8590b57cec5SDimitry Andric CheckerContext &C) const { 8600b57cec5SDimitry Andric // TODO: It might make sense to run this check on demand. In some cases, 8610b57cec5SDimitry Andric // we should check if the environment has been cleansed here. We also might 8620b57cec5SDimitry Andric // need to know if the user was reset before these calls(seteuid). 8630b57cec5SDimitry Andric unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 8640b57cec5SDimitry Andric .Case("system", 0) 8650b57cec5SDimitry Andric .Case("popen", 0) 8660b57cec5SDimitry Andric .Case("execl", 0) 8670b57cec5SDimitry Andric .Case("execle", 0) 8680b57cec5SDimitry Andric .Case("execlp", 0) 8690b57cec5SDimitry Andric .Case("execv", 0) 8700b57cec5SDimitry Andric .Case("execvp", 0) 8710b57cec5SDimitry Andric .Case("execvP", 0) 8720b57cec5SDimitry Andric .Case("execve", 0) 8730b57cec5SDimitry Andric .Case("dlopen", 0) 874a7dea167SDimitry Andric .Default(InvalidArgIndex); 8750b57cec5SDimitry Andric 8765ffd83dbSDimitry Andric if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1)) 8770b57cec5SDimitry Andric return false; 8780b57cec5SDimitry Andric 8795ffd83dbSDimitry Andric return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs, 8805ffd83dbSDimitry Andric C); 8810b57cec5SDimitry Andric } 8820b57cec5SDimitry Andric 8830b57cec5SDimitry Andric // TODO: Should this check be a part of the CString checker? 8840b57cec5SDimitry Andric // If yes, should taint be a global setting? 8855ffd83dbSDimitry Andric bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call, 8860b57cec5SDimitry Andric CheckerContext &C) const { 8875ffd83dbSDimitry Andric const auto *FDecl = Call.getDecl()->getAsFunction(); 8880b57cec5SDimitry Andric // If the function has a buffer size argument, set ArgNum. 8890b57cec5SDimitry Andric unsigned ArgNum = InvalidArgIndex; 8900b57cec5SDimitry Andric unsigned BId = 0; 8915ffd83dbSDimitry Andric if ((BId = FDecl->getMemoryFunctionKind())) { 8920b57cec5SDimitry Andric switch (BId) { 8930b57cec5SDimitry Andric case Builtin::BImemcpy: 8940b57cec5SDimitry Andric case Builtin::BImemmove: 8950b57cec5SDimitry Andric case Builtin::BIstrncpy: 8960b57cec5SDimitry Andric ArgNum = 2; 8970b57cec5SDimitry Andric break; 8980b57cec5SDimitry Andric case Builtin::BIstrndup: 8990b57cec5SDimitry Andric ArgNum = 1; 9000b57cec5SDimitry Andric break; 9010b57cec5SDimitry Andric default: 9020b57cec5SDimitry Andric break; 9035ffd83dbSDimitry Andric } 9045ffd83dbSDimitry Andric } 9050b57cec5SDimitry Andric 9060b57cec5SDimitry Andric if (ArgNum == InvalidArgIndex) { 9075ffd83dbSDimitry Andric using CCtx = CheckerContext; 9085ffd83dbSDimitry Andric if (CCtx::isCLibraryFunction(FDecl, "malloc") || 9095ffd83dbSDimitry Andric CCtx::isCLibraryFunction(FDecl, "calloc") || 9105ffd83dbSDimitry Andric CCtx::isCLibraryFunction(FDecl, "alloca")) 9110b57cec5SDimitry Andric ArgNum = 0; 9125ffd83dbSDimitry Andric else if (CCtx::isCLibraryFunction(FDecl, "memccpy")) 9130b57cec5SDimitry Andric ArgNum = 3; 9145ffd83dbSDimitry Andric else if (CCtx::isCLibraryFunction(FDecl, "realloc")) 9150b57cec5SDimitry Andric ArgNum = 1; 9165ffd83dbSDimitry Andric else if (CCtx::isCLibraryFunction(FDecl, "bcopy")) 9170b57cec5SDimitry Andric ArgNum = 2; 9180b57cec5SDimitry Andric } 9190b57cec5SDimitry Andric 9205ffd83dbSDimitry Andric return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum && 9215ffd83dbSDimitry Andric generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize, 9225ffd83dbSDimitry Andric C); 9230b57cec5SDimitry Andric } 9240b57cec5SDimitry Andric 9255ffd83dbSDimitry Andric bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call, 926480093f4SDimitry Andric const FunctionData &FData, 927a7dea167SDimitry Andric CheckerContext &C) const { 928480093f4SDimitry Andric auto It = findFunctionInConfig(CustomSinks, FData); 929a7dea167SDimitry Andric if (It == CustomSinks.end()) 930a7dea167SDimitry Andric return false; 931a7dea167SDimitry Andric 932480093f4SDimitry Andric const auto &Value = It->second; 933480093f4SDimitry Andric const GenericTaintChecker::ArgVector &Args = Value.second; 934a7dea167SDimitry Andric for (unsigned ArgNum : Args) { 9355ffd83dbSDimitry Andric if (ArgNum >= Call.getNumArgs()) 936a7dea167SDimitry Andric continue; 937a7dea167SDimitry Andric 9385ffd83dbSDimitry Andric if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C)) 939a7dea167SDimitry Andric return true; 940a7dea167SDimitry Andric } 941a7dea167SDimitry Andric 942a7dea167SDimitry Andric return false; 943a7dea167SDimitry Andric } 944a7dea167SDimitry Andric 945a7dea167SDimitry Andric void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 946a7dea167SDimitry Andric auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); 947a7dea167SDimitry Andric std::string Option{"Config"}; 948a7dea167SDimitry Andric StringRef ConfigFile = 949a7dea167SDimitry Andric Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); 950a7dea167SDimitry Andric llvm::Optional<TaintConfig> Config = 951a7dea167SDimitry Andric getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); 952a7dea167SDimitry Andric if (Config) 953a7dea167SDimitry Andric Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); 9540b57cec5SDimitry Andric } 9550b57cec5SDimitry Andric 9565ffd83dbSDimitry Andric bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { 9570b57cec5SDimitry Andric return true; 9580b57cec5SDimitry Andric } 959