10b57cec5SDimitry Andric //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This checker defines the attack surface for generic taint propagation. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric // The taint information produced by it might be useful to other checkers. For 120b57cec5SDimitry Andric // example, checkers should report errors which involve tainted data more 130b57cec5SDimitry Andric // aggressively, even if the involved symbols are under constrained. 140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric #include "Taint.h" 18a7dea167SDimitry Andric #include "Yaml.h" 190b57cec5SDimitry Andric #include "clang/AST/Attr.h" 200b57cec5SDimitry Andric #include "clang/Basic/Builtins.h" 21a7dea167SDimitry Andric #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 220b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 230b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h" 240b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25*04eeddc0SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" 265ffd83dbSDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 270b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 280b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 29a7dea167SDimitry Andric #include "llvm/Support/YAMLTraits.h" 305ffd83dbSDimitry Andric 31a7dea167SDimitry Andric #include <limits> 325ffd83dbSDimitry Andric #include <memory> 330b57cec5SDimitry Andric #include <utility> 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric using namespace clang; 360b57cec5SDimitry Andric using namespace ento; 370b57cec5SDimitry Andric using namespace taint; 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric namespace { 40*04eeddc0SDimitry Andric 41*04eeddc0SDimitry Andric class GenericTaintChecker; 42*04eeddc0SDimitry Andric 43*04eeddc0SDimitry Andric /// Check for CWE-134: Uncontrolled Format String. 44*04eeddc0SDimitry Andric constexpr llvm::StringLiteral MsgUncontrolledFormatString = 45*04eeddc0SDimitry Andric "Untrusted data is used as a format string " 46*04eeddc0SDimitry Andric "(CWE-134: Uncontrolled Format String)"; 47*04eeddc0SDimitry Andric 48*04eeddc0SDimitry Andric /// Check for: 49*04eeddc0SDimitry Andric /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 50*04eeddc0SDimitry Andric /// CWE-78, "Failure to Sanitize Data into an OS Command" 51*04eeddc0SDimitry Andric constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 52*04eeddc0SDimitry Andric "Untrusted data is passed to a system call " 53*04eeddc0SDimitry Andric "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 54*04eeddc0SDimitry Andric 55*04eeddc0SDimitry Andric /// Check if tainted data is used as a buffer size in strn.. functions, 56*04eeddc0SDimitry Andric /// and allocators. 57*04eeddc0SDimitry Andric constexpr llvm::StringLiteral MsgTaintedBufferSize = 58*04eeddc0SDimitry Andric "Untrusted data is used to specify the buffer size " 59*04eeddc0SDimitry Andric "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 60*04eeddc0SDimitry Andric "for character data and the null terminator)"; 61*04eeddc0SDimitry Andric 62*04eeddc0SDimitry Andric /// Check if tainted data is used as a custom sink's parameter. 63*04eeddc0SDimitry Andric constexpr llvm::StringLiteral MsgCustomSink = 64*04eeddc0SDimitry Andric "Untrusted data is passed to a user-defined sink"; 65*04eeddc0SDimitry Andric 66*04eeddc0SDimitry Andric using ArgIdxTy = int; 67*04eeddc0SDimitry Andric using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>; 68*04eeddc0SDimitry Andric 69*04eeddc0SDimitry Andric /// Denotes the return value. 70*04eeddc0SDimitry Andric constexpr ArgIdxTy ReturnValueIndex{-1}; 71*04eeddc0SDimitry Andric 72*04eeddc0SDimitry Andric static ArgIdxTy fromArgumentCount(unsigned Count) { 73*04eeddc0SDimitry Andric assert(Count <= 74*04eeddc0SDimitry Andric static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) && 75*04eeddc0SDimitry Andric "ArgIdxTy is not large enough to represent the number of arguments."); 76*04eeddc0SDimitry Andric return Count; 77*04eeddc0SDimitry Andric } 78*04eeddc0SDimitry Andric 79*04eeddc0SDimitry Andric /// Check if the region the expression evaluates to is the standard input, 80*04eeddc0SDimitry Andric /// and thus, is tainted. 81*04eeddc0SDimitry Andric /// FIXME: Move this to Taint.cpp. 82*04eeddc0SDimitry Andric bool isStdin(SVal Val, const ASTContext &ACtx) { 83*04eeddc0SDimitry Andric // FIXME: What if Val is NonParamVarRegion? 84*04eeddc0SDimitry Andric 85*04eeddc0SDimitry Andric // The region should be symbolic, we do not know it's value. 86*04eeddc0SDimitry Andric const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion()); 87*04eeddc0SDimitry Andric if (!SymReg) 88*04eeddc0SDimitry Andric return false; 89*04eeddc0SDimitry Andric 90*04eeddc0SDimitry Andric // Get it's symbol and find the declaration region it's pointing to. 91*04eeddc0SDimitry Andric const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 92*04eeddc0SDimitry Andric if (!Sm) 93*04eeddc0SDimitry Andric return false; 94*04eeddc0SDimitry Andric const auto *DeclReg = dyn_cast<DeclRegion>(Sm->getRegion()); 95*04eeddc0SDimitry Andric if (!DeclReg) 96*04eeddc0SDimitry Andric return false; 97*04eeddc0SDimitry Andric 98*04eeddc0SDimitry Andric // This region corresponds to a declaration, find out if it's a global/extern 99*04eeddc0SDimitry Andric // variable named stdin with the proper type. 100*04eeddc0SDimitry Andric if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 101*04eeddc0SDimitry Andric D = D->getCanonicalDecl(); 102*04eeddc0SDimitry Andric // FIXME: This should look for an exact match. 103*04eeddc0SDimitry Andric if (D->getName().contains("stdin") && D->isExternC()) { 104*04eeddc0SDimitry Andric const QualType FILETy = ACtx.getFILEType().getCanonicalType(); 105*04eeddc0SDimitry Andric const QualType Ty = D->getType().getCanonicalType(); 106*04eeddc0SDimitry Andric 107*04eeddc0SDimitry Andric if (Ty->isPointerType()) 108*04eeddc0SDimitry Andric return Ty->getPointeeType() == FILETy; 109*04eeddc0SDimitry Andric } 110*04eeddc0SDimitry Andric } 111*04eeddc0SDimitry Andric return false; 112*04eeddc0SDimitry Andric } 113*04eeddc0SDimitry Andric 114*04eeddc0SDimitry Andric SVal getPointeeOf(const CheckerContext &C, Loc LValue) { 115*04eeddc0SDimitry Andric const QualType ArgTy = LValue.getType(C.getASTContext()); 116*04eeddc0SDimitry Andric if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) 117*04eeddc0SDimitry Andric return C.getState()->getSVal(LValue); 118*04eeddc0SDimitry Andric 119*04eeddc0SDimitry Andric // Do not dereference void pointers. Treat them as byte pointers instead. 120*04eeddc0SDimitry Andric // FIXME: we might want to consider more than just the first byte. 121*04eeddc0SDimitry Andric return C.getState()->getSVal(LValue, C.getASTContext().CharTy); 122*04eeddc0SDimitry Andric } 123*04eeddc0SDimitry Andric 124*04eeddc0SDimitry Andric /// Given a pointer/reference argument, return the value it refers to. 125*04eeddc0SDimitry Andric Optional<SVal> getPointeeOf(const CheckerContext &C, SVal Arg) { 126*04eeddc0SDimitry Andric if (auto LValue = Arg.getAs<Loc>()) 127*04eeddc0SDimitry Andric return getPointeeOf(C, *LValue); 128*04eeddc0SDimitry Andric return None; 129*04eeddc0SDimitry Andric } 130*04eeddc0SDimitry Andric 131*04eeddc0SDimitry Andric /// Given a pointer, return the SVal of its pointee or if it is tainted, 132*04eeddc0SDimitry Andric /// otherwise return the pointer's SVal if tainted. 133*04eeddc0SDimitry Andric /// Also considers stdin as a taint source. 134*04eeddc0SDimitry Andric Optional<SVal> getTaintedPointeeOrPointer(const CheckerContext &C, SVal Arg) { 135*04eeddc0SDimitry Andric const ProgramStateRef State = C.getState(); 136*04eeddc0SDimitry Andric 137*04eeddc0SDimitry Andric if (auto Pointee = getPointeeOf(C, Arg)) 138*04eeddc0SDimitry Andric if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None; 139*04eeddc0SDimitry Andric return Pointee; 140*04eeddc0SDimitry Andric 141*04eeddc0SDimitry Andric if (isTainted(State, Arg)) 142*04eeddc0SDimitry Andric return Arg; 143*04eeddc0SDimitry Andric 144*04eeddc0SDimitry Andric // FIXME: This should be done by the isTainted() API. 145*04eeddc0SDimitry Andric if (isStdin(Arg, C.getASTContext())) 146*04eeddc0SDimitry Andric return Arg; 147*04eeddc0SDimitry Andric 148*04eeddc0SDimitry Andric return None; 149*04eeddc0SDimitry Andric } 150*04eeddc0SDimitry Andric 151*04eeddc0SDimitry Andric bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State, 152*04eeddc0SDimitry Andric CheckerContext &C) { 153*04eeddc0SDimitry Andric return getTaintedPointeeOrPointer(C, C.getSVal(E)).hasValue(); 154*04eeddc0SDimitry Andric } 155*04eeddc0SDimitry Andric 156*04eeddc0SDimitry Andric /// ArgSet is used to describe arguments relevant for taint detection or 157*04eeddc0SDimitry Andric /// taint application. A discrete set of argument indexes and a variadic 158*04eeddc0SDimitry Andric /// argument list signified by a starting index are supported. 159*04eeddc0SDimitry Andric class ArgSet { 160*04eeddc0SDimitry Andric public: 161*04eeddc0SDimitry Andric ArgSet() = default; 162*04eeddc0SDimitry Andric ArgSet(ArgVecTy &&DiscreteArgs, Optional<ArgIdxTy> VariadicIndex = None) 163*04eeddc0SDimitry Andric : DiscreteArgs(std::move(DiscreteArgs)), 164*04eeddc0SDimitry Andric VariadicIndex(std::move(VariadicIndex)) {} 165*04eeddc0SDimitry Andric 166*04eeddc0SDimitry Andric bool contains(ArgIdxTy ArgIdx) const { 167*04eeddc0SDimitry Andric if (llvm::is_contained(DiscreteArgs, ArgIdx)) 168*04eeddc0SDimitry Andric return true; 169*04eeddc0SDimitry Andric 170*04eeddc0SDimitry Andric return VariadicIndex && ArgIdx >= *VariadicIndex; 171*04eeddc0SDimitry Andric } 172*04eeddc0SDimitry Andric 173*04eeddc0SDimitry Andric bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; } 174*04eeddc0SDimitry Andric 175*04eeddc0SDimitry Andric ArgVecTy ArgsUpTo(ArgIdxTy LastArgIdx) const { 176*04eeddc0SDimitry Andric ArgVecTy Args; 177*04eeddc0SDimitry Andric for (ArgIdxTy I = ReturnValueIndex; I <= LastArgIdx; ++I) { 178*04eeddc0SDimitry Andric if (contains(I)) 179*04eeddc0SDimitry Andric Args.push_back(I); 180*04eeddc0SDimitry Andric } 181*04eeddc0SDimitry Andric return Args; 182*04eeddc0SDimitry Andric } 183*04eeddc0SDimitry Andric 184*04eeddc0SDimitry Andric private: 185*04eeddc0SDimitry Andric ArgVecTy DiscreteArgs; 186*04eeddc0SDimitry Andric Optional<ArgIdxTy> VariadicIndex; 187*04eeddc0SDimitry Andric }; 188*04eeddc0SDimitry Andric 189*04eeddc0SDimitry Andric /// A struct used to specify taint propagation rules for a function. 190*04eeddc0SDimitry Andric /// 191*04eeddc0SDimitry Andric /// If any of the possible taint source arguments is tainted, all of the 192*04eeddc0SDimitry Andric /// destination arguments should also be tainted. If ReturnValueIndex is added 193*04eeddc0SDimitry Andric /// to the dst list, the return value will be tainted. 194*04eeddc0SDimitry Andric class GenericTaintRule { 195*04eeddc0SDimitry Andric /// Arguments which are taints sinks and should be checked, and a report 196*04eeddc0SDimitry Andric /// should be emitted if taint reaches these. 197*04eeddc0SDimitry Andric ArgSet SinkArgs; 198*04eeddc0SDimitry Andric /// Arguments which should be sanitized on function return. 199*04eeddc0SDimitry Andric ArgSet FilterArgs; 200*04eeddc0SDimitry Andric /// Arguments which can participate in taint propagationa. If any of the 201*04eeddc0SDimitry Andric /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should 202*04eeddc0SDimitry Andric /// be tainted. 203*04eeddc0SDimitry Andric ArgSet PropSrcArgs; 204*04eeddc0SDimitry Andric ArgSet PropDstArgs; 205*04eeddc0SDimitry Andric 206*04eeddc0SDimitry Andric /// A message that explains why the call is sensitive to taint. 207*04eeddc0SDimitry Andric Optional<StringRef> SinkMsg; 208*04eeddc0SDimitry Andric 209*04eeddc0SDimitry Andric GenericTaintRule() = default; 210*04eeddc0SDimitry Andric 211*04eeddc0SDimitry Andric GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst, 212*04eeddc0SDimitry Andric Optional<StringRef> SinkMsg = None) 213*04eeddc0SDimitry Andric : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)), 214*04eeddc0SDimitry Andric PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)), 215*04eeddc0SDimitry Andric SinkMsg(SinkMsg) {} 216*04eeddc0SDimitry Andric 217*04eeddc0SDimitry Andric public: 218*04eeddc0SDimitry Andric /// Make a rule that reports a warning if taint reaches any of \p FilterArgs 219*04eeddc0SDimitry Andric /// arguments. 220*04eeddc0SDimitry Andric static GenericTaintRule Sink(ArgSet &&SinkArgs, 221*04eeddc0SDimitry Andric Optional<StringRef> Msg = None) { 222*04eeddc0SDimitry Andric return {std::move(SinkArgs), {}, {}, {}, Msg}; 223*04eeddc0SDimitry Andric } 224*04eeddc0SDimitry Andric 225*04eeddc0SDimitry Andric /// Make a rule that sanitizes all FilterArgs arguments. 226*04eeddc0SDimitry Andric static GenericTaintRule Filter(ArgSet &&FilterArgs) { 227*04eeddc0SDimitry Andric return {{}, std::move(FilterArgs), {}, {}}; 228*04eeddc0SDimitry Andric } 229*04eeddc0SDimitry Andric 230*04eeddc0SDimitry Andric /// Make a rule that unconditionally taints all Args. 231*04eeddc0SDimitry Andric /// If Func is provided, it must also return true for taint to propagate. 232*04eeddc0SDimitry Andric static GenericTaintRule Source(ArgSet &&SourceArgs) { 233*04eeddc0SDimitry Andric return {{}, {}, {}, std::move(SourceArgs)}; 234*04eeddc0SDimitry Andric } 235*04eeddc0SDimitry Andric 236*04eeddc0SDimitry Andric /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. 237*04eeddc0SDimitry Andric static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) { 238*04eeddc0SDimitry Andric return {{}, {}, std::move(SrcArgs), std::move(DstArgs)}; 239*04eeddc0SDimitry Andric } 240*04eeddc0SDimitry Andric 241*04eeddc0SDimitry Andric /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. 242*04eeddc0SDimitry Andric static GenericTaintRule SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, 243*04eeddc0SDimitry Andric ArgSet &&DstArgs, 244*04eeddc0SDimitry Andric Optional<StringRef> Msg = None) { 245*04eeddc0SDimitry Andric return { 246*04eeddc0SDimitry Andric std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg}; 247*04eeddc0SDimitry Andric } 248*04eeddc0SDimitry Andric 249*04eeddc0SDimitry Andric /// Process a function which could either be a taint source, a taint sink, a 250*04eeddc0SDimitry Andric /// taint filter or a taint propagator. 251*04eeddc0SDimitry Andric void process(const GenericTaintChecker &Checker, const CallEvent &Call, 252*04eeddc0SDimitry Andric CheckerContext &C) const; 253*04eeddc0SDimitry Andric 254*04eeddc0SDimitry Andric /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s. 255*04eeddc0SDimitry Andric static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) { 256*04eeddc0SDimitry Andric return ArgIdx == ReturnValueIndex ? Call.getOriginExpr() 257*04eeddc0SDimitry Andric : Call.getArgExpr(ArgIdx); 258*04eeddc0SDimitry Andric }; 259*04eeddc0SDimitry Andric 260*04eeddc0SDimitry Andric /// Functions for custom taintedness propagation. 261*04eeddc0SDimitry Andric static bool UntrustedEnv(CheckerContext &C); 262*04eeddc0SDimitry Andric }; 263*04eeddc0SDimitry Andric 264*04eeddc0SDimitry Andric using RuleLookupTy = CallDescriptionMap<GenericTaintRule>; 265*04eeddc0SDimitry Andric 266*04eeddc0SDimitry Andric /// Used to parse the configuration file. 267*04eeddc0SDimitry Andric struct TaintConfiguration { 268*04eeddc0SDimitry Andric using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>; 269*04eeddc0SDimitry Andric enum class VariadicType { None, Src, Dst }; 270*04eeddc0SDimitry Andric 271*04eeddc0SDimitry Andric struct Common { 272*04eeddc0SDimitry Andric std::string Name; 273*04eeddc0SDimitry Andric std::string Scope; 274*04eeddc0SDimitry Andric }; 275*04eeddc0SDimitry Andric 276*04eeddc0SDimitry Andric struct Sink : Common { 277*04eeddc0SDimitry Andric ArgVecTy SinkArgs; 278*04eeddc0SDimitry Andric }; 279*04eeddc0SDimitry Andric 280*04eeddc0SDimitry Andric struct Filter : Common { 281*04eeddc0SDimitry Andric ArgVecTy FilterArgs; 282*04eeddc0SDimitry Andric }; 283*04eeddc0SDimitry Andric 284*04eeddc0SDimitry Andric struct Propagation : Common { 285*04eeddc0SDimitry Andric ArgVecTy SrcArgs; 286*04eeddc0SDimitry Andric ArgVecTy DstArgs; 287*04eeddc0SDimitry Andric VariadicType VarType; 288*04eeddc0SDimitry Andric ArgIdxTy VarIndex; 289*04eeddc0SDimitry Andric }; 290*04eeddc0SDimitry Andric 291*04eeddc0SDimitry Andric std::vector<Propagation> Propagations; 292*04eeddc0SDimitry Andric std::vector<Filter> Filters; 293*04eeddc0SDimitry Andric std::vector<Sink> Sinks; 294*04eeddc0SDimitry Andric 295*04eeddc0SDimitry Andric TaintConfiguration() = default; 296*04eeddc0SDimitry Andric TaintConfiguration(const TaintConfiguration &) = default; 297*04eeddc0SDimitry Andric TaintConfiguration(TaintConfiguration &&) = default; 298*04eeddc0SDimitry Andric TaintConfiguration &operator=(const TaintConfiguration &) = default; 299*04eeddc0SDimitry Andric TaintConfiguration &operator=(TaintConfiguration &&) = default; 300*04eeddc0SDimitry Andric }; 301*04eeddc0SDimitry Andric 302*04eeddc0SDimitry Andric struct GenericTaintRuleParser { 303*04eeddc0SDimitry Andric GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {} 304*04eeddc0SDimitry Andric /// Container type used to gather call identification objects grouped into 305*04eeddc0SDimitry Andric /// pairs with their corresponding taint rules. It is temporary as it is used 306*04eeddc0SDimitry Andric /// to finally initialize RuleLookupTy, which is considered to be immutable. 307*04eeddc0SDimitry Andric using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>; 308*04eeddc0SDimitry Andric RulesContTy parseConfiguration(const std::string &Option, 309*04eeddc0SDimitry Andric TaintConfiguration &&Config) const; 310*04eeddc0SDimitry Andric 311*04eeddc0SDimitry Andric private: 312*04eeddc0SDimitry Andric using NamePartsTy = llvm::SmallVector<SmallString<32>, 2>; 313*04eeddc0SDimitry Andric 314*04eeddc0SDimitry Andric /// Validate part of the configuration, which contains a list of argument 315*04eeddc0SDimitry Andric /// indexes. 316*04eeddc0SDimitry Andric void validateArgVector(const std::string &Option, const ArgVecTy &Args) const; 317*04eeddc0SDimitry Andric 318*04eeddc0SDimitry Andric template <typename Config> static NamePartsTy parseNameParts(const Config &C); 319*04eeddc0SDimitry Andric 320*04eeddc0SDimitry Andric // Takes the config and creates a CallDescription for it and associates a Rule 321*04eeddc0SDimitry Andric // with that. 322*04eeddc0SDimitry Andric template <typename Config> 323*04eeddc0SDimitry Andric static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule, 324*04eeddc0SDimitry Andric RulesContTy &Rules); 325*04eeddc0SDimitry Andric 326*04eeddc0SDimitry Andric void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P, 327*04eeddc0SDimitry Andric RulesContTy &Rules) const; 328*04eeddc0SDimitry Andric void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P, 329*04eeddc0SDimitry Andric RulesContTy &Rules) const; 330*04eeddc0SDimitry Andric void parseConfig(const std::string &Option, 331*04eeddc0SDimitry Andric TaintConfiguration::Propagation &&P, 332*04eeddc0SDimitry Andric RulesContTy &Rules) const; 333*04eeddc0SDimitry Andric 334*04eeddc0SDimitry Andric CheckerManager &Mgr; 335*04eeddc0SDimitry Andric }; 336*04eeddc0SDimitry Andric 3375ffd83dbSDimitry Andric class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { 3380b57cec5SDimitry Andric public: 3390b57cec5SDimitry Andric static void *getTag() { 3400b57cec5SDimitry Andric static int Tag; 3410b57cec5SDimitry Andric return &Tag; 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 3445ffd83dbSDimitry Andric void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 3455ffd83dbSDimitry Andric void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 3460b57cec5SDimitry Andric 347a7dea167SDimitry Andric void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 348a7dea167SDimitry Andric const char *Sep) const override; 349a7dea167SDimitry Andric 350a7dea167SDimitry Andric /// Generate a report if the expression is tainted or points to tainted data. 351a7dea167SDimitry Andric bool generateReportIfTainted(const Expr *E, StringRef Msg, 352a7dea167SDimitry Andric CheckerContext &C) const; 353a7dea167SDimitry Andric 354*04eeddc0SDimitry Andric private: 355*04eeddc0SDimitry Andric const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"}; 356480093f4SDimitry Andric 357*04eeddc0SDimitry Andric bool checkUncontrolledFormatString(const CallEvent &Call, 358*04eeddc0SDimitry Andric CheckerContext &C) const; 3590b57cec5SDimitry Andric 360*04eeddc0SDimitry Andric void taintUnsafeSocketProtocol(const CallEvent &Call, 361*04eeddc0SDimitry Andric CheckerContext &C) const; 3620b57cec5SDimitry Andric 363*04eeddc0SDimitry Andric /// Default taint rules are initilized with the help of a CheckerContext to 364*04eeddc0SDimitry Andric /// access the names of built-in functions like memcpy. 365*04eeddc0SDimitry Andric void initTaintRules(CheckerContext &C) const; 3660b57cec5SDimitry Andric 367*04eeddc0SDimitry Andric /// CallDescription currently cannot restrict matches to the global namespace 368*04eeddc0SDimitry Andric /// only, which is why multiple CallDescriptionMaps are used, as we want to 369*04eeddc0SDimitry Andric /// disambiguate global C functions from functions inside user-defined 370*04eeddc0SDimitry Andric /// namespaces. 371*04eeddc0SDimitry Andric // TODO: Remove separation to simplify matching logic once CallDescriptions 372*04eeddc0SDimitry Andric // are more expressive. 3730b57cec5SDimitry Andric 374*04eeddc0SDimitry Andric mutable Optional<RuleLookupTy> StaticTaintRules; 375*04eeddc0SDimitry Andric mutable Optional<RuleLookupTy> DynamicTaintRules; 3760b57cec5SDimitry Andric }; 3770b57cec5SDimitry Andric } // end of anonymous namespace 3780b57cec5SDimitry Andric 379*04eeddc0SDimitry Andric /// YAML serialization mapping. 380*04eeddc0SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink) 381*04eeddc0SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter) 382*04eeddc0SDimitry Andric LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation) 383a7dea167SDimitry Andric 384a7dea167SDimitry Andric namespace llvm { 385a7dea167SDimitry Andric namespace yaml { 386*04eeddc0SDimitry Andric template <> struct MappingTraits<TaintConfiguration> { 387*04eeddc0SDimitry Andric static void mapping(IO &IO, TaintConfiguration &Config) { 388a7dea167SDimitry Andric IO.mapOptional("Propagations", Config.Propagations); 389a7dea167SDimitry Andric IO.mapOptional("Filters", Config.Filters); 390a7dea167SDimitry Andric IO.mapOptional("Sinks", Config.Sinks); 391a7dea167SDimitry Andric } 392a7dea167SDimitry Andric }; 393a7dea167SDimitry Andric 394*04eeddc0SDimitry Andric template <> struct MappingTraits<TaintConfiguration::Sink> { 395*04eeddc0SDimitry Andric static void mapping(IO &IO, TaintConfiguration::Sink &Sink) { 396*04eeddc0SDimitry Andric IO.mapRequired("Name", Sink.Name); 397*04eeddc0SDimitry Andric IO.mapOptional("Scope", Sink.Scope); 398*04eeddc0SDimitry Andric IO.mapRequired("Args", Sink.SinkArgs); 399*04eeddc0SDimitry Andric } 400*04eeddc0SDimitry Andric }; 401*04eeddc0SDimitry Andric 402*04eeddc0SDimitry Andric template <> struct MappingTraits<TaintConfiguration::Filter> { 403*04eeddc0SDimitry Andric static void mapping(IO &IO, TaintConfiguration::Filter &Filter) { 404*04eeddc0SDimitry Andric IO.mapRequired("Name", Filter.Name); 405*04eeddc0SDimitry Andric IO.mapOptional("Scope", Filter.Scope); 406*04eeddc0SDimitry Andric IO.mapRequired("Args", Filter.FilterArgs); 407*04eeddc0SDimitry Andric } 408*04eeddc0SDimitry Andric }; 409*04eeddc0SDimitry Andric 410*04eeddc0SDimitry Andric template <> struct MappingTraits<TaintConfiguration::Propagation> { 411*04eeddc0SDimitry Andric static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) { 412a7dea167SDimitry Andric IO.mapRequired("Name", Propagation.Name); 413480093f4SDimitry Andric IO.mapOptional("Scope", Propagation.Scope); 414a7dea167SDimitry Andric IO.mapOptional("SrcArgs", Propagation.SrcArgs); 415a7dea167SDimitry Andric IO.mapOptional("DstArgs", Propagation.DstArgs); 416*04eeddc0SDimitry Andric IO.mapOptional("VariadicType", Propagation.VarType); 417*04eeddc0SDimitry Andric IO.mapOptional("VariadicIndex", Propagation.VarIndex); 418a7dea167SDimitry Andric } 419a7dea167SDimitry Andric }; 420a7dea167SDimitry Andric 421*04eeddc0SDimitry Andric template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> { 422*04eeddc0SDimitry Andric static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) { 423*04eeddc0SDimitry Andric IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None); 424*04eeddc0SDimitry Andric IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src); 425*04eeddc0SDimitry Andric IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst); 426a7dea167SDimitry Andric } 427a7dea167SDimitry Andric }; 428a7dea167SDimitry Andric } // namespace yaml 429a7dea167SDimitry Andric } // namespace llvm 430a7dea167SDimitry Andric 4310b57cec5SDimitry Andric /// A set which is used to pass information from call pre-visit instruction 432*04eeddc0SDimitry Andric /// to the call post-visit. The values are signed integers, which are either 4330b57cec5SDimitry Andric /// ReturnValueIndex, or indexes of the pointer/reference argument, which 4340b57cec5SDimitry Andric /// points to data, which should be tainted on return. 435*04eeddc0SDimitry Andric REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, ArgIdxTy) 4360b57cec5SDimitry Andric 437*04eeddc0SDimitry Andric void GenericTaintRuleParser::validateArgVector(const std::string &Option, 438*04eeddc0SDimitry Andric const ArgVecTy &Args) const { 439*04eeddc0SDimitry Andric for (ArgIdxTy Arg : Args) { 440*04eeddc0SDimitry Andric if (Arg < ReturnValueIndex) { 441a7dea167SDimitry Andric Mgr.reportInvalidCheckerOptionValue( 442*04eeddc0SDimitry Andric Mgr.getChecker<GenericTaintChecker>(), Option, 443a7dea167SDimitry Andric "an argument number for propagation rules greater or equal to -1"); 444a7dea167SDimitry Andric } 445a7dea167SDimitry Andric } 446a7dea167SDimitry Andric } 447a7dea167SDimitry Andric 448*04eeddc0SDimitry Andric template <typename Config> 449*04eeddc0SDimitry Andric GenericTaintRuleParser::NamePartsTy 450*04eeddc0SDimitry Andric GenericTaintRuleParser::parseNameParts(const Config &C) { 451*04eeddc0SDimitry Andric NamePartsTy NameParts; 452*04eeddc0SDimitry Andric if (!C.Scope.empty()) { 453*04eeddc0SDimitry Andric // If the Scope argument contains multiple "::" parts, those are considered 454*04eeddc0SDimitry Andric // namespace identifiers. 455*04eeddc0SDimitry Andric llvm::SmallVector<StringRef, 2> NSParts; 456*04eeddc0SDimitry Andric StringRef{C.Scope}.split(NSParts, "::", /*MaxSplit*/ -1, 457*04eeddc0SDimitry Andric /*KeepEmpty*/ false); 458*04eeddc0SDimitry Andric NameParts.append(NSParts.begin(), NSParts.end()); 459*04eeddc0SDimitry Andric } 460*04eeddc0SDimitry Andric NameParts.emplace_back(C.Name); 461*04eeddc0SDimitry Andric return NameParts; 462480093f4SDimitry Andric } 463480093f4SDimitry Andric 464*04eeddc0SDimitry Andric template <typename Config> 465*04eeddc0SDimitry Andric void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C, 466*04eeddc0SDimitry Andric GenericTaintRule &&Rule, 467*04eeddc0SDimitry Andric RulesContTy &Rules) { 468*04eeddc0SDimitry Andric NamePartsTy NameParts = parseNameParts(C); 469*04eeddc0SDimitry Andric llvm::SmallVector<const char *, 2> CallDescParts{NameParts.size()}; 470*04eeddc0SDimitry Andric llvm::transform(NameParts, CallDescParts.begin(), 471*04eeddc0SDimitry Andric [](SmallString<32> &S) { return S.c_str(); }); 472*04eeddc0SDimitry Andric Rules.emplace_back(CallDescription(CallDescParts), std::move(Rule)); 473*04eeddc0SDimitry Andric } 4740b57cec5SDimitry Andric 475*04eeddc0SDimitry Andric void GenericTaintRuleParser::parseConfig(const std::string &Option, 476*04eeddc0SDimitry Andric TaintConfiguration::Sink &&S, 477*04eeddc0SDimitry Andric RulesContTy &Rules) const { 478*04eeddc0SDimitry Andric validateArgVector(Option, S.SinkArgs); 479*04eeddc0SDimitry Andric consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)), 480*04eeddc0SDimitry Andric Rules); 481*04eeddc0SDimitry Andric } 482*04eeddc0SDimitry Andric 483*04eeddc0SDimitry Andric void GenericTaintRuleParser::parseConfig(const std::string &Option, 484*04eeddc0SDimitry Andric TaintConfiguration::Filter &&S, 485*04eeddc0SDimitry Andric RulesContTy &Rules) const { 486*04eeddc0SDimitry Andric validateArgVector(Option, S.FilterArgs); 487*04eeddc0SDimitry Andric consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)), 488*04eeddc0SDimitry Andric Rules); 489*04eeddc0SDimitry Andric } 490*04eeddc0SDimitry Andric 491*04eeddc0SDimitry Andric void GenericTaintRuleParser::parseConfig(const std::string &Option, 492*04eeddc0SDimitry Andric TaintConfiguration::Propagation &&P, 493*04eeddc0SDimitry Andric RulesContTy &Rules) const { 494*04eeddc0SDimitry Andric validateArgVector(Option, P.SrcArgs); 495*04eeddc0SDimitry Andric validateArgVector(Option, P.DstArgs); 496*04eeddc0SDimitry Andric bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src; 497*04eeddc0SDimitry Andric bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst; 498*04eeddc0SDimitry Andric Optional<ArgIdxTy> JustVarIndex = P.VarIndex; 499*04eeddc0SDimitry Andric 500*04eeddc0SDimitry Andric ArgSet SrcDesc(std::move(P.SrcArgs), IsSrcVariadic ? JustVarIndex : None); 501*04eeddc0SDimitry Andric ArgSet DstDesc(std::move(P.DstArgs), IsDstVariadic ? JustVarIndex : None); 502*04eeddc0SDimitry Andric 503*04eeddc0SDimitry Andric consumeRulesFromConfig( 504*04eeddc0SDimitry Andric P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules); 505*04eeddc0SDimitry Andric } 506*04eeddc0SDimitry Andric 507*04eeddc0SDimitry Andric GenericTaintRuleParser::RulesContTy 508*04eeddc0SDimitry Andric GenericTaintRuleParser::parseConfiguration(const std::string &Option, 509*04eeddc0SDimitry Andric TaintConfiguration &&Config) const { 510*04eeddc0SDimitry Andric 511*04eeddc0SDimitry Andric RulesContTy Rules; 512*04eeddc0SDimitry Andric 513*04eeddc0SDimitry Andric for (auto &F : Config.Filters) 514*04eeddc0SDimitry Andric parseConfig(Option, std::move(F), Rules); 515*04eeddc0SDimitry Andric 516*04eeddc0SDimitry Andric for (auto &S : Config.Sinks) 517*04eeddc0SDimitry Andric parseConfig(Option, std::move(S), Rules); 518*04eeddc0SDimitry Andric 519*04eeddc0SDimitry Andric for (auto &P : Config.Propagations) 520*04eeddc0SDimitry Andric parseConfig(Option, std::move(P), Rules); 521*04eeddc0SDimitry Andric 522*04eeddc0SDimitry Andric return Rules; 523*04eeddc0SDimitry Andric } 524*04eeddc0SDimitry Andric 525*04eeddc0SDimitry Andric void GenericTaintChecker::initTaintRules(CheckerContext &C) const { 5260b57cec5SDimitry Andric // Check for exact name match for functions without builtin substitutes. 527480093f4SDimitry Andric // Use qualified name, because these are C functions without namespace. 5280b57cec5SDimitry Andric 529*04eeddc0SDimitry Andric if (StaticTaintRules || DynamicTaintRules) 530*04eeddc0SDimitry Andric return; 531*04eeddc0SDimitry Andric 532*04eeddc0SDimitry Andric using RulesConstructionTy = 533*04eeddc0SDimitry Andric std::vector<std::pair<CallDescription, GenericTaintRule>>; 534*04eeddc0SDimitry Andric using TR = GenericTaintRule; 535*04eeddc0SDimitry Andric 536*04eeddc0SDimitry Andric const Builtin::Context &BI = C.getASTContext().BuiltinInfo; 537*04eeddc0SDimitry Andric 538*04eeddc0SDimitry Andric RulesConstructionTy GlobalCRules{ 539*04eeddc0SDimitry Andric // Sources 540*04eeddc0SDimitry Andric {{"fdopen"}, TR::Source({{ReturnValueIndex}})}, 541*04eeddc0SDimitry Andric {{"fopen"}, TR::Source({{ReturnValueIndex}})}, 542*04eeddc0SDimitry Andric {{"freopen"}, TR::Source({{ReturnValueIndex}})}, 543*04eeddc0SDimitry Andric {{"getch"}, TR::Source({{ReturnValueIndex}})}, 544*04eeddc0SDimitry Andric {{"getchar"}, TR::Source({{ReturnValueIndex}})}, 545*04eeddc0SDimitry Andric {{"getchar_unlocked"}, TR::Source({{ReturnValueIndex}})}, 546*04eeddc0SDimitry Andric {{"gets"}, TR::Source({{0}, ReturnValueIndex})}, 547*04eeddc0SDimitry Andric {{"scanf"}, TR::Source({{}, 1})}, 548*04eeddc0SDimitry Andric {{"wgetch"}, TR::Source({{}, ReturnValueIndex})}, 549*04eeddc0SDimitry Andric 550*04eeddc0SDimitry Andric // Props 551*04eeddc0SDimitry Andric {{"atoi"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 552*04eeddc0SDimitry Andric {{"atol"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 553*04eeddc0SDimitry Andric {{"atoll"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 554*04eeddc0SDimitry Andric {{"fgetc"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 555*04eeddc0SDimitry Andric {{"fgetln"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 556*04eeddc0SDimitry Andric {{"fgets"}, TR::Prop({{2}}, {{0}, ReturnValueIndex})}, 557*04eeddc0SDimitry Andric {{"fscanf"}, TR::Prop({{0}}, {{}, 2})}, 558*04eeddc0SDimitry Andric {{"sscanf"}, TR::Prop({{0}}, {{}, 2})}, 559*04eeddc0SDimitry Andric {{"getc"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 560*04eeddc0SDimitry Andric {{"getc_unlocked"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 561*04eeddc0SDimitry Andric {{"getdelim"}, TR::Prop({{3}}, {{0}})}, 562*04eeddc0SDimitry Andric {{"getline"}, TR::Prop({{2}}, {{0}})}, 563*04eeddc0SDimitry Andric {{"getw"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 564*04eeddc0SDimitry Andric {{"pread"}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})}, 565*04eeddc0SDimitry Andric {{"read"}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})}, 566*04eeddc0SDimitry Andric {{"strchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 567*04eeddc0SDimitry Andric {{"strrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 568*04eeddc0SDimitry Andric {{"tolower"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 569*04eeddc0SDimitry Andric {{"toupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 570*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}}, 571*04eeddc0SDimitry Andric TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, 572*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}}, 573*04eeddc0SDimitry Andric TR::Prop({{1, 2}}, {{0}})}, 574*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcat)}}, 575*04eeddc0SDimitry Andric TR::Prop({{1, 2}}, {{0}})}, 576*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"snprintf"}}, 577*04eeddc0SDimitry Andric TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})}, 578*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"sprintf"}}, 579*04eeddc0SDimitry Andric TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})}, 580*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"strcpy"}}, 581*04eeddc0SDimitry Andric TR::Prop({{1}}, {{0, ReturnValueIndex}})}, 582*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"stpcpy"}}, 583*04eeddc0SDimitry Andric TR::Prop({{1}}, {{0, ReturnValueIndex}})}, 584*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"strcat"}}, 585*04eeddc0SDimitry Andric TR::Prop({{1}}, {{0, ReturnValueIndex}})}, 586*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 587*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 588*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 589*04eeddc0SDimitry Andric 590*04eeddc0SDimitry Andric // Sinks 591*04eeddc0SDimitry Andric {{"system"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 592*04eeddc0SDimitry Andric {{"popen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 593*04eeddc0SDimitry Andric {{"execl"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 594*04eeddc0SDimitry Andric {{"execle"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 595*04eeddc0SDimitry Andric {{"execlp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 596*04eeddc0SDimitry Andric {{"execvp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 597*04eeddc0SDimitry Andric {{"execvP"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 598*04eeddc0SDimitry Andric {{"execve"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 599*04eeddc0SDimitry Andric {{"dlopen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 600*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"malloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, 601*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"calloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, 602*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"alloca"}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, 603*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"memccpy"}}, TR::Sink({{3}}, MsgTaintedBufferSize)}, 604*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"realloc"}}, TR::Sink({{1}}, MsgTaintedBufferSize)}, 605*04eeddc0SDimitry Andric {{{"setproctitle"}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, 606*04eeddc0SDimitry Andric {{{"setproctitle_fast"}}, 607*04eeddc0SDimitry Andric TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, 608*04eeddc0SDimitry Andric 609*04eeddc0SDimitry Andric // SinkProps 610*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)}, 611*04eeddc0SDimitry Andric TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, 612*04eeddc0SDimitry Andric MsgTaintedBufferSize)}, 613*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}}, 614*04eeddc0SDimitry Andric TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, 615*04eeddc0SDimitry Andric MsgTaintedBufferSize)}, 616*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}}, 617*04eeddc0SDimitry Andric TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, 618*04eeddc0SDimitry Andric MsgTaintedBufferSize)}, 619*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}}, 620*04eeddc0SDimitry Andric TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}}, 621*04eeddc0SDimitry Andric MsgTaintedBufferSize)}, 622*04eeddc0SDimitry Andric {{CDF_MaybeBuiltin, {"bcopy"}}, 623*04eeddc0SDimitry Andric TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}}; 624349cc55cSDimitry Andric 625349cc55cSDimitry Andric // `getenv` returns taint only in untrusted environments. 626*04eeddc0SDimitry Andric if (TR::UntrustedEnv(C)) { 627*04eeddc0SDimitry Andric // void setproctitle_init(int argc, char *argv[], char *envp[]) 628*04eeddc0SDimitry Andric GlobalCRules.push_back( 629*04eeddc0SDimitry Andric {{{"setproctitle_init"}}, TR::Sink({{2}}, MsgCustomSink)}); 630*04eeddc0SDimitry Andric GlobalCRules.push_back({{"getenv"}, TR::Source({{ReturnValueIndex}})}); 631349cc55cSDimitry Andric } 632349cc55cSDimitry Andric 633*04eeddc0SDimitry Andric StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()), 634*04eeddc0SDimitry Andric std::make_move_iterator(GlobalCRules.end())); 6350b57cec5SDimitry Andric 636*04eeddc0SDimitry Andric // User-provided taint configuration. 637*04eeddc0SDimitry Andric CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager(); 638*04eeddc0SDimitry Andric assert(Mgr); 639*04eeddc0SDimitry Andric GenericTaintRuleParser ConfigParser{*Mgr}; 640*04eeddc0SDimitry Andric std::string Option{"Config"}; 641*04eeddc0SDimitry Andric StringRef ConfigFile = 642*04eeddc0SDimitry Andric Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option); 643*04eeddc0SDimitry Andric llvm::Optional<TaintConfiguration> Config = 644*04eeddc0SDimitry Andric getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile); 645*04eeddc0SDimitry Andric if (!Config) { 646*04eeddc0SDimitry Andric // We don't have external taint config, no parsing required. 647*04eeddc0SDimitry Andric DynamicTaintRules = RuleLookupTy{}; 648*04eeddc0SDimitry Andric return; 6495ffd83dbSDimitry Andric } 6500b57cec5SDimitry Andric 651*04eeddc0SDimitry Andric GenericTaintRuleParser::RulesContTy Rules{ 652*04eeddc0SDimitry Andric ConfigParser.parseConfiguration(Option, std::move(Config.getValue()))}; 6530b57cec5SDimitry Andric 654*04eeddc0SDimitry Andric DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()), 655*04eeddc0SDimitry Andric std::make_move_iterator(Rules.end())); 656480093f4SDimitry Andric } 657a7dea167SDimitry Andric 6585ffd83dbSDimitry Andric void GenericTaintChecker::checkPreCall(const CallEvent &Call, 6590b57cec5SDimitry Andric CheckerContext &C) const { 660*04eeddc0SDimitry Andric initTaintRules(C); 661480093f4SDimitry Andric 662*04eeddc0SDimitry Andric // FIXME: this should be much simpler. 663*04eeddc0SDimitry Andric if (const auto *Rule = 664*04eeddc0SDimitry Andric Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr) 665*04eeddc0SDimitry Andric Rule->process(*this, Call, C); 666*04eeddc0SDimitry Andric else if (const auto *Rule = DynamicTaintRules->lookup(Call)) 667*04eeddc0SDimitry Andric Rule->process(*this, Call, C); 6680b57cec5SDimitry Andric 669*04eeddc0SDimitry Andric // FIXME: These edge cases are to be eliminated from here eventually. 670*04eeddc0SDimitry Andric // 671*04eeddc0SDimitry Andric // Additional check that is not supported by CallDescription. 672*04eeddc0SDimitry Andric // TODO: Make CallDescription be able to match attributes such as printf-like 673*04eeddc0SDimitry Andric // arguments. 674*04eeddc0SDimitry Andric checkUncontrolledFormatString(Call, C); 675480093f4SDimitry Andric 676*04eeddc0SDimitry Andric // TODO: Modeling sockets should be done in a specific checker. 677*04eeddc0SDimitry Andric // Socket is a source, which taints the return value. 678*04eeddc0SDimitry Andric taintUnsafeSocketProtocol(Call, C); 6790b57cec5SDimitry Andric } 6800b57cec5SDimitry Andric 6815ffd83dbSDimitry Andric void GenericTaintChecker::checkPostCall(const CallEvent &Call, 6820b57cec5SDimitry Andric CheckerContext &C) const { 6830b57cec5SDimitry Andric // Set the marked values as tainted. The return value only accessible from 6840b57cec5SDimitry Andric // checkPostStmt. 6850b57cec5SDimitry Andric ProgramStateRef State = C.getState(); 6860b57cec5SDimitry Andric 6870b57cec5SDimitry Andric // Depending on what was tainted at pre-visit, we determined a set of 6880b57cec5SDimitry Andric // arguments which should be tainted after the function returns. These are 6890b57cec5SDimitry Andric // stored in the state as TaintArgsOnPostVisit set. 6900b57cec5SDimitry Andric TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 6910b57cec5SDimitry Andric if (TaintArgs.isEmpty()) 692*04eeddc0SDimitry Andric return; 6930b57cec5SDimitry Andric 694*04eeddc0SDimitry Andric for (ArgIdxTy ArgNum : TaintArgs) { 6950b57cec5SDimitry Andric // Special handling for the tainted return value. 6960b57cec5SDimitry Andric if (ArgNum == ReturnValueIndex) { 6975ffd83dbSDimitry Andric State = addTaint(State, Call.getReturnValue()); 6980b57cec5SDimitry Andric continue; 6990b57cec5SDimitry Andric } 7000b57cec5SDimitry Andric 7010b57cec5SDimitry Andric // The arguments are pointer arguments. The data they are pointing at is 7020b57cec5SDimitry Andric // tainted after the call. 703*04eeddc0SDimitry Andric if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum))) 7040b57cec5SDimitry Andric State = addTaint(State, *V); 7050b57cec5SDimitry Andric } 7060b57cec5SDimitry Andric 7070b57cec5SDimitry Andric // Clear up the taint info from the state. 7080b57cec5SDimitry Andric State = State->remove<TaintArgsOnPostVisit>(); 7090b57cec5SDimitry Andric C.addTransition(State); 7100b57cec5SDimitry Andric } 7110b57cec5SDimitry Andric 712*04eeddc0SDimitry Andric void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 713*04eeddc0SDimitry Andric const char *NL, const char *Sep) const { 714*04eeddc0SDimitry Andric printTaint(State, Out, NL, Sep); 7150b57cec5SDimitry Andric } 7160b57cec5SDimitry Andric 717*04eeddc0SDimitry Andric void GenericTaintRule::process(const GenericTaintChecker &Checker, 718*04eeddc0SDimitry Andric const CallEvent &Call, CheckerContext &C) const { 7190b57cec5SDimitry Andric ProgramStateRef State = C.getState(); 720*04eeddc0SDimitry Andric const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); 7210b57cec5SDimitry Andric 722*04eeddc0SDimitry Andric /// Iterate every call argument, and get their corresponding Expr and SVal. 723*04eeddc0SDimitry Andric const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { 724*04eeddc0SDimitry Andric for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) { 725*04eeddc0SDimitry Andric const Expr *E = GetArgExpr(I, Call); 726*04eeddc0SDimitry Andric Fun(I, E, C.getSVal(E)); 7270b57cec5SDimitry Andric } 728*04eeddc0SDimitry Andric }; 7290b57cec5SDimitry Andric 730*04eeddc0SDimitry Andric /// Check for taint sinks. 731*04eeddc0SDimitry Andric ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { 732*04eeddc0SDimitry Andric if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C)) 733*04eeddc0SDimitry Andric Checker.generateReportIfTainted(E, SinkMsg.getValueOr(MsgCustomSink), C); 734*04eeddc0SDimitry Andric }); 7350b57cec5SDimitry Andric 736*04eeddc0SDimitry Andric /// Check for taint filters. 737*04eeddc0SDimitry Andric ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) { 738*04eeddc0SDimitry Andric if (FilterArgs.contains(I)) { 739*04eeddc0SDimitry Andric State = removeTaint(State, S); 740*04eeddc0SDimitry Andric if (auto P = getPointeeOf(C, S)) 741*04eeddc0SDimitry Andric State = removeTaint(State, *P); 7420b57cec5SDimitry Andric } 743*04eeddc0SDimitry Andric }); 7440b57cec5SDimitry Andric 745*04eeddc0SDimitry Andric /// Check for taint propagation sources. 746*04eeddc0SDimitry Andric /// A rule is relevant if PropSrcArgs is empty, or if any of its signified 747*04eeddc0SDimitry Andric /// args are tainted in context of the current CallEvent. 748*04eeddc0SDimitry Andric bool IsMatching = PropSrcArgs.isEmpty(); 749*04eeddc0SDimitry Andric ForEachCallArg( 750*04eeddc0SDimitry Andric [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) { 751*04eeddc0SDimitry Andric IsMatching = IsMatching || (PropSrcArgs.contains(I) && 752*04eeddc0SDimitry Andric isTaintedOrPointsToTainted(E, State, C)); 753*04eeddc0SDimitry Andric }); 7540b57cec5SDimitry Andric 755*04eeddc0SDimitry Andric if (!IsMatching) 756*04eeddc0SDimitry Andric return; 7570b57cec5SDimitry Andric 758*04eeddc0SDimitry Andric const auto WouldEscape = [](SVal V, QualType Ty) -> bool { 759*04eeddc0SDimitry Andric if (!V.getAs<Loc>()) 7600b57cec5SDimitry Andric return false; 7610b57cec5SDimitry Andric 762*04eeddc0SDimitry Andric const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified(); 763*04eeddc0SDimitry Andric const bool IsNonConstPtr = 764*04eeddc0SDimitry Andric Ty->isPointerType() && !Ty->getPointeeType().isConstQualified(); 7650b57cec5SDimitry Andric 766*04eeddc0SDimitry Andric return IsNonConstRef || IsNonConstPtr; 767*04eeddc0SDimitry Andric }; 768*04eeddc0SDimitry Andric 769*04eeddc0SDimitry Andric /// Propagate taint where it is necessary. 770*04eeddc0SDimitry Andric ForEachCallArg( 771*04eeddc0SDimitry Andric [this, &State, WouldEscape](ArgIdxTy I, const Expr *E, SVal V) { 772*04eeddc0SDimitry Andric if (PropDstArgs.contains(I)) 773*04eeddc0SDimitry Andric State = State->add<TaintArgsOnPostVisit>(I); 774*04eeddc0SDimitry Andric 775*04eeddc0SDimitry Andric // TODO: We should traverse all reachable memory regions via the 776*04eeddc0SDimitry Andric // escaping parameter. Instead of doing that we simply mark only the 777*04eeddc0SDimitry Andric // referred memory region as tainted. 778*04eeddc0SDimitry Andric if (WouldEscape(V, E->getType())) 779*04eeddc0SDimitry Andric State = State->add<TaintArgsOnPostVisit>(I); 780*04eeddc0SDimitry Andric }); 781*04eeddc0SDimitry Andric 782*04eeddc0SDimitry Andric C.addTransition(State); 7830b57cec5SDimitry Andric } 7840b57cec5SDimitry Andric 785*04eeddc0SDimitry Andric bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { 786*04eeddc0SDimitry Andric return !C.getAnalysisManager() 787*04eeddc0SDimitry Andric .getAnalyzerOptions() 788*04eeddc0SDimitry Andric .ShouldAssumeControlledEnvironment; 7890b57cec5SDimitry Andric } 7900b57cec5SDimitry Andric 791a7dea167SDimitry Andric bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 7920b57cec5SDimitry Andric CheckerContext &C) const { 7930b57cec5SDimitry Andric assert(E); 794*04eeddc0SDimitry Andric Optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))}; 7950b57cec5SDimitry Andric 796*04eeddc0SDimitry Andric if (!TaintedSVal) 7970b57cec5SDimitry Andric return false; 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric // Generate diagnostic. 8000b57cec5SDimitry Andric if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 801*04eeddc0SDimitry Andric auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); 8020b57cec5SDimitry Andric report->addRange(E->getSourceRange()); 803*04eeddc0SDimitry Andric report->addVisitor(std::make_unique<TaintBugVisitor>(*TaintedSVal)); 8040b57cec5SDimitry Andric C.emitReport(std::move(report)); 8050b57cec5SDimitry Andric return true; 8060b57cec5SDimitry Andric } 8070b57cec5SDimitry Andric return false; 8080b57cec5SDimitry Andric } 8090b57cec5SDimitry Andric 810*04eeddc0SDimitry Andric /// TODO: remove checking for printf format attributes and socket whitelisting 811*04eeddc0SDimitry Andric /// from GenericTaintChecker, and that means the following functions: 812*04eeddc0SDimitry Andric /// getPrintfFormatArgumentNum, 813*04eeddc0SDimitry Andric /// GenericTaintChecker::checkUncontrolledFormatString, 814*04eeddc0SDimitry Andric /// GenericTaintChecker::taintUnsafeSocketProtocol 815*04eeddc0SDimitry Andric 816*04eeddc0SDimitry Andric static bool getPrintfFormatArgumentNum(const CallEvent &Call, 817*04eeddc0SDimitry Andric const CheckerContext &C, 818*04eeddc0SDimitry Andric ArgIdxTy &ArgNum) { 819*04eeddc0SDimitry Andric // Find if the function contains a format string argument. 820*04eeddc0SDimitry Andric // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 821*04eeddc0SDimitry Andric // vsnprintf, syslog, custom annotated functions. 822*04eeddc0SDimitry Andric const Decl *CallDecl = Call.getDecl(); 823*04eeddc0SDimitry Andric if (!CallDecl) 824*04eeddc0SDimitry Andric return false; 825*04eeddc0SDimitry Andric const FunctionDecl *FDecl = CallDecl->getAsFunction(); 826*04eeddc0SDimitry Andric if (!FDecl) 827*04eeddc0SDimitry Andric return false; 828*04eeddc0SDimitry Andric 829*04eeddc0SDimitry Andric const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); 830*04eeddc0SDimitry Andric 831*04eeddc0SDimitry Andric for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 832*04eeddc0SDimitry Andric ArgNum = Format->getFormatIdx() - 1; 833*04eeddc0SDimitry Andric if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum) 834*04eeddc0SDimitry Andric return true; 835*04eeddc0SDimitry Andric } 836*04eeddc0SDimitry Andric 837*04eeddc0SDimitry Andric return false; 838*04eeddc0SDimitry Andric } 839*04eeddc0SDimitry Andric 8400b57cec5SDimitry Andric bool GenericTaintChecker::checkUncontrolledFormatString( 8415ffd83dbSDimitry Andric const CallEvent &Call, CheckerContext &C) const { 8420b57cec5SDimitry Andric // Check if the function contains a format string argument. 843*04eeddc0SDimitry Andric ArgIdxTy ArgNum = 0; 8445ffd83dbSDimitry Andric if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) 8450b57cec5SDimitry Andric return false; 8460b57cec5SDimitry Andric 8470b57cec5SDimitry Andric // If either the format string content or the pointer itself are tainted, 8480b57cec5SDimitry Andric // warn. 8495ffd83dbSDimitry Andric return generateReportIfTainted(Call.getArgExpr(ArgNum), 8500b57cec5SDimitry Andric MsgUncontrolledFormatString, C); 8510b57cec5SDimitry Andric } 8520b57cec5SDimitry Andric 853*04eeddc0SDimitry Andric void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call, 8540b57cec5SDimitry Andric CheckerContext &C) const { 855*04eeddc0SDimitry Andric if (Call.getNumArgs() < 1) 856*04eeddc0SDimitry Andric return; 857*04eeddc0SDimitry Andric const IdentifierInfo *ID = Call.getCalleeIdentifier(); 858*04eeddc0SDimitry Andric if (!ID) 859*04eeddc0SDimitry Andric return; 860*04eeddc0SDimitry Andric if (!ID->getName().equals("socket")) 861*04eeddc0SDimitry Andric return; 8620b57cec5SDimitry Andric 863*04eeddc0SDimitry Andric SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); 864*04eeddc0SDimitry Andric StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 865*04eeddc0SDimitry Andric // Allow internal communication protocols. 866*04eeddc0SDimitry Andric bool SafeProtocol = DomName.equals("AF_SYSTEM") || 867*04eeddc0SDimitry Andric DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") || 868*04eeddc0SDimitry Andric DomName.equals("AF_RESERVED_36"); 869*04eeddc0SDimitry Andric if (SafeProtocol) 870*04eeddc0SDimitry Andric return; 8710b57cec5SDimitry Andric 872*04eeddc0SDimitry Andric C.addTransition(C.getState()->add<TaintArgsOnPostVisit>(ReturnValueIndex)); 8730b57cec5SDimitry Andric } 8740b57cec5SDimitry Andric 875*04eeddc0SDimitry Andric /// Checker registration 876a7dea167SDimitry Andric 877a7dea167SDimitry Andric void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 878*04eeddc0SDimitry Andric Mgr.registerChecker<GenericTaintChecker>(); 8790b57cec5SDimitry Andric } 8800b57cec5SDimitry Andric 8815ffd83dbSDimitry Andric bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { 8820b57cec5SDimitry Andric return true; 8830b57cec5SDimitry Andric } 884