xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (revision 1165fc9a526630487a1feb63daef65c5aee1a583)
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
28 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
29 #include "llvm/Support/YAMLTraits.h"
30 
31 #include <limits>
32 #include <memory>
33 #include <utility>
34 
35 using namespace clang;
36 using namespace ento;
37 using namespace taint;
38 
39 namespace {
40 
41 class GenericTaintChecker;
42 
/// Diagnostic text for CWE-134: Uncontrolled Format String.
/// Used as the sink message of the format-string sink rules.
constexpr llvm::StringLiteral MsgUncontrolledFormatString =
    "Untrusted data is used as a format string "
    "(CWE-134: Uncontrolled Format String)";

/// Diagnostic text for sinks that hand data over to the system
/// (e.g. the "system", "popen" and "exec*" rules). Covers:
/// CERT/STR02-C. "Sanitize data passed to complex subsystems"
/// CWE-78, "Failure to Sanitize Data into an OS Command"
constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
    "Untrusted data is passed to a system call "
    "(CERT/STR02-C. Sanitize data passed to complex subsystems)";

/// Diagnostic text for tainted data used as a buffer size in strn..
/// functions and allocators.
constexpr llvm::StringLiteral MsgTaintedBufferSize =
    "Untrusted data is used to specify the buffer size "
    "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
    "for character data and the null terminator)";

/// Diagnostic text for tainted data used as a custom sink's parameter.
/// Also serves as the fallback message when a sink rule carries no message.
constexpr llvm::StringLiteral MsgCustomSink =
    "Untrusted data is passed to a user-defined sink";
65 
/// Index of a call argument. Signed, so that the return value can be denoted
/// by a negative index (see ReturnValueIndex).
using ArgIdxTy = int;
/// A small list of argument indexes.
using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;

/// Denotes the return value.
constexpr ArgIdxTy ReturnValueIndex{-1};
71 
72 static ArgIdxTy fromArgumentCount(unsigned Count) {
73   assert(Count <=
74              static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) &&
75          "ArgIdxTy is not large enough to represent the number of arguments.");
76   return Count;
77 }
78 
79 /// Check if the region the expression evaluates to is the standard input,
80 /// and thus, is tainted.
81 /// FIXME: Move this to Taint.cpp.
82 bool isStdin(SVal Val, const ASTContext &ACtx) {
83   // FIXME: What if Val is NonParamVarRegion?
84 
85   // The region should be symbolic, we do not know it's value.
86   const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion());
87   if (!SymReg)
88     return false;
89 
90   // Get it's symbol and find the declaration region it's pointing to.
91   const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
92   if (!Sm)
93     return false;
94   const auto *DeclReg = dyn_cast<DeclRegion>(Sm->getRegion());
95   if (!DeclReg)
96     return false;
97 
98   // This region corresponds to a declaration, find out if it's a global/extern
99   // variable named stdin with the proper type.
100   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
101     D = D->getCanonicalDecl();
102     // FIXME: This should look for an exact match.
103     if (D->getName().contains("stdin") && D->isExternC()) {
104       const QualType FILETy = ACtx.getFILEType().getCanonicalType();
105       const QualType Ty = D->getType().getCanonicalType();
106 
107       if (Ty->isPointerType())
108         return Ty->getPointeeType() == FILETy;
109     }
110   }
111   return false;
112 }
113 
114 SVal getPointeeOf(const CheckerContext &C, Loc LValue) {
115   const QualType ArgTy = LValue.getType(C.getASTContext());
116   if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
117     return C.getState()->getSVal(LValue);
118 
119   // Do not dereference void pointers. Treat them as byte pointers instead.
120   // FIXME: we might want to consider more than just the first byte.
121   return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
122 }
123 
124 /// Given a pointer/reference argument, return the value it refers to.
125 Optional<SVal> getPointeeOf(const CheckerContext &C, SVal Arg) {
126   if (auto LValue = Arg.getAs<Loc>())
127     return getPointeeOf(C, *LValue);
128   return None;
129 }
130 
131 /// Given a pointer, return the SVal of its pointee or if it is tainted,
132 /// otherwise return the pointer's SVal if tainted.
133 /// Also considers stdin as a taint source.
134 Optional<SVal> getTaintedPointeeOrPointer(const CheckerContext &C, SVal Arg) {
135   const ProgramStateRef State = C.getState();
136 
137   if (auto Pointee = getPointeeOf(C, Arg))
138     if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None;
139       return Pointee;
140 
141   if (isTainted(State, Arg))
142     return Arg;
143 
144   // FIXME: This should be done by the isTainted() API.
145   if (isStdin(Arg, C.getASTContext()))
146     return Arg;
147 
148   return None;
149 }
150 
151 bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State,
152                                 CheckerContext &C) {
153   return getTaintedPointeeOrPointer(C, C.getSVal(E)).hasValue();
154 }
155 
156 /// ArgSet is used to describe arguments relevant for taint detection or
157 /// taint application. A discrete set of argument indexes and a variadic
158 /// argument list signified by a starting index are supported.
159 class ArgSet {
160 public:
161   ArgSet() = default;
162   ArgSet(ArgVecTy &&DiscreteArgs, Optional<ArgIdxTy> VariadicIndex = None)
163       : DiscreteArgs(std::move(DiscreteArgs)),
164         VariadicIndex(std::move(VariadicIndex)) {}
165 
166   bool contains(ArgIdxTy ArgIdx) const {
167     if (llvm::is_contained(DiscreteArgs, ArgIdx))
168       return true;
169 
170     return VariadicIndex && ArgIdx >= *VariadicIndex;
171   }
172 
173   bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; }
174 
175   ArgVecTy ArgsUpTo(ArgIdxTy LastArgIdx) const {
176     ArgVecTy Args;
177     for (ArgIdxTy I = ReturnValueIndex; I <= LastArgIdx; ++I) {
178       if (contains(I))
179         Args.push_back(I);
180     }
181     return Args;
182   }
183 
184 private:
185   ArgVecTy DiscreteArgs;
186   Optional<ArgIdxTy> VariadicIndex;
187 };
188 
189 /// A struct used to specify taint propagation rules for a function.
190 ///
191 /// If any of the possible taint source arguments is tainted, all of the
192 /// destination arguments should also be tainted. If ReturnValueIndex is added
193 /// to the dst list, the return value will be tainted.
194 class GenericTaintRule {
195   /// Arguments which are taints sinks and should be checked, and a report
196   /// should be emitted if taint reaches these.
197   ArgSet SinkArgs;
198   /// Arguments which should be sanitized on function return.
199   ArgSet FilterArgs;
200   /// Arguments which can participate in taint propagationa. If any of the
201   /// arguments in PropSrcArgs is tainted, all arguments in  PropDstArgs should
202   /// be tainted.
203   ArgSet PropSrcArgs;
204   ArgSet PropDstArgs;
205 
206   /// A message that explains why the call is sensitive to taint.
207   Optional<StringRef> SinkMsg;
208 
209   GenericTaintRule() = default;
210 
211   GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
212                    Optional<StringRef> SinkMsg = None)
213       : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
214         PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
215         SinkMsg(SinkMsg) {}
216 
217 public:
218   /// Make a rule that reports a warning if taint reaches any of \p FilterArgs
219   /// arguments.
220   static GenericTaintRule Sink(ArgSet &&SinkArgs,
221                                Optional<StringRef> Msg = None) {
222     return {std::move(SinkArgs), {}, {}, {}, Msg};
223   }
224 
225   /// Make a rule that sanitizes all FilterArgs arguments.
226   static GenericTaintRule Filter(ArgSet &&FilterArgs) {
227     return {{}, std::move(FilterArgs), {}, {}};
228   }
229 
230   /// Make a rule that unconditionally taints all Args.
231   /// If Func is provided, it must also return true for taint to propagate.
232   static GenericTaintRule Source(ArgSet &&SourceArgs) {
233     return {{}, {}, {}, std::move(SourceArgs)};
234   }
235 
236   /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
237   static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
238     return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
239   }
240 
241   /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
242   static GenericTaintRule SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs,
243                                    ArgSet &&DstArgs,
244                                    Optional<StringRef> Msg = None) {
245     return {
246         std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
247   }
248 
249   /// Process a function which could either be a taint source, a taint sink, a
250   /// taint filter or a taint propagator.
251   void process(const GenericTaintChecker &Checker, const CallEvent &Call,
252                CheckerContext &C) const;
253 
254   /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s.
255   static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) {
256     return ArgIdx == ReturnValueIndex ? Call.getOriginExpr()
257                                       : Call.getArgExpr(ArgIdx);
258   };
259 
260   /// Functions for custom taintedness propagation.
261   static bool UntrustedEnv(CheckerContext &C);
262 };
263 
/// Lookup table used to find the taint rule that applies to a call.
using RuleLookupTy = CallDescriptionMap<GenericTaintRule>;
265 
266 /// Used to parse the configuration file.
267 struct TaintConfiguration {
268   using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
269   enum class VariadicType { None, Src, Dst };
270 
271   struct Common {
272     std::string Name;
273     std::string Scope;
274   };
275 
276   struct Sink : Common {
277     ArgVecTy SinkArgs;
278   };
279 
280   struct Filter : Common {
281     ArgVecTy FilterArgs;
282   };
283 
284   struct Propagation : Common {
285     ArgVecTy SrcArgs;
286     ArgVecTy DstArgs;
287     VariadicType VarType;
288     ArgIdxTy VarIndex;
289   };
290 
291   std::vector<Propagation> Propagations;
292   std::vector<Filter> Filters;
293   std::vector<Sink> Sinks;
294 
295   TaintConfiguration() = default;
296   TaintConfiguration(const TaintConfiguration &) = default;
297   TaintConfiguration(TaintConfiguration &&) = default;
298   TaintConfiguration &operator=(const TaintConfiguration &) = default;
299   TaintConfiguration &operator=(TaintConfiguration &&) = default;
300 };
301 
/// Turns a parsed TaintConfiguration into (CallDescription, GenericTaintRule)
/// pairs. Invalid argument indexes are reported through the CheckerManager
/// passed at construction.
struct GenericTaintRuleParser {
  GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {}
  /// Container type used to gather call identification objects grouped into
  /// pairs with their corresponding taint rules. It is temporary as it is used
  /// to finally initialize RuleLookupTy, which is considered to be immutable.
  using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
  /// Convert every filter, sink and propagation entry of \p Config into a
  /// rule. \p Option is the name of the checker option the configuration came
  /// from; it is only used when reporting invalid values.
  RulesContTy parseConfiguration(const std::string &Option,
                                 TaintConfiguration &&Config) const;

private:
  /// The parts of a qualified function name: the scope components followed by
  /// the function name itself.
  using NamePartsTy = llvm::SmallVector<SmallString<32>, 2>;

  /// Validate part of the configuration, which contains a list of argument
  /// indexes.
  void validateArgVector(const std::string &Option, const ArgVecTy &Args) const;

  /// Split the Scope of \p C on "::" and append its Name.
  template <typename Config> static NamePartsTy parseNameParts(const Config &C);

  // Takes the config and creates a CallDescription for it and associates a Rule
  // with that.
  template <typename Config>
  static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule,
                                     RulesContTy &Rules);

  // One overload per kind of configuration entry; each validates the argument
  // lists of the entry and appends the resulting rule to Rules.
  void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P,
                   RulesContTy &Rules) const;
  void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P,
                   RulesContTy &Rules) const;
  void parseConfig(const std::string &Option,
                   TaintConfiguration::Propagation &&P,
                   RulesContTy &Rules) const;

  /// Used to report invalid checker option values.
  CheckerManager &Mgr;
};
336 
/// The checker itself: applies the built-in and the user-configured taint
/// rules before each call, and taints the scheduled values after the call.
class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
public:
  static void *getTag() {
    static int Tag;
    return &Tag;
  }

  /// Looks up and processes the taint rule matching the call, then runs the
  /// additional format-string and socket checks.
  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
  /// Taints the values that were scheduled for tainting during the pre-call
  /// visit.
  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;

  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
                  const char *Sep) const override;

  /// Generate a report if the expression is tainted or points to tainted data.
  bool generateReportIfTainted(const Expr *E, StringRef Msg,
                               CheckerContext &C) const;

private:
  const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"};

  bool checkUncontrolledFormatString(const CallEvent &Call,
                                     CheckerContext &C) const;

  void taintUnsafeSocketProtocol(const CallEvent &Call,
                                 CheckerContext &C) const;

  /// Default taint rules are initialized with the help of a CheckerContext to
  /// access the names of built-in functions like memcpy.
  void initTaintRules(CheckerContext &C) const;

  /// CallDescription currently cannot restrict matches to the global namespace
  /// only, which is why multiple CallDescriptionMaps are used, as we want to
  /// disambiguate global C functions from functions inside user-defined
  /// namespaces.
  // TODO: Remove separation to simplify matching logic once CallDescriptions
  // are more expressive.

  // Both maps are filled lazily by initTaintRules(), hence `mutable`.
  // StaticTaintRules holds the built-in rules for global C functions,
  // DynamicTaintRules the rules coming from the user-provided configuration.
  mutable Optional<RuleLookupTy> StaticTaintRules;
  mutable Optional<RuleLookupTy> DynamicTaintRules;
};
377 } // end of anonymous namespace
378 
/// YAML serialization mapping.
/// Register std::vector of each configuration entry type as a YAML sequence,
/// as needed by the Propagations, Filters and Sinks lists of
/// TaintConfiguration.
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
383 
384 namespace llvm {
385 namespace yaml {
386 template <> struct MappingTraits<TaintConfiguration> {
387   static void mapping(IO &IO, TaintConfiguration &Config) {
388     IO.mapOptional("Propagations", Config.Propagations);
389     IO.mapOptional("Filters", Config.Filters);
390     IO.mapOptional("Sinks", Config.Sinks);
391   }
392 };
393 
394 template <> struct MappingTraits<TaintConfiguration::Sink> {
395   static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
396     IO.mapRequired("Name", Sink.Name);
397     IO.mapOptional("Scope", Sink.Scope);
398     IO.mapRequired("Args", Sink.SinkArgs);
399   }
400 };
401 
402 template <> struct MappingTraits<TaintConfiguration::Filter> {
403   static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
404     IO.mapRequired("Name", Filter.Name);
405     IO.mapOptional("Scope", Filter.Scope);
406     IO.mapRequired("Args", Filter.FilterArgs);
407   }
408 };
409 
410 template <> struct MappingTraits<TaintConfiguration::Propagation> {
411   static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
412     IO.mapRequired("Name", Propagation.Name);
413     IO.mapOptional("Scope", Propagation.Scope);
414     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
415     IO.mapOptional("DstArgs", Propagation.DstArgs);
416     IO.mapOptional("VariadicType", Propagation.VarType);
417     IO.mapOptional("VariadicIndex", Propagation.VarIndex);
418   }
419 };
420 
421 template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
422   static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) {
423     IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None);
424     IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src);
425     IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst);
426   }
427 };
428 } // namespace yaml
429 } // namespace llvm
430 
/// A set which is used to pass information from call pre-visit instruction
/// to the call post-visit. The values are signed integers, which are either
/// ReturnValueIndex, or indexes of the pointer/reference argument, which
/// points to data, which should be tainted on return.
/// The set is filled while a rule is processed before the call and is
/// removed from the state again by checkPostCall.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, ArgIdxTy)
436 
437 void GenericTaintRuleParser::validateArgVector(const std::string &Option,
438                                                const ArgVecTy &Args) const {
439   for (ArgIdxTy Arg : Args) {
440     if (Arg < ReturnValueIndex) {
441       Mgr.reportInvalidCheckerOptionValue(
442           Mgr.getChecker<GenericTaintChecker>(), Option,
443           "an argument number for propagation rules greater or equal to -1");
444     }
445   }
446 }
447 
448 template <typename Config>
449 GenericTaintRuleParser::NamePartsTy
450 GenericTaintRuleParser::parseNameParts(const Config &C) {
451   NamePartsTy NameParts;
452   if (!C.Scope.empty()) {
453     // If the Scope argument contains multiple "::" parts, those are considered
454     // namespace identifiers.
455     llvm::SmallVector<StringRef, 2> NSParts;
456     StringRef{C.Scope}.split(NSParts, "::", /*MaxSplit*/ -1,
457                              /*KeepEmpty*/ false);
458     NameParts.append(NSParts.begin(), NSParts.end());
459   }
460   NameParts.emplace_back(C.Name);
461   return NameParts;
462 }
463 
464 template <typename Config>
465 void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C,
466                                                     GenericTaintRule &&Rule,
467                                                     RulesContTy &Rules) {
468   NamePartsTy NameParts = parseNameParts(C);
469   llvm::SmallVector<const char *, 2> CallDescParts{NameParts.size()};
470   llvm::transform(NameParts, CallDescParts.begin(),
471                   [](SmallString<32> &S) { return S.c_str(); });
472   Rules.emplace_back(CallDescription(CallDescParts), std::move(Rule));
473 }
474 
475 void GenericTaintRuleParser::parseConfig(const std::string &Option,
476                                          TaintConfiguration::Sink &&S,
477                                          RulesContTy &Rules) const {
478   validateArgVector(Option, S.SinkArgs);
479   consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
480                          Rules);
481 }
482 
483 void GenericTaintRuleParser::parseConfig(const std::string &Option,
484                                          TaintConfiguration::Filter &&S,
485                                          RulesContTy &Rules) const {
486   validateArgVector(Option, S.FilterArgs);
487   consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
488                          Rules);
489 }
490 
491 void GenericTaintRuleParser::parseConfig(const std::string &Option,
492                                          TaintConfiguration::Propagation &&P,
493                                          RulesContTy &Rules) const {
494   validateArgVector(Option, P.SrcArgs);
495   validateArgVector(Option, P.DstArgs);
496   bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src;
497   bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst;
498   Optional<ArgIdxTy> JustVarIndex = P.VarIndex;
499 
500   ArgSet SrcDesc(std::move(P.SrcArgs), IsSrcVariadic ? JustVarIndex : None);
501   ArgSet DstDesc(std::move(P.DstArgs), IsDstVariadic ? JustVarIndex : None);
502 
503   consumeRulesFromConfig(
504       P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
505 }
506 
507 GenericTaintRuleParser::RulesContTy
508 GenericTaintRuleParser::parseConfiguration(const std::string &Option,
509                                            TaintConfiguration &&Config) const {
510 
511   RulesContTy Rules;
512 
513   for (auto &F : Config.Filters)
514     parseConfig(Option, std::move(F), Rules);
515 
516   for (auto &S : Config.Sinks)
517     parseConfig(Option, std::move(S), Rules);
518 
519   for (auto &P : Config.Propagations)
520     parseConfig(Option, std::move(P), Rules);
521 
522   return Rules;
523 }
524 
525 void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
526   // Check for exact name match for functions without builtin substitutes.
527   // Use qualified name, because these are C functions without namespace.
528 
529   if (StaticTaintRules || DynamicTaintRules)
530     return;
531 
532   using RulesConstructionTy =
533       std::vector<std::pair<CallDescription, GenericTaintRule>>;
534   using TR = GenericTaintRule;
535 
536   const Builtin::Context &BI = C.getASTContext().BuiltinInfo;
537 
538   RulesConstructionTy GlobalCRules{
539       // Sources
540       {{"fdopen"}, TR::Source({{ReturnValueIndex}})},
541       {{"fopen"}, TR::Source({{ReturnValueIndex}})},
542       {{"freopen"}, TR::Source({{ReturnValueIndex}})},
543       {{"getch"}, TR::Source({{ReturnValueIndex}})},
544       {{"getchar"}, TR::Source({{ReturnValueIndex}})},
545       {{"getchar_unlocked"}, TR::Source({{ReturnValueIndex}})},
546       {{"gets"}, TR::Source({{0}, ReturnValueIndex})},
547       {{"scanf"}, TR::Source({{}, 1})},
548       {{"wgetch"}, TR::Source({{}, ReturnValueIndex})},
549 
550       // Props
551       {{"atoi"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
552       {{"atol"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
553       {{"atoll"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
554       {{"fgetc"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
555       {{"fgetln"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
556       {{"fgets"}, TR::Prop({{2}}, {{0}, ReturnValueIndex})},
557       {{"fscanf"}, TR::Prop({{0}}, {{}, 2})},
558       {{"sscanf"}, TR::Prop({{0}}, {{}, 2})},
559       {{"getc"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
560       {{"getc_unlocked"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
561       {{"getdelim"}, TR::Prop({{3}}, {{0}})},
562       {{"getline"}, TR::Prop({{2}}, {{0}})},
563       {{"getw"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
564       {{"pread"}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
565       {{"read"}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
566       {{"strchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
567       {{"strrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
568       {{"tolower"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
569       {{"toupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
570       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}},
571        TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
572       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}},
573        TR::Prop({{1, 2}}, {{0}})},
574       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcat)}},
575        TR::Prop({{1, 2}}, {{0}})},
576       {{CDF_MaybeBuiltin, {"snprintf"}},
577        TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})},
578       {{CDF_MaybeBuiltin, {"sprintf"}},
579        TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
580       {{CDF_MaybeBuiltin, {"strcpy"}},
581        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
582       {{CDF_MaybeBuiltin, {"stpcpy"}},
583        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
584       {{CDF_MaybeBuiltin, {"strcat"}},
585        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
586       {{CDF_MaybeBuiltin, {"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
587       {{CDF_MaybeBuiltin, {"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
588       {{CDF_MaybeBuiltin, {"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
589 
590       // Sinks
591       {{"system"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
592       {{"popen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
593       {{"execl"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
594       {{"execle"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
595       {{"execlp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
596       {{"execvp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
597       {{"execvP"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
598       {{"execve"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
599       {{"dlopen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
600       {{CDF_MaybeBuiltin, {"malloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
601       {{CDF_MaybeBuiltin, {"calloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
602       {{CDF_MaybeBuiltin, {"alloca"}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
603       {{CDF_MaybeBuiltin, {"memccpy"}}, TR::Sink({{3}}, MsgTaintedBufferSize)},
604       {{CDF_MaybeBuiltin, {"realloc"}}, TR::Sink({{1}}, MsgTaintedBufferSize)},
605       {{{"setproctitle"}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
606       {{{"setproctitle_fast"}},
607        TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
608 
609       // SinkProps
610       {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
611        TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
612                     MsgTaintedBufferSize)},
613       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
614        TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
615                     MsgTaintedBufferSize)},
616       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
617        TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
618                     MsgTaintedBufferSize)},
619       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
620        TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
621                     MsgTaintedBufferSize)},
622       {{CDF_MaybeBuiltin, {"bcopy"}},
623        TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};
624 
625   // `getenv` returns taint only in untrusted environments.
626   if (TR::UntrustedEnv(C)) {
627     // void setproctitle_init(int argc, char *argv[], char *envp[])
628     GlobalCRules.push_back(
629         {{{"setproctitle_init"}}, TR::Sink({{2}}, MsgCustomSink)});
630     GlobalCRules.push_back({{"getenv"}, TR::Source({{ReturnValueIndex}})});
631   }
632 
633   StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
634                            std::make_move_iterator(GlobalCRules.end()));
635 
636   // User-provided taint configuration.
637   CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager();
638   assert(Mgr);
639   GenericTaintRuleParser ConfigParser{*Mgr};
640   std::string Option{"Config"};
641   StringRef ConfigFile =
642       Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option);
643   llvm::Optional<TaintConfiguration> Config =
644       getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile);
645   if (!Config) {
646     // We don't have external taint config, no parsing required.
647     DynamicTaintRules = RuleLookupTy{};
648     return;
649   }
650 
651   GenericTaintRuleParser::RulesContTy Rules{
652       ConfigParser.parseConfiguration(Option, std::move(Config.getValue()))};
653 
654   DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
655                             std::make_move_iterator(Rules.end()));
656 }
657 
658 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
659                                        CheckerContext &C) const {
660   initTaintRules(C);
661 
662   // FIXME: this should be much simpler.
663   if (const auto *Rule =
664           Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr)
665     Rule->process(*this, Call, C);
666   else if (const auto *Rule = DynamicTaintRules->lookup(Call))
667     Rule->process(*this, Call, C);
668 
669   // FIXME: These edge cases are to be eliminated from here eventually.
670   //
671   // Additional check that is not supported by CallDescription.
672   // TODO: Make CallDescription be able to match attributes such as printf-like
673   // arguments.
674   checkUncontrolledFormatString(Call, C);
675 
676   // TODO: Modeling sockets should be done in a specific checker.
677   // Socket is a source, which taints the return value.
678   taintUnsafeSocketProtocol(Call, C);
679 }
680 
681 void GenericTaintChecker::checkPostCall(const CallEvent &Call,
682                                         CheckerContext &C) const {
683   // Set the marked values as tainted. The return value only accessible from
684   // checkPostStmt.
685   ProgramStateRef State = C.getState();
686 
687   // Depending on what was tainted at pre-visit, we determined a set of
688   // arguments which should be tainted after the function returns. These are
689   // stored in the state as TaintArgsOnPostVisit set.
690   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
691   if (TaintArgs.isEmpty())
692     return;
693 
694   for (ArgIdxTy ArgNum : TaintArgs) {
695     // Special handling for the tainted return value.
696     if (ArgNum == ReturnValueIndex) {
697       State = addTaint(State, Call.getReturnValue());
698       continue;
699     }
700 
701     // The arguments are pointer arguments. The data they are pointing at is
702     // tainted after the call.
703     if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum)))
704       State = addTaint(State, *V);
705   }
706 
707   // Clear up the taint info from the state.
708   State = State->remove<TaintArgsOnPostVisit>();
709   C.addTransition(State);
710 }
711 
/// Dump the taint information of \p State to \p Out by forwarding to
/// printTaint().
void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
                                     const char *NL, const char *Sep) const {
  printTaint(State, Out, NL, Sep);
}
716 
/// Apply this rule to \p Call during the pre-call visit.
///
/// The steps are, in order:
///  1. report every sink argument that is tainted or points to tainted data;
///  2. remove the taint of every filter argument and of its pointee;
///  3. decide whether the propagation part applies (no propagation sources,
///     or at least one tainted propagation source);
///  4. if it applies, schedule the destination arguments, and every argument
///     that may escape through a non-const pointer, for tainting in the
///     post-call visit via the TaintArgsOnPostVisit set.
/// The resulting state is committed with a single transition at the end. If
/// step 3 decides that the rule does not apply, the function returns without
/// adding a transition, so the changes of step 2 are not committed either.
void GenericTaintRule::process(const GenericTaintChecker &Checker,
                               const CallEvent &Call, CheckerContext &C) const {
  ProgramStateRef State = C.getState();
  const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());

  /// Iterate every call argument, and get their corresponding Expr and SVal.
  /// The iteration starts at ReturnValueIndex, so the call expression itself
  /// is visited first, standing in for the return value.
  const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) {
    for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
      const Expr *E = GetArgExpr(I, Call);
      Fun(I, E, C.getSVal(E));
    }
  };

  /// Check for taint sinks.
  /// Falls back to the custom-sink message if the rule has no own message.
  ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) {
    if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C))
      Checker.generateReportIfTainted(E, SinkMsg.getValueOr(MsgCustomSink), C);
  });

  /// Check for taint filters.
  /// Both the argument value and, for locations, the pointed-to value are
  /// sanitized.
  ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) {
    if (FilterArgs.contains(I)) {
      State = removeTaint(State, S);
      if (auto P = getPointeeOf(C, S))
        State = removeTaint(State, *P);
    }
  });

  /// Check for taint propagation sources.
  /// A rule is relevant if PropSrcArgs is empty, or if any of its signified
  /// args are tainted in context of the current CallEvent.
  bool IsMatching = PropSrcArgs.isEmpty();
  ForEachCallArg(
      [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) {
        // Once a match is found, the short-circuit skips further taint
        // queries.
        IsMatching = IsMatching || (PropSrcArgs.contains(I) &&
                                    isTaintedOrPointsToTainted(E, State, C));
      });

  if (!IsMatching)
    return;

  // Tells whether an argument of value V and type Ty hands out a location
  // through which the callee could write, i.e. a non-const reference or a
  // pointer to non-const.
  // NOTE(review): Ty is the type of the argument expression, not of the
  // parameter; confirm that the reference branch can trigger at all.
  const auto WouldEscape = [](SVal V, QualType Ty) -> bool {
    if (!V.getAs<Loc>())
      return false;

    const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
    const bool IsNonConstPtr =
        Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();

    return IsNonConstRef || IsNonConstPtr;
  };

  /// Propagate taint where it is necessary.
  ForEachCallArg(
      [this, &State, WouldEscape](ArgIdxTy I, const Expr *E, SVal V) {
        if (PropDstArgs.contains(I))
          State = State->add<TaintArgsOnPostVisit>(I);

        // TODO: We should traverse all reachable memory regions via the
        // escaping parameter. Instead of doing that we simply mark only the
        // referred memory region as tainted.
        if (WouldEscape(V, E->getType()))
          State = State->add<TaintArgsOnPostVisit>(I);
      });

  C.addTransition(State);
}
784 
785 bool GenericTaintRule::UntrustedEnv(CheckerContext &C) {
786   return !C.getAnalysisManager()
787               .getAnalyzerOptions()
788               .ShouldAssumeControlledEnvironment;
789 }
790 
791 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
792                                                   CheckerContext &C) const {
793   assert(E);
794   Optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))};
795 
796   if (!TaintedSVal)
797     return false;
798 
799   // Generate diagnostic.
800   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
801     auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
802     report->addRange(E->getSourceRange());
803     report->addVisitor(std::make_unique<TaintBugVisitor>(*TaintedSVal));
804     C.emitReport(std::move(report));
805     return true;
806   }
807   return false;
808 }
809 
810 /// TODO: remove checking for printf format attributes and socket whitelisting
811 /// from GenericTaintChecker, and that means the following functions:
812 /// getPrintfFormatArgumentNum,
813 /// GenericTaintChecker::checkUncontrolledFormatString,
814 /// GenericTaintChecker::taintUnsafeSocketProtocol
815 
816 static bool getPrintfFormatArgumentNum(const CallEvent &Call,
817                                        const CheckerContext &C,
818                                        ArgIdxTy &ArgNum) {
819   // Find if the function contains a format string argument.
820   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
821   // vsnprintf, syslog, custom annotated functions.
822   const Decl *CallDecl = Call.getDecl();
823   if (!CallDecl)
824     return false;
825   const FunctionDecl *FDecl = CallDecl->getAsFunction();
826   if (!FDecl)
827     return false;
828 
829   const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
830 
831   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
832     ArgNum = Format->getFormatIdx() - 1;
833     if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum)
834       return true;
835   }
836 
837   return false;
838 }
839 
840 bool GenericTaintChecker::checkUncontrolledFormatString(
841     const CallEvent &Call, CheckerContext &C) const {
842   // Check if the function contains a format string argument.
843   ArgIdxTy ArgNum = 0;
844   if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
845     return false;
846 
847   // If either the format string content or the pointer itself are tainted,
848   // warn.
849   return generateReportIfTainted(Call.getArgExpr(ArgNum),
850                                  MsgUncontrolledFormatString, C);
851 }
852 
853 void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call,
854                                                     CheckerContext &C) const {
855   if (Call.getNumArgs() < 1)
856     return;
857   const IdentifierInfo *ID = Call.getCalleeIdentifier();
858   if (!ID)
859     return;
860   if (!ID->getName().equals("socket"))
861     return;
862 
863   SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
864   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
865   // Allow internal communication protocols.
866   bool SafeProtocol = DomName.equals("AF_SYSTEM") ||
867                       DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") ||
868                       DomName.equals("AF_RESERVED_36");
869   if (SafeProtocol)
870     return;
871 
872   C.addTransition(C.getState()->add<TaintArgsOnPostVisit>(ReturnValueIndex));
873 }
874 
875 /// Checker registration
876 
877 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
878   Mgr.registerChecker<GenericTaintChecker>();
879 }
880 
881 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
882   return true;
883 }
884