xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (revision a7dea1671b87c07d2d266f836bfa8b58efc7c134)
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
27 #include "llvm/ADT/StringMap.h"
28 #include "llvm/Support/YAMLTraits.h"
29 #include <limits>
30 #include <utility>
31 
32 using namespace clang;
33 using namespace ento;
34 using namespace taint;
35 
36 namespace {
/// Checker that hooks call expressions both before and after they are
/// evaluated.
///
/// On the pre-visit it reports tainted data reaching known sinks and records
/// which arguments / return value have to be tainted; on the post-visit it
/// applies the recorded taint. Additional propagation rules, filters and
/// sinks can be supplied through a parsed TaintConfiguration.
class GenericTaintChecker
    : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
public:
  /// Return the address of a function-local static as this checker's tag.
  static void *getTag() {
    static int Tag;
    return &Tag;
  }

  /// Post-visit callback for call expressions.
  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;

  /// Pre-visit callback for call expressions.
  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;

  /// Print the taint information stored in \p State to \p Out.
  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
                  const char *Sep) const override;

  /// List of argument indexes.
  using ArgVector = SmallVector<unsigned, 2>;
  /// List of argument indexes as read from the configuration, where negative
  /// values are possible (see convertToArgVector).
  using SignedArgVector = SmallVector<int, 2>;

  /// Tells whether the variadic arguments of a function act as taint sources,
  /// as taint destinations, or are not considered at all.
  enum class VariadicType { None, Src, Dst };

  /// Used to parse the configuration file.
  struct TaintConfiguration {
    /// A function name together with the argument indexes it applies to.
    using NameArgsPair = std::pair<std::string, ArgVector>;

    /// One user-defined propagation rule as it appears in the configuration.
    struct Propagation {
      std::string Name;
      ArgVector SrcArgs;
      SignedArgVector DstArgs;
      VariadicType VarType;
      unsigned VarIndex;
    };

    std::vector<Propagation> Propagations;
    std::vector<NameArgsPair> Filters;
    std::vector<NameArgsPair> Sinks;

    TaintConfiguration() = default;
    TaintConfiguration(const TaintConfiguration &) = default;
    TaintConfiguration(TaintConfiguration &&) = default;
    TaintConfiguration &operator=(const TaintConfiguration &) = default;
    TaintConfiguration &operator=(TaintConfiguration &&) = default;
  };

  /// Convert SignedArgVector to ArgVector.
  ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
                               SignedArgVector Args);

  /// Parse the config.
  void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
                          TaintConfiguration &&Config);

  /// Sentinel argument index: the largest value representable by unsigned.
  static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
  /// Denotes the return value.
  static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
                                         1};

private:
  /// Bug type used for all reports; created on first use by initBugType().
  mutable std::unique_ptr<BugType> BT;
  void initBugType() const {
    if (!BT)
      BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
  }

  /// Catch taint related bugs. Check if tainted data is passed to a
  /// system call etc.
  bool checkPre(const CallExpr *CE, CheckerContext &C) const;

  /// Add taint sources on a pre-visit.
  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;

  /// Propagate taint generated at pre-visit.
  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;

  /// Check if the region the expression evaluates to is the standard input,
  /// and thus, is tainted.
  static bool isStdin(const Expr *E, CheckerContext &C);

  /// Given a pointer argument, return the value it points to.
  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);

  /// Check for CWE-134: Uncontrolled Format String.
  static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
      "Untrusted data is used as a format string "
      "(CWE-134: Uncontrolled Format String)";
  bool checkUncontrolledFormatString(const CallExpr *CE,
                                     CheckerContext &C) const;

  /// Check for:
  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
  /// CWE-78, "Failure to Sanitize Data into an OS Command"
  static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
      "Untrusted data is passed to a system call "
      "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
  bool checkSystemCall(const CallExpr *CE, StringRef Name,
                       CheckerContext &C) const;

  /// Check if tainted data is used as a buffer size in strn.. functions,
  /// and allocators.
  static constexpr llvm::StringLiteral MsgTaintedBufferSize =
      "Untrusted data is used to specify the buffer size "
      "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
      "for character data and the null terminator)";
  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
                              CheckerContext &C) const;

  /// Check if tainted data is used as a custom sink's parameter.
  static constexpr llvm::StringLiteral MsgCustomSink =
      "Untrusted data is passed to a user-defined sink";
  bool checkCustomSinks(const CallExpr *CE, StringRef Name,
                        CheckerContext &C) const;

  /// Generate a report if the expression is tainted or points to tainted data.
  bool generateReportIfTainted(const Expr *E, StringRef Msg,
                               CheckerContext &C) const;

  struct TaintPropagationRule;
  /// Maps a function name to its propagation rule.
  using NameRuleMap = llvm::StringMap<TaintPropagationRule>;
  /// Maps a function name to a list of argument indexes.
  using NameArgMap = llvm::StringMap<ArgVector>;

  /// A struct used to specify taint propagation rules for a function.
  ///
  /// If any of the possible taint source arguments is tainted, all of the
  /// destination arguments should also be tainted. Use InvalidArgIndex in the
  /// src list to specify that all of the arguments can introduce taint. Use
  /// InvalidArgIndex in the dst arguments to signify that all the non-const
  /// pointer and reference arguments might be tainted on return. If
  /// ReturnValueIndex is added to the dst list, the return value will be
  /// tainted.
  struct TaintPropagationRule {
    /// Signature of the optional hook that can override the taintedness
    /// computed from the source arguments.
    using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
                                         CheckerContext &C);

    /// List of arguments which can be taint sources and should be checked.
    ArgVector SrcArgs;
    /// List of arguments which should be tainted on function return.
    ArgVector DstArgs;
    /// Index for the first variadic parameter if it exists.
    unsigned VariadicIndex;
    /// Show when a function has variadic parameters. If it has, it marks all
    /// of them as source or destination.
    VariadicType VarType;
    /// Special function for tainted source determination. If defined, it can
    /// override the default behavior.
    PropagationFuncType PropagationFunc;

    /// Create an empty rule, i.e. one for which isNull() holds.
    TaintPropagationRule()
        : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
          PropagationFunc(nullptr) {}

    /// Create a rule from source and destination argument lists and,
    /// optionally, variadic handling and a custom propagation hook.
    TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
                         VariadicType Var = VariadicType::None,
                         unsigned VarIndex = InvalidArgIndex,
                         PropagationFuncType Func = nullptr)
        : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
          VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}

    /// Get the propagation rule for a given function.
    static TaintPropagationRule
    getTaintPropagationRule(const NameRuleMap &CustomPropagations,
                            const FunctionDecl *FDecl, StringRef Name,
                            CheckerContext &C);

    void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
    void addDstArg(unsigned A) { DstArgs.push_back(A); }

    /// A rule is null when it has no source arguments, no destination
    /// arguments and no variadic handling.
    bool isNull() const {
      return SrcArgs.empty() && DstArgs.empty() &&
             VariadicType::None == VarType;
    }

    /// Tell whether \p ArgNum is listed among the destination arguments.
    bool isDestinationArgument(unsigned ArgNum) const {
      return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
    }

    /// Return true if \p E is tainted, is stdin, or is a pointer whose
    /// pointed-to value is tainted.
    static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
                                           CheckerContext &C) {
      if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
        return true;

      // Only pointers can additionally point to tainted data.
      if (!E->getType().getTypePtr()->isPointerType())
        return false;

      Optional<SVal> V = getPointedToSVal(C, E);
      return (V && isTainted(State, *V));
    }

    /// Pre-process a function which propagates taint according to the
    /// taint rule.
    ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;

    // Functions for custom taintedness propagation.
    static bool postSocket(bool IsTainted, const CallExpr *CE,
                           CheckerContext &C);
  };

  /// Defines a map between the propagation function's name and
  /// TaintPropagationRule.
  NameRuleMap CustomPropagations;

  /// Defines a map between the filter function's name and filtering args.
  NameArgMap CustomFilters;

  /// Defines a map between the sink function's name and sinking args.
  NameArgMap CustomSinks;
};
242 
// Out-of-line definitions for the static constants that are initialized
// inside the class body.
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;

// FIXME: these lines can be removed in C++17
constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
251 } // end of anonymous namespace
252 
// Short alias for the configuration type used by the YAML traits below.
using TaintConfig = GenericTaintChecker::TaintConfiguration;

// Let YAML I/O treat vectors of these element types as sequences.
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)
257 
namespace llvm {
namespace yaml {
/// Top level of the configuration: three optional lists named
/// "Propagations", "Filters" and "Sinks".
template <> struct MappingTraits<TaintConfig> {
  static void mapping(IO &IO, TaintConfig &Config) {
    IO.mapOptional("Propagations", Config.Propagations);
    IO.mapOptional("Filters", Config.Filters);
    IO.mapOptional("Sinks", Config.Sinks);
  }
};

/// A single propagation entry. Only "Name" is required; "VariadicType"
/// defaults to None and "VariadicIndex" to InvalidArgIndex.
template <> struct MappingTraits<TaintConfig::Propagation> {
  static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
    IO.mapRequired("Name", Propagation.Name);
    IO.mapOptional("SrcArgs", Propagation.SrcArgs);
    IO.mapOptional("DstArgs", Propagation.DstArgs);
    IO.mapOptional("VariadicType", Propagation.VarType,
                   GenericTaintChecker::VariadicType::None);
    IO.mapOptional("VariadicIndex", Propagation.VarIndex,
                   GenericTaintChecker::InvalidArgIndex);
  }
};

/// Textual spellings of VariadicType: "None", "Src" and "Dst".
template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
  static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
    IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
    IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
    IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
  }
};

/// A filter or sink entry: a required "Name" and a required "Args" list.
template <> struct MappingTraits<TaintConfig::NameArgsPair> {
  static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) {
    IO.mapRequired("Name", NameArg.first);
    IO.mapRequired("Args", NameArg.second);
  }
};
} // namespace yaml
} // namespace llvm
296 
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers: either
/// ReturnValueIndex, or the index of a pointer/reference argument whose
/// pointed-to data should be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
302 
303 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
304     CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
305   ArgVector Result;
306   for (int Arg : Args) {
307     if (Arg == -1)
308       Result.push_back(ReturnValueIndex);
309     else if (Arg < -1) {
310       Result.push_back(InvalidArgIndex);
311       Mgr.reportInvalidCheckerOptionValue(
312           this, Option,
313           "an argument number for propagation rules greater or equal to -1");
314     } else
315       Result.push_back(static_cast<unsigned>(Arg));
316   }
317   return Result;
318 }
319 
320 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
321                                              const std::string &Option,
322                                              TaintConfiguration &&Config) {
323   for (auto &P : Config.Propagations) {
324     GenericTaintChecker::CustomPropagations.try_emplace(
325         P.Name, std::move(P.SrcArgs),
326         convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex);
327   }
328 
329   for (auto &F : Config.Filters) {
330     GenericTaintChecker::CustomFilters.try_emplace(F.first,
331                                                    std::move(F.second));
332   }
333 
334   for (auto &S : Config.Sinks) {
335     GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second));
336   }
337 }
338 
339 GenericTaintChecker::TaintPropagationRule
340 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
341     const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl,
342     StringRef Name, CheckerContext &C) {
343   // TODO: Currently, we might lose precision here: we always mark a return
344   // value as tainted even if it's just a pointer, pointing to tainted data.
345 
346   // Check for exact name match for functions without builtin substitutes.
347   TaintPropagationRule Rule =
348       llvm::StringSwitch<TaintPropagationRule>(Name)
349           // Source functions
350           // TODO: Add support for vfscanf & family.
351           .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
352           .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
353           .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
354           .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
355           .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
356           .Case("getchar_unlocked",
357                 TaintPropagationRule({}, {ReturnValueIndex}))
358           .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
359           .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
360           .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
361           .Case("socket",
362                 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
363                                      InvalidArgIndex,
364                                      &TaintPropagationRule::postSocket))
365           .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
366           // Propagating functions
367           .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
368           .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
369           .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
370           .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
371           .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
372           .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
373           .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
374           .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
375           .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
376           .Case("getdelim", TaintPropagationRule({3}, {0}))
377           .Case("getline", TaintPropagationRule({2}, {0}))
378           .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
379           .Case("pread",
380                 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
381           .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
382           .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
383           .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
384           .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
385           .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
386           .Default(TaintPropagationRule());
387 
388   if (!Rule.isNull())
389     return Rule;
390 
391   // Check if it's one of the memory setting/copying functions.
392   // This check is specialized but faster then calling isCLibraryFunction.
393   unsigned BId = 0;
394   if ((BId = FDecl->getMemoryFunctionKind()))
395     switch (BId) {
396     case Builtin::BImemcpy:
397     case Builtin::BImemmove:
398     case Builtin::BIstrncpy:
399     case Builtin::BIstrncat:
400       return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
401     case Builtin::BIstrlcpy:
402     case Builtin::BIstrlcat:
403       return TaintPropagationRule({1, 2}, {0});
404     case Builtin::BIstrndup:
405       return TaintPropagationRule({0, 1}, {ReturnValueIndex});
406 
407     default:
408       break;
409     };
410 
411   // Process all other functions which could be defined as builtins.
412   if (Rule.isNull()) {
413     if (C.isCLibraryFunction(FDecl, "snprintf"))
414       return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
415                                   3);
416     else if (C.isCLibraryFunction(FDecl, "sprintf"))
417       return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
418                                   2);
419     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
420              C.isCLibraryFunction(FDecl, "stpcpy") ||
421              C.isCLibraryFunction(FDecl, "strcat"))
422       return TaintPropagationRule({1}, {0, ReturnValueIndex});
423     else if (C.isCLibraryFunction(FDecl, "bcopy"))
424       return TaintPropagationRule({0, 2}, {1});
425     else if (C.isCLibraryFunction(FDecl, "strdup") ||
426              C.isCLibraryFunction(FDecl, "strdupa"))
427       return TaintPropagationRule({0}, {ReturnValueIndex});
428     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
429       return TaintPropagationRule({0}, {ReturnValueIndex});
430   }
431 
432   // Skipping the following functions, since they might be used for cleansing
433   // or smart memory copy:
434   // - memccpy - copying until hitting a special character.
435 
436   auto It = CustomPropagations.find(Name);
437   if (It != CustomPropagations.end())
438     return It->getValue();
439 
440   return TaintPropagationRule();
441 }
442 
443 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
444                                        CheckerContext &C) const {
445   // Check for taintedness related errors first: system call, uncontrolled
446   // format string, tainted buffer size.
447   if (checkPre(CE, C))
448     return;
449 
450   // Marks the function's arguments and/or return value tainted if it present in
451   // the list.
452   addSourcesPre(CE, C);
453 }
454 
455 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
456                                         CheckerContext &C) const {
457   // Set the marked values as tainted. The return value only accessible from
458   // checkPostStmt.
459   propagateFromPre(CE, C);
460 }
461 
462 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
463                                      const char *NL, const char *Sep) const {
464   printTaint(State, Out, NL, Sep);
465 }
466 
467 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
468                                         CheckerContext &C) const {
469   ProgramStateRef State = nullptr;
470   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
471   if (!FDecl || FDecl->getKind() != Decl::Function)
472     return;
473 
474   StringRef Name = C.getCalleeName(FDecl);
475   if (Name.empty())
476     return;
477 
478   // First, try generating a propagation rule for this function.
479   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
480       this->CustomPropagations, FDecl, Name, C);
481   if (!Rule.isNull()) {
482     State = Rule.process(CE, C);
483     if (!State)
484       return;
485     C.addTransition(State);
486     return;
487   }
488 
489   if (!State)
490     return;
491   C.addTransition(State);
492 }
493 
494 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
495                                            CheckerContext &C) const {
496   ProgramStateRef State = C.getState();
497 
498   // Depending on what was tainted at pre-visit, we determined a set of
499   // arguments which should be tainted after the function returns. These are
500   // stored in the state as TaintArgsOnPostVisit set.
501   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
502   if (TaintArgs.isEmpty())
503     return false;
504 
505   for (unsigned ArgNum : TaintArgs) {
506     // Special handling for the tainted return value.
507     if (ArgNum == ReturnValueIndex) {
508       State = addTaint(State, CE, C.getLocationContext());
509       continue;
510     }
511 
512     // The arguments are pointer arguments. The data they are pointing at is
513     // tainted after the call.
514     if (CE->getNumArgs() < (ArgNum + 1))
515       return false;
516     const Expr *Arg = CE->getArg(ArgNum);
517     Optional<SVal> V = getPointedToSVal(C, Arg);
518     if (V)
519       State = addTaint(State, *V);
520   }
521 
522   // Clear up the taint info from the state.
523   State = State->remove<TaintArgsOnPostVisit>();
524 
525   if (State != C.getState()) {
526     C.addTransition(State);
527     return true;
528   }
529   return false;
530 }
531 
532 bool GenericTaintChecker::checkPre(const CallExpr *CE,
533                                    CheckerContext &C) const {
534 
535   if (checkUncontrolledFormatString(CE, C))
536     return true;
537 
538   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
539   if (!FDecl || FDecl->getKind() != Decl::Function)
540     return false;
541 
542   StringRef Name = C.getCalleeName(FDecl);
543   if (Name.empty())
544     return false;
545 
546   if (checkSystemCall(CE, Name, C))
547     return true;
548 
549   if (checkTaintedBufferSize(CE, FDecl, C))
550     return true;
551 
552   if (checkCustomSinks(CE, Name, C))
553     return true;
554 
555   return false;
556 }
557 
558 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
559                                                      const Expr *Arg) {
560   ProgramStateRef State = C.getState();
561   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
562   if (AddrVal.isUnknownOrUndef())
563     return None;
564 
565   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
566   if (!AddrLoc)
567     return None;
568 
569   QualType ArgTy = Arg->getType().getCanonicalType();
570   if (!ArgTy->isPointerType())
571     return None;
572 
573   QualType ValTy = ArgTy->getPointeeType();
574 
575   // Do not dereference void pointers. Treat them as byte pointers instead.
576   // FIXME: we might want to consider more than just the first byte.
577   if (ValTy->isVoidType())
578     ValTy = C.getASTContext().CharTy;
579 
580   return State->getSVal(*AddrLoc, ValTy);
581 }
582 
583 ProgramStateRef
584 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
585                                                    CheckerContext &C) const {
586   ProgramStateRef State = C.getState();
587 
588   // Check for taint in arguments.
589   bool IsTainted = true;
590   for (unsigned ArgNum : SrcArgs) {
591     if (ArgNum >= CE->getNumArgs())
592       continue;
593 
594     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
595       break;
596   }
597 
598   // Check for taint in variadic arguments.
599   if (!IsTainted && VariadicType::Src == VarType) {
600     // Check if any of the arguments is tainted
601     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
602       if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
603         break;
604     }
605   }
606 
607   if (PropagationFunc)
608     IsTainted = PropagationFunc(IsTainted, CE, C);
609 
610   if (!IsTainted)
611     return State;
612 
613   // Mark the arguments which should be tainted after the function returns.
614   for (unsigned ArgNum : DstArgs) {
615     // Should mark the return value?
616     if (ArgNum == ReturnValueIndex) {
617       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
618       continue;
619     }
620 
621     if (ArgNum >= CE->getNumArgs())
622       continue;
623 
624     // Mark the given argument.
625     State = State->add<TaintArgsOnPostVisit>(ArgNum);
626   }
627 
628   // Mark all variadic arguments tainted if present.
629   if (VariadicType::Dst == VarType) {
630     // For all pointer and references that were passed in:
631     //   If they are not pointing to const data, mark data as tainted.
632     //   TODO: So far we are just going one level down; ideally we'd need to
633     //         recurse here.
634     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
635       const Expr *Arg = CE->getArg(i);
636       // Process pointer argument.
637       const Type *ArgTy = Arg->getType().getTypePtr();
638       QualType PType = ArgTy->getPointeeType();
639       if ((!PType.isNull() && !PType.isConstQualified()) ||
640           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
641         State = State->add<TaintArgsOnPostVisit>(i);
642     }
643   }
644 
645   return State;
646 }
647 
648 // If argument 0(protocol domain) is network, the return value should get taint.
649 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
650                                                            const CallExpr *CE,
651                                                            CheckerContext &C) {
652   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
653   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
654   // White list the internal communication protocols.
655   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
656       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
657     return false;
658 
659   return true;
660 }
661 
662 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
663   ProgramStateRef State = C.getState();
664   SVal Val = C.getSVal(E);
665 
666   // stdin is a pointer, so it would be a region.
667   const MemRegion *MemReg = Val.getAsRegion();
668 
669   // The region should be symbolic, we do not know it's value.
670   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
671   if (!SymReg)
672     return false;
673 
674   // Get it's symbol and find the declaration region it's pointing to.
675   const SymbolRegionValue *Sm =
676       dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
677   if (!Sm)
678     return false;
679   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
680   if (!DeclReg)
681     return false;
682 
683   // This region corresponds to a declaration, find out if it's a global/extern
684   // variable named stdin with the proper type.
685   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
686     D = D->getCanonicalDecl();
687     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
688       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
689       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
690                        C.getASTContext().getFILEType().getCanonicalType())
691         return true;
692     }
693   }
694   return false;
695 }
696 
697 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
698                                        const CheckerContext &C,
699                                        unsigned &ArgNum) {
700   // Find if the function contains a format string argument.
701   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
702   // vsnprintf, syslog, custom annotated functions.
703   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
704   if (!FDecl)
705     return false;
706   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
707     ArgNum = Format->getFormatIdx() - 1;
708     if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
709       return true;
710   }
711 
712   // Or if a function is named setproctitle (this is a heuristic).
713   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
714     ArgNum = 0;
715     return true;
716   }
717 
718   return false;
719 }
720 
721 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
722                                                   CheckerContext &C) const {
723   assert(E);
724 
725   // Check for taint.
726   ProgramStateRef State = C.getState();
727   Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
728   SVal TaintedSVal;
729   if (PointedToSVal && isTainted(State, *PointedToSVal))
730     TaintedSVal = *PointedToSVal;
731   else if (isTainted(State, E, C.getLocationContext()))
732     TaintedSVal = C.getSVal(E);
733   else
734     return false;
735 
736   // Generate diagnostic.
737   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
738     initBugType();
739     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
740     report->addRange(E->getSourceRange());
741     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
742     C.emitReport(std::move(report));
743     return true;
744   }
745   return false;
746 }
747 
748 bool GenericTaintChecker::checkUncontrolledFormatString(
749     const CallExpr *CE, CheckerContext &C) const {
750   // Check if the function contains a format string argument.
751   unsigned ArgNum = 0;
752   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
753     return false;
754 
755   // If either the format string content or the pointer itself are tainted,
756   // warn.
757   return generateReportIfTainted(CE->getArg(ArgNum),
758                                  MsgUncontrolledFormatString, C);
759 }
760 
761 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
762                                           CheckerContext &C) const {
763   // TODO: It might make sense to run this check on demand. In some cases,
764   // we should check if the environment has been cleansed here. We also might
765   // need to know if the user was reset before these calls(seteuid).
766   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
767                         .Case("system", 0)
768                         .Case("popen", 0)
769                         .Case("execl", 0)
770                         .Case("execle", 0)
771                         .Case("execlp", 0)
772                         .Case("execv", 0)
773                         .Case("execvp", 0)
774                         .Case("execvP", 0)
775                         .Case("execve", 0)
776                         .Case("dlopen", 0)
777                         .Default(InvalidArgIndex);
778 
779   if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
780     return false;
781 
782   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
783 }
784 
785 // TODO: Should this check be a part of the CString checker?
786 // If yes, should taint be a global setting?
787 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
788                                                  const FunctionDecl *FDecl,
789                                                  CheckerContext &C) const {
790   // If the function has a buffer size argument, set ArgNum.
791   unsigned ArgNum = InvalidArgIndex;
792   unsigned BId = 0;
793   if ((BId = FDecl->getMemoryFunctionKind()))
794     switch (BId) {
795     case Builtin::BImemcpy:
796     case Builtin::BImemmove:
797     case Builtin::BIstrncpy:
798       ArgNum = 2;
799       break;
800     case Builtin::BIstrndup:
801       ArgNum = 1;
802       break;
803     default:
804       break;
805     };
806 
807   if (ArgNum == InvalidArgIndex) {
808     if (C.isCLibraryFunction(FDecl, "malloc") ||
809         C.isCLibraryFunction(FDecl, "calloc") ||
810         C.isCLibraryFunction(FDecl, "alloca"))
811       ArgNum = 0;
812     else if (C.isCLibraryFunction(FDecl, "memccpy"))
813       ArgNum = 3;
814     else if (C.isCLibraryFunction(FDecl, "realloc"))
815       ArgNum = 1;
816     else if (C.isCLibraryFunction(FDecl, "bcopy"))
817       ArgNum = 2;
818   }
819 
820   return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
821          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
822 }
823 
824 bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name,
825                                            CheckerContext &C) const {
826   auto It = CustomSinks.find(Name);
827   if (It == CustomSinks.end())
828     return false;
829 
830   const GenericTaintChecker::ArgVector &Args = It->getValue();
831   for (unsigned ArgNum : Args) {
832     if (ArgNum >= CE->getNumArgs())
833       continue;
834 
835     if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
836       return true;
837   }
838 
839   return false;
840 }
841 
842 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
843   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
844   std::string Option{"Config"};
845   StringRef ConfigFile =
846       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
847   llvm::Optional<TaintConfig> Config =
848       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
849   if (Config)
850     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
851 }
852 
/// The checker does not depend on any language option: this always returns
/// true and ignores \p LO.
bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
  return true;
}
856