xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (revision c203bd70b5957f85616424b6fa374479372d06e3)
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28 #include "llvm/Support/YAMLTraits.h"
29 
30 #include <algorithm>
31 #include <limits>
32 #include <memory>
33 #include <unordered_map>
34 #include <utility>
35 
36 using namespace clang;
37 using namespace ento;
38 using namespace taint;
39 
40 namespace {
/// Path-sensitive checker that introduces, propagates and filters taint
/// around function calls and reports tainted data reaching known sinks.
/// It subscribes to the pre-call and post-call events.
41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
42 public:
  /// Returns the address of a function-local static object.
43   static void *getTag() {
44     static int Tag;
45     return &Tag;
46   }
47 
  /// Pre-call hook: runs the sink checks, then taint-source handling, then
  /// the user-defined filters (see the definition for the exact ordering).
48   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
  /// Post-call hook: applies the taint scheduled during the pre-call visit.
49   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
50 
  /// Dumps the taint information of \p State by forwarding to printTaint().
51   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52                   const char *Sep) const override;
53 
  /// Argument indices; may also hold the ReturnValueIndex / InvalidArgIndex
  /// sentinels.
54   using ArgVector = SmallVector<unsigned, 2>;
  /// Argument indices as written in the configuration, where -1 denotes the
  /// return value (see convertToArgVector()).
55   using SignedArgVector = SmallVector<int, 2>;
56 
  /// Role of the variadic arguments of a function in a propagation rule.
57   enum class VariadicType { None, Src, Dst };
58 
59   /// Used to parse the configuration file.
60   struct TaintConfiguration {
    /// (function name, scope, argument indices) triple used for filters and
    /// sinks.
61     using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
62 
    /// One user-defined propagation rule as read from the configuration.
63     struct Propagation {
64       std::string Name;
65       std::string Scope;
66       ArgVector SrcArgs;
67       SignedArgVector DstArgs;
68       VariadicType VarType;
69       unsigned VarIndex;
70     };
71 
72     std::vector<Propagation> Propagations;
73     std::vector<NameScopeArgs> Filters;
74     std::vector<NameScopeArgs> Sinks;
75 
76     TaintConfiguration() = default;
77     TaintConfiguration(const TaintConfiguration &) = default;
78     TaintConfiguration(TaintConfiguration &&) = default;
79     TaintConfiguration &operator=(const TaintConfiguration &) = default;
80     TaintConfiguration &operator=(TaintConfiguration &&) = default;
81   };
82 
83   /// Convert SignedArgVector to ArgVector.
84   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
85                                const SignedArgVector &Args);
86 
87   /// Parse the config.
88   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89                           TaintConfiguration &&Config);
90 
  /// Sentinel index: the largest unsigned value, never a real argument index.
91   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92   /// Denotes the return value.
93   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
94                                          1};
95 
96 private:
  /// Bug type of this checker; created on first use by initBugType().
97   mutable std::unique_ptr<BugType> BT;
98   void initBugType() const {
99     if (!BT)
100       BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
101                                      "Untrusted Data");
102   }
103 
  /// Identity of the callee of a call: its declaration, its plain name and
  /// its qualified name. Instances are obtained through create().
104   struct FunctionData {
105     FunctionData() = delete;
106     FunctionData(const FunctionData &) = default;
107     FunctionData(FunctionData &&) = default;
108     FunctionData &operator=(const FunctionData &) = delete;
109     FunctionData &operator=(FunctionData &&) = delete;
110 
    /// Builds the data for \p Call. Returns None when the call has no
    /// declaration, when the callee's declaration kind is neither
    /// Decl::Function nor Decl::CXXMethod, or when either of its names is
    /// empty.
111     static Optional<FunctionData> create(const CallEvent &Call,
112                                          const CheckerContext &C) {
113       if (!Call.getDecl())
114         return None;
115 
116       const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
117       if (!FDecl || (FDecl->getKind() != Decl::Function &&
118                      FDecl->getKind() != Decl::CXXMethod))
119         return None;
120 
121       StringRef Name = C.getCalleeName(FDecl);
122       std::string FullName = FDecl->getQualifiedNameAsString();
123       if (Name.empty() || FullName.empty())
124         return None;
125 
126       return FunctionData{FDecl, Name, FullName};
127     }
128 
    /// True when the qualified name starts with \p Scope (plain prefix match).
129     bool isInScope(StringRef Scope) const {
130       return StringRef(FullName).startswith(Scope);
131     }
132 
133     const FunctionDecl *const FDecl;
134     const StringRef Name;
135     const std::string FullName;
136   };
137 
138   /// Catch taint related bugs. Check if tainted data is passed to a
139   /// system call etc. Returns true on matching.
140   bool checkPre(const CallEvent &Call, const FunctionData &FData,
141                 CheckerContext &C) const;
142 
143   /// Add taint sources on a pre-visit. Returns true on matching.
144   bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
145                      CheckerContext &C) const;
146 
147   /// Mark filter's arguments not tainted on a pre-visit. Returns true on
148   /// matching.
149   bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
150                      CheckerContext &C) const;
151 
152   /// Propagate taint generated at pre-visit. Returns true on matching.
153   static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
154 
155   /// Check if the region the expression evaluates to is the standard input,
156   /// and thus, is tainted.
157   static bool isStdin(const Expr *E, CheckerContext &C);
158 
159   /// Given a pointer argument, return the value it points to.
160   static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
161 
162   /// Check for CWE-134: Uncontrolled Format String.
163   static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
164       "Untrusted data is used as a format string "
165       "(CWE-134: Uncontrolled Format String)";
166   bool checkUncontrolledFormatString(const CallEvent &Call,
167                                      CheckerContext &C) const;
168 
169   /// Check for:
170   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
171   /// CWE-78, "Failure to Sanitize Data into an OS Command"
172   static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
173       "Untrusted data is passed to a system call "
174       "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
175   bool checkSystemCall(const CallEvent &Call, StringRef Name,
176                        CheckerContext &C) const;
177 
178   /// Check if tainted data is used as a buffer size in strn.. functions,
179   /// and allocators.
180   static constexpr llvm::StringLiteral MsgTaintedBufferSize =
181       "Untrusted data is used to specify the buffer size "
182       "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
183       "for character data and the null terminator)";
184   bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
185 
186   /// Check if tainted data is used as a custom sink's parameter.
187   static constexpr llvm::StringLiteral MsgCustomSink =
188       "Untrusted data is passed to a user-defined sink";
189   bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
190                         CheckerContext &C) const;
191 
192   /// Generate a report if the expression is tainted or points to tainted data.
193   bool generateReportIfTainted(const Expr *E, StringRef Msg,
194                                CheckerContext &C) const;
195 
196   struct TaintPropagationRule;
  /// Multimap keyed by function name; each entry stores the scope together
  /// with the configured payload.
197   template <typename T>
198   using ConfigDataMap =
199       std::unordered_multimap<std::string, std::pair<std::string, T>>;
200   using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
201   using NameArgMap = ConfigDataMap<ArgVector>;
202 
203   /// Find a function with the given name and scope. Returns the first match
204   /// or the end of the map.
205   template <typename T>
206   static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
207                                    const FunctionData &FData);
208 
209   /// A struct used to specify taint propagation rules for a function.
210   ///
211   /// If any of the possible taint source arguments is tainted, all of the
212   /// destination arguments should also be tainted. Use InvalidArgIndex in the
213   /// src list to specify that all of the arguments can introduce taint. Use
214   /// InvalidArgIndex in the dst arguments to signify that all the non-const
215   /// pointer and reference arguments might be tainted on return. If
216   /// ReturnValueIndex is added to the dst list, the return value will be
217   /// tainted.
218   struct TaintPropagationRule {
219     using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
220                                          CheckerContext &C);
221 
222     /// List of arguments which can be taint sources and should be checked.
223     ArgVector SrcArgs;
224     /// List of arguments which should be tainted on function return.
225     ArgVector DstArgs;
226     /// Index of the first variadic parameter, if it exists.
227     unsigned VariadicIndex;
228     /// Show when a function has variadic parameters. If it has, it marks all
229     /// of them as source or destination.
230     VariadicType VarType;
231     /// Special function for tainted source determination. If defined, it can
232     /// override the default behavior.
233     PropagationFuncType PropagationFunc;
234 
    /// Creates the null rule: no sources, no destinations, no variadic
    /// handling and no custom propagation function.
235     TaintPropagationRule()
236         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
237           PropagationFunc(nullptr) {}
238 
239     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
240                          VariadicType Var = VariadicType::None,
241                          unsigned VarIndex = InvalidArgIndex,
242                          PropagationFuncType Func = nullptr)
243         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
244           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
245 
246     /// Get the propagation rule for a given function.
247     static TaintPropagationRule
248     getTaintPropagationRule(const NameRuleMap &CustomPropagations,
249                             const FunctionData &FData, CheckerContext &C);
250 
251     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
252     void addDstArg(unsigned A) { DstArgs.push_back(A); }
253 
    /// True when the rule has no source or destination arguments and no
    /// variadic handling. PropagationFunc is not consulted.
254     bool isNull() const {
255       return SrcArgs.empty() && DstArgs.empty() &&
256              VariadicType::None == VarType;
257     }
258 
259     bool isDestinationArgument(unsigned ArgNum) const {
260       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
261     }
262 
    /// True when \p E itself is tainted, is recognized by isStdin(), or is of
    /// pointer type and the value it points to is tainted.
263     static bool isTaintedOrPointsToTainted(const Expr *E,
264                                            const ProgramStateRef &State,
265                                            CheckerContext &C) {
266       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
267         return true;
268 
269       if (!E->getType().getTypePtr()->isPointerType())
270         return false;
271 
272       Optional<SVal> V = getPointeeOf(C, E);
273       return (V && isTainted(State, *V));
274     }
275 
276     /// Pre-process a function which propagates taint according to the
277     /// taint rule.
278     ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
279 
280     // Functions for custom taintedness propagation.
281     static bool postSocket(bool IsTainted, const CallEvent &Call,
282                            CheckerContext &C);
283   };
284 
285   /// Defines a map between the propagation function's name, scope
286   /// and TaintPropagationRule.
287   NameRuleMap CustomPropagations;
288 
289   /// Defines a map between the filter function's name, scope and filtering
290   /// args.
291   NameArgMap CustomFilters;
292 
293   /// Defines a map between the sink function's name, scope and sinking args.
294   NameArgMap CustomSinks;
295 };
296 
// Out-of-line definitions of the static data members that are declared with
// in-class initializers inside GenericTaintChecker.
297 const unsigned GenericTaintChecker::ReturnValueIndex;
298 const unsigned GenericTaintChecker::InvalidArgIndex;
299 
300 // FIXME: these lines can be removed in C++17
// Namespace-scope definitions of the constexpr diagnostic message members.
301 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
302 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
303 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
304 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
305 } // end of anonymous namespace
306 
// Short alias for the checker's configuration type, used by the YAML traits.
307 using TaintConfig = GenericTaintChecker::TaintConfiguration;
308 
// NOTE(review): these macros presumably register std::vector of the given
// element types as YAML sequences -- confirm against llvm/Support/YAMLTraits.h.
309 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
310 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
311 
312 namespace llvm {
313 namespace yaml {
314 template <> struct MappingTraits<TaintConfig> {
315   static void mapping(IO &IO, TaintConfig &Config) {
316     IO.mapOptional("Propagations", Config.Propagations);
317     IO.mapOptional("Filters", Config.Filters);
318     IO.mapOptional("Sinks", Config.Sinks);
319   }
320 };
321 
322 template <> struct MappingTraits<TaintConfig::Propagation> {
323   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
324     IO.mapRequired("Name", Propagation.Name);
325     IO.mapOptional("Scope", Propagation.Scope);
326     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
327     IO.mapOptional("DstArgs", Propagation.DstArgs);
328     IO.mapOptional("VariadicType", Propagation.VarType,
329                    GenericTaintChecker::VariadicType::None);
330     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
331                    GenericTaintChecker::InvalidArgIndex);
332   }
333 };
334 
335 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
336   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
337     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
338     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
339     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
340   }
341 };
342 
343 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
344   static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
345     IO.mapRequired("Name", std::get<0>(NSA));
346     IO.mapOptional("Scope", std::get<1>(NSA));
347     IO.mapRequired("Args", std::get<2>(NSA));
348   }
349 };
350 } // namespace yaml
351 } // namespace llvm
352 
353 /// A set which is used to pass information from call pre-visit instruction
354 /// to the call post-visit. The values are unsigned integers, which are either
355 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
356 /// points to data, which should be tainted on return.
/// It is filled by TaintPropagationRule::process() and read and then removed
/// from the state by GenericTaintChecker::propagateFromPre().
357 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
358 
359 GenericTaintChecker::ArgVector
360 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
361                                         const std::string &Option,
362                                         const SignedArgVector &Args) {
363   ArgVector Result;
364   for (int Arg : Args) {
365     if (Arg == -1)
366       Result.push_back(ReturnValueIndex);
367     else if (Arg < -1) {
368       Result.push_back(InvalidArgIndex);
369       Mgr.reportInvalidCheckerOptionValue(
370           this, Option,
371           "an argument number for propagation rules greater or equal to -1");
372     } else
373       Result.push_back(static_cast<unsigned>(Arg));
374   }
375   return Result;
376 }
377 
378 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
379                                              const std::string &Option,
380                                              TaintConfiguration &&Config) {
381   for (auto &P : Config.Propagations) {
382     GenericTaintChecker::CustomPropagations.emplace(
383         P.Name,
384         std::make_pair(P.Scope, TaintPropagationRule{
385                                     std::move(P.SrcArgs),
386                                     convertToArgVector(Mgr, Option, P.DstArgs),
387                                     P.VarType, P.VarIndex}));
388   }
389 
390   for (auto &F : Config.Filters) {
391     GenericTaintChecker::CustomFilters.emplace(
392         std::get<0>(F),
393         std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
394   }
395 
396   for (auto &S : Config.Sinks) {
397     GenericTaintChecker::CustomSinks.emplace(
398         std::get<0>(S),
399         std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
400   }
401 }
402 
403 template <typename T>
404 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
405                                                const FunctionData &FData) {
406   auto Range = Map.equal_range(std::string(FData.Name));
407   auto It =
408       std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
409         const auto &Value = Entry.second;
410         StringRef Scope = Value.first;
411         return Scope.empty() || FData.isInScope(Scope);
412       });
413   return It != Range.second ? It : Map.end();
414 }
415 
416 GenericTaintChecker::TaintPropagationRule
417 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
418     const NameRuleMap &CustomPropagations, const FunctionData &FData,
419     CheckerContext &C) {
420   // TODO: Currently, we might lose precision here: we always mark a return
421   // value as tainted even if it's just a pointer, pointing to tainted data.
422 
423   // Check for exact name match for functions without builtin substitutes.
424   // Use qualified name, because these are C functions without namespace.
425   TaintPropagationRule Rule =
426       llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
427           // Source functions
428           // TODO: Add support for vfscanf & family.
429           .Case("fdopen", {{}, {ReturnValueIndex}})
430           .Case("fopen", {{}, {ReturnValueIndex}})
431           .Case("freopen", {{}, {ReturnValueIndex}})
432           .Case("getch", {{}, {ReturnValueIndex}})
433           .Case("getchar", {{}, {ReturnValueIndex}})
434           .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
435           .Case("getenv", {{}, {ReturnValueIndex}})
436           .Case("gets", {{}, {0, ReturnValueIndex}})
437           .Case("scanf", {{}, {}, VariadicType::Dst, 1})
438           .Case("socket", {{},
439                            {ReturnValueIndex},
440                            VariadicType::None,
441                            InvalidArgIndex,
442                            &TaintPropagationRule::postSocket})
443           .Case("wgetch", {{}, {ReturnValueIndex}})
444           // Propagating functions
445           .Case("atoi", {{0}, {ReturnValueIndex}})
446           .Case("atol", {{0}, {ReturnValueIndex}})
447           .Case("atoll", {{0}, {ReturnValueIndex}})
448           .Case("fgetc", {{0}, {ReturnValueIndex}})
449           .Case("fgetln", {{0}, {ReturnValueIndex}})
450           .Case("fgets", {{2}, {0, ReturnValueIndex}})
451           .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
452           .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
453           .Case("getc", {{0}, {ReturnValueIndex}})
454           .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
455           .Case("getdelim", {{3}, {0}})
456           .Case("getline", {{2}, {0}})
457           .Case("getw", {{0}, {ReturnValueIndex}})
458           .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
459           .Case("read", {{0, 2}, {1, ReturnValueIndex}})
460           .Case("strchr", {{0}, {ReturnValueIndex}})
461           .Case("strrchr", {{0}, {ReturnValueIndex}})
462           .Case("tolower", {{0}, {ReturnValueIndex}})
463           .Case("toupper", {{0}, {ReturnValueIndex}})
464           .Default({});
465 
466   if (!Rule.isNull())
467     return Rule;
468   assert(FData.FDecl);
469 
470   // Check if it's one of the memory setting/copying functions.
471   // This check is specialized but faster then calling isCLibraryFunction.
472   const FunctionDecl *FDecl = FData.FDecl;
473   unsigned BId = 0;
474   if ((BId = FDecl->getMemoryFunctionKind())) {
475     switch (BId) {
476     case Builtin::BImemcpy:
477     case Builtin::BImemmove:
478     case Builtin::BIstrncpy:
479     case Builtin::BIstrncat:
480       return {{1, 2}, {0, ReturnValueIndex}};
481     case Builtin::BIstrlcpy:
482     case Builtin::BIstrlcat:
483       return {{1, 2}, {0}};
484     case Builtin::BIstrndup:
485       return {{0, 1}, {ReturnValueIndex}};
486 
487     default:
488       break;
489     }
490   }
491 
492   // Process all other functions which could be defined as builtins.
493   if (Rule.isNull()) {
494     const auto OneOf = [FDecl](const auto &... Name) {
495       // FIXME: use fold expression in C++17
496       using unused = int[];
497       bool ret = false;
498       static_cast<void>(unused{
499           0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
500       return ret;
501     };
502     if (OneOf("snprintf"))
503       return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
504     if (OneOf("sprintf"))
505       return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
506     if (OneOf("strcpy", "stpcpy", "strcat"))
507       return {{1}, {0, ReturnValueIndex}};
508     if (OneOf("bcopy"))
509       return {{0, 2}, {1}};
510     if (OneOf("strdup", "strdupa", "wcsdup"))
511       return {{0}, {ReturnValueIndex}};
512   }
513 
514   // Skipping the following functions, since they might be used for cleansing or
515   // smart memory copy:
516   // - memccpy - copying until hitting a special character.
517 
518   auto It = findFunctionInConfig(CustomPropagations, FData);
519   if (It != CustomPropagations.end())
520     return It->second.second;
521   return {};
522 }
523 
524 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
525                                        CheckerContext &C) const {
526   Optional<FunctionData> FData = FunctionData::create(Call, C);
527   if (!FData)
528     return;
529 
530   // Check for taintedness related errors first: system call, uncontrolled
531   // format string, tainted buffer size.
532   if (checkPre(Call, *FData, C))
533     return;
534 
535   // Marks the function's arguments and/or return value tainted if it present in
536   // the list.
537   if (addSourcesPre(Call, *FData, C))
538     return;
539 
540   addFiltersPre(Call, *FData, C);
541 }
542 
543 void GenericTaintChecker::checkPostCall(const CallEvent &Call,
544                                         CheckerContext &C) const {
545   // Set the marked values as tainted. The return value only accessible from
546   // checkPostStmt.
547   propagateFromPre(Call, C);
548 }
549 
/// Prints the taint information stored in \p State to \p Out by forwarding
/// all arguments to printTaint().
550 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
551                                      const char *NL, const char *Sep) const {
552   printTaint(State, Out, NL, Sep);
553 }
554 
555 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
556                                         const FunctionData &FData,
557                                         CheckerContext &C) const {
558   // First, try generating a propagation rule for this function.
559   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
560       this->CustomPropagations, FData, C);
561   if (!Rule.isNull()) {
562     ProgramStateRef State = Rule.process(Call, C);
563     if (State) {
564       C.addTransition(State);
565       return true;
566     }
567   }
568   return false;
569 }
570 
571 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
572                                         const FunctionData &FData,
573                                         CheckerContext &C) const {
574   auto It = findFunctionInConfig(CustomFilters, FData);
575   if (It == CustomFilters.end())
576     return false;
577 
578   ProgramStateRef State = C.getState();
579   const auto &Value = It->second;
580   const ArgVector &Args = Value.second;
581   for (unsigned ArgNum : Args) {
582     if (ArgNum >= Call.getNumArgs())
583       continue;
584 
585     const Expr *Arg = Call.getArgExpr(ArgNum);
586     Optional<SVal> V = getPointeeOf(C, Arg);
587     if (V)
588       State = removeTaint(State, *V);
589   }
590 
591   if (State != C.getState()) {
592     C.addTransition(State);
593     return true;
594   }
595   return false;
596 }
597 
598 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
599                                            CheckerContext &C) {
600   ProgramStateRef State = C.getState();
601 
602   // Depending on what was tainted at pre-visit, we determined a set of
603   // arguments which should be tainted after the function returns. These are
604   // stored in the state as TaintArgsOnPostVisit set.
605   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
606   if (TaintArgs.isEmpty())
607     return false;
608 
609   for (unsigned ArgNum : TaintArgs) {
610     // Special handling for the tainted return value.
611     if (ArgNum == ReturnValueIndex) {
612       State = addTaint(State, Call.getReturnValue());
613       continue;
614     }
615 
616     // The arguments are pointer arguments. The data they are pointing at is
617     // tainted after the call.
618     if (Call.getNumArgs() < (ArgNum + 1))
619       return false;
620     const Expr *Arg = Call.getArgExpr(ArgNum);
621     Optional<SVal> V = getPointeeOf(C, Arg);
622     if (V)
623       State = addTaint(State, *V);
624   }
625 
626   // Clear up the taint info from the state.
627   State = State->remove<TaintArgsOnPostVisit>();
628 
629   if (State != C.getState()) {
630     C.addTransition(State);
631     return true;
632   }
633   return false;
634 }
635 
636 bool GenericTaintChecker::checkPre(const CallEvent &Call,
637                                    const FunctionData &FData,
638                                    CheckerContext &C) const {
639   if (checkUncontrolledFormatString(Call, C))
640     return true;
641 
642   if (checkSystemCall(Call, FData.Name, C))
643     return true;
644 
645   if (checkTaintedBufferSize(Call, C))
646     return true;
647 
648   return checkCustomSinks(Call, FData, C);
649 }
650 
651 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
652                                                  const Expr *Arg) {
653   ProgramStateRef State = C.getState();
654   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
655   if (AddrVal.isUnknownOrUndef())
656     return None;
657 
658   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
659   if (!AddrLoc)
660     return None;
661 
662   QualType ArgTy = Arg->getType().getCanonicalType();
663   if (!ArgTy->isPointerType())
664     return State->getSVal(*AddrLoc);
665 
666   QualType ValTy = ArgTy->getPointeeType();
667 
668   // Do not dereference void pointers. Treat them as byte pointers instead.
669   // FIXME: we might want to consider more than just the first byte.
670   if (ValTy->isVoidType())
671     ValTy = C.getASTContext().CharTy;
672 
673   return State->getSVal(*AddrLoc, ValTy);
674 }
675 
676 ProgramStateRef
677 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
678                                                    CheckerContext &C) const {
679   ProgramStateRef State = C.getState();
680 
681   // Check for taint in arguments.
682   bool IsTainted = true;
683   for (unsigned ArgNum : SrcArgs) {
684     if (ArgNum >= Call.getNumArgs())
685       continue;
686 
687     if ((IsTainted =
688              isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
689       break;
690   }
691 
692   // Check for taint in variadic arguments.
693   if (!IsTainted && VariadicType::Src == VarType) {
694     // Check if any of the arguments is tainted
695     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
696       if ((IsTainted =
697                isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
698         break;
699     }
700   }
701 
702   if (PropagationFunc)
703     IsTainted = PropagationFunc(IsTainted, Call, C);
704 
705   if (!IsTainted)
706     return State;
707 
708   // Mark the arguments which should be tainted after the function returns.
709   for (unsigned ArgNum : DstArgs) {
710     // Should mark the return value?
711     if (ArgNum == ReturnValueIndex) {
712       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
713       continue;
714     }
715 
716     if (ArgNum >= Call.getNumArgs())
717       continue;
718 
719     // Mark the given argument.
720     State = State->add<TaintArgsOnPostVisit>(ArgNum);
721   }
722 
723   // Mark all variadic arguments tainted if present.
724   if (VariadicType::Dst == VarType) {
725     // For all pointer and references that were passed in:
726     //   If they are not pointing to const data, mark data as tainted.
727     //   TODO: So far we are just going one level down; ideally we'd need to
728     //         recurse here.
729     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
730       const Expr *Arg = Call.getArgExpr(i);
731       // Process pointer argument.
732       const Type *ArgTy = Arg->getType().getTypePtr();
733       QualType PType = ArgTy->getPointeeType();
734       if ((!PType.isNull() && !PType.isConstQualified()) ||
735           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
736         State = State->add<TaintArgsOnPostVisit>(i);
737       }
738     }
739   }
740 
741   return State;
742 }
743 
744 // If argument 0(protocol domain) is network, the return value should get taint.
745 bool GenericTaintChecker::TaintPropagationRule::postSocket(
746     bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
747   SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
748   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
749   // White list the internal communication protocols.
750   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
751       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
752     return false;
753   return true;
754 }
755 
756 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
757   ProgramStateRef State = C.getState();
758   SVal Val = C.getSVal(E);
759 
760   // stdin is a pointer, so it would be a region.
761   const MemRegion *MemReg = Val.getAsRegion();
762 
763   // The region should be symbolic, we do not know it's value.
764   const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
765   if (!SymReg)
766     return false;
767 
768   // Get it's symbol and find the declaration region it's pointing to.
769   const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
770   if (!Sm)
771     return false;
772   const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
773   if (!DeclReg)
774     return false;
775 
776   // This region corresponds to a declaration, find out if it's a global/extern
777   // variable named stdin with the proper type.
778   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
779     D = D->getCanonicalDecl();
780     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
781       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
782       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
783                        C.getASTContext().getFILEType().getCanonicalType())
784         return true;
785     }
786   }
787   return false;
788 }
789 
790 static bool getPrintfFormatArgumentNum(const CallEvent &Call,
791                                        const CheckerContext &C,
792                                        unsigned &ArgNum) {
793   // Find if the function contains a format string argument.
794   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
795   // vsnprintf, syslog, custom annotated functions.
796   const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
797   if (!FDecl)
798     return false;
799   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
800     ArgNum = Format->getFormatIdx() - 1;
801     if ((Format->getType()->getName() == "printf") &&
802         Call.getNumArgs() > ArgNum)
803       return true;
804   }
805 
806   // Or if a function is named setproctitle (this is a heuristic).
807   if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) {
808     ArgNum = 0;
809     return true;
810   }
811 
812   return false;
813 }
814 
815 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
816                                                   CheckerContext &C) const {
817   assert(E);
818 
819   // Check for taint.
820   ProgramStateRef State = C.getState();
821   Optional<SVal> PointedToSVal = getPointeeOf(C, E);
822   SVal TaintedSVal;
823   if (PointedToSVal && isTainted(State, *PointedToSVal))
824     TaintedSVal = *PointedToSVal;
825   else if (isTainted(State, E, C.getLocationContext()))
826     TaintedSVal = C.getSVal(E);
827   else
828     return false;
829 
830   // Generate diagnostic.
831   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
832     initBugType();
833     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
834     report->addRange(E->getSourceRange());
835     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
836     C.emitReport(std::move(report));
837     return true;
838   }
839   return false;
840 }
841 
842 bool GenericTaintChecker::checkUncontrolledFormatString(
843     const CallEvent &Call, CheckerContext &C) const {
844   // Check if the function contains a format string argument.
845   unsigned ArgNum = 0;
846   if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
847     return false;
848 
849   // If either the format string content or the pointer itself are tainted,
850   // warn.
851   return generateReportIfTainted(Call.getArgExpr(ArgNum),
852                                  MsgUncontrolledFormatString, C);
853 }
854 
855 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
856                                           CheckerContext &C) const {
857   // TODO: It might make sense to run this check on demand. In some cases,
858   // we should check if the environment has been cleansed here. We also might
859   // need to know if the user was reset before these calls(seteuid).
860   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
861                         .Case("system", 0)
862                         .Case("popen", 0)
863                         .Case("execl", 0)
864                         .Case("execle", 0)
865                         .Case("execlp", 0)
866                         .Case("execv", 0)
867                         .Case("execvp", 0)
868                         .Case("execvP", 0)
869                         .Case("execve", 0)
870                         .Case("dlopen", 0)
871                         .Default(InvalidArgIndex);
872 
873   if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
874     return false;
875 
876   return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
877                                  C);
878 }
879 
880 // TODO: Should this check be a part of the CString checker?
881 // If yes, should taint be a global setting?
882 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
883                                                  CheckerContext &C) const {
884   const auto *FDecl = Call.getDecl()->getAsFunction();
885   // If the function has a buffer size argument, set ArgNum.
886   unsigned ArgNum = InvalidArgIndex;
887   unsigned BId = 0;
888   if ((BId = FDecl->getMemoryFunctionKind())) {
889     switch (BId) {
890     case Builtin::BImemcpy:
891     case Builtin::BImemmove:
892     case Builtin::BIstrncpy:
893       ArgNum = 2;
894       break;
895     case Builtin::BIstrndup:
896       ArgNum = 1;
897       break;
898     default:
899       break;
900     }
901   }
902 
903   if (ArgNum == InvalidArgIndex) {
904     using CCtx = CheckerContext;
905     if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
906         CCtx::isCLibraryFunction(FDecl, "calloc") ||
907         CCtx::isCLibraryFunction(FDecl, "alloca"))
908       ArgNum = 0;
909     else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
910       ArgNum = 3;
911     else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
912       ArgNum = 1;
913     else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
914       ArgNum = 2;
915   }
916 
917   return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
918          generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
919                                  C);
920 }
921 
922 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
923                                            const FunctionData &FData,
924                                            CheckerContext &C) const {
925   auto It = findFunctionInConfig(CustomSinks, FData);
926   if (It == CustomSinks.end())
927     return false;
928 
929   const auto &Value = It->second;
930   const GenericTaintChecker::ArgVector &Args = Value.second;
931   for (unsigned ArgNum : Args) {
932     if (ArgNum >= Call.getNumArgs())
933       continue;
934 
935     if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
936       return true;
937   }
938 
939   return false;
940 }
941 
942 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
943   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
944   std::string Option{"Config"};
945   StringRef ConfigFile =
946       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
947   llvm::Optional<TaintConfig> Config =
948       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
949   if (Config)
950     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
951 }
952 
953 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
954   return true;
955 }
956