xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28 #include "llvm/Support/YAMLTraits.h"
29 
30 #include <algorithm>
31 #include <limits>
32 #include <memory>
33 #include <unordered_map>
34 #include <utility>
35 
36 using namespace clang;
37 using namespace ento;
38 using namespace taint;
39 
40 namespace {
41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
42 public:
43   static void *getTag() {
44     static int Tag;
45     return &Tag;
46   }
47 
48   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
49   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
50 
51   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52                   const char *Sep) const override;
53 
54   using ArgVector = SmallVector<unsigned, 2>;
55   using SignedArgVector = SmallVector<int, 2>;
56 
57   enum class VariadicType { None, Src, Dst };
58 
59   /// Used to parse the configuration file.
60   struct TaintConfiguration {
61     using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
62 
63     struct Propagation {
64       std::string Name;
65       std::string Scope;
66       ArgVector SrcArgs;
67       SignedArgVector DstArgs;
68       VariadicType VarType;
69       unsigned VarIndex;
70     };
71 
72     std::vector<Propagation> Propagations;
73     std::vector<NameScopeArgs> Filters;
74     std::vector<NameScopeArgs> Sinks;
75 
76     TaintConfiguration() = default;
77     TaintConfiguration(const TaintConfiguration &) = default;
78     TaintConfiguration(TaintConfiguration &&) = default;
79     TaintConfiguration &operator=(const TaintConfiguration &) = default;
80     TaintConfiguration &operator=(TaintConfiguration &&) = default;
81   };
82 
83   /// Convert SignedArgVector to ArgVector.
84   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
85                                const SignedArgVector &Args);
86 
87   /// Parse the config.
88   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89                           TaintConfiguration &&Config);
90 
91   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92   /// Denotes the return vale.
93   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
94                                          1};
95 
96 private:
97   mutable std::unique_ptr<BugType> BT;
98   void initBugType() const {
99     if (!BT)
100       BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
101                                      "Untrusted Data");
102   }
103 
104   struct FunctionData {
105     FunctionData() = delete;
106     FunctionData(const FunctionDecl *FDecl, StringRef Name,
107                  std::string FullName)
108         : FDecl(FDecl), Name(Name), FullName(std::move(FullName)) {}
109     FunctionData(const FunctionData &) = default;
110     FunctionData(FunctionData &&) = default;
111     FunctionData &operator=(const FunctionData &) = delete;
112     FunctionData &operator=(FunctionData &&) = delete;
113 
114     static Optional<FunctionData> create(const CallEvent &Call,
115                                          const CheckerContext &C) {
116       if (!Call.getDecl())
117         return None;
118 
119       const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
120       if (!FDecl || (FDecl->getKind() != Decl::Function &&
121                      FDecl->getKind() != Decl::CXXMethod))
122         return None;
123 
124       StringRef Name = C.getCalleeName(FDecl);
125       std::string FullName = FDecl->getQualifiedNameAsString();
126       if (Name.empty() || FullName.empty())
127         return None;
128 
129       return FunctionData{FDecl, Name, std::move(FullName)};
130     }
131 
132     bool isInScope(StringRef Scope) const {
133       return StringRef(FullName).startswith(Scope);
134     }
135 
136     const FunctionDecl *const FDecl;
137     const StringRef Name;
138     const std::string FullName;
139   };
140 
141   /// Catch taint related bugs. Check if tainted data is passed to a
142   /// system call etc. Returns true on matching.
143   bool checkPre(const CallEvent &Call, const FunctionData &FData,
144                 CheckerContext &C) const;
145 
146   /// Add taint sources on a pre-visit. Returns true on matching.
147   bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
148                      CheckerContext &C) const;
149 
150   /// Mark filter's arguments not tainted on a pre-visit. Returns true on
151   /// matching.
152   bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
153                      CheckerContext &C) const;
154 
155   /// Propagate taint generated at pre-visit. Returns true on matching.
156   static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
157 
158   /// Check if the region the expression evaluates to is the standard input,
159   /// and thus, is tainted.
160   static bool isStdin(const Expr *E, CheckerContext &C);
161 
162   /// Given a pointer argument, return the value it points to.
163   static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
164 
165   /// Check for CWE-134: Uncontrolled Format String.
166   static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
167       "Untrusted data is used as a format string "
168       "(CWE-134: Uncontrolled Format String)";
169   bool checkUncontrolledFormatString(const CallEvent &Call,
170                                      CheckerContext &C) const;
171 
172   /// Check for:
173   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
174   /// CWE-78, "Failure to Sanitize Data into an OS Command"
175   static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
176       "Untrusted data is passed to a system call "
177       "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
178   bool checkSystemCall(const CallEvent &Call, StringRef Name,
179                        CheckerContext &C) const;
180 
181   /// Check if tainted data is used as a buffer size ins strn.. functions,
182   /// and allocators.
183   static constexpr llvm::StringLiteral MsgTaintedBufferSize =
184       "Untrusted data is used to specify the buffer size "
185       "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
186       "for character data and the null terminator)";
187   bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
188 
189   /// Check if tainted data is used as a custom sink's parameter.
190   static constexpr llvm::StringLiteral MsgCustomSink =
191       "Untrusted data is passed to a user-defined sink";
192   bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
193                         CheckerContext &C) const;
194 
195   /// Generate a report if the expression is tainted or points to tainted data.
196   bool generateReportIfTainted(const Expr *E, StringRef Msg,
197                                CheckerContext &C) const;
198 
199   struct TaintPropagationRule;
200   template <typename T>
201   using ConfigDataMap =
202       std::unordered_multimap<std::string, std::pair<std::string, T>>;
203   using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
204   using NameArgMap = ConfigDataMap<ArgVector>;
205 
206   /// Find a function with the given name and scope. Returns the first match
207   /// or the end of the map.
208   template <typename T>
209   static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
210                                    const FunctionData &FData);
211 
212   /// A struct used to specify taint propagation rules for a function.
213   ///
214   /// If any of the possible taint source arguments is tainted, all of the
215   /// destination arguments should also be tainted. Use InvalidArgIndex in the
216   /// src list to specify that all of the arguments can introduce taint. Use
217   /// InvalidArgIndex in the dst arguments to signify that all the non-const
218   /// pointer and reference arguments might be tainted on return. If
219   /// ReturnValueIndex is added to the dst list, the return value will be
220   /// tainted.
221   struct TaintPropagationRule {
222     using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
223                                          CheckerContext &C);
224 
225     /// List of arguments which can be taint sources and should be checked.
226     ArgVector SrcArgs;
227     /// List of arguments which should be tainted on function return.
228     ArgVector DstArgs;
229     /// Index for the first variadic parameter if exist.
230     unsigned VariadicIndex;
231     /// Show when a function has variadic parameters. If it has, it marks all
232     /// of them as source or destination.
233     VariadicType VarType;
234     /// Special function for tainted source determination. If defined, it can
235     /// override the default behavior.
236     PropagationFuncType PropagationFunc;
237 
238     TaintPropagationRule()
239         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
240           PropagationFunc(nullptr) {}
241 
242     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
243                          VariadicType Var = VariadicType::None,
244                          unsigned VarIndex = InvalidArgIndex,
245                          PropagationFuncType Func = nullptr)
246         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
247           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
248 
249     /// Get the propagation rule for a given function.
250     static TaintPropagationRule
251     getTaintPropagationRule(const NameRuleMap &CustomPropagations,
252                             const FunctionData &FData, CheckerContext &C);
253 
254     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
255     void addDstArg(unsigned A) { DstArgs.push_back(A); }
256 
257     bool isNull() const {
258       return SrcArgs.empty() && DstArgs.empty() &&
259              VariadicType::None == VarType;
260     }
261 
262     bool isDestinationArgument(unsigned ArgNum) const {
263       return llvm::is_contained(DstArgs, ArgNum);
264     }
265 
266     static bool isTaintedOrPointsToTainted(const Expr *E,
267                                            const ProgramStateRef &State,
268                                            CheckerContext &C) {
269       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
270         return true;
271 
272       if (!E->getType().getTypePtr()->isPointerType())
273         return false;
274 
275       Optional<SVal> V = getPointeeOf(C, E);
276       return (V && isTainted(State, *V));
277     }
278 
279     /// Pre-process a function which propagates taint according to the
280     /// taint rule.
281     ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
282 
283     // Functions for custom taintedness propagation.
284     static bool postSocket(bool IsTainted, const CallEvent &Call,
285                            CheckerContext &C);
286   };
287 
288   /// Defines a map between the propagation function's name, scope
289   /// and TaintPropagationRule.
290   NameRuleMap CustomPropagations;
291 
292   /// Defines a map between the filter function's name, scope and filtering
293   /// args.
294   NameArgMap CustomFilters;
295 
296   /// Defines a map between the sink function's name, scope and sinking args.
297   NameArgMap CustomSinks;
298 };
299 
300 const unsigned GenericTaintChecker::ReturnValueIndex;
301 const unsigned GenericTaintChecker::InvalidArgIndex;
302 
303 // FIXME: these lines can be removed in C++17
304 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
305 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
306 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
307 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
308 } // end of anonymous namespace
309 
310 using TaintConfig = GenericTaintChecker::TaintConfiguration;
311 
312 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
313 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
314 
315 namespace llvm {
316 namespace yaml {
317 template <> struct MappingTraits<TaintConfig> {
318   static void mapping(IO &IO, TaintConfig &Config) {
319     IO.mapOptional("Propagations", Config.Propagations);
320     IO.mapOptional("Filters", Config.Filters);
321     IO.mapOptional("Sinks", Config.Sinks);
322   }
323 };
324 
325 template <> struct MappingTraits<TaintConfig::Propagation> {
326   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
327     IO.mapRequired("Name", Propagation.Name);
328     IO.mapOptional("Scope", Propagation.Scope);
329     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
330     IO.mapOptional("DstArgs", Propagation.DstArgs);
331     IO.mapOptional("VariadicType", Propagation.VarType,
332                    GenericTaintChecker::VariadicType::None);
333     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
334                    GenericTaintChecker::InvalidArgIndex);
335   }
336 };
337 
338 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
339   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
340     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
341     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
342     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
343   }
344 };
345 
346 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
347   static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
348     IO.mapRequired("Name", std::get<0>(NSA));
349     IO.mapOptional("Scope", std::get<1>(NSA));
350     IO.mapRequired("Args", std::get<2>(NSA));
351   }
352 };
353 } // namespace yaml
354 } // namespace llvm
355 
356 /// A set which is used to pass information from call pre-visit instruction
357 /// to the call post-visit. The values are unsigned integers, which are either
358 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
359 /// points to data, which should be tainted on return.
360 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
361 
362 GenericTaintChecker::ArgVector
363 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
364                                         const std::string &Option,
365                                         const SignedArgVector &Args) {
366   ArgVector Result;
367   for (int Arg : Args) {
368     if (Arg == -1)
369       Result.push_back(ReturnValueIndex);
370     else if (Arg < -1) {
371       Result.push_back(InvalidArgIndex);
372       Mgr.reportInvalidCheckerOptionValue(
373           this, Option,
374           "an argument number for propagation rules greater or equal to -1");
375     } else
376       Result.push_back(static_cast<unsigned>(Arg));
377   }
378   return Result;
379 }
380 
381 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
382                                              const std::string &Option,
383                                              TaintConfiguration &&Config) {
384   for (auto &P : Config.Propagations) {
385     GenericTaintChecker::CustomPropagations.emplace(
386         P.Name,
387         std::make_pair(P.Scope, TaintPropagationRule{
388                                     std::move(P.SrcArgs),
389                                     convertToArgVector(Mgr, Option, P.DstArgs),
390                                     P.VarType, P.VarIndex}));
391   }
392 
393   for (auto &F : Config.Filters) {
394     GenericTaintChecker::CustomFilters.emplace(
395         std::get<0>(F),
396         std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
397   }
398 
399   for (auto &S : Config.Sinks) {
400     GenericTaintChecker::CustomSinks.emplace(
401         std::get<0>(S),
402         std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
403   }
404 }
405 
406 template <typename T>
407 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
408                                                const FunctionData &FData) {
409   auto Range = Map.equal_range(std::string(FData.Name));
410   auto It =
411       std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
412         const auto &Value = Entry.second;
413         StringRef Scope = Value.first;
414         return Scope.empty() || FData.isInScope(Scope);
415       });
416   return It != Range.second ? It : Map.end();
417 }
418 
419 GenericTaintChecker::TaintPropagationRule
420 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
421     const NameRuleMap &CustomPropagations, const FunctionData &FData,
422     CheckerContext &C) {
423   // TODO: Currently, we might lose precision here: we always mark a return
424   // value as tainted even if it's just a pointer, pointing to tainted data.
425 
426   // Check for exact name match for functions without builtin substitutes.
427   // Use qualified name, because these are C functions without namespace.
428   TaintPropagationRule Rule =
429       llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
430           // Source functions
431           // TODO: Add support for vfscanf & family.
432           .Case("fdopen", {{}, {ReturnValueIndex}})
433           .Case("fopen", {{}, {ReturnValueIndex}})
434           .Case("freopen", {{}, {ReturnValueIndex}})
435           .Case("getch", {{}, {ReturnValueIndex}})
436           .Case("getchar", {{}, {ReturnValueIndex}})
437           .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
438           .Case("gets", {{}, {0, ReturnValueIndex}})
439           .Case("scanf", {{}, {}, VariadicType::Dst, 1})
440           .Case("socket", {{},
441                            {ReturnValueIndex},
442                            VariadicType::None,
443                            InvalidArgIndex,
444                            &TaintPropagationRule::postSocket})
445           .Case("wgetch", {{}, {ReturnValueIndex}})
446           // Propagating functions
447           .Case("atoi", {{0}, {ReturnValueIndex}})
448           .Case("atol", {{0}, {ReturnValueIndex}})
449           .Case("atoll", {{0}, {ReturnValueIndex}})
450           .Case("fgetc", {{0}, {ReturnValueIndex}})
451           .Case("fgetln", {{0}, {ReturnValueIndex}})
452           .Case("fgets", {{2}, {0, ReturnValueIndex}})
453           .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
454           .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
455           .Case("getc", {{0}, {ReturnValueIndex}})
456           .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
457           .Case("getdelim", {{3}, {0}})
458           .Case("getline", {{2}, {0}})
459           .Case("getw", {{0}, {ReturnValueIndex}})
460           .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
461           .Case("read", {{0, 2}, {1, ReturnValueIndex}})
462           .Case("strchr", {{0}, {ReturnValueIndex}})
463           .Case("strrchr", {{0}, {ReturnValueIndex}})
464           .Case("tolower", {{0}, {ReturnValueIndex}})
465           .Case("toupper", {{0}, {ReturnValueIndex}})
466           .Default({});
467 
468   if (!Rule.isNull())
469     return Rule;
470 
471   // `getenv` returns taint only in untrusted environments.
472   if (FData.FullName == "getenv") {
473     if (C.getAnalysisManager()
474             .getAnalyzerOptions()
475             .ShouldAssumeControlledEnvironment)
476       return {};
477     return {{}, {ReturnValueIndex}};
478   }
479 
480   assert(FData.FDecl);
481 
482   // Check if it's one of the memory setting/copying functions.
483   // This check is specialized but faster then calling isCLibraryFunction.
484   const FunctionDecl *FDecl = FData.FDecl;
485   unsigned BId = 0;
486   if ((BId = FDecl->getMemoryFunctionKind())) {
487     switch (BId) {
488     case Builtin::BImemcpy:
489     case Builtin::BImemmove:
490     case Builtin::BIstrncpy:
491     case Builtin::BIstrncat:
492       return {{1, 2}, {0, ReturnValueIndex}};
493     case Builtin::BIstrlcpy:
494     case Builtin::BIstrlcat:
495       return {{1, 2}, {0}};
496     case Builtin::BIstrndup:
497       return {{0, 1}, {ReturnValueIndex}};
498 
499     default:
500       break;
501     }
502   }
503 
504   // Process all other functions which could be defined as builtins.
505   if (Rule.isNull()) {
506     const auto OneOf = [FDecl](const auto &... Name) {
507       // FIXME: use fold expression in C++17
508       using unused = int[];
509       bool ret = false;
510       static_cast<void>(unused{
511           0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
512       return ret;
513     };
514     if (OneOf("snprintf"))
515       return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
516     if (OneOf("sprintf"))
517       return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 2};
518     if (OneOf("strcpy", "stpcpy", "strcat"))
519       return {{1}, {0, ReturnValueIndex}};
520     if (OneOf("bcopy"))
521       return {{0, 2}, {1}};
522     if (OneOf("strdup", "strdupa", "wcsdup"))
523       return {{0}, {ReturnValueIndex}};
524   }
525 
526   // Skipping the following functions, since they might be used for cleansing or
527   // smart memory copy:
528   // - memccpy - copying until hitting a special character.
529 
530   auto It = findFunctionInConfig(CustomPropagations, FData);
531   if (It != CustomPropagations.end())
532     return It->second.second;
533   return {};
534 }
535 
536 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
537                                        CheckerContext &C) const {
538   Optional<FunctionData> FData = FunctionData::create(Call, C);
539   if (!FData)
540     return;
541 
542   // Check for taintedness related errors first: system call, uncontrolled
543   // format string, tainted buffer size.
544   if (checkPre(Call, *FData, C))
545     return;
546 
547   // Marks the function's arguments and/or return value tainted if it present in
548   // the list.
549   if (addSourcesPre(Call, *FData, C))
550     return;
551 
552   addFiltersPre(Call, *FData, C);
553 }
554 
555 void GenericTaintChecker::checkPostCall(const CallEvent &Call,
556                                         CheckerContext &C) const {
557   // Set the marked values as tainted. The return value only accessible from
558   // checkPostStmt.
559   propagateFromPre(Call, C);
560 }
561 
562 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
563                                      const char *NL, const char *Sep) const {
564   printTaint(State, Out, NL, Sep);
565 }
566 
567 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
568                                         const FunctionData &FData,
569                                         CheckerContext &C) const {
570   // First, try generating a propagation rule for this function.
571   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
572       this->CustomPropagations, FData, C);
573   if (!Rule.isNull()) {
574     ProgramStateRef State = Rule.process(Call, C);
575     if (State) {
576       C.addTransition(State);
577       return true;
578     }
579   }
580   return false;
581 }
582 
583 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
584                                         const FunctionData &FData,
585                                         CheckerContext &C) const {
586   auto It = findFunctionInConfig(CustomFilters, FData);
587   if (It == CustomFilters.end())
588     return false;
589 
590   ProgramStateRef State = C.getState();
591   const auto &Value = It->second;
592   const ArgVector &Args = Value.second;
593   for (unsigned ArgNum : Args) {
594     if (ArgNum >= Call.getNumArgs())
595       continue;
596 
597     const Expr *Arg = Call.getArgExpr(ArgNum);
598     Optional<SVal> V = getPointeeOf(C, Arg);
599     if (V)
600       State = removeTaint(State, *V);
601   }
602 
603   if (State != C.getState()) {
604     C.addTransition(State);
605     return true;
606   }
607   return false;
608 }
609 
610 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
611                                            CheckerContext &C) {
612   ProgramStateRef State = C.getState();
613 
614   // Depending on what was tainted at pre-visit, we determined a set of
615   // arguments which should be tainted after the function returns. These are
616   // stored in the state as TaintArgsOnPostVisit set.
617   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
618   if (TaintArgs.isEmpty())
619     return false;
620 
621   for (unsigned ArgNum : TaintArgs) {
622     // Special handling for the tainted return value.
623     if (ArgNum == ReturnValueIndex) {
624       State = addTaint(State, Call.getReturnValue());
625       continue;
626     }
627 
628     // The arguments are pointer arguments. The data they are pointing at is
629     // tainted after the call.
630     if (Call.getNumArgs() < (ArgNum + 1))
631       return false;
632     const Expr *Arg = Call.getArgExpr(ArgNum);
633     Optional<SVal> V = getPointeeOf(C, Arg);
634     if (V)
635       State = addTaint(State, *V);
636   }
637 
638   // Clear up the taint info from the state.
639   State = State->remove<TaintArgsOnPostVisit>();
640 
641   if (State != C.getState()) {
642     C.addTransition(State);
643     return true;
644   }
645   return false;
646 }
647 
648 bool GenericTaintChecker::checkPre(const CallEvent &Call,
649                                    const FunctionData &FData,
650                                    CheckerContext &C) const {
651   if (checkUncontrolledFormatString(Call, C))
652     return true;
653 
654   if (checkSystemCall(Call, FData.Name, C))
655     return true;
656 
657   if (checkTaintedBufferSize(Call, C))
658     return true;
659 
660   return checkCustomSinks(Call, FData, C);
661 }
662 
663 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
664                                                  const Expr *Arg) {
665   ProgramStateRef State = C.getState();
666   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
667   if (AddrVal.isUnknownOrUndef())
668     return None;
669 
670   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
671   if (!AddrLoc)
672     return None;
673 
674   QualType ArgTy = Arg->getType().getCanonicalType();
675   if (!ArgTy->isPointerType())
676     return State->getSVal(*AddrLoc);
677 
678   QualType ValTy = ArgTy->getPointeeType();
679 
680   // Do not dereference void pointers. Treat them as byte pointers instead.
681   // FIXME: we might want to consider more than just the first byte.
682   if (ValTy->isVoidType())
683     ValTy = C.getASTContext().CharTy;
684 
685   return State->getSVal(*AddrLoc, ValTy);
686 }
687 
688 ProgramStateRef
689 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
690                                                    CheckerContext &C) const {
691   ProgramStateRef State = C.getState();
692 
693   // Check for taint in arguments.
694   bool IsTainted = true;
695   for (unsigned ArgNum : SrcArgs) {
696     if (ArgNum >= Call.getNumArgs())
697       continue;
698 
699     if ((IsTainted =
700              isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
701       break;
702   }
703 
704   // Check for taint in variadic arguments.
705   if (!IsTainted && VariadicType::Src == VarType) {
706     // Check if any of the arguments is tainted
707     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
708       if ((IsTainted =
709                isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
710         break;
711     }
712   }
713 
714   if (PropagationFunc)
715     IsTainted = PropagationFunc(IsTainted, Call, C);
716 
717   if (!IsTainted)
718     return State;
719 
720   // Mark the arguments which should be tainted after the function returns.
721   for (unsigned ArgNum : DstArgs) {
722     // Should mark the return value?
723     if (ArgNum == ReturnValueIndex) {
724       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
725       continue;
726     }
727 
728     if (ArgNum >= Call.getNumArgs())
729       continue;
730 
731     // Mark the given argument.
732     State = State->add<TaintArgsOnPostVisit>(ArgNum);
733   }
734 
735   // Mark all variadic arguments tainted if present.
736   if (VariadicType::Dst == VarType) {
737     // For all pointer and references that were passed in:
738     //   If they are not pointing to const data, mark data as tainted.
739     //   TODO: So far we are just going one level down; ideally we'd need to
740     //         recurse here.
741     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
742       const Expr *Arg = Call.getArgExpr(i);
743       // Process pointer argument.
744       const Type *ArgTy = Arg->getType().getTypePtr();
745       QualType PType = ArgTy->getPointeeType();
746       if ((!PType.isNull() && !PType.isConstQualified()) ||
747           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
748         State = State->add<TaintArgsOnPostVisit>(i);
749       }
750     }
751   }
752 
753   return State;
754 }
755 
756 // If argument 0(protocol domain) is network, the return value should get taint.
757 bool GenericTaintChecker::TaintPropagationRule::postSocket(
758     bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
759   SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
760   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
761   // White list the internal communication protocols.
762   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
763       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
764     return false;
765   return true;
766 }
767 
768 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
769   ProgramStateRef State = C.getState();
770   SVal Val = C.getSVal(E);
771 
772   // stdin is a pointer, so it would be a region.
773   const MemRegion *MemReg = Val.getAsRegion();
774 
775   // The region should be symbolic, we do not know it's value.
776   const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
777   if (!SymReg)
778     return false;
779 
780   // Get it's symbol and find the declaration region it's pointing to.
781   const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
782   if (!Sm)
783     return false;
784   const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
785   if (!DeclReg)
786     return false;
787 
788   // This region corresponds to a declaration, find out if it's a global/extern
789   // variable named stdin with the proper type.
790   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
791     D = D->getCanonicalDecl();
792     if (D->getName().contains("stdin") && D->isExternC()) {
793       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
794       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
795                        C.getASTContext().getFILEType().getCanonicalType())
796         return true;
797     }
798   }
799   return false;
800 }
801 
802 static bool getPrintfFormatArgumentNum(const CallEvent &Call,
803                                        const CheckerContext &C,
804                                        unsigned &ArgNum) {
805   // Find if the function contains a format string argument.
806   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
807   // vsnprintf, syslog, custom annotated functions.
808   const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
809   if (!FDecl)
810     return false;
811   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
812     ArgNum = Format->getFormatIdx() - 1;
813     if ((Format->getType()->getName() == "printf") &&
814         Call.getNumArgs() > ArgNum)
815       return true;
816   }
817 
818   // Or if a function is named setproctitle (this is a heuristic).
819   if (C.getCalleeName(FDecl).contains("setproctitle")) {
820     ArgNum = 0;
821     return true;
822   }
823 
824   return false;
825 }
826 
827 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
828                                                   CheckerContext &C) const {
829   assert(E);
830 
831   // Check for taint.
832   ProgramStateRef State = C.getState();
833   Optional<SVal> PointedToSVal = getPointeeOf(C, E);
834   SVal TaintedSVal;
835   if (PointedToSVal && isTainted(State, *PointedToSVal))
836     TaintedSVal = *PointedToSVal;
837   else if (isTainted(State, E, C.getLocationContext()))
838     TaintedSVal = C.getSVal(E);
839   else
840     return false;
841 
842   // Generate diagnostic.
843   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
844     initBugType();
845     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
846     report->addRange(E->getSourceRange());
847     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
848     C.emitReport(std::move(report));
849     return true;
850   }
851   return false;
852 }
853 
854 bool GenericTaintChecker::checkUncontrolledFormatString(
855     const CallEvent &Call, CheckerContext &C) const {
856   // Check if the function contains a format string argument.
857   unsigned ArgNum = 0;
858   if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
859     return false;
860 
861   // If either the format string content or the pointer itself are tainted,
862   // warn.
863   return generateReportIfTainted(Call.getArgExpr(ArgNum),
864                                  MsgUncontrolledFormatString, C);
865 }
866 
867 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
868                                           CheckerContext &C) const {
869   // TODO: It might make sense to run this check on demand. In some cases,
870   // we should check if the environment has been cleansed here. We also might
871   // need to know if the user was reset before these calls(seteuid).
872   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
873                         .Case("system", 0)
874                         .Case("popen", 0)
875                         .Case("execl", 0)
876                         .Case("execle", 0)
877                         .Case("execlp", 0)
878                         .Case("execv", 0)
879                         .Case("execvp", 0)
880                         .Case("execvP", 0)
881                         .Case("execve", 0)
882                         .Case("dlopen", 0)
883                         .Default(InvalidArgIndex);
884 
885   if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
886     return false;
887 
888   return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
889                                  C);
890 }
891 
892 // TODO: Should this check be a part of the CString checker?
893 // If yes, should taint be a global setting?
894 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
895                                                  CheckerContext &C) const {
896   const auto *FDecl = Call.getDecl()->getAsFunction();
897   // If the function has a buffer size argument, set ArgNum.
898   unsigned ArgNum = InvalidArgIndex;
899   unsigned BId = 0;
900   if ((BId = FDecl->getMemoryFunctionKind())) {
901     switch (BId) {
902     case Builtin::BImemcpy:
903     case Builtin::BImemmove:
904     case Builtin::BIstrncpy:
905       ArgNum = 2;
906       break;
907     case Builtin::BIstrndup:
908       ArgNum = 1;
909       break;
910     default:
911       break;
912     }
913   }
914 
915   if (ArgNum == InvalidArgIndex) {
916     using CCtx = CheckerContext;
917     if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
918         CCtx::isCLibraryFunction(FDecl, "calloc") ||
919         CCtx::isCLibraryFunction(FDecl, "alloca"))
920       ArgNum = 0;
921     else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
922       ArgNum = 3;
923     else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
924       ArgNum = 1;
925     else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
926       ArgNum = 2;
927   }
928 
929   return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
930          generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
931                                  C);
932 }
933 
934 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
935                                            const FunctionData &FData,
936                                            CheckerContext &C) const {
937   auto It = findFunctionInConfig(CustomSinks, FData);
938   if (It == CustomSinks.end())
939     return false;
940 
941   const auto &Value = It->second;
942   const GenericTaintChecker::ArgVector &Args = Value.second;
943   for (unsigned ArgNum : Args) {
944     if (ArgNum >= Call.getNumArgs())
945       continue;
946 
947     if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
948       return true;
949   }
950 
951   return false;
952 }
953 
954 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
955   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
956   std::string Option{"Config"};
957   StringRef ConfigFile =
958       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
959   llvm::Optional<TaintConfig> Config =
960       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
961   if (Config)
962     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
963 }
964 
965 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
966   return true;
967 }
968