1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Taint.h" 18 #include "Yaml.h" 19 #include "clang/AST/Attr.h" 20 #include "clang/Basic/Builtins.h" 21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/Checker.h" 24 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 27 #include "llvm/Support/YAMLTraits.h" 28 #include <algorithm> 29 #include <limits> 30 #include <unordered_map> 31 #include <utility> 32 33 using namespace clang; 34 using namespace ento; 35 using namespace taint; 36 37 namespace { 38 class GenericTaintChecker 39 : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> { 40 public: 41 static void *getTag() { 42 static int Tag; 43 return &Tag; 44 } 45 46 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 47 48 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 49 50 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 51 const char *Sep) const override; 52 53 using ArgVector = SmallVector<unsigned, 2>; 54 using SignedArgVector = SmallVector<int, 2>; 55 56 enum class VariadicType { None, Src, Dst }; 57 58 /// Used to parse the configuration file. 59 struct TaintConfiguration { 60 using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>; 61 62 struct Propagation { 63 std::string Name; 64 std::string Scope; 65 ArgVector SrcArgs; 66 SignedArgVector DstArgs; 67 VariadicType VarType; 68 unsigned VarIndex; 69 }; 70 71 std::vector<Propagation> Propagations; 72 std::vector<NameScopeArgs> Filters; 73 std::vector<NameScopeArgs> Sinks; 74 75 TaintConfiguration() = default; 76 TaintConfiguration(const TaintConfiguration &) = default; 77 TaintConfiguration(TaintConfiguration &&) = default; 78 TaintConfiguration &operator=(const TaintConfiguration &) = default; 79 TaintConfiguration &operator=(TaintConfiguration &&) = default; 80 }; 81 82 /// Convert SignedArgVector to ArgVector. 83 ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, 84 SignedArgVector Args); 85 86 /// Parse the config. 87 void parseConfiguration(CheckerManager &Mgr, const std::string &Option, 88 TaintConfiguration &&Config); 89 90 static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; 91 /// Denotes the return vale. 92 static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - 93 1}; 94 95 private: 96 mutable std::unique_ptr<BugType> BT; 97 void initBugType() const { 98 if (!BT) 99 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 100 } 101 102 struct FunctionData { 103 FunctionData() = delete; 104 FunctionData(const FunctionData &) = default; 105 FunctionData(FunctionData &&) = default; 106 FunctionData &operator=(const FunctionData &) = delete; 107 FunctionData &operator=(FunctionData &&) = delete; 108 109 static Optional<FunctionData> create(const CallExpr *CE, 110 const CheckerContext &C) { 111 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 112 if (!FDecl || (FDecl->getKind() != Decl::Function && 113 FDecl->getKind() != Decl::CXXMethod)) 114 return None; 115 116 StringRef Name = C.getCalleeName(FDecl); 117 std::string FullName = FDecl->getQualifiedNameAsString(); 118 if (Name.empty() || FullName.empty()) 119 return None; 120 121 return FunctionData{FDecl, Name, FullName}; 122 } 123 124 bool isInScope(StringRef Scope) const { 125 return StringRef(FullName).startswith(Scope); 126 } 127 128 const FunctionDecl *const FDecl; 129 const StringRef Name; 130 const std::string FullName; 131 }; 132 133 /// Catch taint related bugs. Check if tainted data is passed to a 134 /// system call etc. Returns true on matching. 135 bool checkPre(const CallExpr *CE, const FunctionData &FData, 136 CheckerContext &C) const; 137 138 /// Add taint sources on a pre-visit. Returns true on matching. 139 bool addSourcesPre(const CallExpr *CE, const FunctionData &FData, 140 CheckerContext &C) const; 141 142 /// Mark filter's arguments not tainted on a pre-visit. Returns true on 143 /// matching. 144 bool addFiltersPre(const CallExpr *CE, const FunctionData &FData, 145 CheckerContext &C) const; 146 147 /// Propagate taint generated at pre-visit. Returns true on matching. 148 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 149 150 /// Check if the region the expression evaluates to is the standard input, 151 /// and thus, is tainted. 152 static bool isStdin(const Expr *E, CheckerContext &C); 153 154 /// Given a pointer argument, return the value it points to. 155 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); 156 157 /// Check for CWE-134: Uncontrolled Format String. 158 static constexpr llvm::StringLiteral MsgUncontrolledFormatString = 159 "Untrusted data is used as a format string " 160 "(CWE-134: Uncontrolled Format String)"; 161 bool checkUncontrolledFormatString(const CallExpr *CE, 162 CheckerContext &C) const; 163 164 /// Check for: 165 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 166 /// CWE-78, "Failure to Sanitize Data into an OS Command" 167 static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 168 "Untrusted data is passed to a system call " 169 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 170 bool checkSystemCall(const CallExpr *CE, StringRef Name, 171 CheckerContext &C) const; 172 173 /// Check if tainted data is used as a buffer size ins strn.. functions, 174 /// and allocators. 175 static constexpr llvm::StringLiteral MsgTaintedBufferSize = 176 "Untrusted data is used to specify the buffer size " 177 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 178 "for character data and the null terminator)"; 179 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 180 CheckerContext &C) const; 181 182 /// Check if tainted data is used as a custom sink's parameter. 183 static constexpr llvm::StringLiteral MsgCustomSink = 184 "Untrusted data is passed to a user-defined sink"; 185 bool checkCustomSinks(const CallExpr *CE, const FunctionData &FData, 186 CheckerContext &C) const; 187 188 /// Generate a report if the expression is tainted or points to tainted data. 189 bool generateReportIfTainted(const Expr *E, StringRef Msg, 190 CheckerContext &C) const; 191 192 struct TaintPropagationRule; 193 template <typename T> 194 using ConfigDataMap = 195 std::unordered_multimap<std::string, std::pair<std::string, T>>; 196 using NameRuleMap = ConfigDataMap<TaintPropagationRule>; 197 using NameArgMap = ConfigDataMap<ArgVector>; 198 199 /// Find a function with the given name and scope. Returns the first match 200 /// or the end of the map. 201 template <typename T> 202 static auto findFunctionInConfig(const ConfigDataMap<T> &Map, 203 const FunctionData &FData); 204 205 /// A struct used to specify taint propagation rules for a function. 206 /// 207 /// If any of the possible taint source arguments is tainted, all of the 208 /// destination arguments should also be tainted. Use InvalidArgIndex in the 209 /// src list to specify that all of the arguments can introduce taint. Use 210 /// InvalidArgIndex in the dst arguments to signify that all the non-const 211 /// pointer and reference arguments might be tainted on return. If 212 /// ReturnValueIndex is added to the dst list, the return value will be 213 /// tainted. 214 struct TaintPropagationRule { 215 using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *, 216 CheckerContext &C); 217 218 /// List of arguments which can be taint sources and should be checked. 219 ArgVector SrcArgs; 220 /// List of arguments which should be tainted on function return. 221 ArgVector DstArgs; 222 /// Index for the first variadic parameter if exist. 223 unsigned VariadicIndex; 224 /// Show when a function has variadic parameters. If it has, it marks all 225 /// of them as source or destination. 226 VariadicType VarType; 227 /// Special function for tainted source determination. If defined, it can 228 /// override the default behavior. 229 PropagationFuncType PropagationFunc; 230 231 TaintPropagationRule() 232 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), 233 PropagationFunc(nullptr) {} 234 235 TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, 236 VariadicType Var = VariadicType::None, 237 unsigned VarIndex = InvalidArgIndex, 238 PropagationFuncType Func = nullptr) 239 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), 240 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} 241 242 /// Get the propagation rule for a given function. 243 static TaintPropagationRule 244 getTaintPropagationRule(const NameRuleMap &CustomPropagations, 245 const FunctionData &FData, CheckerContext &C); 246 247 void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 248 void addDstArg(unsigned A) { DstArgs.push_back(A); } 249 250 bool isNull() const { 251 return SrcArgs.empty() && DstArgs.empty() && 252 VariadicType::None == VarType; 253 } 254 255 bool isDestinationArgument(unsigned ArgNum) const { 256 return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); 257 } 258 259 static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, 260 CheckerContext &C) { 261 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) 262 return true; 263 264 if (!E->getType().getTypePtr()->isPointerType()) 265 return false; 266 267 Optional<SVal> V = getPointedToSVal(C, E); 268 return (V && isTainted(State, *V)); 269 } 270 271 /// Pre-process a function which propagates taint according to the 272 /// taint rule. 273 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 274 275 // Functions for custom taintedness propagation. 276 static bool postSocket(bool IsTainted, const CallExpr *CE, 277 CheckerContext &C); 278 }; 279 280 /// Defines a map between the propagation function's name, scope 281 /// and TaintPropagationRule. 282 NameRuleMap CustomPropagations; 283 284 /// Defines a map between the filter function's name, scope and filtering 285 /// args. 286 NameArgMap CustomFilters; 287 288 /// Defines a map between the sink function's name, scope and sinking args. 289 NameArgMap CustomSinks; 290 }; 291 292 const unsigned GenericTaintChecker::ReturnValueIndex; 293 const unsigned GenericTaintChecker::InvalidArgIndex; 294 295 // FIXME: these lines can be removed in C++17 296 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; 297 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; 298 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; 299 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; 300 } // end of anonymous namespace 301 302 using TaintConfig = GenericTaintChecker::TaintConfiguration; 303 304 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) 305 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs) 306 307 namespace llvm { 308 namespace yaml { 309 template <> struct MappingTraits<TaintConfig> { 310 static void mapping(IO &IO, TaintConfig &Config) { 311 IO.mapOptional("Propagations", Config.Propagations); 312 IO.mapOptional("Filters", Config.Filters); 313 IO.mapOptional("Sinks", Config.Sinks); 314 } 315 }; 316 317 template <> struct MappingTraits<TaintConfig::Propagation> { 318 static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { 319 IO.mapRequired("Name", Propagation.Name); 320 IO.mapOptional("Scope", Propagation.Scope); 321 IO.mapOptional("SrcArgs", Propagation.SrcArgs); 322 IO.mapOptional("DstArgs", Propagation.DstArgs); 323 IO.mapOptional("VariadicType", Propagation.VarType, 324 GenericTaintChecker::VariadicType::None); 325 IO.mapOptional("VariadicIndex", Propagation.VarIndex, 326 GenericTaintChecker::InvalidArgIndex); 327 } 328 }; 329 330 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { 331 static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { 332 IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); 333 IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); 334 IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); 335 } 336 }; 337 338 template <> struct MappingTraits<TaintConfig::NameScopeArgs> { 339 static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) { 340 IO.mapRequired("Name", std::get<0>(NSA)); 341 IO.mapOptional("Scope", std::get<1>(NSA)); 342 IO.mapRequired("Args", std::get<2>(NSA)); 343 } 344 }; 345 } // namespace yaml 346 } // namespace llvm 347 348 /// A set which is used to pass information from call pre-visit instruction 349 /// to the call post-visit. The values are unsigned integers, which are either 350 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 351 /// points to data, which should be tainted on return. 352 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 353 354 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector( 355 CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) { 356 ArgVector Result; 357 for (int Arg : Args) { 358 if (Arg == -1) 359 Result.push_back(ReturnValueIndex); 360 else if (Arg < -1) { 361 Result.push_back(InvalidArgIndex); 362 Mgr.reportInvalidCheckerOptionValue( 363 this, Option, 364 "an argument number for propagation rules greater or equal to -1"); 365 } else 366 Result.push_back(static_cast<unsigned>(Arg)); 367 } 368 return Result; 369 } 370 371 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, 372 const std::string &Option, 373 TaintConfiguration &&Config) { 374 for (auto &P : Config.Propagations) { 375 GenericTaintChecker::CustomPropagations.emplace( 376 P.Name, 377 std::make_pair(P.Scope, TaintPropagationRule{ 378 std::move(P.SrcArgs), 379 convertToArgVector(Mgr, Option, P.DstArgs), 380 P.VarType, P.VarIndex})); 381 } 382 383 for (auto &F : Config.Filters) { 384 GenericTaintChecker::CustomFilters.emplace( 385 std::get<0>(F), 386 std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F)))); 387 } 388 389 for (auto &S : Config.Sinks) { 390 GenericTaintChecker::CustomSinks.emplace( 391 std::get<0>(S), 392 std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S)))); 393 } 394 } 395 396 template <typename T> 397 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map, 398 const FunctionData &FData) { 399 auto Range = Map.equal_range(FData.Name); 400 auto It = 401 std::find_if(Range.first, Range.second, [&FData](const auto &Entry) { 402 const auto &Value = Entry.second; 403 StringRef Scope = Value.first; 404 return Scope.empty() || FData.isInScope(Scope); 405 }); 406 return It != Range.second ? It : Map.end(); 407 } 408 409 GenericTaintChecker::TaintPropagationRule 410 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 411 const NameRuleMap &CustomPropagations, const FunctionData &FData, 412 CheckerContext &C) { 413 // TODO: Currently, we might lose precision here: we always mark a return 414 // value as tainted even if it's just a pointer, pointing to tainted data. 415 416 // Check for exact name match for functions without builtin substitutes. 417 // Use qualified name, because these are C functions without namespace. 418 TaintPropagationRule Rule = 419 llvm::StringSwitch<TaintPropagationRule>(FData.FullName) 420 // Source functions 421 // TODO: Add support for vfscanf & family. 422 .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex})) 423 .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex})) 424 .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex})) 425 .Case("getch", TaintPropagationRule({}, {ReturnValueIndex})) 426 .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex})) 427 .Case("getchar_unlocked", 428 TaintPropagationRule({}, {ReturnValueIndex})) 429 .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex})) 430 .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) 431 .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1)) 432 .Case("socket", 433 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None, 434 InvalidArgIndex, 435 &TaintPropagationRule::postSocket)) 436 .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex})) 437 // Propagating functions 438 .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex})) 439 .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex})) 440 .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex})) 441 .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex})) 442 .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex})) 443 .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex})) 444 .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2)) 445 .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex})) 446 .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex})) 447 .Case("getdelim", TaintPropagationRule({3}, {0})) 448 .Case("getline", TaintPropagationRule({2}, {0})) 449 .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex})) 450 .Case("pread", 451 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex})) 452 .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex})) 453 .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex})) 454 .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex})) 455 .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex})) 456 .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex})) 457 .Default(TaintPropagationRule()); 458 459 if (!Rule.isNull()) 460 return Rule; 461 462 // Check if it's one of the memory setting/copying functions. 463 // This check is specialized but faster then calling isCLibraryFunction. 464 const FunctionDecl *FDecl = FData.FDecl; 465 unsigned BId = 0; 466 if ((BId = FDecl->getMemoryFunctionKind())) 467 switch (BId) { 468 case Builtin::BImemcpy: 469 case Builtin::BImemmove: 470 case Builtin::BIstrncpy: 471 case Builtin::BIstrncat: 472 return TaintPropagationRule({1, 2}, {0, ReturnValueIndex}); 473 case Builtin::BIstrlcpy: 474 case Builtin::BIstrlcat: 475 return TaintPropagationRule({1, 2}, {0}); 476 case Builtin::BIstrndup: 477 return TaintPropagationRule({0, 1}, {ReturnValueIndex}); 478 479 default: 480 break; 481 }; 482 483 // Process all other functions which could be defined as builtins. 484 if (Rule.isNull()) { 485 if (C.isCLibraryFunction(FDecl, "snprintf")) 486 return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src, 487 3); 488 else if (C.isCLibraryFunction(FDecl, "sprintf")) 489 return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src, 490 2); 491 else if (C.isCLibraryFunction(FDecl, "strcpy") || 492 C.isCLibraryFunction(FDecl, "stpcpy") || 493 C.isCLibraryFunction(FDecl, "strcat")) 494 return TaintPropagationRule({1}, {0, ReturnValueIndex}); 495 else if (C.isCLibraryFunction(FDecl, "bcopy")) 496 return TaintPropagationRule({0, 2}, {1}); 497 else if (C.isCLibraryFunction(FDecl, "strdup") || 498 C.isCLibraryFunction(FDecl, "strdupa")) 499 return TaintPropagationRule({0}, {ReturnValueIndex}); 500 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 501 return TaintPropagationRule({0}, {ReturnValueIndex}); 502 } 503 504 // Skipping the following functions, since they might be used for cleansing 505 // or smart memory copy: 506 // - memccpy - copying until hitting a special character. 507 508 auto It = findFunctionInConfig(CustomPropagations, FData); 509 if (It != CustomPropagations.end()) { 510 const auto &Value = It->second; 511 return Value.second; 512 } 513 514 return TaintPropagationRule(); 515 } 516 517 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 518 CheckerContext &C) const { 519 Optional<FunctionData> FData = FunctionData::create(CE, C); 520 if (!FData) 521 return; 522 523 // Check for taintedness related errors first: system call, uncontrolled 524 // format string, tainted buffer size. 525 if (checkPre(CE, *FData, C)) 526 return; 527 528 // Marks the function's arguments and/or return value tainted if it present in 529 // the list. 530 if (addSourcesPre(CE, *FData, C)) 531 return; 532 533 addFiltersPre(CE, *FData, C); 534 } 535 536 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 537 CheckerContext &C) const { 538 // Set the marked values as tainted. The return value only accessible from 539 // checkPostStmt. 540 propagateFromPre(CE, C); 541 } 542 543 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 544 const char *NL, const char *Sep) const { 545 printTaint(State, Out, NL, Sep); 546 } 547 548 bool GenericTaintChecker::addSourcesPre(const CallExpr *CE, 549 const FunctionData &FData, 550 CheckerContext &C) const { 551 // First, try generating a propagation rule for this function. 552 TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( 553 this->CustomPropagations, FData, C); 554 if (!Rule.isNull()) { 555 ProgramStateRef State = Rule.process(CE, C); 556 if (State) { 557 C.addTransition(State); 558 return true; 559 } 560 } 561 return false; 562 } 563 564 bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, 565 const FunctionData &FData, 566 CheckerContext &C) const { 567 auto It = findFunctionInConfig(CustomFilters, FData); 568 if (It == CustomFilters.end()) 569 return false; 570 571 ProgramStateRef State = C.getState(); 572 const auto &Value = It->second; 573 const ArgVector &Args = Value.second; 574 for (unsigned ArgNum : Args) { 575 if (ArgNum >= CE->getNumArgs()) 576 continue; 577 578 const Expr *Arg = CE->getArg(ArgNum); 579 Optional<SVal> V = getPointedToSVal(C, Arg); 580 if (V) 581 State = removeTaint(State, *V); 582 } 583 584 if (State != C.getState()) { 585 C.addTransition(State); 586 return true; 587 } 588 return false; 589 } 590 591 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 592 CheckerContext &C) const { 593 ProgramStateRef State = C.getState(); 594 595 // Depending on what was tainted at pre-visit, we determined a set of 596 // arguments which should be tainted after the function returns. These are 597 // stored in the state as TaintArgsOnPostVisit set. 598 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 599 if (TaintArgs.isEmpty()) 600 return false; 601 602 for (unsigned ArgNum : TaintArgs) { 603 // Special handling for the tainted return value. 604 if (ArgNum == ReturnValueIndex) { 605 State = addTaint(State, CE, C.getLocationContext()); 606 continue; 607 } 608 609 // The arguments are pointer arguments. The data they are pointing at is 610 // tainted after the call. 611 if (CE->getNumArgs() < (ArgNum + 1)) 612 return false; 613 const Expr *Arg = CE->getArg(ArgNum); 614 Optional<SVal> V = getPointedToSVal(C, Arg); 615 if (V) 616 State = addTaint(State, *V); 617 } 618 619 // Clear up the taint info from the state. 620 State = State->remove<TaintArgsOnPostVisit>(); 621 622 if (State != C.getState()) { 623 C.addTransition(State); 624 return true; 625 } 626 return false; 627 } 628 629 bool GenericTaintChecker::checkPre(const CallExpr *CE, 630 const FunctionData &FData, 631 CheckerContext &C) const { 632 633 if (checkUncontrolledFormatString(CE, C)) 634 return true; 635 636 if (checkSystemCall(CE, FData.Name, C)) 637 return true; 638 639 if (checkTaintedBufferSize(CE, FData.FDecl, C)) 640 return true; 641 642 if (checkCustomSinks(CE, FData, C)) 643 return true; 644 645 return false; 646 } 647 648 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, 649 const Expr *Arg) { 650 ProgramStateRef State = C.getState(); 651 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 652 if (AddrVal.isUnknownOrUndef()) 653 return None; 654 655 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 656 if (!AddrLoc) 657 return None; 658 659 QualType ArgTy = Arg->getType().getCanonicalType(); 660 if (!ArgTy->isPointerType()) 661 return State->getSVal(*AddrLoc); 662 663 QualType ValTy = ArgTy->getPointeeType(); 664 665 // Do not dereference void pointers. Treat them as byte pointers instead. 666 // FIXME: we might want to consider more than just the first byte. 667 if (ValTy->isVoidType()) 668 ValTy = C.getASTContext().CharTy; 669 670 return State->getSVal(*AddrLoc, ValTy); 671 } 672 673 ProgramStateRef 674 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 675 CheckerContext &C) const { 676 ProgramStateRef State = C.getState(); 677 678 // Check for taint in arguments. 679 bool IsTainted = true; 680 for (unsigned ArgNum : SrcArgs) { 681 if (ArgNum >= CE->getNumArgs()) 682 continue; 683 684 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 685 break; 686 } 687 688 // Check for taint in variadic arguments. 689 if (!IsTainted && VariadicType::Src == VarType) { 690 // Check if any of the arguments is tainted 691 for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { 692 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 693 break; 694 } 695 } 696 697 if (PropagationFunc) 698 IsTainted = PropagationFunc(IsTainted, CE, C); 699 700 if (!IsTainted) 701 return State; 702 703 // Mark the arguments which should be tainted after the function returns. 704 for (unsigned ArgNum : DstArgs) { 705 // Should mark the return value? 706 if (ArgNum == ReturnValueIndex) { 707 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 708 continue; 709 } 710 711 if (ArgNum >= CE->getNumArgs()) 712 continue; 713 714 // Mark the given argument. 715 State = State->add<TaintArgsOnPostVisit>(ArgNum); 716 } 717 718 // Mark all variadic arguments tainted if present. 719 if (VariadicType::Dst == VarType) { 720 // For all pointer and references that were passed in: 721 // If they are not pointing to const data, mark data as tainted. 722 // TODO: So far we are just going one level down; ideally we'd need to 723 // recurse here. 724 for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { 725 const Expr *Arg = CE->getArg(i); 726 // Process pointer argument. 727 const Type *ArgTy = Arg->getType().getTypePtr(); 728 QualType PType = ArgTy->getPointeeType(); 729 if ((!PType.isNull() && !PType.isConstQualified()) || 730 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 731 State = State->add<TaintArgsOnPostVisit>(i); 732 } 733 } 734 735 return State; 736 } 737 738 // If argument 0(protocol domain) is network, the return value should get taint. 739 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/, 740 const CallExpr *CE, 741 CheckerContext &C) { 742 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 743 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 744 // White list the internal communication protocols. 745 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 746 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 747 return false; 748 749 return true; 750 } 751 752 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 753 ProgramStateRef State = C.getState(); 754 SVal Val = C.getSVal(E); 755 756 // stdin is a pointer, so it would be a region. 757 const MemRegion *MemReg = Val.getAsRegion(); 758 759 // The region should be symbolic, we do not know it's value. 760 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 761 if (!SymReg) 762 return false; 763 764 // Get it's symbol and find the declaration region it's pointing to. 765 const SymbolRegionValue *Sm = 766 dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 767 if (!Sm) 768 return false; 769 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 770 if (!DeclReg) 771 return false; 772 773 // This region corresponds to a declaration, find out if it's a global/extern 774 // variable named stdin with the proper type. 775 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 776 D = D->getCanonicalDecl(); 777 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { 778 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 779 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 780 C.getASTContext().getFILEType().getCanonicalType()) 781 return true; 782 } 783 } 784 return false; 785 } 786 787 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 788 const CheckerContext &C, 789 unsigned &ArgNum) { 790 // Find if the function contains a format string argument. 791 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 792 // vsnprintf, syslog, custom annotated functions. 793 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 794 if (!FDecl) 795 return false; 796 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 797 ArgNum = Format->getFormatIdx() - 1; 798 if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) 799 return true; 800 } 801 802 // Or if a function is named setproctitle (this is a heuristic). 803 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 804 ArgNum = 0; 805 return true; 806 } 807 808 return false; 809 } 810 811 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 812 CheckerContext &C) const { 813 assert(E); 814 815 // Check for taint. 816 ProgramStateRef State = C.getState(); 817 Optional<SVal> PointedToSVal = getPointedToSVal(C, E); 818 SVal TaintedSVal; 819 if (PointedToSVal && isTainted(State, *PointedToSVal)) 820 TaintedSVal = *PointedToSVal; 821 else if (isTainted(State, E, C.getLocationContext())) 822 TaintedSVal = C.getSVal(E); 823 else 824 return false; 825 826 // Generate diagnostic. 827 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 828 initBugType(); 829 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 830 report->addRange(E->getSourceRange()); 831 report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); 832 C.emitReport(std::move(report)); 833 return true; 834 } 835 return false; 836 } 837 838 bool GenericTaintChecker::checkUncontrolledFormatString( 839 const CallExpr *CE, CheckerContext &C) const { 840 // Check if the function contains a format string argument. 841 unsigned ArgNum = 0; 842 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 843 return false; 844 845 // If either the format string content or the pointer itself are tainted, 846 // warn. 847 return generateReportIfTainted(CE->getArg(ArgNum), 848 MsgUncontrolledFormatString, C); 849 } 850 851 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, 852 CheckerContext &C) const { 853 // TODO: It might make sense to run this check on demand. In some cases, 854 // we should check if the environment has been cleansed here. We also might 855 // need to know if the user was reset before these calls(seteuid). 856 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 857 .Case("system", 0) 858 .Case("popen", 0) 859 .Case("execl", 0) 860 .Case("execle", 0) 861 .Case("execlp", 0) 862 .Case("execv", 0) 863 .Case("execvp", 0) 864 .Case("execvP", 0) 865 .Case("execve", 0) 866 .Case("dlopen", 0) 867 .Default(InvalidArgIndex); 868 869 if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1)) 870 return false; 871 872 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); 873 } 874 875 // TODO: Should this check be a part of the CString checker? 876 // If yes, should taint be a global setting? 877 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 878 const FunctionDecl *FDecl, 879 CheckerContext &C) const { 880 // If the function has a buffer size argument, set ArgNum. 881 unsigned ArgNum = InvalidArgIndex; 882 unsigned BId = 0; 883 if ((BId = FDecl->getMemoryFunctionKind())) 884 switch (BId) { 885 case Builtin::BImemcpy: 886 case Builtin::BImemmove: 887 case Builtin::BIstrncpy: 888 ArgNum = 2; 889 break; 890 case Builtin::BIstrndup: 891 ArgNum = 1; 892 break; 893 default: 894 break; 895 }; 896 897 if (ArgNum == InvalidArgIndex) { 898 if (C.isCLibraryFunction(FDecl, "malloc") || 899 C.isCLibraryFunction(FDecl, "calloc") || 900 C.isCLibraryFunction(FDecl, "alloca")) 901 ArgNum = 0; 902 else if (C.isCLibraryFunction(FDecl, "memccpy")) 903 ArgNum = 3; 904 else if (C.isCLibraryFunction(FDecl, "realloc")) 905 ArgNum = 1; 906 else if (C.isCLibraryFunction(FDecl, "bcopy")) 907 ArgNum = 2; 908 } 909 910 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 911 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); 912 } 913 914 bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, 915 const FunctionData &FData, 916 CheckerContext &C) const { 917 auto It = findFunctionInConfig(CustomSinks, FData); 918 if (It == CustomSinks.end()) 919 return false; 920 921 const auto &Value = It->second; 922 const GenericTaintChecker::ArgVector &Args = Value.second; 923 for (unsigned ArgNum : Args) { 924 if (ArgNum >= CE->getNumArgs()) 925 continue; 926 927 if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C)) 928 return true; 929 } 930 931 return false; 932 } 933 934 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 935 auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); 936 std::string Option{"Config"}; 937 StringRef ConfigFile = 938 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); 939 llvm::Optional<TaintConfig> Config = 940 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); 941 if (Config) 942 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); 943 } 944 945 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) { 946 return true; 947 } 948