1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Taint.h" 18 #include "Yaml.h" 19 #include "clang/AST/Attr.h" 20 #include "clang/Basic/Builtins.h" 21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/Checker.h" 24 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 28 #include "llvm/Support/YAMLTraits.h" 29 30 #include <algorithm> 31 #include <limits> 32 #include <memory> 33 #include <unordered_map> 34 #include <utility> 35 36 using namespace clang; 37 using namespace ento; 38 using namespace taint; 39 40 namespace { 41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { 42 public: 43 static void *getTag() { 44 static int Tag; 45 return &Tag; 46 } 47 48 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 49 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 50 51 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 52 const char *Sep) const override; 53 54 using ArgVector = SmallVector<unsigned, 2>; 55 using SignedArgVector = SmallVector<int, 2>; 56 57 enum class VariadicType { None, Src, Dst }; 58 59 /// Used to parse the configuration file. 60 struct TaintConfiguration { 61 using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>; 62 63 struct Propagation { 64 std::string Name; 65 std::string Scope; 66 ArgVector SrcArgs; 67 SignedArgVector DstArgs; 68 VariadicType VarType; 69 unsigned VarIndex; 70 }; 71 72 std::vector<Propagation> Propagations; 73 std::vector<NameScopeArgs> Filters; 74 std::vector<NameScopeArgs> Sinks; 75 76 TaintConfiguration() = default; 77 TaintConfiguration(const TaintConfiguration &) = default; 78 TaintConfiguration(TaintConfiguration &&) = default; 79 TaintConfiguration &operator=(const TaintConfiguration &) = default; 80 TaintConfiguration &operator=(TaintConfiguration &&) = default; 81 }; 82 83 /// Convert SignedArgVector to ArgVector. 84 ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, 85 const SignedArgVector &Args); 86 87 /// Parse the config. 88 void parseConfiguration(CheckerManager &Mgr, const std::string &Option, 89 TaintConfiguration &&Config); 90 91 static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; 92 /// Denotes the return vale. 93 static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - 94 1}; 95 96 private: 97 mutable std::unique_ptr<BugType> BT; 98 void initBugType() const { 99 if (!BT) 100 BT = std::make_unique<BugType>(this, "Use of Untrusted Data", 101 "Untrusted Data"); 102 } 103 104 struct FunctionData { 105 FunctionData() = delete; 106 FunctionData(const FunctionDecl *FDecl, StringRef Name, 107 std::string FullName) 108 : FDecl(FDecl), Name(Name), FullName(std::move(FullName)) {} 109 FunctionData(const FunctionData &) = default; 110 FunctionData(FunctionData &&) = default; 111 FunctionData &operator=(const FunctionData &) = delete; 112 FunctionData &operator=(FunctionData &&) = delete; 113 114 static Optional<FunctionData> create(const CallEvent &Call, 115 const CheckerContext &C) { 116 if (!Call.getDecl()) 117 return None; 118 119 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 120 if (!FDecl || (FDecl->getKind() != Decl::Function && 121 FDecl->getKind() != Decl::CXXMethod)) 122 return None; 123 124 StringRef Name = C.getCalleeName(FDecl); 125 std::string FullName = FDecl->getQualifiedNameAsString(); 126 if (Name.empty() || FullName.empty()) 127 return None; 128 129 return FunctionData{FDecl, Name, std::move(FullName)}; 130 } 131 132 bool isInScope(StringRef Scope) const { 133 return StringRef(FullName).startswith(Scope); 134 } 135 136 const FunctionDecl *const FDecl; 137 const StringRef Name; 138 const std::string FullName; 139 }; 140 141 /// Catch taint related bugs. Check if tainted data is passed to a 142 /// system call etc. Returns true on matching. 143 bool checkPre(const CallEvent &Call, const FunctionData &FData, 144 CheckerContext &C) const; 145 146 /// Add taint sources on a pre-visit. Returns true on matching. 147 bool addSourcesPre(const CallEvent &Call, const FunctionData &FData, 148 CheckerContext &C) const; 149 150 /// Mark filter's arguments not tainted on a pre-visit. Returns true on 151 /// matching. 152 bool addFiltersPre(const CallEvent &Call, const FunctionData &FData, 153 CheckerContext &C) const; 154 155 /// Propagate taint generated at pre-visit. Returns true on matching. 156 static bool propagateFromPre(const CallEvent &Call, CheckerContext &C); 157 158 /// Check if the region the expression evaluates to is the standard input, 159 /// and thus, is tainted. 160 static bool isStdin(const Expr *E, CheckerContext &C); 161 162 /// Given a pointer argument, return the value it points to. 163 static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg); 164 165 /// Check for CWE-134: Uncontrolled Format String. 166 static constexpr llvm::StringLiteral MsgUncontrolledFormatString = 167 "Untrusted data is used as a format string " 168 "(CWE-134: Uncontrolled Format String)"; 169 bool checkUncontrolledFormatString(const CallEvent &Call, 170 CheckerContext &C) const; 171 172 /// Check for: 173 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 174 /// CWE-78, "Failure to Sanitize Data into an OS Command" 175 static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 176 "Untrusted data is passed to a system call " 177 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 178 bool checkSystemCall(const CallEvent &Call, StringRef Name, 179 CheckerContext &C) const; 180 181 /// Check if tainted data is used as a buffer size ins strn.. functions, 182 /// and allocators. 183 static constexpr llvm::StringLiteral MsgTaintedBufferSize = 184 "Untrusted data is used to specify the buffer size " 185 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 186 "for character data and the null terminator)"; 187 bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const; 188 189 /// Check if tainted data is used as a custom sink's parameter. 190 static constexpr llvm::StringLiteral MsgCustomSink = 191 "Untrusted data is passed to a user-defined sink"; 192 bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData, 193 CheckerContext &C) const; 194 195 /// Generate a report if the expression is tainted or points to tainted data. 196 bool generateReportIfTainted(const Expr *E, StringRef Msg, 197 CheckerContext &C) const; 198 199 struct TaintPropagationRule; 200 template <typename T> 201 using ConfigDataMap = 202 std::unordered_multimap<std::string, std::pair<std::string, T>>; 203 using NameRuleMap = ConfigDataMap<TaintPropagationRule>; 204 using NameArgMap = ConfigDataMap<ArgVector>; 205 206 /// Find a function with the given name and scope. Returns the first match 207 /// or the end of the map. 208 template <typename T> 209 static auto findFunctionInConfig(const ConfigDataMap<T> &Map, 210 const FunctionData &FData); 211 212 /// A struct used to specify taint propagation rules for a function. 213 /// 214 /// If any of the possible taint source arguments is tainted, all of the 215 /// destination arguments should also be tainted. Use InvalidArgIndex in the 216 /// src list to specify that all of the arguments can introduce taint. Use 217 /// InvalidArgIndex in the dst arguments to signify that all the non-const 218 /// pointer and reference arguments might be tainted on return. If 219 /// ReturnValueIndex is added to the dst list, the return value will be 220 /// tainted. 221 struct TaintPropagationRule { 222 using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call, 223 CheckerContext &C); 224 225 /// List of arguments which can be taint sources and should be checked. 226 ArgVector SrcArgs; 227 /// List of arguments which should be tainted on function return. 228 ArgVector DstArgs; 229 /// Index for the first variadic parameter if exist. 230 unsigned VariadicIndex; 231 /// Show when a function has variadic parameters. If it has, it marks all 232 /// of them as source or destination. 233 VariadicType VarType; 234 /// Special function for tainted source determination. If defined, it can 235 /// override the default behavior. 236 PropagationFuncType PropagationFunc; 237 238 TaintPropagationRule() 239 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), 240 PropagationFunc(nullptr) {} 241 242 TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, 243 VariadicType Var = VariadicType::None, 244 unsigned VarIndex = InvalidArgIndex, 245 PropagationFuncType Func = nullptr) 246 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), 247 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} 248 249 /// Get the propagation rule for a given function. 250 static TaintPropagationRule 251 getTaintPropagationRule(const NameRuleMap &CustomPropagations, 252 const FunctionData &FData, CheckerContext &C); 253 254 void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 255 void addDstArg(unsigned A) { DstArgs.push_back(A); } 256 257 bool isNull() const { 258 return SrcArgs.empty() && DstArgs.empty() && 259 VariadicType::None == VarType; 260 } 261 262 bool isDestinationArgument(unsigned ArgNum) const { 263 return llvm::is_contained(DstArgs, ArgNum); 264 } 265 266 static bool isTaintedOrPointsToTainted(const Expr *E, 267 const ProgramStateRef &State, 268 CheckerContext &C) { 269 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) 270 return true; 271 272 if (!E->getType().getTypePtr()->isPointerType()) 273 return false; 274 275 Optional<SVal> V = getPointeeOf(C, E); 276 return (V && isTainted(State, *V)); 277 } 278 279 /// Pre-process a function which propagates taint according to the 280 /// taint rule. 281 ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const; 282 283 // Functions for custom taintedness propagation. 284 static bool postSocket(bool IsTainted, const CallEvent &Call, 285 CheckerContext &C); 286 }; 287 288 /// Defines a map between the propagation function's name, scope 289 /// and TaintPropagationRule. 290 NameRuleMap CustomPropagations; 291 292 /// Defines a map between the filter function's name, scope and filtering 293 /// args. 294 NameArgMap CustomFilters; 295 296 /// Defines a map between the sink function's name, scope and sinking args. 297 NameArgMap CustomSinks; 298 }; 299 300 const unsigned GenericTaintChecker::ReturnValueIndex; 301 const unsigned GenericTaintChecker::InvalidArgIndex; 302 303 // FIXME: these lines can be removed in C++17 304 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; 305 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; 306 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; 307 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; 308 } // end of anonymous namespace 309 310 using TaintConfig = GenericTaintChecker::TaintConfiguration; 311 312 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) 313 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs) 314 315 namespace llvm { 316 namespace yaml { 317 template <> struct MappingTraits<TaintConfig> { 318 static void mapping(IO &IO, TaintConfig &Config) { 319 IO.mapOptional("Propagations", Config.Propagations); 320 IO.mapOptional("Filters", Config.Filters); 321 IO.mapOptional("Sinks", Config.Sinks); 322 } 323 }; 324 325 template <> struct MappingTraits<TaintConfig::Propagation> { 326 static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { 327 IO.mapRequired("Name", Propagation.Name); 328 IO.mapOptional("Scope", Propagation.Scope); 329 IO.mapOptional("SrcArgs", Propagation.SrcArgs); 330 IO.mapOptional("DstArgs", Propagation.DstArgs); 331 IO.mapOptional("VariadicType", Propagation.VarType, 332 GenericTaintChecker::VariadicType::None); 333 IO.mapOptional("VariadicIndex", Propagation.VarIndex, 334 GenericTaintChecker::InvalidArgIndex); 335 } 336 }; 337 338 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { 339 static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { 340 IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); 341 IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); 342 IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); 343 } 344 }; 345 346 template <> struct MappingTraits<TaintConfig::NameScopeArgs> { 347 static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) { 348 IO.mapRequired("Name", std::get<0>(NSA)); 349 IO.mapOptional("Scope", std::get<1>(NSA)); 350 IO.mapRequired("Args", std::get<2>(NSA)); 351 } 352 }; 353 } // namespace yaml 354 } // namespace llvm 355 356 /// A set which is used to pass information from call pre-visit instruction 357 /// to the call post-visit. The values are unsigned integers, which are either 358 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 359 /// points to data, which should be tainted on return. 360 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 361 362 GenericTaintChecker::ArgVector 363 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr, 364 const std::string &Option, 365 const SignedArgVector &Args) { 366 ArgVector Result; 367 for (int Arg : Args) { 368 if (Arg == -1) 369 Result.push_back(ReturnValueIndex); 370 else if (Arg < -1) { 371 Result.push_back(InvalidArgIndex); 372 Mgr.reportInvalidCheckerOptionValue( 373 this, Option, 374 "an argument number for propagation rules greater or equal to -1"); 375 } else 376 Result.push_back(static_cast<unsigned>(Arg)); 377 } 378 return Result; 379 } 380 381 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, 382 const std::string &Option, 383 TaintConfiguration &&Config) { 384 for (auto &P : Config.Propagations) { 385 GenericTaintChecker::CustomPropagations.emplace( 386 P.Name, 387 std::make_pair(P.Scope, TaintPropagationRule{ 388 std::move(P.SrcArgs), 389 convertToArgVector(Mgr, Option, P.DstArgs), 390 P.VarType, P.VarIndex})); 391 } 392 393 for (auto &F : Config.Filters) { 394 GenericTaintChecker::CustomFilters.emplace( 395 std::get<0>(F), 396 std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F)))); 397 } 398 399 for (auto &S : Config.Sinks) { 400 GenericTaintChecker::CustomSinks.emplace( 401 std::get<0>(S), 402 std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S)))); 403 } 404 } 405 406 template <typename T> 407 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map, 408 const FunctionData &FData) { 409 auto Range = Map.equal_range(std::string(FData.Name)); 410 auto It = 411 std::find_if(Range.first, Range.second, [&FData](const auto &Entry) { 412 const auto &Value = Entry.second; 413 StringRef Scope = Value.first; 414 return Scope.empty() || FData.isInScope(Scope); 415 }); 416 return It != Range.second ? It : Map.end(); 417 } 418 419 GenericTaintChecker::TaintPropagationRule 420 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 421 const NameRuleMap &CustomPropagations, const FunctionData &FData, 422 CheckerContext &C) { 423 // TODO: Currently, we might lose precision here: we always mark a return 424 // value as tainted even if it's just a pointer, pointing to tainted data. 425 426 // Check for exact name match for functions without builtin substitutes. 427 // Use qualified name, because these are C functions without namespace. 428 TaintPropagationRule Rule = 429 llvm::StringSwitch<TaintPropagationRule>(FData.FullName) 430 // Source functions 431 // TODO: Add support for vfscanf & family. 432 .Case("fdopen", {{}, {ReturnValueIndex}}) 433 .Case("fopen", {{}, {ReturnValueIndex}}) 434 .Case("freopen", {{}, {ReturnValueIndex}}) 435 .Case("getch", {{}, {ReturnValueIndex}}) 436 .Case("getchar", {{}, {ReturnValueIndex}}) 437 .Case("getchar_unlocked", {{}, {ReturnValueIndex}}) 438 .Case("gets", {{}, {0, ReturnValueIndex}}) 439 .Case("scanf", {{}, {}, VariadicType::Dst, 1}) 440 .Case("socket", {{}, 441 {ReturnValueIndex}, 442 VariadicType::None, 443 InvalidArgIndex, 444 &TaintPropagationRule::postSocket}) 445 .Case("wgetch", {{}, {ReturnValueIndex}}) 446 // Propagating functions 447 .Case("atoi", {{0}, {ReturnValueIndex}}) 448 .Case("atol", {{0}, {ReturnValueIndex}}) 449 .Case("atoll", {{0}, {ReturnValueIndex}}) 450 .Case("fgetc", {{0}, {ReturnValueIndex}}) 451 .Case("fgetln", {{0}, {ReturnValueIndex}}) 452 .Case("fgets", {{2}, {0, ReturnValueIndex}}) 453 .Case("fscanf", {{0}, {}, VariadicType::Dst, 2}) 454 .Case("sscanf", {{0}, {}, VariadicType::Dst, 2}) 455 .Case("getc", {{0}, {ReturnValueIndex}}) 456 .Case("getc_unlocked", {{0}, {ReturnValueIndex}}) 457 .Case("getdelim", {{3}, {0}}) 458 .Case("getline", {{2}, {0}}) 459 .Case("getw", {{0}, {ReturnValueIndex}}) 460 .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}}) 461 .Case("read", {{0, 2}, {1, ReturnValueIndex}}) 462 .Case("strchr", {{0}, {ReturnValueIndex}}) 463 .Case("strrchr", {{0}, {ReturnValueIndex}}) 464 .Case("tolower", {{0}, {ReturnValueIndex}}) 465 .Case("toupper", {{0}, {ReturnValueIndex}}) 466 .Default({}); 467 468 if (!Rule.isNull()) 469 return Rule; 470 471 // `getenv` returns taint only in untrusted environments. 472 if (FData.FullName == "getenv") { 473 if (C.getAnalysisManager() 474 .getAnalyzerOptions() 475 .ShouldAssumeControlledEnvironment) 476 return {}; 477 return {{}, {ReturnValueIndex}}; 478 } 479 480 assert(FData.FDecl); 481 482 // Check if it's one of the memory setting/copying functions. 483 // This check is specialized but faster then calling isCLibraryFunction. 484 const FunctionDecl *FDecl = FData.FDecl; 485 unsigned BId = 0; 486 if ((BId = FDecl->getMemoryFunctionKind())) { 487 switch (BId) { 488 case Builtin::BImemcpy: 489 case Builtin::BImemmove: 490 case Builtin::BIstrncpy: 491 case Builtin::BIstrncat: 492 return {{1, 2}, {0, ReturnValueIndex}}; 493 case Builtin::BIstrlcpy: 494 case Builtin::BIstrlcat: 495 return {{1, 2}, {0}}; 496 case Builtin::BIstrndup: 497 return {{0, 1}, {ReturnValueIndex}}; 498 499 default: 500 break; 501 } 502 } 503 504 // Process all other functions which could be defined as builtins. 505 if (Rule.isNull()) { 506 const auto OneOf = [FDecl](const auto &... Name) { 507 // FIXME: use fold expression in C++17 508 using unused = int[]; 509 bool ret = false; 510 static_cast<void>(unused{ 511 0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...}); 512 return ret; 513 }; 514 if (OneOf("snprintf")) 515 return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3}; 516 if (OneOf("sprintf")) 517 return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 2}; 518 if (OneOf("strcpy", "stpcpy", "strcat")) 519 return {{1}, {0, ReturnValueIndex}}; 520 if (OneOf("bcopy")) 521 return {{0, 2}, {1}}; 522 if (OneOf("strdup", "strdupa", "wcsdup")) 523 return {{0}, {ReturnValueIndex}}; 524 } 525 526 // Skipping the following functions, since they might be used for cleansing or 527 // smart memory copy: 528 // - memccpy - copying until hitting a special character. 529 530 auto It = findFunctionInConfig(CustomPropagations, FData); 531 if (It != CustomPropagations.end()) 532 return It->second.second; 533 return {}; 534 } 535 536 void GenericTaintChecker::checkPreCall(const CallEvent &Call, 537 CheckerContext &C) const { 538 Optional<FunctionData> FData = FunctionData::create(Call, C); 539 if (!FData) 540 return; 541 542 // Check for taintedness related errors first: system call, uncontrolled 543 // format string, tainted buffer size. 544 if (checkPre(Call, *FData, C)) 545 return; 546 547 // Marks the function's arguments and/or return value tainted if it present in 548 // the list. 549 if (addSourcesPre(Call, *FData, C)) 550 return; 551 552 addFiltersPre(Call, *FData, C); 553 } 554 555 void GenericTaintChecker::checkPostCall(const CallEvent &Call, 556 CheckerContext &C) const { 557 // Set the marked values as tainted. The return value only accessible from 558 // checkPostStmt. 559 propagateFromPre(Call, C); 560 } 561 562 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 563 const char *NL, const char *Sep) const { 564 printTaint(State, Out, NL, Sep); 565 } 566 567 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call, 568 const FunctionData &FData, 569 CheckerContext &C) const { 570 // First, try generating a propagation rule for this function. 571 TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( 572 this->CustomPropagations, FData, C); 573 if (!Rule.isNull()) { 574 ProgramStateRef State = Rule.process(Call, C); 575 if (State) { 576 C.addTransition(State); 577 return true; 578 } 579 } 580 return false; 581 } 582 583 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call, 584 const FunctionData &FData, 585 CheckerContext &C) const { 586 auto It = findFunctionInConfig(CustomFilters, FData); 587 if (It == CustomFilters.end()) 588 return false; 589 590 ProgramStateRef State = C.getState(); 591 const auto &Value = It->second; 592 const ArgVector &Args = Value.second; 593 for (unsigned ArgNum : Args) { 594 if (ArgNum >= Call.getNumArgs()) 595 continue; 596 597 const Expr *Arg = Call.getArgExpr(ArgNum); 598 Optional<SVal> V = getPointeeOf(C, Arg); 599 if (V) 600 State = removeTaint(State, *V); 601 } 602 603 if (State != C.getState()) { 604 C.addTransition(State); 605 return true; 606 } 607 return false; 608 } 609 610 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call, 611 CheckerContext &C) { 612 ProgramStateRef State = C.getState(); 613 614 // Depending on what was tainted at pre-visit, we determined a set of 615 // arguments which should be tainted after the function returns. These are 616 // stored in the state as TaintArgsOnPostVisit set. 617 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 618 if (TaintArgs.isEmpty()) 619 return false; 620 621 for (unsigned ArgNum : TaintArgs) { 622 // Special handling for the tainted return value. 623 if (ArgNum == ReturnValueIndex) { 624 State = addTaint(State, Call.getReturnValue()); 625 continue; 626 } 627 628 // The arguments are pointer arguments. The data they are pointing at is 629 // tainted after the call. 630 if (Call.getNumArgs() < (ArgNum + 1)) 631 return false; 632 const Expr *Arg = Call.getArgExpr(ArgNum); 633 Optional<SVal> V = getPointeeOf(C, Arg); 634 if (V) 635 State = addTaint(State, *V); 636 } 637 638 // Clear up the taint info from the state. 639 State = State->remove<TaintArgsOnPostVisit>(); 640 641 if (State != C.getState()) { 642 C.addTransition(State); 643 return true; 644 } 645 return false; 646 } 647 648 bool GenericTaintChecker::checkPre(const CallEvent &Call, 649 const FunctionData &FData, 650 CheckerContext &C) const { 651 if (checkUncontrolledFormatString(Call, C)) 652 return true; 653 654 if (checkSystemCall(Call, FData.Name, C)) 655 return true; 656 657 if (checkTaintedBufferSize(Call, C)) 658 return true; 659 660 return checkCustomSinks(Call, FData, C); 661 } 662 663 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C, 664 const Expr *Arg) { 665 ProgramStateRef State = C.getState(); 666 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 667 if (AddrVal.isUnknownOrUndef()) 668 return None; 669 670 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 671 if (!AddrLoc) 672 return None; 673 674 QualType ArgTy = Arg->getType().getCanonicalType(); 675 if (!ArgTy->isPointerType()) 676 return State->getSVal(*AddrLoc); 677 678 QualType ValTy = ArgTy->getPointeeType(); 679 680 // Do not dereference void pointers. Treat them as byte pointers instead. 681 // FIXME: we might want to consider more than just the first byte. 682 if (ValTy->isVoidType()) 683 ValTy = C.getASTContext().CharTy; 684 685 return State->getSVal(*AddrLoc, ValTy); 686 } 687 688 ProgramStateRef 689 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call, 690 CheckerContext &C) const { 691 ProgramStateRef State = C.getState(); 692 693 // Check for taint in arguments. 694 bool IsTainted = true; 695 for (unsigned ArgNum : SrcArgs) { 696 if (ArgNum >= Call.getNumArgs()) 697 continue; 698 699 if ((IsTainted = 700 isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C))) 701 break; 702 } 703 704 // Check for taint in variadic arguments. 705 if (!IsTainted && VariadicType::Src == VarType) { 706 // Check if any of the arguments is tainted 707 for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { 708 if ((IsTainted = 709 isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C))) 710 break; 711 } 712 } 713 714 if (PropagationFunc) 715 IsTainted = PropagationFunc(IsTainted, Call, C); 716 717 if (!IsTainted) 718 return State; 719 720 // Mark the arguments which should be tainted after the function returns. 721 for (unsigned ArgNum : DstArgs) { 722 // Should mark the return value? 723 if (ArgNum == ReturnValueIndex) { 724 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 725 continue; 726 } 727 728 if (ArgNum >= Call.getNumArgs()) 729 continue; 730 731 // Mark the given argument. 732 State = State->add<TaintArgsOnPostVisit>(ArgNum); 733 } 734 735 // Mark all variadic arguments tainted if present. 736 if (VariadicType::Dst == VarType) { 737 // For all pointer and references that were passed in: 738 // If they are not pointing to const data, mark data as tainted. 739 // TODO: So far we are just going one level down; ideally we'd need to 740 // recurse here. 741 for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { 742 const Expr *Arg = Call.getArgExpr(i); 743 // Process pointer argument. 744 const Type *ArgTy = Arg->getType().getTypePtr(); 745 QualType PType = ArgTy->getPointeeType(); 746 if ((!PType.isNull() && !PType.isConstQualified()) || 747 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) { 748 State = State->add<TaintArgsOnPostVisit>(i); 749 } 750 } 751 } 752 753 return State; 754 } 755 756 // If argument 0(protocol domain) is network, the return value should get taint. 757 bool GenericTaintChecker::TaintPropagationRule::postSocket( 758 bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) { 759 SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); 760 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 761 // White list the internal communication protocols. 762 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 763 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 764 return false; 765 return true; 766 } 767 768 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 769 ProgramStateRef State = C.getState(); 770 SVal Val = C.getSVal(E); 771 772 // stdin is a pointer, so it would be a region. 773 const MemRegion *MemReg = Val.getAsRegion(); 774 775 // The region should be symbolic, we do not know it's value. 776 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 777 if (!SymReg) 778 return false; 779 780 // Get it's symbol and find the declaration region it's pointing to. 781 const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 782 if (!Sm) 783 return false; 784 const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 785 if (!DeclReg) 786 return false; 787 788 // This region corresponds to a declaration, find out if it's a global/extern 789 // variable named stdin with the proper type. 790 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 791 D = D->getCanonicalDecl(); 792 if (D->getName().contains("stdin") && D->isExternC()) { 793 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 794 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 795 C.getASTContext().getFILEType().getCanonicalType()) 796 return true; 797 } 798 } 799 return false; 800 } 801 802 static bool getPrintfFormatArgumentNum(const CallEvent &Call, 803 const CheckerContext &C, 804 unsigned &ArgNum) { 805 // Find if the function contains a format string argument. 806 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 807 // vsnprintf, syslog, custom annotated functions. 808 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 809 if (!FDecl) 810 return false; 811 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 812 ArgNum = Format->getFormatIdx() - 1; 813 if ((Format->getType()->getName() == "printf") && 814 Call.getNumArgs() > ArgNum) 815 return true; 816 } 817 818 // Or if a function is named setproctitle (this is a heuristic). 819 if (C.getCalleeName(FDecl).contains("setproctitle")) { 820 ArgNum = 0; 821 return true; 822 } 823 824 return false; 825 } 826 827 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 828 CheckerContext &C) const { 829 assert(E); 830 831 // Check for taint. 832 ProgramStateRef State = C.getState(); 833 Optional<SVal> PointedToSVal = getPointeeOf(C, E); 834 SVal TaintedSVal; 835 if (PointedToSVal && isTainted(State, *PointedToSVal)) 836 TaintedSVal = *PointedToSVal; 837 else if (isTainted(State, E, C.getLocationContext())) 838 TaintedSVal = C.getSVal(E); 839 else 840 return false; 841 842 // Generate diagnostic. 843 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 844 initBugType(); 845 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 846 report->addRange(E->getSourceRange()); 847 report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); 848 C.emitReport(std::move(report)); 849 return true; 850 } 851 return false; 852 } 853 854 bool GenericTaintChecker::checkUncontrolledFormatString( 855 const CallEvent &Call, CheckerContext &C) const { 856 // Check if the function contains a format string argument. 857 unsigned ArgNum = 0; 858 if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) 859 return false; 860 861 // If either the format string content or the pointer itself are tainted, 862 // warn. 863 return generateReportIfTainted(Call.getArgExpr(ArgNum), 864 MsgUncontrolledFormatString, C); 865 } 866 867 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name, 868 CheckerContext &C) const { 869 // TODO: It might make sense to run this check on demand. In some cases, 870 // we should check if the environment has been cleansed here. We also might 871 // need to know if the user was reset before these calls(seteuid). 872 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 873 .Case("system", 0) 874 .Case("popen", 0) 875 .Case("execl", 0) 876 .Case("execle", 0) 877 .Case("execlp", 0) 878 .Case("execv", 0) 879 .Case("execvp", 0) 880 .Case("execvP", 0) 881 .Case("execve", 0) 882 .Case("dlopen", 0) 883 .Default(InvalidArgIndex); 884 885 if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1)) 886 return false; 887 888 return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs, 889 C); 890 } 891 892 // TODO: Should this check be a part of the CString checker? 893 // If yes, should taint be a global setting? 894 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call, 895 CheckerContext &C) const { 896 const auto *FDecl = Call.getDecl()->getAsFunction(); 897 // If the function has a buffer size argument, set ArgNum. 898 unsigned ArgNum = InvalidArgIndex; 899 unsigned BId = 0; 900 if ((BId = FDecl->getMemoryFunctionKind())) { 901 switch (BId) { 902 case Builtin::BImemcpy: 903 case Builtin::BImemmove: 904 case Builtin::BIstrncpy: 905 ArgNum = 2; 906 break; 907 case Builtin::BIstrndup: 908 ArgNum = 1; 909 break; 910 default: 911 break; 912 } 913 } 914 915 if (ArgNum == InvalidArgIndex) { 916 using CCtx = CheckerContext; 917 if (CCtx::isCLibraryFunction(FDecl, "malloc") || 918 CCtx::isCLibraryFunction(FDecl, "calloc") || 919 CCtx::isCLibraryFunction(FDecl, "alloca")) 920 ArgNum = 0; 921 else if (CCtx::isCLibraryFunction(FDecl, "memccpy")) 922 ArgNum = 3; 923 else if (CCtx::isCLibraryFunction(FDecl, "realloc")) 924 ArgNum = 1; 925 else if (CCtx::isCLibraryFunction(FDecl, "bcopy")) 926 ArgNum = 2; 927 } 928 929 return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum && 930 generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize, 931 C); 932 } 933 934 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call, 935 const FunctionData &FData, 936 CheckerContext &C) const { 937 auto It = findFunctionInConfig(CustomSinks, FData); 938 if (It == CustomSinks.end()) 939 return false; 940 941 const auto &Value = It->second; 942 const GenericTaintChecker::ArgVector &Args = Value.second; 943 for (unsigned ArgNum : Args) { 944 if (ArgNum >= Call.getNumArgs()) 945 continue; 946 947 if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C)) 948 return true; 949 } 950 951 return false; 952 } 953 954 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 955 auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); 956 std::string Option{"Config"}; 957 StringRef ConfigFile = 958 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); 959 llvm::Optional<TaintConfig> Config = 960 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); 961 if (Config) 962 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); 963 } 964 965 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { 966 return true; 967 } 968