1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Taint.h" 18 #include "Yaml.h" 19 #include "clang/AST/Attr.h" 20 #include "clang/Basic/Builtins.h" 21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/Checker.h" 24 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 28 #include "llvm/Support/YAMLTraits.h" 29 30 #include <algorithm> 31 #include <limits> 32 #include <memory> 33 #include <unordered_map> 34 #include <utility> 35 36 using namespace clang; 37 using namespace ento; 38 using namespace taint; 39 40 namespace { 41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { 42 public: 43 static void *getTag() { 44 static int Tag; 45 return &Tag; 46 } 47 48 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 49 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 50 51 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 52 const char *Sep) const override; 53 54 using ArgVector = SmallVector<unsigned, 2>; 55 using SignedArgVector = SmallVector<int, 2>; 56 57 enum class VariadicType { None, Src, Dst }; 58 59 /// Used to parse the configuration file. 60 struct TaintConfiguration { 61 using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>; 62 63 struct Propagation { 64 std::string Name; 65 std::string Scope; 66 ArgVector SrcArgs; 67 SignedArgVector DstArgs; 68 VariadicType VarType; 69 unsigned VarIndex; 70 }; 71 72 std::vector<Propagation> Propagations; 73 std::vector<NameScopeArgs> Filters; 74 std::vector<NameScopeArgs> Sinks; 75 76 TaintConfiguration() = default; 77 TaintConfiguration(const TaintConfiguration &) = default; 78 TaintConfiguration(TaintConfiguration &&) = default; 79 TaintConfiguration &operator=(const TaintConfiguration &) = default; 80 TaintConfiguration &operator=(TaintConfiguration &&) = default; 81 }; 82 83 /// Convert SignedArgVector to ArgVector. 84 ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, 85 const SignedArgVector &Args); 86 87 /// Parse the config. 88 void parseConfiguration(CheckerManager &Mgr, const std::string &Option, 89 TaintConfiguration &&Config); 90 91 static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; 92 /// Denotes the return vale. 93 static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - 94 1}; 95 96 private: 97 mutable std::unique_ptr<BugType> BT; 98 void initBugType() const { 99 if (!BT) 100 BT = std::make_unique<BugType>(this, "Use of Untrusted Data", 101 "Untrusted Data"); 102 } 103 104 struct FunctionData { 105 FunctionData() = delete; 106 FunctionData(const FunctionData &) = default; 107 FunctionData(FunctionData &&) = default; 108 FunctionData &operator=(const FunctionData &) = delete; 109 FunctionData &operator=(FunctionData &&) = delete; 110 111 static Optional<FunctionData> create(const CallEvent &Call, 112 const CheckerContext &C) { 113 if (!Call.getDecl()) 114 return None; 115 116 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 117 if (!FDecl || (FDecl->getKind() != Decl::Function && 118 FDecl->getKind() != Decl::CXXMethod)) 119 return None; 120 121 StringRef Name = C.getCalleeName(FDecl); 122 std::string FullName = FDecl->getQualifiedNameAsString(); 123 if (Name.empty() || FullName.empty()) 124 return None; 125 126 return FunctionData{FDecl, Name, FullName}; 127 } 128 129 bool isInScope(StringRef Scope) const { 130 return StringRef(FullName).startswith(Scope); 131 } 132 133 const FunctionDecl *const FDecl; 134 const StringRef Name; 135 const std::string FullName; 136 }; 137 138 /// Catch taint related bugs. Check if tainted data is passed to a 139 /// system call etc. Returns true on matching. 140 bool checkPre(const CallEvent &Call, const FunctionData &FData, 141 CheckerContext &C) const; 142 143 /// Add taint sources on a pre-visit. Returns true on matching. 144 bool addSourcesPre(const CallEvent &Call, const FunctionData &FData, 145 CheckerContext &C) const; 146 147 /// Mark filter's arguments not tainted on a pre-visit. Returns true on 148 /// matching. 149 bool addFiltersPre(const CallEvent &Call, const FunctionData &FData, 150 CheckerContext &C) const; 151 152 /// Propagate taint generated at pre-visit. Returns true on matching. 153 static bool propagateFromPre(const CallEvent &Call, CheckerContext &C); 154 155 /// Check if the region the expression evaluates to is the standard input, 156 /// and thus, is tainted. 157 static bool isStdin(const Expr *E, CheckerContext &C); 158 159 /// Given a pointer argument, return the value it points to. 160 static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg); 161 162 /// Check for CWE-134: Uncontrolled Format String. 163 static constexpr llvm::StringLiteral MsgUncontrolledFormatString = 164 "Untrusted data is used as a format string " 165 "(CWE-134: Uncontrolled Format String)"; 166 bool checkUncontrolledFormatString(const CallEvent &Call, 167 CheckerContext &C) const; 168 169 /// Check for: 170 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 171 /// CWE-78, "Failure to Sanitize Data into an OS Command" 172 static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 173 "Untrusted data is passed to a system call " 174 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 175 bool checkSystemCall(const CallEvent &Call, StringRef Name, 176 CheckerContext &C) const; 177 178 /// Check if tainted data is used as a buffer size ins strn.. functions, 179 /// and allocators. 180 static constexpr llvm::StringLiteral MsgTaintedBufferSize = 181 "Untrusted data is used to specify the buffer size " 182 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 183 "for character data and the null terminator)"; 184 bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const; 185 186 /// Check if tainted data is used as a custom sink's parameter. 187 static constexpr llvm::StringLiteral MsgCustomSink = 188 "Untrusted data is passed to a user-defined sink"; 189 bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData, 190 CheckerContext &C) const; 191 192 /// Generate a report if the expression is tainted or points to tainted data. 193 bool generateReportIfTainted(const Expr *E, StringRef Msg, 194 CheckerContext &C) const; 195 196 struct TaintPropagationRule; 197 template <typename T> 198 using ConfigDataMap = 199 std::unordered_multimap<std::string, std::pair<std::string, T>>; 200 using NameRuleMap = ConfigDataMap<TaintPropagationRule>; 201 using NameArgMap = ConfigDataMap<ArgVector>; 202 203 /// Find a function with the given name and scope. Returns the first match 204 /// or the end of the map. 205 template <typename T> 206 static auto findFunctionInConfig(const ConfigDataMap<T> &Map, 207 const FunctionData &FData); 208 209 /// A struct used to specify taint propagation rules for a function. 210 /// 211 /// If any of the possible taint source arguments is tainted, all of the 212 /// destination arguments should also be tainted. Use InvalidArgIndex in the 213 /// src list to specify that all of the arguments can introduce taint. Use 214 /// InvalidArgIndex in the dst arguments to signify that all the non-const 215 /// pointer and reference arguments might be tainted on return. If 216 /// ReturnValueIndex is added to the dst list, the return value will be 217 /// tainted. 218 struct TaintPropagationRule { 219 using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call, 220 CheckerContext &C); 221 222 /// List of arguments which can be taint sources and should be checked. 223 ArgVector SrcArgs; 224 /// List of arguments which should be tainted on function return. 225 ArgVector DstArgs; 226 /// Index for the first variadic parameter if exist. 227 unsigned VariadicIndex; 228 /// Show when a function has variadic parameters. If it has, it marks all 229 /// of them as source or destination. 230 VariadicType VarType; 231 /// Special function for tainted source determination. If defined, it can 232 /// override the default behavior. 233 PropagationFuncType PropagationFunc; 234 235 TaintPropagationRule() 236 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), 237 PropagationFunc(nullptr) {} 238 239 TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, 240 VariadicType Var = VariadicType::None, 241 unsigned VarIndex = InvalidArgIndex, 242 PropagationFuncType Func = nullptr) 243 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), 244 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} 245 246 /// Get the propagation rule for a given function. 247 static TaintPropagationRule 248 getTaintPropagationRule(const NameRuleMap &CustomPropagations, 249 const FunctionData &FData, CheckerContext &C); 250 251 void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 252 void addDstArg(unsigned A) { DstArgs.push_back(A); } 253 254 bool isNull() const { 255 return SrcArgs.empty() && DstArgs.empty() && 256 VariadicType::None == VarType; 257 } 258 259 bool isDestinationArgument(unsigned ArgNum) const { 260 return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); 261 } 262 263 static bool isTaintedOrPointsToTainted(const Expr *E, 264 const ProgramStateRef &State, 265 CheckerContext &C) { 266 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) 267 return true; 268 269 if (!E->getType().getTypePtr()->isPointerType()) 270 return false; 271 272 Optional<SVal> V = getPointeeOf(C, E); 273 return (V && isTainted(State, *V)); 274 } 275 276 /// Pre-process a function which propagates taint according to the 277 /// taint rule. 278 ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const; 279 280 // Functions for custom taintedness propagation. 281 static bool postSocket(bool IsTainted, const CallEvent &Call, 282 CheckerContext &C); 283 }; 284 285 /// Defines a map between the propagation function's name, scope 286 /// and TaintPropagationRule. 287 NameRuleMap CustomPropagations; 288 289 /// Defines a map between the filter function's name, scope and filtering 290 /// args. 291 NameArgMap CustomFilters; 292 293 /// Defines a map between the sink function's name, scope and sinking args. 294 NameArgMap CustomSinks; 295 }; 296 297 const unsigned GenericTaintChecker::ReturnValueIndex; 298 const unsigned GenericTaintChecker::InvalidArgIndex; 299 300 // FIXME: these lines can be removed in C++17 301 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; 302 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; 303 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; 304 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; 305 } // end of anonymous namespace 306 307 using TaintConfig = GenericTaintChecker::TaintConfiguration; 308 309 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) 310 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs) 311 312 namespace llvm { 313 namespace yaml { 314 template <> struct MappingTraits<TaintConfig> { 315 static void mapping(IO &IO, TaintConfig &Config) { 316 IO.mapOptional("Propagations", Config.Propagations); 317 IO.mapOptional("Filters", Config.Filters); 318 IO.mapOptional("Sinks", Config.Sinks); 319 } 320 }; 321 322 template <> struct MappingTraits<TaintConfig::Propagation> { 323 static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { 324 IO.mapRequired("Name", Propagation.Name); 325 IO.mapOptional("Scope", Propagation.Scope); 326 IO.mapOptional("SrcArgs", Propagation.SrcArgs); 327 IO.mapOptional("DstArgs", Propagation.DstArgs); 328 IO.mapOptional("VariadicType", Propagation.VarType, 329 GenericTaintChecker::VariadicType::None); 330 IO.mapOptional("VariadicIndex", Propagation.VarIndex, 331 GenericTaintChecker::InvalidArgIndex); 332 } 333 }; 334 335 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { 336 static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { 337 IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); 338 IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); 339 IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); 340 } 341 }; 342 343 template <> struct MappingTraits<TaintConfig::NameScopeArgs> { 344 static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) { 345 IO.mapRequired("Name", std::get<0>(NSA)); 346 IO.mapOptional("Scope", std::get<1>(NSA)); 347 IO.mapRequired("Args", std::get<2>(NSA)); 348 } 349 }; 350 } // namespace yaml 351 } // namespace llvm 352 353 /// A set which is used to pass information from call pre-visit instruction 354 /// to the call post-visit. The values are unsigned integers, which are either 355 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 356 /// points to data, which should be tainted on return. 357 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 358 359 GenericTaintChecker::ArgVector 360 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr, 361 const std::string &Option, 362 const SignedArgVector &Args) { 363 ArgVector Result; 364 for (int Arg : Args) { 365 if (Arg == -1) 366 Result.push_back(ReturnValueIndex); 367 else if (Arg < -1) { 368 Result.push_back(InvalidArgIndex); 369 Mgr.reportInvalidCheckerOptionValue( 370 this, Option, 371 "an argument number for propagation rules greater or equal to -1"); 372 } else 373 Result.push_back(static_cast<unsigned>(Arg)); 374 } 375 return Result; 376 } 377 378 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, 379 const std::string &Option, 380 TaintConfiguration &&Config) { 381 for (auto &P : Config.Propagations) { 382 GenericTaintChecker::CustomPropagations.emplace( 383 P.Name, 384 std::make_pair(P.Scope, TaintPropagationRule{ 385 std::move(P.SrcArgs), 386 convertToArgVector(Mgr, Option, P.DstArgs), 387 P.VarType, P.VarIndex})); 388 } 389 390 for (auto &F : Config.Filters) { 391 GenericTaintChecker::CustomFilters.emplace( 392 std::get<0>(F), 393 std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F)))); 394 } 395 396 for (auto &S : Config.Sinks) { 397 GenericTaintChecker::CustomSinks.emplace( 398 std::get<0>(S), 399 std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S)))); 400 } 401 } 402 403 template <typename T> 404 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map, 405 const FunctionData &FData) { 406 auto Range = Map.equal_range(std::string(FData.Name)); 407 auto It = 408 std::find_if(Range.first, Range.second, [&FData](const auto &Entry) { 409 const auto &Value = Entry.second; 410 StringRef Scope = Value.first; 411 return Scope.empty() || FData.isInScope(Scope); 412 }); 413 return It != Range.second ? It : Map.end(); 414 } 415 416 GenericTaintChecker::TaintPropagationRule 417 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 418 const NameRuleMap &CustomPropagations, const FunctionData &FData, 419 CheckerContext &C) { 420 // TODO: Currently, we might lose precision here: we always mark a return 421 // value as tainted even if it's just a pointer, pointing to tainted data. 422 423 // Check for exact name match for functions without builtin substitutes. 424 // Use qualified name, because these are C functions without namespace. 425 TaintPropagationRule Rule = 426 llvm::StringSwitch<TaintPropagationRule>(FData.FullName) 427 // Source functions 428 // TODO: Add support for vfscanf & family. 429 .Case("fdopen", {{}, {ReturnValueIndex}}) 430 .Case("fopen", {{}, {ReturnValueIndex}}) 431 .Case("freopen", {{}, {ReturnValueIndex}}) 432 .Case("getch", {{}, {ReturnValueIndex}}) 433 .Case("getchar", {{}, {ReturnValueIndex}}) 434 .Case("getchar_unlocked", {{}, {ReturnValueIndex}}) 435 .Case("getenv", {{}, {ReturnValueIndex}}) 436 .Case("gets", {{}, {0, ReturnValueIndex}}) 437 .Case("scanf", {{}, {}, VariadicType::Dst, 1}) 438 .Case("socket", {{}, 439 {ReturnValueIndex}, 440 VariadicType::None, 441 InvalidArgIndex, 442 &TaintPropagationRule::postSocket}) 443 .Case("wgetch", {{}, {ReturnValueIndex}}) 444 // Propagating functions 445 .Case("atoi", {{0}, {ReturnValueIndex}}) 446 .Case("atol", {{0}, {ReturnValueIndex}}) 447 .Case("atoll", {{0}, {ReturnValueIndex}}) 448 .Case("fgetc", {{0}, {ReturnValueIndex}}) 449 .Case("fgetln", {{0}, {ReturnValueIndex}}) 450 .Case("fgets", {{2}, {0, ReturnValueIndex}}) 451 .Case("fscanf", {{0}, {}, VariadicType::Dst, 2}) 452 .Case("sscanf", {{0}, {}, VariadicType::Dst, 2}) 453 .Case("getc", {{0}, {ReturnValueIndex}}) 454 .Case("getc_unlocked", {{0}, {ReturnValueIndex}}) 455 .Case("getdelim", {{3}, {0}}) 456 .Case("getline", {{2}, {0}}) 457 .Case("getw", {{0}, {ReturnValueIndex}}) 458 .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}}) 459 .Case("read", {{0, 2}, {1, ReturnValueIndex}}) 460 .Case("strchr", {{0}, {ReturnValueIndex}}) 461 .Case("strrchr", {{0}, {ReturnValueIndex}}) 462 .Case("tolower", {{0}, {ReturnValueIndex}}) 463 .Case("toupper", {{0}, {ReturnValueIndex}}) 464 .Default({}); 465 466 if (!Rule.isNull()) 467 return Rule; 468 assert(FData.FDecl); 469 470 // Check if it's one of the memory setting/copying functions. 471 // This check is specialized but faster then calling isCLibraryFunction. 472 const FunctionDecl *FDecl = FData.FDecl; 473 unsigned BId = 0; 474 if ((BId = FDecl->getMemoryFunctionKind())) { 475 switch (BId) { 476 case Builtin::BImemcpy: 477 case Builtin::BImemmove: 478 case Builtin::BIstrncpy: 479 case Builtin::BIstrncat: 480 return {{1, 2}, {0, ReturnValueIndex}}; 481 case Builtin::BIstrlcpy: 482 case Builtin::BIstrlcat: 483 return {{1, 2}, {0}}; 484 case Builtin::BIstrndup: 485 return {{0, 1}, {ReturnValueIndex}}; 486 487 default: 488 break; 489 } 490 } 491 492 // Process all other functions which could be defined as builtins. 493 if (Rule.isNull()) { 494 const auto OneOf = [FDecl](const auto &... Name) { 495 // FIXME: use fold expression in C++17 496 using unused = int[]; 497 bool ret = false; 498 static_cast<void>(unused{ 499 0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...}); 500 return ret; 501 }; 502 if (OneOf("snprintf")) 503 return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3}; 504 if (OneOf("sprintf")) 505 return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2}; 506 if (OneOf("strcpy", "stpcpy", "strcat")) 507 return {{1}, {0, ReturnValueIndex}}; 508 if (OneOf("bcopy")) 509 return {{0, 2}, {1}}; 510 if (OneOf("strdup", "strdupa", "wcsdup")) 511 return {{0}, {ReturnValueIndex}}; 512 } 513 514 // Skipping the following functions, since they might be used for cleansing or 515 // smart memory copy: 516 // - memccpy - copying until hitting a special character. 517 518 auto It = findFunctionInConfig(CustomPropagations, FData); 519 if (It != CustomPropagations.end()) 520 return It->second.second; 521 return {}; 522 } 523 524 void GenericTaintChecker::checkPreCall(const CallEvent &Call, 525 CheckerContext &C) const { 526 Optional<FunctionData> FData = FunctionData::create(Call, C); 527 if (!FData) 528 return; 529 530 // Check for taintedness related errors first: system call, uncontrolled 531 // format string, tainted buffer size. 532 if (checkPre(Call, *FData, C)) 533 return; 534 535 // Marks the function's arguments and/or return value tainted if it present in 536 // the list. 537 if (addSourcesPre(Call, *FData, C)) 538 return; 539 540 addFiltersPre(Call, *FData, C); 541 } 542 543 void GenericTaintChecker::checkPostCall(const CallEvent &Call, 544 CheckerContext &C) const { 545 // Set the marked values as tainted. The return value only accessible from 546 // checkPostStmt. 547 propagateFromPre(Call, C); 548 } 549 550 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 551 const char *NL, const char *Sep) const { 552 printTaint(State, Out, NL, Sep); 553 } 554 555 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call, 556 const FunctionData &FData, 557 CheckerContext &C) const { 558 // First, try generating a propagation rule for this function. 559 TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( 560 this->CustomPropagations, FData, C); 561 if (!Rule.isNull()) { 562 ProgramStateRef State = Rule.process(Call, C); 563 if (State) { 564 C.addTransition(State); 565 return true; 566 } 567 } 568 return false; 569 } 570 571 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call, 572 const FunctionData &FData, 573 CheckerContext &C) const { 574 auto It = findFunctionInConfig(CustomFilters, FData); 575 if (It == CustomFilters.end()) 576 return false; 577 578 ProgramStateRef State = C.getState(); 579 const auto &Value = It->second; 580 const ArgVector &Args = Value.second; 581 for (unsigned ArgNum : Args) { 582 if (ArgNum >= Call.getNumArgs()) 583 continue; 584 585 const Expr *Arg = Call.getArgExpr(ArgNum); 586 Optional<SVal> V = getPointeeOf(C, Arg); 587 if (V) 588 State = removeTaint(State, *V); 589 } 590 591 if (State != C.getState()) { 592 C.addTransition(State); 593 return true; 594 } 595 return false; 596 } 597 598 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call, 599 CheckerContext &C) { 600 ProgramStateRef State = C.getState(); 601 602 // Depending on what was tainted at pre-visit, we determined a set of 603 // arguments which should be tainted after the function returns. These are 604 // stored in the state as TaintArgsOnPostVisit set. 605 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 606 if (TaintArgs.isEmpty()) 607 return false; 608 609 for (unsigned ArgNum : TaintArgs) { 610 // Special handling for the tainted return value. 611 if (ArgNum == ReturnValueIndex) { 612 State = addTaint(State, Call.getReturnValue()); 613 continue; 614 } 615 616 // The arguments are pointer arguments. The data they are pointing at is 617 // tainted after the call. 618 if (Call.getNumArgs() < (ArgNum + 1)) 619 return false; 620 const Expr *Arg = Call.getArgExpr(ArgNum); 621 Optional<SVal> V = getPointeeOf(C, Arg); 622 if (V) 623 State = addTaint(State, *V); 624 } 625 626 // Clear up the taint info from the state. 627 State = State->remove<TaintArgsOnPostVisit>(); 628 629 if (State != C.getState()) { 630 C.addTransition(State); 631 return true; 632 } 633 return false; 634 } 635 636 bool GenericTaintChecker::checkPre(const CallEvent &Call, 637 const FunctionData &FData, 638 CheckerContext &C) const { 639 if (checkUncontrolledFormatString(Call, C)) 640 return true; 641 642 if (checkSystemCall(Call, FData.Name, C)) 643 return true; 644 645 if (checkTaintedBufferSize(Call, C)) 646 return true; 647 648 return checkCustomSinks(Call, FData, C); 649 } 650 651 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C, 652 const Expr *Arg) { 653 ProgramStateRef State = C.getState(); 654 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 655 if (AddrVal.isUnknownOrUndef()) 656 return None; 657 658 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 659 if (!AddrLoc) 660 return None; 661 662 QualType ArgTy = Arg->getType().getCanonicalType(); 663 if (!ArgTy->isPointerType()) 664 return State->getSVal(*AddrLoc); 665 666 QualType ValTy = ArgTy->getPointeeType(); 667 668 // Do not dereference void pointers. Treat them as byte pointers instead. 669 // FIXME: we might want to consider more than just the first byte. 670 if (ValTy->isVoidType()) 671 ValTy = C.getASTContext().CharTy; 672 673 return State->getSVal(*AddrLoc, ValTy); 674 } 675 676 ProgramStateRef 677 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call, 678 CheckerContext &C) const { 679 ProgramStateRef State = C.getState(); 680 681 // Check for taint in arguments. 682 bool IsTainted = true; 683 for (unsigned ArgNum : SrcArgs) { 684 if (ArgNum >= Call.getNumArgs()) 685 continue; 686 687 if ((IsTainted = 688 isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C))) 689 break; 690 } 691 692 // Check for taint in variadic arguments. 693 if (!IsTainted && VariadicType::Src == VarType) { 694 // Check if any of the arguments is tainted 695 for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { 696 if ((IsTainted = 697 isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C))) 698 break; 699 } 700 } 701 702 if (PropagationFunc) 703 IsTainted = PropagationFunc(IsTainted, Call, C); 704 705 if (!IsTainted) 706 return State; 707 708 // Mark the arguments which should be tainted after the function returns. 709 for (unsigned ArgNum : DstArgs) { 710 // Should mark the return value? 711 if (ArgNum == ReturnValueIndex) { 712 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 713 continue; 714 } 715 716 if (ArgNum >= Call.getNumArgs()) 717 continue; 718 719 // Mark the given argument. 720 State = State->add<TaintArgsOnPostVisit>(ArgNum); 721 } 722 723 // Mark all variadic arguments tainted if present. 724 if (VariadicType::Dst == VarType) { 725 // For all pointer and references that were passed in: 726 // If they are not pointing to const data, mark data as tainted. 727 // TODO: So far we are just going one level down; ideally we'd need to 728 // recurse here. 729 for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { 730 const Expr *Arg = Call.getArgExpr(i); 731 // Process pointer argument. 732 const Type *ArgTy = Arg->getType().getTypePtr(); 733 QualType PType = ArgTy->getPointeeType(); 734 if ((!PType.isNull() && !PType.isConstQualified()) || 735 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) { 736 State = State->add<TaintArgsOnPostVisit>(i); 737 } 738 } 739 } 740 741 return State; 742 } 743 744 // If argument 0(protocol domain) is network, the return value should get taint. 745 bool GenericTaintChecker::TaintPropagationRule::postSocket( 746 bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) { 747 SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); 748 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 749 // White list the internal communication protocols. 750 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 751 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 752 return false; 753 return true; 754 } 755 756 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 757 ProgramStateRef State = C.getState(); 758 SVal Val = C.getSVal(E); 759 760 // stdin is a pointer, so it would be a region. 761 const MemRegion *MemReg = Val.getAsRegion(); 762 763 // The region should be symbolic, we do not know it's value. 764 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 765 if (!SymReg) 766 return false; 767 768 // Get it's symbol and find the declaration region it's pointing to. 769 const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 770 if (!Sm) 771 return false; 772 const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 773 if (!DeclReg) 774 return false; 775 776 // This region corresponds to a declaration, find out if it's a global/extern 777 // variable named stdin with the proper type. 778 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 779 D = D->getCanonicalDecl(); 780 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { 781 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 782 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 783 C.getASTContext().getFILEType().getCanonicalType()) 784 return true; 785 } 786 } 787 return false; 788 } 789 790 static bool getPrintfFormatArgumentNum(const CallEvent &Call, 791 const CheckerContext &C, 792 unsigned &ArgNum) { 793 // Find if the function contains a format string argument. 794 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 795 // vsnprintf, syslog, custom annotated functions. 796 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 797 if (!FDecl) 798 return false; 799 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 800 ArgNum = Format->getFormatIdx() - 1; 801 if ((Format->getType()->getName() == "printf") && 802 Call.getNumArgs() > ArgNum) 803 return true; 804 } 805 806 // Or if a function is named setproctitle (this is a heuristic). 807 if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) { 808 ArgNum = 0; 809 return true; 810 } 811 812 return false; 813 } 814 815 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 816 CheckerContext &C) const { 817 assert(E); 818 819 // Check for taint. 820 ProgramStateRef State = C.getState(); 821 Optional<SVal> PointedToSVal = getPointeeOf(C, E); 822 SVal TaintedSVal; 823 if (PointedToSVal && isTainted(State, *PointedToSVal)) 824 TaintedSVal = *PointedToSVal; 825 else if (isTainted(State, E, C.getLocationContext())) 826 TaintedSVal = C.getSVal(E); 827 else 828 return false; 829 830 // Generate diagnostic. 831 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 832 initBugType(); 833 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 834 report->addRange(E->getSourceRange()); 835 report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); 836 C.emitReport(std::move(report)); 837 return true; 838 } 839 return false; 840 } 841 842 bool GenericTaintChecker::checkUncontrolledFormatString( 843 const CallEvent &Call, CheckerContext &C) const { 844 // Check if the function contains a format string argument. 845 unsigned ArgNum = 0; 846 if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) 847 return false; 848 849 // If either the format string content or the pointer itself are tainted, 850 // warn. 851 return generateReportIfTainted(Call.getArgExpr(ArgNum), 852 MsgUncontrolledFormatString, C); 853 } 854 855 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name, 856 CheckerContext &C) const { 857 // TODO: It might make sense to run this check on demand. In some cases, 858 // we should check if the environment has been cleansed here. We also might 859 // need to know if the user was reset before these calls(seteuid). 860 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 861 .Case("system", 0) 862 .Case("popen", 0) 863 .Case("execl", 0) 864 .Case("execle", 0) 865 .Case("execlp", 0) 866 .Case("execv", 0) 867 .Case("execvp", 0) 868 .Case("execvP", 0) 869 .Case("execve", 0) 870 .Case("dlopen", 0) 871 .Default(InvalidArgIndex); 872 873 if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1)) 874 return false; 875 876 return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs, 877 C); 878 } 879 880 // TODO: Should this check be a part of the CString checker? 881 // If yes, should taint be a global setting? 882 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call, 883 CheckerContext &C) const { 884 const auto *FDecl = Call.getDecl()->getAsFunction(); 885 // If the function has a buffer size argument, set ArgNum. 886 unsigned ArgNum = InvalidArgIndex; 887 unsigned BId = 0; 888 if ((BId = FDecl->getMemoryFunctionKind())) { 889 switch (BId) { 890 case Builtin::BImemcpy: 891 case Builtin::BImemmove: 892 case Builtin::BIstrncpy: 893 ArgNum = 2; 894 break; 895 case Builtin::BIstrndup: 896 ArgNum = 1; 897 break; 898 default: 899 break; 900 } 901 } 902 903 if (ArgNum == InvalidArgIndex) { 904 using CCtx = CheckerContext; 905 if (CCtx::isCLibraryFunction(FDecl, "malloc") || 906 CCtx::isCLibraryFunction(FDecl, "calloc") || 907 CCtx::isCLibraryFunction(FDecl, "alloca")) 908 ArgNum = 0; 909 else if (CCtx::isCLibraryFunction(FDecl, "memccpy")) 910 ArgNum = 3; 911 else if (CCtx::isCLibraryFunction(FDecl, "realloc")) 912 ArgNum = 1; 913 else if (CCtx::isCLibraryFunction(FDecl, "bcopy")) 914 ArgNum = 2; 915 } 916 917 return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum && 918 generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize, 919 C); 920 } 921 922 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call, 923 const FunctionData &FData, 924 CheckerContext &C) const { 925 auto It = findFunctionInConfig(CustomSinks, FData); 926 if (It == CustomSinks.end()) 927 return false; 928 929 const auto &Value = It->second; 930 const GenericTaintChecker::ArgVector &Args = Value.second; 931 for (unsigned ArgNum : Args) { 932 if (ArgNum >= Call.getNumArgs()) 933 continue; 934 935 if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C)) 936 return true; 937 } 938 939 return false; 940 } 941 942 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 943 auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); 944 std::string Option{"Config"}; 945 StringRef ConfigFile = 946 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); 947 llvm::Optional<TaintConfig> Config = 948 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); 949 if (Config) 950 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); 951 } 952 953 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { 954 return true; 955 } 956