//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This checker defines the attack surface for generic taint propagation.
//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
//
//===----------------------------------------------------------------------===//

#include "Taint.h"
#include "Yaml.h"
#include "clang/AST/Attr.h"
#include "clang/Basic/Builtins.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "llvm/Support/YAMLTraits.h"

#include <algorithm>
#include <limits>
#include <memory>
#include <unordered_map>
#include <utility>

using namespace clang;
using namespace ento;
using namespace taint;

namespace {
/// Checker that marks values as tainted, propagates taint through calls and
/// reports uses of tainted data in a fixed set of sinks plus user-configured
/// ones. It subscribes to the pre-call and post-call events only.
class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
public:
  /// Returns the address of a function-local static, which serves as the
  /// unique tag of this checker.
  static void *getTag() {
    static int Tag;
    return &Tag;
  }

  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;

  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
                  const char *Sep) const override;

  /// Argument index lists. The unsigned form may also hold the special values
  /// InvalidArgIndex and ReturnValueIndex; the signed form is what the
  /// configuration file provides for destination arguments (see
  /// convertToArgVector).
  using ArgVector = SmallVector<unsigned, 2>;
  using SignedArgVector = SmallVector<int, 2>;

  /// Tells whether the variadic arguments of a function act as taint sources
  /// (Src), taint destinations (Dst), or are not considered at all (None).
  enum class VariadicType { None, Src, Dst };

  /// Used to parse the configuration file.
  struct TaintConfiguration {
    /// (function name, scope, argument indexes) triple used for both filters
    /// and sinks.
    using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;

    /// One entry of the "Propagations" list of the configuration file.
    struct Propagation {
      std::string Name;
      std::string Scope;
      ArgVector SrcArgs;
      SignedArgVector DstArgs;
      VariadicType VarType;
      unsigned VarIndex;
    };

    std::vector<Propagation> Propagations;
    std::vector<NameScopeArgs> Filters;
    std::vector<NameScopeArgs> Sinks;

    TaintConfiguration() = default;
    TaintConfiguration(const TaintConfiguration &) = default;
    TaintConfiguration(TaintConfiguration &&) = default;
    TaintConfiguration &operator=(const TaintConfiguration &) = default;
    TaintConfiguration &operator=(TaintConfiguration &&) = default;
  };

  /// Convert SignedArgVector to ArgVector.
  ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
                               const SignedArgVector &Args);

  /// Parse the config.
  void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
                          TaintConfiguration &&Config);

  /// Sentinel argument index; its meaning in source and destination lists is
  /// described on TaintPropagationRule.
  static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
  /// Denotes the return value.
  static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
                                         1};

private:
  /// Bug type shared by all reports of this checker; created lazily.
  mutable std::unique_ptr<BugType> BT;
  void initBugType() const {
    if (!BT)
      BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
                                     "Untrusted Data");
  }

  /// The callee of a call event together with its plain and qualified names.
  /// Instances can only be obtained through create().
  struct FunctionData {
    FunctionData() = delete;
    FunctionData(const FunctionDecl *FDecl, StringRef Name,
                 std::string FullName)
        : FDecl(FDecl), Name(Name), FullName(std::move(FullName)) {}
    FunctionData(const FunctionData &) = default;
    FunctionData(FunctionData &&) = default;
    FunctionData &operator=(const FunctionData &) = delete;
    FunctionData &operator=(FunctionData &&) = delete;

    /// Builds the data for \p Call. Returns None if the call has no
    /// declaration, if the callee is neither a plain function nor a C++
    /// method, or if either of its names is empty.
    static Optional<FunctionData> create(const CallEvent &Call,
                                         const CheckerContext &C) {
      if (!Call.getDecl())
        return None;

      const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
      if (!FDecl || (FDecl->getKind() != Decl::Function &&
                     FDecl->getKind() != Decl::CXXMethod))
        return None;

      StringRef Name = C.getCalleeName(FDecl);
      std::string FullName = FDecl->getQualifiedNameAsString();
      if (Name.empty() || FullName.empty())
        return None;

      return FunctionData{FDecl, Name, std::move(FullName)};
    }

    /// Returns true if the qualified name starts with \p Scope. This is a
    /// plain string-prefix test.
    bool isInScope(StringRef Scope) const {
      return StringRef(FullName).startswith(Scope);
    }

    const FunctionDecl *const FDecl;
    const StringRef Name;
    const std::string FullName;
  };

  /// Catch taint related bugs. Check if tainted data is passed to a
  /// system call etc. Returns true on matching.
  bool checkPre(const CallEvent &Call, const FunctionData &FData,
                CheckerContext &C) const;

  /// Add taint sources on a pre-visit. Returns true on matching.
  bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
                     CheckerContext &C) const;

  /// Mark filter's arguments not tainted on a pre-visit. Returns true on
  /// matching.
  bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
                     CheckerContext &C) const;

  /// Propagate taint generated at pre-visit. Returns true on matching.
  static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);

  /// Check if the region the expression evaluates to is the standard input,
  /// and thus, is tainted.
  static bool isStdin(const Expr *E, CheckerContext &C);

  /// Given a pointer argument, return the value it points to.
  static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);

  /// Check for CWE-134: Uncontrolled Format String.
  static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
      "Untrusted data is used as a format string "
      "(CWE-134: Uncontrolled Format String)";
  bool checkUncontrolledFormatString(const CallEvent &Call,
                                     CheckerContext &C) const;

  /// Check for:
  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
  /// CWE-78, "Failure to Sanitize Data into an OS Command"
  static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
      "Untrusted data is passed to a system call "
      "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
  bool checkSystemCall(const CallEvent &Call, StringRef Name,
                       CheckerContext &C) const;

  /// Check if tainted data is used as a buffer size in strn.. functions,
  /// and allocators.
  static constexpr llvm::StringLiteral MsgTaintedBufferSize =
      "Untrusted data is used to specify the buffer size "
      "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
      "for character data and the null terminator)";
  bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;

  /// Check if tainted data is used as a custom sink's parameter.
  static constexpr llvm::StringLiteral MsgCustomSink =
      "Untrusted data is passed to a user-defined sink";
  bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
                        CheckerContext &C) const;

  /// Generate a report if the expression is tainted or points to tainted data.
  bool generateReportIfTainted(const Expr *E, StringRef Msg,
                               CheckerContext &C) const;

  struct TaintPropagationRule;
  /// Multimap from a function name to a (scope, data) pair, so that several
  /// entries with the same name but different scopes can coexist.
  template <typename T>
  using ConfigDataMap =
      std::unordered_multimap<std::string, std::pair<std::string, T>>;
  using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
  using NameArgMap = ConfigDataMap<ArgVector>;

  /// Find a function with the given name and scope. Returns the first match
  /// or the end of the map.
  template <typename T>
  static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
                                   const FunctionData &FData);

  /// A struct used to specify taint propagation rules for a function.
  ///
  /// If any of the possible taint source arguments is tainted, all of the
  /// destination arguments should also be tainted. Use InvalidArgIndex in the
  /// src list to specify that all of the arguments can introduce taint. Use
  /// InvalidArgIndex in the dst arguments to signify that all the non-const
  /// pointer and reference arguments might be tainted on return. If
  /// ReturnValueIndex is added to the dst list, the return value will be
  /// tainted.
  struct TaintPropagationRule {
    using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
                                         CheckerContext &C);

    /// List of arguments which can be taint sources and should be checked.
    ArgVector SrcArgs;
    /// List of arguments which should be tainted on function return.
    ArgVector DstArgs;
    /// Index of the first variadic parameter, if there is one.
    unsigned VariadicIndex;
    /// Shows whether a function has variadic parameters. If it has, it marks
    /// all of them as source or destination.
    VariadicType VarType;
    /// Special function for tainted source determination. If defined, it can
    /// override the default behavior.
    PropagationFuncType PropagationFunc;

    /// Creates a null rule (see isNull()).
    TaintPropagationRule()
        : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
          PropagationFunc(nullptr) {}

    TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
                         VariadicType Var = VariadicType::None,
                         unsigned VarIndex = InvalidArgIndex,
                         PropagationFuncType Func = nullptr)
        : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
          VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}

    /// Get the propagation rule for a given function.
    static TaintPropagationRule
    getTaintPropagationRule(const NameRuleMap &CustomPropagations,
                            const FunctionData &FData, CheckerContext &C);

    void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
    void addDstArg(unsigned A) { DstArgs.push_back(A); }

    /// A rule is null when it names no source, no destination and no variadic
    /// handling. PropagationFunc is not taken into account.
    bool isNull() const {
      return SrcArgs.empty() && DstArgs.empty() &&
             VariadicType::None == VarType;
    }

    bool isDestinationArgument(unsigned ArgNum) const {
      return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
    }

    /// Returns true if \p E is tainted in \p State, if it is stdin, or - for
    /// expressions of pointer type only - if the value it points to is
    /// tainted.
    static bool isTaintedOrPointsToTainted(const Expr *E,
                                           const ProgramStateRef &State,
                                           CheckerContext &C) {
      if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
        return true;

      if (!E->getType().getTypePtr()->isPointerType())
        return false;

      Optional<SVal> V = getPointeeOf(C, E);
      return (V && isTainted(State, *V));
    }

    /// Pre-process a function which propagates taint according to the
    /// taint rule.
    ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;

    // Functions for custom taintedness propagation.
    static bool postSocket(bool IsTainted, const CallEvent &Call,
                           CheckerContext &C);
  };

  /// Defines a map between the propagation function's name, scope
  /// and TaintPropagationRule.
  NameRuleMap CustomPropagations;

  /// Defines a map between the filter function's name, scope and filtering
  /// args.
  NameArgMap CustomFilters;

  /// Defines a map between the sink function's name, scope and sinking args.
  NameArgMap CustomSinks;
};

// Out-of-class definitions of the in-class initialized static data members.
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;

// FIXME: these lines can be removed in C++17
constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
} // end of anonymous namespace

using TaintConfig = GenericTaintChecker::TaintConfiguration;

LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)

namespace llvm {
namespace yaml {
/// Top level of the configuration: three optional lists.
template <> struct MappingTraits<TaintConfig> {
  static void mapping(IO &IO, TaintConfig &Config) {
    IO.mapOptional("Propagations", Config.Propagations);
    IO.mapOptional("Filters", Config.Filters);
    IO.mapOptional("Sinks", Config.Sinks);
  }
};

/// A propagation entry: only "Name" is required. "VariadicType" defaults to
/// None and "VariadicIndex" to InvalidArgIndex when absent.
template <> struct MappingTraits<TaintConfig::Propagation> {
  static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
    IO.mapRequired("Name", Propagation.Name);
    IO.mapOptional("Scope", Propagation.Scope);
    IO.mapOptional("SrcArgs", Propagation.SrcArgs);
    IO.mapOptional("DstArgs", Propagation.DstArgs);
    IO.mapOptional("VariadicType", Propagation.VarType,
                   GenericTaintChecker::VariadicType::None);
    IO.mapOptional("VariadicIndex", Propagation.VarIndex,
                   GenericTaintChecker::InvalidArgIndex);
  }
};

/// Spelling of the VariadicType enumerators in the configuration file.
template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
  static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
    IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
    IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
    IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
  }
};

/// A filter or sink entry: "Name" and "Args" are required, "Scope" is
/// optional.
template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
  static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
    IO.mapRequired("Name", std::get<0>(NSA));
    IO.mapOptional("Scope", std::get<1>(NSA));
    IO.mapRequired("Args", std::get<2>(NSA));
  }
};
} // namespace yaml
} // namespace llvm

/// A set which is used to pass information from call pre-visit instruction
/// to the call post-visit. The values are unsigned integers, which are either
/// ReturnValueIndex, or indexes of the pointer/reference argument, which
/// points to data, which should be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)

/// Translates the signed destination indexes read from the configuration into
/// the unsigned representation used by the rules: -1 becomes ReturnValueIndex,
/// non-negative values are kept, and anything below -1 becomes InvalidArgIndex
/// and is additionally reported through \p Mgr as an invalid value of
/// \p Option.
GenericTaintChecker::ArgVector
GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
                                        const std::string &Option,
                                        const SignedArgVector &Args) {
  ArgVector Result;
  for (int Arg : Args) {
    if (Arg == -1)
      Result.push_back(ReturnValueIndex);
    else if (Arg < -1) {
      Result.push_back(InvalidArgIndex);
      Mgr.reportInvalidCheckerOptionValue(
          this, Option,
          "an argument number for propagation rules greater or equal to -1");
    } else
      Result.push_back(static_cast<unsigned>(Arg));
  }
  return Result;
}

/// Fills CustomPropagations, CustomFilters and CustomSinks from \p Config.
/// Entries are added with emplace into multimaps, so existing entries are
/// kept. Parts of \p Config are moved from.
void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
                                             const std::string &Option,
                                             TaintConfiguration &&Config) {
  for (auto &P : Config.Propagations) {
    GenericTaintChecker::CustomPropagations.emplace(
        P.Name,
        std::make_pair(P.Scope, TaintPropagationRule{
                                    std::move(P.SrcArgs),
                                    convertToArgVector(Mgr, Option, P.DstArgs),
                                    P.VarType, P.VarIndex}));
  }

  for (auto &F : Config.Filters) {
    GenericTaintChecker::CustomFilters.emplace(
        std::get<0>(F),
        std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
  }

  for (auto &S : Config.Sinks) {
    GenericTaintChecker::CustomSinks.emplace(
        std::get<0>(S),
        std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
  }
}

/// Looks up the entries stored under the plain name of \p FData and returns
/// the first one whose scope is empty or is a prefix of the qualified name.
/// Returns Map.end() when there is no such entry.
template <typename T>
auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
                                               const FunctionData &FData) {
  auto Range = Map.equal_range(std::string(FData.Name));
  auto It =
      std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
        const auto &Value = Entry.second;
        StringRef Scope = Value.first;
        return Scope.empty() || FData.isInScope(Scope);
      });
  // Normalize "not found in the range" to the end iterator of the whole map,
  // which is what the callers compare against.
  return It != Range.second ? It : Map.end();
}

/// Selects the rule for the callee described by \p FData. The lookup order is:
/// the built-in table keyed by the qualified name, the memory builtins, the
/// C library functions recognized by CheckerContext::isCLibraryFunction, and
/// finally the user-provided \p CustomPropagations. A null rule is returned
/// when nothing matches.
GenericTaintChecker::TaintPropagationRule
GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
    const NameRuleMap &CustomPropagations, const FunctionData &FData,
    CheckerContext &C) {
  // TODO: Currently, we might lose precision here: we always mark a return
  // value as tainted even if it's just a pointer, pointing to tainted data.

  // Check for exact name match for functions without builtin substitutes.
  // Use qualified name, because these are C functions without namespace.
  TaintPropagationRule Rule =
      llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
          // Source functions
          // TODO: Add support for vfscanf & family.
          .Case("fdopen", {{}, {ReturnValueIndex}})
          .Case("fopen", {{}, {ReturnValueIndex}})
          .Case("freopen", {{}, {ReturnValueIndex}})
          .Case("getch", {{}, {ReturnValueIndex}})
          .Case("getchar", {{}, {ReturnValueIndex}})
          .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
          .Case("getenv", {{}, {ReturnValueIndex}})
          .Case("gets", {{}, {0, ReturnValueIndex}})
          .Case("scanf", {{}, {}, VariadicType::Dst, 1})
          .Case("socket", {{},
                           {ReturnValueIndex},
                           VariadicType::None,
                           InvalidArgIndex,
                           &TaintPropagationRule::postSocket})
          .Case("wgetch", {{}, {ReturnValueIndex}})
          // Propagating functions
          .Case("atoi", {{0}, {ReturnValueIndex}})
          .Case("atol", {{0}, {ReturnValueIndex}})
          .Case("atoll", {{0}, {ReturnValueIndex}})
          .Case("fgetc", {{0}, {ReturnValueIndex}})
          .Case("fgetln", {{0}, {ReturnValueIndex}})
          .Case("fgets", {{2}, {0, ReturnValueIndex}})
          .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
          .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
          .Case("getc", {{0}, {ReturnValueIndex}})
          .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
          .Case("getdelim", {{3}, {0}})
          .Case("getline", {{2}, {0}})
          .Case("getw", {{0}, {ReturnValueIndex}})
          .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
          .Case("read", {{0, 2}, {1, ReturnValueIndex}})
          .Case("strchr", {{0}, {ReturnValueIndex}})
          .Case("strrchr", {{0}, {ReturnValueIndex}})
          .Case("tolower", {{0}, {ReturnValueIndex}})
          .Case("toupper", {{0}, {ReturnValueIndex}})
          .Default({});

  if (!Rule.isNull())
    return Rule;
  assert(FData.FDecl);

  // Check if it's one of the memory setting/copying functions.
  // This check is specialized but faster than calling isCLibraryFunction.
  const FunctionDecl *FDecl = FData.FDecl;
  unsigned BId = 0;
  if ((BId = FDecl->getMemoryFunctionKind())) {
    switch (BId) {
    case Builtin::BImemcpy:
    case Builtin::BImemmove:
    case Builtin::BIstrncpy:
    case Builtin::BIstrncat:
      return {{1, 2}, {0, ReturnValueIndex}};
    case Builtin::BIstrlcpy:
    case Builtin::BIstrlcat:
      return {{1, 2}, {0}};
    case Builtin::BIstrndup:
      return {{0, 1}, {ReturnValueIndex}};

    default:
      break;
    }
  }

  // Process all other functions which could be defined as builtins.
  // NOTE(review): Rule is necessarily null here, because a non-null Rule was
  // already returned above; the condition is always true.
  if (Rule.isNull()) {
    // Returns true if FDecl is the C library function with any of the given
    // names. Every name is tested; the results are OR-ed together.
    const auto OneOf = [FDecl](const auto &... Name) {
      // FIXME: use fold expression in C++17
      using unused = int[];
      bool ret = false;
      static_cast<void>(unused{
          0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
      return ret;
    };
    if (OneOf("snprintf"))
      return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
    if (OneOf("sprintf"))
      return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
    if (OneOf("strcpy", "stpcpy", "strcat"))
      return {{1}, {0, ReturnValueIndex}};
    if (OneOf("bcopy"))
      return {{0, 2}, {1}};
    if (OneOf("strdup", "strdupa", "wcsdup"))
      return {{0}, {ReturnValueIndex}};
  }

  // Skipping the following functions, since they might be used for cleansing or
  // smart memory copy:
  // - memccpy - copying until hitting a special character.

  // Fall back to the rules given in the configuration file.
  auto It = findFunctionInConfig(CustomPropagations, FData);
  if (It != CustomPropagations.end())
    return It->second.second;
  return {};
}

/// Pre-call callback. Runs the sink checks, then the source/propagation
/// rules, then the filters; the first stage that reports a match ends the
/// processing of this call.
void GenericTaintChecker::checkPreCall(const CallEvent &Call,
                                       CheckerContext &C) const {
  Optional<FunctionData> FData = FunctionData::create(Call, C);
  if (!FData)
    return;

  // Check for taintedness related errors first: system call, uncontrolled
  // format string, tainted buffer size.
  if (checkPre(Call, *FData, C))
    return;

  // Marks the function's arguments and/or return value tainted if it is
  // present in the list.
  if (addSourcesPre(Call, *FData, C))
    return;

  addFiltersPre(Call, *FData, C);
}

/// Post-call callback. Applies the taint that was scheduled during the
/// pre-call visit.
void GenericTaintChecker::checkPostCall(const CallEvent &Call,
                                        CheckerContext &C) const {
  // Set the marked values as tainted. The return value is only accessible
  // from checkPostStmt.
  propagateFromPre(Call, C);
}

/// Prints the taint information of \p State by forwarding to printTaint.
void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
                                     const char *NL, const char *Sep) const {
  printTaint(State, Out, NL, Sep);
}

/// Looks up the propagation rule of the callee and, if there is one, lets it
/// schedule the values to be tainted after the call. Returns true if a
/// non-null rule was found and it produced a state, in which case a
/// transition to that state is added.
bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
                                        const FunctionData &FData,
                                        CheckerContext &C) const {
  // First, try generating a propagation rule for this function.
  TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
      this->CustomPropagations, FData, C);
  if (!Rule.isNull()) {
    ProgramStateRef State = Rule.process(Call, C);
    if (State) {
      C.addTransition(State);
      return true;
    }
  }
  return false;
}

/// If the callee is a configured filter, removes the taint from the values
/// its listed arguments point to. Returns true only if that changed the
/// state.
bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
                                        const FunctionData &FData,
                                        CheckerContext &C) const {
  auto It = findFunctionInConfig(CustomFilters, FData);
  if (It == CustomFilters.end())
    return false;

  ProgramStateRef State = C.getState();
  const auto &Value = It->second;
  const ArgVector &Args = Value.second;
  for (unsigned ArgNum : Args) {
    // Indexes beyond the actual argument list are ignored.
    if (ArgNum >= Call.getNumArgs())
      continue;

    const Expr *Arg = Call.getArgExpr(ArgNum);
    Optional<SVal> V = getPointeeOf(C, Arg);
    if (V)
      State = removeTaint(State, *V);
  }

  if (State != C.getState()) {
    C.addTransition(State);
    return true;
  }
  return false;
}

/// Taints everything recorded in the TaintArgsOnPostVisit set of the current
/// state, then removes the set from the state. Returns true if the state
/// changed and a transition was added.
bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
                                           CheckerContext &C) {
  ProgramStateRef State = C.getState();

  // Depending on what was tainted at pre-visit, we determined a set of
  // arguments which should be tainted after the function returns. These are
  // stored in the state as TaintArgsOnPostVisit set.
  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
  if (TaintArgs.isEmpty())
    return false;

  for (unsigned ArgNum : TaintArgs) {
    // Special handling for the tainted return value.
    if (ArgNum == ReturnValueIndex) {
      State = addTaint(State, Call.getReturnValue());
      continue;
    }

    // The arguments are pointer arguments. The data they are pointing at is
    // tainted after the call.
    // NOTE(review): this early return drops the taint collected in State so
    // far and leaves TaintArgsOnPostVisit in the program state - confirm that
    // this is intended.
    if (Call.getNumArgs() < (ArgNum + 1))
      return false;
    const Expr *Arg = Call.getArgExpr(ArgNum);
    Optional<SVal> V = getPointeeOf(C, Arg);
    if (V)
      State = addTaint(State, *V);
  }

  // Clear up the taint info from the state.
  State = State->remove<TaintArgsOnPostVisit>();

  if (State != C.getState()) {
    C.addTransition(State);
    return true;
  }
  return false;
}

/// Runs the sink checks in a fixed order - format string, system call, buffer
/// size, custom sinks - and stops at the first one that reports.
bool GenericTaintChecker::checkPre(const CallEvent &Call,
                                   const FunctionData &FData,
                                   CheckerContext &C) const {
  if (checkUncontrolledFormatString(Call, C))
    return true;

  if (checkSystemCall(Call, FData.Name, C))
    return true;

  if (checkTaintedBufferSize(Call, C))
    return true;

  return checkCustomSinks(Call, FData, C);
}

/// Returns the value stored at the location \p Arg evaluates to, or None if
/// \p Arg evaluates to an unknown or undefined value or to something that is
/// not a location.
Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
                                                 const Expr *Arg) {
  ProgramStateRef State = C.getState();
  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
  if (AddrVal.isUnknownOrUndef())
    return None;

  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
  if (!AddrLoc)
    return None;

  // A location whose expression is not of pointer type is loaded without an
  // explicit value type.
  QualType ArgTy = Arg->getType().getCanonicalType();
  if (!ArgTy->isPointerType())
    return State->getSVal(*AddrLoc);

  QualType ValTy = ArgTy->getPointeeType();

  // Do not dereference void pointers. Treat them as byte pointers instead.
  // FIXME: we might want to consider more than just the first byte.
  if (ValTy->isVoidType())
    ValTy = C.getASTContext().CharTy;

  return State->getSVal(*AddrLoc, ValTy);
}

/// Decides whether the call is tainted according to this rule and, if so,
/// records in TaintArgsOnPostVisit which arguments and/or the return value
/// have to be tainted at the post-call visit. The current state is returned
/// unchanged when the call is not considered tainted.
ProgramStateRef
GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
                                                   CheckerContext &C) const {
  ProgramStateRef State = C.getState();

  // Check for taint in arguments.
  // IsTainted starts out true, so it stays true when SrcArgs is empty or
  // contains only indexes beyond the actual argument list.
  bool IsTainted = true;
  for (unsigned ArgNum : SrcArgs) {
    if (ArgNum >= Call.getNumArgs())
      continue;

    if ((IsTainted =
             isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
      break;
  }

  // Check for taint in variadic arguments.
  // NOTE(review): because of the initial value above, this scan is skipped
  // for rules with an empty SrcArgs list and VariadicType::Src - confirm that
  // this is intended.
  if (!IsTainted && VariadicType::Src == VarType) {
    // Check if any of the arguments is tainted
    for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
      if ((IsTainted =
               isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
        break;
    }
  }

  // A custom function, if present, has the final word.
  if (PropagationFunc)
    IsTainted = PropagationFunc(IsTainted, Call, C);

  if (!IsTainted)
    return State;

  // Mark the arguments which should be tainted after the function returns.
  for (unsigned ArgNum : DstArgs) {
    // Should mark the return value?
    if (ArgNum == ReturnValueIndex) {
      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
      continue;
    }

    if (ArgNum >= Call.getNumArgs())
      continue;

    // Mark the given argument.
    State = State->add<TaintArgsOnPostVisit>(ArgNum);
  }

  // Mark all variadic arguments tainted if present.
  if (VariadicType::Dst == VarType) {
    // For all pointer and references that were passed in:
    //   If they are not pointing to const data, mark data as tainted.
    //   TODO: So far we are just going one level down; ideally we'd need to
    //         recurse here.
    for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
      const Expr *Arg = Call.getArgExpr(i);
      // Process pointer argument.
      const Type *ArgTy = Arg->getType().getTypePtr();
      QualType PType = ArgTy->getPointeeType();
      if ((!PType.isNull() && !PType.isConstQualified()) ||
          (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
        State = State->add<TaintArgsOnPostVisit>(i);
      }
    }
  }

  return State;
}

// If argument 0(protocol domain) is network, the return value should get taint.
748 bool GenericTaintChecker::TaintPropagationRule::postSocket( 749 bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) { 750 SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); 751 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 752 // White list the internal communication protocols. 753 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 754 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 755 return false; 756 return true; 757 } 758 759 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 760 ProgramStateRef State = C.getState(); 761 SVal Val = C.getSVal(E); 762 763 // stdin is a pointer, so it would be a region. 764 const MemRegion *MemReg = Val.getAsRegion(); 765 766 // The region should be symbolic, we do not know it's value. 767 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 768 if (!SymReg) 769 return false; 770 771 // Get it's symbol and find the declaration region it's pointing to. 772 const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 773 if (!Sm) 774 return false; 775 const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 776 if (!DeclReg) 777 return false; 778 779 // This region corresponds to a declaration, find out if it's a global/extern 780 // variable named stdin with the proper type. 781 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 782 D = D->getCanonicalDecl(); 783 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { 784 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 785 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 786 C.getASTContext().getFILEType().getCanonicalType()) 787 return true; 788 } 789 } 790 return false; 791 } 792 793 static bool getPrintfFormatArgumentNum(const CallEvent &Call, 794 const CheckerContext &C, 795 unsigned &ArgNum) { 796 // Find if the function contains a format string argument. 
797 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 798 // vsnprintf, syslog, custom annotated functions. 799 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 800 if (!FDecl) 801 return false; 802 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 803 ArgNum = Format->getFormatIdx() - 1; 804 if ((Format->getType()->getName() == "printf") && 805 Call.getNumArgs() > ArgNum) 806 return true; 807 } 808 809 // Or if a function is named setproctitle (this is a heuristic). 810 if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) { 811 ArgNum = 0; 812 return true; 813 } 814 815 return false; 816 } 817 818 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 819 CheckerContext &C) const { 820 assert(E); 821 822 // Check for taint. 823 ProgramStateRef State = C.getState(); 824 Optional<SVal> PointedToSVal = getPointeeOf(C, E); 825 SVal TaintedSVal; 826 if (PointedToSVal && isTainted(State, *PointedToSVal)) 827 TaintedSVal = *PointedToSVal; 828 else if (isTainted(State, E, C.getLocationContext())) 829 TaintedSVal = C.getSVal(E); 830 else 831 return false; 832 833 // Generate diagnostic. 834 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 835 initBugType(); 836 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 837 report->addRange(E->getSourceRange()); 838 report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); 839 C.emitReport(std::move(report)); 840 return true; 841 } 842 return false; 843 } 844 845 bool GenericTaintChecker::checkUncontrolledFormatString( 846 const CallEvent &Call, CheckerContext &C) const { 847 // Check if the function contains a format string argument. 848 unsigned ArgNum = 0; 849 if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) 850 return false; 851 852 // If either the format string content or the pointer itself are tainted, 853 // warn. 
854 return generateReportIfTainted(Call.getArgExpr(ArgNum), 855 MsgUncontrolledFormatString, C); 856 } 857 858 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name, 859 CheckerContext &C) const { 860 // TODO: It might make sense to run this check on demand. In some cases, 861 // we should check if the environment has been cleansed here. We also might 862 // need to know if the user was reset before these calls(seteuid). 863 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 864 .Case("system", 0) 865 .Case("popen", 0) 866 .Case("execl", 0) 867 .Case("execle", 0) 868 .Case("execlp", 0) 869 .Case("execv", 0) 870 .Case("execvp", 0) 871 .Case("execvP", 0) 872 .Case("execve", 0) 873 .Case("dlopen", 0) 874 .Default(InvalidArgIndex); 875 876 if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1)) 877 return false; 878 879 return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs, 880 C); 881 } 882 883 // TODO: Should this check be a part of the CString checker? 884 // If yes, should taint be a global setting? 885 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call, 886 CheckerContext &C) const { 887 const auto *FDecl = Call.getDecl()->getAsFunction(); 888 // If the function has a buffer size argument, set ArgNum. 
889 unsigned ArgNum = InvalidArgIndex; 890 unsigned BId = 0; 891 if ((BId = FDecl->getMemoryFunctionKind())) { 892 switch (BId) { 893 case Builtin::BImemcpy: 894 case Builtin::BImemmove: 895 case Builtin::BIstrncpy: 896 ArgNum = 2; 897 break; 898 case Builtin::BIstrndup: 899 ArgNum = 1; 900 break; 901 default: 902 break; 903 } 904 } 905 906 if (ArgNum == InvalidArgIndex) { 907 using CCtx = CheckerContext; 908 if (CCtx::isCLibraryFunction(FDecl, "malloc") || 909 CCtx::isCLibraryFunction(FDecl, "calloc") || 910 CCtx::isCLibraryFunction(FDecl, "alloca")) 911 ArgNum = 0; 912 else if (CCtx::isCLibraryFunction(FDecl, "memccpy")) 913 ArgNum = 3; 914 else if (CCtx::isCLibraryFunction(FDecl, "realloc")) 915 ArgNum = 1; 916 else if (CCtx::isCLibraryFunction(FDecl, "bcopy")) 917 ArgNum = 2; 918 } 919 920 return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum && 921 generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize, 922 C); 923 } 924 925 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call, 926 const FunctionData &FData, 927 CheckerContext &C) const { 928 auto It = findFunctionInConfig(CustomSinks, FData); 929 if (It == CustomSinks.end()) 930 return false; 931 932 const auto &Value = It->second; 933 const GenericTaintChecker::ArgVector &Args = Value.second; 934 for (unsigned ArgNum : Args) { 935 if (ArgNum >= Call.getNumArgs()) 936 continue; 937 938 if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C)) 939 return true; 940 } 941 942 return false; 943 } 944 945 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 946 auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); 947 std::string Option{"Config"}; 948 StringRef ConfigFile = 949 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); 950 llvm::Optional<TaintConfig> Config = 951 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); 952 if (Config) 953 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); 954 } 955 
956 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { 957 return true; 958 } 959