1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Taint.h" 18 #include "Yaml.h" 19 #include "clang/AST/Attr.h" 20 #include "clang/Basic/Builtins.h" 21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/Checker.h" 24 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 27 #include "llvm/ADT/StringMap.h" 28 #include "llvm/Support/YAMLTraits.h" 29 #include <limits> 30 #include <utility> 31 32 using namespace clang; 33 using namespace ento; 34 using namespace taint; 35 36 namespace { 37 class GenericTaintChecker 38 : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> { 39 public: 40 static void *getTag() { 41 static int Tag; 42 return &Tag; 43 } 44 45 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 46 47 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 48 49 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 50 const char *Sep) const override; 51 52 using ArgVector = SmallVector<unsigned, 2>; 53 using SignedArgVector = SmallVector<int, 2>; 54 55 enum class VariadicType { None, Src, Dst }; 56 57 /// Used to parse the configuration file. 58 struct TaintConfiguration { 59 using NameArgsPair = std::pair<std::string, ArgVector>; 60 61 struct Propagation { 62 std::string Name; 63 ArgVector SrcArgs; 64 SignedArgVector DstArgs; 65 VariadicType VarType; 66 unsigned VarIndex; 67 }; 68 69 std::vector<Propagation> Propagations; 70 std::vector<NameArgsPair> Filters; 71 std::vector<NameArgsPair> Sinks; 72 73 TaintConfiguration() = default; 74 TaintConfiguration(const TaintConfiguration &) = default; 75 TaintConfiguration(TaintConfiguration &&) = default; 76 TaintConfiguration &operator=(const TaintConfiguration &) = default; 77 TaintConfiguration &operator=(TaintConfiguration &&) = default; 78 }; 79 80 /// Convert SignedArgVector to ArgVector. 81 ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, 82 SignedArgVector Args); 83 84 /// Parse the config. 85 void parseConfiguration(CheckerManager &Mgr, const std::string &Option, 86 TaintConfiguration &&Config); 87 88 static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; 89 /// Denotes the return vale. 90 static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - 91 1}; 92 93 private: 94 mutable std::unique_ptr<BugType> BT; 95 void initBugType() const { 96 if (!BT) 97 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 98 } 99 100 /// Catch taint related bugs. Check if tainted data is passed to a 101 /// system call etc. 102 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 103 104 /// Add taint sources on a pre-visit. 105 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 106 107 /// Propagate taint generated at pre-visit. 108 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 109 110 /// Check if the region the expression evaluates to is the standard input, 111 /// and thus, is tainted. 112 static bool isStdin(const Expr *E, CheckerContext &C); 113 114 /// Given a pointer argument, return the value it points to. 115 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); 116 117 /// Check for CWE-134: Uncontrolled Format String. 118 static constexpr llvm::StringLiteral MsgUncontrolledFormatString = 119 "Untrusted data is used as a format string " 120 "(CWE-134: Uncontrolled Format String)"; 121 bool checkUncontrolledFormatString(const CallExpr *CE, 122 CheckerContext &C) const; 123 124 /// Check for: 125 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 126 /// CWE-78, "Failure to Sanitize Data into an OS Command" 127 static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 128 "Untrusted data is passed to a system call " 129 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 130 bool checkSystemCall(const CallExpr *CE, StringRef Name, 131 CheckerContext &C) const; 132 133 /// Check if tainted data is used as a buffer size ins strn.. functions, 134 /// and allocators. 135 static constexpr llvm::StringLiteral MsgTaintedBufferSize = 136 "Untrusted data is used to specify the buffer size " 137 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 138 "for character data and the null terminator)"; 139 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 140 CheckerContext &C) const; 141 142 /// Check if tainted data is used as a custom sink's parameter. 143 static constexpr llvm::StringLiteral MsgCustomSink = 144 "Untrusted data is passed to a user-defined sink"; 145 bool checkCustomSinks(const CallExpr *CE, StringRef Name, 146 CheckerContext &C) const; 147 148 /// Generate a report if the expression is tainted or points to tainted data. 149 bool generateReportIfTainted(const Expr *E, StringRef Msg, 150 CheckerContext &C) const; 151 152 struct TaintPropagationRule; 153 using NameRuleMap = llvm::StringMap<TaintPropagationRule>; 154 using NameArgMap = llvm::StringMap<ArgVector>; 155 156 /// A struct used to specify taint propagation rules for a function. 157 /// 158 /// If any of the possible taint source arguments is tainted, all of the 159 /// destination arguments should also be tainted. Use InvalidArgIndex in the 160 /// src list to specify that all of the arguments can introduce taint. Use 161 /// InvalidArgIndex in the dst arguments to signify that all the non-const 162 /// pointer and reference arguments might be tainted on return. If 163 /// ReturnValueIndex is added to the dst list, the return value will be 164 /// tainted. 165 struct TaintPropagationRule { 166 using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *, 167 CheckerContext &C); 168 169 /// List of arguments which can be taint sources and should be checked. 170 ArgVector SrcArgs; 171 /// List of arguments which should be tainted on function return. 172 ArgVector DstArgs; 173 /// Index for the first variadic parameter if exist. 174 unsigned VariadicIndex; 175 /// Show when a function has variadic parameters. If it has, it marks all 176 /// of them as source or destination. 177 VariadicType VarType; 178 /// Special function for tainted source determination. If defined, it can 179 /// override the default behavior. 180 PropagationFuncType PropagationFunc; 181 182 TaintPropagationRule() 183 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), 184 PropagationFunc(nullptr) {} 185 186 TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, 187 VariadicType Var = VariadicType::None, 188 unsigned VarIndex = InvalidArgIndex, 189 PropagationFuncType Func = nullptr) 190 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), 191 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} 192 193 /// Get the propagation rule for a given function. 194 static TaintPropagationRule 195 getTaintPropagationRule(const NameRuleMap &CustomPropagations, 196 const FunctionDecl *FDecl, StringRef Name, 197 CheckerContext &C); 198 199 void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 200 void addDstArg(unsigned A) { DstArgs.push_back(A); } 201 202 bool isNull() const { 203 return SrcArgs.empty() && DstArgs.empty() && 204 VariadicType::None == VarType; 205 } 206 207 bool isDestinationArgument(unsigned ArgNum) const { 208 return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); 209 } 210 211 static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, 212 CheckerContext &C) { 213 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) 214 return true; 215 216 if (!E->getType().getTypePtr()->isPointerType()) 217 return false; 218 219 Optional<SVal> V = getPointedToSVal(C, E); 220 return (V && isTainted(State, *V)); 221 } 222 223 /// Pre-process a function which propagates taint according to the 224 /// taint rule. 225 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 226 227 // Functions for custom taintedness propagation. 228 static bool postSocket(bool IsTainted, const CallExpr *CE, 229 CheckerContext &C); 230 }; 231 232 /// Defines a map between the propagation function's name and 233 /// TaintPropagationRule. 234 NameRuleMap CustomPropagations; 235 236 /// Defines a map between the filter function's name and filtering args. 237 NameArgMap CustomFilters; 238 239 /// Defines a map between the sink function's name and sinking args. 240 NameArgMap CustomSinks; 241 }; 242 243 const unsigned GenericTaintChecker::ReturnValueIndex; 244 const unsigned GenericTaintChecker::InvalidArgIndex; 245 246 // FIXME: these lines can be removed in C++17 247 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; 248 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; 249 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; 250 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; 251 } // end of anonymous namespace 252 253 using TaintConfig = GenericTaintChecker::TaintConfiguration; 254 255 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) 256 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair) 257 258 namespace llvm { 259 namespace yaml { 260 template <> struct MappingTraits<TaintConfig> { 261 static void mapping(IO &IO, TaintConfig &Config) { 262 IO.mapOptional("Propagations", Config.Propagations); 263 IO.mapOptional("Filters", Config.Filters); 264 IO.mapOptional("Sinks", Config.Sinks); 265 } 266 }; 267 268 template <> struct MappingTraits<TaintConfig::Propagation> { 269 static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { 270 IO.mapRequired("Name", Propagation.Name); 271 IO.mapOptional("SrcArgs", Propagation.SrcArgs); 272 IO.mapOptional("DstArgs", Propagation.DstArgs); 273 IO.mapOptional("VariadicType", Propagation.VarType, 274 GenericTaintChecker::VariadicType::None); 275 IO.mapOptional("VariadicIndex", Propagation.VarIndex, 276 GenericTaintChecker::InvalidArgIndex); 277 } 278 }; 279 280 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { 281 static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { 282 IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); 283 IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); 284 IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); 285 } 286 }; 287 288 template <> struct MappingTraits<TaintConfig::NameArgsPair> { 289 static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) { 290 IO.mapRequired("Name", NameArg.first); 291 IO.mapRequired("Args", NameArg.second); 292 } 293 }; 294 } // namespace yaml 295 } // namespace llvm 296 297 /// A set which is used to pass information from call pre-visit instruction 298 /// to the call post-visit. The values are unsigned integers, which are either 299 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 300 /// points to data, which should be tainted on return. 301 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 302 303 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector( 304 CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) { 305 ArgVector Result; 306 for (int Arg : Args) { 307 if (Arg == -1) 308 Result.push_back(ReturnValueIndex); 309 else if (Arg < -1) { 310 Result.push_back(InvalidArgIndex); 311 Mgr.reportInvalidCheckerOptionValue( 312 this, Option, 313 "an argument number for propagation rules greater or equal to -1"); 314 } else 315 Result.push_back(static_cast<unsigned>(Arg)); 316 } 317 return Result; 318 } 319 320 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, 321 const std::string &Option, 322 TaintConfiguration &&Config) { 323 for (auto &P : Config.Propagations) { 324 GenericTaintChecker::CustomPropagations.try_emplace( 325 P.Name, std::move(P.SrcArgs), 326 convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex); 327 } 328 329 for (auto &F : Config.Filters) { 330 GenericTaintChecker::CustomFilters.try_emplace(F.first, 331 std::move(F.second)); 332 } 333 334 for (auto &S : Config.Sinks) { 335 GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second)); 336 } 337 } 338 339 GenericTaintChecker::TaintPropagationRule 340 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 341 const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl, 342 StringRef Name, CheckerContext &C) { 343 // TODO: Currently, we might lose precision here: we always mark a return 344 // value as tainted even if it's just a pointer, pointing to tainted data. 345 346 // Check for exact name match for functions without builtin substitutes. 347 TaintPropagationRule Rule = 348 llvm::StringSwitch<TaintPropagationRule>(Name) 349 // Source functions 350 // TODO: Add support for vfscanf & family. 351 .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex})) 352 .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex})) 353 .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex})) 354 .Case("getch", TaintPropagationRule({}, {ReturnValueIndex})) 355 .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex})) 356 .Case("getchar_unlocked", 357 TaintPropagationRule({}, {ReturnValueIndex})) 358 .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex})) 359 .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) 360 .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1)) 361 .Case("socket", 362 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None, 363 InvalidArgIndex, 364 &TaintPropagationRule::postSocket)) 365 .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex})) 366 // Propagating functions 367 .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex})) 368 .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex})) 369 .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex})) 370 .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex})) 371 .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex})) 372 .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex})) 373 .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2)) 374 .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex})) 375 .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex})) 376 .Case("getdelim", TaintPropagationRule({3}, {0})) 377 .Case("getline", TaintPropagationRule({2}, {0})) 378 .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex})) 379 .Case("pread", 380 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex})) 381 .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex})) 382 .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex})) 383 .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex})) 384 .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex})) 385 .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex})) 386 .Default(TaintPropagationRule()); 387 388 if (!Rule.isNull()) 389 return Rule; 390 391 // Check if it's one of the memory setting/copying functions. 392 // This check is specialized but faster then calling isCLibraryFunction. 393 unsigned BId = 0; 394 if ((BId = FDecl->getMemoryFunctionKind())) 395 switch (BId) { 396 case Builtin::BImemcpy: 397 case Builtin::BImemmove: 398 case Builtin::BIstrncpy: 399 case Builtin::BIstrncat: 400 return TaintPropagationRule({1, 2}, {0, ReturnValueIndex}); 401 case Builtin::BIstrlcpy: 402 case Builtin::BIstrlcat: 403 return TaintPropagationRule({1, 2}, {0}); 404 case Builtin::BIstrndup: 405 return TaintPropagationRule({0, 1}, {ReturnValueIndex}); 406 407 default: 408 break; 409 }; 410 411 // Process all other functions which could be defined as builtins. 412 if (Rule.isNull()) { 413 if (C.isCLibraryFunction(FDecl, "snprintf")) 414 return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src, 415 3); 416 else if (C.isCLibraryFunction(FDecl, "sprintf")) 417 return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src, 418 2); 419 else if (C.isCLibraryFunction(FDecl, "strcpy") || 420 C.isCLibraryFunction(FDecl, "stpcpy") || 421 C.isCLibraryFunction(FDecl, "strcat")) 422 return TaintPropagationRule({1}, {0, ReturnValueIndex}); 423 else if (C.isCLibraryFunction(FDecl, "bcopy")) 424 return TaintPropagationRule({0, 2}, {1}); 425 else if (C.isCLibraryFunction(FDecl, "strdup") || 426 C.isCLibraryFunction(FDecl, "strdupa")) 427 return TaintPropagationRule({0}, {ReturnValueIndex}); 428 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 429 return TaintPropagationRule({0}, {ReturnValueIndex}); 430 } 431 432 // Skipping the following functions, since they might be used for cleansing 433 // or smart memory copy: 434 // - memccpy - copying until hitting a special character. 435 436 auto It = CustomPropagations.find(Name); 437 if (It != CustomPropagations.end()) 438 return It->getValue(); 439 440 return TaintPropagationRule(); 441 } 442 443 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 444 CheckerContext &C) const { 445 // Check for taintedness related errors first: system call, uncontrolled 446 // format string, tainted buffer size. 447 if (checkPre(CE, C)) 448 return; 449 450 // Marks the function's arguments and/or return value tainted if it present in 451 // the list. 452 addSourcesPre(CE, C); 453 } 454 455 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 456 CheckerContext &C) const { 457 // Set the marked values as tainted. The return value only accessible from 458 // checkPostStmt. 459 propagateFromPre(CE, C); 460 } 461 462 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 463 const char *NL, const char *Sep) const { 464 printTaint(State, Out, NL, Sep); 465 } 466 467 void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 468 CheckerContext &C) const { 469 ProgramStateRef State = nullptr; 470 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 471 if (!FDecl || FDecl->getKind() != Decl::Function) 472 return; 473 474 StringRef Name = C.getCalleeName(FDecl); 475 if (Name.empty()) 476 return; 477 478 // First, try generating a propagation rule for this function. 479 TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( 480 this->CustomPropagations, FDecl, Name, C); 481 if (!Rule.isNull()) { 482 State = Rule.process(CE, C); 483 if (!State) 484 return; 485 C.addTransition(State); 486 return; 487 } 488 489 if (!State) 490 return; 491 C.addTransition(State); 492 } 493 494 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 495 CheckerContext &C) const { 496 ProgramStateRef State = C.getState(); 497 498 // Depending on what was tainted at pre-visit, we determined a set of 499 // arguments which should be tainted after the function returns. These are 500 // stored in the state as TaintArgsOnPostVisit set. 501 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 502 if (TaintArgs.isEmpty()) 503 return false; 504 505 for (unsigned ArgNum : TaintArgs) { 506 // Special handling for the tainted return value. 507 if (ArgNum == ReturnValueIndex) { 508 State = addTaint(State, CE, C.getLocationContext()); 509 continue; 510 } 511 512 // The arguments are pointer arguments. The data they are pointing at is 513 // tainted after the call. 514 if (CE->getNumArgs() < (ArgNum + 1)) 515 return false; 516 const Expr *Arg = CE->getArg(ArgNum); 517 Optional<SVal> V = getPointedToSVal(C, Arg); 518 if (V) 519 State = addTaint(State, *V); 520 } 521 522 // Clear up the taint info from the state. 523 State = State->remove<TaintArgsOnPostVisit>(); 524 525 if (State != C.getState()) { 526 C.addTransition(State); 527 return true; 528 } 529 return false; 530 } 531 532 bool GenericTaintChecker::checkPre(const CallExpr *CE, 533 CheckerContext &C) const { 534 535 if (checkUncontrolledFormatString(CE, C)) 536 return true; 537 538 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 539 if (!FDecl || FDecl->getKind() != Decl::Function) 540 return false; 541 542 StringRef Name = C.getCalleeName(FDecl); 543 if (Name.empty()) 544 return false; 545 546 if (checkSystemCall(CE, Name, C)) 547 return true; 548 549 if (checkTaintedBufferSize(CE, FDecl, C)) 550 return true; 551 552 if (checkCustomSinks(CE, Name, C)) 553 return true; 554 555 return false; 556 } 557 558 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, 559 const Expr *Arg) { 560 ProgramStateRef State = C.getState(); 561 SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 562 if (AddrVal.isUnknownOrUndef()) 563 return None; 564 565 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 566 if (!AddrLoc) 567 return None; 568 569 QualType ArgTy = Arg->getType().getCanonicalType(); 570 if (!ArgTy->isPointerType()) 571 return None; 572 573 QualType ValTy = ArgTy->getPointeeType(); 574 575 // Do not dereference void pointers. Treat them as byte pointers instead. 576 // FIXME: we might want to consider more than just the first byte. 577 if (ValTy->isVoidType()) 578 ValTy = C.getASTContext().CharTy; 579 580 return State->getSVal(*AddrLoc, ValTy); 581 } 582 583 ProgramStateRef 584 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 585 CheckerContext &C) const { 586 ProgramStateRef State = C.getState(); 587 588 // Check for taint in arguments. 589 bool IsTainted = true; 590 for (unsigned ArgNum : SrcArgs) { 591 if (ArgNum >= CE->getNumArgs()) 592 continue; 593 594 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 595 break; 596 } 597 598 // Check for taint in variadic arguments. 599 if (!IsTainted && VariadicType::Src == VarType) { 600 // Check if any of the arguments is tainted 601 for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { 602 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 603 break; 604 } 605 } 606 607 if (PropagationFunc) 608 IsTainted = PropagationFunc(IsTainted, CE, C); 609 610 if (!IsTainted) 611 return State; 612 613 // Mark the arguments which should be tainted after the function returns. 614 for (unsigned ArgNum : DstArgs) { 615 // Should mark the return value? 616 if (ArgNum == ReturnValueIndex) { 617 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 618 continue; 619 } 620 621 if (ArgNum >= CE->getNumArgs()) 622 continue; 623 624 // Mark the given argument. 625 State = State->add<TaintArgsOnPostVisit>(ArgNum); 626 } 627 628 // Mark all variadic arguments tainted if present. 629 if (VariadicType::Dst == VarType) { 630 // For all pointer and references that were passed in: 631 // If they are not pointing to const data, mark data as tainted. 632 // TODO: So far we are just going one level down; ideally we'd need to 633 // recurse here. 634 for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { 635 const Expr *Arg = CE->getArg(i); 636 // Process pointer argument. 637 const Type *ArgTy = Arg->getType().getTypePtr(); 638 QualType PType = ArgTy->getPointeeType(); 639 if ((!PType.isNull() && !PType.isConstQualified()) || 640 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 641 State = State->add<TaintArgsOnPostVisit>(i); 642 } 643 } 644 645 return State; 646 } 647 648 // If argument 0(protocol domain) is network, the return value should get taint. 649 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/, 650 const CallExpr *CE, 651 CheckerContext &C) { 652 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 653 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 654 // White list the internal communication protocols. 655 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 656 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 657 return false; 658 659 return true; 660 } 661 662 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 663 ProgramStateRef State = C.getState(); 664 SVal Val = C.getSVal(E); 665 666 // stdin is a pointer, so it would be a region. 667 const MemRegion *MemReg = Val.getAsRegion(); 668 669 // The region should be symbolic, we do not know it's value. 670 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 671 if (!SymReg) 672 return false; 673 674 // Get it's symbol and find the declaration region it's pointing to. 675 const SymbolRegionValue *Sm = 676 dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 677 if (!Sm) 678 return false; 679 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 680 if (!DeclReg) 681 return false; 682 683 // This region corresponds to a declaration, find out if it's a global/extern 684 // variable named stdin with the proper type. 685 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 686 D = D->getCanonicalDecl(); 687 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { 688 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 689 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 690 C.getASTContext().getFILEType().getCanonicalType()) 691 return true; 692 } 693 } 694 return false; 695 } 696 697 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 698 const CheckerContext &C, 699 unsigned &ArgNum) { 700 // Find if the function contains a format string argument. 701 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 702 // vsnprintf, syslog, custom annotated functions. 703 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 704 if (!FDecl) 705 return false; 706 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 707 ArgNum = Format->getFormatIdx() - 1; 708 if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum) 709 return true; 710 } 711 712 // Or if a function is named setproctitle (this is a heuristic). 713 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 714 ArgNum = 0; 715 return true; 716 } 717 718 return false; 719 } 720 721 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 722 CheckerContext &C) const { 723 assert(E); 724 725 // Check for taint. 726 ProgramStateRef State = C.getState(); 727 Optional<SVal> PointedToSVal = getPointedToSVal(C, E); 728 SVal TaintedSVal; 729 if (PointedToSVal && isTainted(State, *PointedToSVal)) 730 TaintedSVal = *PointedToSVal; 731 else if (isTainted(State, E, C.getLocationContext())) 732 TaintedSVal = C.getSVal(E); 733 else 734 return false; 735 736 // Generate diagnostic. 737 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 738 initBugType(); 739 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 740 report->addRange(E->getSourceRange()); 741 report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); 742 C.emitReport(std::move(report)); 743 return true; 744 } 745 return false; 746 } 747 748 bool GenericTaintChecker::checkUncontrolledFormatString( 749 const CallExpr *CE, CheckerContext &C) const { 750 // Check if the function contains a format string argument. 751 unsigned ArgNum = 0; 752 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 753 return false; 754 755 // If either the format string content or the pointer itself are tainted, 756 // warn. 757 return generateReportIfTainted(CE->getArg(ArgNum), 758 MsgUncontrolledFormatString, C); 759 } 760 761 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, 762 CheckerContext &C) const { 763 // TODO: It might make sense to run this check on demand. In some cases, 764 // we should check if the environment has been cleansed here. We also might 765 // need to know if the user was reset before these calls(seteuid). 766 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 767 .Case("system", 0) 768 .Case("popen", 0) 769 .Case("execl", 0) 770 .Case("execle", 0) 771 .Case("execlp", 0) 772 .Case("execv", 0) 773 .Case("execvp", 0) 774 .Case("execvP", 0) 775 .Case("execve", 0) 776 .Case("dlopen", 0) 777 .Default(InvalidArgIndex); 778 779 if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1)) 780 return false; 781 782 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); 783 } 784 785 // TODO: Should this check be a part of the CString checker? 786 // If yes, should taint be a global setting? 787 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 788 const FunctionDecl *FDecl, 789 CheckerContext &C) const { 790 // If the function has a buffer size argument, set ArgNum. 791 unsigned ArgNum = InvalidArgIndex; 792 unsigned BId = 0; 793 if ((BId = FDecl->getMemoryFunctionKind())) 794 switch (BId) { 795 case Builtin::BImemcpy: 796 case Builtin::BImemmove: 797 case Builtin::BIstrncpy: 798 ArgNum = 2; 799 break; 800 case Builtin::BIstrndup: 801 ArgNum = 1; 802 break; 803 default: 804 break; 805 }; 806 807 if (ArgNum == InvalidArgIndex) { 808 if (C.isCLibraryFunction(FDecl, "malloc") || 809 C.isCLibraryFunction(FDecl, "calloc") || 810 C.isCLibraryFunction(FDecl, "alloca")) 811 ArgNum = 0; 812 else if (C.isCLibraryFunction(FDecl, "memccpy")) 813 ArgNum = 3; 814 else if (C.isCLibraryFunction(FDecl, "realloc")) 815 ArgNum = 1; 816 else if (C.isCLibraryFunction(FDecl, "bcopy")) 817 ArgNum = 2; 818 } 819 820 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 821 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); 822 } 823 824 bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name, 825 CheckerContext &C) const { 826 auto It = CustomSinks.find(Name); 827 if (It == CustomSinks.end()) 828 return false; 829 830 const GenericTaintChecker::ArgVector &Args = It->getValue(); 831 for (unsigned ArgNum : Args) { 832 if (ArgNum >= CE->getNumArgs()) 833 continue; 834 835 if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C)) 836 return true; 837 } 838 839 return false; 840 } 841 842 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 843 auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); 844 std::string Option{"Config"}; 845 StringRef ConfigFile = 846 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); 847 llvm::Optional<TaintConfig> Config = 848 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); 849 if (Config) 850 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); 851 } 852 853 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) { 854 return true; 855 } 856