1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Taint.h" 18 #include "Yaml.h" 19 #include "clang/AST/Attr.h" 20 #include "clang/Basic/Builtins.h" 21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/Checker.h" 24 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 27 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 28 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 29 #include "llvm/Support/YAMLTraits.h" 30 31 #include <limits> 32 #include <memory> 33 #include <utility> 34 35 using namespace clang; 36 using namespace ento; 37 using namespace taint; 38 39 namespace { 40 41 class GenericTaintChecker; 42 43 /// Check for CWE-134: Uncontrolled Format String. 44 constexpr llvm::StringLiteral MsgUncontrolledFormatString = 45 "Untrusted data is used as a format string " 46 "(CWE-134: Uncontrolled Format String)"; 47 48 /// Check for: 49 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 50 /// CWE-78, "Failure to Sanitize Data into an OS Command" 51 constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 52 "Untrusted data is passed to a system call " 53 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 54 55 /// Check if tainted data is used as a buffer size in strn.. functions, 56 /// and allocators. 57 constexpr llvm::StringLiteral MsgTaintedBufferSize = 58 "Untrusted data is used to specify the buffer size " 59 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 60 "for character data and the null terminator)"; 61 62 /// Check if tainted data is used as a custom sink's parameter. 63 constexpr llvm::StringLiteral MsgCustomSink = 64 "Untrusted data is passed to a user-defined sink"; 65 66 using ArgIdxTy = int; 67 using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>; 68 69 /// Denotes the return value. 70 constexpr ArgIdxTy ReturnValueIndex{-1}; 71 72 static ArgIdxTy fromArgumentCount(unsigned Count) { 73 assert(Count <= 74 static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) && 75 "ArgIdxTy is not large enough to represent the number of arguments."); 76 return Count; 77 } 78 79 /// Check if the region the expression evaluates to is the standard input, 80 /// and thus, is tainted. 81 /// FIXME: Move this to Taint.cpp. 82 bool isStdin(SVal Val, const ASTContext &ACtx) { 83 // FIXME: What if Val is NonParamVarRegion? 84 85 // The region should be symbolic, we do not know it's value. 86 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion()); 87 if (!SymReg) 88 return false; 89 90 // Get it's symbol and find the declaration region it's pointing to. 91 const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 92 if (!Sm) 93 return false; 94 const auto *DeclReg = dyn_cast<DeclRegion>(Sm->getRegion()); 95 if (!DeclReg) 96 return false; 97 98 // This region corresponds to a declaration, find out if it's a global/extern 99 // variable named stdin with the proper type. 100 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 101 D = D->getCanonicalDecl(); 102 // FIXME: This should look for an exact match. 103 if (D->getName().contains("stdin") && D->isExternC()) { 104 const QualType FILETy = ACtx.getFILEType().getCanonicalType(); 105 const QualType Ty = D->getType().getCanonicalType(); 106 107 if (Ty->isPointerType()) 108 return Ty->getPointeeType() == FILETy; 109 } 110 } 111 return false; 112 } 113 114 SVal getPointeeOf(const CheckerContext &C, Loc LValue) { 115 const QualType ArgTy = LValue.getType(C.getASTContext()); 116 if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) 117 return C.getState()->getSVal(LValue); 118 119 // Do not dereference void pointers. Treat them as byte pointers instead. 120 // FIXME: we might want to consider more than just the first byte. 121 return C.getState()->getSVal(LValue, C.getASTContext().CharTy); 122 } 123 124 /// Given a pointer/reference argument, return the value it refers to. 125 Optional<SVal> getPointeeOf(const CheckerContext &C, SVal Arg) { 126 if (auto LValue = Arg.getAs<Loc>()) 127 return getPointeeOf(C, *LValue); 128 return None; 129 } 130 131 /// Given a pointer, return the SVal of its pointee or if it is tainted, 132 /// otherwise return the pointer's SVal if tainted. 133 /// Also considers stdin as a taint source. 134 Optional<SVal> getTaintedPointeeOrPointer(const CheckerContext &C, SVal Arg) { 135 const ProgramStateRef State = C.getState(); 136 137 if (auto Pointee = getPointeeOf(C, Arg)) 138 if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None; 139 return Pointee; 140 141 if (isTainted(State, Arg)) 142 return Arg; 143 144 // FIXME: This should be done by the isTainted() API. 145 if (isStdin(Arg, C.getASTContext())) 146 return Arg; 147 148 return None; 149 } 150 151 bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State, 152 CheckerContext &C) { 153 return getTaintedPointeeOrPointer(C, C.getSVal(E)).hasValue(); 154 } 155 156 /// ArgSet is used to describe arguments relevant for taint detection or 157 /// taint application. A discrete set of argument indexes and a variadic 158 /// argument list signified by a starting index are supported. 159 class ArgSet { 160 public: 161 ArgSet() = default; 162 ArgSet(ArgVecTy &&DiscreteArgs, Optional<ArgIdxTy> VariadicIndex = None) 163 : DiscreteArgs(std::move(DiscreteArgs)), 164 VariadicIndex(std::move(VariadicIndex)) {} 165 166 bool contains(ArgIdxTy ArgIdx) const { 167 if (llvm::is_contained(DiscreteArgs, ArgIdx)) 168 return true; 169 170 return VariadicIndex && ArgIdx >= *VariadicIndex; 171 } 172 173 bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; } 174 175 ArgVecTy ArgsUpTo(ArgIdxTy LastArgIdx) const { 176 ArgVecTy Args; 177 for (ArgIdxTy I = ReturnValueIndex; I <= LastArgIdx; ++I) { 178 if (contains(I)) 179 Args.push_back(I); 180 } 181 return Args; 182 } 183 184 private: 185 ArgVecTy DiscreteArgs; 186 Optional<ArgIdxTy> VariadicIndex; 187 }; 188 189 /// A struct used to specify taint propagation rules for a function. 190 /// 191 /// If any of the possible taint source arguments is tainted, all of the 192 /// destination arguments should also be tainted. If ReturnValueIndex is added 193 /// to the dst list, the return value will be tainted. 194 class GenericTaintRule { 195 /// Arguments which are taints sinks and should be checked, and a report 196 /// should be emitted if taint reaches these. 197 ArgSet SinkArgs; 198 /// Arguments which should be sanitized on function return. 199 ArgSet FilterArgs; 200 /// Arguments which can participate in taint propagationa. If any of the 201 /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should 202 /// be tainted. 203 ArgSet PropSrcArgs; 204 ArgSet PropDstArgs; 205 206 /// A message that explains why the call is sensitive to taint. 207 Optional<StringRef> SinkMsg; 208 209 GenericTaintRule() = default; 210 211 GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst, 212 Optional<StringRef> SinkMsg = None) 213 : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)), 214 PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)), 215 SinkMsg(SinkMsg) {} 216 217 public: 218 /// Make a rule that reports a warning if taint reaches any of \p FilterArgs 219 /// arguments. 220 static GenericTaintRule Sink(ArgSet &&SinkArgs, 221 Optional<StringRef> Msg = None) { 222 return {std::move(SinkArgs), {}, {}, {}, Msg}; 223 } 224 225 /// Make a rule that sanitizes all FilterArgs arguments. 226 static GenericTaintRule Filter(ArgSet &&FilterArgs) { 227 return {{}, std::move(FilterArgs), {}, {}}; 228 } 229 230 /// Make a rule that unconditionally taints all Args. 231 /// If Func is provided, it must also return true for taint to propagate. 232 static GenericTaintRule Source(ArgSet &&SourceArgs) { 233 return {{}, {}, {}, std::move(SourceArgs)}; 234 } 235 236 /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. 237 static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) { 238 return {{}, {}, std::move(SrcArgs), std::move(DstArgs)}; 239 } 240 241 /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. 242 static GenericTaintRule SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, 243 ArgSet &&DstArgs, 244 Optional<StringRef> Msg = None) { 245 return { 246 std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg}; 247 } 248 249 /// Process a function which could either be a taint source, a taint sink, a 250 /// taint filter or a taint propagator. 251 void process(const GenericTaintChecker &Checker, const CallEvent &Call, 252 CheckerContext &C) const; 253 254 /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s. 255 static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) { 256 return ArgIdx == ReturnValueIndex ? Call.getOriginExpr() 257 : Call.getArgExpr(ArgIdx); 258 }; 259 260 /// Functions for custom taintedness propagation. 261 static bool UntrustedEnv(CheckerContext &C); 262 }; 263 264 using RuleLookupTy = CallDescriptionMap<GenericTaintRule>; 265 266 /// Used to parse the configuration file. 267 struct TaintConfiguration { 268 using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>; 269 enum class VariadicType { None, Src, Dst }; 270 271 struct Common { 272 std::string Name; 273 std::string Scope; 274 }; 275 276 struct Sink : Common { 277 ArgVecTy SinkArgs; 278 }; 279 280 struct Filter : Common { 281 ArgVecTy FilterArgs; 282 }; 283 284 struct Propagation : Common { 285 ArgVecTy SrcArgs; 286 ArgVecTy DstArgs; 287 VariadicType VarType; 288 ArgIdxTy VarIndex; 289 }; 290 291 std::vector<Propagation> Propagations; 292 std::vector<Filter> Filters; 293 std::vector<Sink> Sinks; 294 295 TaintConfiguration() = default; 296 TaintConfiguration(const TaintConfiguration &) = default; 297 TaintConfiguration(TaintConfiguration &&) = default; 298 TaintConfiguration &operator=(const TaintConfiguration &) = default; 299 TaintConfiguration &operator=(TaintConfiguration &&) = default; 300 }; 301 302 struct GenericTaintRuleParser { 303 GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {} 304 /// Container type used to gather call identification objects grouped into 305 /// pairs with their corresponding taint rules. It is temporary as it is used 306 /// to finally initialize RuleLookupTy, which is considered to be immutable. 307 using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>; 308 RulesContTy parseConfiguration(const std::string &Option, 309 TaintConfiguration &&Config) const; 310 311 private: 312 using NamePartsTy = llvm::SmallVector<SmallString<32>, 2>; 313 314 /// Validate part of the configuration, which contains a list of argument 315 /// indexes. 316 void validateArgVector(const std::string &Option, const ArgVecTy &Args) const; 317 318 template <typename Config> static NamePartsTy parseNameParts(const Config &C); 319 320 // Takes the config and creates a CallDescription for it and associates a Rule 321 // with that. 322 template <typename Config> 323 static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule, 324 RulesContTy &Rules); 325 326 void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P, 327 RulesContTy &Rules) const; 328 void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P, 329 RulesContTy &Rules) const; 330 void parseConfig(const std::string &Option, 331 TaintConfiguration::Propagation &&P, 332 RulesContTy &Rules) const; 333 334 CheckerManager &Mgr; 335 }; 336 337 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { 338 public: 339 static void *getTag() { 340 static int Tag; 341 return &Tag; 342 } 343 344 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 345 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 346 347 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 348 const char *Sep) const override; 349 350 /// Generate a report if the expression is tainted or points to tainted data. 351 bool generateReportIfTainted(const Expr *E, StringRef Msg, 352 CheckerContext &C) const; 353 354 private: 355 const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"}; 356 357 bool checkUncontrolledFormatString(const CallEvent &Call, 358 CheckerContext &C) const; 359 360 void taintUnsafeSocketProtocol(const CallEvent &Call, 361 CheckerContext &C) const; 362 363 /// Default taint rules are initilized with the help of a CheckerContext to 364 /// access the names of built-in functions like memcpy. 365 void initTaintRules(CheckerContext &C) const; 366 367 /// CallDescription currently cannot restrict matches to the global namespace 368 /// only, which is why multiple CallDescriptionMaps are used, as we want to 369 /// disambiguate global C functions from functions inside user-defined 370 /// namespaces. 371 // TODO: Remove separation to simplify matching logic once CallDescriptions 372 // are more expressive. 373 374 mutable Optional<RuleLookupTy> StaticTaintRules; 375 mutable Optional<RuleLookupTy> DynamicTaintRules; 376 }; 377 } // end of anonymous namespace 378 379 /// YAML serialization mapping. 380 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink) 381 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter) 382 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation) 383 384 namespace llvm { 385 namespace yaml { 386 template <> struct MappingTraits<TaintConfiguration> { 387 static void mapping(IO &IO, TaintConfiguration &Config) { 388 IO.mapOptional("Propagations", Config.Propagations); 389 IO.mapOptional("Filters", Config.Filters); 390 IO.mapOptional("Sinks", Config.Sinks); 391 } 392 }; 393 394 template <> struct MappingTraits<TaintConfiguration::Sink> { 395 static void mapping(IO &IO, TaintConfiguration::Sink &Sink) { 396 IO.mapRequired("Name", Sink.Name); 397 IO.mapOptional("Scope", Sink.Scope); 398 IO.mapRequired("Args", Sink.SinkArgs); 399 } 400 }; 401 402 template <> struct MappingTraits<TaintConfiguration::Filter> { 403 static void mapping(IO &IO, TaintConfiguration::Filter &Filter) { 404 IO.mapRequired("Name", Filter.Name); 405 IO.mapOptional("Scope", Filter.Scope); 406 IO.mapRequired("Args", Filter.FilterArgs); 407 } 408 }; 409 410 template <> struct MappingTraits<TaintConfiguration::Propagation> { 411 static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) { 412 IO.mapRequired("Name", Propagation.Name); 413 IO.mapOptional("Scope", Propagation.Scope); 414 IO.mapOptional("SrcArgs", Propagation.SrcArgs); 415 IO.mapOptional("DstArgs", Propagation.DstArgs); 416 IO.mapOptional("VariadicType", Propagation.VarType); 417 IO.mapOptional("VariadicIndex", Propagation.VarIndex); 418 } 419 }; 420 421 template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> { 422 static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) { 423 IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None); 424 IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src); 425 IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst); 426 } 427 }; 428 } // namespace yaml 429 } // namespace llvm 430 431 /// A set which is used to pass information from call pre-visit instruction 432 /// to the call post-visit. The values are signed integers, which are either 433 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 434 /// points to data, which should be tainted on return. 435 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, ArgIdxTy) 436 437 void GenericTaintRuleParser::validateArgVector(const std::string &Option, 438 const ArgVecTy &Args) const { 439 for (ArgIdxTy Arg : Args) { 440 if (Arg < ReturnValueIndex) { 441 Mgr.reportInvalidCheckerOptionValue( 442 Mgr.getChecker<GenericTaintChecker>(), Option, 443 "an argument number for propagation rules greater or equal to -1"); 444 } 445 } 446 } 447 448 template <typename Config> 449 GenericTaintRuleParser::NamePartsTy 450 GenericTaintRuleParser::parseNameParts(const Config &C) { 451 NamePartsTy NameParts; 452 if (!C.Scope.empty()) { 453 // If the Scope argument contains multiple "::" parts, those are considered 454 // namespace identifiers. 455 llvm::SmallVector<StringRef, 2> NSParts; 456 StringRef{C.Scope}.split(NSParts, "::", /*MaxSplit*/ -1, 457 /*KeepEmpty*/ false); 458 NameParts.append(NSParts.begin(), NSParts.end()); 459 } 460 NameParts.emplace_back(C.Name); 461 return NameParts; 462 } 463 464 template <typename Config> 465 void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C, 466 GenericTaintRule &&Rule, 467 RulesContTy &Rules) { 468 NamePartsTy NameParts = parseNameParts(C); 469 llvm::SmallVector<const char *, 2> CallDescParts{NameParts.size()}; 470 llvm::transform(NameParts, CallDescParts.begin(), 471 [](SmallString<32> &S) { return S.c_str(); }); 472 Rules.emplace_back(CallDescription(CallDescParts), std::move(Rule)); 473 } 474 475 void GenericTaintRuleParser::parseConfig(const std::string &Option, 476 TaintConfiguration::Sink &&S, 477 RulesContTy &Rules) const { 478 validateArgVector(Option, S.SinkArgs); 479 consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)), 480 Rules); 481 } 482 483 void GenericTaintRuleParser::parseConfig(const std::string &Option, 484 TaintConfiguration::Filter &&S, 485 RulesContTy &Rules) const { 486 validateArgVector(Option, S.FilterArgs); 487 consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)), 488 Rules); 489 } 490 491 void GenericTaintRuleParser::parseConfig(const std::string &Option, 492 TaintConfiguration::Propagation &&P, 493 RulesContTy &Rules) const { 494 validateArgVector(Option, P.SrcArgs); 495 validateArgVector(Option, P.DstArgs); 496 bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src; 497 bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst; 498 Optional<ArgIdxTy> JustVarIndex = P.VarIndex; 499 500 ArgSet SrcDesc(std::move(P.SrcArgs), IsSrcVariadic ? JustVarIndex : None); 501 ArgSet DstDesc(std::move(P.DstArgs), IsDstVariadic ? JustVarIndex : None); 502 503 consumeRulesFromConfig( 504 P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules); 505 } 506 507 GenericTaintRuleParser::RulesContTy 508 GenericTaintRuleParser::parseConfiguration(const std::string &Option, 509 TaintConfiguration &&Config) const { 510 511 RulesContTy Rules; 512 513 for (auto &F : Config.Filters) 514 parseConfig(Option, std::move(F), Rules); 515 516 for (auto &S : Config.Sinks) 517 parseConfig(Option, std::move(S), Rules); 518 519 for (auto &P : Config.Propagations) 520 parseConfig(Option, std::move(P), Rules); 521 522 return Rules; 523 } 524 525 void GenericTaintChecker::initTaintRules(CheckerContext &C) const { 526 // Check for exact name match for functions without builtin substitutes. 527 // Use qualified name, because these are C functions without namespace. 528 529 if (StaticTaintRules || DynamicTaintRules) 530 return; 531 532 using RulesConstructionTy = 533 std::vector<std::pair<CallDescription, GenericTaintRule>>; 534 using TR = GenericTaintRule; 535 536 const Builtin::Context &BI = C.getASTContext().BuiltinInfo; 537 538 RulesConstructionTy GlobalCRules{ 539 // Sources 540 {{"fdopen"}, TR::Source({{ReturnValueIndex}})}, 541 {{"fopen"}, TR::Source({{ReturnValueIndex}})}, 542 {{"freopen"}, TR::Source({{ReturnValueIndex}})}, 543 {{"getch"}, TR::Source({{ReturnValueIndex}})}, 544 {{"getchar"}, TR::Source({{ReturnValueIndex}})}, 545 {{"getchar_unlocked"}, TR::Source({{ReturnValueIndex}})}, 546 {{"gets"}, TR::Source({{0}, ReturnValueIndex})}, 547 {{"scanf"}, TR::Source({{}, 1})}, 548 {{"wgetch"}, TR::Source({{}, ReturnValueIndex})}, 549 550 // Props 551 {{"atoi"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 552 {{"atol"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 553 {{"atoll"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 554 {{"fgetc"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 555 {{"fgetln"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 556 {{"fgets"}, TR::Prop({{2}}, {{0}, ReturnValueIndex})}, 557 {{"fscanf"}, TR::Prop({{0}}, {{}, 2})}, 558 {{"sscanf"}, TR::Prop({{0}}, {{}, 2})}, 559 {{"getc"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 560 {{"getc_unlocked"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 561 {{"getdelim"}, TR::Prop({{3}}, {{0}})}, 562 {{"getline"}, TR::Prop({{2}}, {{0}})}, 563 {{"getw"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 564 {{"pread"}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})}, 565 {{"read"}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})}, 566 {{"strchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 567 {{"strrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 568 {{"tolower"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 569 {{"toupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 570 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}}, 571 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, 572 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}}, 573 TR::Prop({{1, 2}}, {{0}})}, 574 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcat)}}, 575 TR::Prop({{1, 2}}, {{0}})}, 576 {{CDF_MaybeBuiltin, {"snprintf"}}, 577 TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})}, 578 {{CDF_MaybeBuiltin, {"sprintf"}}, 579 TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})}, 580 {{CDF_MaybeBuiltin, {"strcpy"}}, 581 TR::Prop({{1}}, {{0, ReturnValueIndex}})}, 582 {{CDF_MaybeBuiltin, {"stpcpy"}}, 583 TR::Prop({{1}}, {{0, ReturnValueIndex}})}, 584 {{CDF_MaybeBuiltin, {"strcat"}}, 585 TR::Prop({{1}}, {{0, ReturnValueIndex}})}, 586 {{CDF_MaybeBuiltin, {"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 587 {{CDF_MaybeBuiltin, {"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 588 {{CDF_MaybeBuiltin, {"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, 589 590 // Sinks 591 {{"system"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 592 {{"popen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 593 {{"execl"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 594 {{"execle"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 595 {{"execlp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 596 {{"execvp"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 597 {{"execvP"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 598 {{"execve"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 599 {{"dlopen"}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, 600 {{CDF_MaybeBuiltin, {"malloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, 601 {{CDF_MaybeBuiltin, {"calloc"}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, 602 {{CDF_MaybeBuiltin, {"alloca"}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, 603 {{CDF_MaybeBuiltin, {"memccpy"}}, TR::Sink({{3}}, MsgTaintedBufferSize)}, 604 {{CDF_MaybeBuiltin, {"realloc"}}, TR::Sink({{1}}, MsgTaintedBufferSize)}, 605 {{{"setproctitle"}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, 606 {{{"setproctitle_fast"}}, 607 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, 608 609 // SinkProps 610 {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)}, 611 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, 612 MsgTaintedBufferSize)}, 613 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}}, 614 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, 615 MsgTaintedBufferSize)}, 616 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}}, 617 TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, 618 MsgTaintedBufferSize)}, 619 {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}}, 620 TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}}, 621 MsgTaintedBufferSize)}, 622 {{CDF_MaybeBuiltin, {"bcopy"}}, 623 TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}}; 624 625 // `getenv` returns taint only in untrusted environments. 626 if (TR::UntrustedEnv(C)) { 627 // void setproctitle_init(int argc, char *argv[], char *envp[]) 628 GlobalCRules.push_back( 629 {{{"setproctitle_init"}}, TR::Sink({{2}}, MsgCustomSink)}); 630 GlobalCRules.push_back({{"getenv"}, TR::Source({{ReturnValueIndex}})}); 631 } 632 633 StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()), 634 std::make_move_iterator(GlobalCRules.end())); 635 636 // User-provided taint configuration. 637 CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager(); 638 assert(Mgr); 639 GenericTaintRuleParser ConfigParser{*Mgr}; 640 std::string Option{"Config"}; 641 StringRef ConfigFile = 642 Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option); 643 llvm::Optional<TaintConfiguration> Config = 644 getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile); 645 if (!Config) { 646 // We don't have external taint config, no parsing required. 647 DynamicTaintRules = RuleLookupTy{}; 648 return; 649 } 650 651 GenericTaintRuleParser::RulesContTy Rules{ 652 ConfigParser.parseConfiguration(Option, std::move(Config.getValue()))}; 653 654 DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()), 655 std::make_move_iterator(Rules.end())); 656 } 657 658 void GenericTaintChecker::checkPreCall(const CallEvent &Call, 659 CheckerContext &C) const { 660 initTaintRules(C); 661 662 // FIXME: this should be much simpler. 663 if (const auto *Rule = 664 Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr) 665 Rule->process(*this, Call, C); 666 else if (const auto *Rule = DynamicTaintRules->lookup(Call)) 667 Rule->process(*this, Call, C); 668 669 // FIXME: These edge cases are to be eliminated from here eventually. 670 // 671 // Additional check that is not supported by CallDescription. 672 // TODO: Make CallDescription be able to match attributes such as printf-like 673 // arguments. 674 checkUncontrolledFormatString(Call, C); 675 676 // TODO: Modeling sockets should be done in a specific checker. 677 // Socket is a source, which taints the return value. 678 taintUnsafeSocketProtocol(Call, C); 679 } 680 681 void GenericTaintChecker::checkPostCall(const CallEvent &Call, 682 CheckerContext &C) const { 683 // Set the marked values as tainted. The return value only accessible from 684 // checkPostStmt. 685 ProgramStateRef State = C.getState(); 686 687 // Depending on what was tainted at pre-visit, we determined a set of 688 // arguments which should be tainted after the function returns. These are 689 // stored in the state as TaintArgsOnPostVisit set. 690 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 691 if (TaintArgs.isEmpty()) 692 return; 693 694 for (ArgIdxTy ArgNum : TaintArgs) { 695 // Special handling for the tainted return value. 696 if (ArgNum == ReturnValueIndex) { 697 State = addTaint(State, Call.getReturnValue()); 698 continue; 699 } 700 701 // The arguments are pointer arguments. The data they are pointing at is 702 // tainted after the call. 703 if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum))) 704 State = addTaint(State, *V); 705 } 706 707 // Clear up the taint info from the state. 708 State = State->remove<TaintArgsOnPostVisit>(); 709 C.addTransition(State); 710 } 711 712 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 713 const char *NL, const char *Sep) const { 714 printTaint(State, Out, NL, Sep); 715 } 716 717 void GenericTaintRule::process(const GenericTaintChecker &Checker, 718 const CallEvent &Call, CheckerContext &C) const { 719 ProgramStateRef State = C.getState(); 720 const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); 721 722 /// Iterate every call argument, and get their corresponding Expr and SVal. 723 const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { 724 for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) { 725 const Expr *E = GetArgExpr(I, Call); 726 Fun(I, E, C.getSVal(E)); 727 } 728 }; 729 730 /// Check for taint sinks. 731 ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { 732 if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C)) 733 Checker.generateReportIfTainted(E, SinkMsg.getValueOr(MsgCustomSink), C); 734 }); 735 736 /// Check for taint filters. 737 ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) { 738 if (FilterArgs.contains(I)) { 739 State = removeTaint(State, S); 740 if (auto P = getPointeeOf(C, S)) 741 State = removeTaint(State, *P); 742 } 743 }); 744 745 /// Check for taint propagation sources. 746 /// A rule is relevant if PropSrcArgs is empty, or if any of its signified 747 /// args are tainted in context of the current CallEvent. 748 bool IsMatching = PropSrcArgs.isEmpty(); 749 ForEachCallArg( 750 [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) { 751 IsMatching = IsMatching || (PropSrcArgs.contains(I) && 752 isTaintedOrPointsToTainted(E, State, C)); 753 }); 754 755 if (!IsMatching) 756 return; 757 758 const auto WouldEscape = [](SVal V, QualType Ty) -> bool { 759 if (!V.getAs<Loc>()) 760 return false; 761 762 const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified(); 763 const bool IsNonConstPtr = 764 Ty->isPointerType() && !Ty->getPointeeType().isConstQualified(); 765 766 return IsNonConstRef || IsNonConstPtr; 767 }; 768 769 /// Propagate taint where it is necessary. 770 ForEachCallArg( 771 [this, &State, WouldEscape](ArgIdxTy I, const Expr *E, SVal V) { 772 if (PropDstArgs.contains(I)) 773 State = State->add<TaintArgsOnPostVisit>(I); 774 775 // TODO: We should traverse all reachable memory regions via the 776 // escaping parameter. Instead of doing that we simply mark only the 777 // referred memory region as tainted. 778 if (WouldEscape(V, E->getType())) 779 State = State->add<TaintArgsOnPostVisit>(I); 780 }); 781 782 C.addTransition(State); 783 } 784 785 bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { 786 return !C.getAnalysisManager() 787 .getAnalyzerOptions() 788 .ShouldAssumeControlledEnvironment; 789 } 790 791 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 792 CheckerContext &C) const { 793 assert(E); 794 Optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))}; 795 796 if (!TaintedSVal) 797 return false; 798 799 // Generate diagnostic. 800 if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 801 auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); 802 report->addRange(E->getSourceRange()); 803 report->addVisitor(std::make_unique<TaintBugVisitor>(*TaintedSVal)); 804 C.emitReport(std::move(report)); 805 return true; 806 } 807 return false; 808 } 809 810 /// TODO: remove checking for printf format attributes and socket whitelisting 811 /// from GenericTaintChecker, and that means the following functions: 812 /// getPrintfFormatArgumentNum, 813 /// GenericTaintChecker::checkUncontrolledFormatString, 814 /// GenericTaintChecker::taintUnsafeSocketProtocol 815 816 static bool getPrintfFormatArgumentNum(const CallEvent &Call, 817 const CheckerContext &C, 818 ArgIdxTy &ArgNum) { 819 // Find if the function contains a format string argument. 820 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 821 // vsnprintf, syslog, custom annotated functions. 822 const Decl *CallDecl = Call.getDecl(); 823 if (!CallDecl) 824 return false; 825 const FunctionDecl *FDecl = CallDecl->getAsFunction(); 826 if (!FDecl) 827 return false; 828 829 const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); 830 831 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 832 ArgNum = Format->getFormatIdx() - 1; 833 if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum) 834 return true; 835 } 836 837 return false; 838 } 839 840 bool GenericTaintChecker::checkUncontrolledFormatString( 841 const CallEvent &Call, CheckerContext &C) const { 842 // Check if the function contains a format string argument. 843 ArgIdxTy ArgNum = 0; 844 if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) 845 return false; 846 847 // If either the format string content or the pointer itself are tainted, 848 // warn. 849 return generateReportIfTainted(Call.getArgExpr(ArgNum), 850 MsgUncontrolledFormatString, C); 851 } 852 853 void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call, 854 CheckerContext &C) const { 855 if (Call.getNumArgs() < 1) 856 return; 857 const IdentifierInfo *ID = Call.getCalleeIdentifier(); 858 if (!ID) 859 return; 860 if (!ID->getName().equals("socket")) 861 return; 862 863 SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); 864 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 865 // Allow internal communication protocols. 866 bool SafeProtocol = DomName.equals("AF_SYSTEM") || 867 DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") || 868 DomName.equals("AF_RESERVED_36"); 869 if (SafeProtocol) 870 return; 871 872 C.addTransition(C.getState()->add<TaintArgsOnPostVisit>(ReturnValueIndex)); 873 } 874 875 /// Checker registration 876 877 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 878 Mgr.registerChecker<GenericTaintChecker>(); 879 } 880 881 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { 882 return true; 883 } 884