1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker defines the attack surface for generic taint propagation. 10 // 11 // The taint information produced by it might be useful to other checkers. For 12 // example, checkers should report errors which involve tainted data more 13 // aggressively, even if the involved symbols are under constrained. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Yaml.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/Basic/Builtins.h" 20 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 21 #include "clang/StaticAnalyzer/Checkers/Taint.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/Checker.h" 24 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" 26 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 27 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 28 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 29 #include "llvm/ADT/StringExtras.h" 30 #include "llvm/ADT/StringRef.h" 31 #include "llvm/Support/YAMLTraits.h" 32 33 #include <limits> 34 #include <memory> 35 #include <optional> 36 #include <utility> 37 #include <vector> 38 39 #define DEBUG_TYPE "taint-checker" 40 41 using namespace clang; 42 using namespace ento; 43 using namespace taint; 44 45 using llvm::ImmutableSet; 46 47 namespace { 48 49 class GenericTaintChecker; 50 51 /// Check for CWE-134: Uncontrolled Format String. 
52 constexpr llvm::StringLiteral MsgUncontrolledFormatString = 53 "Untrusted data is used as a format string " 54 "(CWE-134: Uncontrolled Format String)"; 55 56 /// Check for: 57 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 58 /// CWE-78, "Failure to Sanitize Data into an OS Command" 59 constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 60 "Untrusted data is passed to a system call " 61 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 62 63 /// Check if tainted data is used as a custom sink's parameter. 64 constexpr llvm::StringLiteral MsgCustomSink = 65 "Untrusted data is passed to a user-defined sink"; 66 67 using ArgIdxTy = int; 68 using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>; 69 70 /// Denotes the return value. 71 constexpr ArgIdxTy ReturnValueIndex{-1}; 72 73 static ArgIdxTy fromArgumentCount(unsigned Count) { 74 assert(Count <= 75 static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) && 76 "ArgIdxTy is not large enough to represent the number of arguments."); 77 return Count; 78 } 79 80 /// Check if the region the expression evaluates to is the standard input, 81 /// and thus, is tainted. 82 /// FIXME: Move this to Taint.cpp. 83 bool isStdin(SVal Val, const ASTContext &ACtx) { 84 // FIXME: What if Val is NonParamVarRegion? 85 86 // The region should be symbolic, we do not know it's value. 87 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion()); 88 if (!SymReg) 89 return false; 90 91 // Get it's symbol and find the declaration region it's pointing to. 92 const auto *DeclReg = 93 dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion()); 94 if (!DeclReg) 95 return false; 96 97 // This region corresponds to a declaration, find out if it's a global/extern 98 // variable named stdin with the proper type. 
99 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 100 D = D->getCanonicalDecl(); 101 if (D->getName() == "stdin" && D->hasExternalStorage() && D->isExternC()) { 102 const QualType FILETy = ACtx.getFILEType().getCanonicalType(); 103 const QualType Ty = D->getType().getCanonicalType(); 104 105 if (Ty->isPointerType()) 106 return Ty->getPointeeType() == FILETy; 107 } 108 } 109 return false; 110 } 111 112 SVal getPointeeOf(ProgramStateRef State, Loc LValue) { 113 const QualType ArgTy = LValue.getType(State->getStateManager().getContext()); 114 if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) 115 return State->getSVal(LValue); 116 117 // Do not dereference void pointers. Treat them as byte pointers instead. 118 // FIXME: we might want to consider more than just the first byte. 119 return State->getSVal(LValue, State->getStateManager().getContext().CharTy); 120 } 121 122 /// Given a pointer/reference argument, return the value it refers to. 123 std::optional<SVal> getPointeeOf(ProgramStateRef State, SVal Arg) { 124 if (auto LValue = Arg.getAs<Loc>()) 125 return getPointeeOf(State, *LValue); 126 return std::nullopt; 127 } 128 129 /// Given a pointer, return the SVal of its pointee or if it is tainted, 130 /// otherwise return the pointer's SVal if tainted. 131 /// Also considers stdin as a taint source. 132 std::optional<SVal> getTaintedPointeeOrPointer(ProgramStateRef State, 133 SVal Arg) { 134 if (auto Pointee = getPointeeOf(State, Arg)) 135 if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None; 136 return Pointee; 137 138 if (isTainted(State, Arg)) 139 return Arg; 140 return std::nullopt; 141 } 142 143 bool isTaintedOrPointsToTainted(ProgramStateRef State, SVal ExprSVal) { 144 return getTaintedPointeeOrPointer(State, ExprSVal).has_value(); 145 } 146 147 /// Helps in printing taint diagnostics. 
148 /// Marks the incoming parameters of a function interesting (to be printed) 149 /// when the return value, or the outgoing parameters are tainted. 150 const NoteTag *taintOriginTrackerTag(CheckerContext &C, 151 std::vector<SymbolRef> TaintedSymbols, 152 std::vector<ArgIdxTy> TaintedArgs, 153 const LocationContext *CallLocation) { 154 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols), 155 TaintedArgs = std::move(TaintedArgs), CallLocation]( 156 PathSensitiveBugReport &BR) -> std::string { 157 SmallString<256> Msg; 158 // We give diagnostics only for taint related reports 159 if (!BR.isInteresting(CallLocation) || 160 BR.getBugType().getCategory() != categories::TaintedData) { 161 return ""; 162 } 163 if (TaintedSymbols.empty()) 164 return "Taint originated here"; 165 166 for (auto Sym : TaintedSymbols) { 167 BR.markInteresting(Sym); 168 } 169 LLVM_DEBUG(for (auto Arg 170 : TaintedArgs) { 171 llvm::dbgs() << "Taint Propagated from argument " << Arg + 1 << "\n"; 172 }); 173 return ""; 174 }); 175 } 176 177 /// Helps in printing taint diagnostics. 178 /// Marks the function interesting (to be printed) 179 /// when the return value, or the outgoing parameters are tainted. 
180 const NoteTag *taintPropagationExplainerTag( 181 CheckerContext &C, std::vector<SymbolRef> TaintedSymbols, 182 std::vector<ArgIdxTy> TaintedArgs, const LocationContext *CallLocation) { 183 assert(TaintedSymbols.size() == TaintedArgs.size()); 184 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols), 185 TaintedArgs = std::move(TaintedArgs), CallLocation]( 186 PathSensitiveBugReport &BR) -> std::string { 187 SmallString<256> Msg; 188 llvm::raw_svector_ostream Out(Msg); 189 // We give diagnostics only for taint related reports 190 if (TaintedSymbols.empty() || 191 BR.getBugType().getCategory() != categories::TaintedData) { 192 return ""; 193 } 194 int nofTaintedArgs = 0; 195 for (auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) { 196 if (BR.isInteresting(Sym)) { 197 BR.markInteresting(CallLocation); 198 if (TaintedArgs[Idx] != ReturnValueIndex) { 199 LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument " 200 << TaintedArgs[Idx] + 1 << "\n"); 201 if (nofTaintedArgs == 0) 202 Out << "Taint propagated to the "; 203 else 204 Out << ", "; 205 Out << TaintedArgs[Idx] + 1 206 << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) << " argument"; 207 nofTaintedArgs++; 208 } else { 209 LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n"); 210 Out << "Taint propagated to the return value"; 211 } 212 } 213 } 214 return std::string(Out.str()); 215 }); 216 } 217 218 /// ArgSet is used to describe arguments relevant for taint detection or 219 /// taint application. A discrete set of argument indexes and a variadic 220 /// argument list signified by a starting index are supported. 
221 class ArgSet { 222 public: 223 ArgSet() = default; 224 ArgSet(ArgVecTy &&DiscreteArgs, 225 std::optional<ArgIdxTy> VariadicIndex = std::nullopt) 226 : DiscreteArgs(std::move(DiscreteArgs)), 227 VariadicIndex(std::move(VariadicIndex)) {} 228 229 bool contains(ArgIdxTy ArgIdx) const { 230 if (llvm::is_contained(DiscreteArgs, ArgIdx)) 231 return true; 232 233 return VariadicIndex && ArgIdx >= *VariadicIndex; 234 } 235 236 bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; } 237 238 private: 239 ArgVecTy DiscreteArgs; 240 std::optional<ArgIdxTy> VariadicIndex; 241 }; 242 243 /// A struct used to specify taint propagation rules for a function. 244 /// 245 /// If any of the possible taint source arguments is tainted, all of the 246 /// destination arguments should also be tainted. If ReturnValueIndex is added 247 /// to the dst list, the return value will be tainted. 248 class GenericTaintRule { 249 /// Arguments which are taints sinks and should be checked, and a report 250 /// should be emitted if taint reaches these. 251 ArgSet SinkArgs; 252 /// Arguments which should be sanitized on function return. 253 ArgSet FilterArgs; 254 /// Arguments which can participate in taint propagation. If any of the 255 /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should 256 /// be tainted. 257 ArgSet PropSrcArgs; 258 ArgSet PropDstArgs; 259 260 /// A message that explains why the call is sensitive to taint. 261 std::optional<StringRef> SinkMsg; 262 263 GenericTaintRule() = default; 264 265 GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst, 266 std::optional<StringRef> SinkMsg = std::nullopt) 267 : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)), 268 PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)), 269 SinkMsg(SinkMsg) {} 270 271 public: 272 /// Make a rule that reports a warning if taint reaches any of \p FilterArgs 273 /// arguments. 
274 static GenericTaintRule Sink(ArgSet &&SinkArgs, 275 std::optional<StringRef> Msg = std::nullopt) { 276 return {std::move(SinkArgs), {}, {}, {}, Msg}; 277 } 278 279 /// Make a rule that sanitizes all FilterArgs arguments. 280 static GenericTaintRule Filter(ArgSet &&FilterArgs) { 281 return {{}, std::move(FilterArgs), {}, {}}; 282 } 283 284 /// Make a rule that unconditionally taints all Args. 285 /// If Func is provided, it must also return true for taint to propagate. 286 static GenericTaintRule Source(ArgSet &&SourceArgs) { 287 return {{}, {}, {}, std::move(SourceArgs)}; 288 } 289 290 /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. 291 static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) { 292 return {{}, {}, std::move(SrcArgs), std::move(DstArgs)}; 293 } 294 295 /// Process a function which could either be a taint source, a taint sink, a 296 /// taint filter or a taint propagator. 297 void process(const GenericTaintChecker &Checker, const CallEvent &Call, 298 CheckerContext &C) const; 299 300 /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s. 301 static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) { 302 return ArgIdx == ReturnValueIndex ? Call.getOriginExpr() 303 : Call.getArgExpr(ArgIdx); 304 }; 305 306 /// Functions for custom taintedness propagation. 307 static bool UntrustedEnv(CheckerContext &C); 308 }; 309 310 using RuleLookupTy = CallDescriptionMap<GenericTaintRule>; 311 312 /// Used to parse the configuration file. 
313 struct TaintConfiguration { 314 using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>; 315 enum class VariadicType { None, Src, Dst }; 316 317 struct Common { 318 std::string Name; 319 std::string Scope; 320 }; 321 322 struct Sink : Common { 323 ArgVecTy SinkArgs; 324 }; 325 326 struct Filter : Common { 327 ArgVecTy FilterArgs; 328 }; 329 330 struct Propagation : Common { 331 ArgVecTy SrcArgs; 332 ArgVecTy DstArgs; 333 VariadicType VarType; 334 ArgIdxTy VarIndex; 335 }; 336 337 std::vector<Propagation> Propagations; 338 std::vector<Filter> Filters; 339 std::vector<Sink> Sinks; 340 341 TaintConfiguration() = default; 342 TaintConfiguration(const TaintConfiguration &) = default; 343 TaintConfiguration(TaintConfiguration &&) = default; 344 TaintConfiguration &operator=(const TaintConfiguration &) = default; 345 TaintConfiguration &operator=(TaintConfiguration &&) = default; 346 }; 347 348 struct GenericTaintRuleParser { 349 GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {} 350 /// Container type used to gather call identification objects grouped into 351 /// pairs with their corresponding taint rules. It is temporary as it is used 352 /// to finally initialize RuleLookupTy, which is considered to be immutable. 353 using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>; 354 RulesContTy parseConfiguration(const std::string &Option, 355 TaintConfiguration &&Config) const; 356 357 private: 358 using NamePartsTy = llvm::SmallVector<StringRef, 2>; 359 360 /// Validate part of the configuration, which contains a list of argument 361 /// indexes. 362 void validateArgVector(const std::string &Option, const ArgVecTy &Args) const; 363 364 template <typename Config> static NamePartsTy parseNameParts(const Config &C); 365 366 // Takes the config and creates a CallDescription for it and associates a Rule 367 // with that. 
368 template <typename Config> 369 static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule, 370 RulesContTy &Rules); 371 372 void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P, 373 RulesContTy &Rules) const; 374 void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P, 375 RulesContTy &Rules) const; 376 void parseConfig(const std::string &Option, 377 TaintConfiguration::Propagation &&P, 378 RulesContTy &Rules) const; 379 380 CheckerManager &Mgr; 381 }; 382 383 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { 384 public: 385 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 386 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 387 388 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 389 const char *Sep) const override; 390 391 /// Generate a report if the expression is tainted or points to tainted data. 392 bool generateReportIfTainted(const Expr *E, StringRef Msg, 393 CheckerContext &C) const; 394 395 bool isTaintReporterCheckerEnabled = false; 396 std::optional<BugType> BT; 397 398 private: 399 bool checkUncontrolledFormatString(const CallEvent &Call, 400 CheckerContext &C) const; 401 402 void taintUnsafeSocketProtocol(const CallEvent &Call, 403 CheckerContext &C) const; 404 405 /// The taint rules are initalized with the help of a CheckerContext to 406 /// access user-provided configuration. 407 void initTaintRules(CheckerContext &C) const; 408 409 // TODO: The two separate `CallDescriptionMap`s were introduced when 410 // `CallDescription` was unable to restrict matches to the global namespace 411 // only. This limitation no longer exists, so the following two maps should 412 // be unified. 413 mutable std::optional<RuleLookupTy> StaticTaintRules; 414 mutable std::optional<RuleLookupTy> DynamicTaintRules; 415 }; 416 } // end of anonymous namespace 417 418 /// YAML serialization mapping. 
// Each of the three entry kinds is stored in a std::vector inside
// TaintConfiguration; declare those vectors as YAML sequences.
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)

namespace llvm {
namespace yaml {
/// Top-level document: three optional lists keyed "Propagations", "Filters"
/// and "Sinks".
template <> struct MappingTraits<TaintConfiguration> {
  static void mapping(IO &IO, TaintConfiguration &Config) {
    IO.mapOptional("Propagations", Config.Propagations);
    IO.mapOptional("Filters", Config.Filters);
    IO.mapOptional("Sinks", Config.Sinks);
  }
};

/// Sink entry: "Name" and "Args" are required, "Scope" is optional.
template <> struct MappingTraits<TaintConfiguration::Sink> {
  static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
    IO.mapRequired("Name", Sink.Name);
    IO.mapOptional("Scope", Sink.Scope);
    IO.mapRequired("Args", Sink.SinkArgs);
  }
};

/// Filter entry: "Name" and "Args" are required, "Scope" is optional.
template <> struct MappingTraits<TaintConfiguration::Filter> {
  static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
    IO.mapRequired("Name", Filter.Name);
    IO.mapOptional("Scope", Filter.Scope);
    IO.mapRequired("Args", Filter.FilterArgs);
  }
};

/// Propagation entry: only "Name" is required; "Scope", "SrcArgs", "DstArgs",
/// "VariadicType" and "VariadicIndex" are all optional.
template <> struct MappingTraits<TaintConfiguration::Propagation> {
  static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
    IO.mapRequired("Name", Propagation.Name);
    IO.mapOptional("Scope", Propagation.Scope);
    IO.mapOptional("SrcArgs", Propagation.SrcArgs);
    IO.mapOptional("DstArgs", Propagation.DstArgs);
    IO.mapOptional("VariadicType", Propagation.VarType);
    IO.mapOptional("VariadicIndex", Propagation.VarIndex);
  }
};

/// Textual spellings accepted for VariadicType: "None", "Src" and "Dst".
template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
  static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) {
    IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None);
    IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src);
    IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst);
  }
};
} // namespace yaml
} // namespace llvm

/// A map which is used to pass information from call pre-visit instruction
/// to the call post-visit. It is keyed by location context; the values are
/// sets of signed integers, which are either ReturnValueIndex, or indexes of
/// the pointer/reference argument, which points to data, which should be
/// tainted on return.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *,
                               ImmutableSet<ArgIdxTy>)
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy)

/// Report an invalid checker option value for every index in \p Args that is
/// smaller than ReturnValueIndex (-1). \p Option names the offending option.
void GenericTaintRuleParser::validateArgVector(const std::string &Option,
                                               const ArgVecTy &Args) const {
  for (ArgIdxTy Arg : Args) {
    if (Arg < ReturnValueIndex) {
      Mgr.reportInvalidCheckerOptionValue(
          Mgr.getChecker<GenericTaintChecker>(), Option,
          "an argument number for propagation rules greater or equal to -1");
    }
  }
}

/// Collect the qualified name of a configuration entry: the "::"-separated
/// components of its Scope (empty components are dropped) followed by its
/// Name. The returned StringRefs refer to the strings stored in \p C.
template <typename Config>
GenericTaintRuleParser::NamePartsTy
GenericTaintRuleParser::parseNameParts(const Config &C) {
  NamePartsTy NameParts;
  if (!C.Scope.empty()) {
    // If the Scope argument contains multiple "::" parts, those are considered
    // namespace identifiers.
    StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1,
                             /*KeepEmpty*/ false);
  }
  NameParts.emplace_back(C.Name);
  return NameParts;
}

/// Append to \p Rules a pair made of a CallDescription (matching mode
/// CDM::Unspecified) built from the name parts of \p C, and \p Rule.
template <typename Config>
void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C,
                                                    GenericTaintRule &&Rule,
                                                    RulesContTy &Rules) {
  NamePartsTy NameParts = parseNameParts(C);
  Rules.emplace_back(CallDescription(CDM::Unspecified, NameParts),
                     std::move(Rule));
}

/// Validate a sink entry and append the corresponding sink rule to \p Rules.
void GenericTaintRuleParser::parseConfig(const std::string &Option,
                                         TaintConfiguration::Sink &&S,
                                         RulesContTy &Rules) const {
  validateArgVector(Option, S.SinkArgs);
  consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
                         Rules);
}

/// Validate a filter entry and append the corresponding filter rule to
/// \p Rules.
void GenericTaintRuleParser::parseConfig(const std::string &Option,
                                         TaintConfiguration::Filter &&S,
                                         RulesContTy &Rules) const {
  validateArgVector(Option, S.FilterArgs);
  consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
                         Rules);
}

/// Validate a propagation entry and append the corresponding propagation rule
/// to \p Rules. The variadic index is attached to the source set when the
/// entry's variadic type is Src, to the destination set when it is Dst, and to
/// neither otherwise.
void GenericTaintRuleParser::parseConfig(const std::string &Option,
                                         TaintConfiguration::Propagation &&P,
                                         RulesContTy &Rules) const {
  validateArgVector(Option, P.SrcArgs);
  validateArgVector(Option, P.DstArgs);
  bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src;
  bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst;
  std::optional<ArgIdxTy> JustVarIndex = P.VarIndex;

  ArgSet SrcDesc(std::move(P.SrcArgs),
                 IsSrcVariadic ? JustVarIndex : std::nullopt);
  ArgSet DstDesc(std::move(P.DstArgs),
                 IsDstVariadic ? JustVarIndex : std::nullopt);

  consumeRulesFromConfig(
      P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
}

/// Convert the whole configuration to rules. Entries are processed in the
/// order filters, sinks, propagations.
GenericTaintRuleParser::RulesContTy
GenericTaintRuleParser::parseConfiguration(const std::string &Option,
                                           TaintConfiguration &&Config) const {

  RulesContTy Rules;

  for (auto &F : Config.Filters)
    parseConfig(Option, std::move(F), Rules);

  for (auto &S : Config.Sinks)
    parseConfig(Option, std::move(S), Rules);

  for (auto &P : Config.Propagations)
    parseConfig(Option, std::move(P), Rules);

  return Rules;
}

/// Populate StaticTaintRules (the built-in table below) and DynamicTaintRules
/// (rules read from the checker's "Config" option). Does nothing if either map
/// has already been set. When no configuration is available the dynamic map is
/// set to an empty one.
void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
  // Check for exact name match for functions without builtin substitutes.
  // Use qualified name, because these are C functions without namespace.

  if (StaticTaintRules || DynamicTaintRules)
    return;

  using RulesConstructionTy =
      std::vector<std::pair<CallDescription, GenericTaintRule>>;
  using TR = GenericTaintRule;

  // Reading the entries: indexes are 0-based argument positions and
  // ReturnValueIndex stands for the return value. In an argument set written
  // as `{{a, b}, n}` the trailing `n` is the index where a variadic tail
  // starts.
  RulesConstructionTy GlobalCRules{
      // Sources
      {{CDM::CLibrary, {"fdopen"}}, TR::Source({{ReturnValueIndex}})},
      {{CDM::CLibrary, {"fopen"}}, TR::Source({{ReturnValueIndex}})},
      {{CDM::CLibrary, {"freopen"}}, TR::Source({{ReturnValueIndex}})},
      {{CDM::CLibrary, {"getch"}}, TR::Source({{ReturnValueIndex}})},
      {{CDM::CLibrary, {"getchar"}}, TR::Source({{ReturnValueIndex}})},
      {{CDM::CLibrary, {"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
      {{CDM::CLibrary, {"gets"}}, TR::Source({{0, ReturnValueIndex}})},
      {{CDM::CLibrary, {"gets_s"}}, TR::Source({{0, ReturnValueIndex}})},
      {{CDM::CLibrary, {"scanf"}}, TR::Source({{}, 1})},
      {{CDM::CLibrary, {"scanf_s"}}, TR::Source({{}, 1})},
      {{CDM::CLibrary, {"wgetch"}}, TR::Source({{ReturnValueIndex}})},
      // Sometimes the line between taint sources and propagators is blurry.
      // _IO_getc is chosen to be a source, but could also be a propagator.
      // This way it is simpler, as modeling it as a propagator would require
      // to model the possible sources of _IO_FILE * values, which the _IO_getc
      // function takes as parameters.
      {{CDM::CLibrary, {"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
      {{CDM::CLibrary, {"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
      {{CDM::CLibrary, {"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
      {{CDM::CLibrary, {"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
      {{CDM::CLibrary, {"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
      {{CDM::CLibrary, {"get_current_dir_name"}},
       TR::Source({{ReturnValueIndex}})},
      {{CDM::CLibrary, {"gethostname"}}, TR::Source({{0}})},
      {{CDM::CLibrary, {"getnameinfo"}}, TR::Source({{2, 4}})},
      {{CDM::CLibrary, {"getseuserbyname"}}, TR::Source({{1, 2}})},
      {{CDM::CLibrary, {"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
      {{CDM::CLibrary, {"getlogin"}}, TR::Source({{ReturnValueIndex}})},
      {{CDM::CLibrary, {"getlogin_r"}}, TR::Source({{0}})},

      // Props
      {{CDM::CLibrary, {"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"fgets"}},
       TR::Prop({{2}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"fgetws"}},
       TR::Prop({{2}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibrary, {"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
      {{CDM::CLibrary, {"fscanf_s"}}, TR::Prop({{0}}, {{}, 2})},
      {{CDM::CLibrary, {"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
      {{CDM::CLibrary, {"sscanf_s"}}, TR::Prop({{0}}, {{}, 2})},

      {{CDM::CLibrary, {"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"getc_unlocked"}},
       TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"getdelim"}}, TR::Prop({{3}}, {{0}})},
      // TODO: this intends to match the C function `getline()`, but the call
      // description also matches the C++ function `std::getline()`; it should
      // be ruled out by some additional logic.
      {{CDM::CLibrary, {"getline"}}, TR::Prop({{2}}, {{0}})},
      {{CDM::CLibrary, {"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"pread"}},
       TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"read"}},
       TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"fread"}},
       TR::Prop({{3}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"recv"}},
       TR::Prop({{0}}, {{1, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"recvfrom"}},
       TR::Prop({{0}}, {{1, ReturnValueIndex}})},

      {{CDM::CLibrary, {"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"ttyname_r"}},
       TR::Prop({{0}}, {{1, ReturnValueIndex}})},

      {{CDM::CLibrary, {"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},

      {{CDM::CLibrary, {"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibrary, {"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibrary, {"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      {{CDM::CLibrary, {"memcmp"}},
       TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"memcpy"}},
       TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"memmove"}},
       TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"bcopy"}}, TR::Prop({{0, 2}}, {{1}})},

      // Note: "memmem" and its variants search for a byte sequence ("needle")
      // in a larger area ("haystack"). Currently we only propagate taint from
      // the haystack to the result, but in theory tampering with the needle
      // could also produce incorrect results.
      {{CDM::CLibrary, {"memmem"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      // Analogously, the following functions search for a byte within a buffer
      // and we only propagate taint from the buffer to the result.
      {{CDM::CLibraryMaybeHardened, {"memchr"}},
       TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"memrchr"}},
       TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"strchr"}},
       TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"strrchr"}},
       TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"strchrnul"}},
       TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      // FIXME: In case of arrays, only the first element of the array gets
      // tainted.
      {{CDM::CLibrary, {"qsort"}}, TR::Prop({{0}}, {{0}})},
      {{CDM::CLibrary, {"qsort_r"}}, TR::Prop({{0}}, {{0}})},

      {{CDM::CLibrary, {"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strcasecmp"}},
       TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strncmp"}},
       TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strncasecmp"}},
       TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      {{CDM::CLibrary, {"strndup"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strndupa"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      // strlen, wcslen, strnlen and alike intentionally don't propagate taint.
      // See the details here: https://github.com/llvm/llvm-project/pull/66086

      {{CDM::CLibrary, {"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
      {{CDM::CLibrary, {"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
      {{CDM::CLibrary, {"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
      {{CDM::CLibrary, {"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},

      {{CDM::CLibrary, {"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      {{CDM::CLibrary, {"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
      {{CDM::CLibrary, {"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},

      {{CDM::CLibraryMaybeHardened, {"strcpy"}},
       TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"stpcpy"}},
       TR::Prop({{1}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"strcat"}},
       TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"wcsncat"}},
       TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"strncpy"}},
       TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"strncat"}},
       TR::Prop({{0, 1, 2}}, {{0, ReturnValueIndex}})},
      {{CDM::CLibraryMaybeHardened, {"strlcpy"}}, TR::Prop({{1, 2}}, {{0}})},
      {{CDM::CLibraryMaybeHardened, {"strlcat"}}, TR::Prop({{0, 1, 2}}, {{0}})},

      // Usually the matching mode `CDM::CLibraryMaybeHardened` is sufficient
      // for unified handling of a function `FOO()` and its hardened variant
      // `__FOO_chk()`, but in the "sprintf" family the extra parameters of the
      // hardened variants are inserted into the middle of the parameter list,
      // so that would not work in their case.
      // int snprintf(char * str, size_t maxlen, const char * format, ...);
      {{CDM::CLibrary, {"snprintf"}},
       TR::Prop({{1, 2}, 3}, {{0, ReturnValueIndex}})},
      // int sprintf(char * str, const char * format, ...);
      {{CDM::CLibrary, {"sprintf"}},
       TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
      // int __snprintf_chk(char * str, size_t maxlen, int flag, size_t strlen,
      // const char * format, ...);
      {{CDM::CLibrary, {"__snprintf_chk"}},
       TR::Prop({{1, 4}, 5}, {{0, ReturnValueIndex}})},
      // int __sprintf_chk(char * str, int flag, size_t strlen, const char *
      // format, ...);
      {{CDM::CLibrary, {"__sprintf_chk"}},
       TR::Prop({{3}, 4}, {{0, ReturnValueIndex}})},

      // Sinks
      {{CDM::CLibrary, {"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{CDM::CLibrary, {"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
      {{CDM::CLibrary, {"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
      {{CDM::CLibrary, {"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
      {{CDM::CLibrary, {"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
      {{CDM::CLibrary, {"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
      {{CDM::CLibrary, {"execve"}},
       TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
      {{CDM::CLibrary, {"fexecve"}},
       TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
      {{CDM::CLibrary, {"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
      {{CDM::CLibrary, {"execvpe"}},
       TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
      {{CDM::CLibrary, {"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},

      // malloc, calloc, alloca, realloc, memccpy
      // are intentionally not marked as taint sinks because unconditional
      // reporting for these functions generates many false positives.
      // These taint sinks should be implemented in other checkers with more
      // sophisticated sanitation heuristics.

      {{CDM::CLibrary, {"setproctitle"}},
       TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
      {{CDM::CLibrary, {"setproctitle_fast"}},
       TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}};

  // Two more rules are added only when UntrustedEnv(C) returns true.
  if (TR::UntrustedEnv(C)) {
    // void setproctitle_init(int argc, char *argv[], char *envp[])
    // TODO: replace `MsgCustomSink` with a message that fits this situation.
    GlobalCRules.push_back({{CDM::CLibrary, {"setproctitle_init"}},
                            TR::Sink({{1, 2}}, MsgCustomSink)});

    // `getenv` returns taint only in untrusted environments.
    GlobalCRules.push_back(
        {{CDM::CLibrary, {"getenv"}}, TR::Source({{ReturnValueIndex}})});
  }

  // Build the static lookup map by moving the entries out of the vector.
  StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
                           std::make_move_iterator(GlobalCRules.end()));

  // User-provided taint configuration.
  CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager();
  assert(Mgr);
  GenericTaintRuleParser ConfigParser{*Mgr};
  std::string Option{"Config"};
  StringRef ConfigFile =
      Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option);
  std::optional<TaintConfiguration> Config =
      getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile);
  if (!Config) {
    // We don't have external taint config, no parsing required.
    DynamicTaintRules = RuleLookupTy{};
    return;
  }

  GenericTaintRuleParser::RulesContTy Rules{
      ConfigParser.parseConfiguration(Option, std::move(*Config))};

  DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
                            std::make_move_iterator(Rules.end()));
}

/// Pre-call hook. Makes sure the rules are initialized, then processes at most
/// one matching rule: a static rule is looked up only if the call is a global
/// C function; when no static rule is used, the dynamic rules are consulted.
/// The format-string and socket special cases are run afterwards for every
/// call.
void GenericTaintChecker::checkPreCall(const CallEvent &Call,
                                       CheckerContext &C) const {
  initTaintRules(C);

  // FIXME: this should be much simpler.
  if (const auto *Rule =
          Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr)
    Rule->process(*this, Call, C);
  else if (const auto *Rule = DynamicTaintRules->lookup(Call))
    Rule->process(*this, Call, C);

  // FIXME: These edge cases are to be eliminated from here eventually.
  //
  // Additional check that is not supported by CallDescription.
  // TODO: Make CallDescription be able to match attributes such as printf-like
  // arguments.
  checkUncontrolledFormatString(Call, C);

  // TODO: Modeling sockets should be done in a specific checker.
  // Socket is a source, which taints the return value.
  taintUnsafeSocketProtocol(Call, C);
}

void GenericTaintChecker::checkPostCall(const CallEvent &Call,
                                        CheckerContext &C) const {
  // Set the marked values as tainted. The return value only accessible from
  // checkPostStmt.
  ProgramStateRef State = C.getState();
  const StackFrameContext *CurrentFrame = C.getStackFrame();

  // Depending on what was tainted at pre-visit, we determined a set of
  // arguments which should be tainted after the function returns. These are
  // stored in the state as TaintArgsOnPostVisit set.
865 TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>(); 866 867 const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame); 868 if (!TaintArgs) 869 return; 870 assert(!TaintArgs->isEmpty()); 871 872 LLVM_DEBUG(for (ArgIdxTy I 873 : *TaintArgs) { 874 llvm::dbgs() << "PostCall<"; 875 Call.dump(llvm::dbgs()); 876 llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; 877 }); 878 879 const NoteTag *InjectionTag = nullptr; 880 std::vector<SymbolRef> TaintedSymbols; 881 std::vector<ArgIdxTy> TaintedIndexes; 882 for (ArgIdxTy ArgNum : *TaintArgs) { 883 // Special handling for the tainted return value. 884 if (ArgNum == ReturnValueIndex) { 885 State = addTaint(State, Call.getReturnValue()); 886 std::vector<SymbolRef> TaintedSyms = 887 getTaintedSymbols(State, Call.getReturnValue()); 888 if (!TaintedSyms.empty()) { 889 TaintedSymbols.push_back(TaintedSyms[0]); 890 TaintedIndexes.push_back(ArgNum); 891 } 892 continue; 893 } 894 // The arguments are pointer arguments. The data they are pointing at is 895 // tainted after the call. 896 if (auto V = getPointeeOf(State, Call.getArgSVal(ArgNum))) { 897 State = addTaint(State, *V); 898 std::vector<SymbolRef> TaintedSyms = getTaintedSymbols(State, *V); 899 if (!TaintedSyms.empty()) { 900 TaintedSymbols.push_back(TaintedSyms[0]); 901 TaintedIndexes.push_back(ArgNum); 902 } 903 } 904 } 905 // Create a NoteTag callback, which prints to the user where the taintedness 906 // was propagated to. 907 InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedIndexes, 908 Call.getCalleeStackFrame(0)); 909 // Clear up the taint info from the state. 
910 State = State->remove<TaintArgsOnPostVisit>(CurrentFrame); 911 C.addTransition(State, InjectionTag); 912 } 913 914 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 915 const char *NL, const char *Sep) const { 916 printTaint(State, Out, NL, Sep); 917 } 918 919 void GenericTaintRule::process(const GenericTaintChecker &Checker, 920 const CallEvent &Call, CheckerContext &C) const { 921 ProgramStateRef State = C.getState(); 922 const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); 923 924 /// Iterate every call argument, and get their corresponding Expr and SVal. 925 const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { 926 for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) { 927 const Expr *E = GetArgExpr(I, Call); 928 Fun(I, E, C.getSVal(E)); 929 } 930 }; 931 932 /// Check for taint sinks. 933 ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { 934 // Add taintedness to stdin parameters 935 if (isStdin(C.getSVal(E), C.getASTContext())) { 936 State = addTaint(State, C.getSVal(E)); 937 } 938 if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(State, C.getSVal(E))) 939 Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C); 940 }); 941 942 /// Check for taint filters. 943 ForEachCallArg([this, &State](ArgIdxTy I, const Expr *E, SVal S) { 944 if (FilterArgs.contains(I)) { 945 State = removeTaint(State, S); 946 if (auto P = getPointeeOf(State, S)) 947 State = removeTaint(State, *P); 948 } 949 }); 950 951 /// Check for taint propagation sources. 952 /// A rule will make the destination variables tainted if PropSrcArgs 953 /// is empty (taints the destination 954 /// arguments unconditionally), or if any of its signified 955 /// args are tainted in context of the current CallEvent. 
956 bool IsMatching = PropSrcArgs.isEmpty(); 957 std::vector<SymbolRef> TaintedSymbols; 958 std::vector<ArgIdxTy> TaintedIndexes; 959 ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols, 960 &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) { 961 std::optional<SVal> TaintedSVal = 962 getTaintedPointeeOrPointer(State, C.getSVal(E)); 963 IsMatching = 964 IsMatching || (PropSrcArgs.contains(I) && TaintedSVal.has_value()); 965 966 // We track back tainted arguments except for stdin 967 if (TaintedSVal && !isStdin(*TaintedSVal, C.getASTContext())) { 968 std::vector<SymbolRef> TaintedArgSyms = 969 getTaintedSymbols(State, *TaintedSVal); 970 if (!TaintedArgSyms.empty()) { 971 llvm::append_range(TaintedSymbols, TaintedArgSyms); 972 TaintedIndexes.push_back(I); 973 } 974 } 975 }); 976 977 // Early return for propagation rules which dont match. 978 // Matching propagations, Sinks and Filters will pass this point. 979 if (!IsMatching) 980 return; 981 982 const auto WouldEscape = [](SVal V, QualType Ty) -> bool { 983 if (!isa<Loc>(V)) 984 return false; 985 986 const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified(); 987 const bool IsNonConstPtr = 988 Ty->isPointerType() && !Ty->getPointeeType().isConstQualified(); 989 990 return IsNonConstRef || IsNonConstPtr; 991 }; 992 993 /// Propagate taint where it is necessary. 994 auto &F = State->getStateManager().get_context<ArgIdxFactory>(); 995 ImmutableSet<ArgIdxTy> Result = F.getEmptySet(); 996 ForEachCallArg( 997 [&](ArgIdxTy I, const Expr *E, SVal V) { 998 if (PropDstArgs.contains(I)) { 999 LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs()); 1000 llvm::dbgs() 1001 << "> prepares tainting arg index: " << I << '\n';); 1002 Result = F.add(Result, I); 1003 } 1004 1005 // Taint property gets lost if the variable is passed as a 1006 // non-const pointer or reference to a function which is 1007 // not inlined. For matching rules we want to preserve the taintedness. 
1008 // TODO: We should traverse all reachable memory regions via the 1009 // escaping parameter. Instead of doing that we simply mark only the 1010 // referred memory region as tainted. 1011 if (WouldEscape(V, E->getType()) && getTaintedPointeeOrPointer(State, V)) { 1012 LLVM_DEBUG(if (!Result.contains(I)) { 1013 llvm::dbgs() << "PreCall<"; 1014 Call.dump(llvm::dbgs()); 1015 llvm::dbgs() << "> prepares tainting arg index: " << I << '\n'; 1016 }); 1017 Result = F.add(Result, I); 1018 } 1019 }); 1020 1021 if (!Result.isEmpty()) 1022 State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result); 1023 const NoteTag *InjectionTag = taintOriginTrackerTag( 1024 C, std::move(TaintedSymbols), std::move(TaintedIndexes), 1025 Call.getCalleeStackFrame(0)); 1026 C.addTransition(State, InjectionTag); 1027 } 1028 1029 bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { 1030 return !C.getAnalysisManager() 1031 .getAnalyzerOptions() 1032 .ShouldAssumeControlledEnvironment; 1033 } 1034 1035 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 1036 CheckerContext &C) const { 1037 assert(E); 1038 if (!isTaintReporterCheckerEnabled) 1039 return false; 1040 std::optional<SVal> TaintedSVal = 1041 getTaintedPointeeOrPointer(C.getState(), C.getSVal(E)); 1042 1043 if (!TaintedSVal) 1044 return false; 1045 1046 // Generate diagnostic. 
1047 assert(BT); 1048 static CheckerProgramPointTag Tag(BT->getCheckerName(), Msg); 1049 if (ExplodedNode *N = C.generateNonFatalErrorNode(C.getState(), &Tag)) { 1050 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 1051 report->addRange(E->getSourceRange()); 1052 for (auto TaintedSym : getTaintedSymbols(C.getState(), *TaintedSVal)) { 1053 report->markInteresting(TaintedSym); 1054 } 1055 C.emitReport(std::move(report)); 1056 return true; 1057 } 1058 return false; 1059 } 1060 1061 /// TODO: remove checking for printf format attributes and socket whitelisting 1062 /// from GenericTaintChecker, and that means the following functions: 1063 /// getPrintfFormatArgumentNum, 1064 /// GenericTaintChecker::checkUncontrolledFormatString, 1065 /// GenericTaintChecker::taintUnsafeSocketProtocol 1066 1067 static bool getPrintfFormatArgumentNum(const CallEvent &Call, 1068 const CheckerContext &C, 1069 ArgIdxTy &ArgNum) { 1070 // Find if the function contains a format string argument. 1071 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 1072 // vsnprintf, syslog, custom annotated functions. 1073 const Decl *CallDecl = Call.getDecl(); 1074 if (!CallDecl) 1075 return false; 1076 const FunctionDecl *FDecl = CallDecl->getAsFunction(); 1077 if (!FDecl) 1078 return false; 1079 1080 const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); 1081 1082 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 1083 ArgNum = Format->getFormatIdx() - 1; 1084 if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum) 1085 return true; 1086 } 1087 1088 return false; 1089 } 1090 1091 bool GenericTaintChecker::checkUncontrolledFormatString( 1092 const CallEvent &Call, CheckerContext &C) const { 1093 // Check if the function contains a format string argument. 
1094 ArgIdxTy ArgNum = 0; 1095 if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) 1096 return false; 1097 1098 // If either the format string content or the pointer itself are tainted, 1099 // warn. 1100 return generateReportIfTainted(Call.getArgExpr(ArgNum), 1101 MsgUncontrolledFormatString, C); 1102 } 1103 1104 void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call, 1105 CheckerContext &C) const { 1106 if (Call.getNumArgs() < 1) 1107 return; 1108 const IdentifierInfo *ID = Call.getCalleeIdentifier(); 1109 if (!ID) 1110 return; 1111 if (ID->getName() != "socket") 1112 return; 1113 1114 SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); 1115 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 1116 // Allow internal communication protocols. 1117 bool SafeProtocol = DomName == "AF_SYSTEM" || DomName == "AF_LOCAL" || 1118 DomName == "AF_UNIX" || DomName == "AF_RESERVED_36"; 1119 if (SafeProtocol) 1120 return; 1121 1122 ProgramStateRef State = C.getState(); 1123 auto &F = State->getStateManager().get_context<ArgIdxFactory>(); 1124 ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex); 1125 State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result); 1126 C.addTransition(State); 1127 } 1128 1129 /// Checker registration 1130 void ento::registerTaintPropagationChecker(CheckerManager &Mgr) { 1131 Mgr.registerChecker<GenericTaintChecker>(); 1132 } 1133 1134 bool ento::shouldRegisterTaintPropagationChecker(const CheckerManager &mgr) { 1135 return true; 1136 } 1137 1138 void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 1139 GenericTaintChecker *checker = Mgr.getChecker<GenericTaintChecker>(); 1140 checker->isTaintReporterCheckerEnabled = true; 1141 checker->BT.emplace(Mgr.getCurrentCheckerName(), "Use of Untrusted Data", 1142 categories::TaintedData); 1143 } 1144 1145 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { 1146 return true; 1147 } 1148