10b57cec5SDimitry Andric //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Defines basic, non-domain-specific mechanisms for tracking tainted values. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "Taint.h" 140b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" 150b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric using namespace clang; 180b57cec5SDimitry Andric using namespace ento; 190b57cec5SDimitry Andric using namespace taint; 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric // Fully tainted symbols. 220b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric // Partially tainted symbols. 250b57cec5SDimitry Andric REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, 260b57cec5SDimitry Andric TaintTagType) 270b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, 300b57cec5SDimitry Andric const char *Sep) { 310b57cec5SDimitry Andric TaintMapTy TM = State->get<TaintMap>(); 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric if (!TM.isEmpty()) 340b57cec5SDimitry Andric Out << "Tainted symbols:" << NL; 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric for (const auto &I : TM) 370b57cec5SDimitry Andric Out << I.first << " : " << I.second << NL; 380b57cec5SDimitry Andric } 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric void dumpTaint(ProgramStateRef State) { 410b57cec5SDimitry Andric printTaint(State, llvm::errs()); 420b57cec5SDimitry Andric } 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, 450b57cec5SDimitry Andric const LocationContext *LCtx, 460b57cec5SDimitry Andric TaintTagType Kind) { 470b57cec5SDimitry Andric return addTaint(State, State->getSVal(S, LCtx), Kind); 480b57cec5SDimitry Andric } 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, 510b57cec5SDimitry Andric TaintTagType Kind) { 520b57cec5SDimitry Andric SymbolRef Sym = V.getAsSymbol(); 530b57cec5SDimitry Andric if (Sym) 540b57cec5SDimitry Andric return addTaint(State, Sym, Kind); 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric // If the SVal represents a structure, try to mass-taint all values within the 570b57cec5SDimitry Andric // structure. For now it only works efficiently on lazy compound values that 580b57cec5SDimitry Andric // were conjured during a conservative evaluation of a function - either as 590b57cec5SDimitry Andric // return values of functions that return structures or arrays by value, or as 600b57cec5SDimitry Andric // values of structures or arrays passed into the function by reference, 610b57cec5SDimitry Andric // directly or through pointer aliasing. Such lazy compound values are 620b57cec5SDimitry Andric // characterized by having exactly one binding in their captured store within 630b57cec5SDimitry Andric // their parent region, which is a conjured symbol default-bound to the base 640b57cec5SDimitry Andric // region of the parent region. 650b57cec5SDimitry Andric if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { 660b57cec5SDimitry Andric if (Optional<SVal> binding = 670b57cec5SDimitry Andric State->getStateManager().getStoreManager() 680b57cec5SDimitry Andric .getDefaultBinding(*LCV)) { 690b57cec5SDimitry Andric if (SymbolRef Sym = binding->getAsSymbol()) 700b57cec5SDimitry Andric return addPartialTaint(State, Sym, LCV->getRegion(), Kind); 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric } 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric const MemRegion *R = V.getAsRegion(); 750b57cec5SDimitry Andric return addTaint(State, R, Kind); 760b57cec5SDimitry Andric } 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, 790b57cec5SDimitry Andric TaintTagType Kind) { 800b57cec5SDimitry Andric if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) 810b57cec5SDimitry Andric return addTaint(State, SR->getSymbol(), Kind); 820b57cec5SDimitry Andric return State; 830b57cec5SDimitry Andric } 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, 860b57cec5SDimitry Andric TaintTagType Kind) { 870b57cec5SDimitry Andric // If this is a symbol cast, remove the cast before adding the taint. Taint 880b57cec5SDimitry Andric // is cast agnostic. 890b57cec5SDimitry Andric while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) 900b57cec5SDimitry Andric Sym = SC->getOperand(); 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind); 930b57cec5SDimitry Andric assert(NewState); 940b57cec5SDimitry Andric return NewState; 950b57cec5SDimitry Andric } 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric ProgramStateRef taint::addPartialTaint(ProgramStateRef State, 980b57cec5SDimitry Andric SymbolRef ParentSym, 990b57cec5SDimitry Andric const SubRegion *SubRegion, 1000b57cec5SDimitry Andric TaintTagType Kind) { 1010b57cec5SDimitry Andric // Ignore partial taint if the entire parent symbol is already tainted. 1020b57cec5SDimitry Andric if (const TaintTagType *T = State->get<TaintMap>(ParentSym)) 1030b57cec5SDimitry Andric if (*T == Kind) 1040b57cec5SDimitry Andric return State; 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric // Partial taint applies if only a portion of the symbol is tainted. 1070b57cec5SDimitry Andric if (SubRegion == SubRegion->getBaseRegion()) 1080b57cec5SDimitry Andric return addTaint(State, ParentSym, Kind); 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym); 1110b57cec5SDimitry Andric TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>(); 1120b57cec5SDimitry Andric TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); 1130b57cec5SDimitry Andric 1140b57cec5SDimitry Andric Regs = F.add(Regs, SubRegion, Kind); 1150b57cec5SDimitry Andric ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs); 1160b57cec5SDimitry Andric assert(NewState); 1170b57cec5SDimitry Andric return NewState; 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const Stmt *S, 1210b57cec5SDimitry Andric const LocationContext *LCtx, TaintTagType Kind) { 1220b57cec5SDimitry Andric SVal val = State->getSVal(S, LCtx); 1230b57cec5SDimitry Andric return isTainted(State, val, Kind); 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { 1270b57cec5SDimitry Andric if (const SymExpr *Sym = V.getAsSymExpr()) 1280b57cec5SDimitry Andric return isTainted(State, Sym, Kind); 1290b57cec5SDimitry Andric if (const MemRegion *Reg = V.getAsRegion()) 1300b57cec5SDimitry Andric return isTainted(State, Reg, Kind); 1310b57cec5SDimitry Andric return false; 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, 1350b57cec5SDimitry Andric TaintTagType K) { 1360b57cec5SDimitry Andric if (!Reg) 1370b57cec5SDimitry Andric return false; 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric // Element region (array element) is tainted if either the base or the offset 1400b57cec5SDimitry Andric // are tainted. 1410b57cec5SDimitry Andric if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) 1420b57cec5SDimitry Andric return isTainted(State, ER->getSuperRegion(), K) || 1430b57cec5SDimitry Andric isTainted(State, ER->getIndex(), K); 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) 1460b57cec5SDimitry Andric return isTainted(State, SR->getSymbol(), K); 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) 1490b57cec5SDimitry Andric return isTainted(State, ER->getSuperRegion(), K); 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric return false; 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { 1550b57cec5SDimitry Andric if (!Sym) 1560b57cec5SDimitry Andric return false; 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric // Traverse all the symbols this symbol depends on to see if any are tainted. 1590b57cec5SDimitry Andric for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), 1600b57cec5SDimitry Andric SE = Sym->symbol_end(); SI != SE; ++SI) { 1610b57cec5SDimitry Andric if (!isa<SymbolData>(*SI)) 1620b57cec5SDimitry Andric continue; 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) { 1650b57cec5SDimitry Andric if (*Tag == Kind) 1660b57cec5SDimitry Andric return true; 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) { 1700b57cec5SDimitry Andric // If this is a SymbolDerived with a tainted parent, it's also tainted. 1710b57cec5SDimitry Andric if (isTainted(State, SD->getParentSymbol(), Kind)) 1720b57cec5SDimitry Andric return true; 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric // If this is a SymbolDerived with the same parent symbol as another 1750b57cec5SDimitry Andric // tainted SymbolDerived and a region that's a sub-region of that tainted 1760b57cec5SDimitry Andric // symbol, it's also tainted. 1770b57cec5SDimitry Andric if (const TaintedSubRegions *Regs = 1780b57cec5SDimitry Andric State->get<DerivedSymTaint>(SD->getParentSymbol())) { 1790b57cec5SDimitry Andric const TypedValueRegion *R = SD->getRegion(); 1800b57cec5SDimitry Andric for (auto I : *Regs) { 1810b57cec5SDimitry Andric // FIXME: The logic to identify tainted regions could be more 1820b57cec5SDimitry Andric // complete. For example, this would not currently identify 1830b57cec5SDimitry Andric // overlapping fields in a union as tainted. To identify this we can 1840b57cec5SDimitry Andric // check for overlapping/nested byte offsets. 1850b57cec5SDimitry Andric if (Kind == I.second && R->isSubRegionOf(I.first)) 1860b57cec5SDimitry Andric return true; 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric } 1890b57cec5SDimitry Andric } 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric // If memory region is tainted, data is also tainted. 1920b57cec5SDimitry Andric if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) { 1930b57cec5SDimitry Andric if (isTainted(State, SRV->getRegion(), Kind)) 1940b57cec5SDimitry Andric return true; 1950b57cec5SDimitry Andric } 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric // If this is a SymbolCast from a tainted value, it's also tainted. 1980b57cec5SDimitry Andric if (const auto *SC = dyn_cast<SymbolCast>(*SI)) { 1990b57cec5SDimitry Andric if (isTainted(State, SC->getOperand(), Kind)) 2000b57cec5SDimitry Andric return true; 2010b57cec5SDimitry Andric } 2020b57cec5SDimitry Andric } 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric return false; 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric 207*a7dea167SDimitry Andric PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N, 208*a7dea167SDimitry Andric BugReporterContext &BRC, 209*a7dea167SDimitry Andric PathSensitiveBugReport &BR) { 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric // Find the ExplodedNode where the taint was first introduced 2120b57cec5SDimitry Andric if (!isTainted(N->getState(), V) || 2130b57cec5SDimitry Andric isTainted(N->getFirstPred()->getState(), V)) 2140b57cec5SDimitry Andric return nullptr; 2150b57cec5SDimitry Andric 216*a7dea167SDimitry Andric const Stmt *S = N->getStmtForDiagnostics(); 2170b57cec5SDimitry Andric if (!S) 2180b57cec5SDimitry Andric return nullptr; 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric const LocationContext *NCtx = N->getLocationContext(); 2210b57cec5SDimitry Andric PathDiagnosticLocation L = 2220b57cec5SDimitry Andric PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); 2230b57cec5SDimitry Andric if (!L.isValid() || !L.asLocation().isValid()) 2240b57cec5SDimitry Andric return nullptr; 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here"); 2270b57cec5SDimitry Andric } 228