10b57cec5SDimitry Andric //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Defines basic, non-domain-specific mechanisms for tracking tainted values. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 13*81ad6265SDimitry Andric #include "clang/StaticAnalyzer/Checkers/Taint.h" 140b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" 150b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric using namespace clang; 180b57cec5SDimitry Andric using namespace ento; 190b57cec5SDimitry Andric using namespace taint; 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric // Fully tainted symbols. 220b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric // Partially tainted symbols. 250b57cec5SDimitry Andric REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, 260b57cec5SDimitry Andric TaintTagType) 270b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, 300b57cec5SDimitry Andric const char *Sep) { 310b57cec5SDimitry Andric TaintMapTy TM = State->get<TaintMap>(); 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric if (!TM.isEmpty()) 340b57cec5SDimitry Andric Out << "Tainted symbols:" << NL; 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric for (const auto &I : TM) 370b57cec5SDimitry Andric Out << I.first << " : " << I.second << NL; 380b57cec5SDimitry Andric } 390b57cec5SDimitry Andric 40*81ad6265SDimitry Andric void taint::dumpTaint(ProgramStateRef State) { 41*81ad6265SDimitry Andric printTaint(State, llvm::errs()); 42*81ad6265SDimitry Andric } 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, 450b57cec5SDimitry Andric const LocationContext *LCtx, 460b57cec5SDimitry Andric TaintTagType Kind) { 470b57cec5SDimitry Andric return addTaint(State, State->getSVal(S, LCtx), Kind); 480b57cec5SDimitry Andric } 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, 510b57cec5SDimitry Andric TaintTagType Kind) { 520b57cec5SDimitry Andric SymbolRef Sym = V.getAsSymbol(); 530b57cec5SDimitry Andric if (Sym) 540b57cec5SDimitry Andric return addTaint(State, Sym, Kind); 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric // If the SVal represents a structure, try to mass-taint all values within the 570b57cec5SDimitry Andric // structure. For now it only works efficiently on lazy compound values that 580b57cec5SDimitry Andric // were conjured during a conservative evaluation of a function - either as 590b57cec5SDimitry Andric // return values of functions that return structures or arrays by value, or as 600b57cec5SDimitry Andric // values of structures or arrays passed into the function by reference, 610b57cec5SDimitry Andric // directly or through pointer aliasing. Such lazy compound values are 620b57cec5SDimitry Andric // characterized by having exactly one binding in their captured store within 630b57cec5SDimitry Andric // their parent region, which is a conjured symbol default-bound to the base 640b57cec5SDimitry Andric // region of the parent region. 650b57cec5SDimitry Andric if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { 660b57cec5SDimitry Andric if (Optional<SVal> binding = 67480093f4SDimitry Andric State->getStateManager().getStoreManager().getDefaultBinding( 68480093f4SDimitry Andric *LCV)) { 690b57cec5SDimitry Andric if (SymbolRef Sym = binding->getAsSymbol()) 700b57cec5SDimitry Andric return addPartialTaint(State, Sym, LCV->getRegion(), Kind); 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric } 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric const MemRegion *R = V.getAsRegion(); 750b57cec5SDimitry Andric return addTaint(State, R, Kind); 760b57cec5SDimitry Andric } 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, 790b57cec5SDimitry Andric TaintTagType Kind) { 800b57cec5SDimitry Andric if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) 810b57cec5SDimitry Andric return addTaint(State, SR->getSymbol(), Kind); 820b57cec5SDimitry Andric return State; 830b57cec5SDimitry Andric } 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, 860b57cec5SDimitry Andric TaintTagType Kind) { 870b57cec5SDimitry Andric // If this is a symbol cast, remove the cast before adding the taint. Taint 880b57cec5SDimitry Andric // is cast agnostic. 890b57cec5SDimitry Andric while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) 900b57cec5SDimitry Andric Sym = SC->getOperand(); 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind); 930b57cec5SDimitry Andric assert(NewState); 940b57cec5SDimitry Andric return NewState; 950b57cec5SDimitry Andric } 960b57cec5SDimitry Andric 97480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) { 98480093f4SDimitry Andric SymbolRef Sym = V.getAsSymbol(); 99480093f4SDimitry Andric if (Sym) 100480093f4SDimitry Andric return removeTaint(State, Sym); 101480093f4SDimitry Andric 102480093f4SDimitry Andric const MemRegion *R = V.getAsRegion(); 103480093f4SDimitry Andric return removeTaint(State, R); 104480093f4SDimitry Andric } 105480093f4SDimitry Andric 106480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) { 107480093f4SDimitry Andric if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) 108480093f4SDimitry Andric return removeTaint(State, SR->getSymbol()); 109480093f4SDimitry Andric return State; 110480093f4SDimitry Andric } 111480093f4SDimitry Andric 112480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) { 113480093f4SDimitry Andric // If this is a symbol cast, remove the cast before adding the taint. Taint 114480093f4SDimitry Andric // is cast agnostic. 115480093f4SDimitry Andric while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) 116480093f4SDimitry Andric Sym = SC->getOperand(); 117480093f4SDimitry Andric 118480093f4SDimitry Andric ProgramStateRef NewState = State->remove<TaintMap>(Sym); 119480093f4SDimitry Andric assert(NewState); 120480093f4SDimitry Andric return NewState; 121480093f4SDimitry Andric } 122480093f4SDimitry Andric 1230b57cec5SDimitry Andric ProgramStateRef taint::addPartialTaint(ProgramStateRef State, 1240b57cec5SDimitry Andric SymbolRef ParentSym, 1250b57cec5SDimitry Andric const SubRegion *SubRegion, 1260b57cec5SDimitry Andric TaintTagType Kind) { 1270b57cec5SDimitry Andric // Ignore partial taint if the entire parent symbol is already tainted. 1280b57cec5SDimitry Andric if (const TaintTagType *T = State->get<TaintMap>(ParentSym)) 1290b57cec5SDimitry Andric if (*T == Kind) 1300b57cec5SDimitry Andric return State; 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric // Partial taint applies if only a portion of the symbol is tainted. 1330b57cec5SDimitry Andric if (SubRegion == SubRegion->getBaseRegion()) 1340b57cec5SDimitry Andric return addTaint(State, ParentSym, Kind); 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym); 1370b57cec5SDimitry Andric TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>(); 1380b57cec5SDimitry Andric TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric Regs = F.add(Regs, SubRegion, Kind); 1410b57cec5SDimitry Andric ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs); 1420b57cec5SDimitry Andric assert(NewState); 1430b57cec5SDimitry Andric return NewState; 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const Stmt *S, 1470b57cec5SDimitry Andric const LocationContext *LCtx, TaintTagType Kind) { 1480b57cec5SDimitry Andric SVal val = State->getSVal(S, LCtx); 1490b57cec5SDimitry Andric return isTainted(State, val, Kind); 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { 153e8d8bef9SDimitry Andric if (SymbolRef Sym = V.getAsSymbol()) 1540b57cec5SDimitry Andric return isTainted(State, Sym, Kind); 1550b57cec5SDimitry Andric if (const MemRegion *Reg = V.getAsRegion()) 1560b57cec5SDimitry Andric return isTainted(State, Reg, Kind); 1570b57cec5SDimitry Andric return false; 1580b57cec5SDimitry Andric } 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, 1610b57cec5SDimitry Andric TaintTagType K) { 1620b57cec5SDimitry Andric if (!Reg) 1630b57cec5SDimitry Andric return false; 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric // Element region (array element) is tainted if either the base or the offset 1660b57cec5SDimitry Andric // are tainted. 1670b57cec5SDimitry Andric if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) 1680b57cec5SDimitry Andric return isTainted(State, ER->getSuperRegion(), K) || 1690b57cec5SDimitry Andric isTainted(State, ER->getIndex(), K); 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) 1720b57cec5SDimitry Andric return isTainted(State, SR->getSymbol(), K); 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) 1750b57cec5SDimitry Andric return isTainted(State, ER->getSuperRegion(), K); 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric return false; 1780b57cec5SDimitry Andric } 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { 1810b57cec5SDimitry Andric if (!Sym) 1820b57cec5SDimitry Andric return false; 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric // Traverse all the symbols this symbol depends on to see if any are tainted. 1850b57cec5SDimitry Andric for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), 186480093f4SDimitry Andric SE = Sym->symbol_end(); 187480093f4SDimitry Andric SI != SE; ++SI) { 1880b57cec5SDimitry Andric if (!isa<SymbolData>(*SI)) 1890b57cec5SDimitry Andric continue; 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) { 1920b57cec5SDimitry Andric if (*Tag == Kind) 1930b57cec5SDimitry Andric return true; 1940b57cec5SDimitry Andric } 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) { 1970b57cec5SDimitry Andric // If this is a SymbolDerived with a tainted parent, it's also tainted. 1980b57cec5SDimitry Andric if (isTainted(State, SD->getParentSymbol(), Kind)) 1990b57cec5SDimitry Andric return true; 2000b57cec5SDimitry Andric 2010b57cec5SDimitry Andric // If this is a SymbolDerived with the same parent symbol as another 2020b57cec5SDimitry Andric // tainted SymbolDerived and a region that's a sub-region of that tainted 2030b57cec5SDimitry Andric // symbol, it's also tainted. 2040b57cec5SDimitry Andric if (const TaintedSubRegions *Regs = 2050b57cec5SDimitry Andric State->get<DerivedSymTaint>(SD->getParentSymbol())) { 2060b57cec5SDimitry Andric const TypedValueRegion *R = SD->getRegion(); 2070b57cec5SDimitry Andric for (auto I : *Regs) { 2080b57cec5SDimitry Andric // FIXME: The logic to identify tainted regions could be more 2090b57cec5SDimitry Andric // complete. For example, this would not currently identify 2100b57cec5SDimitry Andric // overlapping fields in a union as tainted. To identify this we can 2110b57cec5SDimitry Andric // check for overlapping/nested byte offsets. 2120b57cec5SDimitry Andric if (Kind == I.second && R->isSubRegionOf(I.first)) 2130b57cec5SDimitry Andric return true; 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric } 2160b57cec5SDimitry Andric } 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric // If memory region is tainted, data is also tainted. 2190b57cec5SDimitry Andric if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) { 2200b57cec5SDimitry Andric if (isTainted(State, SRV->getRegion(), Kind)) 2210b57cec5SDimitry Andric return true; 2220b57cec5SDimitry Andric } 2230b57cec5SDimitry Andric 2240b57cec5SDimitry Andric // If this is a SymbolCast from a tainted value, it's also tainted. 2250b57cec5SDimitry Andric if (const auto *SC = dyn_cast<SymbolCast>(*SI)) { 2260b57cec5SDimitry Andric if (isTainted(State, SC->getOperand(), Kind)) 2270b57cec5SDimitry Andric return true; 2280b57cec5SDimitry Andric } 2290b57cec5SDimitry Andric } 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric return false; 2320b57cec5SDimitry Andric } 2330b57cec5SDimitry Andric 234a7dea167SDimitry Andric PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N, 235a7dea167SDimitry Andric BugReporterContext &BRC, 236a7dea167SDimitry Andric PathSensitiveBugReport &BR) { 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric // Find the ExplodedNode where the taint was first introduced 2390b57cec5SDimitry Andric if (!isTainted(N->getState(), V) || 2400b57cec5SDimitry Andric isTainted(N->getFirstPred()->getState(), V)) 2410b57cec5SDimitry Andric return nullptr; 2420b57cec5SDimitry Andric 243a7dea167SDimitry Andric const Stmt *S = N->getStmtForDiagnostics(); 2440b57cec5SDimitry Andric if (!S) 2450b57cec5SDimitry Andric return nullptr; 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric const LocationContext *NCtx = N->getLocationContext(); 2480b57cec5SDimitry Andric PathDiagnosticLocation L = 2490b57cec5SDimitry Andric PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); 2500b57cec5SDimitry Andric if (!L.isValid() || !L.asLocation().isValid()) 2510b57cec5SDimitry Andric return nullptr; 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here"); 2540b57cec5SDimitry Andric } 255