10b57cec5SDimitry Andric //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Defines basic, non-domain-specific mechanisms for tracking tainted values. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 1381ad6265SDimitry Andric #include "clang/StaticAnalyzer/Checkers/Taint.h" 140b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" 150b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 16*bdd1243dSDimitry Andric #include <optional> 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric using namespace clang; 190b57cec5SDimitry Andric using namespace ento; 200b57cec5SDimitry Andric using namespace taint; 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric // Fully tainted symbols. 230b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric // Partially tainted symbols. 260b57cec5SDimitry Andric REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, 270b57cec5SDimitry Andric TaintTagType) 280b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, 310b57cec5SDimitry Andric const char *Sep) { 320b57cec5SDimitry Andric TaintMapTy TM = State->get<TaintMap>(); 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric if (!TM.isEmpty()) 350b57cec5SDimitry Andric Out << "Tainted symbols:" << NL; 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric for (const auto &I : TM) 380b57cec5SDimitry Andric Out << I.first << " : " << I.second << NL; 390b57cec5SDimitry Andric } 400b57cec5SDimitry Andric 4181ad6265SDimitry Andric void taint::dumpTaint(ProgramStateRef State) { 4281ad6265SDimitry Andric printTaint(State, llvm::errs()); 4381ad6265SDimitry Andric } 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, 460b57cec5SDimitry Andric const LocationContext *LCtx, 470b57cec5SDimitry Andric TaintTagType Kind) { 480b57cec5SDimitry Andric return addTaint(State, State->getSVal(S, LCtx), Kind); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, 520b57cec5SDimitry Andric TaintTagType Kind) { 530b57cec5SDimitry Andric SymbolRef Sym = V.getAsSymbol(); 540b57cec5SDimitry Andric if (Sym) 550b57cec5SDimitry Andric return addTaint(State, Sym, Kind); 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric // If the SVal represents a structure, try to mass-taint all values within the 580b57cec5SDimitry Andric // structure. For now it only works efficiently on lazy compound values that 590b57cec5SDimitry Andric // were conjured during a conservative evaluation of a function - either as 600b57cec5SDimitry Andric // return values of functions that return structures or arrays by value, or as 610b57cec5SDimitry Andric // values of structures or arrays passed into the function by reference, 620b57cec5SDimitry Andric // directly or through pointer aliasing. Such lazy compound values are 630b57cec5SDimitry Andric // characterized by having exactly one binding in their captured store within 640b57cec5SDimitry Andric // their parent region, which is a conjured symbol default-bound to the base 650b57cec5SDimitry Andric // region of the parent region. 660b57cec5SDimitry Andric if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { 67*bdd1243dSDimitry Andric if (std::optional<SVal> binding = 68480093f4SDimitry Andric State->getStateManager().getStoreManager().getDefaultBinding( 69480093f4SDimitry Andric *LCV)) { 700b57cec5SDimitry Andric if (SymbolRef Sym = binding->getAsSymbol()) 710b57cec5SDimitry Andric return addPartialTaint(State, Sym, LCV->getRegion(), Kind); 720b57cec5SDimitry Andric } 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric const MemRegion *R = V.getAsRegion(); 760b57cec5SDimitry Andric return addTaint(State, R, Kind); 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, 800b57cec5SDimitry Andric TaintTagType Kind) { 810b57cec5SDimitry Andric if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) 820b57cec5SDimitry Andric return addTaint(State, SR->getSymbol(), Kind); 830b57cec5SDimitry Andric return State; 840b57cec5SDimitry Andric } 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, 870b57cec5SDimitry Andric TaintTagType Kind) { 880b57cec5SDimitry Andric // If this is a symbol cast, remove the cast before adding the taint. Taint 890b57cec5SDimitry Andric // is cast agnostic. 900b57cec5SDimitry Andric while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) 910b57cec5SDimitry Andric Sym = SC->getOperand(); 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind); 940b57cec5SDimitry Andric assert(NewState); 950b57cec5SDimitry Andric return NewState; 960b57cec5SDimitry Andric } 970b57cec5SDimitry Andric 98480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) { 99480093f4SDimitry Andric SymbolRef Sym = V.getAsSymbol(); 100480093f4SDimitry Andric if (Sym) 101480093f4SDimitry Andric return removeTaint(State, Sym); 102480093f4SDimitry Andric 103480093f4SDimitry Andric const MemRegion *R = V.getAsRegion(); 104480093f4SDimitry Andric return removeTaint(State, R); 105480093f4SDimitry Andric } 106480093f4SDimitry Andric 107480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) { 108480093f4SDimitry Andric if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) 109480093f4SDimitry Andric return removeTaint(State, SR->getSymbol()); 110480093f4SDimitry Andric return State; 111480093f4SDimitry Andric } 112480093f4SDimitry Andric 113480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) { 114480093f4SDimitry Andric // If this is a symbol cast, remove the cast before adding the taint. Taint 115480093f4SDimitry Andric // is cast agnostic. 116480093f4SDimitry Andric while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) 117480093f4SDimitry Andric Sym = SC->getOperand(); 118480093f4SDimitry Andric 119480093f4SDimitry Andric ProgramStateRef NewState = State->remove<TaintMap>(Sym); 120480093f4SDimitry Andric assert(NewState); 121480093f4SDimitry Andric return NewState; 122480093f4SDimitry Andric } 123480093f4SDimitry Andric 1240b57cec5SDimitry Andric ProgramStateRef taint::addPartialTaint(ProgramStateRef State, 1250b57cec5SDimitry Andric SymbolRef ParentSym, 1260b57cec5SDimitry Andric const SubRegion *SubRegion, 1270b57cec5SDimitry Andric TaintTagType Kind) { 1280b57cec5SDimitry Andric // Ignore partial taint if the entire parent symbol is already tainted. 1290b57cec5SDimitry Andric if (const TaintTagType *T = State->get<TaintMap>(ParentSym)) 1300b57cec5SDimitry Andric if (*T == Kind) 1310b57cec5SDimitry Andric return State; 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric // Partial taint applies if only a portion of the symbol is tainted. 1340b57cec5SDimitry Andric if (SubRegion == SubRegion->getBaseRegion()) 1350b57cec5SDimitry Andric return addTaint(State, ParentSym, Kind); 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym); 1380b57cec5SDimitry Andric TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>(); 1390b57cec5SDimitry Andric TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric Regs = F.add(Regs, SubRegion, Kind); 1420b57cec5SDimitry Andric ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs); 1430b57cec5SDimitry Andric assert(NewState); 1440b57cec5SDimitry Andric return NewState; 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const Stmt *S, 1480b57cec5SDimitry Andric const LocationContext *LCtx, TaintTagType Kind) { 1490b57cec5SDimitry Andric SVal val = State->getSVal(S, LCtx); 1500b57cec5SDimitry Andric return isTainted(State, val, Kind); 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { 154e8d8bef9SDimitry Andric if (SymbolRef Sym = V.getAsSymbol()) 1550b57cec5SDimitry Andric return isTainted(State, Sym, Kind); 1560b57cec5SDimitry Andric if (const MemRegion *Reg = V.getAsRegion()) 1570b57cec5SDimitry Andric return isTainted(State, Reg, Kind); 1580b57cec5SDimitry Andric return false; 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, 1620b57cec5SDimitry Andric TaintTagType K) { 1630b57cec5SDimitry Andric if (!Reg) 1640b57cec5SDimitry Andric return false; 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric // Element region (array element) is tainted if either the base or the offset 1670b57cec5SDimitry Andric // are tainted. 1680b57cec5SDimitry Andric if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) 1690b57cec5SDimitry Andric return isTainted(State, ER->getSuperRegion(), K) || 1700b57cec5SDimitry Andric isTainted(State, ER->getIndex(), K); 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) 1730b57cec5SDimitry Andric return isTainted(State, SR->getSymbol(), K); 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) 1760b57cec5SDimitry Andric return isTainted(State, ER->getSuperRegion(), K); 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric return false; 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { 1820b57cec5SDimitry Andric if (!Sym) 1830b57cec5SDimitry Andric return false; 1840b57cec5SDimitry Andric 1850b57cec5SDimitry Andric // Traverse all the symbols this symbol depends on to see if any are tainted. 1860b57cec5SDimitry Andric for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), 187480093f4SDimitry Andric SE = Sym->symbol_end(); 188480093f4SDimitry Andric SI != SE; ++SI) { 1890b57cec5SDimitry Andric if (!isa<SymbolData>(*SI)) 1900b57cec5SDimitry Andric continue; 1910b57cec5SDimitry Andric 1920b57cec5SDimitry Andric if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) { 1930b57cec5SDimitry Andric if (*Tag == Kind) 1940b57cec5SDimitry Andric return true; 1950b57cec5SDimitry Andric } 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) { 1980b57cec5SDimitry Andric // If this is a SymbolDerived with a tainted parent, it's also tainted. 1990b57cec5SDimitry Andric if (isTainted(State, SD->getParentSymbol(), Kind)) 2000b57cec5SDimitry Andric return true; 2010b57cec5SDimitry Andric 2020b57cec5SDimitry Andric // If this is a SymbolDerived with the same parent symbol as another 2030b57cec5SDimitry Andric // tainted SymbolDerived and a region that's a sub-region of that tainted 2040b57cec5SDimitry Andric // symbol, it's also tainted. 2050b57cec5SDimitry Andric if (const TaintedSubRegions *Regs = 2060b57cec5SDimitry Andric State->get<DerivedSymTaint>(SD->getParentSymbol())) { 2070b57cec5SDimitry Andric const TypedValueRegion *R = SD->getRegion(); 2080b57cec5SDimitry Andric for (auto I : *Regs) { 2090b57cec5SDimitry Andric // FIXME: The logic to identify tainted regions could be more 2100b57cec5SDimitry Andric // complete. For example, this would not currently identify 2110b57cec5SDimitry Andric // overlapping fields in a union as tainted. To identify this we can 2120b57cec5SDimitry Andric // check for overlapping/nested byte offsets. 2130b57cec5SDimitry Andric if (Kind == I.second && R->isSubRegionOf(I.first)) 2140b57cec5SDimitry Andric return true; 2150b57cec5SDimitry Andric } 2160b57cec5SDimitry Andric } 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric // If memory region is tainted, data is also tainted. 2200b57cec5SDimitry Andric if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) { 2210b57cec5SDimitry Andric if (isTainted(State, SRV->getRegion(), Kind)) 2220b57cec5SDimitry Andric return true; 2230b57cec5SDimitry Andric } 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric // If this is a SymbolCast from a tainted value, it's also tainted. 2260b57cec5SDimitry Andric if (const auto *SC = dyn_cast<SymbolCast>(*SI)) { 2270b57cec5SDimitry Andric if (isTainted(State, SC->getOperand(), Kind)) 2280b57cec5SDimitry Andric return true; 2290b57cec5SDimitry Andric } 2300b57cec5SDimitry Andric } 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric return false; 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric 235a7dea167SDimitry Andric PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N, 236a7dea167SDimitry Andric BugReporterContext &BRC, 237a7dea167SDimitry Andric PathSensitiveBugReport &BR) { 2380b57cec5SDimitry Andric 2390b57cec5SDimitry Andric // Find the ExplodedNode where the taint was first introduced 2400b57cec5SDimitry Andric if (!isTainted(N->getState(), V) || 2410b57cec5SDimitry Andric isTainted(N->getFirstPred()->getState(), V)) 2420b57cec5SDimitry Andric return nullptr; 2430b57cec5SDimitry Andric 244a7dea167SDimitry Andric const Stmt *S = N->getStmtForDiagnostics(); 2450b57cec5SDimitry Andric if (!S) 2460b57cec5SDimitry Andric return nullptr; 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric const LocationContext *NCtx = N->getLocationContext(); 2490b57cec5SDimitry Andric PathDiagnosticLocation L = 2500b57cec5SDimitry Andric PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); 2510b57cec5SDimitry Andric if (!L.isValid() || !L.asLocation().isValid()) 2520b57cec5SDimitry Andric return nullptr; 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here"); 2550b57cec5SDimitry Andric } 256