xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/Taint.cpp (revision 480093f4440d54b30b3025afeac24b48f2ba7a2e)
10b57cec5SDimitry Andric //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // Defines basic, non-domain-specific mechanisms for tracking tainted values.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "Taint.h"
140b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
150b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric using namespace clang;
180b57cec5SDimitry Andric using namespace ento;
190b57cec5SDimitry Andric using namespace taint;
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric // Fully tainted symbols.
220b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric // Partially tainted symbols.
250b57cec5SDimitry Andric REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
260b57cec5SDimitry Andric                                        TaintTagType)
270b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
300b57cec5SDimitry Andric                        const char *Sep) {
310b57cec5SDimitry Andric   TaintMapTy TM = State->get<TaintMap>();
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric   if (!TM.isEmpty())
340b57cec5SDimitry Andric     Out << "Tainted symbols:" << NL;
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric   for (const auto &I : TM)
370b57cec5SDimitry Andric     Out << I.first << " : " << I.second << NL;
380b57cec5SDimitry Andric }
390b57cec5SDimitry Andric 
40*480093f4SDimitry Andric void dumpTaint(ProgramStateRef State) { printTaint(State, llvm::errs()); }
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
430b57cec5SDimitry Andric                                 const LocationContext *LCtx,
440b57cec5SDimitry Andric                                 TaintTagType Kind) {
450b57cec5SDimitry Andric   return addTaint(State, State->getSVal(S, LCtx), Kind);
460b57cec5SDimitry Andric }
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
490b57cec5SDimitry Andric                                 TaintTagType Kind) {
500b57cec5SDimitry Andric   SymbolRef Sym = V.getAsSymbol();
510b57cec5SDimitry Andric   if (Sym)
520b57cec5SDimitry Andric     return addTaint(State, Sym, Kind);
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric   // If the SVal represents a structure, try to mass-taint all values within the
550b57cec5SDimitry Andric   // structure. For now it only works efficiently on lazy compound values that
560b57cec5SDimitry Andric   // were conjured during a conservative evaluation of a function - either as
570b57cec5SDimitry Andric   // return values of functions that return structures or arrays by value, or as
580b57cec5SDimitry Andric   // values of structures or arrays passed into the function by reference,
590b57cec5SDimitry Andric   // directly or through pointer aliasing. Such lazy compound values are
600b57cec5SDimitry Andric   // characterized by having exactly one binding in their captured store within
610b57cec5SDimitry Andric   // their parent region, which is a conjured symbol default-bound to the base
620b57cec5SDimitry Andric   // region of the parent region.
630b57cec5SDimitry Andric   if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
640b57cec5SDimitry Andric     if (Optional<SVal> binding =
65*480093f4SDimitry Andric             State->getStateManager().getStoreManager().getDefaultBinding(
66*480093f4SDimitry Andric                 *LCV)) {
670b57cec5SDimitry Andric       if (SymbolRef Sym = binding->getAsSymbol())
680b57cec5SDimitry Andric         return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
690b57cec5SDimitry Andric     }
700b57cec5SDimitry Andric   }
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric   const MemRegion *R = V.getAsRegion();
730b57cec5SDimitry Andric   return addTaint(State, R, Kind);
740b57cec5SDimitry Andric }
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
770b57cec5SDimitry Andric                                 TaintTagType Kind) {
780b57cec5SDimitry Andric   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
790b57cec5SDimitry Andric     return addTaint(State, SR->getSymbol(), Kind);
800b57cec5SDimitry Andric   return State;
810b57cec5SDimitry Andric }
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
840b57cec5SDimitry Andric                                 TaintTagType Kind) {
850b57cec5SDimitry Andric   // If this is a symbol cast, remove the cast before adding the taint. Taint
860b57cec5SDimitry Andric   // is cast agnostic.
870b57cec5SDimitry Andric   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
880b57cec5SDimitry Andric     Sym = SC->getOperand();
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric   ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
910b57cec5SDimitry Andric   assert(NewState);
920b57cec5SDimitry Andric   return NewState;
930b57cec5SDimitry Andric }
940b57cec5SDimitry Andric 
95*480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) {
96*480093f4SDimitry Andric   SymbolRef Sym = V.getAsSymbol();
97*480093f4SDimitry Andric   if (Sym)
98*480093f4SDimitry Andric     return removeTaint(State, Sym);
99*480093f4SDimitry Andric 
100*480093f4SDimitry Andric   const MemRegion *R = V.getAsRegion();
101*480093f4SDimitry Andric   return removeTaint(State, R);
102*480093f4SDimitry Andric }
103*480093f4SDimitry Andric 
104*480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) {
105*480093f4SDimitry Andric   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
106*480093f4SDimitry Andric     return removeTaint(State, SR->getSymbol());
107*480093f4SDimitry Andric   return State;
108*480093f4SDimitry Andric }
109*480093f4SDimitry Andric 
110*480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
111*480093f4SDimitry Andric   // If this is a symbol cast, remove the cast before adding the taint. Taint
112*480093f4SDimitry Andric   // is cast agnostic.
113*480093f4SDimitry Andric   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
114*480093f4SDimitry Andric     Sym = SC->getOperand();
115*480093f4SDimitry Andric 
116*480093f4SDimitry Andric   ProgramStateRef NewState = State->remove<TaintMap>(Sym);
117*480093f4SDimitry Andric   assert(NewState);
118*480093f4SDimitry Andric   return NewState;
119*480093f4SDimitry Andric }
120*480093f4SDimitry Andric 
1210b57cec5SDimitry Andric ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
1220b57cec5SDimitry Andric                                        SymbolRef ParentSym,
1230b57cec5SDimitry Andric                                        const SubRegion *SubRegion,
1240b57cec5SDimitry Andric                                        TaintTagType Kind) {
1250b57cec5SDimitry Andric   // Ignore partial taint if the entire parent symbol is already tainted.
1260b57cec5SDimitry Andric   if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
1270b57cec5SDimitry Andric     if (*T == Kind)
1280b57cec5SDimitry Andric       return State;
1290b57cec5SDimitry Andric 
1300b57cec5SDimitry Andric   // Partial taint applies if only a portion of the symbol is tainted.
1310b57cec5SDimitry Andric   if (SubRegion == SubRegion->getBaseRegion())
1320b57cec5SDimitry Andric     return addTaint(State, ParentSym, Kind);
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric   const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
1350b57cec5SDimitry Andric   TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
1360b57cec5SDimitry Andric   TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric   Regs = F.add(Regs, SubRegion, Kind);
1390b57cec5SDimitry Andric   ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
1400b57cec5SDimitry Andric   assert(NewState);
1410b57cec5SDimitry Andric   return NewState;
1420b57cec5SDimitry Andric }
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const Stmt *S,
1450b57cec5SDimitry Andric                       const LocationContext *LCtx, TaintTagType Kind) {
1460b57cec5SDimitry Andric   SVal val = State->getSVal(S, LCtx);
1470b57cec5SDimitry Andric   return isTainted(State, val, Kind);
1480b57cec5SDimitry Andric }
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
1510b57cec5SDimitry Andric   if (const SymExpr *Sym = V.getAsSymExpr())
1520b57cec5SDimitry Andric     return isTainted(State, Sym, Kind);
1530b57cec5SDimitry Andric   if (const MemRegion *Reg = V.getAsRegion())
1540b57cec5SDimitry Andric     return isTainted(State, Reg, Kind);
1550b57cec5SDimitry Andric   return false;
1560b57cec5SDimitry Andric }
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
1590b57cec5SDimitry Andric                       TaintTagType K) {
1600b57cec5SDimitry Andric   if (!Reg)
1610b57cec5SDimitry Andric     return false;
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric   // Element region (array element) is tainted if either the base or the offset
1640b57cec5SDimitry Andric   // are tainted.
1650b57cec5SDimitry Andric   if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
1660b57cec5SDimitry Andric     return isTainted(State, ER->getSuperRegion(), K) ||
1670b57cec5SDimitry Andric            isTainted(State, ER->getIndex(), K);
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric   if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
1700b57cec5SDimitry Andric     return isTainted(State, SR->getSymbol(), K);
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
1730b57cec5SDimitry Andric     return isTainted(State, ER->getSuperRegion(), K);
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   return false;
1760b57cec5SDimitry Andric }
1770b57cec5SDimitry Andric 
1780b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
1790b57cec5SDimitry Andric   if (!Sym)
1800b57cec5SDimitry Andric     return false;
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric   // Traverse all the symbols this symbol depends on to see if any are tainted.
1830b57cec5SDimitry Andric   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
184*480093f4SDimitry Andric                                 SE = Sym->symbol_end();
185*480093f4SDimitry Andric        SI != SE; ++SI) {
1860b57cec5SDimitry Andric     if (!isa<SymbolData>(*SI))
1870b57cec5SDimitry Andric       continue;
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric     if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
1900b57cec5SDimitry Andric       if (*Tag == Kind)
1910b57cec5SDimitry Andric         return true;
1920b57cec5SDimitry Andric     }
1930b57cec5SDimitry Andric 
1940b57cec5SDimitry Andric     if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
1950b57cec5SDimitry Andric       // If this is a SymbolDerived with a tainted parent, it's also tainted.
1960b57cec5SDimitry Andric       if (isTainted(State, SD->getParentSymbol(), Kind))
1970b57cec5SDimitry Andric         return true;
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric       // If this is a SymbolDerived with the same parent symbol as another
2000b57cec5SDimitry Andric       // tainted SymbolDerived and a region that's a sub-region of that tainted
2010b57cec5SDimitry Andric       // symbol, it's also tainted.
2020b57cec5SDimitry Andric       if (const TaintedSubRegions *Regs =
2030b57cec5SDimitry Andric               State->get<DerivedSymTaint>(SD->getParentSymbol())) {
2040b57cec5SDimitry Andric         const TypedValueRegion *R = SD->getRegion();
2050b57cec5SDimitry Andric         for (auto I : *Regs) {
2060b57cec5SDimitry Andric           // FIXME: The logic to identify tainted regions could be more
2070b57cec5SDimitry Andric           // complete. For example, this would not currently identify
2080b57cec5SDimitry Andric           // overlapping fields in a union as tainted. To identify this we can
2090b57cec5SDimitry Andric           // check for overlapping/nested byte offsets.
2100b57cec5SDimitry Andric           if (Kind == I.second && R->isSubRegionOf(I.first))
2110b57cec5SDimitry Andric             return true;
2120b57cec5SDimitry Andric         }
2130b57cec5SDimitry Andric       }
2140b57cec5SDimitry Andric     }
2150b57cec5SDimitry Andric 
2160b57cec5SDimitry Andric     // If memory region is tainted, data is also tainted.
2170b57cec5SDimitry Andric     if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
2180b57cec5SDimitry Andric       if (isTainted(State, SRV->getRegion(), Kind))
2190b57cec5SDimitry Andric         return true;
2200b57cec5SDimitry Andric     }
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric     // If this is a SymbolCast from a tainted value, it's also tainted.
2230b57cec5SDimitry Andric     if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
2240b57cec5SDimitry Andric       if (isTainted(State, SC->getOperand(), Kind))
2250b57cec5SDimitry Andric         return true;
2260b57cec5SDimitry Andric     }
2270b57cec5SDimitry Andric   }
2280b57cec5SDimitry Andric 
2290b57cec5SDimitry Andric   return false;
2300b57cec5SDimitry Andric }
2310b57cec5SDimitry Andric 
232a7dea167SDimitry Andric PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
233a7dea167SDimitry Andric                                                   BugReporterContext &BRC,
234a7dea167SDimitry Andric                                                   PathSensitiveBugReport &BR) {
2350b57cec5SDimitry Andric 
2360b57cec5SDimitry Andric   // Find the ExplodedNode where the taint was first introduced
2370b57cec5SDimitry Andric   if (!isTainted(N->getState(), V) ||
2380b57cec5SDimitry Andric       isTainted(N->getFirstPred()->getState(), V))
2390b57cec5SDimitry Andric     return nullptr;
2400b57cec5SDimitry Andric 
241a7dea167SDimitry Andric   const Stmt *S = N->getStmtForDiagnostics();
2420b57cec5SDimitry Andric   if (!S)
2430b57cec5SDimitry Andric     return nullptr;
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric   const LocationContext *NCtx = N->getLocationContext();
2460b57cec5SDimitry Andric   PathDiagnosticLocation L =
2470b57cec5SDimitry Andric       PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
2480b57cec5SDimitry Andric   if (!L.isValid() || !L.asLocation().isValid())
2490b57cec5SDimitry Andric     return nullptr;
2500b57cec5SDimitry Andric 
2510b57cec5SDimitry Andric   return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
2520b57cec5SDimitry Andric }
253