xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/Taint.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines basic, non-domain-specific mechanisms for tracking tainted values.
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
1381ad6265SDimitry Andric #include "clang/StaticAnalyzer/Checkers/Taint.h"
140b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
150b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
16*bdd1243dSDimitry Andric #include <optional>
170b57cec5SDimitry Andric 
180b57cec5SDimitry Andric using namespace clang;
190b57cec5SDimitry Andric using namespace ento;
200b57cec5SDimitry Andric using namespace taint;
210b57cec5SDimitry Andric 
220b57cec5SDimitry Andric // Fully tainted symbols.
230b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric // Partially tainted symbols.
260b57cec5SDimitry Andric REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
270b57cec5SDimitry Andric                                        TaintTagType)
280b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
310b57cec5SDimitry Andric                        const char *Sep) {
320b57cec5SDimitry Andric   TaintMapTy TM = State->get<TaintMap>();
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric   if (!TM.isEmpty())
350b57cec5SDimitry Andric     Out << "Tainted symbols:" << NL;
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric   for (const auto &I : TM)
380b57cec5SDimitry Andric     Out << I.first << " : " << I.second << NL;
390b57cec5SDimitry Andric }
400b57cec5SDimitry Andric 
4181ad6265SDimitry Andric void taint::dumpTaint(ProgramStateRef State) {
4281ad6265SDimitry Andric   printTaint(State, llvm::errs());
4381ad6265SDimitry Andric }
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
460b57cec5SDimitry Andric                                 const LocationContext *LCtx,
470b57cec5SDimitry Andric                                 TaintTagType Kind) {
480b57cec5SDimitry Andric   return addTaint(State, State->getSVal(S, LCtx), Kind);
490b57cec5SDimitry Andric }
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
520b57cec5SDimitry Andric                                 TaintTagType Kind) {
530b57cec5SDimitry Andric   SymbolRef Sym = V.getAsSymbol();
540b57cec5SDimitry Andric   if (Sym)
550b57cec5SDimitry Andric     return addTaint(State, Sym, Kind);
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric   // If the SVal represents a structure, try to mass-taint all values within the
580b57cec5SDimitry Andric   // structure. For now it only works efficiently on lazy compound values that
590b57cec5SDimitry Andric   // were conjured during a conservative evaluation of a function - either as
600b57cec5SDimitry Andric   // return values of functions that return structures or arrays by value, or as
610b57cec5SDimitry Andric   // values of structures or arrays passed into the function by reference,
620b57cec5SDimitry Andric   // directly or through pointer aliasing. Such lazy compound values are
630b57cec5SDimitry Andric   // characterized by having exactly one binding in their captured store within
640b57cec5SDimitry Andric   // their parent region, which is a conjured symbol default-bound to the base
650b57cec5SDimitry Andric   // region of the parent region.
660b57cec5SDimitry Andric   if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
67*bdd1243dSDimitry Andric     if (std::optional<SVal> binding =
68480093f4SDimitry Andric             State->getStateManager().getStoreManager().getDefaultBinding(
69480093f4SDimitry Andric                 *LCV)) {
700b57cec5SDimitry Andric       if (SymbolRef Sym = binding->getAsSymbol())
710b57cec5SDimitry Andric         return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
720b57cec5SDimitry Andric     }
730b57cec5SDimitry Andric   }
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric   const MemRegion *R = V.getAsRegion();
760b57cec5SDimitry Andric   return addTaint(State, R, Kind);
770b57cec5SDimitry Andric }
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
800b57cec5SDimitry Andric                                 TaintTagType Kind) {
810b57cec5SDimitry Andric   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
820b57cec5SDimitry Andric     return addTaint(State, SR->getSymbol(), Kind);
830b57cec5SDimitry Andric   return State;
840b57cec5SDimitry Andric }
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
870b57cec5SDimitry Andric                                 TaintTagType Kind) {
880b57cec5SDimitry Andric   // If this is a symbol cast, remove the cast before adding the taint. Taint
890b57cec5SDimitry Andric   // is cast agnostic.
900b57cec5SDimitry Andric   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
910b57cec5SDimitry Andric     Sym = SC->getOperand();
920b57cec5SDimitry Andric 
930b57cec5SDimitry Andric   ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
940b57cec5SDimitry Andric   assert(NewState);
950b57cec5SDimitry Andric   return NewState;
960b57cec5SDimitry Andric }
970b57cec5SDimitry Andric 
98480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) {
99480093f4SDimitry Andric   SymbolRef Sym = V.getAsSymbol();
100480093f4SDimitry Andric   if (Sym)
101480093f4SDimitry Andric     return removeTaint(State, Sym);
102480093f4SDimitry Andric 
103480093f4SDimitry Andric   const MemRegion *R = V.getAsRegion();
104480093f4SDimitry Andric   return removeTaint(State, R);
105480093f4SDimitry Andric }
106480093f4SDimitry Andric 
107480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) {
108480093f4SDimitry Andric   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
109480093f4SDimitry Andric     return removeTaint(State, SR->getSymbol());
110480093f4SDimitry Andric   return State;
111480093f4SDimitry Andric }
112480093f4SDimitry Andric 
113480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
114480093f4SDimitry Andric   // If this is a symbol cast, remove the cast before adding the taint. Taint
115480093f4SDimitry Andric   // is cast agnostic.
116480093f4SDimitry Andric   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
117480093f4SDimitry Andric     Sym = SC->getOperand();
118480093f4SDimitry Andric 
119480093f4SDimitry Andric   ProgramStateRef NewState = State->remove<TaintMap>(Sym);
120480093f4SDimitry Andric   assert(NewState);
121480093f4SDimitry Andric   return NewState;
122480093f4SDimitry Andric }
123480093f4SDimitry Andric 
1240b57cec5SDimitry Andric ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
1250b57cec5SDimitry Andric                                        SymbolRef ParentSym,
1260b57cec5SDimitry Andric                                        const SubRegion *SubRegion,
1270b57cec5SDimitry Andric                                        TaintTagType Kind) {
1280b57cec5SDimitry Andric   // Ignore partial taint if the entire parent symbol is already tainted.
1290b57cec5SDimitry Andric   if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
1300b57cec5SDimitry Andric     if (*T == Kind)
1310b57cec5SDimitry Andric       return State;
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric   // Partial taint applies if only a portion of the symbol is tainted.
1340b57cec5SDimitry Andric   if (SubRegion == SubRegion->getBaseRegion())
1350b57cec5SDimitry Andric     return addTaint(State, ParentSym, Kind);
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric   const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
1380b57cec5SDimitry Andric   TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
1390b57cec5SDimitry Andric   TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric   Regs = F.add(Regs, SubRegion, Kind);
1420b57cec5SDimitry Andric   ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
1430b57cec5SDimitry Andric   assert(NewState);
1440b57cec5SDimitry Andric   return NewState;
1450b57cec5SDimitry Andric }
1460b57cec5SDimitry Andric 
1470b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const Stmt *S,
1480b57cec5SDimitry Andric                       const LocationContext *LCtx, TaintTagType Kind) {
1490b57cec5SDimitry Andric   SVal val = State->getSVal(S, LCtx);
1500b57cec5SDimitry Andric   return isTainted(State, val, Kind);
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
154e8d8bef9SDimitry Andric   if (SymbolRef Sym = V.getAsSymbol())
1550b57cec5SDimitry Andric     return isTainted(State, Sym, Kind);
1560b57cec5SDimitry Andric   if (const MemRegion *Reg = V.getAsRegion())
1570b57cec5SDimitry Andric     return isTainted(State, Reg, Kind);
1580b57cec5SDimitry Andric   return false;
1590b57cec5SDimitry Andric }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
1620b57cec5SDimitry Andric                       TaintTagType K) {
1630b57cec5SDimitry Andric   if (!Reg)
1640b57cec5SDimitry Andric     return false;
1650b57cec5SDimitry Andric 
1660b57cec5SDimitry Andric   // Element region (array element) is tainted if either the base or the offset
1670b57cec5SDimitry Andric   // are tainted.
1680b57cec5SDimitry Andric   if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
1690b57cec5SDimitry Andric     return isTainted(State, ER->getSuperRegion(), K) ||
1700b57cec5SDimitry Andric            isTainted(State, ER->getIndex(), K);
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
1730b57cec5SDimitry Andric     return isTainted(State, SR->getSymbol(), K);
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
1760b57cec5SDimitry Andric     return isTainted(State, ER->getSuperRegion(), K);
1770b57cec5SDimitry Andric 
1780b57cec5SDimitry Andric   return false;
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
1820b57cec5SDimitry Andric   if (!Sym)
1830b57cec5SDimitry Andric     return false;
1840b57cec5SDimitry Andric 
1850b57cec5SDimitry Andric   // Traverse all the symbols this symbol depends on to see if any are tainted.
1860b57cec5SDimitry Andric   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
187480093f4SDimitry Andric                                 SE = Sym->symbol_end();
188480093f4SDimitry Andric        SI != SE; ++SI) {
1890b57cec5SDimitry Andric     if (!isa<SymbolData>(*SI))
1900b57cec5SDimitry Andric       continue;
1910b57cec5SDimitry Andric 
1920b57cec5SDimitry Andric     if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
1930b57cec5SDimitry Andric       if (*Tag == Kind)
1940b57cec5SDimitry Andric         return true;
1950b57cec5SDimitry Andric     }
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric     if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
1980b57cec5SDimitry Andric       // If this is a SymbolDerived with a tainted parent, it's also tainted.
1990b57cec5SDimitry Andric       if (isTainted(State, SD->getParentSymbol(), Kind))
2000b57cec5SDimitry Andric         return true;
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric       // If this is a SymbolDerived with the same parent symbol as another
2030b57cec5SDimitry Andric       // tainted SymbolDerived and a region that's a sub-region of that tainted
2040b57cec5SDimitry Andric       // symbol, it's also tainted.
2050b57cec5SDimitry Andric       if (const TaintedSubRegions *Regs =
2060b57cec5SDimitry Andric               State->get<DerivedSymTaint>(SD->getParentSymbol())) {
2070b57cec5SDimitry Andric         const TypedValueRegion *R = SD->getRegion();
2080b57cec5SDimitry Andric         for (auto I : *Regs) {
2090b57cec5SDimitry Andric           // FIXME: The logic to identify tainted regions could be more
2100b57cec5SDimitry Andric           // complete. For example, this would not currently identify
2110b57cec5SDimitry Andric           // overlapping fields in a union as tainted. To identify this we can
2120b57cec5SDimitry Andric           // check for overlapping/nested byte offsets.
2130b57cec5SDimitry Andric           if (Kind == I.second && R->isSubRegionOf(I.first))
2140b57cec5SDimitry Andric             return true;
2150b57cec5SDimitry Andric         }
2160b57cec5SDimitry Andric       }
2170b57cec5SDimitry Andric     }
2180b57cec5SDimitry Andric 
2190b57cec5SDimitry Andric     // If memory region is tainted, data is also tainted.
2200b57cec5SDimitry Andric     if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
2210b57cec5SDimitry Andric       if (isTainted(State, SRV->getRegion(), Kind))
2220b57cec5SDimitry Andric         return true;
2230b57cec5SDimitry Andric     }
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric     // If this is a SymbolCast from a tainted value, it's also tainted.
2260b57cec5SDimitry Andric     if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
2270b57cec5SDimitry Andric       if (isTainted(State, SC->getOperand(), Kind))
2280b57cec5SDimitry Andric         return true;
2290b57cec5SDimitry Andric     }
2300b57cec5SDimitry Andric   }
2310b57cec5SDimitry Andric 
2320b57cec5SDimitry Andric   return false;
2330b57cec5SDimitry Andric }
2340b57cec5SDimitry Andric 
235a7dea167SDimitry Andric PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
236a7dea167SDimitry Andric                                                   BugReporterContext &BRC,
237a7dea167SDimitry Andric                                                   PathSensitiveBugReport &BR) {
2380b57cec5SDimitry Andric 
2390b57cec5SDimitry Andric   // Find the ExplodedNode where the taint was first introduced
2400b57cec5SDimitry Andric   if (!isTainted(N->getState(), V) ||
2410b57cec5SDimitry Andric       isTainted(N->getFirstPred()->getState(), V))
2420b57cec5SDimitry Andric     return nullptr;
2430b57cec5SDimitry Andric 
244a7dea167SDimitry Andric   const Stmt *S = N->getStmtForDiagnostics();
2450b57cec5SDimitry Andric   if (!S)
2460b57cec5SDimitry Andric     return nullptr;
2470b57cec5SDimitry Andric 
2480b57cec5SDimitry Andric   const LocationContext *NCtx = N->getLocationContext();
2490b57cec5SDimitry Andric   PathDiagnosticLocation L =
2500b57cec5SDimitry Andric       PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
2510b57cec5SDimitry Andric   if (!L.isValid() || !L.asLocation().isValid())
2520b57cec5SDimitry Andric     return nullptr;
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric   return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
2550b57cec5SDimitry Andric }
256