xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/Taint.cpp (revision a7dea1671b87c07d2d266f836bfa8b58efc7c134)
10b57cec5SDimitry Andric //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // Defines basic, non-domain-specific mechanisms for tracking tainted values.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "Taint.h"
140b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
150b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric using namespace clang;
180b57cec5SDimitry Andric using namespace ento;
190b57cec5SDimitry Andric using namespace taint;
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric // Fully tainted symbols.
220b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric // Partially tainted symbols.
250b57cec5SDimitry Andric REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
260b57cec5SDimitry Andric                                        TaintTagType)
270b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
300b57cec5SDimitry Andric                        const char *Sep) {
310b57cec5SDimitry Andric   TaintMapTy TM = State->get<TaintMap>();
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric   if (!TM.isEmpty())
340b57cec5SDimitry Andric     Out << "Tainted symbols:" << NL;
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric   for (const auto &I : TM)
370b57cec5SDimitry Andric     Out << I.first << " : " << I.second << NL;
380b57cec5SDimitry Andric }
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric void dumpTaint(ProgramStateRef State) {
410b57cec5SDimitry Andric   printTaint(State, llvm::errs());
420b57cec5SDimitry Andric }
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
450b57cec5SDimitry Andric                                 const LocationContext *LCtx,
460b57cec5SDimitry Andric                                 TaintTagType Kind) {
470b57cec5SDimitry Andric   return addTaint(State, State->getSVal(S, LCtx), Kind);
480b57cec5SDimitry Andric }
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
510b57cec5SDimitry Andric                                 TaintTagType Kind) {
520b57cec5SDimitry Andric   SymbolRef Sym = V.getAsSymbol();
530b57cec5SDimitry Andric   if (Sym)
540b57cec5SDimitry Andric     return addTaint(State, Sym, Kind);
550b57cec5SDimitry Andric 
560b57cec5SDimitry Andric   // If the SVal represents a structure, try to mass-taint all values within the
570b57cec5SDimitry Andric   // structure. For now it only works efficiently on lazy compound values that
580b57cec5SDimitry Andric   // were conjured during a conservative evaluation of a function - either as
590b57cec5SDimitry Andric   // return values of functions that return structures or arrays by value, or as
600b57cec5SDimitry Andric   // values of structures or arrays passed into the function by reference,
610b57cec5SDimitry Andric   // directly or through pointer aliasing. Such lazy compound values are
620b57cec5SDimitry Andric   // characterized by having exactly one binding in their captured store within
630b57cec5SDimitry Andric   // their parent region, which is a conjured symbol default-bound to the base
640b57cec5SDimitry Andric   // region of the parent region.
650b57cec5SDimitry Andric   if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
660b57cec5SDimitry Andric     if (Optional<SVal> binding =
670b57cec5SDimitry Andric             State->getStateManager().getStoreManager()
680b57cec5SDimitry Andric                                     .getDefaultBinding(*LCV)) {
690b57cec5SDimitry Andric       if (SymbolRef Sym = binding->getAsSymbol())
700b57cec5SDimitry Andric         return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
710b57cec5SDimitry Andric     }
720b57cec5SDimitry Andric   }
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric   const MemRegion *R = V.getAsRegion();
750b57cec5SDimitry Andric   return addTaint(State, R, Kind);
760b57cec5SDimitry Andric }
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
790b57cec5SDimitry Andric                                 TaintTagType Kind) {
800b57cec5SDimitry Andric   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
810b57cec5SDimitry Andric     return addTaint(State, SR->getSymbol(), Kind);
820b57cec5SDimitry Andric   return State;
830b57cec5SDimitry Andric }
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
860b57cec5SDimitry Andric                                 TaintTagType Kind) {
870b57cec5SDimitry Andric   // If this is a symbol cast, remove the cast before adding the taint. Taint
880b57cec5SDimitry Andric   // is cast agnostic.
890b57cec5SDimitry Andric   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
900b57cec5SDimitry Andric     Sym = SC->getOperand();
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric   ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
930b57cec5SDimitry Andric   assert(NewState);
940b57cec5SDimitry Andric   return NewState;
950b57cec5SDimitry Andric }
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
980b57cec5SDimitry Andric                                        SymbolRef ParentSym,
990b57cec5SDimitry Andric                                        const SubRegion *SubRegion,
1000b57cec5SDimitry Andric                                        TaintTagType Kind) {
1010b57cec5SDimitry Andric   // Ignore partial taint if the entire parent symbol is already tainted.
1020b57cec5SDimitry Andric   if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
1030b57cec5SDimitry Andric     if (*T == Kind)
1040b57cec5SDimitry Andric       return State;
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric   // Partial taint applies if only a portion of the symbol is tainted.
1070b57cec5SDimitry Andric   if (SubRegion == SubRegion->getBaseRegion())
1080b57cec5SDimitry Andric     return addTaint(State, ParentSym, Kind);
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric   const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
1110b57cec5SDimitry Andric   TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
1120b57cec5SDimitry Andric   TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric   Regs = F.add(Regs, SubRegion, Kind);
1150b57cec5SDimitry Andric   ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
1160b57cec5SDimitry Andric   assert(NewState);
1170b57cec5SDimitry Andric   return NewState;
1180b57cec5SDimitry Andric }
1190b57cec5SDimitry Andric 
1200b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const Stmt *S,
1210b57cec5SDimitry Andric                       const LocationContext *LCtx, TaintTagType Kind) {
1220b57cec5SDimitry Andric   SVal val = State->getSVal(S, LCtx);
1230b57cec5SDimitry Andric   return isTainted(State, val, Kind);
1240b57cec5SDimitry Andric }
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
1270b57cec5SDimitry Andric   if (const SymExpr *Sym = V.getAsSymExpr())
1280b57cec5SDimitry Andric     return isTainted(State, Sym, Kind);
1290b57cec5SDimitry Andric   if (const MemRegion *Reg = V.getAsRegion())
1300b57cec5SDimitry Andric     return isTainted(State, Reg, Kind);
1310b57cec5SDimitry Andric   return false;
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
1350b57cec5SDimitry Andric                       TaintTagType K) {
1360b57cec5SDimitry Andric   if (!Reg)
1370b57cec5SDimitry Andric     return false;
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric   // Element region (array element) is tainted if either the base or the offset
1400b57cec5SDimitry Andric   // are tainted.
1410b57cec5SDimitry Andric   if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
1420b57cec5SDimitry Andric     return isTainted(State, ER->getSuperRegion(), K) ||
1430b57cec5SDimitry Andric            isTainted(State, ER->getIndex(), K);
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric   if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
1460b57cec5SDimitry Andric     return isTainted(State, SR->getSymbol(), K);
1470b57cec5SDimitry Andric 
1480b57cec5SDimitry Andric   if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
1490b57cec5SDimitry Andric     return isTainted(State, ER->getSuperRegion(), K);
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric   return false;
1520b57cec5SDimitry Andric }
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
1550b57cec5SDimitry Andric   if (!Sym)
1560b57cec5SDimitry Andric     return false;
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric   // Traverse all the symbols this symbol depends on to see if any are tainted.
1590b57cec5SDimitry Andric   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
1600b57cec5SDimitry Andric                                 SE = Sym->symbol_end(); SI != SE; ++SI) {
1610b57cec5SDimitry Andric     if (!isa<SymbolData>(*SI))
1620b57cec5SDimitry Andric       continue;
1630b57cec5SDimitry Andric 
1640b57cec5SDimitry Andric     if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
1650b57cec5SDimitry Andric       if (*Tag == Kind)
1660b57cec5SDimitry Andric         return true;
1670b57cec5SDimitry Andric     }
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric     if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
1700b57cec5SDimitry Andric       // If this is a SymbolDerived with a tainted parent, it's also tainted.
1710b57cec5SDimitry Andric       if (isTainted(State, SD->getParentSymbol(), Kind))
1720b57cec5SDimitry Andric         return true;
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric       // If this is a SymbolDerived with the same parent symbol as another
1750b57cec5SDimitry Andric       // tainted SymbolDerived and a region that's a sub-region of that tainted
1760b57cec5SDimitry Andric       // symbol, it's also tainted.
1770b57cec5SDimitry Andric       if (const TaintedSubRegions *Regs =
1780b57cec5SDimitry Andric               State->get<DerivedSymTaint>(SD->getParentSymbol())) {
1790b57cec5SDimitry Andric         const TypedValueRegion *R = SD->getRegion();
1800b57cec5SDimitry Andric         for (auto I : *Regs) {
1810b57cec5SDimitry Andric           // FIXME: The logic to identify tainted regions could be more
1820b57cec5SDimitry Andric           // complete. For example, this would not currently identify
1830b57cec5SDimitry Andric           // overlapping fields in a union as tainted. To identify this we can
1840b57cec5SDimitry Andric           // check for overlapping/nested byte offsets.
1850b57cec5SDimitry Andric           if (Kind == I.second && R->isSubRegionOf(I.first))
1860b57cec5SDimitry Andric             return true;
1870b57cec5SDimitry Andric         }
1880b57cec5SDimitry Andric       }
1890b57cec5SDimitry Andric     }
1900b57cec5SDimitry Andric 
1910b57cec5SDimitry Andric     // If memory region is tainted, data is also tainted.
1920b57cec5SDimitry Andric     if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
1930b57cec5SDimitry Andric       if (isTainted(State, SRV->getRegion(), Kind))
1940b57cec5SDimitry Andric         return true;
1950b57cec5SDimitry Andric     }
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric     // If this is a SymbolCast from a tainted value, it's also tainted.
1980b57cec5SDimitry Andric     if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
1990b57cec5SDimitry Andric       if (isTainted(State, SC->getOperand(), Kind))
2000b57cec5SDimitry Andric         return true;
2010b57cec5SDimitry Andric     }
2020b57cec5SDimitry Andric   }
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric   return false;
2050b57cec5SDimitry Andric }
2060b57cec5SDimitry Andric 
207*a7dea167SDimitry Andric PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
208*a7dea167SDimitry Andric                                                   BugReporterContext &BRC,
209*a7dea167SDimitry Andric                                                   PathSensitiveBugReport &BR) {
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric   // Find the ExplodedNode where the taint was first introduced
2120b57cec5SDimitry Andric   if (!isTainted(N->getState(), V) ||
2130b57cec5SDimitry Andric       isTainted(N->getFirstPred()->getState(), V))
2140b57cec5SDimitry Andric     return nullptr;
2150b57cec5SDimitry Andric 
216*a7dea167SDimitry Andric   const Stmt *S = N->getStmtForDiagnostics();
2170b57cec5SDimitry Andric   if (!S)
2180b57cec5SDimitry Andric     return nullptr;
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric   const LocationContext *NCtx = N->getLocationContext();
2210b57cec5SDimitry Andric   PathDiagnosticLocation L =
2220b57cec5SDimitry Andric       PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
2230b57cec5SDimitry Andric   if (!L.isValid() || !L.asLocation().isValid())
2240b57cec5SDimitry Andric     return nullptr;
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric   return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
2270b57cec5SDimitry Andric }
228