xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/Taint.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric //
9*0b57cec5SDimitry Andric // Defines basic, non-domain-specific mechanisms for tracking tainted values.
10*0b57cec5SDimitry Andric //
11*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
12*0b57cec5SDimitry Andric 
13*0b57cec5SDimitry Andric #include "Taint.h"
14*0b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
15*0b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
16*0b57cec5SDimitry Andric 
17*0b57cec5SDimitry Andric using namespace clang;
18*0b57cec5SDimitry Andric using namespace ento;
19*0b57cec5SDimitry Andric using namespace taint;
20*0b57cec5SDimitry Andric 
// Program-state traits used by the taint-tracking functions in this file.
21*0b57cec5SDimitry Andric // Fully tainted symbols: maps a symbol to the taint tag it carries.
22*0b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
23*0b57cec5SDimitry Andric 
24*0b57cec5SDimitry Andric // Partially tainted symbols.
// TaintedSubRegions maps a sub-region to its taint tag; DerivedSymTaint maps a
// parent symbol to the TaintedSubRegions recorded for it. addPartialTaint()
// writes these maps and isTainted() consults them for SymbolDerived symbols.
25*0b57cec5SDimitry Andric REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
26*0b57cec5SDimitry Andric                                        TaintTagType)
27*0b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
28*0b57cec5SDimitry Andric 
29*0b57cec5SDimitry Andric void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
30*0b57cec5SDimitry Andric                        const char *Sep) {
31*0b57cec5SDimitry Andric   TaintMapTy TM = State->get<TaintMap>();
32*0b57cec5SDimitry Andric 
33*0b57cec5SDimitry Andric   if (!TM.isEmpty())
34*0b57cec5SDimitry Andric     Out << "Tainted symbols:" << NL;
35*0b57cec5SDimitry Andric 
36*0b57cec5SDimitry Andric   for (const auto &I : TM)
37*0b57cec5SDimitry Andric     Out << I.first << " : " << I.second << NL;
38*0b57cec5SDimitry Andric }
39*0b57cec5SDimitry Andric 
40*0b57cec5SDimitry Andric void dumpTaint(ProgramStateRef State) {
41*0b57cec5SDimitry Andric   printTaint(State, llvm::errs());
42*0b57cec5SDimitry Andric }
43*0b57cec5SDimitry Andric 
44*0b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
45*0b57cec5SDimitry Andric                                 const LocationContext *LCtx,
46*0b57cec5SDimitry Andric                                 TaintTagType Kind) {
47*0b57cec5SDimitry Andric   return addTaint(State, State->getSVal(S, LCtx), Kind);
48*0b57cec5SDimitry Andric }
49*0b57cec5SDimitry Andric 
50*0b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
51*0b57cec5SDimitry Andric                                 TaintTagType Kind) {
52*0b57cec5SDimitry Andric   SymbolRef Sym = V.getAsSymbol();
53*0b57cec5SDimitry Andric   if (Sym)
54*0b57cec5SDimitry Andric     return addTaint(State, Sym, Kind);
55*0b57cec5SDimitry Andric 
56*0b57cec5SDimitry Andric   // If the SVal represents a structure, try to mass-taint all values within the
57*0b57cec5SDimitry Andric   // structure. For now it only works efficiently on lazy compound values that
58*0b57cec5SDimitry Andric   // were conjured during a conservative evaluation of a function - either as
59*0b57cec5SDimitry Andric   // return values of functions that return structures or arrays by value, or as
60*0b57cec5SDimitry Andric   // values of structures or arrays passed into the function by reference,
61*0b57cec5SDimitry Andric   // directly or through pointer aliasing. Such lazy compound values are
62*0b57cec5SDimitry Andric   // characterized by having exactly one binding in their captured store within
63*0b57cec5SDimitry Andric   // their parent region, which is a conjured symbol default-bound to the base
64*0b57cec5SDimitry Andric   // region of the parent region.
65*0b57cec5SDimitry Andric   if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
66*0b57cec5SDimitry Andric     if (Optional<SVal> binding =
67*0b57cec5SDimitry Andric             State->getStateManager().getStoreManager()
68*0b57cec5SDimitry Andric                                     .getDefaultBinding(*LCV)) {
69*0b57cec5SDimitry Andric       if (SymbolRef Sym = binding->getAsSymbol())
70*0b57cec5SDimitry Andric         return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
71*0b57cec5SDimitry Andric     }
72*0b57cec5SDimitry Andric   }
73*0b57cec5SDimitry Andric 
74*0b57cec5SDimitry Andric   const MemRegion *R = V.getAsRegion();
75*0b57cec5SDimitry Andric   return addTaint(State, R, Kind);
76*0b57cec5SDimitry Andric }
77*0b57cec5SDimitry Andric 
78*0b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
79*0b57cec5SDimitry Andric                                 TaintTagType Kind) {
80*0b57cec5SDimitry Andric   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
81*0b57cec5SDimitry Andric     return addTaint(State, SR->getSymbol(), Kind);
82*0b57cec5SDimitry Andric   return State;
83*0b57cec5SDimitry Andric }
84*0b57cec5SDimitry Andric 
85*0b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
86*0b57cec5SDimitry Andric                                 TaintTagType Kind) {
87*0b57cec5SDimitry Andric   // If this is a symbol cast, remove the cast before adding the taint. Taint
88*0b57cec5SDimitry Andric   // is cast agnostic.
89*0b57cec5SDimitry Andric   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
90*0b57cec5SDimitry Andric     Sym = SC->getOperand();
91*0b57cec5SDimitry Andric 
92*0b57cec5SDimitry Andric   ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
93*0b57cec5SDimitry Andric   assert(NewState);
94*0b57cec5SDimitry Andric   return NewState;
95*0b57cec5SDimitry Andric }
96*0b57cec5SDimitry Andric 
97*0b57cec5SDimitry Andric ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
98*0b57cec5SDimitry Andric                                        SymbolRef ParentSym,
99*0b57cec5SDimitry Andric                                        const SubRegion *SubRegion,
100*0b57cec5SDimitry Andric                                        TaintTagType Kind) {
101*0b57cec5SDimitry Andric   // Ignore partial taint if the entire parent symbol is already tainted.
102*0b57cec5SDimitry Andric   if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
103*0b57cec5SDimitry Andric     if (*T == Kind)
104*0b57cec5SDimitry Andric       return State;
105*0b57cec5SDimitry Andric 
106*0b57cec5SDimitry Andric   // Partial taint applies if only a portion of the symbol is tainted.
107*0b57cec5SDimitry Andric   if (SubRegion == SubRegion->getBaseRegion())
108*0b57cec5SDimitry Andric     return addTaint(State, ParentSym, Kind);
109*0b57cec5SDimitry Andric 
110*0b57cec5SDimitry Andric   const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
111*0b57cec5SDimitry Andric   TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
112*0b57cec5SDimitry Andric   TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
113*0b57cec5SDimitry Andric 
114*0b57cec5SDimitry Andric   Regs = F.add(Regs, SubRegion, Kind);
115*0b57cec5SDimitry Andric   ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
116*0b57cec5SDimitry Andric   assert(NewState);
117*0b57cec5SDimitry Andric   return NewState;
118*0b57cec5SDimitry Andric }
119*0b57cec5SDimitry Andric 
120*0b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const Stmt *S,
121*0b57cec5SDimitry Andric                       const LocationContext *LCtx, TaintTagType Kind) {
122*0b57cec5SDimitry Andric   SVal val = State->getSVal(S, LCtx);
123*0b57cec5SDimitry Andric   return isTainted(State, val, Kind);
124*0b57cec5SDimitry Andric }
125*0b57cec5SDimitry Andric 
126*0b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
127*0b57cec5SDimitry Andric   if (const SymExpr *Sym = V.getAsSymExpr())
128*0b57cec5SDimitry Andric     return isTainted(State, Sym, Kind);
129*0b57cec5SDimitry Andric   if (const MemRegion *Reg = V.getAsRegion())
130*0b57cec5SDimitry Andric     return isTainted(State, Reg, Kind);
131*0b57cec5SDimitry Andric   return false;
132*0b57cec5SDimitry Andric }
133*0b57cec5SDimitry Andric 
134*0b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
135*0b57cec5SDimitry Andric                       TaintTagType K) {
136*0b57cec5SDimitry Andric   if (!Reg)
137*0b57cec5SDimitry Andric     return false;
138*0b57cec5SDimitry Andric 
139*0b57cec5SDimitry Andric   // Element region (array element) is tainted if either the base or the offset
140*0b57cec5SDimitry Andric   // are tainted.
141*0b57cec5SDimitry Andric   if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
142*0b57cec5SDimitry Andric     return isTainted(State, ER->getSuperRegion(), K) ||
143*0b57cec5SDimitry Andric            isTainted(State, ER->getIndex(), K);
144*0b57cec5SDimitry Andric 
145*0b57cec5SDimitry Andric   if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
146*0b57cec5SDimitry Andric     return isTainted(State, SR->getSymbol(), K);
147*0b57cec5SDimitry Andric 
148*0b57cec5SDimitry Andric   if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
149*0b57cec5SDimitry Andric     return isTainted(State, ER->getSuperRegion(), K);
150*0b57cec5SDimitry Andric 
151*0b57cec5SDimitry Andric   return false;
152*0b57cec5SDimitry Andric }
153*0b57cec5SDimitry Andric 
154*0b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
155*0b57cec5SDimitry Andric   if (!Sym)
156*0b57cec5SDimitry Andric     return false;
157*0b57cec5SDimitry Andric 
158*0b57cec5SDimitry Andric   // Traverse all the symbols this symbol depends on to see if any are tainted.
159*0b57cec5SDimitry Andric   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
160*0b57cec5SDimitry Andric                                 SE = Sym->symbol_end(); SI != SE; ++SI) {
161*0b57cec5SDimitry Andric     if (!isa<SymbolData>(*SI))
162*0b57cec5SDimitry Andric       continue;
163*0b57cec5SDimitry Andric 
164*0b57cec5SDimitry Andric     if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
165*0b57cec5SDimitry Andric       if (*Tag == Kind)
166*0b57cec5SDimitry Andric         return true;
167*0b57cec5SDimitry Andric     }
168*0b57cec5SDimitry Andric 
169*0b57cec5SDimitry Andric     if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
170*0b57cec5SDimitry Andric       // If this is a SymbolDerived with a tainted parent, it's also tainted.
171*0b57cec5SDimitry Andric       if (isTainted(State, SD->getParentSymbol(), Kind))
172*0b57cec5SDimitry Andric         return true;
173*0b57cec5SDimitry Andric 
174*0b57cec5SDimitry Andric       // If this is a SymbolDerived with the same parent symbol as another
175*0b57cec5SDimitry Andric       // tainted SymbolDerived and a region that's a sub-region of that tainted
176*0b57cec5SDimitry Andric       // symbol, it's also tainted.
177*0b57cec5SDimitry Andric       if (const TaintedSubRegions *Regs =
178*0b57cec5SDimitry Andric               State->get<DerivedSymTaint>(SD->getParentSymbol())) {
179*0b57cec5SDimitry Andric         const TypedValueRegion *R = SD->getRegion();
180*0b57cec5SDimitry Andric         for (auto I : *Regs) {
181*0b57cec5SDimitry Andric           // FIXME: The logic to identify tainted regions could be more
182*0b57cec5SDimitry Andric           // complete. For example, this would not currently identify
183*0b57cec5SDimitry Andric           // overlapping fields in a union as tainted. To identify this we can
184*0b57cec5SDimitry Andric           // check for overlapping/nested byte offsets.
185*0b57cec5SDimitry Andric           if (Kind == I.second && R->isSubRegionOf(I.first))
186*0b57cec5SDimitry Andric             return true;
187*0b57cec5SDimitry Andric         }
188*0b57cec5SDimitry Andric       }
189*0b57cec5SDimitry Andric     }
190*0b57cec5SDimitry Andric 
191*0b57cec5SDimitry Andric     // If memory region is tainted, data is also tainted.
192*0b57cec5SDimitry Andric     if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
193*0b57cec5SDimitry Andric       if (isTainted(State, SRV->getRegion(), Kind))
194*0b57cec5SDimitry Andric         return true;
195*0b57cec5SDimitry Andric     }
196*0b57cec5SDimitry Andric 
197*0b57cec5SDimitry Andric     // If this is a SymbolCast from a tainted value, it's also tainted.
198*0b57cec5SDimitry Andric     if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
199*0b57cec5SDimitry Andric       if (isTainted(State, SC->getOperand(), Kind))
200*0b57cec5SDimitry Andric         return true;
201*0b57cec5SDimitry Andric     }
202*0b57cec5SDimitry Andric   }
203*0b57cec5SDimitry Andric 
204*0b57cec5SDimitry Andric   return false;
205*0b57cec5SDimitry Andric }
206*0b57cec5SDimitry Andric 
207*0b57cec5SDimitry Andric std::shared_ptr<PathDiagnosticPiece>
208*0b57cec5SDimitry Andric TaintBugVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC,
209*0b57cec5SDimitry Andric                            BugReport &BR) {
210*0b57cec5SDimitry Andric 
211*0b57cec5SDimitry Andric   // Find the ExplodedNode where the taint was first introduced
212*0b57cec5SDimitry Andric   if (!isTainted(N->getState(), V) ||
213*0b57cec5SDimitry Andric       isTainted(N->getFirstPred()->getState(), V))
214*0b57cec5SDimitry Andric     return nullptr;
215*0b57cec5SDimitry Andric 
216*0b57cec5SDimitry Andric   const Stmt *S = PathDiagnosticLocation::getStmt(N);
217*0b57cec5SDimitry Andric   if (!S)
218*0b57cec5SDimitry Andric     return nullptr;
219*0b57cec5SDimitry Andric 
220*0b57cec5SDimitry Andric   const LocationContext *NCtx = N->getLocationContext();
221*0b57cec5SDimitry Andric   PathDiagnosticLocation L =
222*0b57cec5SDimitry Andric       PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
223*0b57cec5SDimitry Andric   if (!L.isValid() || !L.asLocation().isValid())
224*0b57cec5SDimitry Andric     return nullptr;
225*0b57cec5SDimitry Andric 
226*0b57cec5SDimitry Andric   return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
227*0b57cec5SDimitry Andric }
228