1*0b57cec5SDimitry Andric //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // Defines basic, non-domain-specific mechanisms for tracking tainted values. 10*0b57cec5SDimitry Andric // 11*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 12*0b57cec5SDimitry Andric 13*0b57cec5SDimitry Andric #include "Taint.h" 14*0b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" 15*0b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 16*0b57cec5SDimitry Andric 17*0b57cec5SDimitry Andric using namespace clang; 18*0b57cec5SDimitry Andric using namespace ento; 19*0b57cec5SDimitry Andric using namespace taint; 20*0b57cec5SDimitry Andric 21*0b57cec5SDimitry Andric // Fully tainted symbols. 22*0b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) 23*0b57cec5SDimitry Andric 24*0b57cec5SDimitry Andric // Partially tainted symbols. 25*0b57cec5SDimitry Andric REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, 26*0b57cec5SDimitry Andric TaintTagType) 27*0b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) 28*0b57cec5SDimitry Andric 29*0b57cec5SDimitry Andric void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, 30*0b57cec5SDimitry Andric const char *Sep) { 31*0b57cec5SDimitry Andric TaintMapTy TM = State->get<TaintMap>(); 32*0b57cec5SDimitry Andric 33*0b57cec5SDimitry Andric if (!TM.isEmpty()) 34*0b57cec5SDimitry Andric Out << "Tainted symbols:" << NL; 35*0b57cec5SDimitry Andric 36*0b57cec5SDimitry Andric for (const auto &I : TM) 37*0b57cec5SDimitry Andric Out << I.first << " : " << I.second << NL; 38*0b57cec5SDimitry Andric } 39*0b57cec5SDimitry Andric 40*0b57cec5SDimitry Andric void dumpTaint(ProgramStateRef State) { 41*0b57cec5SDimitry Andric printTaint(State, llvm::errs()); 42*0b57cec5SDimitry Andric } 43*0b57cec5SDimitry Andric 44*0b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, 45*0b57cec5SDimitry Andric const LocationContext *LCtx, 46*0b57cec5SDimitry Andric TaintTagType Kind) { 47*0b57cec5SDimitry Andric return addTaint(State, State->getSVal(S, LCtx), Kind); 48*0b57cec5SDimitry Andric } 49*0b57cec5SDimitry Andric 50*0b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, 51*0b57cec5SDimitry Andric TaintTagType Kind) { 52*0b57cec5SDimitry Andric SymbolRef Sym = V.getAsSymbol(); 53*0b57cec5SDimitry Andric if (Sym) 54*0b57cec5SDimitry Andric return addTaint(State, Sym, Kind); 55*0b57cec5SDimitry Andric 56*0b57cec5SDimitry Andric // If the SVal represents a structure, try to mass-taint all values within the 57*0b57cec5SDimitry Andric // structure. For now it only works efficiently on lazy compound values that 58*0b57cec5SDimitry Andric // were conjured during a conservative evaluation of a function - either as 59*0b57cec5SDimitry Andric // return values of functions that return structures or arrays by value, or as 60*0b57cec5SDimitry Andric // values of structures or arrays passed into the function by reference, 61*0b57cec5SDimitry Andric // directly or through pointer aliasing. Such lazy compound values are 62*0b57cec5SDimitry Andric // characterized by having exactly one binding in their captured store within 63*0b57cec5SDimitry Andric // their parent region, which is a conjured symbol default-bound to the base 64*0b57cec5SDimitry Andric // region of the parent region. 65*0b57cec5SDimitry Andric if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { 66*0b57cec5SDimitry Andric if (Optional<SVal> binding = 67*0b57cec5SDimitry Andric State->getStateManager().getStoreManager() 68*0b57cec5SDimitry Andric .getDefaultBinding(*LCV)) { 69*0b57cec5SDimitry Andric if (SymbolRef Sym = binding->getAsSymbol()) 70*0b57cec5SDimitry Andric return addPartialTaint(State, Sym, LCV->getRegion(), Kind); 71*0b57cec5SDimitry Andric } 72*0b57cec5SDimitry Andric } 73*0b57cec5SDimitry Andric 74*0b57cec5SDimitry Andric const MemRegion *R = V.getAsRegion(); 75*0b57cec5SDimitry Andric return addTaint(State, R, Kind); 76*0b57cec5SDimitry Andric } 77*0b57cec5SDimitry Andric 78*0b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, 79*0b57cec5SDimitry Andric TaintTagType Kind) { 80*0b57cec5SDimitry Andric if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) 81*0b57cec5SDimitry Andric return addTaint(State, SR->getSymbol(), Kind); 82*0b57cec5SDimitry Andric return State; 83*0b57cec5SDimitry Andric } 84*0b57cec5SDimitry Andric 85*0b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, 86*0b57cec5SDimitry Andric TaintTagType Kind) { 87*0b57cec5SDimitry Andric // If this is a symbol cast, remove the cast before adding the taint. Taint 88*0b57cec5SDimitry Andric // is cast agnostic. 89*0b57cec5SDimitry Andric while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) 90*0b57cec5SDimitry Andric Sym = SC->getOperand(); 91*0b57cec5SDimitry Andric 92*0b57cec5SDimitry Andric ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind); 93*0b57cec5SDimitry Andric assert(NewState); 94*0b57cec5SDimitry Andric return NewState; 95*0b57cec5SDimitry Andric } 96*0b57cec5SDimitry Andric 97*0b57cec5SDimitry Andric ProgramStateRef taint::addPartialTaint(ProgramStateRef State, 98*0b57cec5SDimitry Andric SymbolRef ParentSym, 99*0b57cec5SDimitry Andric const SubRegion *SubRegion, 100*0b57cec5SDimitry Andric TaintTagType Kind) { 101*0b57cec5SDimitry Andric // Ignore partial taint if the entire parent symbol is already tainted. 102*0b57cec5SDimitry Andric if (const TaintTagType *T = State->get<TaintMap>(ParentSym)) 103*0b57cec5SDimitry Andric if (*T == Kind) 104*0b57cec5SDimitry Andric return State; 105*0b57cec5SDimitry Andric 106*0b57cec5SDimitry Andric // Partial taint applies if only a portion of the symbol is tainted. 107*0b57cec5SDimitry Andric if (SubRegion == SubRegion->getBaseRegion()) 108*0b57cec5SDimitry Andric return addTaint(State, ParentSym, Kind); 109*0b57cec5SDimitry Andric 110*0b57cec5SDimitry Andric const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym); 111*0b57cec5SDimitry Andric TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>(); 112*0b57cec5SDimitry Andric TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); 113*0b57cec5SDimitry Andric 114*0b57cec5SDimitry Andric Regs = F.add(Regs, SubRegion, Kind); 115*0b57cec5SDimitry Andric ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs); 116*0b57cec5SDimitry Andric assert(NewState); 117*0b57cec5SDimitry Andric return NewState; 118*0b57cec5SDimitry Andric } 119*0b57cec5SDimitry Andric 120*0b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const Stmt *S, 121*0b57cec5SDimitry Andric const LocationContext *LCtx, TaintTagType Kind) { 122*0b57cec5SDimitry Andric SVal val = State->getSVal(S, LCtx); 123*0b57cec5SDimitry Andric return isTainted(State, val, Kind); 124*0b57cec5SDimitry Andric } 125*0b57cec5SDimitry Andric 126*0b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { 127*0b57cec5SDimitry Andric if (const SymExpr *Sym = V.getAsSymExpr()) 128*0b57cec5SDimitry Andric return isTainted(State, Sym, Kind); 129*0b57cec5SDimitry Andric if (const MemRegion *Reg = V.getAsRegion()) 130*0b57cec5SDimitry Andric return isTainted(State, Reg, Kind); 131*0b57cec5SDimitry Andric return false; 132*0b57cec5SDimitry Andric } 133*0b57cec5SDimitry Andric 134*0b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, 135*0b57cec5SDimitry Andric TaintTagType K) { 136*0b57cec5SDimitry Andric if (!Reg) 137*0b57cec5SDimitry Andric return false; 138*0b57cec5SDimitry Andric 139*0b57cec5SDimitry Andric // Element region (array element) is tainted if either the base or the offset 140*0b57cec5SDimitry Andric // are tainted. 141*0b57cec5SDimitry Andric if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) 142*0b57cec5SDimitry Andric return isTainted(State, ER->getSuperRegion(), K) || 143*0b57cec5SDimitry Andric isTainted(State, ER->getIndex(), K); 144*0b57cec5SDimitry Andric 145*0b57cec5SDimitry Andric if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) 146*0b57cec5SDimitry Andric return isTainted(State, SR->getSymbol(), K); 147*0b57cec5SDimitry Andric 148*0b57cec5SDimitry Andric if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) 149*0b57cec5SDimitry Andric return isTainted(State, ER->getSuperRegion(), K); 150*0b57cec5SDimitry Andric 151*0b57cec5SDimitry Andric return false; 152*0b57cec5SDimitry Andric } 153*0b57cec5SDimitry Andric 154*0b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { 155*0b57cec5SDimitry Andric if (!Sym) 156*0b57cec5SDimitry Andric return false; 157*0b57cec5SDimitry Andric 158*0b57cec5SDimitry Andric // Traverse all the symbols this symbol depends on to see if any are tainted. 159*0b57cec5SDimitry Andric for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), 160*0b57cec5SDimitry Andric SE = Sym->symbol_end(); SI != SE; ++SI) { 161*0b57cec5SDimitry Andric if (!isa<SymbolData>(*SI)) 162*0b57cec5SDimitry Andric continue; 163*0b57cec5SDimitry Andric 164*0b57cec5SDimitry Andric if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) { 165*0b57cec5SDimitry Andric if (*Tag == Kind) 166*0b57cec5SDimitry Andric return true; 167*0b57cec5SDimitry Andric } 168*0b57cec5SDimitry Andric 169*0b57cec5SDimitry Andric if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) { 170*0b57cec5SDimitry Andric // If this is a SymbolDerived with a tainted parent, it's also tainted. 171*0b57cec5SDimitry Andric if (isTainted(State, SD->getParentSymbol(), Kind)) 172*0b57cec5SDimitry Andric return true; 173*0b57cec5SDimitry Andric 174*0b57cec5SDimitry Andric // If this is a SymbolDerived with the same parent symbol as another 175*0b57cec5SDimitry Andric // tainted SymbolDerived and a region that's a sub-region of that tainted 176*0b57cec5SDimitry Andric // symbol, it's also tainted. 177*0b57cec5SDimitry Andric if (const TaintedSubRegions *Regs = 178*0b57cec5SDimitry Andric State->get<DerivedSymTaint>(SD->getParentSymbol())) { 179*0b57cec5SDimitry Andric const TypedValueRegion *R = SD->getRegion(); 180*0b57cec5SDimitry Andric for (auto I : *Regs) { 181*0b57cec5SDimitry Andric // FIXME: The logic to identify tainted regions could be more 182*0b57cec5SDimitry Andric // complete. For example, this would not currently identify 183*0b57cec5SDimitry Andric // overlapping fields in a union as tainted. To identify this we can 184*0b57cec5SDimitry Andric // check for overlapping/nested byte offsets. 185*0b57cec5SDimitry Andric if (Kind == I.second && R->isSubRegionOf(I.first)) 186*0b57cec5SDimitry Andric return true; 187*0b57cec5SDimitry Andric } 188*0b57cec5SDimitry Andric } 189*0b57cec5SDimitry Andric } 190*0b57cec5SDimitry Andric 191*0b57cec5SDimitry Andric // If memory region is tainted, data is also tainted. 192*0b57cec5SDimitry Andric if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) { 193*0b57cec5SDimitry Andric if (isTainted(State, SRV->getRegion(), Kind)) 194*0b57cec5SDimitry Andric return true; 195*0b57cec5SDimitry Andric } 196*0b57cec5SDimitry Andric 197*0b57cec5SDimitry Andric // If this is a SymbolCast from a tainted value, it's also tainted. 198*0b57cec5SDimitry Andric if (const auto *SC = dyn_cast<SymbolCast>(*SI)) { 199*0b57cec5SDimitry Andric if (isTainted(State, SC->getOperand(), Kind)) 200*0b57cec5SDimitry Andric return true; 201*0b57cec5SDimitry Andric } 202*0b57cec5SDimitry Andric } 203*0b57cec5SDimitry Andric 204*0b57cec5SDimitry Andric return false; 205*0b57cec5SDimitry Andric } 206*0b57cec5SDimitry Andric 207*0b57cec5SDimitry Andric std::shared_ptr<PathDiagnosticPiece> 208*0b57cec5SDimitry Andric TaintBugVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC, 209*0b57cec5SDimitry Andric BugReport &BR) { 210*0b57cec5SDimitry Andric 211*0b57cec5SDimitry Andric // Find the ExplodedNode where the taint was first introduced 212*0b57cec5SDimitry Andric if (!isTainted(N->getState(), V) || 213*0b57cec5SDimitry Andric isTainted(N->getFirstPred()->getState(), V)) 214*0b57cec5SDimitry Andric return nullptr; 215*0b57cec5SDimitry Andric 216*0b57cec5SDimitry Andric const Stmt *S = PathDiagnosticLocation::getStmt(N); 217*0b57cec5SDimitry Andric if (!S) 218*0b57cec5SDimitry Andric return nullptr; 219*0b57cec5SDimitry Andric 220*0b57cec5SDimitry Andric const LocationContext *NCtx = N->getLocationContext(); 221*0b57cec5SDimitry Andric PathDiagnosticLocation L = 222*0b57cec5SDimitry Andric PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); 223*0b57cec5SDimitry Andric if (!L.isValid() || !L.asLocation().isValid()) 224*0b57cec5SDimitry Andric return nullptr; 225*0b57cec5SDimitry Andric 226*0b57cec5SDimitry Andric return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here"); 227*0b57cec5SDimitry Andric } 228