10b57cec5SDimitry Andric //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Defines basic, non-domain-specific mechanisms for tracking tainted values. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 1381ad6265SDimitry Andric #include "clang/StaticAnalyzer/Checkers/Taint.h" 140b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" 150b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 16bdd1243dSDimitry Andric #include <optional> 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric using namespace clang; 190b57cec5SDimitry Andric using namespace ento; 200b57cec5SDimitry Andric using namespace taint; 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric // Fully tainted symbols. 230b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric // Partially tainted symbols. 260b57cec5SDimitry Andric REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *, 270b57cec5SDimitry Andric TaintTagType) 280b57cec5SDimitry Andric REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, 310b57cec5SDimitry Andric const char *Sep) { 320b57cec5SDimitry Andric TaintMapTy TM = State->get<TaintMap>(); 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric if (!TM.isEmpty()) 350b57cec5SDimitry Andric Out << "Tainted symbols:" << NL; 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric for (const auto &I : TM) 380b57cec5SDimitry Andric Out << I.first << " : " << I.second << NL; 390b57cec5SDimitry Andric } 400b57cec5SDimitry Andric 4181ad6265SDimitry Andric void taint::dumpTaint(ProgramStateRef State) { 4281ad6265SDimitry Andric printTaint(State, llvm::errs()); 4381ad6265SDimitry Andric } 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, 460b57cec5SDimitry Andric const LocationContext *LCtx, 470b57cec5SDimitry Andric TaintTagType Kind) { 480b57cec5SDimitry Andric return addTaint(State, State->getSVal(S, LCtx), Kind); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, 520b57cec5SDimitry Andric TaintTagType Kind) { 530b57cec5SDimitry Andric SymbolRef Sym = V.getAsSymbol(); 540b57cec5SDimitry Andric if (Sym) 550b57cec5SDimitry Andric return addTaint(State, Sym, Kind); 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric // If the SVal represents a structure, try to mass-taint all values within the 580b57cec5SDimitry Andric // structure. For now it only works efficiently on lazy compound values that 590b57cec5SDimitry Andric // were conjured during a conservative evaluation of a function - either as 600b57cec5SDimitry Andric // return values of functions that return structures or arrays by value, or as 610b57cec5SDimitry Andric // values of structures or arrays passed into the function by reference, 620b57cec5SDimitry Andric // directly or through pointer aliasing. Such lazy compound values are 630b57cec5SDimitry Andric // characterized by having exactly one binding in their captured store within 640b57cec5SDimitry Andric // their parent region, which is a conjured symbol default-bound to the base 650b57cec5SDimitry Andric // region of the parent region. 660b57cec5SDimitry Andric if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { 67bdd1243dSDimitry Andric if (std::optional<SVal> binding = 68480093f4SDimitry Andric State->getStateManager().getStoreManager().getDefaultBinding( 69480093f4SDimitry Andric *LCV)) { 700b57cec5SDimitry Andric if (SymbolRef Sym = binding->getAsSymbol()) 710b57cec5SDimitry Andric return addPartialTaint(State, Sym, LCV->getRegion(), Kind); 720b57cec5SDimitry Andric } 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric const MemRegion *R = V.getAsRegion(); 760b57cec5SDimitry Andric return addTaint(State, R, Kind); 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, 800b57cec5SDimitry Andric TaintTagType Kind) { 810b57cec5SDimitry Andric if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) 820b57cec5SDimitry Andric return addTaint(State, SR->getSymbol(), Kind); 830b57cec5SDimitry Andric return State; 840b57cec5SDimitry Andric } 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, 870b57cec5SDimitry Andric TaintTagType Kind) { 880b57cec5SDimitry Andric // If this is a symbol cast, remove the cast before adding the taint. Taint 890b57cec5SDimitry Andric // is cast agnostic. 900b57cec5SDimitry Andric while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) 910b57cec5SDimitry Andric Sym = SC->getOperand(); 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind); 940b57cec5SDimitry Andric assert(NewState); 950b57cec5SDimitry Andric return NewState; 960b57cec5SDimitry Andric } 970b57cec5SDimitry Andric 98480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) { 99480093f4SDimitry Andric SymbolRef Sym = V.getAsSymbol(); 100480093f4SDimitry Andric if (Sym) 101480093f4SDimitry Andric return removeTaint(State, Sym); 102480093f4SDimitry Andric 103480093f4SDimitry Andric const MemRegion *R = V.getAsRegion(); 104480093f4SDimitry Andric return removeTaint(State, R); 105480093f4SDimitry Andric } 106480093f4SDimitry Andric 107480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) { 108480093f4SDimitry Andric if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) 109480093f4SDimitry Andric return removeTaint(State, SR->getSymbol()); 110480093f4SDimitry Andric return State; 111480093f4SDimitry Andric } 112480093f4SDimitry Andric 113480093f4SDimitry Andric ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) { 114480093f4SDimitry Andric // If this is a symbol cast, remove the cast before adding the taint. Taint 115480093f4SDimitry Andric // is cast agnostic. 116480093f4SDimitry Andric while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) 117480093f4SDimitry Andric Sym = SC->getOperand(); 118480093f4SDimitry Andric 119480093f4SDimitry Andric ProgramStateRef NewState = State->remove<TaintMap>(Sym); 120480093f4SDimitry Andric assert(NewState); 121480093f4SDimitry Andric return NewState; 122480093f4SDimitry Andric } 123480093f4SDimitry Andric 1240b57cec5SDimitry Andric ProgramStateRef taint::addPartialTaint(ProgramStateRef State, 1250b57cec5SDimitry Andric SymbolRef ParentSym, 1260b57cec5SDimitry Andric const SubRegion *SubRegion, 1270b57cec5SDimitry Andric TaintTagType Kind) { 1280b57cec5SDimitry Andric // Ignore partial taint if the entire parent symbol is already tainted. 1290b57cec5SDimitry Andric if (const TaintTagType *T = State->get<TaintMap>(ParentSym)) 1300b57cec5SDimitry Andric if (*T == Kind) 1310b57cec5SDimitry Andric return State; 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric // Partial taint applies if only a portion of the symbol is tainted. 1340b57cec5SDimitry Andric if (SubRegion == SubRegion->getBaseRegion()) 1350b57cec5SDimitry Andric return addTaint(State, ParentSym, Kind); 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym); 1380b57cec5SDimitry Andric TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>(); 1390b57cec5SDimitry Andric TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric Regs = F.add(Regs, SubRegion, Kind); 1420b57cec5SDimitry Andric ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs); 1430b57cec5SDimitry Andric assert(NewState); 1440b57cec5SDimitry Andric return NewState; 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const Stmt *S, 1480b57cec5SDimitry Andric const LocationContext *LCtx, TaintTagType Kind) { 14906c3fb27SDimitry Andric return !getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/true) 15006c3fb27SDimitry Andric .empty(); 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { 15406c3fb27SDimitry Andric return !getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/true) 15506c3fb27SDimitry Andric .empty(); 1560b57cec5SDimitry Andric } 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, 1590b57cec5SDimitry Andric TaintTagType K) { 16006c3fb27SDimitry Andric return !getTaintedSymbolsImpl(State, Reg, K, /*ReturnFirstOnly=*/true) 16106c3fb27SDimitry Andric .empty(); 1620b57cec5SDimitry Andric } 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { 16506c3fb27SDimitry Andric return !getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/true) 16606c3fb27SDimitry Andric .empty(); 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric 16906c3fb27SDimitry Andric std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, 17006c3fb27SDimitry Andric const Stmt *S, 17106c3fb27SDimitry Andric const LocationContext *LCtx, 17206c3fb27SDimitry Andric TaintTagType Kind) { 17306c3fb27SDimitry Andric return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/false); 17406c3fb27SDimitry Andric } 17506c3fb27SDimitry Andric 17606c3fb27SDimitry Andric std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V, 17706c3fb27SDimitry Andric TaintTagType Kind) { 17806c3fb27SDimitry Andric return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/false); 17906c3fb27SDimitry Andric } 18006c3fb27SDimitry Andric 18106c3fb27SDimitry Andric std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, 18206c3fb27SDimitry Andric SymbolRef Sym, 18306c3fb27SDimitry Andric TaintTagType Kind) { 18406c3fb27SDimitry Andric return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/false); 18506c3fb27SDimitry Andric } 18606c3fb27SDimitry Andric 18706c3fb27SDimitry Andric std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, 18806c3fb27SDimitry Andric const MemRegion *Reg, 18906c3fb27SDimitry Andric TaintTagType Kind) { 19006c3fb27SDimitry Andric return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/false); 19106c3fb27SDimitry Andric } 19206c3fb27SDimitry Andric 19306c3fb27SDimitry Andric std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, 19406c3fb27SDimitry Andric const Stmt *S, 19506c3fb27SDimitry Andric const LocationContext *LCtx, 19606c3fb27SDimitry Andric TaintTagType Kind, 19706c3fb27SDimitry Andric bool returnFirstOnly) { 19806c3fb27SDimitry Andric SVal val = State->getSVal(S, LCtx); 19906c3fb27SDimitry Andric return getTaintedSymbolsImpl(State, val, Kind, returnFirstOnly); 20006c3fb27SDimitry Andric } 20106c3fb27SDimitry Andric 20206c3fb27SDimitry Andric std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, 20306c3fb27SDimitry Andric SVal V, TaintTagType Kind, 20406c3fb27SDimitry Andric bool returnFirstOnly) { 20506c3fb27SDimitry Andric if (SymbolRef Sym = V.getAsSymbol()) 20606c3fb27SDimitry Andric return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly); 20706c3fb27SDimitry Andric if (const MemRegion *Reg = V.getAsRegion()) 20806c3fb27SDimitry Andric return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly); 20906c3fb27SDimitry Andric return {}; 21006c3fb27SDimitry Andric } 21106c3fb27SDimitry Andric 21206c3fb27SDimitry Andric std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, 21306c3fb27SDimitry Andric const MemRegion *Reg, 21406c3fb27SDimitry Andric TaintTagType K, 21506c3fb27SDimitry Andric bool returnFirstOnly) { 21606c3fb27SDimitry Andric std::vector<SymbolRef> TaintedSymbols; 21706c3fb27SDimitry Andric if (!Reg) 21806c3fb27SDimitry Andric return TaintedSymbols; 219*5678d1d9SDimitry Andric 220*5678d1d9SDimitry Andric // Element region (array element) is tainted if the offset is tainted. 22106c3fb27SDimitry Andric if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) { 22206c3fb27SDimitry Andric std::vector<SymbolRef> TaintedIndex = 22306c3fb27SDimitry Andric getTaintedSymbolsImpl(State, ER->getIndex(), K, returnFirstOnly); 22406c3fb27SDimitry Andric llvm::append_range(TaintedSymbols, TaintedIndex); 22506c3fb27SDimitry Andric if (returnFirstOnly && !TaintedSymbols.empty()) 22606c3fb27SDimitry Andric return TaintedSymbols; // return early if needed 22706c3fb27SDimitry Andric } 22806c3fb27SDimitry Andric 229*5678d1d9SDimitry Andric // Symbolic region is tainted if the corresponding symbol is tainted. 23006c3fb27SDimitry Andric if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) { 23106c3fb27SDimitry Andric std::vector<SymbolRef> TaintedRegions = 23206c3fb27SDimitry Andric getTaintedSymbolsImpl(State, SR->getSymbol(), K, returnFirstOnly); 23306c3fb27SDimitry Andric llvm::append_range(TaintedSymbols, TaintedRegions); 23406c3fb27SDimitry Andric if (returnFirstOnly && !TaintedSymbols.empty()) 23506c3fb27SDimitry Andric return TaintedSymbols; // return early if needed 23606c3fb27SDimitry Andric } 23706c3fb27SDimitry Andric 238*5678d1d9SDimitry Andric // Any subregion (including Element and Symbolic regions) is tainted if its 239*5678d1d9SDimitry Andric // super-region is tainted. 24006c3fb27SDimitry Andric if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) { 24106c3fb27SDimitry Andric std::vector<SymbolRef> TaintedSubRegions = 24206c3fb27SDimitry Andric getTaintedSymbolsImpl(State, ER->getSuperRegion(), K, returnFirstOnly); 24306c3fb27SDimitry Andric llvm::append_range(TaintedSymbols, TaintedSubRegions); 24406c3fb27SDimitry Andric if (returnFirstOnly && !TaintedSymbols.empty()) 24506c3fb27SDimitry Andric return TaintedSymbols; // return early if needed 24606c3fb27SDimitry Andric } 24706c3fb27SDimitry Andric 24806c3fb27SDimitry Andric return TaintedSymbols; 24906c3fb27SDimitry Andric } 25006c3fb27SDimitry Andric 25106c3fb27SDimitry Andric std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, 25206c3fb27SDimitry Andric SymbolRef Sym, 25306c3fb27SDimitry Andric TaintTagType Kind, 25406c3fb27SDimitry Andric bool returnFirstOnly) { 25506c3fb27SDimitry Andric std::vector<SymbolRef> TaintedSymbols; 25606c3fb27SDimitry Andric if (!Sym) 25706c3fb27SDimitry Andric return TaintedSymbols; 25806c3fb27SDimitry Andric 25906c3fb27SDimitry Andric // Traverse all the symbols this symbol depends on to see if any are tainted. 26006c3fb27SDimitry Andric for (SymbolRef SubSym : Sym->symbols()) { 26106c3fb27SDimitry Andric if (!isa<SymbolData>(SubSym)) 26206c3fb27SDimitry Andric continue; 26306c3fb27SDimitry Andric 26406c3fb27SDimitry Andric if (const TaintTagType *Tag = State->get<TaintMap>(SubSym)) { 26506c3fb27SDimitry Andric if (*Tag == Kind) { 26606c3fb27SDimitry Andric TaintedSymbols.push_back(SubSym); 26706c3fb27SDimitry Andric if (returnFirstOnly) 26806c3fb27SDimitry Andric return TaintedSymbols; // return early if needed 26906c3fb27SDimitry Andric } 27006c3fb27SDimitry Andric } 27106c3fb27SDimitry Andric 27206c3fb27SDimitry Andric if (const auto *SD = dyn_cast<SymbolDerived>(SubSym)) { 2730b57cec5SDimitry Andric // If this is a SymbolDerived with a tainted parent, it's also tainted. 27406c3fb27SDimitry Andric std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl( 27506c3fb27SDimitry Andric State, SD->getParentSymbol(), Kind, returnFirstOnly); 27606c3fb27SDimitry Andric llvm::append_range(TaintedSymbols, TaintedParents); 27706c3fb27SDimitry Andric if (returnFirstOnly && !TaintedSymbols.empty()) 27806c3fb27SDimitry Andric return TaintedSymbols; // return early if needed 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric // If this is a SymbolDerived with the same parent symbol as another 28106c3fb27SDimitry Andric // tainted SymbolDerived and a region that's a sub-region of that 28206c3fb27SDimitry Andric // tainted symbol, it's also tainted. 2830b57cec5SDimitry Andric if (const TaintedSubRegions *Regs = 2840b57cec5SDimitry Andric State->get<DerivedSymTaint>(SD->getParentSymbol())) { 2850b57cec5SDimitry Andric const TypedValueRegion *R = SD->getRegion(); 2860b57cec5SDimitry Andric for (auto I : *Regs) { 2870b57cec5SDimitry Andric // FIXME: The logic to identify tainted regions could be more 2880b57cec5SDimitry Andric // complete. For example, this would not currently identify 2890b57cec5SDimitry Andric // overlapping fields in a union as tainted. To identify this we can 2900b57cec5SDimitry Andric // check for overlapping/nested byte offsets. 29106c3fb27SDimitry Andric if (Kind == I.second && R->isSubRegionOf(I.first)) { 29206c3fb27SDimitry Andric TaintedSymbols.push_back(SD->getParentSymbol()); 29306c3fb27SDimitry Andric if (returnFirstOnly && !TaintedSymbols.empty()) 29406c3fb27SDimitry Andric return TaintedSymbols; // return early if needed 29506c3fb27SDimitry Andric } 2960b57cec5SDimitry Andric } 2970b57cec5SDimitry Andric } 2980b57cec5SDimitry Andric } 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric // If memory region is tainted, data is also tainted. 30106c3fb27SDimitry Andric if (const auto *SRV = dyn_cast<SymbolRegionValue>(SubSym)) { 30206c3fb27SDimitry Andric std::vector<SymbolRef> TaintedRegions = 30306c3fb27SDimitry Andric getTaintedSymbolsImpl(State, SRV->getRegion(), Kind, returnFirstOnly); 30406c3fb27SDimitry Andric llvm::append_range(TaintedSymbols, TaintedRegions); 30506c3fb27SDimitry Andric if (returnFirstOnly && !TaintedSymbols.empty()) 30606c3fb27SDimitry Andric return TaintedSymbols; // return early if needed 3070b57cec5SDimitry Andric } 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric // If this is a SymbolCast from a tainted value, it's also tainted. 31006c3fb27SDimitry Andric if (const auto *SC = dyn_cast<SymbolCast>(SubSym)) { 31106c3fb27SDimitry Andric std::vector<SymbolRef> TaintedCasts = 31206c3fb27SDimitry Andric getTaintedSymbolsImpl(State, SC->getOperand(), Kind, returnFirstOnly); 31306c3fb27SDimitry Andric llvm::append_range(TaintedSymbols, TaintedCasts); 31406c3fb27SDimitry Andric if (returnFirstOnly && !TaintedSymbols.empty()) 31506c3fb27SDimitry Andric return TaintedSymbols; // return early if needed 3160b57cec5SDimitry Andric } 3170b57cec5SDimitry Andric } 31806c3fb27SDimitry Andric return TaintedSymbols; 3190b57cec5SDimitry Andric } 320