xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a check that marks a raw pointer to a C++ container's
10 // inner buffer released when the object is destroyed. This information can
11 // be used by MallocChecker to detect use-after-free problems.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AllocationState.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
20 #include "clang/StaticAnalyzer/Core/Checker.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24 
25 using namespace clang;
26 using namespace ento;
27 
// Program state traits used by this checker.
//
// `PtrSet` is a set of raw pointer symbols; `RawPtrMap` associates the memory
// region of a container object with the `PtrSet` of raw pointers that were
// obtained from that object's inner buffer and are still being tracked.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef)
REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet)
31 
32 
33 namespace {
34 
35 class InnerPointerChecker
36     : public Checker<check::DeadSymbols, check::PostCall> {
37 
38   CallDescriptionSet InvalidatingMemberFunctions{
39       CallDescription(CDM::CXXMethod, {"std", "basic_string", "append"}),
40       CallDescription(CDM::CXXMethod, {"std", "basic_string", "assign"}),
41       CallDescription(CDM::CXXMethod, {"std", "basic_string", "clear"}),
42       CallDescription(CDM::CXXMethod, {"std", "basic_string", "erase"}),
43       CallDescription(CDM::CXXMethod, {"std", "basic_string", "insert"}),
44       CallDescription(CDM::CXXMethod, {"std", "basic_string", "pop_back"}),
45       CallDescription(CDM::CXXMethod, {"std", "basic_string", "push_back"}),
46       CallDescription(CDM::CXXMethod, {"std", "basic_string", "replace"}),
47       CallDescription(CDM::CXXMethod, {"std", "basic_string", "reserve"}),
48       CallDescription(CDM::CXXMethod, {"std", "basic_string", "resize"}),
49       CallDescription(CDM::CXXMethod, {"std", "basic_string", "shrink_to_fit"}),
50       CallDescription(CDM::CXXMethod, {"std", "basic_string", "swap"})};
51 
52   CallDescriptionSet AddressofFunctions{
53       CallDescription(CDM::SimpleFunc, {"std", "addressof"}),
54       CallDescription(CDM::SimpleFunc, {"std", "__addressof"})};
55 
56   CallDescriptionSet InnerPointerAccessFunctions{
57       CallDescription(CDM::CXXMethod, {"std", "basic_string", "c_str"}),
58       CallDescription(CDM::SimpleFunc, {"std", "data"}, 1),
59       CallDescription(CDM::CXXMethod, {"std", "basic_string", "data"})};
60 
61 public:
62   class InnerPointerBRVisitor : public BugReporterVisitor {
63     SymbolRef PtrToBuf;
64 
65   public:
66     InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {}
67 
68     static void *getTag() {
69       static int Tag = 0;
70       return &Tag;
71     }
72 
73     void Profile(llvm::FoldingSetNodeID &ID) const override {
74       ID.AddPointer(getTag());
75     }
76 
77     PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
78                                      BugReporterContext &BRC,
79                                      PathSensitiveBugReport &BR) override;
80 
81     // FIXME: Scan the map once in the visitor's constructor and do a direct
82     // lookup by region.
83     bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) {
84       RawPtrMapTy Map = State->get<RawPtrMap>();
85       for (const auto &Entry : Map) {
86         if (Entry.second.contains(Sym))
87           return true;
88       }
89       return false;
90     }
91   };
92 
93   /// Check whether the called member function potentially invalidates
94   /// pointers referring to the container object's inner buffer.
95   bool isInvalidatingMemberFunction(const CallEvent &Call) const;
96 
97   /// Mark pointer symbols associated with the given memory region released
98   /// in the program state.
99   void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State,
100                               const MemRegion *ObjRegion,
101                               CheckerContext &C) const;
102 
103   /// Standard library functions that take a non-const `basic_string` argument by
104   /// reference may invalidate its inner pointers. Check for these cases and
105   /// mark the pointers released.
106   void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State,
107                               CheckerContext &C) const;
108 
109   /// Record the connection between raw pointers referring to a container
110   /// object's inner buffer and the object's memory region in the program state.
111   /// Mark potentially invalidated pointers released.
112   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
113 
114   /// Clean up the program state map.
115   void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
116 };
117 
118 } // end anonymous namespace
119 
120 bool InnerPointerChecker::isInvalidatingMemberFunction(
121         const CallEvent &Call) const {
122   if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(&Call)) {
123     OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator();
124     if (Opc == OO_Equal || Opc == OO_PlusEqual)
125       return true;
126     return false;
127   }
128   return isa<CXXDestructorCall>(Call) ||
129          InvalidatingMemberFunctions.contains(Call);
130 }
131 
132 void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call,
133                                                  ProgramStateRef State,
134                                                  const MemRegion *MR,
135                                                  CheckerContext &C) const {
136   if (const PtrSet *PS = State->get<RawPtrMap>(MR)) {
137     const Expr *Origin = Call.getOriginExpr();
138     for (const auto Symbol : *PS) {
139       // NOTE: `Origin` may be null, and will be stored so in the symbol's
140       // `RefState` in MallocChecker's `RegionState` program state map.
141       State = allocation_state::markReleased(State, Symbol, Origin);
142     }
143     State = State->remove<RawPtrMap>(MR);
144     C.addTransition(State);
145     return;
146   }
147 }
148 
149 void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call,
150                                                  ProgramStateRef State,
151                                                  CheckerContext &C) const {
152   if (const auto *FC = dyn_cast<AnyFunctionCall>(&Call)) {
153     const FunctionDecl *FD = FC->getDecl();
154     if (!FD || !FD->isInStdNamespace())
155       return;
156 
157     for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) {
158       QualType ParamTy = FD->getParamDecl(I)->getType();
159       if (!ParamTy->isReferenceType() ||
160           ParamTy->getPointeeType().isConstQualified())
161         continue;
162 
163       // In case of member operator calls, `this` is counted as an
164       // argument but not as a parameter.
165       bool isaMemberOpCall = isa<CXXMemberOperatorCall>(FC);
166       unsigned ArgI = isaMemberOpCall ? I+1 : I;
167 
168       SVal Arg = FC->getArgSVal(ArgI);
169       const auto *ArgRegion =
170           dyn_cast_or_null<TypedValueRegion>(Arg.getAsRegion());
171       if (!ArgRegion)
172         continue;
173 
174       // std::addressof functions accepts a non-const reference as an argument,
175       // but doesn't modify it.
176       if (AddressofFunctions.contains(Call))
177         continue;
178 
179       markPtrSymbolsReleased(Call, State, ArgRegion, C);
180     }
181   }
182 }
183 
184 // [string.require]
185 //
186 // "References, pointers, and iterators referring to the elements of a
187 // basic_string sequence may be invalidated by the following uses of that
188 // basic_string object:
189 //
190 // -- As an argument to any standard library function taking a reference
191 // to non-const basic_string as an argument. For example, as an argument to
192 // non-member functions swap(), operator>>(), and getline(), or as an argument
193 // to basic_string::swap().
194 //
195 // -- Calling non-const member functions, except operator[], at, front, back,
196 // begin, rbegin, end, and rend."
197 
198 void InnerPointerChecker::checkPostCall(const CallEvent &Call,
199                                         CheckerContext &C) const {
200   ProgramStateRef State = C.getState();
201 
202   // TODO: Do we need these to be typed?
203   const TypedValueRegion *ObjRegion = nullptr;
204 
205   if (const auto *ICall = dyn_cast<CXXInstanceCall>(&Call)) {
206     ObjRegion = dyn_cast_or_null<TypedValueRegion>(
207         ICall->getCXXThisVal().getAsRegion());
208 
209     // Check [string.require] / second point.
210     if (isInvalidatingMemberFunction(Call)) {
211       markPtrSymbolsReleased(Call, State, ObjRegion, C);
212       return;
213     }
214   }
215 
216   if (InnerPointerAccessFunctions.contains(Call)) {
217 
218     if (isa<SimpleFunctionCall>(Call)) {
219       // NOTE: As of now, we only have one free access function: std::data.
220       //       If we add more functions like this in the list, hardcoded
221       //       argument index should be changed.
222       ObjRegion =
223           dyn_cast_or_null<TypedValueRegion>(Call.getArgSVal(0).getAsRegion());
224     }
225 
226     if (!ObjRegion)
227       return;
228 
229     SVal RawPtr = Call.getReturnValue();
230     if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) {
231       // Start tracking this raw pointer by adding it to the set of symbols
232       // associated with this container object in the program state map.
233 
234       PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
235       const PtrSet *SetPtr = State->get<RawPtrMap>(ObjRegion);
236       PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet();
237       assert(C.wasInlined || !Set.contains(Sym));
238       Set = F.add(Set, Sym);
239 
240       State = State->set<RawPtrMap>(ObjRegion, Set);
241       C.addTransition(State);
242     }
243 
244     return;
245   }
246 
247   // Check [string.require] / first point.
248   checkFunctionArguments(Call, State, C);
249 }
250 
251 void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper,
252                                            CheckerContext &C) const {
253   ProgramStateRef State = C.getState();
254   PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
255   RawPtrMapTy RPM = State->get<RawPtrMap>();
256   for (const auto &Entry : RPM) {
257     if (!SymReaper.isLiveRegion(Entry.first)) {
258       // Due to incomplete destructor support, some dead regions might
259       // remain in the program state map. Clean them up.
260       State = State->remove<RawPtrMap>(Entry.first);
261     }
262     if (const PtrSet *OldSet = State->get<RawPtrMap>(Entry.first)) {
263       PtrSet CleanedUpSet = *OldSet;
264       for (const auto Symbol : Entry.second) {
265         if (!SymReaper.isLive(Symbol))
266           CleanedUpSet = F.remove(CleanedUpSet, Symbol);
267       }
268       State = CleanedUpSet.isEmpty()
269                   ? State->remove<RawPtrMap>(Entry.first)
270                   : State->set<RawPtrMap>(Entry.first, CleanedUpSet);
271     }
272   }
273   C.addTransition(State);
274 }
275 
276 namespace clang {
277 namespace ento {
278 namespace allocation_state {
279 
280 std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) {
281   return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(Sym);
282 }
283 
284 const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) {
285   RawPtrMapTy Map = State->get<RawPtrMap>();
286   for (const auto &Entry : Map) {
287     if (Entry.second.contains(Sym)) {
288       return Entry.first;
289     }
290   }
291   return nullptr;
292 }
293 
294 } // end namespace allocation_state
295 } // end namespace ento
296 } // end namespace clang
297 
298 PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode(
299     const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) {
300   if (!isSymbolTracked(N->getState(), PtrToBuf) ||
301       isSymbolTracked(N->getFirstPred()->getState(), PtrToBuf))
302     return nullptr;
303 
304   const Stmt *S = N->getStmtForDiagnostics();
305   if (!S)
306     return nullptr;
307 
308   const MemRegion *ObjRegion =
309       allocation_state::getContainerObjRegion(N->getState(), PtrToBuf);
310   const auto *TypedRegion = cast<TypedValueRegion>(ObjRegion);
311   QualType ObjTy = TypedRegion->getValueType();
312 
313   SmallString<256> Buf;
314   llvm::raw_svector_ostream OS(Buf);
315   OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here";
316   PathDiagnosticLocation Pos(S, BRC.getSourceManager(),
317                              N->getLocationContext());
318   return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true);
319 }
320 
/// Registers InnerPointerChecker with \p Mgr.
///
/// `registerInnerPointerCheckerAux` is called first; it is defined outside
/// this file (NOTE(review): presumably it enables the MallocChecker side that
/// reports uses of the released pointers; confirm against InterCheckerAPI.h).
void ento::registerInnerPointerChecker(CheckerManager &Mgr) {
  registerInnerPointerCheckerAux(Mgr);
  Mgr.registerChecker<InnerPointerChecker>();
}
325 
/// Always returns true; the \p mgr argument is not consulted.
bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) {
  return true;
}
329