xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a check that marks a raw pointer to a C++ container's
10 // inner buffer released when the object is destroyed. This information can
11 // be used by MallocChecker to detect use-after-free problems.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AllocationState.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
18 #include "clang/StaticAnalyzer/Core/Checker.h"
19 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 
23 using namespace clang;
24 using namespace ento;
25 
// Associate container objects with a set of raw pointer symbols.
//
// `PtrSet` is a program-state set of `SymbolRef`s. `RawPtrMap` is a
// program-state map keyed by a container object's `MemRegion`; its value is
// the `PtrSet` of raw pointer symbols that were obtained from that container.
// Entries are added in checkPostCall() and removed either when the pointers
// are marked released or when they are cleaned up in checkDeadSymbols().
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(PtrSet, SymbolRef)
REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet)
29 
30 
31 namespace {
32 
33 class InnerPointerChecker
34     : public Checker<check::DeadSymbols, check::PostCall> {
35 
36   CallDescriptionSet InvalidatingMemberFunctions{
37       CallDescription(CDM::CXXMethod, {"std", "basic_string", "append"}),
38       CallDescription(CDM::CXXMethod, {"std", "basic_string", "assign"}),
39       CallDescription(CDM::CXXMethod, {"std", "basic_string", "clear"}),
40       CallDescription(CDM::CXXMethod, {"std", "basic_string", "erase"}),
41       CallDescription(CDM::CXXMethod, {"std", "basic_string", "insert"}),
42       CallDescription(CDM::CXXMethod, {"std", "basic_string", "pop_back"}),
43       CallDescription(CDM::CXXMethod, {"std", "basic_string", "push_back"}),
44       CallDescription(CDM::CXXMethod, {"std", "basic_string", "replace"}),
45       CallDescription(CDM::CXXMethod, {"std", "basic_string", "reserve"}),
46       CallDescription(CDM::CXXMethod, {"std", "basic_string", "resize"}),
47       CallDescription(CDM::CXXMethod, {"std", "basic_string", "shrink_to_fit"}),
48       CallDescription(CDM::CXXMethod, {"std", "basic_string", "swap"})};
49 
50   CallDescriptionSet AddressofFunctions{
51       CallDescription(CDM::SimpleFunc, {"std", "addressof"}),
52       CallDescription(CDM::SimpleFunc, {"std", "__addressof"})};
53 
54   CallDescriptionSet InnerPointerAccessFunctions{
55       CallDescription(CDM::CXXMethod, {"std", "basic_string", "c_str"}),
56       CallDescription(CDM::SimpleFunc, {"std", "data"}, 1),
57       CallDescription(CDM::CXXMethod, {"std", "basic_string", "data"})};
58 
59 public:
60   class InnerPointerBRVisitor : public BugReporterVisitor {
61     SymbolRef PtrToBuf;
62 
63   public:
64     InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {}
65 
66     static void *getTag() {
67       static int Tag = 0;
68       return &Tag;
69     }
70 
71     void Profile(llvm::FoldingSetNodeID &ID) const override {
72       ID.AddPointer(getTag());
73     }
74 
75     PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
76                                      BugReporterContext &BRC,
77                                      PathSensitiveBugReport &BR) override;
78 
79     // FIXME: Scan the map once in the visitor's constructor and do a direct
80     // lookup by region.
81     bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) {
82       RawPtrMapTy Map = State->get<RawPtrMap>();
83       for (const auto &Entry : Map) {
84         if (Entry.second.contains(Sym))
85           return true;
86       }
87       return false;
88     }
89   };
90 
91   /// Check whether the called member function potentially invalidates
92   /// pointers referring to the container object's inner buffer.
93   bool isInvalidatingMemberFunction(const CallEvent &Call) const;
94 
95   /// Mark pointer symbols associated with the given memory region released
96   /// in the program state.
97   void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State,
98                               const MemRegion *ObjRegion,
99                               CheckerContext &C) const;
100 
101   /// Standard library functions that take a non-const `basic_string` argument by
102   /// reference may invalidate its inner pointers. Check for these cases and
103   /// mark the pointers released.
104   void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State,
105                               CheckerContext &C) const;
106 
107   /// Record the connection between raw pointers referring to a container
108   /// object's inner buffer and the object's memory region in the program state.
109   /// Mark potentially invalidated pointers released.
110   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
111 
112   /// Clean up the program state map.
113   void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
114 };
115 
116 } // end anonymous namespace
117 
118 bool InnerPointerChecker::isInvalidatingMemberFunction(
119         const CallEvent &Call) const {
120   if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(&Call)) {
121     OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator();
122     if (Opc == OO_Equal || Opc == OO_PlusEqual)
123       return true;
124     return false;
125   }
126   return isa<CXXDestructorCall>(Call) ||
127          InvalidatingMemberFunctions.contains(Call);
128 }
129 
130 void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call,
131                                                  ProgramStateRef State,
132                                                  const MemRegion *MR,
133                                                  CheckerContext &C) const {
134   if (const PtrSet *PS = State->get<RawPtrMap>(MR)) {
135     const Expr *Origin = Call.getOriginExpr();
136     for (const auto Symbol : *PS) {
137       // NOTE: `Origin` may be null, and will be stored so in the symbol's
138       // `RefState` in MallocChecker's `RegionState` program state map.
139       State = allocation_state::markReleased(State, Symbol, Origin);
140     }
141     State = State->remove<RawPtrMap>(MR);
142     C.addTransition(State);
143     return;
144   }
145 }
146 
147 void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call,
148                                                  ProgramStateRef State,
149                                                  CheckerContext &C) const {
150   if (const auto *FC = dyn_cast<AnyFunctionCall>(&Call)) {
151     const FunctionDecl *FD = FC->getDecl();
152     if (!FD || !FD->isInStdNamespace())
153       return;
154 
155     for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) {
156       QualType ParamTy = FD->getParamDecl(I)->getType();
157       if (!ParamTy->isReferenceType() ||
158           ParamTy->getPointeeType().isConstQualified())
159         continue;
160 
161       // In case of member operator calls, `this` is counted as an
162       // argument but not as a parameter.
163       bool isaMemberOpCall = isa<CXXMemberOperatorCall>(FC);
164       unsigned ArgI = isaMemberOpCall ? I+1 : I;
165 
166       SVal Arg = FC->getArgSVal(ArgI);
167       const auto *ArgRegion =
168           dyn_cast_or_null<TypedValueRegion>(Arg.getAsRegion());
169       if (!ArgRegion)
170         continue;
171 
172       // std::addressof functions accepts a non-const reference as an argument,
173       // but doesn't modify it.
174       if (AddressofFunctions.contains(Call))
175         continue;
176 
177       markPtrSymbolsReleased(Call, State, ArgRegion, C);
178     }
179   }
180 }
181 
182 // [string.require]
183 //
184 // "References, pointers, and iterators referring to the elements of a
185 // basic_string sequence may be invalidated by the following uses of that
186 // basic_string object:
187 //
188 // -- As an argument to any standard library function taking a reference
189 // to non-const basic_string as an argument. For example, as an argument to
190 // non-member functions swap(), operator>>(), and getline(), or as an argument
191 // to basic_string::swap().
192 //
193 // -- Calling non-const member functions, except operator[], at, front, back,
194 // begin, rbegin, end, and rend."
195 
196 void InnerPointerChecker::checkPostCall(const CallEvent &Call,
197                                         CheckerContext &C) const {
198   ProgramStateRef State = C.getState();
199 
200   // TODO: Do we need these to be typed?
201   const TypedValueRegion *ObjRegion = nullptr;
202 
203   if (const auto *ICall = dyn_cast<CXXInstanceCall>(&Call)) {
204     ObjRegion = dyn_cast_or_null<TypedValueRegion>(
205         ICall->getCXXThisVal().getAsRegion());
206 
207     // Check [string.require] / second point.
208     if (isInvalidatingMemberFunction(Call)) {
209       markPtrSymbolsReleased(Call, State, ObjRegion, C);
210       return;
211     }
212   }
213 
214   if (InnerPointerAccessFunctions.contains(Call)) {
215 
216     if (isa<SimpleFunctionCall>(Call)) {
217       // NOTE: As of now, we only have one free access function: std::data.
218       //       If we add more functions like this in the list, hardcoded
219       //       argument index should be changed.
220       ObjRegion =
221           dyn_cast_or_null<TypedValueRegion>(Call.getArgSVal(0).getAsRegion());
222     }
223 
224     if (!ObjRegion)
225       return;
226 
227     SVal RawPtr = Call.getReturnValue();
228     if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) {
229       // Start tracking this raw pointer by adding it to the set of symbols
230       // associated with this container object in the program state map.
231 
232       PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
233       const PtrSet *SetPtr = State->get<RawPtrMap>(ObjRegion);
234       PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet();
235       assert(C.wasInlined || !Set.contains(Sym));
236       Set = F.add(Set, Sym);
237 
238       State = State->set<RawPtrMap>(ObjRegion, Set);
239       C.addTransition(State);
240     }
241 
242     return;
243   }
244 
245   // Check [string.require] / first point.
246   checkFunctionArguments(Call, State, C);
247 }
248 
249 void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper,
250                                            CheckerContext &C) const {
251   ProgramStateRef State = C.getState();
252   PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
253   RawPtrMapTy RPM = State->get<RawPtrMap>();
254   for (const auto &Entry : RPM) {
255     if (!SymReaper.isLiveRegion(Entry.first)) {
256       // Due to incomplete destructor support, some dead regions might
257       // remain in the program state map. Clean them up.
258       State = State->remove<RawPtrMap>(Entry.first);
259     }
260     if (const PtrSet *OldSet = State->get<RawPtrMap>(Entry.first)) {
261       PtrSet CleanedUpSet = *OldSet;
262       for (const auto Symbol : Entry.second) {
263         if (!SymReaper.isLive(Symbol))
264           CleanedUpSet = F.remove(CleanedUpSet, Symbol);
265       }
266       State = CleanedUpSet.isEmpty()
267                   ? State->remove<RawPtrMap>(Entry.first)
268                   : State->set<RawPtrMap>(Entry.first, CleanedUpSet);
269     }
270   }
271   C.addTransition(State);
272 }
273 
274 namespace clang {
275 namespace ento {
276 namespace allocation_state {
277 
278 std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) {
279   return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(Sym);
280 }
281 
282 const MemRegion *getContainerObjRegion(ProgramStateRef State, SymbolRef Sym) {
283   RawPtrMapTy Map = State->get<RawPtrMap>();
284   for (const auto &Entry : Map) {
285     if (Entry.second.contains(Sym)) {
286       return Entry.first;
287     }
288   }
289   return nullptr;
290 }
291 
292 } // end namespace allocation_state
293 } // end namespace ento
294 } // end namespace clang
295 
296 PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode(
297     const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &) {
298   if (!isSymbolTracked(N->getState(), PtrToBuf) ||
299       isSymbolTracked(N->getFirstPred()->getState(), PtrToBuf))
300     return nullptr;
301 
302   const Stmt *S = N->getStmtForDiagnostics();
303   if (!S)
304     return nullptr;
305 
306   const MemRegion *ObjRegion =
307       allocation_state::getContainerObjRegion(N->getState(), PtrToBuf);
308   const auto *TypedRegion = cast<TypedValueRegion>(ObjRegion);
309   QualType ObjTy = TypedRegion->getValueType();
310 
311   SmallString<256> Buf;
312   llvm::raw_svector_ostream OS(Buf);
313   OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here";
314   PathDiagnosticLocation Pos(S, BRC.getSourceManager(),
315                              N->getLocationContext());
316   return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true);
317 }
318 
// Registration entry point for this checker: runs the auxiliary registration
// hook first, then registers InnerPointerChecker with the checker manager.
// NOTE(review): registerInnerPointerCheckerAux is defined outside this file;
// confirm what it sets up before reordering or removing these calls.
void ento::registerInnerPointerChecker(CheckerManager &Mgr) {
  registerInnerPointerCheckerAux(Mgr);
  Mgr.registerChecker<InnerPointerChecker>();
}
323 
// Registration predicate for this checker. It always returns true; the
// manager argument is not consulted.
bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) {
  return true;
}
327