xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the template classes ExplodedNode and ExplodedGraph,
10 //  which represent a path-sensitive, intra-procedural "exploded graph."
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprObjC.h"
17 #include "clang/AST/ParentMap.h"
18 #include "clang/AST/Stmt.h"
19 #include "clang/Analysis/CFGStmtMap.h"
20 #include "clang/Analysis/ProgramPoint.h"
21 #include "clang/Analysis/Support/BumpVector.h"
22 #include "clang/Basic/LLVM.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
26 #include "llvm/ADT/DenseSet.h"
27 #include "llvm/ADT/FoldingSet.h"
28 #include "llvm/ADT/PointerUnion.h"
29 #include <cassert>
30 #include <memory>
31 #include <optional>
32 
33 using namespace clang;
34 using namespace ento;
35 
36 //===----------------------------------------------------------------------===//
37 // Cleanup.
38 //===----------------------------------------------------------------------===//
39 
40 ExplodedGraph::ExplodedGraph() = default;
41 
42 ExplodedGraph::~ExplodedGraph() = default;
43 
44 //===----------------------------------------------------------------------===//
45 // Node reclamation.
46 //===----------------------------------------------------------------------===//
47 
isInterestingLValueExpr(const Expr * Ex)48 bool ExplodedGraph::isInterestingLValueExpr(const Expr *Ex) {
49   if (!Ex->isLValue())
50     return false;
51   return isa<DeclRefExpr, MemberExpr, ObjCIvarRefExpr, ArraySubscriptExpr>(Ex);
52 }
53 
shouldCollect(const ExplodedNode * node)54 bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
55   // First, we only consider nodes for reclamation of the following
56   // conditions apply:
57   //
58   // (1) 1 predecessor (that has one successor)
59   // (2) 1 successor (that has one predecessor)
60   //
61   // If a node has no successor it is on the "frontier", while a node
62   // with no predecessor is a root.
63   //
64   // After these prerequisites, we discard all "filler" nodes that
65   // are used only for intermediate processing, and are not essential
66   // for analyzer history:
67   //
68   // (a) PreStmtPurgeDeadSymbols
69   //
70   // We then discard all other nodes where *all* of the following conditions
71   // apply:
72   //
73   // (3) The ProgramPoint is for a PostStmt, but not a PostStore.
74   // (4) There is no 'tag' for the ProgramPoint.
75   // (5) The 'store' is the same as the predecessor.
76   // (6) The 'GDM' is the same as the predecessor.
77   // (7) The LocationContext is the same as the predecessor.
78   // (8) Expressions that are *not* lvalue expressions.
79   // (9) The PostStmt isn't for a non-consumed Stmt or Expr.
80   // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
81   //      PreImplicitCall (so that we would be able to find it when retrying a
82   //      call with no inlining).
83   // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
84 
85   // Conditions 1 and 2.
86   if (node->pred_size() != 1 || node->succ_size() != 1)
87     return false;
88 
89   const ExplodedNode *pred = *(node->pred_begin());
90   if (pred->succ_size() != 1)
91     return false;
92 
93   const ExplodedNode *succ = *(node->succ_begin());
94   if (succ->pred_size() != 1)
95     return false;
96 
97   // Now reclaim any nodes that are (by definition) not essential to
98   // analysis history and are not consulted by any client code.
99   ProgramPoint progPoint = node->getLocation();
100   if (progPoint.getAs<PreStmtPurgeDeadSymbols>())
101     return !progPoint.getTag();
102 
103   // Condition 3.
104   if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>())
105     return false;
106 
107   // Condition 4.
108   if (progPoint.getTag())
109     return false;
110 
111   // Conditions 5, 6, and 7.
112   ProgramStateRef state = node->getState();
113   ProgramStateRef pred_state = pred->getState();
114   if (state->store != pred_state->store || state->GDM != pred_state->GDM ||
115       progPoint.getLocationContext() != pred->getLocationContext())
116     return false;
117 
118   // All further checks require expressions. As per #3, we know that we have
119   // a PostStmt.
120   const Expr *Ex = dyn_cast<Expr>(progPoint.castAs<PostStmt>().getStmt());
121   if (!Ex)
122     return false;
123 
124   // Condition 8.
125   // Do not collect nodes for "interesting" lvalue expressions since they are
126   // used extensively for generating path diagnostics.
127   if (isInterestingLValueExpr(Ex))
128     return false;
129 
130   // Condition 9.
131   // Do not collect nodes for non-consumed Stmt or Expr to ensure precise
132   // diagnostic generation; specifically, so that we could anchor arrows
133   // pointing to the beginning of statements (as written in code).
134   const ParentMap &PM = progPoint.getLocationContext()->getParentMap();
135   if (!PM.isConsumedExpr(Ex))
136     return false;
137 
138   // Condition 10.
139   const ProgramPoint SuccLoc = succ->getLocation();
140   if (std::optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>())
141     if (CallEvent::isCallStmt(SP->getStmt()))
142       return false;
143 
144   // Condition 10, continuation.
145   if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>())
146     return false;
147 
148   return true;
149 }
150 
collectNode(ExplodedNode * node)151 void ExplodedGraph::collectNode(ExplodedNode *node) {
152   // Removing a node means:
153   // (a) changing the predecessors successor to the successor of this node
154   // (b) changing the successors predecessor to the predecessor of this node
155   // (c) Putting 'node' onto freeNodes.
156   assert(node->pred_size() == 1 || node->succ_size() == 1);
157   ExplodedNode *pred = *(node->pred_begin());
158   ExplodedNode *succ = *(node->succ_begin());
159   pred->replaceSuccessor(succ);
160   succ->replacePredecessor(pred);
161   FreeNodes.push_back(node);
162   Nodes.RemoveNode(node);
163   --NumNodes;
164   node->~ExplodedNode();
165 }
166 
reclaimRecentlyAllocatedNodes()167 void ExplodedGraph::reclaimRecentlyAllocatedNodes() {
168   if (ChangedNodes.empty())
169     return;
170 
171   // Only periodically reclaim nodes so that we can build up a set of
172   // nodes that meet the reclamation criteria.  Freshly created nodes
173   // by definition have no successor, and thus cannot be reclaimed (see below).
174   assert(ReclaimCounter > 0);
175   if (--ReclaimCounter != 0)
176     return;
177   ReclaimCounter = ReclaimNodeInterval;
178 
179   for (const auto node : ChangedNodes)
180     if (shouldCollect(node))
181       collectNode(node);
182   ChangedNodes.clear();
183 }
184 
185 //===----------------------------------------------------------------------===//
186 // ExplodedNode.
187 //===----------------------------------------------------------------------===//
188 
189 // An NodeGroup's storage type is actually very much like a TinyPtrVector:
190 // it can be either a pointer to a single ExplodedNode, or a pointer to a
191 // BumpVector allocated with the ExplodedGraph's allocator. This allows the
192 // common case of single-node NodeGroups to be implemented with no extra memory.
193 //
194 // Consequently, each of the NodeGroup methods have up to four cases to handle:
195 // 1. The flag is set and this group does not actually contain any nodes.
196 // 2. The group is empty, in which case the storage value is null.
197 // 3. The group contains a single node.
198 // 4. The group contains more than one node.
199 using ExplodedNodeVector = BumpVector<ExplodedNode *>;
200 using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>;
201 
addPredecessor(ExplodedNode * V,ExplodedGraph & G)202 void ExplodedNode::addPredecessor(ExplodedNode *V, ExplodedGraph &G) {
203   assert(!V->isSink());
204   Preds.addNode(V, G);
205   V->Succs.addNode(this, G);
206 }
207 
replaceNode(ExplodedNode * node)208 void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) {
209   assert(!getFlag());
210 
211   GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
212   assert(isa<ExplodedNode *>(Storage));
213   Storage = node;
214   assert(isa<ExplodedNode *>(Storage));
215 }
216 
addNode(ExplodedNode * N,ExplodedGraph & G)217 void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) {
218   assert(!getFlag());
219 
220   GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
221   if (Storage.isNull()) {
222     Storage = N;
223     assert(isa<ExplodedNode *>(Storage));
224     return;
225   }
226 
227   ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage);
228 
229   if (!V) {
230     // Switch from single-node to multi-node representation.
231     auto *Old = cast<ExplodedNode *>(Storage);
232 
233     BumpVectorContext &Ctx = G.getNodeAllocator();
234     V = new (G.getAllocator()) ExplodedNodeVector(Ctx, 4);
235     V->push_back(Old, Ctx);
236 
237     Storage = V;
238     assert(!getFlag());
239     assert(isa<ExplodedNodeVector *>(Storage));
240   }
241 
242   V->push_back(N, G.getNodeAllocator());
243 }
244 
size() const245 unsigned ExplodedNode::NodeGroup::size() const {
246   if (getFlag())
247     return 0;
248 
249   const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
250   if (Storage.isNull())
251     return 0;
252   if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
253     return V->size();
254   return 1;
255 }
256 
begin() const257 ExplodedNode * const *ExplodedNode::NodeGroup::begin() const {
258   if (getFlag())
259     return nullptr;
260 
261   const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
262   if (Storage.isNull())
263     return nullptr;
264   if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
265     return V->begin();
266   return Storage.getAddrOfPtr1();
267 }
268 
end() const269 ExplodedNode * const *ExplodedNode::NodeGroup::end() const {
270   if (getFlag())
271     return nullptr;
272 
273   const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
274   if (Storage.isNull())
275     return nullptr;
276   if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Storage))
277     return V->end();
278   return Storage.getAddrOfPtr1() + 1;
279 }
280 
isTrivial() const281 bool ExplodedNode::isTrivial() const {
282   return pred_size() == 1 && succ_size() == 1 &&
283          getFirstPred()->getState()->getID() == getState()->getID() &&
284          getFirstPred()->succ_size() == 1;
285 }
286 
getCFGBlock() const287 const CFGBlock *ExplodedNode::getCFGBlock() const {
288   ProgramPoint P = getLocation();
289   if (auto BEP = P.getAs<BlockEntrance>())
290     return BEP->getBlock();
291 
292   // Find the node's current statement in the CFG.
293   // FIXME: getStmtForDiagnostics() does nasty things in order to provide
294   // a valid statement for body farms, do we need this behavior here?
295   if (const Stmt *S = getStmtForDiagnostics())
296     return getLocationContext()
297         ->getAnalysisDeclContext()
298         ->getCFGStmtMap()
299         ->getBlock(S);
300 
301   return nullptr;
302 }
303 
304 static const LocationContext *
findTopAutosynthesizedParentContext(const LocationContext * LC)305 findTopAutosynthesizedParentContext(const LocationContext *LC) {
306   assert(LC->getAnalysisDeclContext()->isBodyAutosynthesized());
307   const LocationContext *ParentLC = LC->getParent();
308   assert(ParentLC && "We don't start analysis from autosynthesized code");
309   while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
310     LC = ParentLC;
311     ParentLC = LC->getParent();
312     assert(ParentLC && "We don't start analysis from autosynthesized code");
313   }
314   return LC;
315 }
316 
getStmtForDiagnostics() const317 const Stmt *ExplodedNode::getStmtForDiagnostics() const {
318   // We cannot place diagnostics on autosynthesized code.
319   // Put them onto the call site through which we jumped into autosynthesized
320   // code for the first time.
321   const LocationContext *LC = getLocationContext();
322   if (LC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
323     // It must be a stack frame because we only autosynthesize functions.
324     return cast<StackFrameContext>(findTopAutosynthesizedParentContext(LC))
325         ->getCallSite();
326   }
327   // Otherwise, see if the node's program point directly points to a statement.
328   // FIXME: Refactor into a ProgramPoint method?
329   ProgramPoint P = getLocation();
330   if (auto SP = P.getAs<StmtPoint>())
331     return SP->getStmt();
332   if (auto BE = P.getAs<BlockEdge>())
333     return BE->getSrc()->getTerminatorStmt();
334   if (auto CE = P.getAs<CallEnter>())
335     return CE->getCallExpr();
336   if (auto CEE = P.getAs<CallExitEnd>())
337     return CEE->getCalleeContext()->getCallSite();
338   if (auto PIPP = P.getAs<PostInitializer>())
339     return PIPP->getInitializer()->getInit();
340   if (auto CEB = P.getAs<CallExitBegin>())
341     return CEB->getReturnStmt();
342   if (auto FEP = P.getAs<FunctionExitPoint>())
343     return FEP->getStmt();
344 
345   return nullptr;
346 }
347 
getNextStmtForDiagnostics() const348 const Stmt *ExplodedNode::getNextStmtForDiagnostics() const {
349   for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
350     if (N->getLocation().isPurgeKind())
351       continue;
352     if (const Stmt *S = N->getStmtForDiagnostics()) {
353       // Check if the statement is '?' or '&&'/'||'.  These are "merges",
354       // not actual statement points.
355       switch (S->getStmtClass()) {
356         case Stmt::ChooseExprClass:
357         case Stmt::BinaryConditionalOperatorClass:
358         case Stmt::ConditionalOperatorClass:
359           continue;
360         case Stmt::BinaryOperatorClass: {
361           BinaryOperatorKind Op = cast<BinaryOperator>(S)->getOpcode();
362           if (Op == BO_LAnd || Op == BO_LOr)
363             continue;
364           break;
365         }
366         default:
367           break;
368       }
369       // We found the statement, so return it.
370       return S;
371     }
372   }
373 
374   return nullptr;
375 }
376 
getPreviousStmtForDiagnostics() const377 const Stmt *ExplodedNode::getPreviousStmtForDiagnostics() const {
378   for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred())
379     if (const Stmt *S = N->getStmtForDiagnostics(); S && !isa<CompoundStmt>(S))
380       return S;
381 
382   return nullptr;
383 }
384 
getCurrentOrPreviousStmtForDiagnostics() const385 const Stmt *ExplodedNode::getCurrentOrPreviousStmtForDiagnostics() const {
386   if (const Stmt *S = getStmtForDiagnostics())
387     return S;
388 
389   return getPreviousStmtForDiagnostics();
390 }
391 
getNode(const ProgramPoint & L,ProgramStateRef State,bool IsSink,bool * IsNew)392 ExplodedNode *ExplodedGraph::getNode(const ProgramPoint &L,
393                                      ProgramStateRef State,
394                                      bool IsSink,
395                                      bool* IsNew) {
396   // Profile 'State' to determine if we already have an existing node.
397   llvm::FoldingSetNodeID profile;
398   void *InsertPos = nullptr;
399 
400   NodeTy::Profile(profile, L, State, IsSink);
401   NodeTy* V = Nodes.FindNodeOrInsertPos(profile, InsertPos);
402 
403   if (!V) {
404     if (!FreeNodes.empty()) {
405       V = FreeNodes.back();
406       FreeNodes.pop_back();
407     }
408     else {
409       // Allocate a new node.
410       V = getAllocator().Allocate<NodeTy>();
411     }
412 
413     ++NumNodes;
414     new (V) NodeTy(L, State, NumNodes, IsSink);
415 
416     if (ReclaimNodeInterval)
417       ChangedNodes.push_back(V);
418 
419     // Insert the node into the node set and return it.
420     Nodes.InsertNode(V, InsertPos);
421 
422     if (IsNew) *IsNew = true;
423   }
424   else
425     if (IsNew) *IsNew = false;
426 
427   return V;
428 }
429 
createUncachedNode(const ProgramPoint & L,ProgramStateRef State,int64_t Id,bool IsSink)430 ExplodedNode *ExplodedGraph::createUncachedNode(const ProgramPoint &L,
431                                                 ProgramStateRef State,
432                                                 int64_t Id,
433                                                 bool IsSink) {
434   NodeTy *V = getAllocator().Allocate<NodeTy>();
435   new (V) NodeTy(L, State, Id, IsSink);
436   return V;
437 }
438 
439 std::unique_ptr<ExplodedGraph>
trim(ArrayRef<const NodeTy * > Sinks,InterExplodedGraphMap * ForwardMap,InterExplodedGraphMap * InverseMap) const440 ExplodedGraph::trim(ArrayRef<const NodeTy *> Sinks,
441                     InterExplodedGraphMap *ForwardMap,
442                     InterExplodedGraphMap *InverseMap) const {
443   // FIXME: The two-pass algorithm of this function (which was introduced in
444   // 2008) is terribly overcomplicated and should be replaced by a single
445   // (backward) pass.
446 
447   if (Nodes.empty())
448     return nullptr;
449 
450   using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
451   Pass1Ty Pass1;
452 
453   using Pass2Ty = InterExplodedGraphMap;
454   InterExplodedGraphMap Pass2Scratch;
455   Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
456 
457   SmallVector<const ExplodedNode*, 10> WL1, WL2;
458 
459   // ===- Pass 1 (reverse DFS) -===
460   for (const auto Sink : Sinks)
461     if (Sink)
462       WL1.push_back(Sink);
463 
464   // Process the first worklist until it is empty.
465   while (!WL1.empty()) {
466     const ExplodedNode *N = WL1.pop_back_val();
467 
468     // Have we already visited this node?  If so, continue to the next one.
469     if (!Pass1.insert(N).second)
470       continue;
471 
472     // If this is the root enqueue it to the second worklist.
473     if (N->Preds.empty()) {
474       assert(N == getRoot() && "Found non-root node with no predecessors!");
475       WL2.push_back(N);
476       continue;
477     }
478 
479     // Visit our predecessors and enqueue them.
480     WL1.append(N->Preds.begin(), N->Preds.end());
481   }
482 
483   // We didn't hit the root? Return with a null pointer for the new graph.
484   if (WL2.empty())
485     return nullptr;
486 
487   assert(WL2.size() == 1 && "There must be only one root!");
488 
489   // Create an empty graph.
490   std::unique_ptr<ExplodedGraph> G = std::make_unique<ExplodedGraph>();
491 
492   // ===- Pass 2 (forward DFS to construct the new graph) -===
493   while (!WL2.empty()) {
494     const ExplodedNode *N = WL2.pop_back_val();
495 
496     auto [Place, Inserted] = Pass2.try_emplace(N);
497 
498     // Skip this node if we have already processed it.
499     if (!Inserted)
500       continue;
501 
502     // Create the corresponding node in the new graph and record the mapping
503     // from the old node to the new node.
504     ExplodedNode *NewN = G->createUncachedNode(N->getLocation(), N->State,
505                                                N->getID(), N->isSink());
506     Place->second = NewN;
507 
508     // Also record the reverse mapping from the new node to the old node.
509     if (InverseMap) (*InverseMap)[NewN] = N;
510 
511     // If this node is the root, designate it as such in the graph.
512     if (N->Preds.empty()) {
513       assert(N == getRoot());
514       G->designateAsRoot(NewN);
515     }
516 
517     // In the case that some of the intended predecessors of NewN have already
518     // been created, we should hook them up as predecessors.
519 
520     // Walk through the predecessors of 'N' and hook up their corresponding
521     // nodes in the new graph (if any) to the freshly created node.
522     for (const ExplodedNode *Pred : N->Preds) {
523       Pass2Ty::iterator PI = Pass2.find(Pred);
524       if (PI == Pass2.end())
525         continue;
526 
527       NewN->addPredecessor(const_cast<ExplodedNode *>(PI->second), *G);
528     }
529 
530     // In the case that some of the intended successors of NewN have already
531     // been created, we should hook them up as successors.  Otherwise, enqueue
532     // the new nodes from the original graph that should have nodes created
533     // in the new graph.
534     for (const ExplodedNode *Succ : N->Succs) {
535       Pass2Ty::iterator PI = Pass2.find(Succ);
536       if (PI != Pass2.end()) {
537         const_cast<ExplodedNode *>(PI->second)->addPredecessor(NewN, *G);
538         continue;
539       }
540 
541       // Enqueue nodes to the worklist that were marked during pass 1.
542       if (Pass1.count(Succ))
543         WL2.push_back(Succ);
544     }
545   }
546 
547   return G;
548 }
549