xref: /freebsd/contrib/llvm-project/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines base types and functions for building dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs).
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
15 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
16 
17 #include <iterator>
18 #include <optional>
19 #include <type_traits>
20 #include <utility>
21 #include <vector>
22 
23 #include "clang/AST/ASTContext.h"
24 #include "clang/Analysis/CFG.h"
25 #include "clang/Analysis/FlowSensitive/AdornedCFG.h"
26 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
27 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
28 #include "clang/Analysis/FlowSensitive/MatchSwitch.h"
29 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
30 #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/STLFunctionalExtras.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/Support/Errc.h"
35 #include "llvm/Support/Error.h"
36 
37 namespace clang {
38 namespace dataflow {
39 
40 /// Base class template for dataflow analyses built on a single lattice type.
41 ///
42 /// Requirements:
43 ///
44 ///  `Derived` must be derived from a specialization of this class template and
45 ///  must provide the following public members:
46 ///   * `LatticeT initialElement()` - returns a lattice element that models the
47 ///     initial state of a basic block;
48 ///   * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies
49 ///     the analysis transfer function for a given CFG element and lattice
50 ///     element.
51 ///
52 ///  `Derived` can optionally provide the following members:
53 ///  * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E,
54 ///                         Environment &Env)` - applies the analysis transfer
55 ///    function for a given edge from a CFG block of a conditional statement.
56 ///
57 ///  `Derived` can optionally override the virtual functions in the
58 ///  `Environment::ValueModel` interface (which is an indirect base class of
59 ///  this class).
60 ///
61 ///  `LatticeT` is a bounded join-semilattice that is used by `Derived` and must
62 ///  provide the following public members:
63 ///   * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the
64 ///     argument by computing their least upper bound, modifies the object if
65 ///     necessary, and returns an effect indicating whether any changes were
66 ///     made to it;
67 ///     FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)`
68 ///   * `bool operator==(const LatticeT &) const` - returns true if and only if
69 ///     the object is equal to the argument.
70 ///
71 /// `LatticeT` can optionally provide the following members:
72 ///  * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the
73 ///    lattice element with an  approximation that can reach a fixed point more
74 ///    quickly than iterated application of the transfer function alone. The
75 ///    previous value is provided to inform the choice of widened value. The
76 ///    function must also serve as a comparison operation, by indicating whether
77 ///    the widened value is equivalent to the previous value with the returned
78 ///    `LatticeJoinEffect`.
79 template <typename Derived, typename LatticeT>
80 class DataflowAnalysis : public TypeErasedDataflowAnalysis {
81 public:
82   /// Bounded join-semilattice that is used in the analysis.
83   using Lattice = LatticeT;
84 
DataflowAnalysis(ASTContext & Context)85   explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {}
86 
DataflowAnalysis(ASTContext & Context,DataflowAnalysisOptions Options)87   explicit DataflowAnalysis(ASTContext &Context,
88                             DataflowAnalysisOptions Options)
89       : TypeErasedDataflowAnalysis(Options), Context(Context) {}
90 
getASTContext()91   ASTContext &getASTContext() final { return Context; }
92 
typeErasedInitialElement()93   TypeErasedLattice typeErasedInitialElement() final {
94     return {static_cast<Derived *>(this)->initialElement()};
95   }
96 
joinTypeErased(const TypeErasedLattice & E1,const TypeErasedLattice & E2)97   TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1,
98                                    const TypeErasedLattice &E2) final {
99     // FIXME: change the signature of join() to avoid copying here.
100     Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value);
101     const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
102     L1.join(L2);
103     return {std::move(L1)};
104   }
105 
widenTypeErased(TypeErasedLattice & Current,const TypeErasedLattice & Previous)106   LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current,
107                                     const TypeErasedLattice &Previous) final {
108     Lattice &C = llvm::any_cast<Lattice &>(Current.Value);
109     const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value);
110     return widenInternal(Rank0{}, C, P);
111   }
112 
isEqualTypeErased(const TypeErasedLattice & E1,const TypeErasedLattice & E2)113   bool isEqualTypeErased(const TypeErasedLattice &E1,
114                          const TypeErasedLattice &E2) final {
115     const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value);
116     const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
117     return L1 == L2;
118   }
119 
transferTypeErased(const CFGElement & Element,TypeErasedLattice & E,Environment & Env)120   void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E,
121                           Environment &Env) final {
122     Lattice &L = llvm::any_cast<Lattice &>(E.Value);
123     static_cast<Derived *>(this)->transfer(Element, L, Env);
124   }
125 
transferBranchTypeErased(bool Branch,const Stmt * Stmt,TypeErasedLattice & E,Environment & Env)126   void transferBranchTypeErased(bool Branch, const Stmt *Stmt,
127                                 TypeErasedLattice &E, Environment &Env) final {
128     transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt,
129                            E, Env);
130   }
131 
132 private:
133   // These `Rank` structs are used for template metaprogramming to choose
134   // between overloads.
135   struct Rank1 {};
136   struct Rank0 : Rank1 {};
137 
138   // The first-choice implementation: use `widen` when it is available.
139   template <typename T>
140   static auto widenInternal(Rank0, T &Current, const T &Prev)
141       -> decltype(Current.widen(Prev)) {
142     return Current.widen(Prev);
143   }
144 
145   // The second-choice implementation: `widen` is unavailable. Widening is
146   // merged with equality checking, so when widening is unimplemented, we
147   // default to equality checking.
widenInternal(Rank1,const Lattice & Current,const Lattice & Prev)148   static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current,
149                                          const Lattice &Prev) {
150     return Prev == Current ? LatticeJoinEffect::Unchanged
151                            : LatticeJoinEffect::Changed;
152   }
153 
154   // The first-choice implementation: `transferBranch` is implemented.
155   template <typename Analysis>
156   static auto transferBranchInternal(Rank0, Analysis &A, bool Branch,
157                                      const Stmt *Stmt, TypeErasedLattice &L,
158                                      Environment &Env)
159       -> std::void_t<decltype(A.transferBranch(
160           Branch, Stmt, std::declval<LatticeT &>(), Env))> {
161     A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env);
162   }
163 
164   // The second-choice implementation: `transferBranch` is unimplemented. No-op.
165   template <typename Analysis>
transferBranchInternal(Rank1,Analysis & A,bool,const Stmt *,TypeErasedLattice &,Environment &)166   static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *,
167                                      TypeErasedLattice &, Environment &) {}
168 
169   ASTContext &Context;
170 };
171 
172 // Model of the program at a given program point.
173 template <typename LatticeT> struct DataflowAnalysisState {
174   // Model of a program property.
175   LatticeT Lattice;
176 
177   // Model of the state of the program (store and heap).
178   Environment Env;
179 };
180 
181 /// A callback to be called with the state before or after visiting a CFG
182 /// element.
183 template <typename AnalysisT>
184 using CFGEltCallback = std::function<void(
185     const CFGElement &,
186     const DataflowAnalysisState<typename AnalysisT::Lattice> &)>;
187 
188 /// A pair of callbacks to be called with the state before and after visiting a
189 /// CFG element.
190 /// Either or both of the callbacks may be null.
191 template <typename AnalysisT> struct CFGEltCallbacks {
192   CFGEltCallback<AnalysisT> Before;
193   CFGEltCallback<AnalysisT> After;
194 };
195 
196 /// A callback for performing diagnosis on a CFG element, called with the state
197 /// before or after visiting that CFG element. Returns a list of diagnostics
198 /// to emit (if any).
199 template <typename AnalysisT, typename Diagnostic>
200 using DiagnosisCallback = llvm::function_ref<llvm::SmallVector<Diagnostic>(
201     const CFGElement &, ASTContext &,
202     const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>;
203 
204 /// A pair of callbacks for performing diagnosis on a CFG element, called with
205 /// the state before and after visiting that CFG element.
206 /// Either or both of the callbacks may be null.
207 template <typename AnalysisT, typename Diagnostic> struct DiagnosisCallbacks {
208   DiagnosisCallback<AnalysisT, Diagnostic> Before;
209   DiagnosisCallback<AnalysisT, Diagnostic> After;
210 };
211 
212 /// Default for the maximum number of SAT solver iterations during analysis.
213 inline constexpr std::int64_t kDefaultMaxSATIterations = 1'000'000'000;
214 
215 /// Default for the maximum number of block visits during analysis.
216 inline constexpr std::int32_t kDefaultMaxBlockVisits = 20'000;
217 
218 /// Performs dataflow analysis and returns a mapping from basic block IDs to
219 /// dataflow analysis states that model the respective basic blocks. The
220 /// returned vector, if any, will have the same size as the number of CFG
221 /// blocks, with indices corresponding to basic block IDs. Returns an error if
222 /// the dataflow analysis cannot be performed successfully. Otherwise, calls
223 /// `PostAnalysisCallbacks` on each CFG element with the final analysis results
224 /// before and after that program point.
225 ///
226 /// `MaxBlockVisits` caps the number of block visits during analysis. See
227 /// `runTypeErasedDataflowAnalysis` for a full description. The default value is
228 /// essentially arbitrary -- large enough to accommodate what seems like any
229 /// reasonable CFG, but still small enough to limit the cost of hitting the
230 /// limit.
231 template <typename AnalysisT>
232 llvm::Expected<std::vector<
233     std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
234 runDataflowAnalysis(const AdornedCFG &ACFG, AnalysisT &Analysis,
235                     const Environment &InitEnv,
236                     CFGEltCallbacks<AnalysisT> PostAnalysisCallbacks,
237                     std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
238   CFGEltCallbacksTypeErased TypeErasedCallbacks;
239   if (PostAnalysisCallbacks.Before) {
240     TypeErasedCallbacks.Before =
241         [&PostAnalysisCallbacks](const CFGElement &Element,
242                                  const TypeErasedDataflowAnalysisState &State) {
243           auto *Lattice =
244               llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
245           // FIXME: we should not be copying the environment here!
246           // Ultimately the `CFGEltCallback` only gets a const reference anyway.
247           PostAnalysisCallbacks.Before(
248               Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
249                            *Lattice, State.Env.fork()});
250         };
251   }
252   if (PostAnalysisCallbacks.After) {
253     TypeErasedCallbacks.After =
254         [&PostAnalysisCallbacks](const CFGElement &Element,
255                                  const TypeErasedDataflowAnalysisState &State) {
256           auto *Lattice =
257               llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
258           // FIXME: we should not be copying the environment here!
259           // Ultimately the `CFGEltCallback` only gets a const reference anyway.
260           PostAnalysisCallbacks.After(
261               Element, DataflowAnalysisState<typename AnalysisT::Lattice>{
262                            *Lattice, State.Env.fork()});
263         };
264   }
265 
266   auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis(
267       ACFG, Analysis, InitEnv, TypeErasedCallbacks, MaxBlockVisits);
268   if (!TypeErasedBlockStates)
269     return TypeErasedBlockStates.takeError();
270 
271   std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
272       BlockStates;
273   BlockStates.reserve(TypeErasedBlockStates->size());
274 
275   llvm::transform(
276       std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates),
277       [](auto &OptState) {
278         return llvm::transformOptional(
279             std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) {
280               return DataflowAnalysisState<typename AnalysisT::Lattice>{
281                   llvm::any_cast<typename AnalysisT::Lattice>(
282                       std::move(State.Lattice.Value)),
283                   std::move(State.Env)};
284             });
285       });
286   return std::move(BlockStates);
287 }
288 
289 /// Overload that takes only one post-analysis callback, which is run on the
290 /// state after visiting the `CFGElement`. This is provided for backwards
291 /// compatibility; new callers should call the overload taking `CFGEltCallbacks`
292 /// instead.
293 template <typename AnalysisT>
294 llvm::Expected<std::vector<
295     std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
296 runDataflowAnalysis(
297     const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv,
298     CFGEltCallback<AnalysisT> PostAnalysisCallbackAfterElt = nullptr,
299     std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
300   return runDataflowAnalysis(ACFG, Analysis, InitEnv,
301                              {nullptr, PostAnalysisCallbackAfterElt},
302                              MaxBlockVisits);
303 }
304 
305 // Create an analysis class that is derived from `DataflowAnalysis`. This is an
306 // SFINAE adapter that allows us to call two different variants of constructor
307 // (either with or without the optional `Environment` parameter).
308 // FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment`
309 // parameter in their constructor so that we can get rid of this abomination.
310 template <typename AnalysisT>
311 auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
312     -> decltype(AnalysisT(ASTCtx, Env)) {
313   return AnalysisT(ASTCtx, Env);
314 }
315 template <typename AnalysisT>
316 auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
317     -> decltype(AnalysisT(ASTCtx)) {
318   return AnalysisT(ASTCtx);
319 }
320 
321 /// Runs a dataflow analysis over the given function and then runs `Diagnoser`
322 /// over the results. Returns a list of diagnostics for `FuncDecl` or an
323 /// error. Currently, errors can occur (at least) because the analysis requires
324 /// too many iterations over the CFG or the SAT solver times out.
325 ///
326 /// The default value of `MaxSATIterations` was chosen based on the following
327 /// observations:
328 /// - Non-pathological calls to the solver typically require only a few hundred
329 ///   iterations.
330 /// - This limit is still low enough to keep runtimes acceptable (on typical
331 ///   machines) in cases where we hit the limit.
332 ///
333 /// `MaxBlockVisits` caps the number of block visits during analysis. See
334 /// `runDataflowAnalysis` for a full description and explanation of the default
335 /// value.
336 template <typename AnalysisT, typename Diagnostic>
337 llvm::Expected<llvm::SmallVector<Diagnostic>>
338 diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
339                  DiagnosisCallbacks<AnalysisT, Diagnostic> Diagnoser,
340                  std::int64_t MaxSATIterations = kDefaultMaxSATIterations,
341                  std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
342   llvm::Expected<AdornedCFG> Context = AdornedCFG::build(FuncDecl);
343   if (!Context)
344     return Context.takeError();
345 
346   auto Solver = std::make_unique<WatchedLiteralsSolver>(MaxSATIterations);
347   DataflowAnalysisContext AnalysisContext(*Solver);
348   Environment Env(AnalysisContext, FuncDecl);
349   AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env);
350   llvm::SmallVector<Diagnostic> Diagnostics;
351   CFGEltCallbacksTypeErased PostAnalysisCallbacks;
352   if (Diagnoser.Before) {
353     PostAnalysisCallbacks.Before =
354         [&ASTCtx, &Diagnoser,
355          &Diagnostics](const CFGElement &Elt,
356                        const TypeErasedDataflowAnalysisState &State) mutable {
357           auto EltDiagnostics = Diagnoser.Before(
358               Elt, ASTCtx,
359               TransferStateForDiagnostics<typename AnalysisT::Lattice>(
360                   llvm::any_cast<const typename AnalysisT::Lattice &>(
361                       State.Lattice.Value),
362                   State.Env));
363           llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
364         };
365   }
366   if (Diagnoser.After) {
367     PostAnalysisCallbacks.After =
368         [&ASTCtx, &Diagnoser,
369          &Diagnostics](const CFGElement &Elt,
370                        const TypeErasedDataflowAnalysisState &State) mutable {
371           auto EltDiagnostics = Diagnoser.After(
372               Elt, ASTCtx,
373               TransferStateForDiagnostics<typename AnalysisT::Lattice>(
374                   llvm::any_cast<const typename AnalysisT::Lattice &>(
375                       State.Lattice.Value),
376                   State.Env));
377           llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
378         };
379   }
380   if (llvm::Error Err =
381           runTypeErasedDataflowAnalysis(*Context, Analysis, Env,
382                                         PostAnalysisCallbacks, MaxBlockVisits)
383               .takeError())
384     return std::move(Err);
385 
386   if (Solver->reachedLimit())
387     return llvm::createStringError(llvm::errc::interrupted,
388                                    "SAT solver timed out");
389 
390   return Diagnostics;
391 }
392 
393 /// Overload that takes only one diagnosis callback, which is run on the state
394 /// after visiting the `CFGElement`. This is provided for backwards
395 /// compatibility; new callers should call the overload taking
396 /// `DiagnosisCallbacks` instead.
397 template <typename AnalysisT, typename Diagnostic>
398 llvm::Expected<llvm::SmallVector<Diagnostic>>
399 diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
400                  DiagnosisCallback<AnalysisT, Diagnostic> Diagnoser,
401                  std::int64_t MaxSATIterations = kDefaultMaxSATIterations,
402                  std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) {
403   DiagnosisCallbacks<AnalysisT, Diagnostic> Callbacks = {nullptr, Diagnoser};
404   return diagnoseFunction(FuncDecl, ASTCtx, Callbacks, MaxSATIterations,
405                           MaxBlockVisits);
406 }
407 
408 /// Abstract base class for dataflow "models": reusable analysis components that
409 /// model a particular aspect of program semantics in the `Environment`. For
410 /// example, a model may capture a type and its related functions.
411 class DataflowModel : public Environment::ValueModel {
412 public:
413   /// Return value indicates whether the model processed the `Element`.
414   virtual bool transfer(const CFGElement &Element, Environment &Env) = 0;
415 };
416 
417 } // namespace dataflow
418 } // namespace clang
419 
420 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
421