1 //===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines base types and functions for building dataflow analyses 10 // that run over Control-Flow Graphs (CFGs). 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 15 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 16 17 #include <iterator> 18 #include <optional> 19 #include <type_traits> 20 #include <utility> 21 #include <vector> 22 23 #include "clang/AST/ASTContext.h" 24 #include "clang/Analysis/CFG.h" 25 #include "clang/Analysis/FlowSensitive/AdornedCFG.h" 26 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 27 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 28 #include "clang/Analysis/FlowSensitive/MatchSwitch.h" 29 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" 30 #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/ADT/STLFunctionalExtras.h" 33 #include "llvm/ADT/SmallVector.h" 34 #include "llvm/Support/Errc.h" 35 #include "llvm/Support/Error.h" 36 37 namespace clang { 38 namespace dataflow { 39 40 /// Base class template for dataflow analyses built on a single lattice type. 41 /// 42 /// Requirements: 43 /// 44 /// `Derived` must be derived from a specialization of this class template and 45 /// must provide the following public members: 46 /// * `LatticeT initialElement()` - returns a lattice element that models the 47 /// initial state of a basic block; 48 /// * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies 49 /// the analysis transfer function for a given CFG element and lattice 50 /// element. 51 /// 52 /// `Derived` can optionally provide the following members: 53 /// * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E, 54 /// Environment &Env)` - applies the analysis transfer 55 /// function for a given edge from a CFG block of a conditional statement. 56 /// 57 /// `Derived` can optionally override the virtual functions in the 58 /// `Environment::ValueModel` interface (which is an indirect base class of 59 /// this class). 60 /// 61 /// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must 62 /// provide the following public members: 63 /// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the 64 /// argument by computing their least upper bound, modifies the object if 65 /// necessary, and returns an effect indicating whether any changes were 66 /// made to it; 67 /// FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)` 68 /// * `bool operator==(const LatticeT &) const` - returns true if and only if 69 /// the object is equal to the argument. 70 /// 71 /// `LatticeT` can optionally provide the following members: 72 /// * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the 73 /// lattice element with an approximation that can reach a fixed point more 74 /// quickly than iterated application of the transfer function alone. The 75 /// previous value is provided to inform the choice of widened value. The 76 /// function must also serve as a comparison operation, by indicating whether 77 /// the widened value is equivalent to the previous value with the returned 78 /// `LatticeJoinEffect`. 79 template <typename Derived, typename LatticeT> 80 class DataflowAnalysis : public TypeErasedDataflowAnalysis { 81 public: 82 /// Bounded join-semilattice that is used in the analysis. 83 using Lattice = LatticeT; 84 DataflowAnalysis(ASTContext & Context)85 explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {} 86 DataflowAnalysis(ASTContext & Context,DataflowAnalysisOptions Options)87 explicit DataflowAnalysis(ASTContext &Context, 88 DataflowAnalysisOptions Options) 89 : TypeErasedDataflowAnalysis(Options), Context(Context) {} 90 getASTContext()91 ASTContext &getASTContext() final { return Context; } 92 typeErasedInitialElement()93 TypeErasedLattice typeErasedInitialElement() final { 94 return {static_cast<Derived *>(this)->initialElement()}; 95 } 96 joinTypeErased(const TypeErasedLattice & E1,const TypeErasedLattice & E2)97 TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1, 98 const TypeErasedLattice &E2) final { 99 // FIXME: change the signature of join() to avoid copying here. 100 Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value); 101 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); 102 L1.join(L2); 103 return {std::move(L1)}; 104 } 105 widenTypeErased(TypeErasedLattice & Current,const TypeErasedLattice & Previous)106 LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current, 107 const TypeErasedLattice &Previous) final { 108 Lattice &C = llvm::any_cast<Lattice &>(Current.Value); 109 const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value); 110 return widenInternal(Rank0{}, C, P); 111 } 112 isEqualTypeErased(const TypeErasedLattice & E1,const TypeErasedLattice & E2)113 bool isEqualTypeErased(const TypeErasedLattice &E1, 114 const TypeErasedLattice &E2) final { 115 const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value); 116 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); 117 return L1 == L2; 118 } 119 transferTypeErased(const CFGElement & Element,TypeErasedLattice & E,Environment & Env)120 void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E, 121 Environment &Env) final { 122 Lattice &L = llvm::any_cast<Lattice &>(E.Value); 123 static_cast<Derived *>(this)->transfer(Element, L, Env); 124 } 125 transferBranchTypeErased(bool Branch,const Stmt * Stmt,TypeErasedLattice & E,Environment & Env)126 void transferBranchTypeErased(bool Branch, const Stmt *Stmt, 127 TypeErasedLattice &E, Environment &Env) final { 128 transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt, 129 E, Env); 130 } 131 132 private: 133 // These `Rank` structs are used for template metaprogramming to choose 134 // between overloads. 135 struct Rank1 {}; 136 struct Rank0 : Rank1 {}; 137 138 // The first-choice implementation: use `widen` when it is available. 139 template <typename T> 140 static auto widenInternal(Rank0, T &Current, const T &Prev) 141 -> decltype(Current.widen(Prev)) { 142 return Current.widen(Prev); 143 } 144 145 // The second-choice implementation: `widen` is unavailable. Widening is 146 // merged with equality checking, so when widening is unimplemented, we 147 // default to equality checking. widenInternal(Rank1,const Lattice & Current,const Lattice & Prev)148 static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current, 149 const Lattice &Prev) { 150 return Prev == Current ? LatticeJoinEffect::Unchanged 151 : LatticeJoinEffect::Changed; 152 } 153 154 // The first-choice implementation: `transferBranch` is implemented. 155 template <typename Analysis> 156 static auto transferBranchInternal(Rank0, Analysis &A, bool Branch, 157 const Stmt *Stmt, TypeErasedLattice &L, 158 Environment &Env) 159 -> std::void_t<decltype(A.transferBranch( 160 Branch, Stmt, std::declval<LatticeT &>(), Env))> { 161 A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env); 162 } 163 164 // The second-choice implementation: `transferBranch` is unimplemented. No-op. 165 template <typename Analysis> transferBranchInternal(Rank1,Analysis & A,bool,const Stmt *,TypeErasedLattice &,Environment &)166 static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *, 167 TypeErasedLattice &, Environment &) {} 168 169 ASTContext &Context; 170 }; 171 172 // Model of the program at a given program point. 173 template <typename LatticeT> struct DataflowAnalysisState { 174 // Model of a program property. 175 LatticeT Lattice; 176 177 // Model of the state of the program (store and heap). 178 Environment Env; 179 }; 180 181 /// A callback to be called with the state before or after visiting a CFG 182 /// element. 183 template <typename AnalysisT> 184 using CFGEltCallback = std::function<void( 185 const CFGElement &, 186 const DataflowAnalysisState<typename AnalysisT::Lattice> &)>; 187 188 /// A pair of callbacks to be called with the state before and after visiting a 189 /// CFG element. 190 /// Either or both of the callbacks may be null. 191 template <typename AnalysisT> struct CFGEltCallbacks { 192 CFGEltCallback<AnalysisT> Before; 193 CFGEltCallback<AnalysisT> After; 194 }; 195 196 /// A callback for performing diagnosis on a CFG element, called with the state 197 /// before or after visiting that CFG element. Returns a list of diagnostics 198 /// to emit (if any). 199 template <typename AnalysisT, typename Diagnostic> 200 using DiagnosisCallback = llvm::function_ref<llvm::SmallVector<Diagnostic>( 201 const CFGElement &, ASTContext &, 202 const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>; 203 204 /// A pair of callbacks for performing diagnosis on a CFG element, called with 205 /// the state before and after visiting that CFG element. 206 /// Either or both of the callbacks may be null. 207 template <typename AnalysisT, typename Diagnostic> struct DiagnosisCallbacks { 208 DiagnosisCallback<AnalysisT, Diagnostic> Before; 209 DiagnosisCallback<AnalysisT, Diagnostic> After; 210 }; 211 212 /// Default for the maximum number of SAT solver iterations during analysis. 213 inline constexpr std::int64_t kDefaultMaxSATIterations = 1'000'000'000; 214 215 /// Default for the maximum number of block visits during analysis. 216 inline constexpr std::int32_t kDefaultMaxBlockVisits = 20'000; 217 218 /// Performs dataflow analysis and returns a mapping from basic block IDs to 219 /// dataflow analysis states that model the respective basic blocks. The 220 /// returned vector, if any, will have the same size as the number of CFG 221 /// blocks, with indices corresponding to basic block IDs. Returns an error if 222 /// the dataflow analysis cannot be performed successfully. Otherwise, calls 223 /// `PostAnalysisCallbacks` on each CFG element with the final analysis results 224 /// before and after that program point. 225 /// 226 /// `MaxBlockVisits` caps the number of block visits during analysis. See 227 /// `runTypeErasedDataflowAnalysis` for a full description. The default value is 228 /// essentially arbitrary -- large enough to accommodate what seems like any 229 /// reasonable CFG, but still small enough to limit the cost of hitting the 230 /// limit. 231 template <typename AnalysisT> 232 llvm::Expected<std::vector< 233 std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>> 234 runDataflowAnalysis(const AdornedCFG &ACFG, AnalysisT &Analysis, 235 const Environment &InitEnv, 236 CFGEltCallbacks<AnalysisT> PostAnalysisCallbacks, 237 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) { 238 CFGEltCallbacksTypeErased TypeErasedCallbacks; 239 if (PostAnalysisCallbacks.Before) { 240 TypeErasedCallbacks.Before = 241 [&PostAnalysisCallbacks](const CFGElement &Element, 242 const TypeErasedDataflowAnalysisState &State) { 243 auto *Lattice = 244 llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value); 245 // FIXME: we should not be copying the environment here! 246 // Ultimately the `CFGEltCallback` only gets a const reference anyway. 247 PostAnalysisCallbacks.Before( 248 Element, DataflowAnalysisState<typename AnalysisT::Lattice>{ 249 *Lattice, State.Env.fork()}); 250 }; 251 } 252 if (PostAnalysisCallbacks.After) { 253 TypeErasedCallbacks.After = 254 [&PostAnalysisCallbacks](const CFGElement &Element, 255 const TypeErasedDataflowAnalysisState &State) { 256 auto *Lattice = 257 llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value); 258 // FIXME: we should not be copying the environment here! 259 // Ultimately the `CFGEltCallback` only gets a const reference anyway. 260 PostAnalysisCallbacks.After( 261 Element, DataflowAnalysisState<typename AnalysisT::Lattice>{ 262 *Lattice, State.Env.fork()}); 263 }; 264 } 265 266 auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis( 267 ACFG, Analysis, InitEnv, TypeErasedCallbacks, MaxBlockVisits); 268 if (!TypeErasedBlockStates) 269 return TypeErasedBlockStates.takeError(); 270 271 std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>> 272 BlockStates; 273 BlockStates.reserve(TypeErasedBlockStates->size()); 274 275 llvm::transform( 276 std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates), 277 [](auto &OptState) { 278 return llvm::transformOptional( 279 std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) { 280 return DataflowAnalysisState<typename AnalysisT::Lattice>{ 281 llvm::any_cast<typename AnalysisT::Lattice>( 282 std::move(State.Lattice.Value)), 283 std::move(State.Env)}; 284 }); 285 }); 286 return std::move(BlockStates); 287 } 288 289 /// Overload that takes only one post-analysis callback, which is run on the 290 /// state after visiting the `CFGElement`. This is provided for backwards 291 /// compatibility; new callers should call the overload taking `CFGEltCallbacks` 292 /// instead. 293 template <typename AnalysisT> 294 llvm::Expected<std::vector< 295 std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>> 296 runDataflowAnalysis( 297 const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv, 298 CFGEltCallback<AnalysisT> PostAnalysisCallbackAfterElt = nullptr, 299 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) { 300 return runDataflowAnalysis(ACFG, Analysis, InitEnv, 301 {nullptr, PostAnalysisCallbackAfterElt}, 302 MaxBlockVisits); 303 } 304 305 // Create an analysis class that is derived from `DataflowAnalysis`. This is an 306 // SFINAE adapter that allows us to call two different variants of constructor 307 // (either with or without the optional `Environment` parameter). 308 // FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment` 309 // parameter in their constructor so that we can get rid of this abomination. 310 template <typename AnalysisT> 311 auto createAnalysis(ASTContext &ASTCtx, Environment &Env) 312 -> decltype(AnalysisT(ASTCtx, Env)) { 313 return AnalysisT(ASTCtx, Env); 314 } 315 template <typename AnalysisT> 316 auto createAnalysis(ASTContext &ASTCtx, Environment &Env) 317 -> decltype(AnalysisT(ASTCtx)) { 318 return AnalysisT(ASTCtx); 319 } 320 321 /// Runs a dataflow analysis over the given function and then runs `Diagnoser` 322 /// over the results. Returns a list of diagnostics for `FuncDecl` or an 323 /// error. Currently, errors can occur (at least) because the analysis requires 324 /// too many iterations over the CFG or the SAT solver times out. 325 /// 326 /// The default value of `MaxSATIterations` was chosen based on the following 327 /// observations: 328 /// - Non-pathological calls to the solver typically require only a few hundred 329 /// iterations. 330 /// - This limit is still low enough to keep runtimes acceptable (on typical 331 /// machines) in cases where we hit the limit. 332 /// 333 /// `MaxBlockVisits` caps the number of block visits during analysis. See 334 /// `runDataflowAnalysis` for a full description and explanation of the default 335 /// value. 336 template <typename AnalysisT, typename Diagnostic> 337 llvm::Expected<llvm::SmallVector<Diagnostic>> 338 diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx, 339 DiagnosisCallbacks<AnalysisT, Diagnostic> Diagnoser, 340 std::int64_t MaxSATIterations = kDefaultMaxSATIterations, 341 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) { 342 llvm::Expected<AdornedCFG> Context = AdornedCFG::build(FuncDecl); 343 if (!Context) 344 return Context.takeError(); 345 346 auto Solver = std::make_unique<WatchedLiteralsSolver>(MaxSATIterations); 347 DataflowAnalysisContext AnalysisContext(*Solver); 348 Environment Env(AnalysisContext, FuncDecl); 349 AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env); 350 llvm::SmallVector<Diagnostic> Diagnostics; 351 CFGEltCallbacksTypeErased PostAnalysisCallbacks; 352 if (Diagnoser.Before) { 353 PostAnalysisCallbacks.Before = 354 [&ASTCtx, &Diagnoser, 355 &Diagnostics](const CFGElement &Elt, 356 const TypeErasedDataflowAnalysisState &State) mutable { 357 auto EltDiagnostics = Diagnoser.Before( 358 Elt, ASTCtx, 359 TransferStateForDiagnostics<typename AnalysisT::Lattice>( 360 llvm::any_cast<const typename AnalysisT::Lattice &>( 361 State.Lattice.Value), 362 State.Env)); 363 llvm::move(EltDiagnostics, std::back_inserter(Diagnostics)); 364 }; 365 } 366 if (Diagnoser.After) { 367 PostAnalysisCallbacks.After = 368 [&ASTCtx, &Diagnoser, 369 &Diagnostics](const CFGElement &Elt, 370 const TypeErasedDataflowAnalysisState &State) mutable { 371 auto EltDiagnostics = Diagnoser.After( 372 Elt, ASTCtx, 373 TransferStateForDiagnostics<typename AnalysisT::Lattice>( 374 llvm::any_cast<const typename AnalysisT::Lattice &>( 375 State.Lattice.Value), 376 State.Env)); 377 llvm::move(EltDiagnostics, std::back_inserter(Diagnostics)); 378 }; 379 } 380 if (llvm::Error Err = 381 runTypeErasedDataflowAnalysis(*Context, Analysis, Env, 382 PostAnalysisCallbacks, MaxBlockVisits) 383 .takeError()) 384 return std::move(Err); 385 386 if (Solver->reachedLimit()) 387 return llvm::createStringError(llvm::errc::interrupted, 388 "SAT solver timed out"); 389 390 return Diagnostics; 391 } 392 393 /// Overload that takes only one diagnosis callback, which is run on the state 394 /// after visiting the `CFGElement`. This is provided for backwards 395 /// compatibility; new callers should call the overload taking 396 /// `DiagnosisCallbacks` instead. 397 template <typename AnalysisT, typename Diagnostic> 398 llvm::Expected<llvm::SmallVector<Diagnostic>> 399 diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx, 400 DiagnosisCallback<AnalysisT, Diagnostic> Diagnoser, 401 std::int64_t MaxSATIterations = kDefaultMaxSATIterations, 402 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) { 403 DiagnosisCallbacks<AnalysisT, Diagnostic> Callbacks = {nullptr, Diagnoser}; 404 return diagnoseFunction(FuncDecl, ASTCtx, Callbacks, MaxSATIterations, 405 MaxBlockVisits); 406 } 407 408 /// Abstract base class for dataflow "models": reusable analysis components that 409 /// model a particular aspect of program semantics in the `Environment`. For 410 /// example, a model may capture a type and its related functions. 411 class DataflowModel : public Environment::ValueModel { 412 public: 413 /// Return value indicates whether the model processed the `Element`. 414 virtual bool transfer(const CFGElement &Element, Environment &Env) = 0; 415 }; 416 417 } // namespace dataflow 418 } // namespace clang 419 420 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 421