1 //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 17 18 #include "clang/AST/Decl.h" 19 #include "clang/AST/Expr.h" 20 #include "clang/AST/TypeOrdering.h" 21 #include "clang/Analysis/FlowSensitive/ASTOps.h" 22 #include "clang/Analysis/FlowSensitive/AdornedCFG.h" 23 #include "clang/Analysis/FlowSensitive/Arena.h" 24 #include "clang/Analysis/FlowSensitive/Solver.h" 25 #include "clang/Analysis/FlowSensitive/StorageLocation.h" 26 #include "clang/Analysis/FlowSensitive/Value.h" 27 #include "llvm/ADT/DenseMap.h" 28 #include "llvm/ADT/DenseSet.h" 29 #include "llvm/ADT/SetVector.h" 30 #include "llvm/Support/Compiler.h" 31 #include <cassert> 32 #include <memory> 33 #include <optional> 34 35 namespace clang { 36 namespace dataflow { 37 class Logger; 38 39 struct ContextSensitiveOptions { 40 /// The maximum depth to analyze. A value of zero is equivalent to disabling 41 /// context-sensitive analysis entirely. 42 unsigned Depth = 2; 43 }; 44 45 /// Owns objects that encompass the state of a program and stores context that 46 /// is used during dataflow analysis. 47 class DataflowAnalysisContext { 48 public: 49 struct Options { 50 /// Options for analyzing function bodies when present in the translation 51 /// unit, or empty to disable context-sensitive analysis. Note that this is 52 /// fundamentally limited: some constructs, such as recursion, are 53 /// explicitly unsupported. 54 std::optional<ContextSensitiveOptions> ContextSensitiveOpts; 55 56 /// If provided, analysis details will be recorded here. 57 /// (This is always non-null within an AnalysisContext, the framework 58 /// provides a fallback no-op logger). 59 Logger *Log = nullptr; 60 }; 61 62 /// Constructs a dataflow analysis context. 63 /// 64 /// Requirements: 65 /// 66 /// `S` must not be null. 67 DataflowAnalysisContext(std::unique_ptr<Solver> S, 68 Options Opts = Options{ 69 /*ContextSensitiveOpts=*/std::nullopt, 70 /*Logger=*/nullptr}) 71 : DataflowAnalysisContext(*S, std::move(S), Opts) {} 72 73 /// Constructs a dataflow analysis context. 74 /// 75 /// Requirements: 76 /// 77 /// `S` must outlive the `DataflowAnalysisContext`. 78 DataflowAnalysisContext(Solver &S, Options Opts = Options{ 79 /*ContextSensitiveOpts=*/std::nullopt, 80 /*Logger=*/nullptr}) DataflowAnalysisContext(S,nullptr,Opts)81 : DataflowAnalysisContext(S, nullptr, Opts) {} 82 83 ~DataflowAnalysisContext(); 84 85 /// Sets a callback that returns the names and types of the synthetic fields 86 /// to add to a `RecordStorageLocation` of a given type. 87 /// Typically, this is called from the constructor of a `DataflowAnalysis` 88 /// 89 /// The field types returned by the callback may not have reference type. 90 /// 91 /// To maintain the invariant that all `RecordStorageLocation`s of a given 92 /// type have the same fields: 93 /// * The callback must always return the same result for a given type 94 /// * `setSyntheticFieldCallback()` must be called before any 95 // `RecordStorageLocation`s are created. setSyntheticFieldCallback(std::function<llvm::StringMap<QualType> (QualType)> CB)96 void setSyntheticFieldCallback( 97 std::function<llvm::StringMap<QualType>(QualType)> CB) { 98 assert(!RecordStorageLocationCreated); 99 SyntheticFieldCallback = CB; 100 } 101 102 /// Returns a new storage location appropriate for `Type`. 103 /// 104 /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`. 105 StorageLocation &createStorageLocation(QualType Type); 106 107 /// Creates a `RecordStorageLocation` for the given type and with the given 108 /// fields. 109 /// 110 /// Requirements: 111 /// 112 /// `FieldLocs` must contain exactly the fields returned by 113 /// `getModeledFields(Type)`. 114 /// `SyntheticFields` must contain exactly the fields returned by 115 /// `getSyntheticFields(Type)`. 116 RecordStorageLocation &createRecordStorageLocation( 117 QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, 118 RecordStorageLocation::SyntheticFieldMap SyntheticFields); 119 120 /// Returns a stable storage location for `D`. 121 StorageLocation &getStableStorageLocation(const ValueDecl &D); 122 123 /// Returns a stable storage location for `E`. 124 StorageLocation &getStableStorageLocation(const Expr &E); 125 126 /// Returns a pointer value that represents a null pointer. Calls with 127 /// `PointeeType` that are canonically equivalent will return the same result. 128 /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`. 129 PointerValue &getOrCreateNullPointerValue(QualType PointeeType); 130 131 /// Adds `Constraint` to current and future flow conditions in this context. 132 /// 133 /// Invariants must contain only flow-insensitive information, i.e. facts that 134 /// are true on all paths through the program. 135 /// Information can be added eagerly (when analysis begins), or lazily (e.g. 136 /// when values are first used). The analysis must be careful that the same 137 /// information is added regardless of which order blocks are analyzed in. 138 void addInvariant(const Formula &Constraint); 139 140 /// Adds `Constraint` to the flow condition identified by `Token`. 141 void addFlowConditionConstraint(Atom Token, const Formula &Constraint); 142 143 /// Creates a new flow condition with the same constraints as the flow 144 /// condition identified by `Token` and returns its token. 145 Atom forkFlowCondition(Atom Token); 146 147 /// Creates a new flow condition that represents the disjunction of the flow 148 /// conditions identified by `FirstToken` and `SecondToken`, and returns its 149 /// token. 150 Atom joinFlowConditions(Atom FirstToken, Atom SecondToken); 151 152 /// Returns true if the constraints of the flow condition identified by 153 /// `Token` imply that `F` is true. 154 /// Returns false if the flow condition does not imply `F` or if the solver 155 /// times out. 156 bool flowConditionImplies(Atom Token, const Formula &F); 157 158 /// Returns true if the constraints of the flow condition identified by 159 /// `Token` still allow `F` to be true. 160 /// Returns false if the flow condition implies that `F` is false or if the 161 /// solver times out. 162 bool flowConditionAllows(Atom Token, const Formula &F); 163 164 /// Returns true if `Val1` is equivalent to `Val2`. 165 /// Note: This function doesn't take into account constraints on `Val1` and 166 /// `Val2` imposed by the flow condition. 167 bool equivalentFormulas(const Formula &Val1, const Formula &Val2); 168 169 LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, 170 llvm::raw_ostream &OS = llvm::dbgs()); 171 172 /// Returns the `AdornedCFG` registered for `F`, if any. Otherwise, 173 /// returns null. 174 const AdornedCFG *getAdornedCFG(const FunctionDecl *F); 175 getOptions()176 const Options &getOptions() { return Opts; } 177 arena()178 Arena &arena() { return *A; } 179 180 /// Returns the outcome of satisfiability checking on `Constraints`. 181 /// 182 /// Flow conditions are not incorporated, so they may need to be manually 183 /// included in `Constraints` to provide contextually-accurate results, e.g. 184 /// if any definitions or relationships of the values in `Constraints` have 185 /// been stored in flow conditions. 186 Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints); 187 188 /// Returns the fields of `Type`, limited to the set of fields modeled by this 189 /// context. 190 FieldSet getModeledFields(QualType Type); 191 192 /// Returns the names and types of the synthetic fields for the given record 193 /// type. getSyntheticFields(QualType Type)194 llvm::StringMap<QualType> getSyntheticFields(QualType Type) { 195 assert(Type->isRecordType()); 196 if (SyntheticFieldCallback) { 197 llvm::StringMap<QualType> Result = SyntheticFieldCallback(Type); 198 // Synthetic fields are not allowed to have reference type. 199 assert([&Result] { 200 for (const auto &Entry : Result) 201 if (Entry.getValue()->isReferenceType()) 202 return false; 203 return true; 204 }()); 205 return Result; 206 } 207 return {}; 208 } 209 210 private: 211 friend class Environment; 212 213 struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> { getEmptyKeyNullableQualTypeDenseMapInfo214 static QualType getEmptyKey() { 215 // Allow a NULL `QualType` by using a different value as the empty key. 216 return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1)); 217 } 218 219 using DenseMapInfo::getHashValue; 220 using DenseMapInfo::getTombstoneKey; 221 using DenseMapInfo::isEqual; 222 }; 223 224 /// `S` is the solver to use. `OwnedSolver` may be: 225 /// * Null (in which case `S` is non-onwed and must outlive this object), or 226 /// * Non-null (in which case it must refer to `S`, and the 227 /// `DataflowAnalysisContext will take ownership of `OwnedSolver`). 228 DataflowAnalysisContext(Solver &S, std::unique_ptr<Solver> &&OwnedSolver, 229 Options Opts); 230 231 // Extends the set of modeled field declarations. 232 void addModeledFields(const FieldSet &Fields); 233 234 /// Adds all constraints of the flow condition identified by `Token` and all 235 /// of its transitive dependencies to `Constraints`. 236 void 237 addTransitiveFlowConditionConstraints(Atom Token, 238 llvm::SetVector<const Formula *> &Out); 239 240 /// Returns true if the solver is able to prove that there is a satisfying 241 /// assignment for `Constraints`. isSatisfiable(llvm::SetVector<const Formula * > Constraints)242 bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) { 243 return querySolver(std::move(Constraints)).getStatus() == 244 Solver::Result::Status::Satisfiable; 245 } 246 247 /// Returns true if the solver is able to prove that there is no satisfying 248 /// assignment for `Constraints` isUnsatisfiable(llvm::SetVector<const Formula * > Constraints)249 bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) { 250 return querySolver(std::move(Constraints)).getStatus() == 251 Solver::Result::Status::Unsatisfiable; 252 } 253 254 Solver &S; 255 std::unique_ptr<Solver> OwnedSolver; 256 std::unique_ptr<Arena> A; 257 258 // Maps from program declarations and statements to storage locations that are 259 // assigned to them. These assignments are global (aggregated across all basic 260 // blocks) and are used to produce stable storage locations when the same 261 // basic blocks are evaluated multiple times. The storage locations that are 262 // in scope for a particular basic block are stored in `Environment`. 263 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc; 264 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc; 265 266 // Null pointer values, keyed by the canonical pointee type. 267 // 268 // FIXME: The pointer values are indexed by the pointee types which are 269 // required to initialize the `PointeeLoc` field in `PointerValue`. Consider 270 // creating a type-independent `NullPointerValue` without a `PointeeLoc` 271 // field. 272 llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo> 273 NullPointerVals; 274 275 Options Opts; 276 277 // Flow conditions are tracked symbolically: each unique flow condition is 278 // associated with a fresh symbolic variable (token), bound to the clause that 279 // defines the flow condition. Conceptually, each binding corresponds to an 280 // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition 281 // token (an atomic boolean) and `Ci`s are the set of constraints in the flow 282 // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in 283 // the `FlowConditionConstraints` map, keyed by the token of the flow 284 // condition. 285 // 286 // Flow conditions depend on other flow conditions if they are created using 287 // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition 288 // dependencies is stored in the `FlowConditionDeps` map. 289 llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps; 290 llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints; 291 const Formula *Invariant = nullptr; 292 293 llvm::DenseMap<const FunctionDecl *, AdornedCFG> FunctionContexts; 294 295 // Fields modeled by environments covered by this context. 296 FieldSet ModeledFields; 297 298 std::unique_ptr<Logger> LogOwner; // If created via flags. 299 300 std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback; 301 302 /// Has any `RecordStorageLocation` been created yet? 303 bool RecordStorageLocationCreated = false; 304 }; 305 306 } // namespace dataflow 307 } // namespace clang 308 309 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 310