1 //===-- DataflowEnvironment.h -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H 16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H 17 18 #include "clang/AST/Decl.h" 19 #include "clang/AST/DeclBase.h" 20 #include "clang/AST/Expr.h" 21 #include "clang/AST/Type.h" 22 #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" 23 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 24 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 25 #include "clang/Analysis/FlowSensitive/StorageLocation.h" 26 #include "clang/Analysis/FlowSensitive/Value.h" 27 #include "llvm/ADT/DenseMap.h" 28 #include "llvm/ADT/DenseSet.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include <memory> 31 #include <type_traits> 32 #include <utility> 33 34 namespace clang { 35 namespace dataflow { 36 37 /// Indicates what kind of indirections should be skipped past when retrieving 38 /// storage locations or values. 39 /// 40 /// FIXME: Consider renaming this or replacing it with a more appropriate model. 41 /// See the discussion in https://reviews.llvm.org/D116596 for context. 42 enum class SkipPast { 43 /// No indirections should be skipped past. 44 None, 45 /// An optional reference should be skipped past. 46 Reference, 47 /// An optional reference should be skipped past, then an optional pointer 48 /// should be skipped past. 49 ReferenceThenPointer, 50 }; 51 52 /// Indicates the result of a tentative comparison. 53 enum class ComparisonResult { 54 Same, 55 Different, 56 Unknown, 57 }; 58 59 /// Holds the state of the program (store and heap) at a given program point. 60 /// 61 /// WARNING: Symbolic values that are created by the environment for static 62 /// local and global variables are not currently invalidated on function calls. 63 /// This is unsound and should be taken into account when designing dataflow 64 /// analyses. 65 class Environment { 66 public: 67 /// Supplements `Environment` with non-standard comparison and join 68 /// operations. 69 class ValueModel { 70 public: 71 virtual ~ValueModel() = default; 72 73 /// Returns: 74 /// `Same`: `Val1` is equivalent to `Val2`, according to the model. 75 /// `Different`: `Val1` is distinct from `Val2`, according to the model. 76 /// `Unknown`: The model can't determine a relationship between `Val1` and 77 /// `Val2`. 78 /// 79 /// Requirements: 80 /// 81 /// `Val1` and `Val2` must be distinct. 82 /// 83 /// `Val1` and `Val2` must model values of type `Type`. 84 /// 85 /// `Val1` and `Val2` must be assigned to the same storage location in 86 /// `Env1` and `Env2` respectively. 87 virtual ComparisonResult compare(QualType Type, const Value &Val1, 88 const Environment &Env1, const Value &Val2, 89 const Environment &Env2) { 90 // FIXME: Consider adding QualType to StructValue and removing the Type 91 // argument here. 92 return ComparisonResult::Unknown; 93 } 94 95 /// Modifies `MergedVal` to approximate both `Val1` and `Val2`. This could 96 /// be a strict lattice join or a more general widening operation. 97 /// 98 /// If this function returns true, `MergedVal` will be assigned to a storage 99 /// location of type `Type` in `MergedEnv`. 100 /// 101 /// `Env1` and `Env2` can be used to query child values and path condition 102 /// implications of `Val1` and `Val2` respectively. 103 /// 104 /// Requirements: 105 /// 106 /// `Val1` and `Val2` must be distinct. 107 /// 108 /// `Val1`, `Val2`, and `MergedVal` must model values of type `Type`. 109 /// 110 /// `Val1` and `Val2` must be assigned to the same storage location in 111 /// `Env1` and `Env2` respectively. 112 virtual bool merge(QualType Type, const Value &Val1, 113 const Environment &Env1, const Value &Val2, 114 const Environment &Env2, Value &MergedVal, 115 Environment &MergedEnv) { 116 return true; 117 } 118 119 /// This function may widen the current value -- replace it with an 120 /// approximation that can reach a fixed point more quickly than iterated 121 /// application of the transfer function alone. The previous value is 122 /// provided to inform the choice of widened value. The function must also 123 /// serve as a comparison operation, by indicating whether the widened value 124 /// is equivalent to the previous value. 125 /// 126 /// Returns either: 127 /// 128 /// `nullptr`, if this value is not of interest to the model, or 129 /// 130 /// `&Prev`, if the widened value is equivalent to `Prev`, or 131 /// 132 /// A non-null value that approximates `Current`. `Prev` is available to 133 /// inform the chosen approximation. 134 /// 135 /// `PrevEnv` and `CurrentEnv` can be used to query child values and path 136 /// condition implications of `Prev` and `Current`, respectively. 137 /// 138 /// Requirements: 139 /// 140 /// `Prev` and `Current` must model values of type `Type`. 141 /// 142 /// `Prev` and `Current` must be assigned to the same storage location in 143 /// `PrevEnv` and `CurrentEnv`, respectively. 144 virtual Value *widen(QualType Type, Value &Prev, const Environment &PrevEnv, 145 Value &Current, Environment &CurrentEnv) { 146 // The default implementation reduces to just comparison, since comparison 147 // is required by the API, even if no widening is performed. 148 switch (compare(Type, Prev, PrevEnv, Current, CurrentEnv)) { 149 case ComparisonResult::Same: 150 return &Prev; 151 case ComparisonResult::Different: 152 return &Current; 153 case ComparisonResult::Unknown: 154 return nullptr; 155 } 156 llvm_unreachable("all cases in switch covered"); 157 } 158 }; 159 160 /// Creates an environment that uses `DACtx` to store objects that encompass 161 /// the state of a program. 162 explicit Environment(DataflowAnalysisContext &DACtx); 163 164 Environment(const Environment &Other); 165 Environment &operator=(const Environment &Other); 166 167 Environment(Environment &&Other) = default; 168 Environment &operator=(Environment &&Other) = default; 169 170 /// Creates an environment that uses `DACtx` to store objects that encompass 171 /// the state of a program. 172 /// 173 /// If `DeclCtx` is a function, initializes the environment with symbolic 174 /// representations of the function parameters. 175 /// 176 /// If `DeclCtx` is a non-static member function, initializes the environment 177 /// with a symbolic representation of the `this` pointee. 178 Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx); 179 180 const DataflowAnalysisContext::Options &getAnalysisOptions() { 181 return DACtx->getOptions(); 182 } 183 184 /// Creates and returns an environment to use for an inline analysis of the 185 /// callee. Uses the storage location from each argument in the `Call` as the 186 /// storage location for the corresponding parameter in the callee. 187 /// 188 /// Requirements: 189 /// 190 /// The callee of `Call` must be a `FunctionDecl`. 191 /// 192 /// The body of the callee must not reference globals. 193 /// 194 /// The arguments of `Call` must map 1:1 to the callee's parameters. 195 Environment pushCall(const CallExpr *Call) const; 196 Environment pushCall(const CXXConstructExpr *Call) const; 197 198 /// Moves gathered information back into `this` from a `CalleeEnv` created via 199 /// `pushCall`. 200 void popCall(const Environment &CalleeEnv); 201 202 /// Returns true if and only if the environment is equivalent to `Other`, i.e 203 /// the two environments: 204 /// - have the same mappings from declarations to storage locations, 205 /// - have the same mappings from expressions to storage locations, 206 /// - have the same or equivalent (according to `Model`) values assigned to 207 /// the same storage locations. 208 /// 209 /// Requirements: 210 /// 211 /// `Other` and `this` must use the same `DataflowAnalysisContext`. 212 bool equivalentTo(const Environment &Other, 213 Environment::ValueModel &Model) const; 214 215 /// Joins the environment with `Other` by taking the intersection of storage 216 /// locations and values that are stored in them. Distinct values that are 217 /// assigned to the same storage locations in the environment and `Other` are 218 /// merged using `Model`. 219 /// 220 /// Requirements: 221 /// 222 /// `Other` and `this` must use the same `DataflowAnalysisContext`. 223 LatticeJoinEffect join(const Environment &Other, 224 Environment::ValueModel &Model); 225 226 227 /// Widens the environment point-wise, using `PrevEnv` as needed to inform the 228 /// approximation. 229 /// 230 /// Requirements: 231 /// 232 /// `PrevEnv` must be the immediate previous version of the environment. 233 /// `PrevEnv` and `this` must use the same `DataflowAnalysisContext`. 234 LatticeJoinEffect widen(const Environment &PrevEnv, 235 Environment::ValueModel &Model); 236 237 // FIXME: Rename `createOrGetStorageLocation` to `getOrCreateStorageLocation`, 238 // `getStableStorageLocation`, or something more appropriate. 239 240 /// Creates a storage location appropriate for `Type`. Does not assign a value 241 /// to the returned storage location in the environment. 242 /// 243 /// Requirements: 244 /// 245 /// `Type` must not be null. 246 StorageLocation &createStorageLocation(QualType Type); 247 248 /// Creates a storage location for `D`. Does not assign the returned storage 249 /// location to `D` in the environment. Does not assign a value to the 250 /// returned storage location in the environment. 251 StorageLocation &createStorageLocation(const VarDecl &D); 252 253 /// Creates a storage location for `E`. Does not assign the returned storage 254 /// location to `E` in the environment. Does not assign a value to the 255 /// returned storage location in the environment. 256 StorageLocation &createStorageLocation(const Expr &E); 257 258 /// Assigns `Loc` as the storage location of `D` in the environment. 259 /// 260 /// Requirements: 261 /// 262 /// `D` must not be assigned a storage location in the environment. 263 void setStorageLocation(const ValueDecl &D, StorageLocation &Loc); 264 265 /// Returns the storage location assigned to `D` in the environment, applying 266 /// the `SP` policy for skipping past indirections, or null if `D` isn't 267 /// assigned a storage location in the environment. 268 StorageLocation *getStorageLocation(const ValueDecl &D, SkipPast SP) const; 269 270 /// Assigns `Loc` as the storage location of `E` in the environment. 271 /// 272 /// Requirements: 273 /// 274 /// `E` must not be assigned a storage location in the environment. 275 void setStorageLocation(const Expr &E, StorageLocation &Loc); 276 277 /// Returns the storage location assigned to `E` in the environment, applying 278 /// the `SP` policy for skipping past indirections, or null if `E` isn't 279 /// assigned a storage location in the environment. 280 StorageLocation *getStorageLocation(const Expr &E, SkipPast SP) const; 281 282 /// Returns the storage location assigned to the `this` pointee in the 283 /// environment or null if the `this` pointee has no assigned storage location 284 /// in the environment. 285 StorageLocation *getThisPointeeStorageLocation() const; 286 287 /// Returns the storage location of the return value or null, if unset. 288 StorageLocation *getReturnStorageLocation() const; 289 290 /// Returns a pointer value that represents a null pointer. Calls with 291 /// `PointeeType` that are canonically equivalent will return the same result. 292 PointerValue &getOrCreateNullPointerValue(QualType PointeeType); 293 294 /// Creates a value appropriate for `Type`, if `Type` is supported, otherwise 295 /// return null. If `Type` is a pointer or reference type, creates all the 296 /// necessary storage locations and values for indirections until it finds a 297 /// non-pointer/non-reference type. 298 /// 299 /// Requirements: 300 /// 301 /// `Type` must not be null. 302 Value *createValue(QualType Type); 303 304 /// Assigns `Val` as the value of `Loc` in the environment. 305 void setValue(const StorageLocation &Loc, Value &Val); 306 307 /// Returns the value assigned to `Loc` in the environment or null if `Loc` 308 /// isn't assigned a value in the environment. 309 Value *getValue(const StorageLocation &Loc) const; 310 311 /// Equivalent to `getValue(getStorageLocation(D, SP), SkipPast::None)` if `D` 312 /// is assigned a storage location in the environment, otherwise returns null. 313 Value *getValue(const ValueDecl &D, SkipPast SP) const; 314 315 /// Equivalent to `getValue(getStorageLocation(E, SP), SkipPast::None)` if `E` 316 /// is assigned a storage location in the environment, otherwise returns null. 317 Value *getValue(const Expr &E, SkipPast SP) const; 318 319 /// Transfers ownership of `Loc` to the analysis context and returns a 320 /// reference to it. 321 /// 322 /// Requirements: 323 /// 324 /// `Loc` must not be null. 325 template <typename T> 326 std::enable_if_t<std::is_base_of<StorageLocation, T>::value, T &> 327 takeOwnership(std::unique_ptr<T> Loc) { 328 return DACtx->takeOwnership(std::move(Loc)); 329 } 330 331 /// Transfers ownership of `Val` to the analysis context and returns a 332 /// reference to it. 333 /// 334 /// Requirements: 335 /// 336 /// `Val` must not be null. 337 template <typename T> 338 std::enable_if_t<std::is_base_of<Value, T>::value, T &> 339 takeOwnership(std::unique_ptr<T> Val) { 340 return DACtx->takeOwnership(std::move(Val)); 341 } 342 343 /// Returns a symbolic boolean value that models a boolean literal equal to 344 /// `Value` 345 AtomicBoolValue &getBoolLiteralValue(bool Value) const { 346 return DACtx->getBoolLiteralValue(Value); 347 } 348 349 /// Returns an atomic boolean value. 350 BoolValue &makeAtomicBoolValue() const { 351 return DACtx->createAtomicBoolValue(); 352 } 353 354 /// Returns a unique instance of boolean Top. 355 BoolValue &makeTopBoolValue() const { 356 return DACtx->createTopBoolValue(); 357 } 358 359 /// Returns a boolean value that represents the conjunction of `LHS` and 360 /// `RHS`. Subsequent calls with the same arguments, regardless of their 361 /// order, will return the same result. If the given boolean values represent 362 /// the same value, the result will be the value itself. 363 BoolValue &makeAnd(BoolValue &LHS, BoolValue &RHS) const { 364 return DACtx->getOrCreateConjunction(LHS, RHS); 365 } 366 367 /// Returns a boolean value that represents the disjunction of `LHS` and 368 /// `RHS`. Subsequent calls with the same arguments, regardless of their 369 /// order, will return the same result. If the given boolean values represent 370 /// the same value, the result will be the value itself. 371 BoolValue &makeOr(BoolValue &LHS, BoolValue &RHS) const { 372 return DACtx->getOrCreateDisjunction(LHS, RHS); 373 } 374 375 /// Returns a boolean value that represents the negation of `Val`. Subsequent 376 /// calls with the same argument will return the same result. 377 BoolValue &makeNot(BoolValue &Val) const { 378 return DACtx->getOrCreateNegation(Val); 379 } 380 381 /// Returns a boolean value represents `LHS` => `RHS`. Subsequent calls with 382 /// the same arguments, will return the same result. If the given boolean 383 /// values represent the same value, the result will be a value that 384 /// represents the true boolean literal. 385 BoolValue &makeImplication(BoolValue &LHS, BoolValue &RHS) const { 386 return DACtx->getOrCreateImplication(LHS, RHS); 387 } 388 389 /// Returns a boolean value represents `LHS` <=> `RHS`. Subsequent calls with 390 /// the same arguments, regardless of their order, will return the same 391 /// result. If the given boolean values represent the same value, the result 392 /// will be a value that represents the true boolean literal. 393 BoolValue &makeIff(BoolValue &LHS, BoolValue &RHS) const { 394 return DACtx->getOrCreateIff(LHS, RHS); 395 } 396 397 /// Returns the token that identifies the flow condition of the environment. 398 AtomicBoolValue &getFlowConditionToken() const { return *FlowConditionToken; } 399 400 /// Builds and returns the logical formula defining the flow condition 401 /// identified by `Token`. If a value in the formula is present as a key in 402 /// `Substitutions`, it will be substituted with the value it maps to. 403 BoolValue &buildAndSubstituteFlowCondition( 404 AtomicBoolValue &Token, 405 llvm::DenseMap<AtomicBoolValue *, BoolValue *> Substitutions) { 406 return DACtx->buildAndSubstituteFlowCondition(Token, 407 std::move(Substitutions)); 408 } 409 410 /// Adds `Val` to the set of clauses that constitute the flow condition. 411 void addToFlowCondition(BoolValue &Val); 412 413 /// Returns true if and only if the clauses that constitute the flow condition 414 /// imply that `Val` is true. 415 bool flowConditionImplies(BoolValue &Val) const; 416 417 /// Returns the `DeclContext` of the block being analysed, if any. Otherwise, 418 /// returns null. 419 const DeclContext *getDeclCtx() const { return CallStack.back(); } 420 421 /// Returns whether this `Environment` can be extended to analyze the given 422 /// `Callee` (i.e. if `pushCall` can be used), with recursion disallowed and a 423 /// given `MaxDepth`. 424 bool canDescend(unsigned MaxDepth, const DeclContext *Callee) const; 425 426 /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise, 427 /// returns null. 428 const ControlFlowContext *getControlFlowContext(const FunctionDecl *F) { 429 return DACtx->getControlFlowContext(F); 430 } 431 432 LLVM_DUMP_METHOD void dump() const; 433 LLVM_DUMP_METHOD void dump(raw_ostream &OS) const; 434 435 private: 436 /// Creates a value appropriate for `Type`, if `Type` is supported, otherwise 437 /// return null. 438 /// 439 /// Recursively initializes storage locations and values until it sees a 440 /// self-referential pointer or reference type. `Visited` is used to track 441 /// which types appeared in the reference/pointer chain in order to avoid 442 /// creating a cyclic dependency with self-referential pointers/references. 443 /// 444 /// Requirements: 445 /// 446 /// `Type` must not be null. 447 Value *createValueUnlessSelfReferential(QualType Type, 448 llvm::DenseSet<QualType> &Visited, 449 int Depth, int &CreatedValuesCount); 450 451 StorageLocation &skip(StorageLocation &Loc, SkipPast SP) const; 452 const StorageLocation &skip(const StorageLocation &Loc, SkipPast SP) const; 453 454 /// Shared implementation of `pushCall` overloads. Note that unlike 455 /// `pushCall`, this member is invoked on the environment of the callee, not 456 /// of the caller. 457 void pushCallInternal(const FunctionDecl *FuncDecl, 458 ArrayRef<const Expr *> Args); 459 460 /// Assigns storage locations and values to all variables in `Vars`. 461 void initVars(llvm::DenseSet<const VarDecl *> Vars); 462 463 // `DACtx` is not null and not owned by this object. 464 DataflowAnalysisContext *DACtx; 465 466 467 // FIXME: move the fields `CallStack`, `ReturnLoc` and `ThisPointeeLoc` into a 468 // separate call-context object, shared between environments in the same call. 469 // https://github.com/llvm/llvm-project/issues/59005 470 471 // `DeclContext` of the block being analysed if provided. 472 std::vector<const DeclContext *> CallStack; 473 474 // In a properly initialized `Environment`, `ReturnLoc` should only be null if 475 // its `DeclContext` could not be cast to a `FunctionDecl`. 476 StorageLocation *ReturnLoc = nullptr; 477 // The storage location of the `this` pointee. Should only be null if the 478 // function being analyzed is only a function and not a method. 479 StorageLocation *ThisPointeeLoc = nullptr; 480 481 // Maps from program declarations and statements to storage locations that are 482 // assigned to them. Unlike the maps in `DataflowAnalysisContext`, these 483 // include only storage locations that are in scope for a particular basic 484 // block. 485 llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc; 486 llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc; 487 488 llvm::DenseMap<const StorageLocation *, Value *> LocToVal; 489 490 // Maps locations of struct members to symbolic values of the structs that own 491 // them and the decls of the struct members. 492 llvm::DenseMap<const StorageLocation *, 493 std::pair<StructValue *, const ValueDecl *>> 494 MemberLocToStruct; 495 496 AtomicBoolValue *FlowConditionToken; 497 }; 498 499 } // namespace dataflow 500 } // namespace clang 501 502 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H 503