1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/DeclCXX.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/DenseSet.h" 23 #include "llvm/ADT/MapVector.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include <cassert> 27 #include <utility> 28 29 namespace clang { 30 namespace dataflow { 31 32 // FIXME: convert these to parameters of the analysis or environment. Current 33 // settings have been experimentaly validated, but only for a particular 34 // analysis. 35 static constexpr int MaxCompositeValueDepth = 3; 36 static constexpr int MaxCompositeValueSize = 1000; 37 38 /// Returns a map consisting of key-value entries that are present in both maps. 39 static llvm::DenseMap<const ValueDecl *, StorageLocation *> intersectDeclToLoc( 40 const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc1, 41 const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc2) { 42 llvm::DenseMap<const ValueDecl *, StorageLocation *> Result; 43 for (auto &Entry : DeclToLoc1) { 44 auto It = DeclToLoc2.find(Entry.first); 45 if (It != DeclToLoc2.end() && Entry.second == It->second) 46 Result.insert({Entry.first, Entry.second}); 47 } 48 return Result; 49 } 50 51 // Whether to consider equivalent two values with an unknown relation. 52 // 53 // FIXME: this function is a hack enabling unsoundness to support 54 // convergence. Once we have widening support for the reference/pointer and 55 // struct built-in models, this should be unconditionally `false` (and inlined 56 // as such at its call sites). 57 static bool equateUnknownValues(Value::Kind K) { 58 switch (K) { 59 case Value::Kind::Integer: 60 case Value::Kind::Pointer: 61 case Value::Kind::Record: 62 return true; 63 default: 64 return false; 65 } 66 } 67 68 static bool compareDistinctValues(QualType Type, Value &Val1, 69 const Environment &Env1, Value &Val2, 70 const Environment &Env2, 71 Environment::ValueModel &Model) { 72 // Note: Potentially costly, but, for booleans, we could check whether both 73 // can be proven equivalent in their respective environments. 74 75 // FIXME: move the reference/pointers logic from `areEquivalentValues` to here 76 // and implement separate, join/widen specific handling for 77 // reference/pointers. 78 switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { 79 case ComparisonResult::Same: 80 return true; 81 case ComparisonResult::Different: 82 return false; 83 case ComparisonResult::Unknown: 84 return equateUnknownValues(Val1.getKind()); 85 } 86 llvm_unreachable("All cases covered in switch"); 87 } 88 89 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`, 90 /// respectively, of the same type `Type`. Merging generally produces a single 91 /// value that (soundly) approximates the two inputs, although the actual 92 /// meaning depends on `Model`. 93 static Value *mergeDistinctValues(QualType Type, Value &Val1, 94 const Environment &Env1, Value &Val2, 95 const Environment &Env2, 96 Environment &MergedEnv, 97 Environment::ValueModel &Model) { 98 // Join distinct boolean values preserving information about the constraints 99 // in the respective path conditions. 100 if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { 101 // FIXME: Checking both values should be unnecessary, since they should have 102 // a consistent shape. However, right now we can end up with BoolValue's in 103 // integer-typed variables due to our incorrect handling of 104 // boolean-to-integer casts (we just propagate the BoolValue to the result 105 // of the cast). So, a join can encounter an integer in one branch but a 106 // bool in the other. 107 // For example: 108 // ``` 109 // std::optional<bool> o; 110 // int x; 111 // if (o.has_value()) 112 // x = o.value(); 113 // ``` 114 auto &Expr1 = cast<BoolValue>(Val1).formula(); 115 auto &Expr2 = cast<BoolValue>(Val2).formula(); 116 auto &A = MergedEnv.arena(); 117 auto &MergedVal = A.makeAtomRef(A.makeAtom()); 118 MergedEnv.assume( 119 A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), 120 A.makeEquals(MergedVal, Expr1)), 121 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), 122 A.makeEquals(MergedVal, Expr2)))); 123 return &A.makeBoolValue(MergedVal); 124 } 125 126 Value *MergedVal = nullptr; 127 if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) { 128 auto *RecordVal2 = cast<RecordValue>(&Val2); 129 130 if (&RecordVal1->getLoc() == &RecordVal2->getLoc()) 131 // `RecordVal1` and `RecordVal2` may have different properties associated 132 // with them. Create a new `RecordValue` with the same location but 133 // without any properties so that we soundly approximate both values. If a 134 // particular analysis needs to merge properties, it should do so in 135 // `DataflowAnalysis::merge()`. 136 MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc()); 137 else 138 // If the locations for the two records are different, need to create a 139 // completely new value. 140 MergedVal = MergedEnv.createValue(Type); 141 } else { 142 MergedVal = MergedEnv.createValue(Type); 143 } 144 145 // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` 146 // returns false to avoid storing unneeded values in `DACtx`. 147 if (MergedVal) 148 if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv)) 149 return MergedVal; 150 151 return nullptr; 152 } 153 154 // When widening does not change `Current`, return value will equal `&Prev`. 155 static Value &widenDistinctValues(QualType Type, Value &Prev, 156 const Environment &PrevEnv, Value &Current, 157 Environment &CurrentEnv, 158 Environment::ValueModel &Model) { 159 // Boolean-model widening. 160 if (auto *PrevBool = dyn_cast<BoolValue>(&Prev)) { 161 // If previous value was already Top, re-use that to (implicitly) indicate 162 // that no change occurred. 163 if (isa<TopBoolValue>(Prev)) 164 return Prev; 165 166 // We may need to widen to Top, but before we do so, check whether both 167 // values are implied to be either true or false in the current environment. 168 // In that case, we can simply return a literal instead. 169 auto &CurBool = cast<BoolValue>(Current); 170 bool TruePrev = PrevEnv.proves(PrevBool->formula()); 171 bool TrueCur = CurrentEnv.proves(CurBool.formula()); 172 if (TruePrev && TrueCur) 173 return CurrentEnv.getBoolLiteralValue(true); 174 if (!TruePrev && !TrueCur && 175 PrevEnv.proves(PrevEnv.arena().makeNot(PrevBool->formula())) && 176 CurrentEnv.proves(CurrentEnv.arena().makeNot(CurBool.formula()))) 177 return CurrentEnv.getBoolLiteralValue(false); 178 179 return CurrentEnv.makeTopBoolValue(); 180 } 181 182 // FIXME: Add other built-in model widening. 183 184 // Custom-model widening. 185 if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) 186 return *W; 187 188 return equateUnknownValues(Prev.getKind()) ? Prev : Current; 189 } 190 191 // Returns whether the values in `Map1` and `Map2` compare equal for those 192 // keys that `Map1` and `Map2` have in common. 193 template <typename Key> 194 bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1, 195 const llvm::MapVector<Key, Value *> &Map2, 196 const Environment &Env1, const Environment &Env2, 197 Environment::ValueModel &Model) { 198 for (auto &Entry : Map1) { 199 Key K = Entry.first; 200 assert(K != nullptr); 201 202 Value *Val = Entry.second; 203 assert(Val != nullptr); 204 205 auto It = Map2.find(K); 206 if (It == Map2.end()) 207 continue; 208 assert(It->second != nullptr); 209 210 if (!areEquivalentValues(*Val, *It->second) && 211 !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2, 212 Model)) 213 return false; 214 } 215 216 return true; 217 } 218 219 // Perform a join on two `LocToVal` maps. 220 static llvm::MapVector<const StorageLocation *, Value *> 221 joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal, 222 const llvm::MapVector<const StorageLocation *, Value *> &LocToVal2, 223 const Environment &Env1, const Environment &Env2, 224 Environment &JoinedEnv, Environment::ValueModel &Model) { 225 llvm::MapVector<const StorageLocation *, Value *> Result; 226 for (auto &Entry : LocToVal) { 227 const StorageLocation *Loc = Entry.first; 228 assert(Loc != nullptr); 229 230 Value *Val = Entry.second; 231 assert(Val != nullptr); 232 233 auto It = LocToVal2.find(Loc); 234 if (It == LocToVal2.end()) 235 continue; 236 assert(It->second != nullptr); 237 238 if (areEquivalentValues(*Val, *It->second)) { 239 Result.insert({Loc, Val}); 240 continue; 241 } 242 243 if (Value *MergedVal = mergeDistinctValues( 244 Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) { 245 Result.insert({Loc, MergedVal}); 246 } 247 } 248 249 return Result; 250 } 251 252 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either 253 // `const StorageLocation *` or `const Expr *`. 254 template <typename Key> 255 llvm::MapVector<Key, Value *> 256 widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap, 257 const llvm::MapVector<Key, Value *> &PrevMap, 258 Environment &CurEnv, const Environment &PrevEnv, 259 Environment::ValueModel &Model, LatticeJoinEffect &Effect) { 260 llvm::MapVector<Key, Value *> WidenedMap; 261 for (auto &Entry : CurMap) { 262 Key K = Entry.first; 263 assert(K != nullptr); 264 265 Value *Val = Entry.second; 266 assert(Val != nullptr); 267 268 auto PrevIt = PrevMap.find(K); 269 if (PrevIt == PrevMap.end()) 270 continue; 271 assert(PrevIt->second != nullptr); 272 273 if (areEquivalentValues(*Val, *PrevIt->second)) { 274 WidenedMap.insert({K, Val}); 275 continue; 276 } 277 278 Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second, 279 PrevEnv, *Val, CurEnv, Model); 280 WidenedMap.insert({K, &WidenedVal}); 281 if (&WidenedVal != PrevIt->second) 282 Effect = LatticeJoinEffect::Changed; 283 } 284 285 return WidenedMap; 286 } 287 288 /// Initializes a global storage value. 289 static void insertIfGlobal(const Decl &D, 290 llvm::DenseSet<const VarDecl *> &Vars) { 291 if (auto *V = dyn_cast<VarDecl>(&D)) 292 if (V->hasGlobalStorage()) 293 Vars.insert(V); 294 } 295 296 static void insertIfFunction(const Decl &D, 297 llvm::DenseSet<const FunctionDecl *> &Funcs) { 298 if (auto *FD = dyn_cast<FunctionDecl>(&D)) 299 Funcs.insert(FD); 300 } 301 302 static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) { 303 // Use getCalleeDecl instead of getMethodDecl in order to handle 304 // pointer-to-member calls. 305 const auto *MethodDecl = dyn_cast_or_null<CXXMethodDecl>(C.getCalleeDecl()); 306 if (!MethodDecl) 307 return nullptr; 308 auto *Body = dyn_cast_or_null<CompoundStmt>(MethodDecl->getBody()); 309 if (!Body || Body->size() != 1) 310 return nullptr; 311 if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin())) 312 if (auto *Return = RS->getRetValue()) 313 return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts()); 314 return nullptr; 315 } 316 317 static void 318 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, 319 llvm::DenseSet<const VarDecl *> &Vars, 320 llvm::DenseSet<const FunctionDecl *> &Funcs) { 321 insertIfGlobal(D, Vars); 322 insertIfFunction(D, Funcs); 323 if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) 324 for (const auto *B : Decomp->bindings()) 325 if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) 326 // FIXME: should we be using `E->getFoundDecl()`? 327 if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) 328 Fields.insert(FD); 329 } 330 331 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, 332 /// global variables and functions that are declared in or referenced from 333 /// sub-statements. 334 static void 335 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, 336 llvm::DenseSet<const VarDecl *> &Vars, 337 llvm::DenseSet<const FunctionDecl *> &Funcs) { 338 for (auto *Child : S.children()) 339 if (Child != nullptr) 340 getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); 341 if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) 342 getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); 343 344 if (auto *DS = dyn_cast<DeclStmt>(&S)) { 345 if (DS->isSingleDecl()) 346 getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); 347 else 348 for (auto *D : DS->getDeclGroup()) 349 getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); 350 } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { 351 insertIfGlobal(*E->getDecl(), Vars); 352 insertIfFunction(*E->getDecl(), Funcs); 353 } else if (const auto *C = dyn_cast<CXXMemberCallExpr>(&S)) { 354 // If this is a method that returns a member variable but does nothing else, 355 // model the field of the return value. 356 if (MemberExpr *E = getMemberForAccessor(*C)) 357 if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl())) 358 Fields.insert(FD); 359 } else if (auto *E = dyn_cast<MemberExpr>(&S)) { 360 // FIXME: should we be using `E->getFoundDecl()`? 361 const ValueDecl *VD = E->getMemberDecl(); 362 insertIfGlobal(*VD, Vars); 363 insertIfFunction(*VD, Funcs); 364 if (const auto *FD = dyn_cast<FieldDecl>(VD)) 365 Fields.insert(FD); 366 } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) { 367 if (RecordDecl *RD = InitList->getType()->getAsRecordDecl()) 368 for (const auto *FD : getFieldsForInitListExpr(RD)) 369 Fields.insert(FD); 370 } 371 } 372 373 Environment::Environment(DataflowAnalysisContext &DACtx) 374 : DACtx(&DACtx), 375 FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} 376 377 Environment::Environment(DataflowAnalysisContext &DACtx, 378 const DeclContext &DeclCtx) 379 : Environment(DACtx) { 380 CallStack.push_back(&DeclCtx); 381 } 382 383 void Environment::initialize() { 384 const DeclContext *DeclCtx = getDeclCtx(); 385 if (DeclCtx == nullptr) 386 return; 387 388 if (const auto *FuncDecl = dyn_cast<FunctionDecl>(DeclCtx)) { 389 assert(FuncDecl->doesThisDeclarationHaveABody()); 390 391 initFieldsGlobalsAndFuncs(FuncDecl); 392 393 for (const auto *ParamDecl : FuncDecl->parameters()) { 394 assert(ParamDecl != nullptr); 395 setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); 396 } 397 } 398 399 if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(DeclCtx)) { 400 auto *Parent = MethodDecl->getParent(); 401 assert(Parent != nullptr); 402 403 if (Parent->isLambda()) { 404 for (auto Capture : Parent->captures()) { 405 if (Capture.capturesVariable()) { 406 const auto *VarDecl = Capture.getCapturedVar(); 407 assert(VarDecl != nullptr); 408 setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr)); 409 } else if (Capture.capturesThis()) { 410 const auto *SurroundingMethodDecl = 411 cast<CXXMethodDecl>(DeclCtx->getNonClosureAncestor()); 412 QualType ThisPointeeType = 413 SurroundingMethodDecl->getFunctionObjectParameterType(); 414 setThisPointeeStorageLocation( 415 cast<RecordValue>(createValue(ThisPointeeType))->getLoc()); 416 } 417 } 418 } else if (MethodDecl->isImplicitObjectMemberFunction()) { 419 QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType(); 420 setThisPointeeStorageLocation( 421 cast<RecordValue>(createValue(ThisPointeeType))->getLoc()); 422 } 423 } 424 } 425 426 // FIXME: Add support for resetting globals after function calls to enable 427 // the implementation of sound analyses. 428 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { 429 assert(FuncDecl->doesThisDeclarationHaveABody()); 430 431 FieldSet Fields; 432 llvm::DenseSet<const VarDecl *> Vars; 433 llvm::DenseSet<const FunctionDecl *> Funcs; 434 435 // Look for global variable and field references in the 436 // constructor-initializers. 437 if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { 438 for (const auto *Init : CtorDecl->inits()) { 439 if (Init->isMemberInitializer()) { 440 Fields.insert(Init->getMember()); 441 } else if (Init->isIndirectMemberInitializer()) { 442 for (const auto *I : Init->getIndirectMember()->chain()) 443 Fields.insert(cast<FieldDecl>(I)); 444 } 445 const Expr *E = Init->getInit(); 446 assert(E != nullptr); 447 getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); 448 } 449 // Add all fields mentioned in default member initializers. 450 for (const FieldDecl *F : CtorDecl->getParent()->fields()) 451 if (const auto *I = F->getInClassInitializer()) 452 getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); 453 } 454 getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); 455 456 // These have to be added before the lines that follow to ensure that 457 // `create*` work correctly for structs. 458 DACtx->addModeledFields(Fields); 459 460 for (const VarDecl *D : Vars) { 461 if (getStorageLocation(*D) != nullptr) 462 continue; 463 464 setStorageLocation(*D, createObject(*D)); 465 } 466 467 for (const FunctionDecl *FD : Funcs) { 468 if (getStorageLocation(*FD) != nullptr) 469 continue; 470 auto &Loc = createStorageLocation(FD->getType()); 471 setStorageLocation(*FD, Loc); 472 } 473 } 474 475 Environment Environment::fork() const { 476 Environment Copy(*this); 477 Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); 478 return Copy; 479 } 480 481 bool Environment::canDescend(unsigned MaxDepth, 482 const DeclContext *Callee) const { 483 return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); 484 } 485 486 Environment Environment::pushCall(const CallExpr *Call) const { 487 Environment Env(*this); 488 489 if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { 490 if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { 491 if (!isa<CXXThisExpr>(Arg)) 492 Env.ThisPointeeLoc = 493 cast<RecordStorageLocation>(getStorageLocation(*Arg)); 494 // Otherwise (when the argument is `this`), retain the current 495 // environment's `ThisPointeeLoc`. 496 } 497 } 498 499 Env.pushCallInternal(Call->getDirectCallee(), 500 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 501 502 return Env; 503 } 504 505 Environment Environment::pushCall(const CXXConstructExpr *Call) const { 506 Environment Env(*this); 507 508 Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); 509 510 Env.pushCallInternal(Call->getConstructor(), 511 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 512 513 return Env; 514 } 515 516 void Environment::pushCallInternal(const FunctionDecl *FuncDecl, 517 ArrayRef<const Expr *> Args) { 518 // Canonicalize to the definition of the function. This ensures that we're 519 // putting arguments into the same `ParamVarDecl`s` that the callee will later 520 // be retrieving them from. 521 assert(FuncDecl->getDefinition() != nullptr); 522 FuncDecl = FuncDecl->getDefinition(); 523 524 CallStack.push_back(FuncDecl); 525 526 initFieldsGlobalsAndFuncs(FuncDecl); 527 528 const auto *ParamIt = FuncDecl->param_begin(); 529 530 // FIXME: Parameters don't always map to arguments 1:1; examples include 531 // overloaded operators implemented as member functions, and parameter packs. 532 for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { 533 assert(ParamIt != FuncDecl->param_end()); 534 const VarDecl *Param = *ParamIt; 535 setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); 536 } 537 } 538 539 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { 540 // We ignore some entries of `CalleeEnv`: 541 // - `DACtx` because is already the same in both 542 // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or 543 // `ThisPointeeLoc` because they don't apply to us. 544 // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the 545 // callee's local scope, so when popping that scope, we do not propagate 546 // the maps. 547 this->LocToVal = std::move(CalleeEnv.LocToVal); 548 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 549 550 if (Call->isGLValue()) { 551 if (CalleeEnv.ReturnLoc != nullptr) 552 setStorageLocation(*Call, *CalleeEnv.ReturnLoc); 553 } else if (!Call->getType()->isVoidType()) { 554 if (CalleeEnv.ReturnVal != nullptr) 555 setValue(*Call, *CalleeEnv.ReturnVal); 556 } 557 } 558 559 void Environment::popCall(const CXXConstructExpr *Call, 560 const Environment &CalleeEnv) { 561 // See also comment in `popCall(const CallExpr *, const Environment &)` above. 562 this->LocToVal = std::move(CalleeEnv.LocToVal); 563 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 564 565 if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { 566 setValue(*Call, *Val); 567 } 568 } 569 570 bool Environment::equivalentTo(const Environment &Other, 571 Environment::ValueModel &Model) const { 572 assert(DACtx == Other.DACtx); 573 574 if (ReturnVal != Other.ReturnVal) 575 return false; 576 577 if (ReturnLoc != Other.ReturnLoc) 578 return false; 579 580 if (ThisPointeeLoc != Other.ThisPointeeLoc) 581 return false; 582 583 if (DeclToLoc != Other.DeclToLoc) 584 return false; 585 586 if (ExprToLoc != Other.ExprToLoc) 587 return false; 588 589 if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model)) 590 return false; 591 592 if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model)) 593 return false; 594 595 return true; 596 } 597 598 LatticeJoinEffect Environment::widen(const Environment &PrevEnv, 599 Environment::ValueModel &Model) { 600 assert(DACtx == PrevEnv.DACtx); 601 assert(ReturnVal == PrevEnv.ReturnVal); 602 assert(ReturnLoc == PrevEnv.ReturnLoc); 603 assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); 604 assert(CallStack == PrevEnv.CallStack); 605 606 auto Effect = LatticeJoinEffect::Unchanged; 607 608 // By the API, `PrevEnv` is a previous version of the environment for the same 609 // block, so we have some guarantees about its shape. In particular, it will 610 // be the result of a join or widen operation on previous values for this 611 // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that 612 // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain 613 // this property here, we don't need change their current values to widen. 614 assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); 615 assert(ExprToVal.size() <= PrevEnv.ExprToVal.size()); 616 assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); 617 618 ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv, 619 Model, Effect); 620 621 LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv, 622 Model, Effect); 623 if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || 624 ExprToLoc.size() != PrevEnv.ExprToLoc.size() || 625 ExprToVal.size() != PrevEnv.ExprToVal.size() || 626 LocToVal.size() != PrevEnv.LocToVal.size()) 627 Effect = LatticeJoinEffect::Changed; 628 629 return Effect; 630 } 631 632 Environment Environment::join(const Environment &EnvA, const Environment &EnvB, 633 Environment::ValueModel &Model) { 634 assert(EnvA.DACtx == EnvB.DACtx); 635 assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); 636 assert(EnvA.CallStack == EnvB.CallStack); 637 638 Environment JoinedEnv(*EnvA.DACtx); 639 640 JoinedEnv.CallStack = EnvA.CallStack; 641 JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; 642 643 if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { 644 // `ReturnVal` might not always get set -- for example if we have a return 645 // statement of the form `return some_other_func()` and we decide not to 646 // analyze `some_other_func()`. 647 // In this case, we can't say anything about the joined return value -- we 648 // don't simply want to propagate the return value that we do have, because 649 // it might not be the correct one. 650 // This occurs for example in the test `ContextSensitiveMutualRecursion`. 651 JoinedEnv.ReturnVal = nullptr; 652 } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { 653 JoinedEnv.ReturnVal = EnvA.ReturnVal; 654 } else { 655 assert(!EnvA.CallStack.empty()); 656 // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this 657 // cast. 658 auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back()); 659 assert(Func != nullptr); 660 if (Value *MergedVal = 661 mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, 662 *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) 663 JoinedEnv.ReturnVal = MergedVal; 664 } 665 666 if (EnvA.ReturnLoc == EnvB.ReturnLoc) 667 JoinedEnv.ReturnLoc = EnvA.ReturnLoc; 668 else 669 JoinedEnv.ReturnLoc = nullptr; 670 671 JoinedEnv.DeclToLoc = intersectDeclToLoc(EnvA.DeclToLoc, EnvB.DeclToLoc); 672 673 // FIXME: update join to detect backedges and simplify the flow condition 674 // accordingly. 675 JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( 676 EnvA.FlowConditionToken, EnvB.FlowConditionToken); 677 678 JoinedEnv.LocToVal = 679 joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model); 680 681 // We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal` 682 // empty, as we never need to access entries in these maps outside of the 683 // basic block that sets them. 684 685 return JoinedEnv; 686 } 687 688 StorageLocation &Environment::createStorageLocation(QualType Type) { 689 return DACtx->createStorageLocation(Type); 690 } 691 692 StorageLocation &Environment::createStorageLocation(const ValueDecl &D) { 693 // Evaluated declarations are always assigned the same storage locations to 694 // ensure that the environment stabilizes across loop iterations. Storage 695 // locations for evaluated declarations are stored in the analysis context. 696 return DACtx->getStableStorageLocation(D); 697 } 698 699 StorageLocation &Environment::createStorageLocation(const Expr &E) { 700 // Evaluated expressions are always assigned the same storage locations to 701 // ensure that the environment stabilizes across loop iterations. Storage 702 // locations for evaluated expressions are stored in the analysis context. 703 return DACtx->getStableStorageLocation(E); 704 } 705 706 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 707 assert(!DeclToLoc.contains(&D)); 708 DeclToLoc[&D] = &Loc; 709 } 710 711 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { 712 auto It = DeclToLoc.find(&D); 713 if (It == DeclToLoc.end()) 714 return nullptr; 715 716 StorageLocation *Loc = It->second; 717 718 return Loc; 719 } 720 721 void Environment::removeDecl(const ValueDecl &D) { DeclToLoc.erase(&D); } 722 723 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { 724 // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, 725 // but we still want to be able to associate a `StorageLocation` with them, 726 // so allow these as an exception. 727 assert(E.isGLValue() || 728 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 729 const Expr &CanonE = ignoreCFGOmittedNodes(E); 730 assert(!ExprToLoc.contains(&CanonE)); 731 ExprToLoc[&CanonE] = &Loc; 732 } 733 734 StorageLocation *Environment::getStorageLocation(const Expr &E) const { 735 // See comment in `setStorageLocation()`. 736 assert(E.isGLValue() || 737 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 738 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 739 return It == ExprToLoc.end() ? nullptr : &*It->second; 740 } 741 742 // Returns whether a prvalue of record type is the one that originally 743 // constructs the object (i.e. it doesn't propagate it from one of its 744 // children). 745 static bool isOriginalRecordConstructor(const Expr &RecordPRValue) { 746 if (auto *Init = dyn_cast<InitListExpr>(&RecordPRValue)) 747 return !Init->isSemanticForm() || !Init->isTransparent(); 748 return isa<CXXConstructExpr>(RecordPRValue) || isa<CallExpr>(RecordPRValue) || 749 isa<LambdaExpr>(RecordPRValue) || 750 isa<CXXDefaultInitExpr>(RecordPRValue) || 751 // The framework currently does not propagate the objects created in 752 // the two branches of a `ConditionalOperator` because there is no way 753 // to reconcile their storage locations, which are different. We 754 // therefore claim that the `ConditionalOperator` is the expression 755 // that originally constructs the object. 756 // Ultimately, this will be fixed by propagating locations down from 757 // the result object, rather than up from the original constructor as 758 // we do now (see also the FIXME in the documentation for 759 // `getResultObjectLocation()`). 760 isa<ConditionalOperator>(RecordPRValue); 761 } 762 763 RecordStorageLocation & 764 Environment::getResultObjectLocation(const Expr &RecordPRValue) const { 765 assert(RecordPRValue.getType()->isRecordType()); 766 assert(RecordPRValue.isPRValue()); 767 768 // Returns a storage location that we can use if assertions fail. 769 auto FallbackForAssertFailure = 770 [this, &RecordPRValue]() -> RecordStorageLocation & { 771 return cast<RecordStorageLocation>( 772 DACtx->getStableStorageLocation(RecordPRValue)); 773 }; 774 775 if (isOriginalRecordConstructor(RecordPRValue)) { 776 auto *Val = cast_or_null<RecordValue>(getValue(RecordPRValue)); 777 // The builtin transfer function should have created a `RecordValue` for all 778 // original record constructors. 779 assert(Val); 780 if (!Val) 781 return FallbackForAssertFailure(); 782 return Val->getLoc(); 783 } 784 785 if (auto *Op = dyn_cast<BinaryOperator>(&RecordPRValue); 786 Op && Op->isCommaOp()) { 787 return getResultObjectLocation(*Op->getRHS()); 788 } 789 790 // All other expression nodes that propagate a record prvalue should have 791 // exactly one child. 792 llvm::SmallVector<const Stmt *> children(RecordPRValue.child_begin(), 793 RecordPRValue.child_end()); 794 assert(children.size() == 1); 795 if (children.empty()) 796 return FallbackForAssertFailure(); 797 798 return getResultObjectLocation(*cast<Expr>(children[0])); 799 } 800 801 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { 802 return DACtx->getOrCreateNullPointerValue(PointeeType); 803 } 804 805 void Environment::setValue(const StorageLocation &Loc, Value &Val) { 806 assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc); 807 808 LocToVal[&Loc] = &Val; 809 } 810 811 void Environment::setValue(const Expr &E, Value &Val) { 812 const Expr &CanonE = ignoreCFGOmittedNodes(E); 813 814 if (auto *RecordVal = dyn_cast<RecordValue>(&Val)) { 815 assert(isOriginalRecordConstructor(CanonE) || 816 &RecordVal->getLoc() == &getResultObjectLocation(CanonE)); 817 } 818 819 assert(CanonE.isPRValue()); 820 ExprToVal[&CanonE] = &Val; 821 } 822 823 Value *Environment::getValue(const StorageLocation &Loc) const { 824 return LocToVal.lookup(&Loc); 825 } 826 827 Value *Environment::getValue(const ValueDecl &D) const { 828 auto *Loc = getStorageLocation(D); 829 if (Loc == nullptr) 830 return nullptr; 831 return getValue(*Loc); 832 } 833 834 Value *Environment::getValue(const Expr &E) const { 835 if (E.isPRValue()) { 836 auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E)); 837 return It == ExprToVal.end() ? nullptr : It->second; 838 } 839 840 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 841 if (It == ExprToLoc.end()) 842 return nullptr; 843 return getValue(*It->second); 844 } 845 846 Value *Environment::createValue(QualType Type) { 847 llvm::DenseSet<QualType> Visited; 848 int CreatedValuesCount = 0; 849 Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, 850 CreatedValuesCount); 851 if (CreatedValuesCount > MaxCompositeValueSize) { 852 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 853 << '\n'; 854 } 855 return Val; 856 } 857 858 Value *Environment::createValueUnlessSelfReferential( 859 QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, 860 int &CreatedValuesCount) { 861 assert(!Type.isNull()); 862 assert(!Type->isReferenceType()); 863 864 // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 865 if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || 866 Depth > MaxCompositeValueDepth) 867 return nullptr; 868 869 if (Type->isBooleanType()) { 870 CreatedValuesCount++; 871 return &makeAtomicBoolValue(); 872 } 873 874 if (Type->isIntegerType()) { 875 // FIXME: consider instead `return nullptr`, given that we do nothing useful 876 // with integers, and so distinguishing them serves no purpose, but could 877 // prevent convergence. 878 CreatedValuesCount++; 879 return &arena().create<IntegerValue>(); 880 } 881 882 if (Type->isPointerType()) { 883 CreatedValuesCount++; 884 QualType PointeeType = Type->getPointeeType(); 885 StorageLocation &PointeeLoc = 886 createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); 887 888 return &arena().create<PointerValue>(PointeeLoc); 889 } 890 891 if (Type->isRecordType()) { 892 CreatedValuesCount++; 893 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 894 for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { 895 assert(Field != nullptr); 896 897 QualType FieldType = Field->getType(); 898 899 FieldLocs.insert( 900 {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, 901 CreatedValuesCount)}); 902 } 903 904 RecordStorageLocation::SyntheticFieldMap SyntheticFieldLocs; 905 for (const auto &Entry : DACtx->getSyntheticFields(Type)) { 906 SyntheticFieldLocs.insert( 907 {Entry.getKey(), 908 &createLocAndMaybeValue(Entry.getValue(), Visited, Depth + 1, 909 CreatedValuesCount)}); 910 } 911 912 RecordStorageLocation &Loc = DACtx->createRecordStorageLocation( 913 Type, std::move(FieldLocs), std::move(SyntheticFieldLocs)); 914 RecordValue &RecordVal = create<RecordValue>(Loc); 915 916 // As we already have a storage location for the `RecordValue`, we can and 917 // should associate them in the environment. 918 setValue(Loc, RecordVal); 919 920 return &RecordVal; 921 } 922 923 return nullptr; 924 } 925 926 StorageLocation & 927 Environment::createLocAndMaybeValue(QualType Ty, 928 llvm::DenseSet<QualType> &Visited, 929 int Depth, int &CreatedValuesCount) { 930 if (!Visited.insert(Ty.getCanonicalType()).second) 931 return createStorageLocation(Ty.getNonReferenceType()); 932 Value *Val = createValueUnlessSelfReferential( 933 Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount); 934 Visited.erase(Ty.getCanonicalType()); 935 936 Ty = Ty.getNonReferenceType(); 937 938 if (Val == nullptr) 939 return createStorageLocation(Ty); 940 941 if (Ty->isRecordType()) 942 return cast<RecordValue>(Val)->getLoc(); 943 944 StorageLocation &Loc = createStorageLocation(Ty); 945 setValue(Loc, *Val); 946 return Loc; 947 } 948 949 StorageLocation &Environment::createObjectInternal(const ValueDecl *D, 950 QualType Ty, 951 const Expr *InitExpr) { 952 if (Ty->isReferenceType()) { 953 // Although variables of reference type always need to be initialized, it 954 // can happen that we can't see the initializer, so `InitExpr` may still 955 // be null. 956 if (InitExpr) { 957 if (auto *InitExprLoc = getStorageLocation(*InitExpr)) 958 return *InitExprLoc; 959 } 960 961 // Even though we have an initializer, we might not get an 962 // InitExprLoc, for example if the InitExpr is a CallExpr for which we 963 // don't have a function body. In this case, we just invent a storage 964 // location and value -- it's the best we can do. 965 return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); 966 } 967 968 Value *Val = nullptr; 969 if (InitExpr) 970 // In the (few) cases where an expression is intentionally 971 // "uninterpreted", `InitExpr` is not associated with a value. There are 972 // two ways to handle this situation: propagate the status, so that 973 // uninterpreted initializers result in uninterpreted variables, or 974 // provide a default value. We choose the latter so that later refinements 975 // of the variable can be used for reasoning about the surrounding code. 976 // For this reason, we let this case be handled by the `createValue()` 977 // call below. 978 // 979 // FIXME. If and when we interpret all language cases, change this to 980 // assert that `InitExpr` is interpreted, rather than supplying a 981 // default value (assuming we don't update the environment API to return 982 // references). 983 Val = getValue(*InitExpr); 984 if (!Val) 985 Val = createValue(Ty); 986 987 if (Ty->isRecordType()) 988 return cast<RecordValue>(Val)->getLoc(); 989 990 StorageLocation &Loc = 991 D ? createStorageLocation(*D) : createStorageLocation(Ty); 992 993 if (Val) 994 setValue(Loc, *Val); 995 996 return Loc; 997 } 998 999 void Environment::assume(const Formula &F) { 1000 DACtx->addFlowConditionConstraint(FlowConditionToken, F); 1001 } 1002 1003 bool Environment::proves(const Formula &F) const { 1004 return DACtx->flowConditionImplies(FlowConditionToken, F); 1005 } 1006 1007 bool Environment::allows(const Formula &F) const { 1008 return DACtx->flowConditionAllows(FlowConditionToken, F); 1009 } 1010 1011 void Environment::dump(raw_ostream &OS) const { 1012 // FIXME: add printing for remaining fields and allow caller to decide what 1013 // fields are printed. 1014 OS << "DeclToLoc:\n"; 1015 for (auto [D, L] : DeclToLoc) 1016 OS << " [" << D->getNameAsString() << ", " << L << "]\n"; 1017 1018 OS << "ExprToLoc:\n"; 1019 for (auto [E, L] : ExprToLoc) 1020 OS << " [" << E << ", " << L << "]\n"; 1021 1022 OS << "ExprToVal:\n"; 1023 for (auto [E, V] : ExprToVal) 1024 OS << " [" << E << ", " << V << ": " << *V << "]\n"; 1025 1026 OS << "LocToVal:\n"; 1027 for (auto [L, V] : LocToVal) { 1028 OS << " [" << L << ", " << V << ": " << *V << "]\n"; 1029 } 1030 1031 OS << "\n"; 1032 DACtx->dumpFlowCondition(FlowConditionToken, OS); 1033 } 1034 1035 void Environment::dump() const { 1036 dump(llvm::dbgs()); 1037 } 1038 1039 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, 1040 const Environment &Env) { 1041 Expr *ImplicitObject = MCE.getImplicitObjectArgument(); 1042 if (ImplicitObject == nullptr) 1043 return nullptr; 1044 if (ImplicitObject->getType()->isPointerType()) { 1045 if (auto *Val = Env.get<PointerValue>(*ImplicitObject)) 1046 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 1047 return nullptr; 1048 } 1049 return cast_or_null<RecordStorageLocation>( 1050 Env.getStorageLocation(*ImplicitObject)); 1051 } 1052 1053 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, 1054 const Environment &Env) { 1055 Expr *Base = ME.getBase(); 1056 if (Base == nullptr) 1057 return nullptr; 1058 if (ME.isArrow()) { 1059 if (auto *Val = Env.get<PointerValue>(*Base)) 1060 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 1061 return nullptr; 1062 } 1063 return Env.get<RecordStorageLocation>(*Base); 1064 } 1065 1066 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) { 1067 // Unnamed bitfields are only used for padding and do not appear in 1068 // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s 1069 // field list, and we thus need to remove them before mapping inits to 1070 // fields to avoid mapping inits to the wrongs fields. 1071 std::vector<FieldDecl *> Fields; 1072 llvm::copy_if( 1073 RD->fields(), std::back_inserter(Fields), 1074 [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); 1075 return Fields; 1076 } 1077 1078 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { 1079 auto &NewVal = Env.create<RecordValue>(Loc); 1080 Env.setValue(Loc, NewVal); 1081 return NewVal; 1082 } 1083 1084 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) { 1085 assert(Expr.getType()->isRecordType()); 1086 1087 if (Expr.isPRValue()) { 1088 if (auto *ExistingVal = Env.get<RecordValue>(Expr)) { 1089 auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc()); 1090 Env.setValue(Expr, NewVal); 1091 Env.setValue(NewVal.getLoc(), NewVal); 1092 return NewVal; 1093 } 1094 1095 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 1096 Env.setValue(Expr, NewVal); 1097 return NewVal; 1098 } 1099 1100 if (auto *Loc = Env.get<RecordStorageLocation>(Expr)) { 1101 auto &NewVal = Env.create<RecordValue>(*Loc); 1102 Env.setValue(*Loc, NewVal); 1103 return NewVal; 1104 } 1105 1106 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 1107 Env.setStorageLocation(Expr, NewVal.getLoc()); 1108 return NewVal; 1109 } 1110 1111 } // namespace dataflow 1112 } // namespace clang 1113