//===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines an Environment class that is used by dataflow analyses // that run over Control-Flow Graphs (CFGs) to keep track of the state of the // program at given program points. // //===----------------------------------------------------------------------===// #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/Type.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include #include namespace clang { namespace dataflow { // FIXME: convert these to parameters of the analysis or environment. Current // settings have been experimentaly validated, but only for a particular // analysis. static constexpr int MaxCompositeValueDepth = 3; static constexpr int MaxCompositeValueSize = 1000; /// Returns a map consisting of key-value entries that are present in both maps. static llvm::DenseMap intersectDeclToLoc( const llvm::DenseMap &DeclToLoc1, const llvm::DenseMap &DeclToLoc2) { llvm::DenseMap Result; for (auto &Entry : DeclToLoc1) { auto It = DeclToLoc2.find(Entry.first); if (It != DeclToLoc2.end() && Entry.second == It->second) Result.insert({Entry.first, Entry.second}); } return Result; } // Whether to consider equivalent two values with an unknown relation. // // FIXME: this function is a hack enabling unsoundness to support // convergence. 
Once we have widening support for the reference/pointer and // struct built-in models, this should be unconditionally `false` (and inlined // as such at its call sites). static bool equateUnknownValues(Value::Kind K) { switch (K) { case Value::Kind::Integer: case Value::Kind::Pointer: case Value::Kind::Record: return true; default: return false; } } static bool compareDistinctValues(QualType Type, Value &Val1, const Environment &Env1, Value &Val2, const Environment &Env2, Environment::ValueModel &Model) { // Note: Potentially costly, but, for booleans, we could check whether both // can be proven equivalent in their respective environments. // FIXME: move the reference/pointers logic from `areEquivalentValues` to here // and implement separate, join/widen specific handling for // reference/pointers. switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { case ComparisonResult::Same: return true; case ComparisonResult::Different: return false; case ComparisonResult::Unknown: return equateUnknownValues(Val1.getKind()); } llvm_unreachable("All cases covered in switch"); } /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`, /// respectively, of the same type `Type`. Merging generally produces a single /// value that (soundly) approximates the two inputs, although the actual /// meaning depends on `Model`. static Value *mergeDistinctValues(QualType Type, Value &Val1, const Environment &Env1, Value &Val2, const Environment &Env2, Environment &MergedEnv, Environment::ValueModel &Model) { // Join distinct boolean values preserving information about the constraints // in the respective path conditions. if (isa(&Val1) && isa(&Val2)) { // FIXME: Checking both values should be unnecessary, since they should have // a consistent shape. However, right now we can end up with BoolValue's in // integer-typed variables due to our incorrect handling of // boolean-to-integer casts (we just propagate the BoolValue to the result // of the cast). 
So, a join can encounter an integer in one branch but a // bool in the other. // For example: // ``` // std::optional o; // int x; // if (o.has_value()) // x = o.value(); // ``` auto &Expr1 = cast(Val1).formula(); auto &Expr2 = cast(Val2).formula(); auto &A = MergedEnv.arena(); auto &MergedVal = A.makeAtomRef(A.makeAtom()); MergedEnv.assume( A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), A.makeEquals(MergedVal, Expr1)), A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), A.makeEquals(MergedVal, Expr2)))); return &A.makeBoolValue(MergedVal); } Value *MergedVal = nullptr; if (auto *RecordVal1 = dyn_cast(&Val1)) { auto *RecordVal2 = cast(&Val2); if (&RecordVal1->getLoc() == &RecordVal2->getLoc()) // `RecordVal1` and `RecordVal2` may have different properties associated // with them. Create a new `RecordValue` with the same location but // without any properties so that we soundly approximate both values. If a // particular analysis needs to merge properties, it should do so in // `DataflowAnalysis::merge()`. MergedVal = &MergedEnv.create(RecordVal1->getLoc()); else // If the locations for the two records are different, need to create a // completely new value. MergedVal = MergedEnv.createValue(Type); } else { MergedVal = MergedEnv.createValue(Type); } // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` // returns false to avoid storing unneeded values in `DACtx`. if (MergedVal) if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv)) return MergedVal; return nullptr; } // When widening does not change `Current`, return value will equal `&Prev`. static Value &widenDistinctValues(QualType Type, Value &Prev, const Environment &PrevEnv, Value &Current, Environment &CurrentEnv, Environment::ValueModel &Model) { // Boolean-model widening. if (auto *PrevBool = dyn_cast(&Prev)) { // If previous value was already Top, re-use that to (implicitly) indicate // that no change occurred. 
if (isa(Prev)) return Prev; // We may need to widen to Top, but before we do so, check whether both // values are implied to be either true or false in the current environment. // In that case, we can simply return a literal instead. auto &CurBool = cast(Current); bool TruePrev = PrevEnv.proves(PrevBool->formula()); bool TrueCur = CurrentEnv.proves(CurBool.formula()); if (TruePrev && TrueCur) return CurrentEnv.getBoolLiteralValue(true); if (!TruePrev && !TrueCur && PrevEnv.proves(PrevEnv.arena().makeNot(PrevBool->formula())) && CurrentEnv.proves(CurrentEnv.arena().makeNot(CurBool.formula()))) return CurrentEnv.getBoolLiteralValue(false); return CurrentEnv.makeTopBoolValue(); } // FIXME: Add other built-in model widening. // Custom-model widening. if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) return *W; return equateUnknownValues(Prev.getKind()) ? Prev : Current; } // Returns whether the values in `Map1` and `Map2` compare equal for those // keys that `Map1` and `Map2` have in common. template bool compareKeyToValueMaps(const llvm::MapVector &Map1, const llvm::MapVector &Map2, const Environment &Env1, const Environment &Env2, Environment::ValueModel &Model) { for (auto &Entry : Map1) { Key K = Entry.first; assert(K != nullptr); Value *Val = Entry.second; assert(Val != nullptr); auto It = Map2.find(K); if (It == Map2.end()) continue; assert(It->second != nullptr); if (!areEquivalentValues(*Val, *It->second) && !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2, Model)) return false; } return true; } // Perform a join on two `LocToVal` maps. 
// Only locations present in both maps are considered. Equivalent values are
// kept as-is; distinct values are kept only if `mergeDistinctValues()`
// produces a merged value.
static llvm::MapVector<const StorageLocation *, Value *>
joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
             const llvm::MapVector<const StorageLocation *, Value *> &LocToVal2,
             const Environment &Env1, const Environment &Env2,
             Environment &JoinedEnv, Environment::ValueModel &Model) {
  llvm::MapVector<const StorageLocation *, Value *> Result;
  for (auto &Entry : LocToVal) {
    const StorageLocation *Loc = Entry.first;
    assert(Loc != nullptr);

    Value *Val = Entry.second;
    assert(Val != nullptr);

    auto It = LocToVal2.find(Loc);
    if (It == LocToVal2.end())
      continue;
    assert(It->second != nullptr);

    if (areEquivalentValues(*Val, *It->second)) {
      Result.insert({Loc, Val});
      continue;
    }

    if (Value *MergedVal = mergeDistinctValues(
            Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
      Result.insert({Loc, MergedVal});
    }
  }

  return Result;
}

// Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either
// `const StorageLocation *` or `const Expr *`.
//
// Keys of `CurMap` that are absent from `PrevMap` are dropped. `Effect` is set
// to `Changed` whenever a widened value differs from the previous value; it is
// never reset to `Unchanged` here.
template <typename Key>
llvm::MapVector<Key, Value *>
widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap,
                   const llvm::MapVector<Key, Value *> &PrevMap,
                   Environment &CurEnv, const Environment &PrevEnv,
                   Environment::ValueModel &Model, LatticeJoinEffect &Effect) {
  llvm::MapVector<Key, Value *> WidenedMap;
  for (auto &Entry : CurMap) {
    Key K = Entry.first;
    assert(K != nullptr);

    Value *Val = Entry.second;
    assert(Val != nullptr);

    auto PrevIt = PrevMap.find(K);
    if (PrevIt == PrevMap.end())
      continue;
    assert(PrevIt->second != nullptr);

    if (areEquivalentValues(*Val, *PrevIt->second)) {
      WidenedMap.insert({K, Val});
      continue;
    }

    Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second,
                                            PrevEnv, *Val, CurEnv, Model);
    WidenedMap.insert({K, &WidenedVal});
    if (&WidenedVal != PrevIt->second)
      Effect = LatticeJoinEffect::Changed;
  }

  return WidenedMap;
}

/// Inserts `D` into `Vars` if it is a variable with global storage.
static void insertIfGlobal(const Decl &D, llvm::DenseSet &Vars) { if (auto *V = dyn_cast(&D)) if (V->hasGlobalStorage()) Vars.insert(V); } static void insertIfFunction(const Decl &D, llvm::DenseSet &Funcs) { if (auto *FD = dyn_cast(&D)) Funcs.insert(FD); } static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) { // Use getCalleeDecl instead of getMethodDecl in order to handle // pointer-to-member calls. const auto *MethodDecl = dyn_cast_or_null(C.getCalleeDecl()); if (!MethodDecl) return nullptr; auto *Body = dyn_cast_or_null(MethodDecl->getBody()); if (!Body || Body->size() != 1) return nullptr; if (auto *RS = dyn_cast(*Body->body_begin())) if (auto *Return = RS->getRetValue()) return dyn_cast(Return->IgnoreParenImpCasts()); return nullptr; } static void getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, llvm::DenseSet &Vars, llvm::DenseSet &Funcs) { insertIfGlobal(D, Vars); insertIfFunction(D, Funcs); if (const auto *Decomp = dyn_cast(&D)) for (const auto *B : Decomp->bindings()) if (auto *ME = dyn_cast_or_null(B->getBinding())) // FIXME: should we be using `E->getFoundDecl()`? if (const auto *FD = dyn_cast(ME->getMemberDecl())) Fields.insert(FD); } /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, /// global variables and functions that are declared in or referenced from /// sub-statements. 
static void getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, llvm::DenseSet &Vars, llvm::DenseSet &Funcs) { for (auto *Child : S.children()) if (Child != nullptr) getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); if (const auto *DefaultInit = dyn_cast(&S)) getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); if (auto *DS = dyn_cast(&S)) { if (DS->isSingleDecl()) getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); else for (auto *D : DS->getDeclGroup()) getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); } else if (auto *E = dyn_cast(&S)) { insertIfGlobal(*E->getDecl(), Vars); insertIfFunction(*E->getDecl(), Funcs); } else if (const auto *C = dyn_cast(&S)) { // If this is a method that returns a member variable but does nothing else, // model the field of the return value. if (MemberExpr *E = getMemberForAccessor(*C)) if (const auto *FD = dyn_cast(E->getMemberDecl())) Fields.insert(FD); } else if (auto *E = dyn_cast(&S)) { // FIXME: should we be using `E->getFoundDecl()`? 
const ValueDecl *VD = E->getMemberDecl(); insertIfGlobal(*VD, Vars); insertIfFunction(*VD, Funcs); if (const auto *FD = dyn_cast(VD)) Fields.insert(FD); } else if (auto *InitList = dyn_cast(&S)) { if (RecordDecl *RD = InitList->getType()->getAsRecordDecl()) for (const auto *FD : getFieldsForInitListExpr(RD)) Fields.insert(FD); } } Environment::Environment(DataflowAnalysisContext &DACtx) : DACtx(&DACtx), FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} Environment::Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx) : Environment(DACtx) { CallStack.push_back(&DeclCtx); } void Environment::initialize() { const DeclContext *DeclCtx = getDeclCtx(); if (DeclCtx == nullptr) return; if (const auto *FuncDecl = dyn_cast(DeclCtx)) { assert(FuncDecl->doesThisDeclarationHaveABody()); initFieldsGlobalsAndFuncs(FuncDecl); for (const auto *ParamDecl : FuncDecl->parameters()) { assert(ParamDecl != nullptr); setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); } } if (const auto *MethodDecl = dyn_cast(DeclCtx)) { auto *Parent = MethodDecl->getParent(); assert(Parent != nullptr); if (Parent->isLambda()) { for (auto Capture : Parent->captures()) { if (Capture.capturesVariable()) { const auto *VarDecl = Capture.getCapturedVar(); assert(VarDecl != nullptr); setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr)); } else if (Capture.capturesThis()) { const auto *SurroundingMethodDecl = cast(DeclCtx->getNonClosureAncestor()); QualType ThisPointeeType = SurroundingMethodDecl->getFunctionObjectParameterType(); setThisPointeeStorageLocation( cast(createValue(ThisPointeeType))->getLoc()); } } } else if (MethodDecl->isImplicitObjectMemberFunction()) { QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType(); setThisPointeeStorageLocation( cast(createValue(ThisPointeeType))->getLoc()); } } } // FIXME: Add support for resetting globals after function calls to enable // the implementation of sound analyses. 
void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { assert(FuncDecl->doesThisDeclarationHaveABody()); FieldSet Fields; llvm::DenseSet Vars; llvm::DenseSet Funcs; // Look for global variable and field references in the // constructor-initializers. if (const auto *CtorDecl = dyn_cast(FuncDecl)) { for (const auto *Init : CtorDecl->inits()) { if (Init->isMemberInitializer()) { Fields.insert(Init->getMember()); } else if (Init->isIndirectMemberInitializer()) { for (const auto *I : Init->getIndirectMember()->chain()) Fields.insert(cast(I)); } const Expr *E = Init->getInit(); assert(E != nullptr); getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); } // Add all fields mentioned in default member initializers. for (const FieldDecl *F : CtorDecl->getParent()->fields()) if (const auto *I = F->getInClassInitializer()) getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); } getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); // These have to be added before the lines that follow to ensure that // `create*` work correctly for structs. 
DACtx->addModeledFields(Fields); for (const VarDecl *D : Vars) { if (getStorageLocation(*D) != nullptr) continue; setStorageLocation(*D, createObject(*D)); } for (const FunctionDecl *FD : Funcs) { if (getStorageLocation(*FD) != nullptr) continue; auto &Loc = createStorageLocation(FD->getType()); setStorageLocation(*FD, Loc); } } Environment Environment::fork() const { Environment Copy(*this); Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); return Copy; } bool Environment::canDescend(unsigned MaxDepth, const DeclContext *Callee) const { return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); } Environment Environment::pushCall(const CallExpr *Call) const { Environment Env(*this); if (const auto *MethodCall = dyn_cast(Call)) { if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { if (!isa(Arg)) Env.ThisPointeeLoc = cast(getStorageLocation(*Arg)); // Otherwise (when the argument is `this`), retain the current // environment's `ThisPointeeLoc`. } } Env.pushCallInternal(Call->getDirectCallee(), llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); return Env; } Environment Environment::pushCall(const CXXConstructExpr *Call) const { Environment Env(*this); Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); Env.pushCallInternal(Call->getConstructor(), llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); return Env; } void Environment::pushCallInternal(const FunctionDecl *FuncDecl, ArrayRef Args) { // Canonicalize to the definition of the function. This ensures that we're // putting arguments into the same `ParamVarDecl`s` that the callee will later // be retrieving them from. 
assert(FuncDecl->getDefinition() != nullptr); FuncDecl = FuncDecl->getDefinition(); CallStack.push_back(FuncDecl); initFieldsGlobalsAndFuncs(FuncDecl); const auto *ParamIt = FuncDecl->param_begin(); // FIXME: Parameters don't always map to arguments 1:1; examples include // overloaded operators implemented as member functions, and parameter packs. for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { assert(ParamIt != FuncDecl->param_end()); const VarDecl *Param = *ParamIt; setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); } } void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { // We ignore some entries of `CalleeEnv`: // - `DACtx` because is already the same in both // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or // `ThisPointeeLoc` because they don't apply to us. // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the // callee's local scope, so when popping that scope, we do not propagate // the maps. this->LocToVal = std::move(CalleeEnv.LocToVal); this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); if (Call->isGLValue()) { if (CalleeEnv.ReturnLoc != nullptr) setStorageLocation(*Call, *CalleeEnv.ReturnLoc); } else if (!Call->getType()->isVoidType()) { if (CalleeEnv.ReturnVal != nullptr) setValue(*Call, *CalleeEnv.ReturnVal); } } void Environment::popCall(const CXXConstructExpr *Call, const Environment &CalleeEnv) { // See also comment in `popCall(const CallExpr *, const Environment &)` above. 
this->LocToVal = std::move(CalleeEnv.LocToVal); this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { setValue(*Call, *Val); } } bool Environment::equivalentTo(const Environment &Other, Environment::ValueModel &Model) const { assert(DACtx == Other.DACtx); if (ReturnVal != Other.ReturnVal) return false; if (ReturnLoc != Other.ReturnLoc) return false; if (ThisPointeeLoc != Other.ThisPointeeLoc) return false; if (DeclToLoc != Other.DeclToLoc) return false; if (ExprToLoc != Other.ExprToLoc) return false; if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model)) return false; if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model)) return false; return true; } LatticeJoinEffect Environment::widen(const Environment &PrevEnv, Environment::ValueModel &Model) { assert(DACtx == PrevEnv.DACtx); assert(ReturnVal == PrevEnv.ReturnVal); assert(ReturnLoc == PrevEnv.ReturnLoc); assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); assert(CallStack == PrevEnv.CallStack); auto Effect = LatticeJoinEffect::Unchanged; // By the API, `PrevEnv` is a previous version of the environment for the same // block, so we have some guarantees about its shape. In particular, it will // be the result of a join or widen operation on previous values for this // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain // this property here, we don't need change their current values to widen. 
assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); assert(ExprToVal.size() <= PrevEnv.ExprToVal.size()); assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv, Model, Effect); LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv, Model, Effect); if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || ExprToLoc.size() != PrevEnv.ExprToLoc.size() || ExprToVal.size() != PrevEnv.ExprToVal.size() || LocToVal.size() != PrevEnv.LocToVal.size()) Effect = LatticeJoinEffect::Changed; return Effect; } Environment Environment::join(const Environment &EnvA, const Environment &EnvB, Environment::ValueModel &Model) { assert(EnvA.DACtx == EnvB.DACtx); assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); assert(EnvA.CallStack == EnvB.CallStack); Environment JoinedEnv(*EnvA.DACtx); JoinedEnv.CallStack = EnvA.CallStack; JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { // `ReturnVal` might not always get set -- for example if we have a return // statement of the form `return some_other_func()` and we decide not to // analyze `some_other_func()`. // In this case, we can't say anything about the joined return value -- we // don't simply want to propagate the return value that we do have, because // it might not be the correct one. // This occurs for example in the test `ContextSensitiveMutualRecursion`. JoinedEnv.ReturnVal = nullptr; } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { JoinedEnv.ReturnVal = EnvA.ReturnVal; } else { assert(!EnvA.CallStack.empty()); // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this // cast. 
auto *Func = dyn_cast(EnvA.CallStack.back()); assert(Func != nullptr); if (Value *MergedVal = mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) JoinedEnv.ReturnVal = MergedVal; } if (EnvA.ReturnLoc == EnvB.ReturnLoc) JoinedEnv.ReturnLoc = EnvA.ReturnLoc; else JoinedEnv.ReturnLoc = nullptr; JoinedEnv.DeclToLoc = intersectDeclToLoc(EnvA.DeclToLoc, EnvB.DeclToLoc); // FIXME: update join to detect backedges and simplify the flow condition // accordingly. JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( EnvA.FlowConditionToken, EnvB.FlowConditionToken); JoinedEnv.LocToVal = joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model); // We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal` // empty, as we never need to access entries in these maps outside of the // basic block that sets them. return JoinedEnv; } StorageLocation &Environment::createStorageLocation(QualType Type) { return DACtx->createStorageLocation(Type); } StorageLocation &Environment::createStorageLocation(const ValueDecl &D) { // Evaluated declarations are always assigned the same storage locations to // ensure that the environment stabilizes across loop iterations. Storage // locations for evaluated declarations are stored in the analysis context. return DACtx->getStableStorageLocation(D); } StorageLocation &Environment::createStorageLocation(const Expr &E) { // Evaluated expressions are always assigned the same storage locations to // ensure that the environment stabilizes across loop iterations. Storage // locations for evaluated expressions are stored in the analysis context. 
return DACtx->getStableStorageLocation(E); } void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { assert(!DeclToLoc.contains(&D)); DeclToLoc[&D] = &Loc; } StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { auto It = DeclToLoc.find(&D); if (It == DeclToLoc.end()) return nullptr; StorageLocation *Loc = It->second; return Loc; } void Environment::removeDecl(const ValueDecl &D) { DeclToLoc.erase(&D); } void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, // but we still want to be able to associate a `StorageLocation` with them, // so allow these as an exception. assert(E.isGLValue() || E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); const Expr &CanonE = ignoreCFGOmittedNodes(E); assert(!ExprToLoc.contains(&CanonE)); ExprToLoc[&CanonE] = &Loc; } StorageLocation *Environment::getStorageLocation(const Expr &E) const { // See comment in `setStorageLocation()`. assert(E.isGLValue() || E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); return It == ExprToLoc.end() ? nullptr : &*It->second; } // Returns whether a prvalue of record type is the one that originally // constructs the object (i.e. it doesn't propagate it from one of its // children). static bool isOriginalRecordConstructor(const Expr &RecordPRValue) { if (auto *Init = dyn_cast(&RecordPRValue)) return !Init->isSemanticForm() || !Init->isTransparent(); return isa(RecordPRValue) || isa(RecordPRValue) || isa(RecordPRValue) || isa(RecordPRValue) || // The framework currently does not propagate the objects created in // the two branches of a `ConditionalOperator` because there is no way // to reconcile their storage locations, which are different. We // therefore claim that the `ConditionalOperator` is the expression // that originally constructs the object. 
// Ultimately, this will be fixed by propagating locations down from // the result object, rather than up from the original constructor as // we do now (see also the FIXME in the documentation for // `getResultObjectLocation()`). isa(RecordPRValue); } RecordStorageLocation & Environment::getResultObjectLocation(const Expr &RecordPRValue) const { assert(RecordPRValue.getType()->isRecordType()); assert(RecordPRValue.isPRValue()); // Returns a storage location that we can use if assertions fail. auto FallbackForAssertFailure = [this, &RecordPRValue]() -> RecordStorageLocation & { return cast( DACtx->getStableStorageLocation(RecordPRValue)); }; if (isOriginalRecordConstructor(RecordPRValue)) { auto *Val = cast_or_null(getValue(RecordPRValue)); // The builtin transfer function should have created a `RecordValue` for all // original record constructors. assert(Val); if (!Val) return FallbackForAssertFailure(); return Val->getLoc(); } if (auto *Op = dyn_cast(&RecordPRValue); Op && Op->isCommaOp()) { return getResultObjectLocation(*Op->getRHS()); } // All other expression nodes that propagate a record prvalue should have // exactly one child. 
llvm::SmallVector children(RecordPRValue.child_begin(), RecordPRValue.child_end()); assert(children.size() == 1); if (children.empty()) return FallbackForAssertFailure(); return getResultObjectLocation(*cast(children[0])); } PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { return DACtx->getOrCreateNullPointerValue(PointeeType); } void Environment::setValue(const StorageLocation &Loc, Value &Val) { assert(!isa(&Val) || &cast(&Val)->getLoc() == &Loc); LocToVal[&Loc] = &Val; } void Environment::setValue(const Expr &E, Value &Val) { const Expr &CanonE = ignoreCFGOmittedNodes(E); if (auto *RecordVal = dyn_cast(&Val)) { assert(isOriginalRecordConstructor(CanonE) || &RecordVal->getLoc() == &getResultObjectLocation(CanonE)); } assert(CanonE.isPRValue()); ExprToVal[&CanonE] = &Val; } Value *Environment::getValue(const StorageLocation &Loc) const { return LocToVal.lookup(&Loc); } Value *Environment::getValue(const ValueDecl &D) const { auto *Loc = getStorageLocation(D); if (Loc == nullptr) return nullptr; return getValue(*Loc); } Value *Environment::getValue(const Expr &E) const { if (E.isPRValue()) { auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E)); return It == ExprToVal.end() ? nullptr : It->second; } auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); if (It == ExprToLoc.end()) return nullptr; return getValue(*It->second); } Value *Environment::createValue(QualType Type) { llvm::DenseSet Visited; int CreatedValuesCount = 0; Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, CreatedValuesCount); if (CreatedValuesCount > MaxCompositeValueSize) { llvm::errs() << "Attempting to initialize a huge value of type: " << Type << '\n'; } return Val; } Value *Environment::createValueUnlessSelfReferential( QualType Type, llvm::DenseSet &Visited, int Depth, int &CreatedValuesCount) { assert(!Type.isNull()); assert(!Type->isReferenceType()); // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 
if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || Depth > MaxCompositeValueDepth) return nullptr; if (Type->isBooleanType()) { CreatedValuesCount++; return &makeAtomicBoolValue(); } if (Type->isIntegerType()) { // FIXME: consider instead `return nullptr`, given that we do nothing useful // with integers, and so distinguishing them serves no purpose, but could // prevent convergence. CreatedValuesCount++; return &arena().create(); } if (Type->isPointerType()) { CreatedValuesCount++; QualType PointeeType = Type->getPointeeType(); StorageLocation &PointeeLoc = createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); return &arena().create(PointeeLoc); } if (Type->isRecordType()) { CreatedValuesCount++; llvm::DenseMap FieldLocs; for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { assert(Field != nullptr); QualType FieldType = Field->getType(); FieldLocs.insert( {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, CreatedValuesCount)}); } RecordStorageLocation::SyntheticFieldMap SyntheticFieldLocs; for (const auto &Entry : DACtx->getSyntheticFields(Type)) { SyntheticFieldLocs.insert( {Entry.getKey(), &createLocAndMaybeValue(Entry.getValue(), Visited, Depth + 1, CreatedValuesCount)}); } RecordStorageLocation &Loc = DACtx->createRecordStorageLocation( Type, std::move(FieldLocs), std::move(SyntheticFieldLocs)); RecordValue &RecordVal = create(Loc); // As we already have a storage location for the `RecordValue`, we can and // should associate them in the environment. 
setValue(Loc, RecordVal); return &RecordVal; } return nullptr; } StorageLocation & Environment::createLocAndMaybeValue(QualType Ty, llvm::DenseSet &Visited, int Depth, int &CreatedValuesCount) { if (!Visited.insert(Ty.getCanonicalType()).second) return createStorageLocation(Ty.getNonReferenceType()); Value *Val = createValueUnlessSelfReferential( Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount); Visited.erase(Ty.getCanonicalType()); Ty = Ty.getNonReferenceType(); if (Val == nullptr) return createStorageLocation(Ty); if (Ty->isRecordType()) return cast(Val)->getLoc(); StorageLocation &Loc = createStorageLocation(Ty); setValue(Loc, *Val); return Loc; } StorageLocation &Environment::createObjectInternal(const ValueDecl *D, QualType Ty, const Expr *InitExpr) { if (Ty->isReferenceType()) { // Although variables of reference type always need to be initialized, it // can happen that we can't see the initializer, so `InitExpr` may still // be null. if (InitExpr) { if (auto *InitExprLoc = getStorageLocation(*InitExpr)) return *InitExprLoc; } // Even though we have an initializer, we might not get an // InitExprLoc, for example if the InitExpr is a CallExpr for which we // don't have a function body. In this case, we just invent a storage // location and value -- it's the best we can do. return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); } Value *Val = nullptr; if (InitExpr) // In the (few) cases where an expression is intentionally // "uninterpreted", `InitExpr` is not associated with a value. There are // two ways to handle this situation: propagate the status, so that // uninterpreted initializers result in uninterpreted variables, or // provide a default value. We choose the latter so that later refinements // of the variable can be used for reasoning about the surrounding code. // For this reason, we let this case be handled by the `createValue()` // call below. // // FIXME. 
If and when we interpret all language cases, change this to // assert that `InitExpr` is interpreted, rather than supplying a // default value (assuming we don't update the environment API to return // references). Val = getValue(*InitExpr); if (!Val) Val = createValue(Ty); if (Ty->isRecordType()) return cast(Val)->getLoc(); StorageLocation &Loc = D ? createStorageLocation(*D) : createStorageLocation(Ty); if (Val) setValue(Loc, *Val); return Loc; } void Environment::assume(const Formula &F) { DACtx->addFlowConditionConstraint(FlowConditionToken, F); } bool Environment::proves(const Formula &F) const { return DACtx->flowConditionImplies(FlowConditionToken, F); } bool Environment::allows(const Formula &F) const { return DACtx->flowConditionAllows(FlowConditionToken, F); } void Environment::dump(raw_ostream &OS) const { // FIXME: add printing for remaining fields and allow caller to decide what // fields are printed. OS << "DeclToLoc:\n"; for (auto [D, L] : DeclToLoc) OS << " [" << D->getNameAsString() << ", " << L << "]\n"; OS << "ExprToLoc:\n"; for (auto [E, L] : ExprToLoc) OS << " [" << E << ", " << L << "]\n"; OS << "ExprToVal:\n"; for (auto [E, V] : ExprToVal) OS << " [" << E << ", " << V << ": " << *V << "]\n"; OS << "LocToVal:\n"; for (auto [L, V] : LocToVal) { OS << " [" << L << ", " << V << ": " << *V << "]\n"; } OS << "\n"; DACtx->dumpFlowCondition(FlowConditionToken, OS); } void Environment::dump() const { dump(llvm::dbgs()); } RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, const Environment &Env) { Expr *ImplicitObject = MCE.getImplicitObjectArgument(); if (ImplicitObject == nullptr) return nullptr; if (ImplicitObject->getType()->isPointerType()) { if (auto *Val = Env.get(*ImplicitObject)) return &cast(Val->getPointeeLoc()); return nullptr; } return cast_or_null( Env.getStorageLocation(*ImplicitObject)); } RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, const Environment &Env) { Expr *Base = ME.getBase(); 
if (Base == nullptr) return nullptr; if (ME.isArrow()) { if (auto *Val = Env.get(*Base)) return &cast(Val->getPointeeLoc()); return nullptr; } return Env.get(*Base); } std::vector getFieldsForInitListExpr(const RecordDecl *RD) { // Unnamed bitfields are only used for padding and do not appear in // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s // field list, and we thus need to remove them before mapping inits to // fields to avoid mapping inits to the wrongs fields. std::vector Fields; llvm::copy_if( RD->fields(), std::back_inserter(Fields), [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); return Fields; } RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { auto &NewVal = Env.create(Loc); Env.setValue(Loc, NewVal); return NewVal; } RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) { assert(Expr.getType()->isRecordType()); if (Expr.isPRValue()) { if (auto *ExistingVal = Env.get(Expr)) { auto &NewVal = Env.create(ExistingVal->getLoc()); Env.setValue(Expr, NewVal); Env.setValue(NewVal.getLoc(), NewVal); return NewVal; } auto &NewVal = *cast(Env.createValue(Expr.getType())); Env.setValue(Expr, NewVal); return NewVal; } if (auto *Loc = Env.get(Expr)) { auto &NewVal = Env.create(*Loc); Env.setValue(*Loc, NewVal); return NewVal; } auto &NewVal = *cast(Env.createValue(Expr.getType())); Env.setStorageLocation(Expr, NewVal.getLoc()); return NewVal; } } // namespace dataflow } // namespace clang