//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines a DataflowAnalysisContext class that owns objects that // encompass the state of a program and stores context that is used during // dataflow analysis. // //===----------------------------------------------------------------------===// #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/AST/ExprCXX.h" #include "clang/Analysis/FlowSensitive/DebugSupport.h" #include "clang/Analysis/FlowSensitive/Formula.h" #include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include static llvm::cl::opt DataflowLog( "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " "log to stderr. With an arg, writes HTML logs under the " "specified directory (one per analyzed function).")); namespace clang { namespace dataflow { FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { // During context-sensitive analysis, a struct may be allocated in one // function, but its field accessed in a function lower in the stack than // the allocation. Since we only collect fields used in the function where // the allocation occurs, we can't apply that filter when performing // context-sensitive analysis. But, this only applies to storage locations, // since field access it not allowed to fail. In contrast, field *values* // don't need this allowance, since the API allows for uninitialized fields. if (Opts.ContextSensitiveOpts) return getObjectFields(Type); return llvm::set_intersection(getObjectFields(Type), ModeledFields); } void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { ModeledFields.set_union(Fields); } StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { if (!Type.isNull() && Type->isRecordType()) { llvm::DenseMap FieldLocs; for (const FieldDecl *Field : getModeledFields(Type)) if (Field->getType()->isReferenceType()) FieldLocs.insert({Field, nullptr}); else FieldLocs.insert({Field, &createStorageLocation( Field->getType().getNonReferenceType())}); RecordStorageLocation::SyntheticFieldMap SyntheticFields; for (const auto &Entry : getSyntheticFields(Type)) SyntheticFields.insert( {Entry.getKey(), &createStorageLocation(Entry.getValue().getNonReferenceType())}); return createRecordStorageLocation(Type, std::move(FieldLocs), std::move(SyntheticFields)); } return arena().create(Type); } // Returns the keys for a given `StringMap`. // Can't use `StringSet` as the return type as it doesn't support `operator==`. template static llvm::DenseSet getKeys(const llvm::StringMap &Map) { return llvm::DenseSet(Map.keys().begin(), Map.keys().end()); } RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation( QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields) { assert(Type->isRecordType()); assert(containsSameFields(getModeledFields(Type), FieldLocs)); assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields)); RecordStorageLocationCreated = true; return arena().create(Type, std::move(FieldLocs), std::move(SyntheticFields)); } StorageLocation & DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { if (auto *Loc = DeclToLoc.lookup(&D)) return *Loc; auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); DeclToLoc[&D] = &Loc; return Loc; } StorageLocation & DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { const Expr &CanonE = ignoreCFGOmittedNodes(E); if (auto *Loc = ExprToLoc.lookup(&CanonE)) return *Loc; auto &Loc = createStorageLocation(CanonE.getType()); ExprToLoc[&CanonE] = &Loc; return Loc; } PointerValue & DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { auto CanonicalPointeeType = PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); if (Res.second) { auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); Res.first->second = &arena().create(PointeeLoc); } return *Res.first->second; } void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { if (Invariant == nullptr) Invariant = &Constraint; else Invariant = &arena().makeAnd(*Invariant, Constraint); } void DataflowAnalysisContext::addFlowConditionConstraint( Atom Token, const Formula &Constraint) { auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); if (!Res.second) { Res.first->second = &arena().makeAnd(*Res.first->second, Constraint); } } Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { Atom ForkToken = arena().makeFlowConditionToken(); FlowConditionDeps[ForkToken].insert(Token); addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); return ForkToken; } Atom DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, Atom SecondToken) { Atom Token = arena().makeFlowConditionToken(); FlowConditionDeps[Token].insert(FirstToken); FlowConditionDeps[Token].insert(SecondToken); addFlowConditionConstraint(Token, arena().makeOr(arena().makeAtomRef(FirstToken), arena().makeAtomRef(SecondToken))); return Token; } Solver::Result DataflowAnalysisContext::querySolver( llvm::SetVector Constraints) { return S->solve(Constraints.getArrayRef()); } bool DataflowAnalysisContext::flowConditionImplies(Atom Token, const Formula &F) { if (F.isLiteral(true)) return true; // Returns true if and only if truth assignment of the flow condition implies // that `F` is also true. We prove whether or not this property holds by // reducing the problem to satisfiability checking. In other words, we attempt // to show that assuming `F` is false makes the constraints induced by the // flow condition unsatisfiable. llvm::SetVector Constraints; Constraints.insert(&arena().makeAtomRef(Token)); Constraints.insert(&arena().makeNot(F)); addTransitiveFlowConditionConstraints(Token, Constraints); return isUnsatisfiable(std::move(Constraints)); } bool DataflowAnalysisContext::flowConditionAllows(Atom Token, const Formula &F) { if (F.isLiteral(false)) return false; llvm::SetVector Constraints; Constraints.insert(&arena().makeAtomRef(Token)); Constraints.insert(&F); addTransitiveFlowConditionConstraints(Token, Constraints); return isSatisfiable(std::move(Constraints)); } bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, const Formula &Val2) { llvm::SetVector Constraints; Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); return isUnsatisfiable(std::move(Constraints)); } void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( Atom Token, llvm::SetVector &Constraints) { llvm::DenseSet AddedTokens; std::vector Remaining = {Token}; if (Invariant) Constraints.insert(Invariant); // Define all the flow conditions that might be referenced in constraints. while (!Remaining.empty()) { auto Token = Remaining.back(); Remaining.pop_back(); if (!AddedTokens.insert(Token).second) continue; auto ConstraintsIt = FlowConditionConstraints.find(Token); if (ConstraintsIt == FlowConditionConstraints.end()) { Constraints.insert(&arena().makeAtomRef(Token)); } else { // Bind flow condition token via `iff` to its set of constraints: // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), *ConstraintsIt->second)); } if (auto DepsIt = FlowConditionDeps.find(Token); DepsIt != FlowConditionDeps.end()) for (Atom A : DepsIt->second) Remaining.push_back(A); } } static void printAtomList(const llvm::SmallVector &Atoms, llvm::raw_ostream &OS) { OS << "("; for (size_t i = 0; i < Atoms.size(); ++i) { OS << Atoms[i]; if (i + 1 < Atoms.size()) OS << ", "; } OS << ")\n"; } void DataflowAnalysisContext::dumpFlowCondition(Atom Token, llvm::raw_ostream &OS) { llvm::SetVector Constraints; Constraints.insert(&arena().makeAtomRef(Token)); addTransitiveFlowConditionConstraints(Token, Constraints); OS << "Flow condition token: " << Token << "\n"; SimplifyConstraintsInfo Info; llvm::SetVector OriginalConstraints = Constraints; simplifyConstraints(Constraints, arena(), &Info); if (!Constraints.empty()) { OS << "Constraints:\n"; for (const auto *Constraint : Constraints) { Constraint->print(OS); OS << "\n"; } } if (!Info.TrueAtoms.empty()) { OS << "True atoms: "; printAtomList(Info.TrueAtoms, OS); } if (!Info.FalseAtoms.empty()) { OS << "False atoms: "; printAtomList(Info.FalseAtoms, OS); } if (!Info.EquivalentAtoms.empty()) { OS << "Equivalent atoms:\n"; for (const llvm::SmallVector &Class : Info.EquivalentAtoms) printAtomList(Class, OS); } OS << "\nFlow condition constraints before simplification:\n"; for (const auto *Constraint : OriginalConstraints) { Constraint->print(OS); OS << "\n"; } } const ControlFlowContext * DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { // Canonicalize the key: F = F->getDefinition(); if (F == nullptr) return nullptr; auto It = FunctionContexts.find(F); if (It != FunctionContexts.end()) return &It->second; if (F->doesThisDeclarationHaveABody()) { auto CFCtx = ControlFlowContext::build(*F); // FIXME: Handle errors. assert(CFCtx); auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); return &Result.first->second; } return nullptr; } static std::unique_ptr makeLoggerFromCommandLine() { if (DataflowLog.empty()) return Logger::textual(llvm::errs()); llvm::StringRef Dir = DataflowLog; if (auto EC = llvm::sys::fs::create_directories(Dir)) llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; // All analysis runs within a process will log to the same directory. // Share a counter so they don't all overwrite each other's 0.html. // (Don't share a logger, it's not threadsafe). static std::atomic Counter = {0}; auto StreamFactory = [Dir(Dir.str())]() mutable -> std::unique_ptr { llvm::SmallString<256> File(Dir); llvm::sys::path::append(File, std::to_string(Counter.fetch_add(1)) + ".html"); std::error_code EC; auto OS = std::make_unique(File, EC); if (EC) { llvm::errs() << "Failed to create log " << File << ": " << EC.message() << "\n"; return std::make_unique(); } return OS; }; return Logger::html(std::move(StreamFactory)); } DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr S, Options Opts) : S(std::move(S)), A(std::make_unique()), Opts(Opts) { assert(this->S != nullptr); // If the -dataflow-log command-line flag was set, synthesize a logger. // This is ugly but provides a uniform method for ad-hoc debugging dataflow- // based tools. if (Opts.Log == nullptr) { if (DataflowLog.getNumOccurrences() > 0) { LogOwner = makeLoggerFromCommandLine(); this->Opts.Log = LogOwner.get(); // FIXME: if the flag is given a value, write an HTML log to a file. } else { this->Opts.Log = &Logger::null(); } } } DataflowAnalysisContext::~DataflowAnalysisContext() = default; } // namespace dataflow } // namespace clang using namespace clang; const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { const Expr *Current = &E; if (auto *EWC = dyn_cast(Current)) { Current = EWC->getSubExpr(); assert(Current != nullptr); } Current = Current->IgnoreParens(); assert(Current != nullptr); return *Current; } const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { if (auto *E = dyn_cast(&S)) return ignoreCFGOmittedNodes(*E); return S; } // FIXME: Does not precisely handle non-virtual diamond inheritance. A single // field decl will be modeled for all instances of the inherited field. static void getFieldsFromClassHierarchy(QualType Type, clang::dataflow::FieldSet &Fields) { if (Type->isIncompleteType() || Type->isDependentType() || !Type->isRecordType()) return; for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) Fields.insert(Field); if (auto *CXXRecord = Type->getAsCXXRecordDecl()) for (const CXXBaseSpecifier &Base : CXXRecord->bases()) getFieldsFromClassHierarchy(Base.getType(), Fields); } /// Gets the set of all fields in the type. clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { FieldSet Fields; getFieldsFromClassHierarchy(Type, Fields); return Fields; } bool clang::dataflow::containsSameFields( const clang::dataflow::FieldSet &Fields, const clang::dataflow::RecordStorageLocation::FieldToLoc &FieldLocs) { if (Fields.size() != FieldLocs.size()) return false; for ([[maybe_unused]] auto [Field, Loc] : FieldLocs) if (!Fields.contains(cast_or_null(Field))) return false; return true; }