181ad6265SDimitry Andric //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 281ad6265SDimitry Andric // 381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 681ad6265SDimitry Andric // 781ad6265SDimitry Andric //===----------------------------------------------------------------------===// 881ad6265SDimitry Andric // 981ad6265SDimitry Andric // This file defines a DataflowAnalysisContext class that owns objects that 1081ad6265SDimitry Andric // encompass the state of a program and stores context that is used during 1181ad6265SDimitry Andric // dataflow analysis. 1281ad6265SDimitry Andric // 1381ad6265SDimitry Andric //===----------------------------------------------------------------------===// 1481ad6265SDimitry Andric 1581ad6265SDimitry Andric #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 1681ad6265SDimitry Andric #include "clang/AST/ExprCXX.h" 17fcaf7f86SDimitry Andric #include "clang/Analysis/FlowSensitive/DebugSupport.h" 18*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/Formula.h" 19*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/Logger.h" 2081ad6265SDimitry Andric #include "clang/Analysis/FlowSensitive/Value.h" 21bdd1243dSDimitry Andric #include "llvm/ADT/SetOperations.h" 22*06c3fb27SDimitry Andric #include "llvm/ADT/SetVector.h" 23*06c3fb27SDimitry Andric #include "llvm/Support/CommandLine.h" 24fcaf7f86SDimitry Andric #include "llvm/Support/Debug.h" 25*06c3fb27SDimitry Andric #include "llvm/Support/FileSystem.h" 26*06c3fb27SDimitry Andric #include "llvm/Support/Path.h" 27*06c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h" 2881ad6265SDimitry Andric #include <cassert> 2981ad6265SDimitry Andric #include <memory> 30*06c3fb27SDimitry Andric #include <string> 3181ad6265SDimitry Andric #include <utility> 32*06c3fb27SDimitry Andric #include <vector> 33*06c3fb27SDimitry Andric 34*06c3fb27SDimitry Andric static llvm::cl::opt<std::string> DataflowLog( 35*06c3fb27SDimitry Andric "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 36*06c3fb27SDimitry Andric llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 37*06c3fb27SDimitry Andric "log to stderr. With an arg, writes HTML logs under the " 38*06c3fb27SDimitry Andric "specified directory (one per analyzed function).")); 3981ad6265SDimitry Andric 4081ad6265SDimitry Andric namespace clang { 4181ad6265SDimitry Andric namespace dataflow { 4281ad6265SDimitry Andric 43*06c3fb27SDimitry Andric FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { 44bdd1243dSDimitry Andric // During context-sensitive analysis, a struct may be allocated in one 45bdd1243dSDimitry Andric // function, but its field accessed in a function lower in the stack than 46bdd1243dSDimitry Andric // the allocation. Since we only collect fields used in the function where 47bdd1243dSDimitry Andric // the allocation occurs, we can't apply that filter when performing 48bdd1243dSDimitry Andric // context-sensitive analysis. But, this only applies to storage locations, 49bdd1243dSDimitry Andric // since field access it not allowed to fail. In contrast, field *values* 50bdd1243dSDimitry Andric // don't need this allowance, since the API allows for uninitialized fields. 51*06c3fb27SDimitry Andric if (Opts.ContextSensitiveOpts) 52*06c3fb27SDimitry Andric return getObjectFields(Type); 53*06c3fb27SDimitry Andric 54*06c3fb27SDimitry Andric return llvm::set_intersection(getObjectFields(Type), ModeledFields); 5581ad6265SDimitry Andric } 56*06c3fb27SDimitry Andric 57*06c3fb27SDimitry Andric void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { 58*06c3fb27SDimitry Andric ModeledFields.set_union(Fields); 59*06c3fb27SDimitry Andric } 60*06c3fb27SDimitry Andric 61*06c3fb27SDimitry Andric StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 62*06c3fb27SDimitry Andric if (!Type.isNull() && Type->isRecordType()) { 63*06c3fb27SDimitry Andric llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 64*06c3fb27SDimitry Andric for (const FieldDecl *Field : getModeledFields(Type)) 65*06c3fb27SDimitry Andric if (Field->getType()->isReferenceType()) 66*06c3fb27SDimitry Andric FieldLocs.insert({Field, nullptr}); 67*06c3fb27SDimitry Andric else 68*06c3fb27SDimitry Andric FieldLocs.insert({Field, &createStorageLocation( 69*06c3fb27SDimitry Andric Field->getType().getNonReferenceType())}); 70*06c3fb27SDimitry Andric return arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs)); 71*06c3fb27SDimitry Andric } 72*06c3fb27SDimitry Andric return arena().create<ScalarStorageLocation>(Type); 7381ad6265SDimitry Andric } 7481ad6265SDimitry Andric 7581ad6265SDimitry Andric StorageLocation & 7681ad6265SDimitry Andric DataflowAnalysisContext::getStableStorageLocation(const VarDecl &D) { 7781ad6265SDimitry Andric if (auto *Loc = getStorageLocation(D)) 7881ad6265SDimitry Andric return *Loc; 79*06c3fb27SDimitry Andric auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); 8081ad6265SDimitry Andric setStorageLocation(D, Loc); 8181ad6265SDimitry Andric return Loc; 8281ad6265SDimitry Andric } 8381ad6265SDimitry Andric 8481ad6265SDimitry Andric StorageLocation & 8581ad6265SDimitry Andric DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 8681ad6265SDimitry Andric if (auto *Loc = getStorageLocation(E)) 8781ad6265SDimitry Andric return *Loc; 88bdd1243dSDimitry Andric auto &Loc = createStorageLocation(E.getType()); 8981ad6265SDimitry Andric setStorageLocation(E, Loc); 9081ad6265SDimitry Andric return Loc; 9181ad6265SDimitry Andric } 9281ad6265SDimitry Andric 9381ad6265SDimitry Andric PointerValue & 9481ad6265SDimitry Andric DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 95753f127fSDimitry Andric auto CanonicalPointeeType = 96753f127fSDimitry Andric PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 9781ad6265SDimitry Andric auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 9881ad6265SDimitry Andric if (Res.second) { 99bdd1243dSDimitry Andric auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 100*06c3fb27SDimitry Andric Res.first->second = &arena().create<PointerValue>(PointeeLoc); 10181ad6265SDimitry Andric } 10281ad6265SDimitry Andric return *Res.first->second; 10381ad6265SDimitry Andric } 10481ad6265SDimitry Andric 10581ad6265SDimitry Andric void DataflowAnalysisContext::addFlowConditionConstraint( 106*06c3fb27SDimitry Andric Atom Token, const Formula &Constraint) { 107*06c3fb27SDimitry Andric auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 10881ad6265SDimitry Andric if (!Res.second) { 109*06c3fb27SDimitry Andric Res.first->second = 110*06c3fb27SDimitry Andric &arena().makeAnd(*Res.first->second, Constraint); 11181ad6265SDimitry Andric } 11281ad6265SDimitry Andric } 11381ad6265SDimitry Andric 114*06c3fb27SDimitry Andric Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 115*06c3fb27SDimitry Andric Atom ForkToken = arena().makeFlowConditionToken(); 116*06c3fb27SDimitry Andric FlowConditionDeps[ForkToken].insert(Token); 117*06c3fb27SDimitry Andric addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 11881ad6265SDimitry Andric return ForkToken; 11981ad6265SDimitry Andric } 12081ad6265SDimitry Andric 121*06c3fb27SDimitry Andric Atom 122*06c3fb27SDimitry Andric DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 123*06c3fb27SDimitry Andric Atom SecondToken) { 124*06c3fb27SDimitry Andric Atom Token = arena().makeFlowConditionToken(); 125*06c3fb27SDimitry Andric FlowConditionDeps[Token].insert(FirstToken); 126*06c3fb27SDimitry Andric FlowConditionDeps[Token].insert(SecondToken); 12781ad6265SDimitry Andric addFlowConditionConstraint(Token, 128*06c3fb27SDimitry Andric arena().makeOr(arena().makeAtomRef(FirstToken), 129*06c3fb27SDimitry Andric arena().makeAtomRef(SecondToken))); 13081ad6265SDimitry Andric return Token; 13181ad6265SDimitry Andric } 13281ad6265SDimitry Andric 133*06c3fb27SDimitry Andric Solver::Result DataflowAnalysisContext::querySolver( 134*06c3fb27SDimitry Andric llvm::SetVector<const Formula *> Constraints) { 135*06c3fb27SDimitry Andric Constraints.insert(&arena().makeLiteral(true)); 136*06c3fb27SDimitry Andric Constraints.insert(&arena().makeNot(arena().makeLiteral(false))); 137*06c3fb27SDimitry Andric return S->solve(Constraints.getArrayRef()); 13881ad6265SDimitry Andric } 13981ad6265SDimitry Andric 140*06c3fb27SDimitry Andric bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 141*06c3fb27SDimitry Andric const Formula &Val) { 14281ad6265SDimitry Andric // Returns true if and only if truth assignment of the flow condition implies 14381ad6265SDimitry Andric // that `Val` is also true. We prove whether or not this property holds by 14481ad6265SDimitry Andric // reducing the problem to satisfiability checking. In other words, we attempt 14581ad6265SDimitry Andric // to show that assuming `Val` is false makes the constraints induced by the 14681ad6265SDimitry Andric // flow condition unsatisfiable. 147*06c3fb27SDimitry Andric llvm::SetVector<const Formula *> Constraints; 148*06c3fb27SDimitry Andric Constraints.insert(&arena().makeAtomRef(Token)); 149*06c3fb27SDimitry Andric Constraints.insert(&arena().makeNot(Val)); 150*06c3fb27SDimitry Andric llvm::DenseSet<Atom> VisitedTokens; 15181ad6265SDimitry Andric addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 15281ad6265SDimitry Andric return isUnsatisfiable(std::move(Constraints)); 15381ad6265SDimitry Andric } 15481ad6265SDimitry Andric 155*06c3fb27SDimitry Andric bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) { 15681ad6265SDimitry Andric // Returns true if and only if we cannot prove that the flow condition can 15781ad6265SDimitry Andric // ever be false. 158*06c3fb27SDimitry Andric llvm::SetVector<const Formula *> Constraints; 159*06c3fb27SDimitry Andric Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token))); 160*06c3fb27SDimitry Andric llvm::DenseSet<Atom> VisitedTokens; 16181ad6265SDimitry Andric addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 16281ad6265SDimitry Andric return isUnsatisfiable(std::move(Constraints)); 16381ad6265SDimitry Andric } 16481ad6265SDimitry Andric 165*06c3fb27SDimitry Andric bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 166*06c3fb27SDimitry Andric const Formula &Val2) { 167*06c3fb27SDimitry Andric llvm::SetVector<const Formula *> Constraints; 168*06c3fb27SDimitry Andric Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 169*06c3fb27SDimitry Andric return isUnsatisfiable(std::move(Constraints)); 17081ad6265SDimitry Andric } 17181ad6265SDimitry Andric 17281ad6265SDimitry Andric void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 173*06c3fb27SDimitry Andric Atom Token, llvm::SetVector<const Formula *> &Constraints, 174*06c3fb27SDimitry Andric llvm::DenseSet<Atom> &VisitedTokens) { 175*06c3fb27SDimitry Andric auto Res = VisitedTokens.insert(Token); 17681ad6265SDimitry Andric if (!Res.second) 17781ad6265SDimitry Andric return; 17881ad6265SDimitry Andric 179*06c3fb27SDimitry Andric auto ConstraintsIt = FlowConditionConstraints.find(Token); 180972a253aSDimitry Andric if (ConstraintsIt == FlowConditionConstraints.end()) { 181*06c3fb27SDimitry Andric Constraints.insert(&arena().makeAtomRef(Token)); 18281ad6265SDimitry Andric } else { 18381ad6265SDimitry Andric // Bind flow condition token via `iff` to its set of constraints: 18481ad6265SDimitry Andric // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 185*06c3fb27SDimitry Andric Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 186*06c3fb27SDimitry Andric *ConstraintsIt->second)); 18781ad6265SDimitry Andric } 18881ad6265SDimitry Andric 189*06c3fb27SDimitry Andric auto DepsIt = FlowConditionDeps.find(Token); 190972a253aSDimitry Andric if (DepsIt != FlowConditionDeps.end()) { 191*06c3fb27SDimitry Andric for (Atom DepToken : DepsIt->second) { 192*06c3fb27SDimitry Andric addTransitiveFlowConditionConstraints(DepToken, Constraints, 19381ad6265SDimitry Andric VisitedTokens); 19481ad6265SDimitry Andric } 19581ad6265SDimitry Andric } 19681ad6265SDimitry Andric } 19781ad6265SDimitry Andric 198*06c3fb27SDimitry Andric void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 199*06c3fb27SDimitry Andric llvm::raw_ostream &OS) { 200*06c3fb27SDimitry Andric llvm::SetVector<const Formula *> Constraints; 201*06c3fb27SDimitry Andric Constraints.insert(&arena().makeAtomRef(Token)); 202*06c3fb27SDimitry Andric llvm::DenseSet<Atom> VisitedTokens; 203fcaf7f86SDimitry Andric addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 204fcaf7f86SDimitry Andric 205*06c3fb27SDimitry Andric // TODO: have formulas know about true/false directly instead 206*06c3fb27SDimitry Andric Atom True = arena().makeLiteral(true).getAtom(); 207*06c3fb27SDimitry Andric Atom False = arena().makeLiteral(false).getAtom(); 208*06c3fb27SDimitry Andric Formula::AtomNames Names = {{False, "false"}, {True, "true"}}; 209*06c3fb27SDimitry Andric 210*06c3fb27SDimitry Andric for (const auto *Constraint : Constraints) { 211*06c3fb27SDimitry Andric Constraint->print(OS, &Names); 212*06c3fb27SDimitry Andric OS << "\n"; 213*06c3fb27SDimitry Andric } 214fcaf7f86SDimitry Andric } 215fcaf7f86SDimitry Andric 216bdd1243dSDimitry Andric const ControlFlowContext * 217bdd1243dSDimitry Andric DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { 218bdd1243dSDimitry Andric // Canonicalize the key: 219bdd1243dSDimitry Andric F = F->getDefinition(); 220bdd1243dSDimitry Andric if (F == nullptr) 221bdd1243dSDimitry Andric return nullptr; 222bdd1243dSDimitry Andric auto It = FunctionContexts.find(F); 223bdd1243dSDimitry Andric if (It != FunctionContexts.end()) 224bdd1243dSDimitry Andric return &It->second; 225bdd1243dSDimitry Andric 226*06c3fb27SDimitry Andric if (F->hasBody()) { 227*06c3fb27SDimitry Andric auto CFCtx = ControlFlowContext::build(*F); 228bdd1243dSDimitry Andric // FIXME: Handle errors. 229bdd1243dSDimitry Andric assert(CFCtx); 230bdd1243dSDimitry Andric auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); 231bdd1243dSDimitry Andric return &Result.first->second; 232bdd1243dSDimitry Andric } 233bdd1243dSDimitry Andric 234bdd1243dSDimitry Andric return nullptr; 235bdd1243dSDimitry Andric } 236bdd1243dSDimitry Andric 237*06c3fb27SDimitry Andric static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 238*06c3fb27SDimitry Andric if (DataflowLog.empty()) 239*06c3fb27SDimitry Andric return Logger::textual(llvm::errs()); 240*06c3fb27SDimitry Andric 241*06c3fb27SDimitry Andric llvm::StringRef Dir = DataflowLog; 242*06c3fb27SDimitry Andric if (auto EC = llvm::sys::fs::create_directories(Dir)) 243*06c3fb27SDimitry Andric llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 244*06c3fb27SDimitry Andric // All analysis runs within a process will log to the same directory. 245*06c3fb27SDimitry Andric // Share a counter so they don't all overwrite each other's 0.html. 246*06c3fb27SDimitry Andric // (Don't share a logger, it's not threadsafe). 247*06c3fb27SDimitry Andric static std::atomic<unsigned> Counter = {0}; 248*06c3fb27SDimitry Andric auto StreamFactory = 249*06c3fb27SDimitry Andric [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 250*06c3fb27SDimitry Andric llvm::SmallString<256> File(Dir); 251*06c3fb27SDimitry Andric llvm::sys::path::append(File, 252*06c3fb27SDimitry Andric std::to_string(Counter.fetch_add(1)) + ".html"); 253*06c3fb27SDimitry Andric std::error_code EC; 254*06c3fb27SDimitry Andric auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 255*06c3fb27SDimitry Andric if (EC) { 256*06c3fb27SDimitry Andric llvm::errs() << "Failed to create log " << File << ": " << EC.message() 257*06c3fb27SDimitry Andric << "\n"; 258*06c3fb27SDimitry Andric return std::make_unique<llvm::raw_null_ostream>(); 259*06c3fb27SDimitry Andric } 260*06c3fb27SDimitry Andric return OS; 261*06c3fb27SDimitry Andric }; 262*06c3fb27SDimitry Andric return Logger::html(std::move(StreamFactory)); 263*06c3fb27SDimitry Andric } 264*06c3fb27SDimitry Andric 265*06c3fb27SDimitry Andric DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, 266*06c3fb27SDimitry Andric Options Opts) 267*06c3fb27SDimitry Andric : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { 268*06c3fb27SDimitry Andric assert(this->S != nullptr); 269*06c3fb27SDimitry Andric // If the -dataflow-log command-line flag was set, synthesize a logger. 270*06c3fb27SDimitry Andric // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 271*06c3fb27SDimitry Andric // based tools. 272*06c3fb27SDimitry Andric if (Opts.Log == nullptr) { 273*06c3fb27SDimitry Andric if (DataflowLog.getNumOccurrences() > 0) { 274*06c3fb27SDimitry Andric LogOwner = makeLoggerFromCommandLine(); 275*06c3fb27SDimitry Andric this->Opts.Log = LogOwner.get(); 276*06c3fb27SDimitry Andric // FIXME: if the flag is given a value, write an HTML log to a file. 277*06c3fb27SDimitry Andric } else { 278*06c3fb27SDimitry Andric this->Opts.Log = &Logger::null(); 279*06c3fb27SDimitry Andric } 280*06c3fb27SDimitry Andric } 281*06c3fb27SDimitry Andric } 282*06c3fb27SDimitry Andric 283*06c3fb27SDimitry Andric DataflowAnalysisContext::~DataflowAnalysisContext() = default; 284*06c3fb27SDimitry Andric 28581ad6265SDimitry Andric } // namespace dataflow 28681ad6265SDimitry Andric } // namespace clang 28781ad6265SDimitry Andric 28881ad6265SDimitry Andric using namespace clang; 28981ad6265SDimitry Andric 29081ad6265SDimitry Andric const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { 29181ad6265SDimitry Andric const Expr *Current = &E; 29281ad6265SDimitry Andric if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { 29381ad6265SDimitry Andric Current = EWC->getSubExpr(); 29481ad6265SDimitry Andric assert(Current != nullptr); 29581ad6265SDimitry Andric } 29681ad6265SDimitry Andric Current = Current->IgnoreParens(); 29781ad6265SDimitry Andric assert(Current != nullptr); 29881ad6265SDimitry Andric return *Current; 29981ad6265SDimitry Andric } 30081ad6265SDimitry Andric 30181ad6265SDimitry Andric const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { 30281ad6265SDimitry Andric if (auto *E = dyn_cast<Expr>(&S)) 30381ad6265SDimitry Andric return ignoreCFGOmittedNodes(*E); 30481ad6265SDimitry Andric return S; 30581ad6265SDimitry Andric } 30681ad6265SDimitry Andric 30781ad6265SDimitry Andric // FIXME: Does not precisely handle non-virtual diamond inheritance. A single 30881ad6265SDimitry Andric // field decl will be modeled for all instances of the inherited field. 309*06c3fb27SDimitry Andric static void getFieldsFromClassHierarchy(QualType Type, 310*06c3fb27SDimitry Andric clang::dataflow::FieldSet &Fields) { 31181ad6265SDimitry Andric if (Type->isIncompleteType() || Type->isDependentType() || 31281ad6265SDimitry Andric !Type->isRecordType()) 31381ad6265SDimitry Andric return; 31481ad6265SDimitry Andric 31581ad6265SDimitry Andric for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) 31681ad6265SDimitry Andric Fields.insert(Field); 31781ad6265SDimitry Andric if (auto *CXXRecord = Type->getAsCXXRecordDecl()) 31881ad6265SDimitry Andric for (const CXXBaseSpecifier &Base : CXXRecord->bases()) 31981ad6265SDimitry Andric getFieldsFromClassHierarchy(Base.getType(), Fields); 32081ad6265SDimitry Andric } 32181ad6265SDimitry Andric 32281ad6265SDimitry Andric /// Gets the set of all fields in the type. 323*06c3fb27SDimitry Andric clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { 324*06c3fb27SDimitry Andric FieldSet Fields; 32581ad6265SDimitry Andric getFieldsFromClassHierarchy(Type, Fields); 32681ad6265SDimitry Andric return Fields; 32781ad6265SDimitry Andric } 328