1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Analysis/FlowSensitive/DebugSupport.h" 18 #include "clang/Analysis/FlowSensitive/Formula.h" 19 #include "clang/Analysis/FlowSensitive/Logger.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/SetOperations.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/FileSystem.h" 26 #include "llvm/Support/Path.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cassert> 29 #include <memory> 30 #include <string> 31 #include <utility> 32 #include <vector> 33 34 static llvm::cl::opt<std::string> DataflowLog( 35 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 36 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 37 "log to stderr. With an arg, writes HTML logs under the " 38 "specified directory (one per analyzed function).")); 39 40 namespace clang { 41 namespace dataflow { 42 43 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { 44 // During context-sensitive analysis, a struct may be allocated in one 45 // function, but its field accessed in a function lower in the stack than 46 // the allocation. Since we only collect fields used in the function where 47 // the allocation occurs, we can't apply that filter when performing 48 // context-sensitive analysis. But, this only applies to storage locations, 49 // since field access it not allowed to fail. In contrast, field *values* 50 // don't need this allowance, since the API allows for uninitialized fields. 51 if (Opts.ContextSensitiveOpts) 52 return getObjectFields(Type); 53 54 return llvm::set_intersection(getObjectFields(Type), ModeledFields); 55 } 56 57 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { 58 ModeledFields.set_union(Fields); 59 } 60 61 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 62 if (!Type.isNull() && Type->isRecordType()) { 63 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 64 for (const FieldDecl *Field : getModeledFields(Type)) 65 if (Field->getType()->isReferenceType()) 66 FieldLocs.insert({Field, nullptr}); 67 else 68 FieldLocs.insert({Field, &createStorageLocation( 69 Field->getType().getNonReferenceType())}); 70 return arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs)); 71 } 72 return arena().create<ScalarStorageLocation>(Type); 73 } 74 75 StorageLocation & 76 DataflowAnalysisContext::getStableStorageLocation(const VarDecl &D) { 77 if (auto *Loc = getStorageLocation(D)) 78 return *Loc; 79 auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); 80 setStorageLocation(D, Loc); 81 return Loc; 82 } 83 84 StorageLocation & 85 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 86 if (auto *Loc = getStorageLocation(E)) 87 return *Loc; 88 auto &Loc = createStorageLocation(E.getType()); 89 setStorageLocation(E, Loc); 90 return Loc; 91 } 92 93 PointerValue & 94 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 95 auto CanonicalPointeeType = 96 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 97 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 98 if (Res.second) { 99 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 100 Res.first->second = &arena().create<PointerValue>(PointeeLoc); 101 } 102 return *Res.first->second; 103 } 104 105 void DataflowAnalysisContext::addFlowConditionConstraint( 106 Atom Token, const Formula &Constraint) { 107 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 108 if (!Res.second) { 109 Res.first->second = 110 &arena().makeAnd(*Res.first->second, Constraint); 111 } 112 } 113 114 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 115 Atom ForkToken = arena().makeFlowConditionToken(); 116 FlowConditionDeps[ForkToken].insert(Token); 117 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 118 return ForkToken; 119 } 120 121 Atom 122 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 123 Atom SecondToken) { 124 Atom Token = arena().makeFlowConditionToken(); 125 FlowConditionDeps[Token].insert(FirstToken); 126 FlowConditionDeps[Token].insert(SecondToken); 127 addFlowConditionConstraint(Token, 128 arena().makeOr(arena().makeAtomRef(FirstToken), 129 arena().makeAtomRef(SecondToken))); 130 return Token; 131 } 132 133 Solver::Result DataflowAnalysisContext::querySolver( 134 llvm::SetVector<const Formula *> Constraints) { 135 Constraints.insert(&arena().makeLiteral(true)); 136 Constraints.insert(&arena().makeNot(arena().makeLiteral(false))); 137 return S->solve(Constraints.getArrayRef()); 138 } 139 140 bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 141 const Formula &Val) { 142 // Returns true if and only if truth assignment of the flow condition implies 143 // that `Val` is also true. We prove whether or not this property holds by 144 // reducing the problem to satisfiability checking. In other words, we attempt 145 // to show that assuming `Val` is false makes the constraints induced by the 146 // flow condition unsatisfiable. 147 llvm::SetVector<const Formula *> Constraints; 148 Constraints.insert(&arena().makeAtomRef(Token)); 149 Constraints.insert(&arena().makeNot(Val)); 150 llvm::DenseSet<Atom> VisitedTokens; 151 addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 152 return isUnsatisfiable(std::move(Constraints)); 153 } 154 155 bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) { 156 // Returns true if and only if we cannot prove that the flow condition can 157 // ever be false. 158 llvm::SetVector<const Formula *> Constraints; 159 Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token))); 160 llvm::DenseSet<Atom> VisitedTokens; 161 addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 162 return isUnsatisfiable(std::move(Constraints)); 163 } 164 165 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 166 const Formula &Val2) { 167 llvm::SetVector<const Formula *> Constraints; 168 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 169 return isUnsatisfiable(std::move(Constraints)); 170 } 171 172 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 173 Atom Token, llvm::SetVector<const Formula *> &Constraints, 174 llvm::DenseSet<Atom> &VisitedTokens) { 175 auto Res = VisitedTokens.insert(Token); 176 if (!Res.second) 177 return; 178 179 auto ConstraintsIt = FlowConditionConstraints.find(Token); 180 if (ConstraintsIt == FlowConditionConstraints.end()) { 181 Constraints.insert(&arena().makeAtomRef(Token)); 182 } else { 183 // Bind flow condition token via `iff` to its set of constraints: 184 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 185 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 186 *ConstraintsIt->second)); 187 } 188 189 auto DepsIt = FlowConditionDeps.find(Token); 190 if (DepsIt != FlowConditionDeps.end()) { 191 for (Atom DepToken : DepsIt->second) { 192 addTransitiveFlowConditionConstraints(DepToken, Constraints, 193 VisitedTokens); 194 } 195 } 196 } 197 198 void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 199 llvm::raw_ostream &OS) { 200 llvm::SetVector<const Formula *> Constraints; 201 Constraints.insert(&arena().makeAtomRef(Token)); 202 llvm::DenseSet<Atom> VisitedTokens; 203 addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 204 205 // TODO: have formulas know about true/false directly instead 206 Atom True = arena().makeLiteral(true).getAtom(); 207 Atom False = arena().makeLiteral(false).getAtom(); 208 Formula::AtomNames Names = {{False, "false"}, {True, "true"}}; 209 210 for (const auto *Constraint : Constraints) { 211 Constraint->print(OS, &Names); 212 OS << "\n"; 213 } 214 } 215 216 const ControlFlowContext * 217 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { 218 // Canonicalize the key: 219 F = F->getDefinition(); 220 if (F == nullptr) 221 return nullptr; 222 auto It = FunctionContexts.find(F); 223 if (It != FunctionContexts.end()) 224 return &It->second; 225 226 if (F->hasBody()) { 227 auto CFCtx = ControlFlowContext::build(*F); 228 // FIXME: Handle errors. 229 assert(CFCtx); 230 auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); 231 return &Result.first->second; 232 } 233 234 return nullptr; 235 } 236 237 static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 238 if (DataflowLog.empty()) 239 return Logger::textual(llvm::errs()); 240 241 llvm::StringRef Dir = DataflowLog; 242 if (auto EC = llvm::sys::fs::create_directories(Dir)) 243 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 244 // All analysis runs within a process will log to the same directory. 245 // Share a counter so they don't all overwrite each other's 0.html. 246 // (Don't share a logger, it's not threadsafe). 247 static std::atomic<unsigned> Counter = {0}; 248 auto StreamFactory = 249 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 250 llvm::SmallString<256> File(Dir); 251 llvm::sys::path::append(File, 252 std::to_string(Counter.fetch_add(1)) + ".html"); 253 std::error_code EC; 254 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 255 if (EC) { 256 llvm::errs() << "Failed to create log " << File << ": " << EC.message() 257 << "\n"; 258 return std::make_unique<llvm::raw_null_ostream>(); 259 } 260 return OS; 261 }; 262 return Logger::html(std::move(StreamFactory)); 263 } 264 265 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, 266 Options Opts) 267 : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { 268 assert(this->S != nullptr); 269 // If the -dataflow-log command-line flag was set, synthesize a logger. 270 // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 271 // based tools. 272 if (Opts.Log == nullptr) { 273 if (DataflowLog.getNumOccurrences() > 0) { 274 LogOwner = makeLoggerFromCommandLine(); 275 this->Opts.Log = LogOwner.get(); 276 // FIXME: if the flag is given a value, write an HTML log to a file. 277 } else { 278 this->Opts.Log = &Logger::null(); 279 } 280 } 281 } 282 283 DataflowAnalysisContext::~DataflowAnalysisContext() = default; 284 285 } // namespace dataflow 286 } // namespace clang 287 288 using namespace clang; 289 290 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { 291 const Expr *Current = &E; 292 if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { 293 Current = EWC->getSubExpr(); 294 assert(Current != nullptr); 295 } 296 Current = Current->IgnoreParens(); 297 assert(Current != nullptr); 298 return *Current; 299 } 300 301 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { 302 if (auto *E = dyn_cast<Expr>(&S)) 303 return ignoreCFGOmittedNodes(*E); 304 return S; 305 } 306 307 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single 308 // field decl will be modeled for all instances of the inherited field. 309 static void getFieldsFromClassHierarchy(QualType Type, 310 clang::dataflow::FieldSet &Fields) { 311 if (Type->isIncompleteType() || Type->isDependentType() || 312 !Type->isRecordType()) 313 return; 314 315 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) 316 Fields.insert(Field); 317 if (auto *CXXRecord = Type->getAsCXXRecordDecl()) 318 for (const CXXBaseSpecifier &Base : CXXRecord->bases()) 319 getFieldsFromClassHierarchy(Base.getType(), Fields); 320 } 321 322 /// Gets the set of all fields in the type. 323 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { 324 FieldSet Fields; 325 getFieldsFromClassHierarchy(Type, Fields); 326 return Fields; 327 } 328