1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Analysis/FlowSensitive/DebugSupport.h" 18 #include "clang/Analysis/FlowSensitive/Formula.h" 19 #include "clang/Analysis/FlowSensitive/Logger.h" 20 #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" 21 #include "clang/Analysis/FlowSensitive/Value.h" 22 #include "llvm/ADT/SetOperations.h" 23 #include "llvm/ADT/SetVector.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/FileSystem.h" 27 #include "llvm/Support/Path.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 #include <memory> 31 #include <string> 32 #include <utility> 33 #include <vector> 34 35 static llvm::cl::opt<std::string> DataflowLog( 36 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 37 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 38 "log to stderr. With an arg, writes HTML logs under the " 39 "specified directory (one per analyzed function).")); 40 41 namespace clang { 42 namespace dataflow { 43 44 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { 45 // During context-sensitive analysis, a struct may be allocated in one 46 // function, but its field accessed in a function lower in the stack than 47 // the allocation. Since we only collect fields used in the function where 48 // the allocation occurs, we can't apply that filter when performing 49 // context-sensitive analysis. But, this only applies to storage locations, 50 // since field access it not allowed to fail. In contrast, field *values* 51 // don't need this allowance, since the API allows for uninitialized fields. 52 if (Opts.ContextSensitiveOpts) 53 return getObjectFields(Type); 54 55 return llvm::set_intersection(getObjectFields(Type), ModeledFields); 56 } 57 58 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { 59 ModeledFields.set_union(Fields); 60 } 61 62 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 63 if (!Type.isNull() && Type->isRecordType()) { 64 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 65 for (const FieldDecl *Field : getModeledFields(Type)) 66 if (Field->getType()->isReferenceType()) 67 FieldLocs.insert({Field, nullptr}); 68 else 69 FieldLocs.insert({Field, &createStorageLocation( 70 Field->getType().getNonReferenceType())}); 71 72 RecordStorageLocation::SyntheticFieldMap SyntheticFields; 73 for (const auto &Entry : getSyntheticFields(Type)) 74 SyntheticFields.insert( 75 {Entry.getKey(), 76 &createStorageLocation(Entry.getValue().getNonReferenceType())}); 77 78 return createRecordStorageLocation(Type, std::move(FieldLocs), 79 std::move(SyntheticFields)); 80 } 81 return arena().create<ScalarStorageLocation>(Type); 82 } 83 84 // Returns the keys for a given `StringMap`. 85 // Can't use `StringSet` as the return type as it doesn't support `operator==`. 86 template <typename T> 87 static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) { 88 return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end()); 89 } 90 91 RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation( 92 QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, 93 RecordStorageLocation::SyntheticFieldMap SyntheticFields) { 94 assert(Type->isRecordType()); 95 assert(containsSameFields(getModeledFields(Type), FieldLocs)); 96 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields)); 97 98 RecordStorageLocationCreated = true; 99 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs), 100 std::move(SyntheticFields)); 101 } 102 103 StorageLocation & 104 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { 105 if (auto *Loc = DeclToLoc.lookup(&D)) 106 return *Loc; 107 auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); 108 DeclToLoc[&D] = &Loc; 109 return Loc; 110 } 111 112 StorageLocation & 113 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 114 const Expr &CanonE = ignoreCFGOmittedNodes(E); 115 116 if (auto *Loc = ExprToLoc.lookup(&CanonE)) 117 return *Loc; 118 auto &Loc = createStorageLocation(CanonE.getType()); 119 ExprToLoc[&CanonE] = &Loc; 120 return Loc; 121 } 122 123 PointerValue & 124 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 125 auto CanonicalPointeeType = 126 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 127 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 128 if (Res.second) { 129 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 130 Res.first->second = &arena().create<PointerValue>(PointeeLoc); 131 } 132 return *Res.first->second; 133 } 134 135 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { 136 if (Invariant == nullptr) 137 Invariant = &Constraint; 138 else 139 Invariant = &arena().makeAnd(*Invariant, Constraint); 140 } 141 142 void DataflowAnalysisContext::addFlowConditionConstraint( 143 Atom Token, const Formula &Constraint) { 144 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 145 if (!Res.second) { 146 Res.first->second = 147 &arena().makeAnd(*Res.first->second, Constraint); 148 } 149 } 150 151 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 152 Atom ForkToken = arena().makeFlowConditionToken(); 153 FlowConditionDeps[ForkToken].insert(Token); 154 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 155 return ForkToken; 156 } 157 158 Atom 159 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 160 Atom SecondToken) { 161 Atom Token = arena().makeFlowConditionToken(); 162 FlowConditionDeps[Token].insert(FirstToken); 163 FlowConditionDeps[Token].insert(SecondToken); 164 addFlowConditionConstraint(Token, 165 arena().makeOr(arena().makeAtomRef(FirstToken), 166 arena().makeAtomRef(SecondToken))); 167 return Token; 168 } 169 170 Solver::Result DataflowAnalysisContext::querySolver( 171 llvm::SetVector<const Formula *> Constraints) { 172 return S->solve(Constraints.getArrayRef()); 173 } 174 175 bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 176 const Formula &F) { 177 if (F.isLiteral(true)) 178 return true; 179 180 // Returns true if and only if truth assignment of the flow condition implies 181 // that `F` is also true. We prove whether or not this property holds by 182 // reducing the problem to satisfiability checking. In other words, we attempt 183 // to show that assuming `F` is false makes the constraints induced by the 184 // flow condition unsatisfiable. 185 llvm::SetVector<const Formula *> Constraints; 186 Constraints.insert(&arena().makeAtomRef(Token)); 187 Constraints.insert(&arena().makeNot(F)); 188 addTransitiveFlowConditionConstraints(Token, Constraints); 189 return isUnsatisfiable(std::move(Constraints)); 190 } 191 192 bool DataflowAnalysisContext::flowConditionAllows(Atom Token, 193 const Formula &F) { 194 if (F.isLiteral(false)) 195 return false; 196 197 llvm::SetVector<const Formula *> Constraints; 198 Constraints.insert(&arena().makeAtomRef(Token)); 199 Constraints.insert(&F); 200 addTransitiveFlowConditionConstraints(Token, Constraints); 201 return isSatisfiable(std::move(Constraints)); 202 } 203 204 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 205 const Formula &Val2) { 206 llvm::SetVector<const Formula *> Constraints; 207 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 208 return isUnsatisfiable(std::move(Constraints)); 209 } 210 211 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 212 Atom Token, llvm::SetVector<const Formula *> &Constraints) { 213 llvm::DenseSet<Atom> AddedTokens; 214 std::vector<Atom> Remaining = {Token}; 215 216 if (Invariant) 217 Constraints.insert(Invariant); 218 // Define all the flow conditions that might be referenced in constraints. 219 while (!Remaining.empty()) { 220 auto Token = Remaining.back(); 221 Remaining.pop_back(); 222 if (!AddedTokens.insert(Token).second) 223 continue; 224 225 auto ConstraintsIt = FlowConditionConstraints.find(Token); 226 if (ConstraintsIt == FlowConditionConstraints.end()) { 227 Constraints.insert(&arena().makeAtomRef(Token)); 228 } else { 229 // Bind flow condition token via `iff` to its set of constraints: 230 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 231 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 232 *ConstraintsIt->second)); 233 } 234 235 if (auto DepsIt = FlowConditionDeps.find(Token); 236 DepsIt != FlowConditionDeps.end()) 237 for (Atom A : DepsIt->second) 238 Remaining.push_back(A); 239 } 240 } 241 242 static void printAtomList(const llvm::SmallVector<Atom> &Atoms, 243 llvm::raw_ostream &OS) { 244 OS << "("; 245 for (size_t i = 0; i < Atoms.size(); ++i) { 246 OS << Atoms[i]; 247 if (i + 1 < Atoms.size()) 248 OS << ", "; 249 } 250 OS << ")\n"; 251 } 252 253 void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 254 llvm::raw_ostream &OS) { 255 llvm::SetVector<const Formula *> Constraints; 256 Constraints.insert(&arena().makeAtomRef(Token)); 257 addTransitiveFlowConditionConstraints(Token, Constraints); 258 259 OS << "Flow condition token: " << Token << "\n"; 260 SimplifyConstraintsInfo Info; 261 llvm::SetVector<const Formula *> OriginalConstraints = Constraints; 262 simplifyConstraints(Constraints, arena(), &Info); 263 if (!Constraints.empty()) { 264 OS << "Constraints:\n"; 265 for (const auto *Constraint : Constraints) { 266 Constraint->print(OS); 267 OS << "\n"; 268 } 269 } 270 if (!Info.TrueAtoms.empty()) { 271 OS << "True atoms: "; 272 printAtomList(Info.TrueAtoms, OS); 273 } 274 if (!Info.FalseAtoms.empty()) { 275 OS << "False atoms: "; 276 printAtomList(Info.FalseAtoms, OS); 277 } 278 if (!Info.EquivalentAtoms.empty()) { 279 OS << "Equivalent atoms:\n"; 280 for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms) 281 printAtomList(Class, OS); 282 } 283 284 OS << "\nFlow condition constraints before simplification:\n"; 285 for (const auto *Constraint : OriginalConstraints) { 286 Constraint->print(OS); 287 OS << "\n"; 288 } 289 } 290 291 const ControlFlowContext * 292 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { 293 // Canonicalize the key: 294 F = F->getDefinition(); 295 if (F == nullptr) 296 return nullptr; 297 auto It = FunctionContexts.find(F); 298 if (It != FunctionContexts.end()) 299 return &It->second; 300 301 if (F->doesThisDeclarationHaveABody()) { 302 auto CFCtx = ControlFlowContext::build(*F); 303 // FIXME: Handle errors. 304 assert(CFCtx); 305 auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); 306 return &Result.first->second; 307 } 308 309 return nullptr; 310 } 311 312 static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 313 if (DataflowLog.empty()) 314 return Logger::textual(llvm::errs()); 315 316 llvm::StringRef Dir = DataflowLog; 317 if (auto EC = llvm::sys::fs::create_directories(Dir)) 318 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 319 // All analysis runs within a process will log to the same directory. 320 // Share a counter so they don't all overwrite each other's 0.html. 321 // (Don't share a logger, it's not threadsafe). 322 static std::atomic<unsigned> Counter = {0}; 323 auto StreamFactory = 324 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 325 llvm::SmallString<256> File(Dir); 326 llvm::sys::path::append(File, 327 std::to_string(Counter.fetch_add(1)) + ".html"); 328 std::error_code EC; 329 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 330 if (EC) { 331 llvm::errs() << "Failed to create log " << File << ": " << EC.message() 332 << "\n"; 333 return std::make_unique<llvm::raw_null_ostream>(); 334 } 335 return OS; 336 }; 337 return Logger::html(std::move(StreamFactory)); 338 } 339 340 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, 341 Options Opts) 342 : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { 343 assert(this->S != nullptr); 344 // If the -dataflow-log command-line flag was set, synthesize a logger. 345 // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 346 // based tools. 347 if (Opts.Log == nullptr) { 348 if (DataflowLog.getNumOccurrences() > 0) { 349 LogOwner = makeLoggerFromCommandLine(); 350 this->Opts.Log = LogOwner.get(); 351 // FIXME: if the flag is given a value, write an HTML log to a file. 352 } else { 353 this->Opts.Log = &Logger::null(); 354 } 355 } 356 } 357 358 DataflowAnalysisContext::~DataflowAnalysisContext() = default; 359 360 } // namespace dataflow 361 } // namespace clang 362 363 using namespace clang; 364 365 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { 366 const Expr *Current = &E; 367 if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { 368 Current = EWC->getSubExpr(); 369 assert(Current != nullptr); 370 } 371 Current = Current->IgnoreParens(); 372 assert(Current != nullptr); 373 return *Current; 374 } 375 376 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { 377 if (auto *E = dyn_cast<Expr>(&S)) 378 return ignoreCFGOmittedNodes(*E); 379 return S; 380 } 381 382 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single 383 // field decl will be modeled for all instances of the inherited field. 384 static void getFieldsFromClassHierarchy(QualType Type, 385 clang::dataflow::FieldSet &Fields) { 386 if (Type->isIncompleteType() || Type->isDependentType() || 387 !Type->isRecordType()) 388 return; 389 390 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) 391 Fields.insert(Field); 392 if (auto *CXXRecord = Type->getAsCXXRecordDecl()) 393 for (const CXXBaseSpecifier &Base : CXXRecord->bases()) 394 getFieldsFromClassHierarchy(Base.getType(), Fields); 395 } 396 397 /// Gets the set of all fields in the type. 398 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { 399 FieldSet Fields; 400 getFieldsFromClassHierarchy(Type, Fields); 401 return Fields; 402 } 403 404 bool clang::dataflow::containsSameFields( 405 const clang::dataflow::FieldSet &Fields, 406 const clang::dataflow::RecordStorageLocation::FieldToLoc &FieldLocs) { 407 if (Fields.size() != FieldLocs.size()) 408 return false; 409 for ([[maybe_unused]] auto [Field, Loc] : FieldLocs) 410 if (!Fields.contains(cast_or_null<FieldDecl>(Field))) 411 return false; 412 return true; 413 } 414