1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Analysis/FlowSensitive/ASTOps.h" 18 #include "clang/Analysis/FlowSensitive/DebugSupport.h" 19 #include "clang/Analysis/FlowSensitive/Formula.h" 20 #include "clang/Analysis/FlowSensitive/Logger.h" 21 #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" 22 #include "clang/Analysis/FlowSensitive/Value.h" 23 #include "llvm/ADT/SetOperations.h" 24 #include "llvm/ADT/SetVector.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Support/Debug.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/Path.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <cassert> 31 #include <memory> 32 #include <string> 33 #include <utility> 34 #include <vector> 35 36 static llvm::cl::opt<std::string> DataflowLog( 37 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 38 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 39 "log to stderr. With an arg, writes HTML logs under the " 40 "specified directory (one per analyzed function).")); 41 42 namespace clang { 43 namespace dataflow { 44 45 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { 46 // During context-sensitive analysis, a struct may be allocated in one 47 // function, but its field accessed in a function lower in the stack than 48 // the allocation. Since we only collect fields used in the function where 49 // the allocation occurs, we can't apply that filter when performing 50 // context-sensitive analysis. But, this only applies to storage locations, 51 // since field access it not allowed to fail. In contrast, field *values* 52 // don't need this allowance, since the API allows for uninitialized fields. 53 if (Opts.ContextSensitiveOpts) 54 return getObjectFields(Type); 55 56 return llvm::set_intersection(getObjectFields(Type), ModeledFields); 57 } 58 59 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { 60 ModeledFields.set_union(Fields); 61 } 62 63 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 64 if (!Type.isNull() && Type->isRecordType()) { 65 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 66 for (const FieldDecl *Field : getModeledFields(Type)) 67 if (Field->getType()->isReferenceType()) 68 FieldLocs.insert({Field, nullptr}); 69 else 70 FieldLocs.insert({Field, &createStorageLocation( 71 Field->getType().getNonReferenceType())}); 72 73 RecordStorageLocation::SyntheticFieldMap SyntheticFields; 74 for (const auto &Entry : getSyntheticFields(Type)) 75 SyntheticFields.insert( 76 {Entry.getKey(), 77 &createStorageLocation(Entry.getValue().getNonReferenceType())}); 78 79 return createRecordStorageLocation(Type, std::move(FieldLocs), 80 std::move(SyntheticFields)); 81 } 82 return arena().create<ScalarStorageLocation>(Type); 83 } 84 85 // Returns the keys for a given `StringMap`. 86 // Can't use `StringSet` as the return type as it doesn't support `operator==`. 87 template <typename T> 88 static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) { 89 return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end()); 90 } 91 92 RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation( 93 QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, 94 RecordStorageLocation::SyntheticFieldMap SyntheticFields) { 95 assert(Type->isRecordType()); 96 assert(containsSameFields(getModeledFields(Type), FieldLocs)); 97 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields)); 98 99 RecordStorageLocationCreated = true; 100 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs), 101 std::move(SyntheticFields)); 102 } 103 104 StorageLocation & 105 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { 106 if (auto *Loc = DeclToLoc.lookup(&D)) 107 return *Loc; 108 auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); 109 DeclToLoc[&D] = &Loc; 110 return Loc; 111 } 112 113 StorageLocation & 114 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 115 const Expr &CanonE = ignoreCFGOmittedNodes(E); 116 117 if (auto *Loc = ExprToLoc.lookup(&CanonE)) 118 return *Loc; 119 auto &Loc = createStorageLocation(CanonE.getType()); 120 ExprToLoc[&CanonE] = &Loc; 121 return Loc; 122 } 123 124 PointerValue & 125 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 126 auto CanonicalPointeeType = 127 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 128 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 129 if (Res.second) { 130 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 131 Res.first->second = &arena().create<PointerValue>(PointeeLoc); 132 } 133 return *Res.first->second; 134 } 135 136 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { 137 if (Invariant == nullptr) 138 Invariant = &Constraint; 139 else 140 Invariant = &arena().makeAnd(*Invariant, Constraint); 141 } 142 143 void DataflowAnalysisContext::addFlowConditionConstraint( 144 Atom Token, const Formula &Constraint) { 145 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 146 if (!Res.second) { 147 Res.first->second = 148 &arena().makeAnd(*Res.first->second, Constraint); 149 } 150 } 151 152 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 153 Atom ForkToken = arena().makeFlowConditionToken(); 154 FlowConditionDeps[ForkToken].insert(Token); 155 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 156 return ForkToken; 157 } 158 159 Atom 160 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 161 Atom SecondToken) { 162 Atom Token = arena().makeFlowConditionToken(); 163 FlowConditionDeps[Token].insert(FirstToken); 164 FlowConditionDeps[Token].insert(SecondToken); 165 addFlowConditionConstraint(Token, 166 arena().makeOr(arena().makeAtomRef(FirstToken), 167 arena().makeAtomRef(SecondToken))); 168 return Token; 169 } 170 171 Solver::Result DataflowAnalysisContext::querySolver( 172 llvm::SetVector<const Formula *> Constraints) { 173 return S.solve(Constraints.getArrayRef()); 174 } 175 176 bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 177 const Formula &F) { 178 if (F.isLiteral(true)) 179 return true; 180 181 // Returns true if and only if truth assignment of the flow condition implies 182 // that `F` is also true. We prove whether or not this property holds by 183 // reducing the problem to satisfiability checking. In other words, we attempt 184 // to show that assuming `F` is false makes the constraints induced by the 185 // flow condition unsatisfiable. 186 llvm::SetVector<const Formula *> Constraints; 187 Constraints.insert(&arena().makeAtomRef(Token)); 188 Constraints.insert(&arena().makeNot(F)); 189 addTransitiveFlowConditionConstraints(Token, Constraints); 190 return isUnsatisfiable(std::move(Constraints)); 191 } 192 193 bool DataflowAnalysisContext::flowConditionAllows(Atom Token, 194 const Formula &F) { 195 if (F.isLiteral(false)) 196 return false; 197 198 llvm::SetVector<const Formula *> Constraints; 199 Constraints.insert(&arena().makeAtomRef(Token)); 200 Constraints.insert(&F); 201 addTransitiveFlowConditionConstraints(Token, Constraints); 202 return isSatisfiable(std::move(Constraints)); 203 } 204 205 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 206 const Formula &Val2) { 207 llvm::SetVector<const Formula *> Constraints; 208 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 209 return isUnsatisfiable(std::move(Constraints)); 210 } 211 212 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 213 Atom Token, llvm::SetVector<const Formula *> &Constraints) { 214 llvm::DenseSet<Atom> AddedTokens; 215 std::vector<Atom> Remaining = {Token}; 216 217 if (Invariant) 218 Constraints.insert(Invariant); 219 // Define all the flow conditions that might be referenced in constraints. 220 while (!Remaining.empty()) { 221 auto Token = Remaining.back(); 222 Remaining.pop_back(); 223 if (!AddedTokens.insert(Token).second) 224 continue; 225 226 auto ConstraintsIt = FlowConditionConstraints.find(Token); 227 if (ConstraintsIt == FlowConditionConstraints.end()) { 228 Constraints.insert(&arena().makeAtomRef(Token)); 229 } else { 230 // Bind flow condition token via `iff` to its set of constraints: 231 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 232 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 233 *ConstraintsIt->second)); 234 } 235 236 if (auto DepsIt = FlowConditionDeps.find(Token); 237 DepsIt != FlowConditionDeps.end()) 238 for (Atom A : DepsIt->second) 239 Remaining.push_back(A); 240 } 241 } 242 243 static void printAtomList(const llvm::SmallVector<Atom> &Atoms, 244 llvm::raw_ostream &OS) { 245 OS << "("; 246 for (size_t i = 0; i < Atoms.size(); ++i) { 247 OS << Atoms[i]; 248 if (i + 1 < Atoms.size()) 249 OS << ", "; 250 } 251 OS << ")\n"; 252 } 253 254 void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 255 llvm::raw_ostream &OS) { 256 llvm::SetVector<const Formula *> Constraints; 257 Constraints.insert(&arena().makeAtomRef(Token)); 258 addTransitiveFlowConditionConstraints(Token, Constraints); 259 260 OS << "Flow condition token: " << Token << "\n"; 261 SimplifyConstraintsInfo Info; 262 llvm::SetVector<const Formula *> OriginalConstraints = Constraints; 263 simplifyConstraints(Constraints, arena(), &Info); 264 if (!Constraints.empty()) { 265 OS << "Constraints:\n"; 266 for (const auto *Constraint : Constraints) { 267 Constraint->print(OS); 268 OS << "\n"; 269 } 270 } 271 if (!Info.TrueAtoms.empty()) { 272 OS << "True atoms: "; 273 printAtomList(Info.TrueAtoms, OS); 274 } 275 if (!Info.FalseAtoms.empty()) { 276 OS << "False atoms: "; 277 printAtomList(Info.FalseAtoms, OS); 278 } 279 if (!Info.EquivalentAtoms.empty()) { 280 OS << "Equivalent atoms:\n"; 281 for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms) 282 printAtomList(Class, OS); 283 } 284 285 OS << "\nFlow condition constraints before simplification:\n"; 286 for (const auto *Constraint : OriginalConstraints) { 287 Constraint->print(OS); 288 OS << "\n"; 289 } 290 } 291 292 const AdornedCFG * 293 DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) { 294 // Canonicalize the key: 295 F = F->getDefinition(); 296 if (F == nullptr) 297 return nullptr; 298 auto It = FunctionContexts.find(F); 299 if (It != FunctionContexts.end()) 300 return &It->second; 301 302 if (F->doesThisDeclarationHaveABody()) { 303 auto ACFG = AdornedCFG::build(*F); 304 // FIXME: Handle errors. 305 assert(ACFG); 306 auto Result = FunctionContexts.insert({F, std::move(*ACFG)}); 307 return &Result.first->second; 308 } 309 310 return nullptr; 311 } 312 313 static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 314 if (DataflowLog.empty()) 315 return Logger::textual(llvm::errs()); 316 317 llvm::StringRef Dir = DataflowLog; 318 if (auto EC = llvm::sys::fs::create_directories(Dir)) 319 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 320 // All analysis runs within a process will log to the same directory. 321 // Share a counter so they don't all overwrite each other's 0.html. 322 // (Don't share a logger, it's not threadsafe). 323 static std::atomic<unsigned> Counter = {0}; 324 auto StreamFactory = 325 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 326 llvm::SmallString<256> File(Dir); 327 llvm::sys::path::append(File, 328 std::to_string(Counter.fetch_add(1)) + ".html"); 329 std::error_code EC; 330 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 331 if (EC) { 332 llvm::errs() << "Failed to create log " << File << ": " << EC.message() 333 << "\n"; 334 return std::make_unique<llvm::raw_null_ostream>(); 335 } 336 return OS; 337 }; 338 return Logger::html(std::move(StreamFactory)); 339 } 340 341 DataflowAnalysisContext::DataflowAnalysisContext( 342 Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts) 343 : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()), 344 Opts(Opts) { 345 // If the -dataflow-log command-line flag was set, synthesize a logger. 346 // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 347 // based tools. 348 if (Opts.Log == nullptr) { 349 if (DataflowLog.getNumOccurrences() > 0) { 350 LogOwner = makeLoggerFromCommandLine(); 351 this->Opts.Log = LogOwner.get(); 352 // FIXME: if the flag is given a value, write an HTML log to a file. 353 } else { 354 this->Opts.Log = &Logger::null(); 355 } 356 } 357 } 358 359 DataflowAnalysisContext::~DataflowAnalysisContext() = default; 360 361 } // namespace dataflow 362 } // namespace clang 363