1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 16 #include "clang/Analysis/FlowSensitive/ASTOps.h" 17 #include "clang/Analysis/FlowSensitive/Formula.h" 18 #include "clang/Analysis/FlowSensitive/Logger.h" 19 #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/SetOperations.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/FileSystem.h" 26 #include "llvm/Support/Path.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cassert> 29 #include <memory> 30 #include <string> 31 #include <utility> 32 #include <vector> 33 34 static llvm::cl::opt<std::string> DataflowLog( 35 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 36 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 37 "log to stderr. With an arg, writes HTML logs under the " 38 "specified directory (one per analyzed function).")); 39 40 namespace clang { 41 namespace dataflow { 42 43 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { 44 // During context-sensitive analysis, a struct may be allocated in one 45 // function, but its field accessed in a function lower in the stack than 46 // the allocation. Since we only collect fields used in the function where 47 // the allocation occurs, we can't apply that filter when performing 48 // context-sensitive analysis. But, this only applies to storage locations, 49 // since field access it not allowed to fail. In contrast, field *values* 50 // don't need this allowance, since the API allows for uninitialized fields. 51 if (Opts.ContextSensitiveOpts) 52 return getObjectFields(Type); 53 54 return llvm::set_intersection(getObjectFields(Type), ModeledFields); 55 } 56 57 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { 58 ModeledFields.set_union(Fields); 59 } 60 61 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 62 if (!Type.isNull() && Type->isRecordType()) { 63 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 64 for (const FieldDecl *Field : getModeledFields(Type)) 65 if (Field->getType()->isReferenceType()) 66 FieldLocs.insert({Field, nullptr}); 67 else 68 FieldLocs.insert({Field, &createStorageLocation( 69 Field->getType().getNonReferenceType())}); 70 71 RecordStorageLocation::SyntheticFieldMap SyntheticFields; 72 for (const auto &Entry : getSyntheticFields(Type)) 73 SyntheticFields.insert( 74 {Entry.getKey(), 75 &createStorageLocation(Entry.getValue().getNonReferenceType())}); 76 77 return createRecordStorageLocation(Type, std::move(FieldLocs), 78 std::move(SyntheticFields)); 79 } 80 return arena().create<ScalarStorageLocation>(Type); 81 } 82 83 // Returns the keys for a given `StringMap`. 84 // Can't use `StringSet` as the return type as it doesn't support `operator==`. 85 template <typename T> 86 static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) { 87 return llvm::DenseSet<llvm::StringRef>(llvm::from_range, Map.keys()); 88 } 89 90 RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation( 91 QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, 92 RecordStorageLocation::SyntheticFieldMap SyntheticFields) { 93 assert(Type->isRecordType()); 94 assert(containsSameFields(getModeledFields(Type), FieldLocs)); 95 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields)); 96 97 RecordStorageLocationCreated = true; 98 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs), 99 std::move(SyntheticFields)); 100 } 101 102 StorageLocation & 103 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { 104 if (auto *Loc = DeclToLoc.lookup(&D)) 105 return *Loc; 106 auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); 107 DeclToLoc[&D] = &Loc; 108 return Loc; 109 } 110 111 StorageLocation & 112 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 113 const Expr &CanonE = ignoreCFGOmittedNodes(E); 114 115 if (auto *Loc = ExprToLoc.lookup(&CanonE)) 116 return *Loc; 117 auto &Loc = createStorageLocation(CanonE.getType()); 118 ExprToLoc[&CanonE] = &Loc; 119 return Loc; 120 } 121 122 PointerValue & 123 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 124 auto CanonicalPointeeType = 125 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 126 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 127 if (Res.second) { 128 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 129 Res.first->second = &arena().create<PointerValue>(PointeeLoc); 130 } 131 return *Res.first->second; 132 } 133 134 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { 135 if (Invariant == nullptr) 136 Invariant = &Constraint; 137 else 138 Invariant = &arena().makeAnd(*Invariant, Constraint); 139 } 140 141 void DataflowAnalysisContext::addFlowConditionConstraint( 142 Atom Token, const Formula &Constraint) { 143 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 144 if (!Res.second) { 145 Res.first->second = 146 &arena().makeAnd(*Res.first->second, Constraint); 147 } 148 } 149 150 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 151 Atom ForkToken = arena().makeFlowConditionToken(); 152 FlowConditionDeps[ForkToken].insert(Token); 153 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 154 return ForkToken; 155 } 156 157 Atom 158 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 159 Atom SecondToken) { 160 Atom Token = arena().makeFlowConditionToken(); 161 auto &TokenDeps = FlowConditionDeps[Token]; 162 TokenDeps.insert(FirstToken); 163 TokenDeps.insert(SecondToken); 164 addFlowConditionConstraint(Token, 165 arena().makeOr(arena().makeAtomRef(FirstToken), 166 arena().makeAtomRef(SecondToken))); 167 return Token; 168 } 169 170 Solver::Result DataflowAnalysisContext::querySolver( 171 llvm::SetVector<const Formula *> Constraints) { 172 return S.solve(Constraints.getArrayRef()); 173 } 174 175 bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 176 const Formula &F) { 177 if (F.isLiteral(true)) 178 return true; 179 180 // Returns true if and only if truth assignment of the flow condition implies 181 // that `F` is also true. We prove whether or not this property holds by 182 // reducing the problem to satisfiability checking. In other words, we attempt 183 // to show that assuming `F` is false makes the constraints induced by the 184 // flow condition unsatisfiable. 185 llvm::SetVector<const Formula *> Constraints; 186 Constraints.insert(&arena().makeAtomRef(Token)); 187 Constraints.insert(&arena().makeNot(F)); 188 addTransitiveFlowConditionConstraints(Token, Constraints); 189 return isUnsatisfiable(std::move(Constraints)); 190 } 191 192 bool DataflowAnalysisContext::flowConditionAllows(Atom Token, 193 const Formula &F) { 194 if (F.isLiteral(false)) 195 return false; 196 197 llvm::SetVector<const Formula *> Constraints; 198 Constraints.insert(&arena().makeAtomRef(Token)); 199 Constraints.insert(&F); 200 addTransitiveFlowConditionConstraints(Token, Constraints); 201 return isSatisfiable(std::move(Constraints)); 202 } 203 204 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 205 const Formula &Val2) { 206 llvm::SetVector<const Formula *> Constraints; 207 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 208 return isUnsatisfiable(std::move(Constraints)); 209 } 210 211 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 212 Atom Token, llvm::SetVector<const Formula *> &Constraints) { 213 llvm::DenseSet<Atom> AddedTokens; 214 std::vector<Atom> Remaining = {Token}; 215 216 if (Invariant) 217 Constraints.insert(Invariant); 218 // Define all the flow conditions that might be referenced in constraints. 219 while (!Remaining.empty()) { 220 auto Token = Remaining.back(); 221 Remaining.pop_back(); 222 if (!AddedTokens.insert(Token).second) 223 continue; 224 225 auto ConstraintsIt = FlowConditionConstraints.find(Token); 226 if (ConstraintsIt == FlowConditionConstraints.end()) { 227 Constraints.insert(&arena().makeAtomRef(Token)); 228 } else { 229 // Bind flow condition token via `iff` to its set of constraints: 230 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 231 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 232 *ConstraintsIt->second)); 233 } 234 235 if (auto DepsIt = FlowConditionDeps.find(Token); 236 DepsIt != FlowConditionDeps.end()) 237 for (Atom A : DepsIt->second) 238 Remaining.push_back(A); 239 } 240 } 241 242 static void printAtomList(const llvm::SmallVector<Atom> &Atoms, 243 llvm::raw_ostream &OS) { 244 OS << "("; 245 for (size_t i = 0; i < Atoms.size(); ++i) { 246 OS << Atoms[i]; 247 if (i + 1 < Atoms.size()) 248 OS << ", "; 249 } 250 OS << ")\n"; 251 } 252 253 void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 254 llvm::raw_ostream &OS) { 255 llvm::SetVector<const Formula *> Constraints; 256 Constraints.insert(&arena().makeAtomRef(Token)); 257 addTransitiveFlowConditionConstraints(Token, Constraints); 258 259 OS << "Flow condition token: " << Token << "\n"; 260 SimplifyConstraintsInfo Info; 261 llvm::SetVector<const Formula *> OriginalConstraints = Constraints; 262 simplifyConstraints(Constraints, arena(), &Info); 263 if (!Constraints.empty()) { 264 OS << "Constraints:\n"; 265 for (const auto *Constraint : Constraints) { 266 Constraint->print(OS); 267 OS << "\n"; 268 } 269 } 270 if (!Info.TrueAtoms.empty()) { 271 OS << "True atoms: "; 272 printAtomList(Info.TrueAtoms, OS); 273 } 274 if (!Info.FalseAtoms.empty()) { 275 OS << "False atoms: "; 276 printAtomList(Info.FalseAtoms, OS); 277 } 278 if (!Info.EquivalentAtoms.empty()) { 279 OS << "Equivalent atoms:\n"; 280 for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms) 281 printAtomList(Class, OS); 282 } 283 284 OS << "\nFlow condition constraints before simplification:\n"; 285 for (const auto *Constraint : OriginalConstraints) { 286 Constraint->print(OS); 287 OS << "\n"; 288 } 289 } 290 291 const AdornedCFG * 292 DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) { 293 // Canonicalize the key: 294 F = F->getDefinition(); 295 if (F == nullptr) 296 return nullptr; 297 auto It = FunctionContexts.find(F); 298 if (It != FunctionContexts.end()) 299 return &It->second; 300 301 if (F->doesThisDeclarationHaveABody()) { 302 auto ACFG = AdornedCFG::build(*F); 303 // FIXME: Handle errors. 304 assert(ACFG); 305 auto Result = FunctionContexts.insert({F, std::move(*ACFG)}); 306 return &Result.first->second; 307 } 308 309 return nullptr; 310 } 311 312 static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 313 if (DataflowLog.empty()) 314 return Logger::textual(llvm::errs()); 315 316 llvm::StringRef Dir = DataflowLog; 317 if (auto EC = llvm::sys::fs::create_directories(Dir)) 318 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 319 // All analysis runs within a process will log to the same directory. 320 // Share a counter so they don't all overwrite each other's 0.html. 321 // (Don't share a logger, it's not threadsafe). 322 static std::atomic<unsigned> Counter = {0}; 323 auto StreamFactory = 324 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 325 llvm::SmallString<256> File(Dir); 326 llvm::sys::path::append(File, 327 std::to_string(Counter.fetch_add(1)) + ".html"); 328 std::error_code EC; 329 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 330 if (EC) { 331 llvm::errs() << "Failed to create log " << File << ": " << EC.message() 332 << "\n"; 333 return std::make_unique<llvm::raw_null_ostream>(); 334 } 335 return OS; 336 }; 337 return Logger::html(std::move(StreamFactory)); 338 } 339 340 DataflowAnalysisContext::DataflowAnalysisContext( 341 Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts) 342 : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()), 343 Opts(Opts) { 344 // If the -dataflow-log command-line flag was set, synthesize a logger. 345 // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 346 // based tools. 347 if (Opts.Log == nullptr) { 348 if (DataflowLog.getNumOccurrences() > 0) { 349 LogOwner = makeLoggerFromCommandLine(); 350 this->Opts.Log = LogOwner.get(); 351 // FIXME: if the flag is given a value, write an HTML log to a file. 352 } else { 353 this->Opts.Log = &Logger::null(); 354 } 355 } 356 } 357 358 DataflowAnalysisContext::~DataflowAnalysisContext() = default; 359 360 } // namespace dataflow 361 } // namespace clang 362