xref: /freebsd/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a DataflowAnalysisContext class that owns objects that
10 //  encompass the state of a program and stores context that is used during
11 //  dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Analysis/FlowSensitive/ASTOps.h"
18 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
19 #include "clang/Analysis/FlowSensitive/Formula.h"
20 #include "clang/Analysis/FlowSensitive/Logger.h"
21 #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h"
22 #include "clang/Analysis/FlowSensitive/Value.h"
23 #include "llvm/ADT/SetOperations.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cassert>
31 #include <memory>
32 #include <string>
33 #include <utility>
34 #include <vector>
35 
36 static llvm::cl::opt<std::string> DataflowLog(
37     "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
38     llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
39                    "log to stderr. With an arg, writes HTML logs under the "
40                    "specified directory (one per analyzed function)."));
41 
42 namespace clang {
43 namespace dataflow {
44 
getModeledFields(QualType Type)45 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) {
46   // During context-sensitive analysis, a struct may be allocated in one
47   // function, but its field accessed in a function lower in the stack than
48   // the allocation. Since we only collect fields used in the function where
49   // the allocation occurs, we can't apply that filter when performing
50   // context-sensitive analysis. But, this only applies to storage locations,
51   // since field access it not allowed to fail. In contrast, field *values*
52   // don't need this allowance, since the API allows for uninitialized fields.
53   if (Opts.ContextSensitiveOpts)
54     return getObjectFields(Type);
55 
56   return llvm::set_intersection(getObjectFields(Type), ModeledFields);
57 }
58 
addModeledFields(const FieldSet & Fields)59 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
60   ModeledFields.set_union(Fields);
61 }
62 
createStorageLocation(QualType Type)63 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
64   if (!Type.isNull() && Type->isRecordType()) {
65     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
66     for (const FieldDecl *Field : getModeledFields(Type))
67       if (Field->getType()->isReferenceType())
68         FieldLocs.insert({Field, nullptr});
69       else
70         FieldLocs.insert({Field, &createStorageLocation(
71                                      Field->getType().getNonReferenceType())});
72 
73     RecordStorageLocation::SyntheticFieldMap SyntheticFields;
74     for (const auto &Entry : getSyntheticFields(Type))
75       SyntheticFields.insert(
76           {Entry.getKey(),
77            &createStorageLocation(Entry.getValue().getNonReferenceType())});
78 
79     return createRecordStorageLocation(Type, std::move(FieldLocs),
80                                        std::move(SyntheticFields));
81   }
82   return arena().create<ScalarStorageLocation>(Type);
83 }
84 
85 // Returns the keys for a given `StringMap`.
86 // Can't use `StringSet` as the return type as it doesn't support `operator==`.
87 template <typename T>
getKeys(const llvm::StringMap<T> & Map)88 static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
89   return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end());
90 }
91 
createRecordStorageLocation(QualType Type,RecordStorageLocation::FieldToLoc FieldLocs,RecordStorageLocation::SyntheticFieldMap SyntheticFields)92 RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
93     QualType Type, RecordStorageLocation::FieldToLoc FieldLocs,
94     RecordStorageLocation::SyntheticFieldMap SyntheticFields) {
95   assert(Type->isRecordType());
96   assert(containsSameFields(getModeledFields(Type), FieldLocs));
97   assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
98 
99   RecordStorageLocationCreated = true;
100   return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs),
101                                                std::move(SyntheticFields));
102 }
103 
104 StorageLocation &
getStableStorageLocation(const ValueDecl & D)105 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
106   if (auto *Loc = DeclToLoc.lookup(&D))
107     return *Loc;
108   auto &Loc = createStorageLocation(D.getType().getNonReferenceType());
109   DeclToLoc[&D] = &Loc;
110   return Loc;
111 }
112 
113 StorageLocation &
getStableStorageLocation(const Expr & E)114 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
115   const Expr &CanonE = ignoreCFGOmittedNodes(E);
116 
117   if (auto *Loc = ExprToLoc.lookup(&CanonE))
118     return *Loc;
119   auto &Loc = createStorageLocation(CanonE.getType());
120   ExprToLoc[&CanonE] = &Loc;
121   return Loc;
122 }
123 
124 PointerValue &
getOrCreateNullPointerValue(QualType PointeeType)125 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
126   auto CanonicalPointeeType =
127       PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
128   auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
129   if (Res.second) {
130     auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
131     Res.first->second = &arena().create<PointerValue>(PointeeLoc);
132   }
133   return *Res.first->second;
134 }
135 
addInvariant(const Formula & Constraint)136 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
137   if (Invariant == nullptr)
138     Invariant = &Constraint;
139   else
140     Invariant = &arena().makeAnd(*Invariant, Constraint);
141 }
142 
addFlowConditionConstraint(Atom Token,const Formula & Constraint)143 void DataflowAnalysisContext::addFlowConditionConstraint(
144     Atom Token, const Formula &Constraint) {
145   auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
146   if (!Res.second) {
147     Res.first->second =
148         &arena().makeAnd(*Res.first->second, Constraint);
149   }
150 }
151 
forkFlowCondition(Atom Token)152 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
153   Atom ForkToken = arena().makeFlowConditionToken();
154   FlowConditionDeps[ForkToken].insert(Token);
155   addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
156   return ForkToken;
157 }
158 
159 Atom
joinFlowConditions(Atom FirstToken,Atom SecondToken)160 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
161                                             Atom SecondToken) {
162   Atom Token = arena().makeFlowConditionToken();
163   FlowConditionDeps[Token].insert(FirstToken);
164   FlowConditionDeps[Token].insert(SecondToken);
165   addFlowConditionConstraint(Token,
166                              arena().makeOr(arena().makeAtomRef(FirstToken),
167                                             arena().makeAtomRef(SecondToken)));
168   return Token;
169 }
170 
querySolver(llvm::SetVector<const Formula * > Constraints)171 Solver::Result DataflowAnalysisContext::querySolver(
172     llvm::SetVector<const Formula *> Constraints) {
173   return S.solve(Constraints.getArrayRef());
174 }
175 
flowConditionImplies(Atom Token,const Formula & F)176 bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
177                                                    const Formula &F) {
178   if (F.isLiteral(true))
179     return true;
180 
181   // Returns true if and only if truth assignment of the flow condition implies
182   // that `F` is also true. We prove whether or not this property holds by
183   // reducing the problem to satisfiability checking. In other words, we attempt
184   // to show that assuming `F` is false makes the constraints induced by the
185   // flow condition unsatisfiable.
186   llvm::SetVector<const Formula *> Constraints;
187   Constraints.insert(&arena().makeAtomRef(Token));
188   Constraints.insert(&arena().makeNot(F));
189   addTransitiveFlowConditionConstraints(Token, Constraints);
190   return isUnsatisfiable(std::move(Constraints));
191 }
192 
flowConditionAllows(Atom Token,const Formula & F)193 bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
194                                                   const Formula &F) {
195   if (F.isLiteral(false))
196     return false;
197 
198   llvm::SetVector<const Formula *> Constraints;
199   Constraints.insert(&arena().makeAtomRef(Token));
200   Constraints.insert(&F);
201   addTransitiveFlowConditionConstraints(Token, Constraints);
202   return isSatisfiable(std::move(Constraints));
203 }
204 
equivalentFormulas(const Formula & Val1,const Formula & Val2)205 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
206                                                  const Formula &Val2) {
207   llvm::SetVector<const Formula *> Constraints;
208   Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
209   return isUnsatisfiable(std::move(Constraints));
210 }
211 
addTransitiveFlowConditionConstraints(Atom Token,llvm::SetVector<const Formula * > & Constraints)212 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
213     Atom Token, llvm::SetVector<const Formula *> &Constraints) {
214   llvm::DenseSet<Atom> AddedTokens;
215   std::vector<Atom> Remaining = {Token};
216 
217   if (Invariant)
218     Constraints.insert(Invariant);
219   // Define all the flow conditions that might be referenced in constraints.
220   while (!Remaining.empty()) {
221     auto Token = Remaining.back();
222     Remaining.pop_back();
223     if (!AddedTokens.insert(Token).second)
224       continue;
225 
226     auto ConstraintsIt = FlowConditionConstraints.find(Token);
227     if (ConstraintsIt == FlowConditionConstraints.end()) {
228       Constraints.insert(&arena().makeAtomRef(Token));
229     } else {
230       // Bind flow condition token via `iff` to its set of constraints:
231       // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
232       Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
233                                              *ConstraintsIt->second));
234     }
235 
236     if (auto DepsIt = FlowConditionDeps.find(Token);
237         DepsIt != FlowConditionDeps.end())
238       for (Atom A : DepsIt->second)
239         Remaining.push_back(A);
240   }
241 }
242 
printAtomList(const llvm::SmallVector<Atom> & Atoms,llvm::raw_ostream & OS)243 static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
244                           llvm::raw_ostream &OS) {
245   OS << "(";
246   for (size_t i = 0; i < Atoms.size(); ++i) {
247     OS << Atoms[i];
248     if (i + 1 < Atoms.size())
249       OS << ", ";
250   }
251   OS << ")\n";
252 }
253 
dumpFlowCondition(Atom Token,llvm::raw_ostream & OS)254 void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
255                                                 llvm::raw_ostream &OS) {
256   llvm::SetVector<const Formula *> Constraints;
257   Constraints.insert(&arena().makeAtomRef(Token));
258   addTransitiveFlowConditionConstraints(Token, Constraints);
259 
260   OS << "Flow condition token: " << Token << "\n";
261   SimplifyConstraintsInfo Info;
262   llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
263   simplifyConstraints(Constraints, arena(), &Info);
264   if (!Constraints.empty()) {
265     OS << "Constraints:\n";
266     for (const auto *Constraint : Constraints) {
267       Constraint->print(OS);
268       OS << "\n";
269     }
270   }
271   if (!Info.TrueAtoms.empty()) {
272     OS << "True atoms: ";
273     printAtomList(Info.TrueAtoms, OS);
274   }
275   if (!Info.FalseAtoms.empty()) {
276     OS << "False atoms: ";
277     printAtomList(Info.FalseAtoms, OS);
278   }
279   if (!Info.EquivalentAtoms.empty()) {
280     OS << "Equivalent atoms:\n";
281     for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms)
282       printAtomList(Class, OS);
283   }
284 
285   OS << "\nFlow condition constraints before simplification:\n";
286   for (const auto *Constraint : OriginalConstraints) {
287     Constraint->print(OS);
288     OS << "\n";
289   }
290 }
291 
292 const AdornedCFG *
getAdornedCFG(const FunctionDecl * F)293 DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) {
294   // Canonicalize the key:
295   F = F->getDefinition();
296   if (F == nullptr)
297     return nullptr;
298   auto It = FunctionContexts.find(F);
299   if (It != FunctionContexts.end())
300     return &It->second;
301 
302   if (F->doesThisDeclarationHaveABody()) {
303     auto ACFG = AdornedCFG::build(*F);
304     // FIXME: Handle errors.
305     assert(ACFG);
306     auto Result = FunctionContexts.insert({F, std::move(*ACFG)});
307     return &Result.first->second;
308   }
309 
310   return nullptr;
311 }
312 
makeLoggerFromCommandLine()313 static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
314   if (DataflowLog.empty())
315     return Logger::textual(llvm::errs());
316 
317   llvm::StringRef Dir = DataflowLog;
318   if (auto EC = llvm::sys::fs::create_directories(Dir))
319     llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
320   // All analysis runs within a process will log to the same directory.
321   // Share a counter so they don't all overwrite each other's 0.html.
322   // (Don't share a logger, it's not threadsafe).
323   static std::atomic<unsigned> Counter = {0};
324   auto StreamFactory =
325       [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
326     llvm::SmallString<256> File(Dir);
327     llvm::sys::path::append(File,
328                             std::to_string(Counter.fetch_add(1)) + ".html");
329     std::error_code EC;
330     auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
331     if (EC) {
332       llvm::errs() << "Failed to create log " << File << ": " << EC.message()
333                    << "\n";
334       return std::make_unique<llvm::raw_null_ostream>();
335     }
336     return OS;
337   };
338   return Logger::html(std::move(StreamFactory));
339 }
340 
DataflowAnalysisContext(Solver & S,std::unique_ptr<Solver> && OwnedSolver,Options Opts)341 DataflowAnalysisContext::DataflowAnalysisContext(
342     Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts)
343     : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()),
344       Opts(Opts) {
345   // If the -dataflow-log command-line flag was set, synthesize a logger.
346   // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
347   // based tools.
348   if (Opts.Log == nullptr) {
349     if (DataflowLog.getNumOccurrences() > 0) {
350       LogOwner = makeLoggerFromCommandLine();
351       this->Opts.Log = LogOwner.get();
352       // FIXME: if the flag is given a value, write an HTML log to a file.
353     } else {
354       this->Opts.Log = &Logger::null();
355     }
356   }
357 }
358 
359 DataflowAnalysisContext::~DataflowAnalysisContext() = default;
360 
361 } // namespace dataflow
362 } // namespace clang
363