xref: /freebsd/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1*06c3fb27SDimitry Andric //===-- HTMLLogger.cpp ----------------------------------------------------===//
2*06c3fb27SDimitry Andric //
3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*06c3fb27SDimitry Andric //
7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
8*06c3fb27SDimitry Andric //
9*06c3fb27SDimitry Andric // This file implements the HTML logger. Given a directory dir/, we write
10*06c3fb27SDimitry Andric // dir/0.html for the first analysis, etc.
11*06c3fb27SDimitry Andric // These files contain a visualization that allows inspecting the CFG and the
12*06c3fb27SDimitry Andric // state of the analysis at each point.
13*06c3fb27SDimitry Andric // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
14*06c3fb27SDimitry Andric // so each output file is self-contained.
15*06c3fb27SDimitry Andric //
16*06c3fb27SDimitry Andric // VIEWS
17*06c3fb27SDimitry Andric //
18*06c3fb27SDimitry Andric // The timeline and function view are always shown. These allow selecting basic
19*06c3fb27SDimitry Andric // blocks, statements within them, and processing iterations (BBs are visited
20*06c3fb27SDimitry Andric // multiple times when e.g. loops are involved).
21*06c3fb27SDimitry Andric // These are written directly into the HTML body.
22*06c3fb27SDimitry Andric //
23*06c3fb27SDimitry Andric // There are also listings of particular basic blocks, and dumps of the state
24*06c3fb27SDimitry Andric // at particular analysis points (i.e. BB2 iteration 3 statement 2).
25*06c3fb27SDimitry Andric // These are only shown when the relevant BB/analysis point is *selected*.
26*06c3fb27SDimitry Andric //
27*06c3fb27SDimitry Andric // DATA AND TEMPLATES
28*06c3fb27SDimitry Andric //
29*06c3fb27SDimitry Andric // The HTML proper is mostly static.
30*06c3fb27SDimitry Andric // The analysis data is in a JSON object HTMLLoggerData which is embedded as
31*06c3fb27SDimitry Andric // a <script> in the <head>.
32*06c3fb27SDimitry Andric // This gets rendered into DOM by a simple template processor which substitutes
33*06c3fb27SDimitry Andric // the data into <template> tags embedded in the HTML. (see inflate() in JS).
34*06c3fb27SDimitry Andric //
35*06c3fb27SDimitry Andric // SELECTION
36*06c3fb27SDimitry Andric //
37*06c3fb27SDimitry Andric // This is the only real interactive mechanism.
38*06c3fb27SDimitry Andric //
39*06c3fb27SDimitry Andric // At any given time, there are several named selections, e.g.:
40*06c3fb27SDimitry Andric //   bb: B2               (basic block 2 is selected)
41*06c3fb27SDimitry Andric //   elt: B2.4            (statement 4 is selected)
42*06c3fb27SDimitry Andric //   iter: B2:1           (iteration 1 of the basic block is selected)
43*06c3fb27SDimitry Andric //   hover: B3            (hovering over basic block 3)
44*06c3fb27SDimitry Andric //
45*06c3fb27SDimitry Andric // The selection is updated by mouse events: hover by moving the mouse and
46*06c3fb27SDimitry Andric // others by clicking. Elements that are click targets generally have attributes
47*06c3fb27SDimitry Andric // (id or data-foo) that define what they should select.
48*06c3fb27SDimitry Andric // See watchSelection() in JS for the exact logic.
49*06c3fb27SDimitry Andric //
50*06c3fb27SDimitry Andric // When the "bb" selection is set to "B2":
51*06c3fb27SDimitry Andric //   - sections <section data-selection="bb"> get shown
52*06c3fb27SDimitry Andric //   - templates under such sections get re-rendered
53*06c3fb27SDimitry Andric //   - elements with class/id "B2" get class "bb-select"
54*06c3fb27SDimitry Andric //
55*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
56*06c3fb27SDimitry Andric 
57*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
58*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/DebugSupport.h"
59*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/Logger.h"
60*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
61*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/Value.h"
62*06c3fb27SDimitry Andric #include "clang/Basic/SourceManager.h"
63*06c3fb27SDimitry Andric #include "clang/Lex/Lexer.h"
64*06c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h"
65*06c3fb27SDimitry Andric #include "llvm/ADT/ScopeExit.h"
66*06c3fb27SDimitry Andric #include "llvm/Support/Error.h"
67*06c3fb27SDimitry Andric #include "llvm/Support/FormatVariadic.h"
68*06c3fb27SDimitry Andric #include "llvm/Support/JSON.h"
69*06c3fb27SDimitry Andric #include "llvm/Support/Program.h"
70*06c3fb27SDimitry Andric #include "llvm/Support/ScopedPrinter.h"
71*06c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h"
72*06c3fb27SDimitry Andric // Defines assets: HTMLLogger_{html,js,css}
73*06c3fb27SDimitry Andric #include "HTMLLogger.inc"
74*06c3fb27SDimitry Andric 
75*06c3fb27SDimitry Andric namespace clang::dataflow {
76*06c3fb27SDimitry Andric namespace {
77*06c3fb27SDimitry Andric 
78*06c3fb27SDimitry Andric // Render a graphviz graph specification to SVG using the `dot` tool.
79*06c3fb27SDimitry Andric llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
80*06c3fb27SDimitry Andric 
81*06c3fb27SDimitry Andric using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
82*06c3fb27SDimitry Andric 
83*06c3fb27SDimitry Andric // Recursively dumps Values/StorageLocations as JSON
84*06c3fb27SDimitry Andric class ModelDumper {
85*06c3fb27SDimitry Andric public:
86*06c3fb27SDimitry Andric   ModelDumper(llvm::json::OStream &JOS, const Environment &Env)
87*06c3fb27SDimitry Andric       : JOS(JOS), Env(Env) {}
88*06c3fb27SDimitry Andric 
89*06c3fb27SDimitry Andric   void dump(Value &V) {
90*06c3fb27SDimitry Andric     JOS.attribute("value_id", llvm::to_string(&V));
91*06c3fb27SDimitry Andric     if (!Visited.insert(&V).second)
92*06c3fb27SDimitry Andric       return;
93*06c3fb27SDimitry Andric 
94*06c3fb27SDimitry Andric     JOS.attribute("kind", debugString(V.getKind()));
95*06c3fb27SDimitry Andric 
96*06c3fb27SDimitry Andric     switch (V.getKind()) {
97*06c3fb27SDimitry Andric     case Value::Kind::Integer:
98*06c3fb27SDimitry Andric     case Value::Kind::TopBool:
99*06c3fb27SDimitry Andric     case Value::Kind::AtomicBool:
100*06c3fb27SDimitry Andric     case Value::Kind::FormulaBool:
101*06c3fb27SDimitry Andric       break;
102*06c3fb27SDimitry Andric     case Value::Kind::Reference:
103*06c3fb27SDimitry Andric       JOS.attributeObject(
104*06c3fb27SDimitry Andric           "referent", [&] { dump(cast<ReferenceValue>(V).getReferentLoc()); });
105*06c3fb27SDimitry Andric       break;
106*06c3fb27SDimitry Andric     case Value::Kind::Pointer:
107*06c3fb27SDimitry Andric       JOS.attributeObject(
108*06c3fb27SDimitry Andric           "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); });
109*06c3fb27SDimitry Andric       break;
110*06c3fb27SDimitry Andric     case Value::Kind::Struct:
111*06c3fb27SDimitry Andric       for (const auto &Child :
112*06c3fb27SDimitry Andric            cast<StructValue>(V).getAggregateLoc().children())
113*06c3fb27SDimitry Andric         JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] {
114*06c3fb27SDimitry Andric           if (Child.second)
115*06c3fb27SDimitry Andric             if (Value *Val = Env.getValue(*Child.second))
116*06c3fb27SDimitry Andric               dump(*Val);
117*06c3fb27SDimitry Andric         });
118*06c3fb27SDimitry Andric       break;
119*06c3fb27SDimitry Andric     }
120*06c3fb27SDimitry Andric 
121*06c3fb27SDimitry Andric     for (const auto& Prop : V.properties())
122*06c3fb27SDimitry Andric       JOS.attributeObject(("p:" + Prop.first()).str(),
123*06c3fb27SDimitry Andric                           [&] { dump(*Prop.second); });
124*06c3fb27SDimitry Andric 
125*06c3fb27SDimitry Andric     // Running the SAT solver is expensive, but knowing which booleans are
126*06c3fb27SDimitry Andric     // guaranteed true/false here is valuable and hard to determine by hand.
127*06c3fb27SDimitry Andric     if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
128*06c3fb27SDimitry Andric       JOS.attribute("formula", llvm::to_string(B->formula()));
129*06c3fb27SDimitry Andric       JOS.attribute(
130*06c3fb27SDimitry Andric           "truth", Env.flowConditionImplies(B->formula()) ? "true"
131*06c3fb27SDimitry Andric                    : Env.flowConditionImplies(Env.arena().makeNot(B->formula()))
132*06c3fb27SDimitry Andric                        ? "false"
133*06c3fb27SDimitry Andric                        : "unknown");
134*06c3fb27SDimitry Andric     }
135*06c3fb27SDimitry Andric   }
136*06c3fb27SDimitry Andric   void dump(const StorageLocation &L) {
137*06c3fb27SDimitry Andric     JOS.attribute("location", llvm::to_string(&L));
138*06c3fb27SDimitry Andric     if (!Visited.insert(&L).second)
139*06c3fb27SDimitry Andric       return;
140*06c3fb27SDimitry Andric 
141*06c3fb27SDimitry Andric     JOS.attribute("type", L.getType().getAsString());
142*06c3fb27SDimitry Andric     if (auto *V = Env.getValue(L))
143*06c3fb27SDimitry Andric       dump(*V);
144*06c3fb27SDimitry Andric   }
145*06c3fb27SDimitry Andric 
146*06c3fb27SDimitry Andric   llvm::DenseSet<const void*> Visited;
147*06c3fb27SDimitry Andric   llvm::json::OStream &JOS;
148*06c3fb27SDimitry Andric   const Environment &Env;
149*06c3fb27SDimitry Andric };
150*06c3fb27SDimitry Andric 
151*06c3fb27SDimitry Andric class HTMLLogger : public Logger {
152*06c3fb27SDimitry Andric   StreamFactory Streams;
153*06c3fb27SDimitry Andric   std::unique_ptr<llvm::raw_ostream> OS;
154*06c3fb27SDimitry Andric   std::optional<llvm::json::OStream> JOS;
155*06c3fb27SDimitry Andric 
156*06c3fb27SDimitry Andric   const ControlFlowContext *CFG;
157*06c3fb27SDimitry Andric   // Timeline of iterations of CFG block visitation.
158*06c3fb27SDimitry Andric   std::vector<std::pair<const CFGBlock *, unsigned>> Iters;
159*06c3fb27SDimitry Andric   // Number of times each CFG block has been seen.
160*06c3fb27SDimitry Andric   llvm::DenseMap<const CFGBlock *, unsigned> BlockIters;
161*06c3fb27SDimitry Andric   // The messages logged in the current context but not yet written.
162*06c3fb27SDimitry Andric   std::string ContextLogs;
163*06c3fb27SDimitry Andric   // The number of elements we have visited within the current CFG block.
164*06c3fb27SDimitry Andric   unsigned ElementIndex;
165*06c3fb27SDimitry Andric 
166*06c3fb27SDimitry Andric public:
167*06c3fb27SDimitry Andric   explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
168*06c3fb27SDimitry Andric   void beginAnalysis(const ControlFlowContext &CFG,
169*06c3fb27SDimitry Andric                      TypeErasedDataflowAnalysis &A) override {
170*06c3fb27SDimitry Andric     OS = Streams();
171*06c3fb27SDimitry Andric     this->CFG = &CFG;
172*06c3fb27SDimitry Andric     *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
173*06c3fb27SDimitry Andric 
174*06c3fb27SDimitry Andric     if (const auto *D = CFG.getDecl()) {
175*06c3fb27SDimitry Andric       const auto &SM = A.getASTContext().getSourceManager();
176*06c3fb27SDimitry Andric       *OS << "<title>";
177*06c3fb27SDimitry Andric       if (const auto *ND = dyn_cast<NamedDecl>(D))
178*06c3fb27SDimitry Andric         *OS << ND->getNameAsString() << " at ";
179*06c3fb27SDimitry Andric       *OS << SM.getFilename(D->getLocation()) << ":"
180*06c3fb27SDimitry Andric           << SM.getSpellingLineNumber(D->getLocation());
181*06c3fb27SDimitry Andric       *OS << "</title>\n";
182*06c3fb27SDimitry Andric     };
183*06c3fb27SDimitry Andric 
184*06c3fb27SDimitry Andric     *OS << "<style>" << HTMLLogger_css << "</style>\n";
185*06c3fb27SDimitry Andric     *OS << "<script>" << HTMLLogger_js << "</script>\n";
186*06c3fb27SDimitry Andric 
187*06c3fb27SDimitry Andric     writeCode();
188*06c3fb27SDimitry Andric     writeCFG();
189*06c3fb27SDimitry Andric 
190*06c3fb27SDimitry Andric     *OS << "<script>var HTMLLoggerData = \n";
191*06c3fb27SDimitry Andric     JOS.emplace(*OS, /*Indent=*/2);
192*06c3fb27SDimitry Andric     JOS->objectBegin();
193*06c3fb27SDimitry Andric     JOS->attributeBegin("states");
194*06c3fb27SDimitry Andric     JOS->objectBegin();
195*06c3fb27SDimitry Andric   }
196*06c3fb27SDimitry Andric   // Between beginAnalysis() and endAnalysis() we write all the states for
197*06c3fb27SDimitry Andric   // particular analysis points into the `timeline` array.
198*06c3fb27SDimitry Andric   void endAnalysis() override {
199*06c3fb27SDimitry Andric     JOS->objectEnd();
200*06c3fb27SDimitry Andric     JOS->attributeEnd();
201*06c3fb27SDimitry Andric 
202*06c3fb27SDimitry Andric     JOS->attributeArray("timeline", [&] {
203*06c3fb27SDimitry Andric       for (const auto &E : Iters) {
204*06c3fb27SDimitry Andric         JOS->object([&] {
205*06c3fb27SDimitry Andric           JOS->attribute("block", blockID(E.first->getBlockID()));
206*06c3fb27SDimitry Andric           JOS->attribute("iter", E.second);
207*06c3fb27SDimitry Andric         });
208*06c3fb27SDimitry Andric       }
209*06c3fb27SDimitry Andric     });
210*06c3fb27SDimitry Andric     JOS->attributeObject("cfg", [&] {
211*06c3fb27SDimitry Andric       for (const auto &E : BlockIters)
212*06c3fb27SDimitry Andric         writeBlock(*E.first, E.second);
213*06c3fb27SDimitry Andric     });
214*06c3fb27SDimitry Andric 
215*06c3fb27SDimitry Andric     JOS->objectEnd();
216*06c3fb27SDimitry Andric     JOS.reset();
217*06c3fb27SDimitry Andric     *OS << ";\n</script>\n";
218*06c3fb27SDimitry Andric     *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;
219*06c3fb27SDimitry Andric   }
220*06c3fb27SDimitry Andric 
221*06c3fb27SDimitry Andric   void enterBlock(const CFGBlock &B) override {
222*06c3fb27SDimitry Andric     Iters.emplace_back(&B, ++BlockIters[&B]);
223*06c3fb27SDimitry Andric     ElementIndex = 0;
224*06c3fb27SDimitry Andric   }
225*06c3fb27SDimitry Andric   void enterElement(const CFGElement &E) override {
226*06c3fb27SDimitry Andric     ++ElementIndex;
227*06c3fb27SDimitry Andric   }
228*06c3fb27SDimitry Andric 
229*06c3fb27SDimitry Andric   static std::string blockID(unsigned Block) {
230*06c3fb27SDimitry Andric     return llvm::formatv("B{0}", Block);
231*06c3fb27SDimitry Andric   }
232*06c3fb27SDimitry Andric   static std::string eltID(unsigned Block, unsigned Element) {
233*06c3fb27SDimitry Andric     return llvm::formatv("B{0}.{1}", Block, Element);
234*06c3fb27SDimitry Andric   }
235*06c3fb27SDimitry Andric   static std::string iterID(unsigned Block, unsigned Iter) {
236*06c3fb27SDimitry Andric     return llvm::formatv("B{0}:{1}", Block, Iter);
237*06c3fb27SDimitry Andric   }
238*06c3fb27SDimitry Andric   static std::string elementIterID(unsigned Block, unsigned Iter,
239*06c3fb27SDimitry Andric                                    unsigned Element) {
240*06c3fb27SDimitry Andric     return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);
241*06c3fb27SDimitry Andric   }
242*06c3fb27SDimitry Andric 
243*06c3fb27SDimitry Andric   // Write the analysis state associated with a particular analysis point.
244*06c3fb27SDimitry Andric   // FIXME: this dump is fairly opaque. We should show:
245*06c3fb27SDimitry Andric   //  - values associated with the current Stmt
246*06c3fb27SDimitry Andric   //  - values associated with its children
247*06c3fb27SDimitry Andric   //  - meaningful names for values
248*06c3fb27SDimitry Andric   //  - which boolean values are implied true/false by the flow condition
249*06c3fb27SDimitry Andric   void recordState(TypeErasedDataflowAnalysisState &State) override {
250*06c3fb27SDimitry Andric     unsigned Block = Iters.back().first->getBlockID();
251*06c3fb27SDimitry Andric     unsigned Iter = Iters.back().second;
252*06c3fb27SDimitry Andric     JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] {
253*06c3fb27SDimitry Andric       JOS->attribute("block", blockID(Block));
254*06c3fb27SDimitry Andric       JOS->attribute("iter", Iter);
255*06c3fb27SDimitry Andric       JOS->attribute("element", ElementIndex);
256*06c3fb27SDimitry Andric 
257*06c3fb27SDimitry Andric       // If this state immediately follows an Expr, show its built-in model.
258*06c3fb27SDimitry Andric       if (ElementIndex > 0) {
259*06c3fb27SDimitry Andric         auto S =
260*06c3fb27SDimitry Andric             Iters.back().first->Elements[ElementIndex - 1].getAs<CFGStmt>();
261*06c3fb27SDimitry Andric         if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr)
262*06c3fb27SDimitry Andric           if (auto *Loc = State.Env.getStorageLocation(*E, SkipPast::None))
263*06c3fb27SDimitry Andric             JOS->attributeObject(
264*06c3fb27SDimitry Andric                 "value", [&] { ModelDumper(*JOS, State.Env).dump(*Loc); });
265*06c3fb27SDimitry Andric       }
266*06c3fb27SDimitry Andric       if (!ContextLogs.empty()) {
267*06c3fb27SDimitry Andric         JOS->attribute("logs", ContextLogs);
268*06c3fb27SDimitry Andric         ContextLogs.clear();
269*06c3fb27SDimitry Andric       }
270*06c3fb27SDimitry Andric       {
271*06c3fb27SDimitry Andric         std::string BuiltinLattice;
272*06c3fb27SDimitry Andric         llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
273*06c3fb27SDimitry Andric         State.Env.dump(BuiltinLatticeS);
274*06c3fb27SDimitry Andric         JOS->attribute("builtinLattice", BuiltinLattice);
275*06c3fb27SDimitry Andric       }
276*06c3fb27SDimitry Andric     });
277*06c3fb27SDimitry Andric   }
278*06c3fb27SDimitry Andric   void blockConverged() override { logText("Block converged"); }
279*06c3fb27SDimitry Andric 
280*06c3fb27SDimitry Andric   void logText(llvm::StringRef S) override {
281*06c3fb27SDimitry Andric     ContextLogs.append(S.begin(), S.end());
282*06c3fb27SDimitry Andric     ContextLogs.push_back('\n');
283*06c3fb27SDimitry Andric   }
284*06c3fb27SDimitry Andric 
285*06c3fb27SDimitry Andric private:
286*06c3fb27SDimitry Andric   // Write the CFG block details.
287*06c3fb27SDimitry Andric   // Currently this is just the list of elements in execution order.
288*06c3fb27SDimitry Andric   // FIXME: an AST dump would be a useful view, too.
289*06c3fb27SDimitry Andric   void writeBlock(const CFGBlock &B, unsigned Iters) {
290*06c3fb27SDimitry Andric     JOS->attributeObject(blockID(B.getBlockID()), [&] {
291*06c3fb27SDimitry Andric       JOS->attribute("iters", Iters);
292*06c3fb27SDimitry Andric       JOS->attributeArray("elements", [&] {
293*06c3fb27SDimitry Andric         for (const auto &Elt : B.Elements) {
294*06c3fb27SDimitry Andric           std::string Dump;
295*06c3fb27SDimitry Andric           llvm::raw_string_ostream DumpS(Dump);
296*06c3fb27SDimitry Andric           Elt.dumpToStream(DumpS);
297*06c3fb27SDimitry Andric           JOS->value(Dump);
298*06c3fb27SDimitry Andric         }
299*06c3fb27SDimitry Andric       });
300*06c3fb27SDimitry Andric     });
301*06c3fb27SDimitry Andric   }
302*06c3fb27SDimitry Andric 
303*06c3fb27SDimitry Andric   // Write the code of function being examined.
304*06c3fb27SDimitry Andric   // We want to overlay the code with <span>s that mark which BB particular
305*06c3fb27SDimitry Andric   // tokens are associated with, and even which BB element (so that clicking
306*06c3fb27SDimitry Andric   // can select the right element).
307*06c3fb27SDimitry Andric   void writeCode() {
308*06c3fb27SDimitry Andric     if (!CFG->getDecl())
309*06c3fb27SDimitry Andric       return;
310*06c3fb27SDimitry Andric     const auto &AST = CFG->getDecl()->getASTContext();
311*06c3fb27SDimitry Andric     bool Invalid = false;
312*06c3fb27SDimitry Andric 
313*06c3fb27SDimitry Andric     // Extract the source code from the original file.
314*06c3fb27SDimitry Andric     // Pretty-printing from the AST would probably be nicer (no macros or
315*06c3fb27SDimitry Andric     // indentation to worry about), but we need the boundaries of particular
316*06c3fb27SDimitry Andric     // AST nodes and the printer doesn't provide this.
317*06c3fb27SDimitry Andric     auto Range = clang::Lexer::makeFileCharRange(
318*06c3fb27SDimitry Andric         CharSourceRange::getTokenRange(CFG->getDecl()->getSourceRange()),
319*06c3fb27SDimitry Andric         AST.getSourceManager(), AST.getLangOpts());
320*06c3fb27SDimitry Andric     if (Range.isInvalid())
321*06c3fb27SDimitry Andric       return;
322*06c3fb27SDimitry Andric     llvm::StringRef Code = clang::Lexer::getSourceText(
323*06c3fb27SDimitry Andric         Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
324*06c3fb27SDimitry Andric     if (Invalid)
325*06c3fb27SDimitry Andric       return;
326*06c3fb27SDimitry Andric 
327*06c3fb27SDimitry Andric     static constexpr unsigned Missing = -1;
328*06c3fb27SDimitry Andric     // TokenInfo stores the BB and set of elements that a token is part of.
329*06c3fb27SDimitry Andric     struct TokenInfo {
330*06c3fb27SDimitry Andric       // The basic block this is part of.
331*06c3fb27SDimitry Andric       // This is the BB of the stmt with the smallest containing range.
332*06c3fb27SDimitry Andric       unsigned BB = Missing;
333*06c3fb27SDimitry Andric       unsigned BBPriority = 0;
334*06c3fb27SDimitry Andric       // The most specific stmt this is part of (smallest range).
335*06c3fb27SDimitry Andric       unsigned Elt = Missing;
336*06c3fb27SDimitry Andric       unsigned EltPriority = 0;
337*06c3fb27SDimitry Andric       // All stmts this is part of.
338*06c3fb27SDimitry Andric       SmallVector<unsigned> Elts;
339*06c3fb27SDimitry Andric 
340*06c3fb27SDimitry Andric       // Mark this token as being part of BB.Elt.
341*06c3fb27SDimitry Andric       // RangeLen is the character length of the element's range, used to
342*06c3fb27SDimitry Andric       // distinguish inner vs outer statements.
343*06c3fb27SDimitry Andric       // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
344*06c3fb27SDimitry Andric       // However "a" has a smaller range, so is more specific. Clicking on the
345*06c3fb27SDimitry Andric       // token "a" should select the stmt "a".
346*06c3fb27SDimitry Andric       void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
347*06c3fb27SDimitry Andric         // A worse BB (larger range) => ignore.
348*06c3fb27SDimitry Andric         if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
349*06c3fb27SDimitry Andric           return;
350*06c3fb27SDimitry Andric         if (BB != this->BB) {
351*06c3fb27SDimitry Andric           this->BB = BB;
352*06c3fb27SDimitry Andric           Elts.clear();
353*06c3fb27SDimitry Andric           BBPriority = RangeLen;
354*06c3fb27SDimitry Andric         }
355*06c3fb27SDimitry Andric         BBPriority = std::min(BBPriority, RangeLen);
356*06c3fb27SDimitry Andric         Elts.push_back(Elt);
357*06c3fb27SDimitry Andric         if (this->Elt == Missing || EltPriority > RangeLen)
358*06c3fb27SDimitry Andric           this->Elt = Elt;
359*06c3fb27SDimitry Andric       }
360*06c3fb27SDimitry Andric       bool operator==(const TokenInfo &Other) const {
361*06c3fb27SDimitry Andric         return std::tie(BB, Elt, Elts) ==
362*06c3fb27SDimitry Andric                std::tie(Other.BB, Other.Elt, Other.Elts);
363*06c3fb27SDimitry Andric       }
364*06c3fb27SDimitry Andric       // Write the attributes for the <span> on this token.
365*06c3fb27SDimitry Andric       void write(llvm::raw_ostream &OS) const {
366*06c3fb27SDimitry Andric         OS << "class='c";
367*06c3fb27SDimitry Andric         if (BB != Missing)
368*06c3fb27SDimitry Andric           OS << " " << blockID(BB);
369*06c3fb27SDimitry Andric         for (unsigned Elt : Elts)
370*06c3fb27SDimitry Andric           OS << " " << eltID(BB, Elt);
371*06c3fb27SDimitry Andric         OS << "'";
372*06c3fb27SDimitry Andric 
373*06c3fb27SDimitry Andric         if (Elt != Missing)
374*06c3fb27SDimitry Andric           OS << " data-elt='" << eltID(BB, Elt) << "'";
375*06c3fb27SDimitry Andric         if (BB != Missing)
376*06c3fb27SDimitry Andric           OS << " data-bb='" << blockID(BB) << "'";
377*06c3fb27SDimitry Andric       }
378*06c3fb27SDimitry Andric     };
379*06c3fb27SDimitry Andric 
380*06c3fb27SDimitry Andric     // Construct one TokenInfo per character in a flat array.
381*06c3fb27SDimitry Andric     // This is inefficient (chars in a token all have the same info) but simple.
382*06c3fb27SDimitry Andric     std::vector<TokenInfo> State(Code.size());
383*06c3fb27SDimitry Andric     for (const auto *Block : CFG->getCFG()) {
384*06c3fb27SDimitry Andric       unsigned EltIndex = 0;
385*06c3fb27SDimitry Andric       for (const auto& Elt : *Block) {
386*06c3fb27SDimitry Andric         ++EltIndex;
387*06c3fb27SDimitry Andric         if (const auto S = Elt.getAs<CFGStmt>()) {
388*06c3fb27SDimitry Andric           auto EltRange = clang::Lexer::makeFileCharRange(
389*06c3fb27SDimitry Andric               CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
390*06c3fb27SDimitry Andric               AST.getSourceManager(), AST.getLangOpts());
391*06c3fb27SDimitry Andric           if (EltRange.isInvalid())
392*06c3fb27SDimitry Andric             continue;
393*06c3fb27SDimitry Andric           if (EltRange.getBegin() < Range.getBegin() ||
394*06c3fb27SDimitry Andric               EltRange.getEnd() >= Range.getEnd() ||
395*06c3fb27SDimitry Andric               EltRange.getEnd() < Range.getBegin() ||
396*06c3fb27SDimitry Andric               EltRange.getEnd() >= Range.getEnd())
397*06c3fb27SDimitry Andric             continue;
398*06c3fb27SDimitry Andric           unsigned Off = EltRange.getBegin().getRawEncoding() -
399*06c3fb27SDimitry Andric                          Range.getBegin().getRawEncoding();
400*06c3fb27SDimitry Andric           unsigned Len = EltRange.getEnd().getRawEncoding() -
401*06c3fb27SDimitry Andric                          EltRange.getBegin().getRawEncoding();
402*06c3fb27SDimitry Andric           for (unsigned I = 0; I < Len; ++I)
403*06c3fb27SDimitry Andric             State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
404*06c3fb27SDimitry Andric         }
405*06c3fb27SDimitry Andric       }
406*06c3fb27SDimitry Andric     }
407*06c3fb27SDimitry Andric 
408*06c3fb27SDimitry Andric     // Finally, write the code with the correct <span>s.
409*06c3fb27SDimitry Andric     unsigned Line =
410*06c3fb27SDimitry Andric         AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
411*06c3fb27SDimitry Andric     *OS << "<template data-copy='code'>\n";
412*06c3fb27SDimitry Andric     *OS << "<code class='filename'>";
413*06c3fb27SDimitry Andric     llvm::printHTMLEscaped(
414*06c3fb27SDimitry Andric         llvm::sys::path::filename(
415*06c3fb27SDimitry Andric             AST.getSourceManager().getFilename(Range.getBegin())),
416*06c3fb27SDimitry Andric         *OS);
417*06c3fb27SDimitry Andric     *OS << "</code>";
418*06c3fb27SDimitry Andric     *OS << "<code class='line' data-line='" << Line++ << "'>";
419*06c3fb27SDimitry Andric     for (unsigned I = 0; I < Code.size(); ++I) {
420*06c3fb27SDimitry Andric       // Don't actually write a <span> around each character, only break spans
421*06c3fb27SDimitry Andric       // when the TokenInfo changes.
422*06c3fb27SDimitry Andric       bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
423*06c3fb27SDimitry Andric       bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
424*06c3fb27SDimitry Andric       if (NeedOpen) {
425*06c3fb27SDimitry Andric         *OS << "<span ";
426*06c3fb27SDimitry Andric         State[I].write(*OS);
427*06c3fb27SDimitry Andric         *OS << ">";
428*06c3fb27SDimitry Andric       }
429*06c3fb27SDimitry Andric       if (Code[I] == '\n')
430*06c3fb27SDimitry Andric         *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
431*06c3fb27SDimitry Andric       else
432*06c3fb27SDimitry Andric         llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
433*06c3fb27SDimitry Andric       if (NeedClose) *OS << "</span>";
434*06c3fb27SDimitry Andric     }
435*06c3fb27SDimitry Andric     *OS << "</code>\n";
436*06c3fb27SDimitry Andric     *OS << "</template>";
437*06c3fb27SDimitry Andric   }
438*06c3fb27SDimitry Andric 
439*06c3fb27SDimitry Andric   // Write the CFG diagram, a graph of basic blocks.
440*06c3fb27SDimitry Andric   // Laying out graphs is hard, so we construct a graphviz description and shell
441*06c3fb27SDimitry Andric   // out to `dot` to turn it into an SVG.
442*06c3fb27SDimitry Andric   void writeCFG() {
443*06c3fb27SDimitry Andric     *OS << "<template data-copy='cfg'>\n";
444*06c3fb27SDimitry Andric     if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG())))
445*06c3fb27SDimitry Andric       *OS << *SVG;
446*06c3fb27SDimitry Andric     else
447*06c3fb27SDimitry Andric       *OS << "Can't draw CFG: " << toString(SVG.takeError());
448*06c3fb27SDimitry Andric     *OS << "</template>\n";
449*06c3fb27SDimitry Andric   }
450*06c3fb27SDimitry Andric 
451*06c3fb27SDimitry Andric   // Produce a graphviz description of a CFG.
452*06c3fb27SDimitry Andric   static std::string buildCFGDot(const clang::CFG &CFG) {
453*06c3fb27SDimitry Andric     std::string Graph;
454*06c3fb27SDimitry Andric     llvm::raw_string_ostream GraphS(Graph);
455*06c3fb27SDimitry Andric     // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
456*06c3fb27SDimitry Andric     GraphS << R"(digraph {
457*06c3fb27SDimitry Andric       tooltip=" "
458*06c3fb27SDimitry Andric       node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
459*06c3fb27SDimitry Andric       edge[tooltip = " "]
460*06c3fb27SDimitry Andric )";
461*06c3fb27SDimitry Andric     for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I)
462*06c3fb27SDimitry Andric       GraphS << "  " << blockID(I) << " [id=" << blockID(I) << "]\n";
463*06c3fb27SDimitry Andric     for (const auto *Block : CFG) {
464*06c3fb27SDimitry Andric       for (const auto &Succ : Block->succs()) {
465*06c3fb27SDimitry Andric         GraphS << "  " << blockID(Block->getBlockID()) << " -> "
466*06c3fb27SDimitry Andric                << blockID(Succ.getReachableBlock()->getBlockID()) << "\n";
467*06c3fb27SDimitry Andric       }
468*06c3fb27SDimitry Andric     }
469*06c3fb27SDimitry Andric     GraphS << "}\n";
470*06c3fb27SDimitry Andric     return Graph;
471*06c3fb27SDimitry Andric   }
472*06c3fb27SDimitry Andric };
473*06c3fb27SDimitry Andric 
474*06c3fb27SDimitry Andric // Nothing interesting here, just subprocess/temp-file plumbing.
475*06c3fb27SDimitry Andric llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
476*06c3fb27SDimitry Andric   std::string DotPath;
477*06c3fb27SDimitry Andric   if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT"))
478*06c3fb27SDimitry Andric     DotPath = FromEnv;
479*06c3fb27SDimitry Andric   else {
480*06c3fb27SDimitry Andric     auto FromPath = llvm::sys::findProgramByName("dot");
481*06c3fb27SDimitry Andric     if (!FromPath)
482*06c3fb27SDimitry Andric       return llvm::createStringError(FromPath.getError(),
483*06c3fb27SDimitry Andric                                      "'dot' not found on PATH");
484*06c3fb27SDimitry Andric     DotPath = FromPath.get();
485*06c3fb27SDimitry Andric   }
486*06c3fb27SDimitry Andric 
487*06c3fb27SDimitry Andric   // Create input and output files for `dot` subprocess.
488*06c3fb27SDimitry Andric   // (We create the output file as empty, to reserve the temp filename).
489*06c3fb27SDimitry Andric   llvm::SmallString<256> Input, Output;
490*06c3fb27SDimitry Andric   int InputFD;
491*06c3fb27SDimitry Andric   if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
492*06c3fb27SDimitry Andric                                                    Input))
493*06c3fb27SDimitry Andric     return llvm::createStringError(EC, "failed to create `dot` temp input");
494*06c3fb27SDimitry Andric   llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
495*06c3fb27SDimitry Andric   auto DeleteInput =
496*06c3fb27SDimitry Andric       llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
497*06c3fb27SDimitry Andric   if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
498*06c3fb27SDimitry Andric     return llvm::createStringError(EC, "failed to create `dot` temp output");
499*06c3fb27SDimitry Andric   auto DeleteOutput =
500*06c3fb27SDimitry Andric       llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
501*06c3fb27SDimitry Andric 
502*06c3fb27SDimitry Andric   std::vector<std::optional<llvm::StringRef>> Redirects = {
503*06c3fb27SDimitry Andric       Input, Output,
504*06c3fb27SDimitry Andric       /*stderr=*/std::nullopt};
505*06c3fb27SDimitry Andric   std::string ErrMsg;
506*06c3fb27SDimitry Andric   int Code = llvm::sys::ExecuteAndWait(
507*06c3fb27SDimitry Andric       DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
508*06c3fb27SDimitry Andric       /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
509*06c3fb27SDimitry Andric   if (!ErrMsg.empty())
510*06c3fb27SDimitry Andric     return llvm::createStringError(llvm::inconvertibleErrorCode(),
511*06c3fb27SDimitry Andric                                    "'dot' failed: " + ErrMsg);
512*06c3fb27SDimitry Andric   if (Code != 0)
513*06c3fb27SDimitry Andric     return llvm::createStringError(llvm::inconvertibleErrorCode(),
514*06c3fb27SDimitry Andric                                    "'dot' failed (" + llvm::Twine(Code) + ")");
515*06c3fb27SDimitry Andric 
516*06c3fb27SDimitry Andric   auto Buf = llvm::MemoryBuffer::getFile(Output);
517*06c3fb27SDimitry Andric   if (!Buf)
518*06c3fb27SDimitry Andric     return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
519*06c3fb27SDimitry Andric 
520*06c3fb27SDimitry Andric   // Output has <?xml> prefix we don't want. Skip to <svg> tag.
521*06c3fb27SDimitry Andric   llvm::StringRef Result = Buf.get()->getBuffer();
522*06c3fb27SDimitry Andric   auto Pos = Result.find("<svg");
523*06c3fb27SDimitry Andric   if (Pos == llvm::StringRef::npos)
524*06c3fb27SDimitry Andric     return llvm::createStringError(llvm::inconvertibleErrorCode(),
525*06c3fb27SDimitry Andric                                    "Can't find <svg> tag in `dot` output");
526*06c3fb27SDimitry Andric   return Result.substr(Pos).str();
527*06c3fb27SDimitry Andric }
528*06c3fb27SDimitry Andric 
529*06c3fb27SDimitry Andric } // namespace
530*06c3fb27SDimitry Andric 
531*06c3fb27SDimitry Andric std::unique_ptr<Logger>
532*06c3fb27SDimitry Andric Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
533*06c3fb27SDimitry Andric   return std::make_unique<HTMLLogger>(std::move(Streams));
534*06c3fb27SDimitry Andric }
535*06c3fb27SDimitry Andric 
536*06c3fb27SDimitry Andric } // namespace clang::dataflow
537