1*06c3fb27SDimitry Andric //===-- HTMLLogger.cpp ----------------------------------------------------===// 2*06c3fb27SDimitry Andric // 3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*06c3fb27SDimitry Andric // 7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 8*06c3fb27SDimitry Andric // 9*06c3fb27SDimitry Andric // This file implements the HTML logger. Given a directory dir/, we write 10*06c3fb27SDimitry Andric // dir/0.html for the first analysis, etc. 11*06c3fb27SDimitry Andric // These files contain a visualization that allows inspecting the CFG and the 12*06c3fb27SDimitry Andric // state of the analysis at each point. 13*06c3fb27SDimitry Andric // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded 14*06c3fb27SDimitry Andric // so each output file is self-contained. 15*06c3fb27SDimitry Andric // 16*06c3fb27SDimitry Andric // VIEWS 17*06c3fb27SDimitry Andric // 18*06c3fb27SDimitry Andric // The timeline and function view are always shown. These allow selecting basic 19*06c3fb27SDimitry Andric // blocks, statements within them, and processing iterations (BBs are visited 20*06c3fb27SDimitry Andric // multiple times when e.g. loops are involved). 21*06c3fb27SDimitry Andric // These are written directly into the HTML body. 22*06c3fb27SDimitry Andric // 23*06c3fb27SDimitry Andric // There are also listings of particular basic blocks, and dumps of the state 24*06c3fb27SDimitry Andric // at particular analysis points (i.e. BB2 iteration 3 statement 2). 25*06c3fb27SDimitry Andric // These are only shown when the relevant BB/analysis point is *selected*. 
//
// DATA AND TEMPLATES
//
// The HTML proper is mostly static.
// The analysis data is in a JSON object HTMLLoggerData which is embedded as
// a <script> in the <head>.
// This gets rendered into DOM by a simple template processor which substitutes
// the data into <template> tags embedded in the HTML. (see inflate() in JS).
//
// SELECTION
//
// This is the only real interactive mechanism.
//
// At any given time, there are several named selections, e.g.:
//   bb: B2               (basic block 2 is selected)
//   elt: B2.4            (statement 4 is selected)
//   iter: B2:1           (iteration 1 of the basic block is selected)
//   hover: B3            (hovering over basic block 3)
//
// The selection is updated by mouse events: hover by moving the mouse and
// others by clicking. Elements that are click targets generally have attributes
// (id or data-foo) that define what they should select.
// See watchSelection() in JS for the exact logic.
49*06c3fb27SDimitry Andric // 50*06c3fb27SDimitry Andric // When the "bb" selection is set to "B2": 51*06c3fb27SDimitry Andric // - sections <section data-selection="bb"> get shown 52*06c3fb27SDimitry Andric // - templates under such sections get re-rendered 53*06c3fb27SDimitry Andric // - elements with class/id "B2" get class "bb-select" 54*06c3fb27SDimitry Andric // 55*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 56*06c3fb27SDimitry Andric 57*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" 58*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/DebugSupport.h" 59*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/Logger.h" 60*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" 61*06c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/Value.h" 62*06c3fb27SDimitry Andric #include "clang/Basic/SourceManager.h" 63*06c3fb27SDimitry Andric #include "clang/Lex/Lexer.h" 64*06c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 65*06c3fb27SDimitry Andric #include "llvm/ADT/ScopeExit.h" 66*06c3fb27SDimitry Andric #include "llvm/Support/Error.h" 67*06c3fb27SDimitry Andric #include "llvm/Support/FormatVariadic.h" 68*06c3fb27SDimitry Andric #include "llvm/Support/JSON.h" 69*06c3fb27SDimitry Andric #include "llvm/Support/Program.h" 70*06c3fb27SDimitry Andric #include "llvm/Support/ScopedPrinter.h" 71*06c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h" 72*06c3fb27SDimitry Andric // Defines assets: HTMLLogger_{html_js,css} 73*06c3fb27SDimitry Andric #include "HTMLLogger.inc" 74*06c3fb27SDimitry Andric 75*06c3fb27SDimitry Andric namespace clang::dataflow { 76*06c3fb27SDimitry Andric namespace { 77*06c3fb27SDimitry Andric 78*06c3fb27SDimitry Andric // Render a graphviz graph specification to SVG using the `dot` tool. 
79*06c3fb27SDimitry Andric llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph); 80*06c3fb27SDimitry Andric 81*06c3fb27SDimitry Andric using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>; 82*06c3fb27SDimitry Andric 83*06c3fb27SDimitry Andric // Recursively dumps Values/StorageLocations as JSON 84*06c3fb27SDimitry Andric class ModelDumper { 85*06c3fb27SDimitry Andric public: 86*06c3fb27SDimitry Andric ModelDumper(llvm::json::OStream &JOS, const Environment &Env) 87*06c3fb27SDimitry Andric : JOS(JOS), Env(Env) {} 88*06c3fb27SDimitry Andric 89*06c3fb27SDimitry Andric void dump(Value &V) { 90*06c3fb27SDimitry Andric JOS.attribute("value_id", llvm::to_string(&V)); 91*06c3fb27SDimitry Andric if (!Visited.insert(&V).second) 92*06c3fb27SDimitry Andric return; 93*06c3fb27SDimitry Andric 94*06c3fb27SDimitry Andric JOS.attribute("kind", debugString(V.getKind())); 95*06c3fb27SDimitry Andric 96*06c3fb27SDimitry Andric switch (V.getKind()) { 97*06c3fb27SDimitry Andric case Value::Kind::Integer: 98*06c3fb27SDimitry Andric case Value::Kind::TopBool: 99*06c3fb27SDimitry Andric case Value::Kind::AtomicBool: 100*06c3fb27SDimitry Andric case Value::Kind::FormulaBool: 101*06c3fb27SDimitry Andric break; 102*06c3fb27SDimitry Andric case Value::Kind::Reference: 103*06c3fb27SDimitry Andric JOS.attributeObject( 104*06c3fb27SDimitry Andric "referent", [&] { dump(cast<ReferenceValue>(V).getReferentLoc()); }); 105*06c3fb27SDimitry Andric break; 106*06c3fb27SDimitry Andric case Value::Kind::Pointer: 107*06c3fb27SDimitry Andric JOS.attributeObject( 108*06c3fb27SDimitry Andric "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); }); 109*06c3fb27SDimitry Andric break; 110*06c3fb27SDimitry Andric case Value::Kind::Struct: 111*06c3fb27SDimitry Andric for (const auto &Child : 112*06c3fb27SDimitry Andric cast<StructValue>(V).getAggregateLoc().children()) 113*06c3fb27SDimitry Andric JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] { 
114*06c3fb27SDimitry Andric if (Child.second) 115*06c3fb27SDimitry Andric if (Value *Val = Env.getValue(*Child.second)) 116*06c3fb27SDimitry Andric dump(*Val); 117*06c3fb27SDimitry Andric }); 118*06c3fb27SDimitry Andric break; 119*06c3fb27SDimitry Andric } 120*06c3fb27SDimitry Andric 121*06c3fb27SDimitry Andric for (const auto& Prop : V.properties()) 122*06c3fb27SDimitry Andric JOS.attributeObject(("p:" + Prop.first()).str(), 123*06c3fb27SDimitry Andric [&] { dump(*Prop.second); }); 124*06c3fb27SDimitry Andric 125*06c3fb27SDimitry Andric // Running the SAT solver is expensive, but knowing which booleans are 126*06c3fb27SDimitry Andric // guaranteed true/false here is valuable and hard to determine by hand. 127*06c3fb27SDimitry Andric if (auto *B = llvm::dyn_cast<BoolValue>(&V)) { 128*06c3fb27SDimitry Andric JOS.attribute("formula", llvm::to_string(B->formula())); 129*06c3fb27SDimitry Andric JOS.attribute( 130*06c3fb27SDimitry Andric "truth", Env.flowConditionImplies(B->formula()) ? "true" 131*06c3fb27SDimitry Andric : Env.flowConditionImplies(Env.arena().makeNot(B->formula())) 132*06c3fb27SDimitry Andric ? 
"false" 133*06c3fb27SDimitry Andric : "unknown"); 134*06c3fb27SDimitry Andric } 135*06c3fb27SDimitry Andric } 136*06c3fb27SDimitry Andric void dump(const StorageLocation &L) { 137*06c3fb27SDimitry Andric JOS.attribute("location", llvm::to_string(&L)); 138*06c3fb27SDimitry Andric if (!Visited.insert(&L).second) 139*06c3fb27SDimitry Andric return; 140*06c3fb27SDimitry Andric 141*06c3fb27SDimitry Andric JOS.attribute("type", L.getType().getAsString()); 142*06c3fb27SDimitry Andric if (auto *V = Env.getValue(L)) 143*06c3fb27SDimitry Andric dump(*V); 144*06c3fb27SDimitry Andric } 145*06c3fb27SDimitry Andric 146*06c3fb27SDimitry Andric llvm::DenseSet<const void*> Visited; 147*06c3fb27SDimitry Andric llvm::json::OStream &JOS; 148*06c3fb27SDimitry Andric const Environment &Env; 149*06c3fb27SDimitry Andric }; 150*06c3fb27SDimitry Andric 151*06c3fb27SDimitry Andric class HTMLLogger : public Logger { 152*06c3fb27SDimitry Andric StreamFactory Streams; 153*06c3fb27SDimitry Andric std::unique_ptr<llvm::raw_ostream> OS; 154*06c3fb27SDimitry Andric std::optional<llvm::json::OStream> JOS; 155*06c3fb27SDimitry Andric 156*06c3fb27SDimitry Andric const ControlFlowContext *CFG; 157*06c3fb27SDimitry Andric // Timeline of iterations of CFG block visitation. 158*06c3fb27SDimitry Andric std::vector<std::pair<const CFGBlock *, unsigned>> Iters; 159*06c3fb27SDimitry Andric // Number of times each CFG block has been seen. 160*06c3fb27SDimitry Andric llvm::DenseMap<const CFGBlock *, unsigned> BlockIters; 161*06c3fb27SDimitry Andric // The messages logged in the current context but not yet written. 162*06c3fb27SDimitry Andric std::string ContextLogs; 163*06c3fb27SDimitry Andric // The number of elements we have visited within the current CFG block. 
164*06c3fb27SDimitry Andric unsigned ElementIndex; 165*06c3fb27SDimitry Andric 166*06c3fb27SDimitry Andric public: 167*06c3fb27SDimitry Andric explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {} 168*06c3fb27SDimitry Andric void beginAnalysis(const ControlFlowContext &CFG, 169*06c3fb27SDimitry Andric TypeErasedDataflowAnalysis &A) override { 170*06c3fb27SDimitry Andric OS = Streams(); 171*06c3fb27SDimitry Andric this->CFG = &CFG; 172*06c3fb27SDimitry Andric *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first; 173*06c3fb27SDimitry Andric 174*06c3fb27SDimitry Andric if (const auto *D = CFG.getDecl()) { 175*06c3fb27SDimitry Andric const auto &SM = A.getASTContext().getSourceManager(); 176*06c3fb27SDimitry Andric *OS << "<title>"; 177*06c3fb27SDimitry Andric if (const auto *ND = dyn_cast<NamedDecl>(D)) 178*06c3fb27SDimitry Andric *OS << ND->getNameAsString() << " at "; 179*06c3fb27SDimitry Andric *OS << SM.getFilename(D->getLocation()) << ":" 180*06c3fb27SDimitry Andric << SM.getSpellingLineNumber(D->getLocation()); 181*06c3fb27SDimitry Andric *OS << "</title>\n"; 182*06c3fb27SDimitry Andric }; 183*06c3fb27SDimitry Andric 184*06c3fb27SDimitry Andric *OS << "<style>" << HTMLLogger_css << "</style>\n"; 185*06c3fb27SDimitry Andric *OS << "<script>" << HTMLLogger_js << "</script>\n"; 186*06c3fb27SDimitry Andric 187*06c3fb27SDimitry Andric writeCode(); 188*06c3fb27SDimitry Andric writeCFG(); 189*06c3fb27SDimitry Andric 190*06c3fb27SDimitry Andric *OS << "<script>var HTMLLoggerData = \n"; 191*06c3fb27SDimitry Andric JOS.emplace(*OS, /*Indent=*/2); 192*06c3fb27SDimitry Andric JOS->objectBegin(); 193*06c3fb27SDimitry Andric JOS->attributeBegin("states"); 194*06c3fb27SDimitry Andric JOS->objectBegin(); 195*06c3fb27SDimitry Andric } 196*06c3fb27SDimitry Andric // Between beginAnalysis() and endAnalysis() we write all the states for 197*06c3fb27SDimitry Andric // particular analysis points into the `timeline` array. 
198*06c3fb27SDimitry Andric void endAnalysis() override { 199*06c3fb27SDimitry Andric JOS->objectEnd(); 200*06c3fb27SDimitry Andric JOS->attributeEnd(); 201*06c3fb27SDimitry Andric 202*06c3fb27SDimitry Andric JOS->attributeArray("timeline", [&] { 203*06c3fb27SDimitry Andric for (const auto &E : Iters) { 204*06c3fb27SDimitry Andric JOS->object([&] { 205*06c3fb27SDimitry Andric JOS->attribute("block", blockID(E.first->getBlockID())); 206*06c3fb27SDimitry Andric JOS->attribute("iter", E.second); 207*06c3fb27SDimitry Andric }); 208*06c3fb27SDimitry Andric } 209*06c3fb27SDimitry Andric }); 210*06c3fb27SDimitry Andric JOS->attributeObject("cfg", [&] { 211*06c3fb27SDimitry Andric for (const auto &E : BlockIters) 212*06c3fb27SDimitry Andric writeBlock(*E.first, E.second); 213*06c3fb27SDimitry Andric }); 214*06c3fb27SDimitry Andric 215*06c3fb27SDimitry Andric JOS->objectEnd(); 216*06c3fb27SDimitry Andric JOS.reset(); 217*06c3fb27SDimitry Andric *OS << ";\n</script>\n"; 218*06c3fb27SDimitry Andric *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second; 219*06c3fb27SDimitry Andric } 220*06c3fb27SDimitry Andric 221*06c3fb27SDimitry Andric void enterBlock(const CFGBlock &B) override { 222*06c3fb27SDimitry Andric Iters.emplace_back(&B, ++BlockIters[&B]); 223*06c3fb27SDimitry Andric ElementIndex = 0; 224*06c3fb27SDimitry Andric } 225*06c3fb27SDimitry Andric void enterElement(const CFGElement &E) override { 226*06c3fb27SDimitry Andric ++ElementIndex; 227*06c3fb27SDimitry Andric } 228*06c3fb27SDimitry Andric 229*06c3fb27SDimitry Andric static std::string blockID(unsigned Block) { 230*06c3fb27SDimitry Andric return llvm::formatv("B{0}", Block); 231*06c3fb27SDimitry Andric } 232*06c3fb27SDimitry Andric static std::string eltID(unsigned Block, unsigned Element) { 233*06c3fb27SDimitry Andric return llvm::formatv("B{0}.{1}", Block, Element); 234*06c3fb27SDimitry Andric } 235*06c3fb27SDimitry Andric static std::string iterID(unsigned Block, unsigned Iter) { 
236*06c3fb27SDimitry Andric return llvm::formatv("B{0}:{1}", Block, Iter); 237*06c3fb27SDimitry Andric } 238*06c3fb27SDimitry Andric static std::string elementIterID(unsigned Block, unsigned Iter, 239*06c3fb27SDimitry Andric unsigned Element) { 240*06c3fb27SDimitry Andric return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); 241*06c3fb27SDimitry Andric } 242*06c3fb27SDimitry Andric 243*06c3fb27SDimitry Andric // Write the analysis state associated with a particular analysis point. 244*06c3fb27SDimitry Andric // FIXME: this dump is fairly opaque. We should show: 245*06c3fb27SDimitry Andric // - values associated with the current Stmt 246*06c3fb27SDimitry Andric // - values associated with its children 247*06c3fb27SDimitry Andric // - meaningful names for values 248*06c3fb27SDimitry Andric // - which boolean values are implied true/false by the flow condition 249*06c3fb27SDimitry Andric void recordState(TypeErasedDataflowAnalysisState &State) override { 250*06c3fb27SDimitry Andric unsigned Block = Iters.back().first->getBlockID(); 251*06c3fb27SDimitry Andric unsigned Iter = Iters.back().second; 252*06c3fb27SDimitry Andric JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { 253*06c3fb27SDimitry Andric JOS->attribute("block", blockID(Block)); 254*06c3fb27SDimitry Andric JOS->attribute("iter", Iter); 255*06c3fb27SDimitry Andric JOS->attribute("element", ElementIndex); 256*06c3fb27SDimitry Andric 257*06c3fb27SDimitry Andric // If this state immediately follows an Expr, show its built-in model. 258*06c3fb27SDimitry Andric if (ElementIndex > 0) { 259*06c3fb27SDimitry Andric auto S = 260*06c3fb27SDimitry Andric Iters.back().first->Elements[ElementIndex - 1].getAs<CFGStmt>(); 261*06c3fb27SDimitry Andric if (const Expr *E = S ? 
llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) 262*06c3fb27SDimitry Andric if (auto *Loc = State.Env.getStorageLocation(*E, SkipPast::None)) 263*06c3fb27SDimitry Andric JOS->attributeObject( 264*06c3fb27SDimitry Andric "value", [&] { ModelDumper(*JOS, State.Env).dump(*Loc); }); 265*06c3fb27SDimitry Andric } 266*06c3fb27SDimitry Andric if (!ContextLogs.empty()) { 267*06c3fb27SDimitry Andric JOS->attribute("logs", ContextLogs); 268*06c3fb27SDimitry Andric ContextLogs.clear(); 269*06c3fb27SDimitry Andric } 270*06c3fb27SDimitry Andric { 271*06c3fb27SDimitry Andric std::string BuiltinLattice; 272*06c3fb27SDimitry Andric llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); 273*06c3fb27SDimitry Andric State.Env.dump(BuiltinLatticeS); 274*06c3fb27SDimitry Andric JOS->attribute("builtinLattice", BuiltinLattice); 275*06c3fb27SDimitry Andric } 276*06c3fb27SDimitry Andric }); 277*06c3fb27SDimitry Andric } 278*06c3fb27SDimitry Andric void blockConverged() override { logText("Block converged"); } 279*06c3fb27SDimitry Andric 280*06c3fb27SDimitry Andric void logText(llvm::StringRef S) override { 281*06c3fb27SDimitry Andric ContextLogs.append(S.begin(), S.end()); 282*06c3fb27SDimitry Andric ContextLogs.push_back('\n'); 283*06c3fb27SDimitry Andric } 284*06c3fb27SDimitry Andric 285*06c3fb27SDimitry Andric private: 286*06c3fb27SDimitry Andric // Write the CFG block details. 287*06c3fb27SDimitry Andric // Currently this is just the list of elements in execution order. 288*06c3fb27SDimitry Andric // FIXME: an AST dump would be a useful view, too. 
289*06c3fb27SDimitry Andric void writeBlock(const CFGBlock &B, unsigned Iters) { 290*06c3fb27SDimitry Andric JOS->attributeObject(blockID(B.getBlockID()), [&] { 291*06c3fb27SDimitry Andric JOS->attribute("iters", Iters); 292*06c3fb27SDimitry Andric JOS->attributeArray("elements", [&] { 293*06c3fb27SDimitry Andric for (const auto &Elt : B.Elements) { 294*06c3fb27SDimitry Andric std::string Dump; 295*06c3fb27SDimitry Andric llvm::raw_string_ostream DumpS(Dump); 296*06c3fb27SDimitry Andric Elt.dumpToStream(DumpS); 297*06c3fb27SDimitry Andric JOS->value(Dump); 298*06c3fb27SDimitry Andric } 299*06c3fb27SDimitry Andric }); 300*06c3fb27SDimitry Andric }); 301*06c3fb27SDimitry Andric } 302*06c3fb27SDimitry Andric 303*06c3fb27SDimitry Andric // Write the code of function being examined. 304*06c3fb27SDimitry Andric // We want to overlay the code with <span>s that mark which BB particular 305*06c3fb27SDimitry Andric // tokens are associated with, and even which BB element (so that clicking 306*06c3fb27SDimitry Andric // can select the right element). 307*06c3fb27SDimitry Andric void writeCode() { 308*06c3fb27SDimitry Andric if (!CFG->getDecl()) 309*06c3fb27SDimitry Andric return; 310*06c3fb27SDimitry Andric const auto &AST = CFG->getDecl()->getASTContext(); 311*06c3fb27SDimitry Andric bool Invalid = false; 312*06c3fb27SDimitry Andric 313*06c3fb27SDimitry Andric // Extract the source code from the original file. 314*06c3fb27SDimitry Andric // Pretty-printing from the AST would probably be nicer (no macros or 315*06c3fb27SDimitry Andric // indentation to worry about), but we need the boundaries of particular 316*06c3fb27SDimitry Andric // AST nodes and the printer doesn't provide this. 
317*06c3fb27SDimitry Andric auto Range = clang::Lexer::makeFileCharRange( 318*06c3fb27SDimitry Andric CharSourceRange::getTokenRange(CFG->getDecl()->getSourceRange()), 319*06c3fb27SDimitry Andric AST.getSourceManager(), AST.getLangOpts()); 320*06c3fb27SDimitry Andric if (Range.isInvalid()) 321*06c3fb27SDimitry Andric return; 322*06c3fb27SDimitry Andric llvm::StringRef Code = clang::Lexer::getSourceText( 323*06c3fb27SDimitry Andric Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid); 324*06c3fb27SDimitry Andric if (Invalid) 325*06c3fb27SDimitry Andric return; 326*06c3fb27SDimitry Andric 327*06c3fb27SDimitry Andric static constexpr unsigned Missing = -1; 328*06c3fb27SDimitry Andric // TokenInfo stores the BB and set of elements that a token is part of. 329*06c3fb27SDimitry Andric struct TokenInfo { 330*06c3fb27SDimitry Andric // The basic block this is part of. 331*06c3fb27SDimitry Andric // This is the BB of the stmt with the smallest containing range. 332*06c3fb27SDimitry Andric unsigned BB = Missing; 333*06c3fb27SDimitry Andric unsigned BBPriority = 0; 334*06c3fb27SDimitry Andric // The most specific stmt this is part of (smallest range). 335*06c3fb27SDimitry Andric unsigned Elt = Missing; 336*06c3fb27SDimitry Andric unsigned EltPriority = 0; 337*06c3fb27SDimitry Andric // All stmts this is part of. 338*06c3fb27SDimitry Andric SmallVector<unsigned> Elts; 339*06c3fb27SDimitry Andric 340*06c3fb27SDimitry Andric // Mark this token as being part of BB.Elt. 341*06c3fb27SDimitry Andric // RangeLen is the character length of the element's range, used to 342*06c3fb27SDimitry Andric // distinguish inner vs outer statements. 343*06c3fb27SDimitry Andric // For example in `a==0`, token "a" is part of the stmts "a" and "a==0". 344*06c3fb27SDimitry Andric // However "a" has a smaller range, so is more specific. Clicking on the 345*06c3fb27SDimitry Andric // token "a" should select the stmt "a". 
346*06c3fb27SDimitry Andric void assign(unsigned BB, unsigned Elt, unsigned RangeLen) { 347*06c3fb27SDimitry Andric // A worse BB (larger range) => ignore. 348*06c3fb27SDimitry Andric if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen) 349*06c3fb27SDimitry Andric return; 350*06c3fb27SDimitry Andric if (BB != this->BB) { 351*06c3fb27SDimitry Andric this->BB = BB; 352*06c3fb27SDimitry Andric Elts.clear(); 353*06c3fb27SDimitry Andric BBPriority = RangeLen; 354*06c3fb27SDimitry Andric } 355*06c3fb27SDimitry Andric BBPriority = std::min(BBPriority, RangeLen); 356*06c3fb27SDimitry Andric Elts.push_back(Elt); 357*06c3fb27SDimitry Andric if (this->Elt == Missing || EltPriority > RangeLen) 358*06c3fb27SDimitry Andric this->Elt = Elt; 359*06c3fb27SDimitry Andric } 360*06c3fb27SDimitry Andric bool operator==(const TokenInfo &Other) const { 361*06c3fb27SDimitry Andric return std::tie(BB, Elt, Elts) == 362*06c3fb27SDimitry Andric std::tie(Other.BB, Other.Elt, Other.Elts); 363*06c3fb27SDimitry Andric } 364*06c3fb27SDimitry Andric // Write the attributes for the <span> on this token. 365*06c3fb27SDimitry Andric void write(llvm::raw_ostream &OS) const { 366*06c3fb27SDimitry Andric OS << "class='c"; 367*06c3fb27SDimitry Andric if (BB != Missing) 368*06c3fb27SDimitry Andric OS << " " << blockID(BB); 369*06c3fb27SDimitry Andric for (unsigned Elt : Elts) 370*06c3fb27SDimitry Andric OS << " " << eltID(BB, Elt); 371*06c3fb27SDimitry Andric OS << "'"; 372*06c3fb27SDimitry Andric 373*06c3fb27SDimitry Andric if (Elt != Missing) 374*06c3fb27SDimitry Andric OS << " data-elt='" << eltID(BB, Elt) << "'"; 375*06c3fb27SDimitry Andric if (BB != Missing) 376*06c3fb27SDimitry Andric OS << " data-bb='" << blockID(BB) << "'"; 377*06c3fb27SDimitry Andric } 378*06c3fb27SDimitry Andric }; 379*06c3fb27SDimitry Andric 380*06c3fb27SDimitry Andric // Construct one TokenInfo per character in a flat array. 
381*06c3fb27SDimitry Andric // This is inefficient (chars in a token all have the same info) but simple. 382*06c3fb27SDimitry Andric std::vector<TokenInfo> State(Code.size()); 383*06c3fb27SDimitry Andric for (const auto *Block : CFG->getCFG()) { 384*06c3fb27SDimitry Andric unsigned EltIndex = 0; 385*06c3fb27SDimitry Andric for (const auto& Elt : *Block) { 386*06c3fb27SDimitry Andric ++EltIndex; 387*06c3fb27SDimitry Andric if (const auto S = Elt.getAs<CFGStmt>()) { 388*06c3fb27SDimitry Andric auto EltRange = clang::Lexer::makeFileCharRange( 389*06c3fb27SDimitry Andric CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()), 390*06c3fb27SDimitry Andric AST.getSourceManager(), AST.getLangOpts()); 391*06c3fb27SDimitry Andric if (EltRange.isInvalid()) 392*06c3fb27SDimitry Andric continue; 393*06c3fb27SDimitry Andric if (EltRange.getBegin() < Range.getBegin() || 394*06c3fb27SDimitry Andric EltRange.getEnd() >= Range.getEnd() || 395*06c3fb27SDimitry Andric EltRange.getEnd() < Range.getBegin() || 396*06c3fb27SDimitry Andric EltRange.getEnd() >= Range.getEnd()) 397*06c3fb27SDimitry Andric continue; 398*06c3fb27SDimitry Andric unsigned Off = EltRange.getBegin().getRawEncoding() - 399*06c3fb27SDimitry Andric Range.getBegin().getRawEncoding(); 400*06c3fb27SDimitry Andric unsigned Len = EltRange.getEnd().getRawEncoding() - 401*06c3fb27SDimitry Andric EltRange.getBegin().getRawEncoding(); 402*06c3fb27SDimitry Andric for (unsigned I = 0; I < Len; ++I) 403*06c3fb27SDimitry Andric State[Off + I].assign(Block->getBlockID(), EltIndex, Len); 404*06c3fb27SDimitry Andric } 405*06c3fb27SDimitry Andric } 406*06c3fb27SDimitry Andric } 407*06c3fb27SDimitry Andric 408*06c3fb27SDimitry Andric // Finally, write the code with the correct <span>s. 
409*06c3fb27SDimitry Andric unsigned Line = 410*06c3fb27SDimitry Andric AST.getSourceManager().getSpellingLineNumber(Range.getBegin()); 411*06c3fb27SDimitry Andric *OS << "<template data-copy='code'>\n"; 412*06c3fb27SDimitry Andric *OS << "<code class='filename'>"; 413*06c3fb27SDimitry Andric llvm::printHTMLEscaped( 414*06c3fb27SDimitry Andric llvm::sys::path::filename( 415*06c3fb27SDimitry Andric AST.getSourceManager().getFilename(Range.getBegin())), 416*06c3fb27SDimitry Andric *OS); 417*06c3fb27SDimitry Andric *OS << "</code>"; 418*06c3fb27SDimitry Andric *OS << "<code class='line' data-line='" << Line++ << "'>"; 419*06c3fb27SDimitry Andric for (unsigned I = 0; I < Code.size(); ++I) { 420*06c3fb27SDimitry Andric // Don't actually write a <span> around each character, only break spans 421*06c3fb27SDimitry Andric // when the TokenInfo changes. 422*06c3fb27SDimitry Andric bool NeedOpen = I == 0 || !(State[I] == State[I-1]); 423*06c3fb27SDimitry Andric bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]); 424*06c3fb27SDimitry Andric if (NeedOpen) { 425*06c3fb27SDimitry Andric *OS << "<span "; 426*06c3fb27SDimitry Andric State[I].write(*OS); 427*06c3fb27SDimitry Andric *OS << ">"; 428*06c3fb27SDimitry Andric } 429*06c3fb27SDimitry Andric if (Code[I] == '\n') 430*06c3fb27SDimitry Andric *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>"; 431*06c3fb27SDimitry Andric else 432*06c3fb27SDimitry Andric llvm::printHTMLEscaped(Code.substr(I, 1), *OS); 433*06c3fb27SDimitry Andric if (NeedClose) *OS << "</span>"; 434*06c3fb27SDimitry Andric } 435*06c3fb27SDimitry Andric *OS << "</code>\n"; 436*06c3fb27SDimitry Andric *OS << "</template>"; 437*06c3fb27SDimitry Andric } 438*06c3fb27SDimitry Andric 439*06c3fb27SDimitry Andric // Write the CFG diagram, a graph of basic blocks. 440*06c3fb27SDimitry Andric // Laying out graphs is hard, so we construct a graphviz description and shell 441*06c3fb27SDimitry Andric // out to `dot` to turn it into an SVG. 
442*06c3fb27SDimitry Andric void writeCFG() { 443*06c3fb27SDimitry Andric *OS << "<template data-copy='cfg'>\n"; 444*06c3fb27SDimitry Andric if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG()))) 445*06c3fb27SDimitry Andric *OS << *SVG; 446*06c3fb27SDimitry Andric else 447*06c3fb27SDimitry Andric *OS << "Can't draw CFG: " << toString(SVG.takeError()); 448*06c3fb27SDimitry Andric *OS << "</template>\n"; 449*06c3fb27SDimitry Andric } 450*06c3fb27SDimitry Andric 451*06c3fb27SDimitry Andric // Produce a graphviz description of a CFG. 452*06c3fb27SDimitry Andric static std::string buildCFGDot(const clang::CFG &CFG) { 453*06c3fb27SDimitry Andric std::string Graph; 454*06c3fb27SDimitry Andric llvm::raw_string_ostream GraphS(Graph); 455*06c3fb27SDimitry Andric // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses. 456*06c3fb27SDimitry Andric GraphS << R"(digraph { 457*06c3fb27SDimitry Andric tooltip=" " 458*06c3fb27SDimitry Andric node[class=bb, shape=square, fontname="sans-serif", tooltip=" "] 459*06c3fb27SDimitry Andric edge[tooltip = " "] 460*06c3fb27SDimitry Andric )"; 461*06c3fb27SDimitry Andric for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) 462*06c3fb27SDimitry Andric GraphS << " " << blockID(I) << " [id=" << blockID(I) << "]\n"; 463*06c3fb27SDimitry Andric for (const auto *Block : CFG) { 464*06c3fb27SDimitry Andric for (const auto &Succ : Block->succs()) { 465*06c3fb27SDimitry Andric GraphS << " " << blockID(Block->getBlockID()) << " -> " 466*06c3fb27SDimitry Andric << blockID(Succ.getReachableBlock()->getBlockID()) << "\n"; 467*06c3fb27SDimitry Andric } 468*06c3fb27SDimitry Andric } 469*06c3fb27SDimitry Andric GraphS << "}\n"; 470*06c3fb27SDimitry Andric return Graph; 471*06c3fb27SDimitry Andric } 472*06c3fb27SDimitry Andric }; 473*06c3fb27SDimitry Andric 474*06c3fb27SDimitry Andric // Nothing interesting here, just subprocess/temp-file plumbing. 
475*06c3fb27SDimitry Andric llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) { 476*06c3fb27SDimitry Andric std::string DotPath; 477*06c3fb27SDimitry Andric if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT")) 478*06c3fb27SDimitry Andric DotPath = FromEnv; 479*06c3fb27SDimitry Andric else { 480*06c3fb27SDimitry Andric auto FromPath = llvm::sys::findProgramByName("dot"); 481*06c3fb27SDimitry Andric if (!FromPath) 482*06c3fb27SDimitry Andric return llvm::createStringError(FromPath.getError(), 483*06c3fb27SDimitry Andric "'dot' not found on PATH"); 484*06c3fb27SDimitry Andric DotPath = FromPath.get(); 485*06c3fb27SDimitry Andric } 486*06c3fb27SDimitry Andric 487*06c3fb27SDimitry Andric // Create input and output files for `dot` subprocess. 488*06c3fb27SDimitry Andric // (We create the output file as empty, to reserve the temp filename). 489*06c3fb27SDimitry Andric llvm::SmallString<256> Input, Output; 490*06c3fb27SDimitry Andric int InputFD; 491*06c3fb27SDimitry Andric if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, 492*06c3fb27SDimitry Andric Input)) 493*06c3fb27SDimitry Andric return llvm::createStringError(EC, "failed to create `dot` temp input"); 494*06c3fb27SDimitry Andric llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; 495*06c3fb27SDimitry Andric auto DeleteInput = 496*06c3fb27SDimitry Andric llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); 497*06c3fb27SDimitry Andric if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) 498*06c3fb27SDimitry Andric return llvm::createStringError(EC, "failed to create `dot` temp output"); 499*06c3fb27SDimitry Andric auto DeleteOutput = 500*06c3fb27SDimitry Andric llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); 501*06c3fb27SDimitry Andric 502*06c3fb27SDimitry Andric std::vector<std::optional<llvm::StringRef>> Redirects = { 503*06c3fb27SDimitry Andric Input, Output, 504*06c3fb27SDimitry Andric /*stderr=*/std::nullopt}; 
505*06c3fb27SDimitry Andric std::string ErrMsg; 506*06c3fb27SDimitry Andric int Code = llvm::sys::ExecuteAndWait( 507*06c3fb27SDimitry Andric DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, 508*06c3fb27SDimitry Andric /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); 509*06c3fb27SDimitry Andric if (!ErrMsg.empty()) 510*06c3fb27SDimitry Andric return llvm::createStringError(llvm::inconvertibleErrorCode(), 511*06c3fb27SDimitry Andric "'dot' failed: " + ErrMsg); 512*06c3fb27SDimitry Andric if (Code != 0) 513*06c3fb27SDimitry Andric return llvm::createStringError(llvm::inconvertibleErrorCode(), 514*06c3fb27SDimitry Andric "'dot' failed (" + llvm::Twine(Code) + ")"); 515*06c3fb27SDimitry Andric 516*06c3fb27SDimitry Andric auto Buf = llvm::MemoryBuffer::getFile(Output); 517*06c3fb27SDimitry Andric if (!Buf) 518*06c3fb27SDimitry Andric return llvm::createStringError(Buf.getError(), "Can't read `dot` output"); 519*06c3fb27SDimitry Andric 520*06c3fb27SDimitry Andric // Output has <?xml> prefix we don't want. Skip to <svg> tag. 521*06c3fb27SDimitry Andric llvm::StringRef Result = Buf.get()->getBuffer(); 522*06c3fb27SDimitry Andric auto Pos = Result.find("<svg"); 523*06c3fb27SDimitry Andric if (Pos == llvm::StringRef::npos) 524*06c3fb27SDimitry Andric return llvm::createStringError(llvm::inconvertibleErrorCode(), 525*06c3fb27SDimitry Andric "Can't find <svg> tag in `dot` output"); 526*06c3fb27SDimitry Andric return Result.substr(Pos).str(); 527*06c3fb27SDimitry Andric } 528*06c3fb27SDimitry Andric 529*06c3fb27SDimitry Andric } // namespace 530*06c3fb27SDimitry Andric 531*06c3fb27SDimitry Andric std::unique_ptr<Logger> 532*06c3fb27SDimitry Andric Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) { 533*06c3fb27SDimitry Andric return std::make_unique<HTMLLogger>(std::move(Streams)); 534*06c3fb27SDimitry Andric } 535*06c3fb27SDimitry Andric 536*06c3fb27SDimitry Andric } // namespace clang::dataflow 537