1 //===- Markup.h -------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file declares the log symbolizer markup data model and parser. 11 /// 12 /// See https://llvm.org/docs/SymbolizerMarkupFormat.html 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H 17 #define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H 18 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/Support/Compiler.h" 23 #include "llvm/Support/Regex.h" 24 25 namespace llvm { 26 namespace symbolize { 27 28 /// A node of symbolizer markup. 29 /// 30 /// If only the Text field is set, this represents a region of text outside a 31 /// markup element. ANSI SGR control codes are also reported this way; if 32 /// detected, then the control code will be the entirety of the Text field, and 33 /// any surrounding text will be reported as preceding and following nodes. 34 struct MarkupNode { 35 /// The full text of this node in the input. 36 StringRef Text; 37 38 /// If this represents an element, the tag. Otherwise, empty. 39 StringRef Tag; 40 41 /// If this represents an element with fields, a list of the field contents. 42 /// Otherwise, empty. 43 SmallVector<StringRef> Fields; 44 45 bool operator==(const MarkupNode &Other) const { 46 return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields; 47 } 48 bool operator!=(const MarkupNode &Other) const { return !(*this == Other); } 49 }; 50 51 /// Parses a log containing symbolizer markup into a sequence of nodes. 52 class MarkupParser { 53 public: 54 LLVM_ABI MarkupParser(StringSet<> MultilineTags = {}); 55 56 /// Parses an individual \p Line of input. 57 /// 58 /// Nodes from the previous parseLine() call that haven't yet been extracted 59 /// by nextNode() are discarded. The nodes returned by nextNode() may 60 /// reference the input string, so it must be retained by the caller until the 61 /// last use. 62 /// 63 /// Note that some elements may span multiple lines. If a line ends with the 64 /// start of one of these elements, then no nodes will be produced until the 65 /// either the end or something that cannot be part of an element is 66 /// encountered. This may only occur after multiple calls to parseLine(), 67 /// corresponding to the lines of the multi-line element. 68 LLVM_ABI void parseLine(StringRef Line); 69 70 /// Inform the parser of that the input stream has ended. 71 /// 72 /// This allows the parser to finish any deferred processing (e.g., an 73 /// in-progress multi-line element) and may cause nextNode() to return 74 /// additional nodes. 75 LLVM_ABI void flush(); 76 77 /// Returns the next node in the input sequence. 78 /// 79 /// Calling nextNode() may invalidate the contents of the node returned by the 80 /// previous call. 81 /// 82 /// \returns the next markup node or std::nullopt if none remain. 83 LLVM_ABI std::optional<MarkupNode> nextNode(); 84 isSGR(const MarkupNode & Node)85 bool isSGR(const MarkupNode &Node) const { 86 return SGRSyntax.match(Node.Text); 87 } 88 89 private: 90 std::optional<MarkupNode> parseElement(StringRef Line); 91 void parseTextOutsideMarkup(StringRef Text); 92 std::optional<StringRef> parseMultiLineBegin(StringRef Line); 93 std::optional<StringRef> parseMultiLineEnd(StringRef Line); 94 95 // Tags of elements that can span multiple lines. 96 const StringSet<> MultilineTags; 97 98 // Contents of a multi-line element that has finished being parsed. Retained 99 // to keep returned StringRefs for the contents valid. 100 std::string FinishedMultiline; 101 102 // Contents of a multi-line element that is still in the process of receiving 103 // lines. 104 std::string InProgressMultiline; 105 106 // The line currently being parsed. 107 StringRef Line; 108 109 // Buffer for nodes parsed from the current line. 110 SmallVector<MarkupNode> Buffer; 111 112 // Next buffer index to return. 113 size_t NextIdx; 114 115 // Regular expression matching supported ANSI SGR escape sequences. 116 const Regex SGRSyntax; 117 }; 118 119 } // end namespace symbolize 120 } // end namespace llvm 121 122 #endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H 123