xref: /freebsd/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Markup.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- Markup.h -------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file declares the log symbolizer markup data model and parser.
11 ///
12 /// See https://llvm.org/docs/SymbolizerMarkupFormat.html
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
17 #define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
18 
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/Support/Compiler.h"
23 #include "llvm/Support/Regex.h"
24 
25 namespace llvm {
26 namespace symbolize {
27 
28 /// A node of symbolizer markup.
29 ///
30 /// If only the Text field is set, this represents a region of text outside a
31 /// markup element. ANSI SGR control codes are also reported this way; if
32 /// detected, then the control code will be the entirety of the Text field, and
33 /// any surrounding text will be reported as preceding and following nodes.
34 struct MarkupNode {
35   /// The full text of this node in the input.
36   StringRef Text;
37 
38   /// If this represents an element, the tag. Otherwise, empty.
39   StringRef Tag;
40 
41   /// If this represents an element with fields, a list of the field contents.
42   /// Otherwise, empty.
43   SmallVector<StringRef> Fields;
44 
45   bool operator==(const MarkupNode &Other) const {
46     return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
47   }
48   bool operator!=(const MarkupNode &Other) const { return !(*this == Other); }
49 };
50 
51 /// Parses a log containing symbolizer markup into a sequence of nodes.
52 class MarkupParser {
53 public:
54   LLVM_ABI MarkupParser(StringSet<> MultilineTags = {});
55 
56   /// Parses an individual \p Line of input.
57   ///
58   /// Nodes from the previous parseLine() call that haven't yet been extracted
59   /// by nextNode() are discarded. The nodes returned by nextNode() may
60   /// reference the input string, so it must be retained by the caller until the
61   /// last use.
62   ///
63   /// Note that some elements may span multiple lines. If a line ends with the
64   /// start of one of these elements, then no nodes will be produced until the
65   /// either the end or something that cannot be part of an element is
66   /// encountered. This may only occur after multiple calls to parseLine(),
67   /// corresponding to the lines of the multi-line element.
68   LLVM_ABI void parseLine(StringRef Line);
69 
70   /// Inform the parser of that the input stream has ended.
71   ///
72   /// This allows the parser to finish any deferred processing (e.g., an
73   /// in-progress multi-line element) and may cause nextNode() to return
74   /// additional nodes.
75   LLVM_ABI void flush();
76 
77   /// Returns the next node in the input sequence.
78   ///
79   /// Calling nextNode() may invalidate the contents of the node returned by the
80   /// previous call.
81   ///
82   /// \returns the next markup node or std::nullopt if none remain.
83   LLVM_ABI std::optional<MarkupNode> nextNode();
84 
isSGR(const MarkupNode & Node)85   bool isSGR(const MarkupNode &Node) const {
86     return SGRSyntax.match(Node.Text);
87   }
88 
89 private:
90   std::optional<MarkupNode> parseElement(StringRef Line);
91   void parseTextOutsideMarkup(StringRef Text);
92   std::optional<StringRef> parseMultiLineBegin(StringRef Line);
93   std::optional<StringRef> parseMultiLineEnd(StringRef Line);
94 
95   // Tags of elements that can span multiple lines.
96   const StringSet<> MultilineTags;
97 
98   // Contents of a multi-line element that has finished being parsed. Retained
99   // to keep returned StringRefs for the contents valid.
100   std::string FinishedMultiline;
101 
102   // Contents of a multi-line element that is still in the process of receiving
103   // lines.
104   std::string InProgressMultiline;
105 
106   // The line currently being parsed.
107   StringRef Line;
108 
109   // Buffer for nodes parsed from the current line.
110   SmallVector<MarkupNode> Buffer;
111 
112   // Next buffer index to return.
113   size_t NextIdx;
114 
115   // Regular expression matching supported ANSI SGR escape sequences.
116   const Regex SGRSyntax;
117 };
118 
119 } // end namespace symbolize
120 } // end namespace llvm
121 
122 #endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
123