1 //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines the log symbolizer markup data model and parser. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/DebugInfo/Symbolize/Markup.h" 15 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/StringExtras.h" 18 19 namespace llvm { 20 namespace symbolize { 21 22 // Matches the following: 23 // "\033[0m" 24 // "\033[1m" 25 // "\033[30m" -- "\033[37m" 26 static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m"; 27 28 MarkupParser::MarkupParser(StringSet<> MultilineTags) 29 : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} 30 31 static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { 32 return Str.take_front(Pos - Str.begin()); 33 } 34 static void advanceTo(StringRef &Str, StringRef::iterator Pos) { 35 Str = Str.drop_front(Pos - Str.begin()); 36 } 37 38 void MarkupParser::parseLine(StringRef Line) { 39 Buffer.clear(); 40 NextIdx = 0; 41 FinishedMultiline.clear(); 42 this->Line = Line; 43 } 44 45 std::optional<MarkupNode> MarkupParser::nextNode() { 46 // Pull something out of the buffer if possible. 47 if (!Buffer.empty()) { 48 if (NextIdx < Buffer.size()) 49 return std::move(Buffer[NextIdx++]); 50 NextIdx = 0; 51 Buffer.clear(); 52 } 53 54 // The buffer is empty, so parse the next bit of the line. 55 56 if (Line.empty()) 57 return std::nullopt; 58 59 if (!InProgressMultiline.empty()) { 60 if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { 61 llvm::append_range(InProgressMultiline, *MultilineEnd); 62 assert(FinishedMultiline.empty() && 63 "At most one multi-line element can be finished at a time."); 64 FinishedMultiline.swap(InProgressMultiline); 65 // Parse the multi-line element as if it were contiguous. 66 advanceTo(Line, MultilineEnd->end()); 67 return *parseElement(FinishedMultiline); 68 } 69 70 // The whole line is part of the multi-line element. 71 llvm::append_range(InProgressMultiline, Line); 72 Line = Line.drop_front(Line.size()); 73 return std::nullopt; 74 } 75 76 // Find the first valid markup element, if any. 77 if (std::optional<MarkupNode> Element = parseElement(Line)) { 78 parseTextOutsideMarkup(takeTo(Line, Element->Text.begin())); 79 Buffer.push_back(std::move(*Element)); 80 advanceTo(Line, Element->Text.end()); 81 return nextNode(); 82 } 83 84 // Since there were no valid elements remaining, see if the line opens a 85 // multi-line element. 86 if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { 87 // Emit any text before the element. 88 parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin())); 89 90 // Begin recording the multi-line element. 91 llvm::append_range(InProgressMultiline, *MultilineBegin); 92 Line = Line.drop_front(Line.size()); 93 return nextNode(); 94 } 95 96 // The line doesn't contain any more markup elements, so emit it as text. 97 parseTextOutsideMarkup(Line); 98 Line = Line.drop_front(Line.size()); 99 return nextNode(); 100 } 101 102 void MarkupParser::flush() { 103 Buffer.clear(); 104 NextIdx = 0; 105 Line = {}; 106 if (InProgressMultiline.empty()) 107 return; 108 FinishedMultiline.swap(InProgressMultiline); 109 parseTextOutsideMarkup(FinishedMultiline); 110 } 111 112 // Finds and returns the next valid markup element in the given line. Returns 113 // std::nullopt if the line contains no valid elements. 114 std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { 115 while (true) { 116 // Find next element using begin and end markers. 117 size_t BeginPos = Line.find("{{{"); 118 if (BeginPos == StringRef::npos) 119 return std::nullopt; 120 size_t EndPos = Line.find("}}}", BeginPos + 3); 121 if (EndPos == StringRef::npos) 122 return std::nullopt; 123 EndPos += 3; 124 MarkupNode Element; 125 Element.Text = Line.slice(BeginPos, EndPos); 126 Line = Line.substr(EndPos); 127 128 // Parse tag. 129 StringRef Content = Element.Text.drop_front(3).drop_back(3); 130 StringRef FieldsContent; 131 std::tie(Element.Tag, FieldsContent) = Content.split(':'); 132 if (Element.Tag.empty()) 133 continue; 134 135 // Parse fields. 136 if (!FieldsContent.empty()) 137 FieldsContent.split(Element.Fields, ":"); 138 else if (Content.back() == ':') 139 Element.Fields.push_back(FieldsContent); 140 141 return Element; 142 } 143 } 144 145 static MarkupNode textNode(StringRef Text) { 146 MarkupNode Node; 147 Node.Text = Text; 148 return Node; 149 } 150 151 // Parses a region of text known to be outside any markup elements. Such text 152 // may still contain SGR control codes, so the region is further subdivided into 153 // control codes and true text regions. 154 void MarkupParser::parseTextOutsideMarkup(StringRef Text) { 155 if (Text.empty()) 156 return; 157 SmallVector<StringRef> Matches; 158 while (SGRSyntax.match(Text, &Matches)) { 159 // Emit any text before the SGR element. 160 if (Matches.begin()->begin() != Text.begin()) 161 Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin()))); 162 163 Buffer.push_back(textNode(*Matches.begin())); 164 advanceTo(Text, Matches.begin()->end()); 165 } 166 if (!Text.empty()) 167 Buffer.push_back(textNode(Text)); 168 } 169 170 // Given that a line doesn't contain any valid markup, see if it ends with the 171 // start of a multi-line element. If so, returns the beginning. 172 std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { 173 // A multi-line begin marker must be the last one on the line. 174 size_t BeginPos = Line.rfind("{{{"); 175 if (BeginPos == StringRef::npos) 176 return std::nullopt; 177 size_t BeginTagPos = BeginPos + 3; 178 179 // If there are any end markers afterwards, the begin marker cannot belong to 180 // a multi-line element. 181 size_t EndPos = Line.find("}}}", BeginTagPos); 182 if (EndPos != StringRef::npos) 183 return std::nullopt; 184 185 // Check whether the tag is registered multi-line. 186 size_t EndTagPos = Line.find(':', BeginTagPos); 187 if (EndTagPos == StringRef::npos) 188 return std::nullopt; 189 StringRef Tag = Line.slice(BeginTagPos, EndTagPos); 190 if (!MultilineTags.contains(Tag)) 191 return std::nullopt; 192 return Line.substr(BeginPos); 193 } 194 195 // See if the line begins with the ending of an in-progress multi-line element. 196 // If so, return the ending. 197 std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { 198 size_t EndPos = Line.find("}}}"); 199 if (EndPos == StringRef::npos) 200 return std::nullopt; 201 return Line.take_front(EndPos + 3); 202 } 203 204 } // end namespace symbolize 205 } // end namespace llvm 206