//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file defines the log symbolizer markup data model and parser. /// //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/Symbolize/Markup.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" namespace llvm { namespace symbolize { // Matches the following: // "\033[0m" // "\033[1m" // "\033[30m" -- "\033[37m" static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m"; MarkupParser::MarkupParser(StringSet<> MultilineTags) : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { return Str.take_front(Pos - Str.begin()); } static void advanceTo(StringRef &Str, StringRef::iterator Pos) { Str = Str.drop_front(Pos - Str.begin()); } void MarkupParser::parseLine(StringRef Line) { Buffer.clear(); NextIdx = 0; FinishedMultiline.clear(); this->Line = Line; } Optional MarkupParser::nextNode() { // Pull something out of the buffer if possible. if (!Buffer.empty()) { if (NextIdx < Buffer.size()) return std::move(Buffer[NextIdx++]); NextIdx = 0; Buffer.clear(); } // The buffer is empty, so parse the next bit of the line. if (Line.empty()) return None; if (!InProgressMultiline.empty()) { if (Optional MultilineEnd = parseMultiLineEnd(Line)) { llvm::append_range(InProgressMultiline, *MultilineEnd); assert(FinishedMultiline.empty() && "At most one multi-line element can be finished at a time."); FinishedMultiline.swap(InProgressMultiline); // Parse the multi-line element as if it were contiguous. advanceTo(Line, MultilineEnd->end()); return *parseElement(FinishedMultiline); } // The whole line is part of the multi-line element. llvm::append_range(InProgressMultiline, Line); Line = Line.drop_front(Line.size()); return None; } // Find the first valid markup element, if any. if (Optional Element = parseElement(Line)) { parseTextOutsideMarkup(takeTo(Line, Element->Text.begin())); Buffer.push_back(std::move(*Element)); advanceTo(Line, Element->Text.end()); return nextNode(); } // Since there were no valid elements remaining, see if the line opens a // multi-line element. if (Optional MultilineBegin = parseMultiLineBegin(Line)) { // Emit any text before the element. parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin())); // Begin recording the multi-line element. llvm::append_range(InProgressMultiline, *MultilineBegin); Line = Line.drop_front(Line.size()); return nextNode(); } // The line doesn't contain any more markup elements, so emit it as text. parseTextOutsideMarkup(Line); Line = Line.drop_front(Line.size()); return nextNode(); } void MarkupParser::flush() { Buffer.clear(); NextIdx = 0; Line = {}; if (InProgressMultiline.empty()) return; FinishedMultiline.swap(InProgressMultiline); parseTextOutsideMarkup(FinishedMultiline); } // Finds and returns the next valid markup element in the given line. Returns // None if the line contains no valid elements. Optional MarkupParser::parseElement(StringRef Line) { while (true) { // Find next element using begin and end markers. size_t BeginPos = Line.find("{{{"); if (BeginPos == StringRef::npos) return None; size_t EndPos = Line.find("}}}", BeginPos + 3); if (EndPos == StringRef::npos) return None; EndPos += 3; MarkupNode Element; Element.Text = Line.slice(BeginPos, EndPos); Line = Line.substr(EndPos); // Parse tag. StringRef Content = Element.Text.drop_front(3).drop_back(3); StringRef FieldsContent; std::tie(Element.Tag, FieldsContent) = Content.split(':'); if (Element.Tag.empty()) continue; // Parse fields. if (!FieldsContent.empty()) FieldsContent.split(Element.Fields, ":"); else if (Content.back() == ':') Element.Fields.push_back(FieldsContent); return Element; } } static MarkupNode textNode(StringRef Text) { MarkupNode Node; Node.Text = Text; return Node; } // Parses a region of text known to be outside any markup elements. Such text // may still contain SGR control codes, so the region is further subdivided into // control codes and true text regions. void MarkupParser::parseTextOutsideMarkup(StringRef Text) { if (Text.empty()) return; SmallVector Matches; while (SGRSyntax.match(Text, &Matches)) { // Emit any text before the SGR element. if (Matches.begin()->begin() != Text.begin()) Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin()))); Buffer.push_back(textNode(*Matches.begin())); advanceTo(Text, Matches.begin()->end()); } if (!Text.empty()) Buffer.push_back(textNode(Text)); } // Given that a line doesn't contain any valid markup, see if it ends with the // start of a multi-line element. If so, returns the beginning. Optional MarkupParser::parseMultiLineBegin(StringRef Line) { // A multi-line begin marker must be the last one on the line. size_t BeginPos = Line.rfind("{{{"); if (BeginPos == StringRef::npos) return None; size_t BeginTagPos = BeginPos + 3; // If there are any end markers afterwards, the begin marker cannot belong to // a multi-line element. size_t EndPos = Line.find("}}}", BeginTagPos); if (EndPos != StringRef::npos) return None; // Check whether the tag is registered multi-line. size_t EndTagPos = Line.find(':', BeginTagPos); if (EndTagPos == StringRef::npos) return None; StringRef Tag = Line.slice(BeginTagPos, EndTagPos); if (!MultilineTags.contains(Tag)) return None; return Line.substr(BeginPos); } // See if the line begins with the ending of an in-progress multi-line element. // If so, return the ending. Optional MarkupParser::parseMultiLineEnd(StringRef Line) { size_t EndPos = Line.find("}}}"); if (EndPos == StringRef::npos) return None; return Line.take_front(EndPos + 3); } } // end namespace symbolize } // end namespace llvm