xref: /freebsd/contrib/llvm-project/lldb/source/ValueObject/DILLexer.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- DILLexer.cpp ------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // This implements the lexer (tokenizer) for the Data Inspection Language
8 // (DIL), and its helper functions, which will eventually underlie the
9 // 'frame variable' command. The language whose tokens this lexer recognizes
10 // is described in lldb/docs/dil-expr-lang.ebnf
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "lldb/ValueObject/DILLexer.h"
15 #include "lldb/Utility/Status.h"
16 #include "lldb/ValueObject/DILParser.h"
17 #include "llvm/ADT/StringSwitch.h"
18 
19 namespace lldb_private::dil {
20 
GetTokenName(Kind kind)21 llvm::StringRef Token::GetTokenName(Kind kind) {
22   switch (kind) {
23   case Kind::amp:
24     return "amp";
25   case Kind::arrow:
26     return "arrow";
27   case Kind::coloncolon:
28     return "coloncolon";
29   case Kind::eof:
30     return "eof";
31   case Kind::identifier:
32     return "identifier";
33   case Kind::l_paren:
34     return "l_paren";
35   case Kind::l_square:
36     return "l_square";
37   case Kind::minus:
38     return "minus";
39   case Kind::numeric_constant:
40     return "numeric_constant";
41   case Kind::period:
42     return "period";
43   case Kind::r_paren:
44     return "r_paren";
45   case Kind::r_square:
46     return "r_square";
47   case Token::star:
48     return "star";
49   }
50   llvm_unreachable("Unknown token name");
51 }
52 
// Returns true if `c` lies in 'a'..'z' or in 'A'..'Z'.
static bool IsLetter(char c) {
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_lower || is_upper;
}
56 
// Returns true if `c` lies in '0'..'9'.
static bool IsDigit(char c) {
  return c >= '0' && c <= '9';
}
58 
59 // A word starts with a letter, underscore, or dollar sign, followed by
60 // letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or  underscores.
IsWord(llvm::StringRef expr,llvm::StringRef & remainder)61 static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
62                                              llvm::StringRef &remainder) {
63   // Find the longest prefix consisting of letters, digits, underscors and
64   // '$'. If it doesn't start with a digit, then it's a word.
65   llvm::StringRef candidate = remainder.take_while(
66       [](char c) { return IsDigit(c) || IsLetter(c) || c == '_' || c == '$'; });
67   if (candidate.empty() || IsDigit(candidate[0]))
68     return std::nullopt;
69   remainder = remainder.drop_front(candidate.size());
70   return candidate;
71 }
72 
IsNumberBodyChar(char ch)73 static bool IsNumberBodyChar(char ch) { return IsDigit(ch) || IsLetter(ch); }
74 
IsNumber(llvm::StringRef expr,llvm::StringRef & remainder)75 static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr,
76                                                llvm::StringRef &remainder) {
77   if (IsDigit(remainder[0])) {
78     llvm::StringRef number = remainder.take_while(IsNumberBodyChar);
79     remainder = remainder.drop_front(number.size());
80     return number;
81   }
82   return std::nullopt;
83 }
84 
Create(llvm::StringRef expr)85 llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) {
86   std::vector<Token> tokens;
87   llvm::StringRef remainder = expr;
88   do {
89     if (llvm::Expected<Token> t = Lex(expr, remainder)) {
90       tokens.push_back(std::move(*t));
91     } else {
92       return t.takeError();
93     }
94   } while (tokens.back().GetKind() != Token::eof);
95   return DILLexer(expr, std::move(tokens));
96 }
97 
Lex(llvm::StringRef expr,llvm::StringRef & remainder)98 llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
99                                     llvm::StringRef &remainder) {
100   // Skip over whitespace (spaces).
101   remainder = remainder.ltrim();
102   llvm::StringRef::iterator cur_pos = remainder.begin();
103 
104   // Check to see if we've reached the end of our input string.
105   if (remainder.empty())
106     return Token(Token::eof, "", (uint32_t)expr.size());
107 
108   uint32_t position = cur_pos - expr.begin();
109   std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder);
110   if (maybe_number)
111     return Token(Token::numeric_constant, maybe_number->str(), position);
112   std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
113   if (maybe_word)
114     return Token(Token::identifier, maybe_word->str(), position);
115 
116   constexpr std::pair<Token::Kind, const char *> operators[] = {
117       {Token::amp, "&"},     {Token::arrow, "->"},   {Token::coloncolon, "::"},
118       {Token::l_paren, "("}, {Token::l_square, "["}, {Token::minus, "-"},
119       {Token::period, "."},  {Token::r_paren, ")"},  {Token::r_square, "]"},
120       {Token::star, "*"},
121   };
122   for (auto [kind, str] : operators) {
123     if (remainder.consume_front(str))
124       return Token(kind, str, position);
125   }
126 
127   // Unrecognized character(s) in string; unable to lex it.
128   return llvm::make_error<DILDiagnosticError>(expr, "unrecognized token",
129                                               position);
130 }
131 
132 } // namespace lldb_private::dil
133