1 //===-- DILLexer.cpp ------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
// This implements the lexer for the Data Inspection Language (DIL), and its
// helper functions, which will eventually underlie the 'frame variable'
// command. The language that this lexer tokenizes is described in
// lldb/docs/dil-expr-lang.ebnf
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "lldb/ValueObject/DILLexer.h"
15 #include "lldb/Utility/Status.h"
16 #include "lldb/ValueObject/DILParser.h"
17 #include "llvm/ADT/StringSwitch.h"
18
19 namespace lldb_private::dil {
20
GetTokenName(Kind kind)21 llvm::StringRef Token::GetTokenName(Kind kind) {
22 switch (kind) {
23 case Kind::amp:
24 return "amp";
25 case Kind::arrow:
26 return "arrow";
27 case Kind::coloncolon:
28 return "coloncolon";
29 case Kind::eof:
30 return "eof";
31 case Kind::identifier:
32 return "identifier";
33 case Kind::l_paren:
34 return "l_paren";
35 case Kind::l_square:
36 return "l_square";
37 case Kind::minus:
38 return "minus";
39 case Kind::numeric_constant:
40 return "numeric_constant";
41 case Kind::period:
42 return "period";
43 case Kind::r_paren:
44 return "r_paren";
45 case Kind::r_square:
46 return "r_square";
47 case Token::star:
48 return "star";
49 }
50 llvm_unreachable("Unknown token name");
51 }
52
/// Returns true if \p c is an ASCII letter ('a'..'z' or 'A'..'Z').
static bool IsLetter(char c) {
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_lower || is_upper;
}
56
/// Returns true if \p c is an ASCII decimal digit ('0'..'9').
static bool IsDigit(char c) {
  return c >= '0' && c <= '9';
}
58
59 // A word starts with a letter, underscore, or dollar sign, followed by
60 // letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
IsWord(llvm::StringRef expr,llvm::StringRef & remainder)61 static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
62 llvm::StringRef &remainder) {
63 // Find the longest prefix consisting of letters, digits, underscors and
64 // '$'. If it doesn't start with a digit, then it's a word.
65 llvm::StringRef candidate = remainder.take_while(
66 [](char c) { return IsDigit(c) || IsLetter(c) || c == '_' || c == '$'; });
67 if (candidate.empty() || IsDigit(candidate[0]))
68 return std::nullopt;
69 remainder = remainder.drop_front(candidate.size());
70 return candidate;
71 }
72
IsNumberBodyChar(char ch)73 static bool IsNumberBodyChar(char ch) { return IsDigit(ch) || IsLetter(ch); }
74
IsNumber(llvm::StringRef expr,llvm::StringRef & remainder)75 static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr,
76 llvm::StringRef &remainder) {
77 if (IsDigit(remainder[0])) {
78 llvm::StringRef number = remainder.take_while(IsNumberBodyChar);
79 remainder = remainder.drop_front(number.size());
80 return number;
81 }
82 return std::nullopt;
83 }
84
Create(llvm::StringRef expr)85 llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) {
86 std::vector<Token> tokens;
87 llvm::StringRef remainder = expr;
88 do {
89 if (llvm::Expected<Token> t = Lex(expr, remainder)) {
90 tokens.push_back(std::move(*t));
91 } else {
92 return t.takeError();
93 }
94 } while (tokens.back().GetKind() != Token::eof);
95 return DILLexer(expr, std::move(tokens));
96 }
97
Lex(llvm::StringRef expr,llvm::StringRef & remainder)98 llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
99 llvm::StringRef &remainder) {
100 // Skip over whitespace (spaces).
101 remainder = remainder.ltrim();
102 llvm::StringRef::iterator cur_pos = remainder.begin();
103
104 // Check to see if we've reached the end of our input string.
105 if (remainder.empty())
106 return Token(Token::eof, "", (uint32_t)expr.size());
107
108 uint32_t position = cur_pos - expr.begin();
109 std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder);
110 if (maybe_number)
111 return Token(Token::numeric_constant, maybe_number->str(), position);
112 std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
113 if (maybe_word)
114 return Token(Token::identifier, maybe_word->str(), position);
115
116 constexpr std::pair<Token::Kind, const char *> operators[] = {
117 {Token::amp, "&"}, {Token::arrow, "->"}, {Token::coloncolon, "::"},
118 {Token::l_paren, "("}, {Token::l_square, "["}, {Token::minus, "-"},
119 {Token::period, "."}, {Token::r_paren, ")"}, {Token::r_square, "]"},
120 {Token::star, "*"},
121 };
122 for (auto [kind, str] : operators) {
123 if (remainder.consume_front(str))
124 return Token(kind, str, position);
125 }
126
127 // Unrecognized character(s) in string; unable to lex it.
128 return llvm::make_error<DILDiagnosticError>(expr, "unrecognized token",
129 position);
130 }
131
132 } // namespace lldb_private::dil
133