1 //===-- DILParser.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // This implements the recursive descent parser for the Data Inspection
8 // Language (DIL), and its helper functions, which will eventually underlie the
9 // 'frame variable' command. The language that this parser recognizes is
10 // described in lldb/docs/dil-expr-lang.ebnf
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "lldb/ValueObject/DILParser.h"
15 #include "lldb/Target/ExecutionContextScope.h"
16 #include "lldb/Utility/DiagnosticsRendering.h"
17 #include "lldb/ValueObject/DILAST.h"
18 #include "lldb/ValueObject/DILEval.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/FormatAdapters.h"
21 #include <cstdlib>
22 #include <limits.h>
23 #include <memory>
24 #include <sstream>
25 #include <string>
26
27 namespace lldb_private::dil {
28
DILDiagnosticError(llvm::StringRef expr,const std::string & message,uint32_t loc,uint16_t err_len)29 DILDiagnosticError::DILDiagnosticError(llvm::StringRef expr,
30 const std::string &message, uint32_t loc,
31 uint16_t err_len)
32 : ErrorInfo(make_error_code(std::errc::invalid_argument)) {
33 DiagnosticDetail::SourceLocation sloc = {
34 FileSpec{}, /*line=*/1, static_cast<uint16_t>(loc + 1),
35 err_len, false, /*in_user_input=*/true};
36 std::string rendered_msg =
37 llvm::formatv("<user expression 0>:1:{0}: {1}\n 1 | {2}\n | ^",
38 loc + 1, message, expr);
39 m_detail.source_location = sloc;
40 m_detail.severity = lldb::eSeverityError;
41 m_detail.message = message;
42 m_detail.rendered = std::move(rendered_msg);
43 }
44
45 llvm::Expected<ASTNodeUP>
Parse(llvm::StringRef dil_input_expr,DILLexer lexer,std::shared_ptr<StackFrame> frame_sp,lldb::DynamicValueType use_dynamic,bool use_synthetic,bool fragile_ivar,bool check_ptr_vs_member)46 DILParser::Parse(llvm::StringRef dil_input_expr, DILLexer lexer,
47 std::shared_ptr<StackFrame> frame_sp,
48 lldb::DynamicValueType use_dynamic, bool use_synthetic,
49 bool fragile_ivar, bool check_ptr_vs_member) {
50 llvm::Error error = llvm::Error::success();
51 DILParser parser(dil_input_expr, lexer, frame_sp, use_dynamic, use_synthetic,
52 fragile_ivar, check_ptr_vs_member, error);
53
54 ASTNodeUP node_up = parser.Run();
55
56 if (error)
57 return error;
58
59 return node_up;
60 }
61
DILParser(llvm::StringRef dil_input_expr,DILLexer lexer,std::shared_ptr<StackFrame> frame_sp,lldb::DynamicValueType use_dynamic,bool use_synthetic,bool fragile_ivar,bool check_ptr_vs_member,llvm::Error & error)62 DILParser::DILParser(llvm::StringRef dil_input_expr, DILLexer lexer,
63 std::shared_ptr<StackFrame> frame_sp,
64 lldb::DynamicValueType use_dynamic, bool use_synthetic,
65 bool fragile_ivar, bool check_ptr_vs_member,
66 llvm::Error &error)
67 : m_ctx_scope(frame_sp), m_input_expr(dil_input_expr),
68 m_dil_lexer(std::move(lexer)), m_error(error), m_use_dynamic(use_dynamic),
69 m_use_synthetic(use_synthetic), m_fragile_ivar(fragile_ivar),
70 m_check_ptr_vs_member(check_ptr_vs_member) {}
71
Run()72 ASTNodeUP DILParser::Run() {
73 ASTNodeUP expr = ParseExpression();
74
75 Expect(Token::Kind::eof);
76
77 return expr;
78 }
79
80 // Parse an expression.
81 //
82 // expression:
83 // unary_expression
84 //
ParseExpression()85 ASTNodeUP DILParser::ParseExpression() { return ParseUnaryExpression(); }
86
87 // Parse an unary_expression.
88 //
89 // unary_expression:
90 // postfix_expression
91 // unary_operator expression
92 //
93 // unary_operator:
94 // "&"
95 // "*"
96 //
ParseUnaryExpression()97 ASTNodeUP DILParser::ParseUnaryExpression() {
98 if (CurToken().IsOneOf({Token::amp, Token::star})) {
99 Token token = CurToken();
100 uint32_t loc = token.GetLocation();
101 m_dil_lexer.Advance();
102 auto rhs = ParseExpression();
103 switch (token.GetKind()) {
104 case Token::star:
105 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::Deref,
106 std::move(rhs));
107 case Token::amp:
108 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::AddrOf,
109 std::move(rhs));
110
111 default:
112 llvm_unreachable("invalid token kind");
113 }
114 }
115 return ParsePostfixExpression();
116 }
117
118 // Parse a postfix_expression.
119 //
120 // postfix_expression:
121 // primary_expression
122 // postfix_expression "[" integer_literal "]"
123 // postfix_expression "[" integer_literal "-" integer_literal "]"
124 // postfix_expression "." id_expression
125 // postfix_expression "->" id_expression
126 //
ParsePostfixExpression()127 ASTNodeUP DILParser::ParsePostfixExpression() {
128 ASTNodeUP lhs = ParsePrimaryExpression();
129 while (CurToken().IsOneOf({Token::l_square, Token::period, Token::arrow})) {
130 uint32_t loc = CurToken().GetLocation();
131 Token token = CurToken();
132 switch (token.GetKind()) {
133 case Token::l_square: {
134 m_dil_lexer.Advance();
135 std::optional<int64_t> index = ParseIntegerConstant();
136 if (!index) {
137 BailOut(
138 llvm::formatv("failed to parse integer constant: {0}", CurToken()),
139 CurToken().GetLocation(), CurToken().GetSpelling().length());
140 return std::make_unique<ErrorNode>();
141 }
142 if (CurToken().GetKind() == Token::minus) {
143 m_dil_lexer.Advance();
144 std::optional<int64_t> last_index = ParseIntegerConstant();
145 if (!last_index) {
146 BailOut(llvm::formatv("failed to parse integer constant: {0}",
147 CurToken()),
148 CurToken().GetLocation(), CurToken().GetSpelling().length());
149 return std::make_unique<ErrorNode>();
150 }
151 lhs = std::make_unique<BitFieldExtractionNode>(
152 loc, std::move(lhs), std::move(*index), std::move(*last_index));
153 } else {
154 lhs = std::make_unique<ArraySubscriptNode>(loc, std::move(lhs),
155 std::move(*index));
156 }
157 Expect(Token::r_square);
158 m_dil_lexer.Advance();
159 break;
160 }
161 case Token::period:
162 case Token::arrow: {
163 m_dil_lexer.Advance();
164 Token member_token = CurToken();
165 std::string member_id = ParseIdExpression();
166 lhs = std::make_unique<MemberOfNode>(
167 member_token.GetLocation(), std::move(lhs),
168 token.GetKind() == Token::arrow, member_id);
169 break;
170 }
171 default:
172 llvm_unreachable("invalid token");
173 }
174 }
175
176 return lhs;
177 }
178
179 // Parse a primary_expression.
180 //
181 // primary_expression:
182 // id_expression
183 // "(" expression ")"
184 //
ParsePrimaryExpression()185 ASTNodeUP DILParser::ParsePrimaryExpression() {
186 if (CurToken().IsOneOf(
187 {Token::coloncolon, Token::identifier, Token::l_paren})) {
188 // Save the source location for the diagnostics message.
189 uint32_t loc = CurToken().GetLocation();
190 std::string identifier = ParseIdExpression();
191
192 if (!identifier.empty())
193 return std::make_unique<IdentifierNode>(loc, identifier);
194 }
195
196 if (CurToken().Is(Token::l_paren)) {
197 m_dil_lexer.Advance();
198 auto expr = ParseExpression();
199 Expect(Token::r_paren);
200 m_dil_lexer.Advance();
201 return expr;
202 }
203
204 BailOut(llvm::formatv("Unexpected token: {0}", CurToken()),
205 CurToken().GetLocation(), CurToken().GetSpelling().length());
206 return std::make_unique<ErrorNode>();
207 }
208
209 // Parse nested_name_specifier.
210 //
211 // nested_name_specifier:
212 // type_name "::"
213 // namespace_name "::"
214 // nested_name_specifier identifier "::"
215 //
ParseNestedNameSpecifier()216 std::string DILParser::ParseNestedNameSpecifier() {
217 // The first token in nested_name_specifier is always an identifier, or
218 // '(anonymous namespace)'.
219 switch (CurToken().GetKind()) {
220 case Token::l_paren: {
221 // Anonymous namespaces need to be treated specially: They are
222 // represented the the string '(anonymous namespace)', which has a
223 // space in it (throwing off normal parsing) and is not actually
224 // proper C++> Check to see if we're looking at
225 // '(anonymous namespace)::...'
226
227 // Look for all the pieces, in order:
228 // l_paren 'anonymous' 'namespace' r_paren coloncolon
229 if (m_dil_lexer.LookAhead(1).Is(Token::identifier) &&
230 (m_dil_lexer.LookAhead(1).GetSpelling() == "anonymous") &&
231 m_dil_lexer.LookAhead(2).Is(Token::identifier) &&
232 (m_dil_lexer.LookAhead(2).GetSpelling() == "namespace") &&
233 m_dil_lexer.LookAhead(3).Is(Token::r_paren) &&
234 m_dil_lexer.LookAhead(4).Is(Token::coloncolon)) {
235 m_dil_lexer.Advance(4);
236
237 Expect(Token::coloncolon);
238 m_dil_lexer.Advance();
239 if (!CurToken().Is(Token::identifier) && !CurToken().Is(Token::l_paren)) {
240 BailOut("Expected an identifier or anonymous namespace, but not found.",
241 CurToken().GetLocation(), CurToken().GetSpelling().length());
242 }
243 // Continue parsing the nested_namespace_specifier.
244 std::string identifier2 = ParseNestedNameSpecifier();
245
246 return "(anonymous namespace)::" + identifier2;
247 }
248
249 return "";
250 } // end of special handling for '(anonymous namespace)'
251 case Token::identifier: {
252 // If the next token is scope ("::"), then this is indeed a
253 // nested_name_specifier
254 if (m_dil_lexer.LookAhead(1).Is(Token::coloncolon)) {
255 // This nested_name_specifier is a single identifier.
256 std::string identifier = CurToken().GetSpelling();
257 m_dil_lexer.Advance(1);
258 Expect(Token::coloncolon);
259 m_dil_lexer.Advance();
260 // Continue parsing the nested_name_specifier.
261 return identifier + "::" + ParseNestedNameSpecifier();
262 }
263
264 return "";
265 }
266 default:
267 return "";
268 }
269 }
270
271 // Parse an id_expression.
272 //
273 // id_expression:
274 // unqualified_id
275 // qualified_id
276 //
277 // qualified_id:
278 // ["::"] [nested_name_specifier] unqualified_id
279 // ["::"] identifier
280 //
281 // identifier:
282 // ? Token::identifier ?
283 //
ParseIdExpression()284 std::string DILParser::ParseIdExpression() {
285 // Try parsing optional global scope operator.
286 bool global_scope = false;
287 if (CurToken().Is(Token::coloncolon)) {
288 global_scope = true;
289 m_dil_lexer.Advance();
290 }
291
292 // Try parsing optional nested_name_specifier.
293 std::string nested_name_specifier = ParseNestedNameSpecifier();
294
295 // If nested_name_specifier is present, then it's qualified_id production.
296 // Follow the first production rule.
297 if (!nested_name_specifier.empty()) {
298 // Parse unqualified_id and construct a fully qualified id expression.
299 auto unqualified_id = ParseUnqualifiedId();
300
301 return llvm::formatv("{0}{1}{2}", global_scope ? "::" : "",
302 nested_name_specifier, unqualified_id);
303 }
304
305 if (!CurToken().Is(Token::identifier))
306 return "";
307
308 // No nested_name_specifier, but with global scope -- this is also a
309 // qualified_id production. Follow the second production rule.
310 if (global_scope) {
311 Expect(Token::identifier);
312 std::string identifier = CurToken().GetSpelling();
313 m_dil_lexer.Advance();
314 return llvm::formatv("{0}{1}", global_scope ? "::" : "", identifier);
315 }
316
317 // This is unqualified_id production.
318 return ParseUnqualifiedId();
319 }
320
321 // Parse an unqualified_id.
322 //
323 // unqualified_id:
324 // identifier
325 //
326 // identifier:
327 // ? Token::identifier ?
328 //
ParseUnqualifiedId()329 std::string DILParser::ParseUnqualifiedId() {
330 Expect(Token::identifier);
331 std::string identifier = CurToken().GetSpelling();
332 m_dil_lexer.Advance();
333 return identifier;
334 }
335
BailOut(const std::string & error,uint32_t loc,uint16_t err_len)336 void DILParser::BailOut(const std::string &error, uint32_t loc,
337 uint16_t err_len) {
338 if (m_error)
339 // If error is already set, then the parser is in the "bail-out" mode. Don't
340 // do anything and keep the original error.
341 return;
342
343 m_error =
344 llvm::make_error<DILDiagnosticError>(m_input_expr, error, loc, err_len);
345 // Advance the lexer token index to the end of the lexed tokens vector.
346 m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
347 }
348
349 // Parse a integer_literal.
350 //
351 // integer_literal:
352 // ? Integer constant ?
353 //
ParseIntegerConstant()354 std::optional<int64_t> DILParser::ParseIntegerConstant() {
355 std::string number_spelling;
356 if (CurToken().GetKind() == Token::minus) {
357 // StringRef::getAsInteger<>() can parse negative numbers.
358 // FIXME: Remove this once unary minus operator is added.
359 number_spelling = "-";
360 m_dil_lexer.Advance();
361 }
362 number_spelling.append(CurToken().GetSpelling());
363 llvm::StringRef spelling_ref = number_spelling;
364 int64_t raw_value;
365 if (!spelling_ref.getAsInteger<int64_t>(0, raw_value)) {
366 m_dil_lexer.Advance();
367 return raw_value;
368 }
369
370 return std::nullopt;
371 }
372
Expect(Token::Kind kind)373 void DILParser::Expect(Token::Kind kind) {
374 if (CurToken().IsNot(kind)) {
375 BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
376 CurToken().GetLocation(), CurToken().GetSpelling().length());
377 }
378 }
379
380 } // namespace lldb_private::dil
381