xref: /freebsd/contrib/llvm-project/lldb/source/ValueObject/DILParser.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- DILParser.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // This implements the recursive descent parser for the Data Inspection
8 // Language (DIL), and its helper functions, which will eventually underlie the
9 // 'frame variable' command. The language that this parser recognizes is
10 // described in lldb/docs/dil-expr-lang.ebnf
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "lldb/ValueObject/DILParser.h"
15 #include "lldb/Target/ExecutionContextScope.h"
16 #include "lldb/Utility/DiagnosticsRendering.h"
17 #include "lldb/ValueObject/DILAST.h"
18 #include "lldb/ValueObject/DILEval.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/FormatAdapters.h"
21 #include <cstdlib>
22 #include <limits.h>
23 #include <memory>
24 #include <sstream>
25 #include <string>
26 
27 namespace lldb_private::dil {
28 
DILDiagnosticError(llvm::StringRef expr,const std::string & message,uint32_t loc,uint16_t err_len)29 DILDiagnosticError::DILDiagnosticError(llvm::StringRef expr,
30                                        const std::string &message, uint32_t loc,
31                                        uint16_t err_len)
32     : ErrorInfo(make_error_code(std::errc::invalid_argument)) {
33   DiagnosticDetail::SourceLocation sloc = {
34       FileSpec{}, /*line=*/1, static_cast<uint16_t>(loc + 1),
35       err_len,    false,      /*in_user_input=*/true};
36   std::string rendered_msg =
37       llvm::formatv("<user expression 0>:1:{0}: {1}\n   1 | {2}\n     | ^",
38                     loc + 1, message, expr);
39   m_detail.source_location = sloc;
40   m_detail.severity = lldb::eSeverityError;
41   m_detail.message = message;
42   m_detail.rendered = std::move(rendered_msg);
43 }
44 
45 llvm::Expected<ASTNodeUP>
Parse(llvm::StringRef dil_input_expr,DILLexer lexer,std::shared_ptr<StackFrame> frame_sp,lldb::DynamicValueType use_dynamic,bool use_synthetic,bool fragile_ivar,bool check_ptr_vs_member)46 DILParser::Parse(llvm::StringRef dil_input_expr, DILLexer lexer,
47                  std::shared_ptr<StackFrame> frame_sp,
48                  lldb::DynamicValueType use_dynamic, bool use_synthetic,
49                  bool fragile_ivar, bool check_ptr_vs_member) {
50   llvm::Error error = llvm::Error::success();
51   DILParser parser(dil_input_expr, lexer, frame_sp, use_dynamic, use_synthetic,
52                    fragile_ivar, check_ptr_vs_member, error);
53 
54   ASTNodeUP node_up = parser.Run();
55 
56   if (error)
57     return error;
58 
59   return node_up;
60 }
61 
DILParser(llvm::StringRef dil_input_expr,DILLexer lexer,std::shared_ptr<StackFrame> frame_sp,lldb::DynamicValueType use_dynamic,bool use_synthetic,bool fragile_ivar,bool check_ptr_vs_member,llvm::Error & error)62 DILParser::DILParser(llvm::StringRef dil_input_expr, DILLexer lexer,
63                      std::shared_ptr<StackFrame> frame_sp,
64                      lldb::DynamicValueType use_dynamic, bool use_synthetic,
65                      bool fragile_ivar, bool check_ptr_vs_member,
66                      llvm::Error &error)
67     : m_ctx_scope(frame_sp), m_input_expr(dil_input_expr),
68       m_dil_lexer(std::move(lexer)), m_error(error), m_use_dynamic(use_dynamic),
69       m_use_synthetic(use_synthetic), m_fragile_ivar(fragile_ivar),
70       m_check_ptr_vs_member(check_ptr_vs_member) {}
71 
Run()72 ASTNodeUP DILParser::Run() {
73   ASTNodeUP expr = ParseExpression();
74 
75   Expect(Token::Kind::eof);
76 
77   return expr;
78 }
79 
80 // Parse an expression.
81 //
82 //  expression:
83 //    unary_expression
84 //
ParseExpression()85 ASTNodeUP DILParser::ParseExpression() { return ParseUnaryExpression(); }
86 
87 // Parse an unary_expression.
88 //
89 //  unary_expression:
90 //    postfix_expression
91 //    unary_operator expression
92 //
93 //  unary_operator:
94 //    "&"
95 //    "*"
96 //
ParseUnaryExpression()97 ASTNodeUP DILParser::ParseUnaryExpression() {
98   if (CurToken().IsOneOf({Token::amp, Token::star})) {
99     Token token = CurToken();
100     uint32_t loc = token.GetLocation();
101     m_dil_lexer.Advance();
102     auto rhs = ParseExpression();
103     switch (token.GetKind()) {
104     case Token::star:
105       return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::Deref,
106                                            std::move(rhs));
107     case Token::amp:
108       return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::AddrOf,
109                                            std::move(rhs));
110 
111     default:
112       llvm_unreachable("invalid token kind");
113     }
114   }
115   return ParsePostfixExpression();
116 }
117 
118 // Parse a postfix_expression.
119 //
120 //  postfix_expression:
121 //    primary_expression
122 //    postfix_expression "[" integer_literal "]"
123 //    postfix_expression "[" integer_literal "-" integer_literal "]"
124 //    postfix_expression "." id_expression
125 //    postfix_expression "->" id_expression
126 //
ParsePostfixExpression()127 ASTNodeUP DILParser::ParsePostfixExpression() {
128   ASTNodeUP lhs = ParsePrimaryExpression();
129   while (CurToken().IsOneOf({Token::l_square, Token::period, Token::arrow})) {
130     uint32_t loc = CurToken().GetLocation();
131     Token token = CurToken();
132     switch (token.GetKind()) {
133     case Token::l_square: {
134       m_dil_lexer.Advance();
135       std::optional<int64_t> index = ParseIntegerConstant();
136       if (!index) {
137         BailOut(
138             llvm::formatv("failed to parse integer constant: {0}", CurToken()),
139             CurToken().GetLocation(), CurToken().GetSpelling().length());
140         return std::make_unique<ErrorNode>();
141       }
142       if (CurToken().GetKind() == Token::minus) {
143         m_dil_lexer.Advance();
144         std::optional<int64_t> last_index = ParseIntegerConstant();
145         if (!last_index) {
146           BailOut(llvm::formatv("failed to parse integer constant: {0}",
147                                 CurToken()),
148                   CurToken().GetLocation(), CurToken().GetSpelling().length());
149           return std::make_unique<ErrorNode>();
150         }
151         lhs = std::make_unique<BitFieldExtractionNode>(
152             loc, std::move(lhs), std::move(*index), std::move(*last_index));
153       } else {
154         lhs = std::make_unique<ArraySubscriptNode>(loc, std::move(lhs),
155                                                    std::move(*index));
156       }
157       Expect(Token::r_square);
158       m_dil_lexer.Advance();
159       break;
160     }
161     case Token::period:
162     case Token::arrow: {
163       m_dil_lexer.Advance();
164       Token member_token = CurToken();
165       std::string member_id = ParseIdExpression();
166       lhs = std::make_unique<MemberOfNode>(
167           member_token.GetLocation(), std::move(lhs),
168           token.GetKind() == Token::arrow, member_id);
169       break;
170     }
171     default:
172       llvm_unreachable("invalid token");
173     }
174   }
175 
176   return lhs;
177 }
178 
179 // Parse a primary_expression.
180 //
181 //  primary_expression:
182 //    id_expression
183 //    "(" expression ")"
184 //
ParsePrimaryExpression()185 ASTNodeUP DILParser::ParsePrimaryExpression() {
186   if (CurToken().IsOneOf(
187           {Token::coloncolon, Token::identifier, Token::l_paren})) {
188     // Save the source location for the diagnostics message.
189     uint32_t loc = CurToken().GetLocation();
190     std::string identifier = ParseIdExpression();
191 
192     if (!identifier.empty())
193       return std::make_unique<IdentifierNode>(loc, identifier);
194   }
195 
196   if (CurToken().Is(Token::l_paren)) {
197     m_dil_lexer.Advance();
198     auto expr = ParseExpression();
199     Expect(Token::r_paren);
200     m_dil_lexer.Advance();
201     return expr;
202   }
203 
204   BailOut(llvm::formatv("Unexpected token: {0}", CurToken()),
205           CurToken().GetLocation(), CurToken().GetSpelling().length());
206   return std::make_unique<ErrorNode>();
207 }
208 
209 // Parse nested_name_specifier.
210 //
211 //  nested_name_specifier:
212 //    type_name "::"
213 //    namespace_name "::"
214 //    nested_name_specifier identifier "::"
215 //
ParseNestedNameSpecifier()216 std::string DILParser::ParseNestedNameSpecifier() {
217   // The first token in nested_name_specifier is always an identifier, or
218   // '(anonymous namespace)'.
219   switch (CurToken().GetKind()) {
220   case Token::l_paren: {
221     // Anonymous namespaces need to be treated specially: They are
222     // represented the the string '(anonymous namespace)', which has a
223     // space in it (throwing off normal parsing) and is not actually
224     // proper C++> Check to see if we're looking at
225     // '(anonymous namespace)::...'
226 
227     // Look for all the pieces, in order:
228     // l_paren 'anonymous' 'namespace' r_paren coloncolon
229     if (m_dil_lexer.LookAhead(1).Is(Token::identifier) &&
230         (m_dil_lexer.LookAhead(1).GetSpelling() == "anonymous") &&
231         m_dil_lexer.LookAhead(2).Is(Token::identifier) &&
232         (m_dil_lexer.LookAhead(2).GetSpelling() == "namespace") &&
233         m_dil_lexer.LookAhead(3).Is(Token::r_paren) &&
234         m_dil_lexer.LookAhead(4).Is(Token::coloncolon)) {
235       m_dil_lexer.Advance(4);
236 
237       Expect(Token::coloncolon);
238       m_dil_lexer.Advance();
239       if (!CurToken().Is(Token::identifier) && !CurToken().Is(Token::l_paren)) {
240         BailOut("Expected an identifier or anonymous namespace, but not found.",
241                 CurToken().GetLocation(), CurToken().GetSpelling().length());
242       }
243       // Continue parsing the nested_namespace_specifier.
244       std::string identifier2 = ParseNestedNameSpecifier();
245 
246       return "(anonymous namespace)::" + identifier2;
247     }
248 
249     return "";
250   } // end of special handling for '(anonymous namespace)'
251   case Token::identifier: {
252     // If the next token is scope ("::"), then this is indeed a
253     // nested_name_specifier
254     if (m_dil_lexer.LookAhead(1).Is(Token::coloncolon)) {
255       // This nested_name_specifier is a single identifier.
256       std::string identifier = CurToken().GetSpelling();
257       m_dil_lexer.Advance(1);
258       Expect(Token::coloncolon);
259       m_dil_lexer.Advance();
260       // Continue parsing the nested_name_specifier.
261       return identifier + "::" + ParseNestedNameSpecifier();
262     }
263 
264     return "";
265   }
266   default:
267     return "";
268   }
269 }
270 
271 // Parse an id_expression.
272 //
273 //  id_expression:
274 //    unqualified_id
275 //    qualified_id
276 //
277 //  qualified_id:
278 //    ["::"] [nested_name_specifier] unqualified_id
279 //    ["::"] identifier
280 //
281 //  identifier:
282 //    ? Token::identifier ?
283 //
ParseIdExpression()284 std::string DILParser::ParseIdExpression() {
285   // Try parsing optional global scope operator.
286   bool global_scope = false;
287   if (CurToken().Is(Token::coloncolon)) {
288     global_scope = true;
289     m_dil_lexer.Advance();
290   }
291 
292   // Try parsing optional nested_name_specifier.
293   std::string nested_name_specifier = ParseNestedNameSpecifier();
294 
295   // If nested_name_specifier is present, then it's qualified_id production.
296   // Follow the first production rule.
297   if (!nested_name_specifier.empty()) {
298     // Parse unqualified_id and construct a fully qualified id expression.
299     auto unqualified_id = ParseUnqualifiedId();
300 
301     return llvm::formatv("{0}{1}{2}", global_scope ? "::" : "",
302                          nested_name_specifier, unqualified_id);
303   }
304 
305   if (!CurToken().Is(Token::identifier))
306     return "";
307 
308   // No nested_name_specifier, but with global scope -- this is also a
309   // qualified_id production. Follow the second production rule.
310   if (global_scope) {
311     Expect(Token::identifier);
312     std::string identifier = CurToken().GetSpelling();
313     m_dil_lexer.Advance();
314     return llvm::formatv("{0}{1}", global_scope ? "::" : "", identifier);
315   }
316 
317   // This is unqualified_id production.
318   return ParseUnqualifiedId();
319 }
320 
321 // Parse an unqualified_id.
322 //
323 //  unqualified_id:
324 //    identifier
325 //
326 //  identifier:
327 //    ? Token::identifier ?
328 //
ParseUnqualifiedId()329 std::string DILParser::ParseUnqualifiedId() {
330   Expect(Token::identifier);
331   std::string identifier = CurToken().GetSpelling();
332   m_dil_lexer.Advance();
333   return identifier;
334 }
335 
BailOut(const std::string & error,uint32_t loc,uint16_t err_len)336 void DILParser::BailOut(const std::string &error, uint32_t loc,
337                         uint16_t err_len) {
338   if (m_error)
339     // If error is already set, then the parser is in the "bail-out" mode. Don't
340     // do anything and keep the original error.
341     return;
342 
343   m_error =
344       llvm::make_error<DILDiagnosticError>(m_input_expr, error, loc, err_len);
345   // Advance the lexer token index to the end of the lexed tokens vector.
346   m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
347 }
348 
349 // Parse a integer_literal.
350 //
351 //  integer_literal:
352 //    ? Integer constant ?
353 //
ParseIntegerConstant()354 std::optional<int64_t> DILParser::ParseIntegerConstant() {
355   std::string number_spelling;
356   if (CurToken().GetKind() == Token::minus) {
357     // StringRef::getAsInteger<>() can parse negative numbers.
358     // FIXME: Remove this once unary minus operator is added.
359     number_spelling = "-";
360     m_dil_lexer.Advance();
361   }
362   number_spelling.append(CurToken().GetSpelling());
363   llvm::StringRef spelling_ref = number_spelling;
364   int64_t raw_value;
365   if (!spelling_ref.getAsInteger<int64_t>(0, raw_value)) {
366     m_dil_lexer.Advance();
367     return raw_value;
368   }
369 
370   return std::nullopt;
371 }
372 
Expect(Token::Kind kind)373 void DILParser::Expect(Token::Kind kind) {
374   if (CurToken().IsNot(kind)) {
375     BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
376             CurToken().GetLocation(), CurToken().GetSpelling().length());
377   }
378 }
379 
380 } // namespace lldb_private::dil
381