xref: /freebsd/contrib/llvm-project/clang/lib/ASTMatchers/Dynamic/Parser.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===- Parser.cpp - Matcher expression parser -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Recursive parser implementation for the matcher expression grammar.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/ASTMatchers/Dynamic/Parser.h"
15 #include "clang/ASTMatchers/ASTMatchersInternal.h"
16 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
17 #include "clang/ASTMatchers/Dynamic/Registry.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/ManagedStatic.h"
22 #include <cassert>
23 #include <cerrno>
24 #include <cstddef>
25 #include <cstdlib>
26 #include <optional>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 namespace clang {
32 namespace ast_matchers {
33 namespace dynamic {
34 
/// Simple structure to hold information for one token from the parser.
///
/// Tokens are produced by \c Parser::CodeTokenizer and consumed by the
/// recursive-descent parsing functions of \c Parser.
struct Parser::TokenInfo {
  /// Different possible tokens.
  enum TokenKind {
    TK_Eof,           ///< No more input.
    TK_NewLine,       ///< A '\n' character.
    TK_OpenParen,     ///< A '(' character.
    TK_CloseParen,    ///< A ')' character.
    TK_Comma,         ///< A ',' character.
    TK_Period,        ///< A '.' character.
    TK_Literal,       ///< A string, numeric or boolean literal.
    TK_Ident,         ///< An alphanumeric identifier.
    TK_InvalidChar,   ///< A character that starts no valid token.
    TK_Error,         ///< A malformed literal; the tokenizer reported it.
    TK_CodeCompletion ///< The position at which completion was requested.
  };

  /// Some known identifiers.
  static const char* const ID_Bind;
  static const char *const ID_With;

  TokenInfo() = default;

  /// Source text of the token, as recorded by the tokenizer.
  StringRef Text;
  /// Kind of the token; a default-constructed token is \c TK_Eof.
  TokenKind Kind = TK_Eof;
  /// Start and end locations of the token.
  SourceRange Range;
  /// Value carried by the token; the tokenizer sets it for \c TK_Literal.
  VariantValue Value;
};
63 
// Spellings of the two chained calls the parser recognizes after a '.'.
const char* const Parser::TokenInfo::ID_Bind = "bind";
const char *const Parser::TokenInfo::ID_With = "with";
66 
67 /// Simple tokenizer for the parser.
68 class Parser::CodeTokenizer {
69 public:
70   explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
71       : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
72     NextToken = getNextToken();
73   }
74 
75   CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
76                 unsigned CodeCompletionOffset)
77       : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
78         CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
79     NextToken = getNextToken();
80   }
81 
82   /// Returns but doesn't consume the next token.
83   const TokenInfo &peekNextToken() const { return NextToken; }
84 
85   /// Consumes and returns the next token.
86   TokenInfo consumeNextToken() {
87     TokenInfo ThisToken = NextToken;
88     NextToken = getNextToken();
89     return ThisToken;
90   }
91 
92   TokenInfo SkipNewlines() {
93     while (NextToken.Kind == TokenInfo::TK_NewLine)
94       NextToken = getNextToken();
95     return NextToken;
96   }
97 
98   TokenInfo consumeNextTokenIgnoreNewlines() {
99     SkipNewlines();
100     if (NextToken.Kind == TokenInfo::TK_Eof)
101       return NextToken;
102     return consumeNextToken();
103   }
104 
105   TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
106 
107 private:
108   TokenInfo getNextToken() {
109     consumeWhitespace();
110     TokenInfo Result;
111     Result.Range.Start = currentLocation();
112 
113     if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
114       Result.Kind = TokenInfo::TK_CodeCompletion;
115       Result.Text = StringRef(CodeCompletionLocation, 0);
116       CodeCompletionLocation = nullptr;
117       return Result;
118     }
119 
120     if (Code.empty()) {
121       Result.Kind = TokenInfo::TK_Eof;
122       Result.Text = "";
123       return Result;
124     }
125 
126     switch (Code[0]) {
127     case '#':
128       Code = Code.drop_until([](char c) { return c == '\n'; });
129       return getNextToken();
130     case ',':
131       Result.Kind = TokenInfo::TK_Comma;
132       Result.Text = Code.substr(0, 1);
133       Code = Code.drop_front();
134       break;
135     case '.':
136       Result.Kind = TokenInfo::TK_Period;
137       Result.Text = Code.substr(0, 1);
138       Code = Code.drop_front();
139       break;
140     case '\n':
141       ++Line;
142       StartOfLine = Code.drop_front();
143       Result.Kind = TokenInfo::TK_NewLine;
144       Result.Text = Code.substr(0, 1);
145       Code = Code.drop_front();
146       break;
147     case '(':
148       Result.Kind = TokenInfo::TK_OpenParen;
149       Result.Text = Code.substr(0, 1);
150       Code = Code.drop_front();
151       break;
152     case ')':
153       Result.Kind = TokenInfo::TK_CloseParen;
154       Result.Text = Code.substr(0, 1);
155       Code = Code.drop_front();
156       break;
157 
158     case '"':
159     case '\'':
160       // Parse a string literal.
161       consumeStringLiteral(&Result);
162       break;
163 
164     case '0': case '1': case '2': case '3': case '4':
165     case '5': case '6': case '7': case '8': case '9':
166       // Parse an unsigned and float literal.
167       consumeNumberLiteral(&Result);
168       break;
169 
170     default:
171       if (isAlphanumeric(Code[0])) {
172         // Parse an identifier
173         size_t TokenLength = 1;
174         while (true) {
175           // A code completion location in/immediately after an identifier will
176           // cause the portion of the identifier before the code completion
177           // location to become a code completion token.
178           if (CodeCompletionLocation == Code.data() + TokenLength) {
179             CodeCompletionLocation = nullptr;
180             Result.Kind = TokenInfo::TK_CodeCompletion;
181             Result.Text = Code.substr(0, TokenLength);
182             Code = Code.drop_front(TokenLength);
183             return Result;
184           }
185           if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
186             break;
187           ++TokenLength;
188         }
189         if (TokenLength == 4 && Code.starts_with("true")) {
190           Result.Kind = TokenInfo::TK_Literal;
191           Result.Value = true;
192         } else if (TokenLength == 5 && Code.starts_with("false")) {
193           Result.Kind = TokenInfo::TK_Literal;
194           Result.Value = false;
195         } else {
196           Result.Kind = TokenInfo::TK_Ident;
197           Result.Text = Code.substr(0, TokenLength);
198         }
199         Code = Code.drop_front(TokenLength);
200       } else {
201         Result.Kind = TokenInfo::TK_InvalidChar;
202         Result.Text = Code.substr(0, 1);
203         Code = Code.drop_front(1);
204       }
205       break;
206     }
207 
208     Result.Range.End = currentLocation();
209     return Result;
210   }
211 
212   /// Consume an unsigned and float literal.
213   void consumeNumberLiteral(TokenInfo *Result) {
214     bool isFloatingLiteral = false;
215     unsigned Length = 1;
216     if (Code.size() > 1) {
217       // Consume the 'x' or 'b' radix modifier, if present.
218       switch (toLowercase(Code[1])) {
219       case 'x': case 'b': Length = 2;
220       }
221     }
222     while (Length < Code.size() && isHexDigit(Code[Length]))
223       ++Length;
224 
225     // Try to recognize a floating point literal.
226     while (Length < Code.size()) {
227       char c = Code[Length];
228       if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
229         isFloatingLiteral = true;
230         Length++;
231       } else {
232         break;
233       }
234     }
235 
236     Result->Text = Code.substr(0, Length);
237     Code = Code.drop_front(Length);
238 
239     if (isFloatingLiteral) {
240       char *end;
241       errno = 0;
242       std::string Text = Result->Text.str();
243       double doubleValue = strtod(Text.c_str(), &end);
244       if (*end == 0 && errno == 0) {
245         Result->Kind = TokenInfo::TK_Literal;
246         Result->Value = doubleValue;
247         return;
248       }
249     } else {
250       unsigned Value;
251       if (!Result->Text.getAsInteger(0, Value)) {
252         Result->Kind = TokenInfo::TK_Literal;
253         Result->Value = Value;
254         return;
255       }
256     }
257 
258     SourceRange Range;
259     Range.Start = Result->Range.Start;
260     Range.End = currentLocation();
261     Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
262     Result->Kind = TokenInfo::TK_Error;
263   }
264 
265   /// Consume a string literal.
266   ///
267   /// \c Code must be positioned at the start of the literal (the opening
268   /// quote). Consumed until it finds the same closing quote character.
269   void consumeStringLiteral(TokenInfo *Result) {
270     bool InEscape = false;
271     const char Marker = Code[0];
272     for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
273       if (InEscape) {
274         InEscape = false;
275         continue;
276       }
277       if (Code[Length] == '\\') {
278         InEscape = true;
279         continue;
280       }
281       if (Code[Length] == Marker) {
282         Result->Kind = TokenInfo::TK_Literal;
283         Result->Text = Code.substr(0, Length + 1);
284         Result->Value = Code.substr(1, Length - 1);
285         Code = Code.drop_front(Length + 1);
286         return;
287       }
288     }
289 
290     StringRef ErrorText = Code;
291     Code = Code.drop_front(Code.size());
292     SourceRange Range;
293     Range.Start = Result->Range.Start;
294     Range.End = currentLocation();
295     Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
296     Result->Kind = TokenInfo::TK_Error;
297   }
298 
299   /// Consume all leading whitespace from \c Code.
300   void consumeWhitespace() {
301     // Don't trim newlines.
302     Code = Code.ltrim(" \t\v\f\r");
303   }
304 
305   SourceLocation currentLocation() {
306     SourceLocation Location;
307     Location.Line = Line;
308     Location.Column = Code.data() - StartOfLine.data() + 1;
309     return Location;
310   }
311 
312   StringRef &Code;
313   StringRef StartOfLine;
314   unsigned Line = 1;
315   Diagnostics *Error;
316   TokenInfo NextToken;
317   const char *CodeCompletionLocation = nullptr;
318 };
319 
Parser::Sema::~Sema() = default;

/// Default implementation: reports no accepted argument types.
std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
    llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
  return {};
}

/// Default implementation: offers no matcher completions.
std::vector<MatcherCompletion>
Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
  return {};
}
331 
332 struct Parser::ScopedContextEntry {
333   Parser *P;
334 
335   ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
336     P->ContextStack.push_back(std::make_pair(C, 0u));
337   }
338 
339   ~ScopedContextEntry() {
340     P->ContextStack.pop_back();
341   }
342 
343   void nextArg() {
344     ++P->ContextStack.back().second;
345   }
346 };
347 
348 /// Parse expressions that start with an identifier.
349 ///
350 /// This function can parse named values and matchers.
351 /// In case of failure it will try to determine the user's intent to give
352 /// an appropriate error message.
353 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
354   const TokenInfo NameToken = Tokenizer->consumeNextToken();
355 
356   if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
357     // Parse as a named value.
358     if (const VariantValue NamedValue =
359             NamedValues ? NamedValues->lookup(NameToken.Text)
360                         : VariantValue()) {
361 
362       if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
363         *Value = NamedValue;
364         return true;
365       }
366 
367       std::string BindID;
368       Tokenizer->consumeNextToken();
369       TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
370       if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
371         addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
372         return false;
373       }
374 
375       if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
376           (ChainCallToken.Text != TokenInfo::ID_Bind &&
377            ChainCallToken.Text != TokenInfo::ID_With)) {
378         Error->addError(ChainCallToken.Range,
379                         Error->ET_ParserMalformedChainedExpr);
380         return false;
381       }
382       if (ChainCallToken.Text == TokenInfo::ID_With) {
383 
384         Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
385                                  NameToken.Text, NameToken.Range);
386 
387         Error->addError(ChainCallToken.Range,
388                         Error->ET_RegistryMatcherNoWithSupport);
389         return false;
390       }
391       if (!parseBindID(BindID))
392         return false;
393 
394       assert(NamedValue.isMatcher());
395       std::optional<DynTypedMatcher> Result =
396           NamedValue.getMatcher().getSingleMatcher();
397       if (Result) {
398         std::optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
399         if (Bound) {
400           *Value = VariantMatcher::SingleMatcher(*Bound);
401           return true;
402         }
403       }
404       return false;
405     }
406 
407     if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
408       Error->addError(Tokenizer->peekNextToken().Range,
409                       Error->ET_ParserNoOpenParen)
410           << "NewLine";
411       return false;
412     }
413 
414     // If the syntax is correct and the name is not a matcher either, report
415     // unknown named value.
416     if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
417          Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
418          Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
419          Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
420         !S->lookupMatcherCtor(NameToken.Text)) {
421       Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
422           << NameToken.Text;
423       return false;
424     }
425     // Otherwise, fallback to the matcher parser.
426   }
427 
428   Tokenizer->SkipNewlines();
429 
430   assert(NameToken.Kind == TokenInfo::TK_Ident);
431   TokenInfo OpenToken = Tokenizer->consumeNextToken();
432   if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
433     Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
434         << OpenToken.Text;
435     return false;
436   }
437 
438   std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
439 
440   // Parse as a matcher expression.
441   return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);
442 }
443 
444 bool Parser::parseBindID(std::string &BindID) {
445   // Parse the parenthesized argument to .bind("foo")
446   const TokenInfo OpenToken = Tokenizer->consumeNextToken();
447   const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
448   const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();
449 
450   // TODO: We could use different error codes for each/some to be more
451   //       explicit about the syntax error.
452   if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
453     Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
454     return false;
455   }
456   if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
457     Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
458     return false;
459   }
460   if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
461     Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
462     return false;
463   }
464   BindID = IDToken.Value.getString();
465   return true;
466 }
467 
/// Parses the argument list of a builder matcher and any chained call.
///
/// Every argument must be an identifier that names a node matcher; each one
/// is converted to its \c ASTNodeKind. The collected arguments are handed to
/// \c Sema::buildMatcherCtor, and the resulting descriptor is then either
/// turned into a matcher directly, bound via \c .bind("id"), or used as the
/// constructor for a following \c .with(...) argument list.
///
/// \param Ctor the builder matcher's constructor.
/// \param NameToken the token naming the matcher (used for diagnostics).
/// \param OpenToken the '(' token that opened the argument list.
/// \param Value receives the resulting matcher on success.
/// \return \c true on success, \c false on error or code completion.
bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
                                 const TokenInfo &OpenToken,
                                 VariantValue *Value) {
  std::vector<ParserValue> Args;
  // Stays TK_Eof (the default kind) unless a ')' is found.
  TokenInfo EndToken;

  Tokenizer->SkipNewlines();

  {
    ScopedContextEntry SCE(this, Ctor);

    while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
      if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
        // End of args.
        EndToken = Tokenizer->consumeNextToken();
        break;
      }
      if (!Args.empty()) {
        // We must find a , token to continue.
        TokenInfo CommaToken = Tokenizer->consumeNextToken();
        if (CommaToken.Kind != TokenInfo::TK_Comma) {
          Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
              << CommaToken.Text;
          return false;
        }
      }

      // Diagnostics context for this argument (1-based argument number).
      Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
                               NameToken.Text, NameToken.Range,
                               Args.size() + 1);
      ParserValue ArgValue;
      Tokenizer->SkipNewlines();

      if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) {
        addExpressionCompletions();
        return false;
      }

      TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken();

      // Only identifiers are accepted as builder arguments.
      if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) {
        Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
            << NameToken.Text;
        return false;
      }

      ArgValue.Text = NodeMatcherToken.Text;
      ArgValue.Range = NodeMatcherToken.Range;

      std::optional<MatcherCtor> MappedMatcher =
          S->lookupMatcherCtor(ArgValue.Text);

      if (!MappedMatcher) {
        Error->addError(NodeMatcherToken.Range,
                        Error->ET_RegistryMatcherNotFound)
            << NodeMatcherToken.Text;
        return false;
      }

      // The argument is passed on as the node kind of the named matcher.
      ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher);

      if (NK.isNone()) {
        Error->addError(NodeMatcherToken.Range,
                        Error->ET_RegistryNonNodeMatcher)
            << NodeMatcherToken.Text;
        return false;
      }

      ArgValue.Value = NK;

      Tokenizer->SkipNewlines();
      Args.push_back(ArgValue);

      SCE.nextArg();
    }
  }

  // The loop ended on end of input rather than on ')'.
  if (EndToken.Kind == TokenInfo::TK_Eof) {
    Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
    return false;
  }

  internal::MatcherDescriptorPtr BuiltCtor =
      S->buildMatcherCtor(Ctor, NameToken.Range, Args, Error);

  if (!BuiltCtor.get()) {
    Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
        << NameToken.Text;
    return false;
  }

  std::string BindID;
  if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
    // Consume the '.' and look at the chained call's name.
    Tokenizer->consumeNextToken();
    TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
    if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
      addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
      addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1));
      return false;
    }
    if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
        (ChainCallToken.Text != TokenInfo::ID_Bind &&
         ChainCallToken.Text != TokenInfo::ID_With)) {
      Error->addError(ChainCallToken.Range,
                      Error->ET_ParserMalformedChainedExpr);
      return false;
    }
    if (ChainCallToken.Text == TokenInfo::ID_Bind) {
      if (!parseBindID(BindID))
        return false;
      Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
                               NameToken.Text, NameToken.Range);
      SourceRange MatcherRange = NameToken.Range;
      MatcherRange.End = ChainCallToken.Range.End;
      // The built descriptor is invoked with an empty argument list.
      VariantMatcher Result = S->actOnMatcherExpression(
          BuiltCtor.get(), MatcherRange, BindID, {}, Error);
      if (Result.isNull())
        return false;

      *Value = Result;
      return true;
    } else if (ChainCallToken.Text == TokenInfo::ID_With) {
      Tokenizer->SkipNewlines();

      if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
        StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof
                               ? StringRef("EOF")
                               : Tokenizer->peekNextToken().Text;
        Error->addError(Tokenizer->peekNextToken().Range,
                        Error->ET_ParserNoOpenParen)
            << ErrTxt;
        return false;
      }

      TokenInfo WithOpenToken = Tokenizer->consumeNextToken();

      // The arguments of .with(...) are parsed as an ordinary matcher
      // argument list, using the built descriptor as the constructor.
      return parseMatcherExpressionImpl(NameToken, WithOpenToken,
                                        BuiltCtor.get(), Value);
    }
  }

  // Reached when no '.' follows the argument list: both chained-call branches
  // above return. BindID is still empty here.
  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
                           NameToken.Text, NameToken.Range);
  SourceRange MatcherRange = NameToken.Range;
  MatcherRange.End = EndToken.Range.End;
  VariantMatcher Result = S->actOnMatcherExpression(
      BuiltCtor.get(), MatcherRange, BindID, {}, Error);
  if (Result.isNull())
    return false;

  *Value = Result;
  return true;
}
621 
622 /// Parse and validate a matcher expression.
623 /// \return \c true on success, in which case \c Value has the matcher parsed.
624 ///   If the input is malformed, or some argument has an error, it
625 ///   returns \c false.
626 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
627                                         const TokenInfo &OpenToken,
628                                         std::optional<MatcherCtor> Ctor,
629                                         VariantValue *Value) {
630   if (!Ctor) {
631     Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
632         << NameToken.Text;
633     // Do not return here. We need to continue to give completion suggestions.
634   }
635 
636   if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor))
637     return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value);
638 
639   std::vector<ParserValue> Args;
640   TokenInfo EndToken;
641 
642   Tokenizer->SkipNewlines();
643 
644   {
645     ScopedContextEntry SCE(this, Ctor.value_or(nullptr));
646 
647     while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
648       if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
649         // End of args.
650         EndToken = Tokenizer->consumeNextToken();
651         break;
652       }
653       if (!Args.empty()) {
654         // We must find a , token to continue.
655         const TokenInfo CommaToken = Tokenizer->consumeNextToken();
656         if (CommaToken.Kind != TokenInfo::TK_Comma) {
657           Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
658               << CommaToken.Text;
659           return false;
660         }
661       }
662 
663       Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
664                                NameToken.Text, NameToken.Range,
665                                Args.size() + 1);
666       ParserValue ArgValue;
667       Tokenizer->SkipNewlines();
668       ArgValue.Text = Tokenizer->peekNextToken().Text;
669       ArgValue.Range = Tokenizer->peekNextToken().Range;
670       if (!parseExpressionImpl(&ArgValue.Value)) {
671         return false;
672       }
673 
674       Tokenizer->SkipNewlines();
675       Args.push_back(ArgValue);
676       SCE.nextArg();
677     }
678   }
679 
680   if (EndToken.Kind == TokenInfo::TK_Eof) {
681     Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
682     return false;
683   }
684 
685   std::string BindID;
686   if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
687     Tokenizer->consumeNextToken();
688     TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
689     if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
690       addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
691       return false;
692     }
693 
694     if (ChainCallToken.Kind != TokenInfo::TK_Ident) {
695       Error->addError(ChainCallToken.Range,
696                       Error->ET_ParserMalformedChainedExpr);
697       return false;
698     }
699     if (ChainCallToken.Text == TokenInfo::ID_With) {
700 
701       Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
702                                NameToken.Text, NameToken.Range);
703 
704       Error->addError(ChainCallToken.Range,
705                       Error->ET_RegistryMatcherNoWithSupport);
706       return false;
707     }
708     if (ChainCallToken.Text != TokenInfo::ID_Bind) {
709       Error->addError(ChainCallToken.Range,
710                       Error->ET_ParserMalformedChainedExpr);
711       return false;
712     }
713     if (!parseBindID(BindID))
714       return false;
715   }
716 
717   if (!Ctor)
718     return false;
719 
720   // Merge the start and end infos.
721   Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
722                            NameToken.Text, NameToken.Range);
723   SourceRange MatcherRange = NameToken.Range;
724   MatcherRange.End = EndToken.Range.End;
725   VariantMatcher Result = S->actOnMatcherExpression(
726       *Ctor, MatcherRange, BindID, Args, Error);
727   if (Result.isNull()) return false;
728 
729   *Value = Result;
730   return true;
731 }
732 
733 // If the prefix of this completion matches the completion token, add it to
734 // Completions minus the prefix.
735 void Parser::addCompletion(const TokenInfo &CompToken,
736                            const MatcherCompletion& Completion) {
737   if (StringRef(Completion.TypedText).starts_with(CompToken.Text) &&
738       Completion.Specificity > 0) {
739     Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
740                              Completion.MatcherDecl, Completion.Specificity);
741   }
742 }
743 
744 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
745     ArrayRef<ArgKind> AcceptedTypes) {
746   if (!NamedValues) return std::vector<MatcherCompletion>();
747   std::vector<MatcherCompletion> Result;
748   for (const auto &Entry : *NamedValues) {
749     unsigned Specificity;
750     if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
751       std::string Decl =
752           (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
753       Result.emplace_back(Entry.getKey(), Decl, Specificity);
754     }
755   }
756   return Result;
757 }
758 
759 void Parser::addExpressionCompletions() {
760   const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
761   assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
762 
763   // We cannot complete code if there is an invalid element on the context
764   // stack.
765   for (ContextStackTy::iterator I = ContextStack.begin(),
766                                 E = ContextStack.end();
767        I != E; ++I) {
768     if (!I->first)
769       return;
770   }
771 
772   auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
773   for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
774     addCompletion(CompToken, Completion);
775   }
776 
777   for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
778     addCompletion(CompToken, Completion);
779   }
780 }
781 
782 /// Parse an <Expression>
783 bool Parser::parseExpressionImpl(VariantValue *Value) {
784   switch (Tokenizer->nextTokenKind()) {
785   case TokenInfo::TK_Literal:
786     *Value = Tokenizer->consumeNextToken().Value;
787     return true;
788 
789   case TokenInfo::TK_Ident:
790     return parseIdentifierPrefixImpl(Value);
791 
792   case TokenInfo::TK_CodeCompletion:
793     addExpressionCompletions();
794     return false;
795 
796   case TokenInfo::TK_Eof:
797     Error->addError(Tokenizer->consumeNextToken().Range,
798                     Error->ET_ParserNoCode);
799     return false;
800 
801   case TokenInfo::TK_Error:
802     // This error was already reported by the tokenizer.
803     return false;
804   case TokenInfo::TK_NewLine:
805   case TokenInfo::TK_OpenParen:
806   case TokenInfo::TK_CloseParen:
807   case TokenInfo::TK_Comma:
808   case TokenInfo::TK_Period:
809   case TokenInfo::TK_InvalidChar:
810     const TokenInfo Token = Tokenizer->consumeNextToken();
811     Error->addError(Token.Range, Error->ET_ParserInvalidToken)
812         << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
813     return false;
814   }
815 
816   llvm_unreachable("Unknown token kind.");
817 }
818 
// Sema used by parsers that are constructed without one.
static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;

/// Creates a parser reading from \p Tokenizer.
///
/// \param S semantic callbacks; if null, \c DefaultRegistrySema is used.
/// \param NamedValues optional map of named values; may be null.
/// \param Error sink for parse errors.
Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
               const NamedValueMap *NamedValues, Diagnostics *Error)
    : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
      NamedValues(NamedValues), Error(Error) {}
825 
Parser::RegistrySema::~RegistrySema() = default;

/// Forwards to \c Registry::lookupMatcherCtor.
std::optional<MatcherCtor>
Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
  return Registry::lookupMatcherCtor(MatcherName);
}
832 
833 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
834     MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
835     ArrayRef<ParserValue> Args, Diagnostics *Error) {
836   if (BindID.empty()) {
837     return Registry::constructMatcher(Ctor, NameRange, Args, Error);
838   } else {
839     return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
840                                            Error);
841   }
842 }
843 
/// Forwards to \c Registry::getAcceptedCompletionTypes.
std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
    ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
  return Registry::getAcceptedCompletionTypes(Context);
}

/// Forwards to \c Registry::getMatcherCompletions.
std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
    ArrayRef<ArgKind> AcceptedTypes) {
  return Registry::getMatcherCompletions(AcceptedTypes);
}

/// Forwards to \c Registry::isBuilderMatcher.
bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {
  return Registry::isBuilderMatcher(Ctor);
}

/// Forwards to \c Registry::nodeMatcherType.
ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const {
  return Registry::nodeMatcherType(Ctor);
}

/// Forwards to \c Registry::buildMatcherCtor.
internal::MatcherDescriptorPtr
Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange,
                                       ArrayRef<ParserValue> Args,
                                       Diagnostics *Error) const {
  return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);
}
868 
869 bool Parser::parseExpression(StringRef &Code, Sema *S,
870                              const NamedValueMap *NamedValues,
871                              VariantValue *Value, Diagnostics *Error) {
872   CodeTokenizer Tokenizer(Code, Error);
873   if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
874     return false;
875   auto NT = Tokenizer.peekNextToken();
876   if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
877     Error->addError(Tokenizer.peekNextToken().Range,
878                     Error->ET_ParserTrailingCode);
879     return false;
880   }
881   return true;
882 }
883 
884 std::vector<MatcherCompletion>
885 Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
886                            const NamedValueMap *NamedValues) {
887   Diagnostics Error;
888   CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
889   Parser P(&Tokenizer, S, NamedValues, &Error);
890   VariantValue Dummy;
891   P.parseExpressionImpl(&Dummy);
892 
893   // Sort by specificity, then by name.
894   llvm::sort(P.Completions,
895              [](const MatcherCompletion &A, const MatcherCompletion &B) {
896                if (A.Specificity != B.Specificity)
897                  return A.Specificity > B.Specificity;
898                return A.TypedText < B.TypedText;
899              });
900 
901   return P.Completions;
902 }
903 
904 std::optional<DynTypedMatcher>
905 Parser::parseMatcherExpression(StringRef &Code, Sema *S,
906                                const NamedValueMap *NamedValues,
907                                Diagnostics *Error) {
908   VariantValue Value;
909   if (!parseExpression(Code, S, NamedValues, &Value, Error))
910     return std::nullopt;
911   if (!Value.isMatcher()) {
912     Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
913     return std::nullopt;
914   }
915   std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher();
916   if (!Result) {
917     Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
918         << Value.getTypeAsString();
919   }
920   return Result;
921 }
922 
923 } // namespace dynamic
924 } // namespace ast_matchers
925 } // namespace clang
926