10b57cec5SDimitry Andric //===- Parser.cpp - Matcher expression parser -----------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric /// 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// Recursive parser implementation for the matcher expression grammar. 110b57cec5SDimitry Andric /// 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "clang/ASTMatchers/Dynamic/Parser.h" 150b57cec5SDimitry Andric #include "clang/ASTMatchers/ASTMatchersInternal.h" 160b57cec5SDimitry Andric #include "clang/ASTMatchers/Dynamic/Diagnostics.h" 170b57cec5SDimitry Andric #include "clang/ASTMatchers/Dynamic/Registry.h" 180b57cec5SDimitry Andric #include "clang/Basic/CharInfo.h" 190b57cec5SDimitry Andric #include "llvm/ADT/Optional.h" 200b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 210b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 220b57cec5SDimitry Andric #include "llvm/Support/ManagedStatic.h" 230b57cec5SDimitry Andric #include <algorithm> 240b57cec5SDimitry Andric #include <cassert> 250b57cec5SDimitry Andric #include <cerrno> 260b57cec5SDimitry Andric #include <cstddef> 270b57cec5SDimitry Andric #include <cstdlib> 280b57cec5SDimitry Andric #include <string> 290b57cec5SDimitry Andric #include <utility> 300b57cec5SDimitry Andric #include <vector> 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric namespace clang { 330b57cec5SDimitry Andric namespace ast_matchers { 340b57cec5SDimitry Andric namespace dynamic 
{

/// Simple structure to hold information for one token from the parser.
///
/// A default-constructed token has kind \c TK_Eof and empty text, range and
/// value.
struct Parser::TokenInfo {
  /// Different possible tokens.
  enum TokenKind {
    TK_Eof,
    TK_NewLine,
    TK_OpenParen,
    TK_CloseParen,
    TK_Comma,
    TK_Period,
    TK_Literal,
    TK_Ident,
    TK_InvalidChar,
    TK_Error,
    TK_CodeCompletion
  };

  /// Some known identifiers.
  static const char* const ID_Bind;

  TokenInfo() = default;

  /// The slice of the input this token was read from.
  StringRef Text;
  /// What kind of token this is.
  TokenKind Kind = TK_Eof;
  /// Line/column span of the token in the input.
  SourceRange Range;
  /// Parsed value carried by the token; filled in for \c TK_Literal tokens.
  VariantValue Value;
};

const char* const Parser::TokenInfo::ID_Bind = "bind";

/// Simple tokenizer for the parser.
670b57cec5SDimitry Andric class Parser::CodeTokenizer { 680b57cec5SDimitry Andric public: 69*480093f4SDimitry Andric explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error) 700b57cec5SDimitry Andric : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) { 710b57cec5SDimitry Andric NextToken = getNextToken(); 720b57cec5SDimitry Andric } 730b57cec5SDimitry Andric 74*480093f4SDimitry Andric CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error, 750b57cec5SDimitry Andric unsigned CodeCompletionOffset) 760b57cec5SDimitry Andric : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error), 770b57cec5SDimitry Andric CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) { 780b57cec5SDimitry Andric NextToken = getNextToken(); 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric /// Returns but doesn't consume the next token. 820b57cec5SDimitry Andric const TokenInfo &peekNextToken() const { return NextToken; } 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric /// Consumes and returns the next token. 
850b57cec5SDimitry Andric TokenInfo consumeNextToken() { 860b57cec5SDimitry Andric TokenInfo ThisToken = NextToken; 870b57cec5SDimitry Andric NextToken = getNextToken(); 880b57cec5SDimitry Andric return ThisToken; 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric 91*480093f4SDimitry Andric TokenInfo SkipNewlines() { 92*480093f4SDimitry Andric while (NextToken.Kind == TokenInfo::TK_NewLine) 93*480093f4SDimitry Andric NextToken = getNextToken(); 94*480093f4SDimitry Andric return NextToken; 95*480093f4SDimitry Andric } 96*480093f4SDimitry Andric 97*480093f4SDimitry Andric TokenInfo consumeNextTokenIgnoreNewlines() { 98*480093f4SDimitry Andric SkipNewlines(); 99*480093f4SDimitry Andric if (NextToken.Kind == TokenInfo::TK_Eof) 100*480093f4SDimitry Andric return NextToken; 101*480093f4SDimitry Andric return consumeNextToken(); 102*480093f4SDimitry Andric } 103*480093f4SDimitry Andric 1040b57cec5SDimitry Andric TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric private: 1070b57cec5SDimitry Andric TokenInfo getNextToken() { 1080b57cec5SDimitry Andric consumeWhitespace(); 1090b57cec5SDimitry Andric TokenInfo Result; 1100b57cec5SDimitry Andric Result.Range.Start = currentLocation(); 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) { 1130b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_CodeCompletion; 1140b57cec5SDimitry Andric Result.Text = StringRef(CodeCompletionLocation, 0); 1150b57cec5SDimitry Andric CodeCompletionLocation = nullptr; 1160b57cec5SDimitry Andric return Result; 1170b57cec5SDimitry Andric } 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric if (Code.empty()) { 1200b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Eof; 1210b57cec5SDimitry Andric Result.Text = ""; 1220b57cec5SDimitry Andric return Result; 1230b57cec5SDimitry Andric } 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric switch (Code[0]) { 
1260b57cec5SDimitry Andric case '#': 127*480093f4SDimitry Andric Code = Code.drop_until([](char c) { return c == '\n'; }); 128*480093f4SDimitry Andric return getNextToken(); 1290b57cec5SDimitry Andric case ',': 1300b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Comma; 1310b57cec5SDimitry Andric Result.Text = Code.substr(0, 1); 1320b57cec5SDimitry Andric Code = Code.drop_front(); 1330b57cec5SDimitry Andric break; 1340b57cec5SDimitry Andric case '.': 1350b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Period; 1360b57cec5SDimitry Andric Result.Text = Code.substr(0, 1); 1370b57cec5SDimitry Andric Code = Code.drop_front(); 1380b57cec5SDimitry Andric break; 139*480093f4SDimitry Andric case '\n': 140*480093f4SDimitry Andric ++Line; 141*480093f4SDimitry Andric StartOfLine = Code.drop_front(); 142*480093f4SDimitry Andric Result.Kind = TokenInfo::TK_NewLine; 143*480093f4SDimitry Andric Result.Text = Code.substr(0, 1); 144*480093f4SDimitry Andric Code = Code.drop_front(); 145*480093f4SDimitry Andric break; 1460b57cec5SDimitry Andric case '(': 1470b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_OpenParen; 1480b57cec5SDimitry Andric Result.Text = Code.substr(0, 1); 1490b57cec5SDimitry Andric Code = Code.drop_front(); 1500b57cec5SDimitry Andric break; 1510b57cec5SDimitry Andric case ')': 1520b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_CloseParen; 1530b57cec5SDimitry Andric Result.Text = Code.substr(0, 1); 1540b57cec5SDimitry Andric Code = Code.drop_front(); 1550b57cec5SDimitry Andric break; 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric case '"': 1580b57cec5SDimitry Andric case '\'': 1590b57cec5SDimitry Andric // Parse a string literal. 
1600b57cec5SDimitry Andric consumeStringLiteral(&Result); 1610b57cec5SDimitry Andric break; 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric case '0': case '1': case '2': case '3': case '4': 1640b57cec5SDimitry Andric case '5': case '6': case '7': case '8': case '9': 1650b57cec5SDimitry Andric // Parse an unsigned and float literal. 1660b57cec5SDimitry Andric consumeNumberLiteral(&Result); 1670b57cec5SDimitry Andric break; 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric default: 1700b57cec5SDimitry Andric if (isAlphanumeric(Code[0])) { 1710b57cec5SDimitry Andric // Parse an identifier 1720b57cec5SDimitry Andric size_t TokenLength = 1; 1730b57cec5SDimitry Andric while (true) { 1740b57cec5SDimitry Andric // A code completion location in/immediately after an identifier will 1750b57cec5SDimitry Andric // cause the portion of the identifier before the code completion 1760b57cec5SDimitry Andric // location to become a code completion token. 1770b57cec5SDimitry Andric if (CodeCompletionLocation == Code.data() + TokenLength) { 1780b57cec5SDimitry Andric CodeCompletionLocation = nullptr; 1790b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_CodeCompletion; 1800b57cec5SDimitry Andric Result.Text = Code.substr(0, TokenLength); 1810b57cec5SDimitry Andric Code = Code.drop_front(TokenLength); 1820b57cec5SDimitry Andric return Result; 1830b57cec5SDimitry Andric } 1840b57cec5SDimitry Andric if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength])) 1850b57cec5SDimitry Andric break; 1860b57cec5SDimitry Andric ++TokenLength; 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric if (TokenLength == 4 && Code.startswith("true")) { 1890b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Literal; 1900b57cec5SDimitry Andric Result.Value = true; 1910b57cec5SDimitry Andric } else if (TokenLength == 5 && Code.startswith("false")) { 1920b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Literal; 1930b57cec5SDimitry Andric Result.Value = false; 1940b57cec5SDimitry 
Andric } else { 1950b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Ident; 1960b57cec5SDimitry Andric Result.Text = Code.substr(0, TokenLength); 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric Code = Code.drop_front(TokenLength); 1990b57cec5SDimitry Andric } else { 2000b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_InvalidChar; 2010b57cec5SDimitry Andric Result.Text = Code.substr(0, 1); 2020b57cec5SDimitry Andric Code = Code.drop_front(1); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric break; 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric Result.Range.End = currentLocation(); 2080b57cec5SDimitry Andric return Result; 2090b57cec5SDimitry Andric } 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric /// Consume an unsigned and float literal. 2120b57cec5SDimitry Andric void consumeNumberLiteral(TokenInfo *Result) { 2130b57cec5SDimitry Andric bool isFloatingLiteral = false; 2140b57cec5SDimitry Andric unsigned Length = 1; 2150b57cec5SDimitry Andric if (Code.size() > 1) { 2160b57cec5SDimitry Andric // Consume the 'x' or 'b' radix modifier, if present. 2170b57cec5SDimitry Andric switch (toLowercase(Code[1])) { 2180b57cec5SDimitry Andric case 'x': case 'b': Length = 2; 2190b57cec5SDimitry Andric } 2200b57cec5SDimitry Andric } 2210b57cec5SDimitry Andric while (Length < Code.size() && isHexDigit(Code[Length])) 2220b57cec5SDimitry Andric ++Length; 2230b57cec5SDimitry Andric 2240b57cec5SDimitry Andric // Try to recognize a floating point literal. 2250b57cec5SDimitry Andric while (Length < Code.size()) { 2260b57cec5SDimitry Andric char c = Code[Length]; 2270b57cec5SDimitry Andric if (c == '-' || c == '+' || c == '.' 
|| isHexDigit(c)) { 2280b57cec5SDimitry Andric isFloatingLiteral = true; 2290b57cec5SDimitry Andric Length++; 2300b57cec5SDimitry Andric } else { 2310b57cec5SDimitry Andric break; 2320b57cec5SDimitry Andric } 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric Result->Text = Code.substr(0, Length); 2360b57cec5SDimitry Andric Code = Code.drop_front(Length); 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric if (isFloatingLiteral) { 2390b57cec5SDimitry Andric char *end; 2400b57cec5SDimitry Andric errno = 0; 2410b57cec5SDimitry Andric std::string Text = Result->Text.str(); 2420b57cec5SDimitry Andric double doubleValue = strtod(Text.c_str(), &end); 2430b57cec5SDimitry Andric if (*end == 0 && errno == 0) { 2440b57cec5SDimitry Andric Result->Kind = TokenInfo::TK_Literal; 2450b57cec5SDimitry Andric Result->Value = doubleValue; 2460b57cec5SDimitry Andric return; 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric } else { 2490b57cec5SDimitry Andric unsigned Value; 2500b57cec5SDimitry Andric if (!Result->Text.getAsInteger(0, Value)) { 2510b57cec5SDimitry Andric Result->Kind = TokenInfo::TK_Literal; 2520b57cec5SDimitry Andric Result->Value = Value; 2530b57cec5SDimitry Andric return; 2540b57cec5SDimitry Andric } 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric SourceRange Range; 2580b57cec5SDimitry Andric Range.Start = Result->Range.Start; 2590b57cec5SDimitry Andric Range.End = currentLocation(); 2600b57cec5SDimitry Andric Error->addError(Range, Error->ET_ParserNumberError) << Result->Text; 2610b57cec5SDimitry Andric Result->Kind = TokenInfo::TK_Error; 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric /// Consume a string literal. 2650b57cec5SDimitry Andric /// 2660b57cec5SDimitry Andric /// \c Code must be positioned at the start of the literal (the opening 2670b57cec5SDimitry Andric /// quote). Consumed until it finds the same closing quote character. 
2680b57cec5SDimitry Andric void consumeStringLiteral(TokenInfo *Result) { 2690b57cec5SDimitry Andric bool InEscape = false; 2700b57cec5SDimitry Andric const char Marker = Code[0]; 2710b57cec5SDimitry Andric for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { 2720b57cec5SDimitry Andric if (InEscape) { 2730b57cec5SDimitry Andric InEscape = false; 2740b57cec5SDimitry Andric continue; 2750b57cec5SDimitry Andric } 2760b57cec5SDimitry Andric if (Code[Length] == '\\') { 2770b57cec5SDimitry Andric InEscape = true; 2780b57cec5SDimitry Andric continue; 2790b57cec5SDimitry Andric } 2800b57cec5SDimitry Andric if (Code[Length] == Marker) { 2810b57cec5SDimitry Andric Result->Kind = TokenInfo::TK_Literal; 2820b57cec5SDimitry Andric Result->Text = Code.substr(0, Length + 1); 2830b57cec5SDimitry Andric Result->Value = Code.substr(1, Length - 1); 2840b57cec5SDimitry Andric Code = Code.drop_front(Length + 1); 2850b57cec5SDimitry Andric return; 2860b57cec5SDimitry Andric } 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric StringRef ErrorText = Code; 2900b57cec5SDimitry Andric Code = Code.drop_front(Code.size()); 2910b57cec5SDimitry Andric SourceRange Range; 2920b57cec5SDimitry Andric Range.Start = Result->Range.Start; 2930b57cec5SDimitry Andric Range.End = currentLocation(); 2940b57cec5SDimitry Andric Error->addError(Range, Error->ET_ParserStringError) << ErrorText; 2950b57cec5SDimitry Andric Result->Kind = TokenInfo::TK_Error; 2960b57cec5SDimitry Andric } 2970b57cec5SDimitry Andric 2980b57cec5SDimitry Andric /// Consume all leading whitespace from \c Code. 2990b57cec5SDimitry Andric void consumeWhitespace() { 300*480093f4SDimitry Andric Code = Code.drop_while([](char c) { 301*480093f4SDimitry Andric // Don't trim newlines. 
302*480093f4SDimitry Andric return StringRef(" \t\v\f\r").contains(c); 303*480093f4SDimitry Andric }); 3040b57cec5SDimitry Andric } 3050b57cec5SDimitry Andric 3060b57cec5SDimitry Andric SourceLocation currentLocation() { 3070b57cec5SDimitry Andric SourceLocation Location; 3080b57cec5SDimitry Andric Location.Line = Line; 3090b57cec5SDimitry Andric Location.Column = Code.data() - StartOfLine.data() + 1; 3100b57cec5SDimitry Andric return Location; 3110b57cec5SDimitry Andric } 3120b57cec5SDimitry Andric 313*480093f4SDimitry Andric StringRef &Code; 3140b57cec5SDimitry Andric StringRef StartOfLine; 3150b57cec5SDimitry Andric unsigned Line = 1; 3160b57cec5SDimitry Andric Diagnostics *Error; 3170b57cec5SDimitry Andric TokenInfo NextToken; 3180b57cec5SDimitry Andric const char *CodeCompletionLocation = nullptr; 3190b57cec5SDimitry Andric }; 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric Parser::Sema::~Sema() = default; 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes( 3240b57cec5SDimitry Andric llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 3250b57cec5SDimitry Andric return {}; 3260b57cec5SDimitry Andric } 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric std::vector<MatcherCompletion> 3290b57cec5SDimitry Andric Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) { 3300b57cec5SDimitry Andric return {}; 3310b57cec5SDimitry Andric } 3320b57cec5SDimitry Andric 3330b57cec5SDimitry Andric struct Parser::ScopedContextEntry { 3340b57cec5SDimitry Andric Parser *P; 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) { 3370b57cec5SDimitry Andric P->ContextStack.push_back(std::make_pair(C, 0u)); 3380b57cec5SDimitry Andric } 3390b57cec5SDimitry Andric 3400b57cec5SDimitry Andric ~ScopedContextEntry() { 3410b57cec5SDimitry Andric P->ContextStack.pop_back(); 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 
3440b57cec5SDimitry Andric void nextArg() { 3450b57cec5SDimitry Andric ++P->ContextStack.back().second; 3460b57cec5SDimitry Andric } 3470b57cec5SDimitry Andric }; 3480b57cec5SDimitry Andric 3490b57cec5SDimitry Andric /// Parse expressions that start with an identifier. 3500b57cec5SDimitry Andric /// 3510b57cec5SDimitry Andric /// This function can parse named values and matchers. 3520b57cec5SDimitry Andric /// In case of failure it will try to determine the user's intent to give 3530b57cec5SDimitry Andric /// an appropriate error message. 3540b57cec5SDimitry Andric bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { 3550b57cec5SDimitry Andric const TokenInfo NameToken = Tokenizer->consumeNextToken(); 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { 3580b57cec5SDimitry Andric // Parse as a named value. 3590b57cec5SDimitry Andric if (const VariantValue NamedValue = 3600b57cec5SDimitry Andric NamedValues ? NamedValues->lookup(NameToken.Text) 3610b57cec5SDimitry Andric : VariantValue()) { 3620b57cec5SDimitry Andric 3630b57cec5SDimitry Andric if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) { 3640b57cec5SDimitry Andric *Value = NamedValue; 3650b57cec5SDimitry Andric return true; 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric 3680b57cec5SDimitry Andric std::string BindID; 3690b57cec5SDimitry Andric if (!parseBindID(BindID)) 3700b57cec5SDimitry Andric return false; 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric assert(NamedValue.isMatcher()); 3730b57cec5SDimitry Andric llvm::Optional<DynTypedMatcher> Result = 3740b57cec5SDimitry Andric NamedValue.getMatcher().getSingleMatcher(); 3750b57cec5SDimitry Andric if (Result.hasValue()) { 3760b57cec5SDimitry Andric llvm::Optional<DynTypedMatcher> Bound = Result->tryBind(BindID); 3770b57cec5SDimitry Andric if (Bound.hasValue()) { 3780b57cec5SDimitry Andric *Value = VariantMatcher::SingleMatcher(*Bound); 3790b57cec5SDimitry Andric 
return true; 3800b57cec5SDimitry Andric } 3810b57cec5SDimitry Andric } 3820b57cec5SDimitry Andric return false; 3830b57cec5SDimitry Andric } 384*480093f4SDimitry Andric 385*480093f4SDimitry Andric if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) { 386*480093f4SDimitry Andric Error->addError(Tokenizer->peekNextToken().Range, 387*480093f4SDimitry Andric Error->ET_ParserNoOpenParen) 388*480093f4SDimitry Andric << "NewLine"; 389*480093f4SDimitry Andric return false; 390*480093f4SDimitry Andric } 391*480093f4SDimitry Andric 3920b57cec5SDimitry Andric // If the syntax is correct and the name is not a matcher either, report 3930b57cec5SDimitry Andric // unknown named value. 3940b57cec5SDimitry Andric if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma || 3950b57cec5SDimitry Andric Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen || 396*480093f4SDimitry Andric Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine || 3970b57cec5SDimitry Andric Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) && 3980b57cec5SDimitry Andric !S->lookupMatcherCtor(NameToken.Text)) { 3990b57cec5SDimitry Andric Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound) 4000b57cec5SDimitry Andric << NameToken.Text; 4010b57cec5SDimitry Andric return false; 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric // Otherwise, fallback to the matcher parser. 4040b57cec5SDimitry Andric } 4050b57cec5SDimitry Andric 406*480093f4SDimitry Andric Tokenizer->SkipNewlines(); 407*480093f4SDimitry Andric 4080b57cec5SDimitry Andric // Parse as a matcher expression. 4090b57cec5SDimitry Andric return parseMatcherExpressionImpl(NameToken, Value); 4100b57cec5SDimitry Andric } 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric bool Parser::parseBindID(std::string &BindID) { 4130b57cec5SDimitry Andric // Parse .bind("foo") 4140b57cec5SDimitry Andric assert(Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period); 4150b57cec5SDimitry Andric Tokenizer->consumeNextToken(); // consume the period. 
4160b57cec5SDimitry Andric const TokenInfo BindToken = Tokenizer->consumeNextToken(); 4170b57cec5SDimitry Andric if (BindToken.Kind == TokenInfo::TK_CodeCompletion) { 4180b57cec5SDimitry Andric addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1)); 4190b57cec5SDimitry Andric return false; 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 423*480093f4SDimitry Andric const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 424*480093f4SDimitry Andric const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 4250b57cec5SDimitry Andric 4260b57cec5SDimitry Andric // TODO: We could use different error codes for each/some to be more 4270b57cec5SDimitry Andric // explicit about the syntax error. 4280b57cec5SDimitry Andric if (BindToken.Kind != TokenInfo::TK_Ident || 4290b57cec5SDimitry Andric BindToken.Text != TokenInfo::ID_Bind) { 4300b57cec5SDimitry Andric Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr); 4310b57cec5SDimitry Andric return false; 4320b57cec5SDimitry Andric } 4330b57cec5SDimitry Andric if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 4340b57cec5SDimitry Andric Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr); 4350b57cec5SDimitry Andric return false; 4360b57cec5SDimitry Andric } 4370b57cec5SDimitry Andric if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { 4380b57cec5SDimitry Andric Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr); 4390b57cec5SDimitry Andric return false; 4400b57cec5SDimitry Andric } 4410b57cec5SDimitry Andric if (CloseToken.Kind != TokenInfo::TK_CloseParen) { 4420b57cec5SDimitry Andric Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr); 4430b57cec5SDimitry Andric return false; 4440b57cec5SDimitry Andric } 4450b57cec5SDimitry Andric BindID = IDToken.Value.getString(); 4460b57cec5SDimitry Andric return true; 
4470b57cec5SDimitry Andric } 4480b57cec5SDimitry Andric 4490b57cec5SDimitry Andric /// Parse and validate a matcher expression. 4500b57cec5SDimitry Andric /// \return \c true on success, in which case \c Value has the matcher parsed. 4510b57cec5SDimitry Andric /// If the input is malformed, or some argument has an error, it 4520b57cec5SDimitry Andric /// returns \c false. 4530b57cec5SDimitry Andric bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, 4540b57cec5SDimitry Andric VariantValue *Value) { 4550b57cec5SDimitry Andric assert(NameToken.Kind == TokenInfo::TK_Ident); 4560b57cec5SDimitry Andric const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 4570b57cec5SDimitry Andric if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 4580b57cec5SDimitry Andric Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen) 4590b57cec5SDimitry Andric << OpenToken.Text; 4600b57cec5SDimitry Andric return false; 4610b57cec5SDimitry Andric } 4620b57cec5SDimitry Andric 4630b57cec5SDimitry Andric llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text); 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric if (!Ctor) { 4660b57cec5SDimitry Andric Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound) 4670b57cec5SDimitry Andric << NameToken.Text; 4680b57cec5SDimitry Andric // Do not return here. We need to continue to give completion suggestions. 4690b57cec5SDimitry Andric } 4700b57cec5SDimitry Andric 4710b57cec5SDimitry Andric std::vector<ParserValue> Args; 4720b57cec5SDimitry Andric TokenInfo EndToken; 4730b57cec5SDimitry Andric 474*480093f4SDimitry Andric Tokenizer->SkipNewlines(); 475*480093f4SDimitry Andric 4760b57cec5SDimitry Andric { 4770b57cec5SDimitry Andric ScopedContextEntry SCE(this, Ctor ? 
*Ctor : nullptr); 4780b57cec5SDimitry Andric 4790b57cec5SDimitry Andric while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 4800b57cec5SDimitry Andric if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 4810b57cec5SDimitry Andric // End of args. 4820b57cec5SDimitry Andric EndToken = Tokenizer->consumeNextToken(); 4830b57cec5SDimitry Andric break; 4840b57cec5SDimitry Andric } 4850b57cec5SDimitry Andric if (!Args.empty()) { 4860b57cec5SDimitry Andric // We must find a , token to continue. 4870b57cec5SDimitry Andric const TokenInfo CommaToken = Tokenizer->consumeNextToken(); 4880b57cec5SDimitry Andric if (CommaToken.Kind != TokenInfo::TK_Comma) { 4890b57cec5SDimitry Andric Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 4900b57cec5SDimitry Andric << CommaToken.Text; 4910b57cec5SDimitry Andric return false; 4920b57cec5SDimitry Andric } 4930b57cec5SDimitry Andric } 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 4960b57cec5SDimitry Andric NameToken.Text, NameToken.Range, 4970b57cec5SDimitry Andric Args.size() + 1); 4980b57cec5SDimitry Andric ParserValue ArgValue; 499*480093f4SDimitry Andric Tokenizer->SkipNewlines(); 5000b57cec5SDimitry Andric ArgValue.Text = Tokenizer->peekNextToken().Text; 5010b57cec5SDimitry Andric ArgValue.Range = Tokenizer->peekNextToken().Range; 5020b57cec5SDimitry Andric if (!parseExpressionImpl(&ArgValue.Value)) { 5030b57cec5SDimitry Andric return false; 5040b57cec5SDimitry Andric } 5050b57cec5SDimitry Andric 506*480093f4SDimitry Andric Tokenizer->SkipNewlines(); 5070b57cec5SDimitry Andric Args.push_back(ArgValue); 5080b57cec5SDimitry Andric SCE.nextArg(); 5090b57cec5SDimitry Andric } 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric if (EndToken.Kind == TokenInfo::TK_Eof) { 5130b57cec5SDimitry Andric Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 5140b57cec5SDimitry Andric return false; 
5150b57cec5SDimitry Andric } 5160b57cec5SDimitry Andric 5170b57cec5SDimitry Andric std::string BindID; 5180b57cec5SDimitry Andric if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 5190b57cec5SDimitry Andric if (!parseBindID(BindID)) 5200b57cec5SDimitry Andric return false; 5210b57cec5SDimitry Andric } 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric if (!Ctor) 5240b57cec5SDimitry Andric return false; 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric // Merge the start and end infos. 5270b57cec5SDimitry Andric Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 5280b57cec5SDimitry Andric NameToken.Text, NameToken.Range); 5290b57cec5SDimitry Andric SourceRange MatcherRange = NameToken.Range; 5300b57cec5SDimitry Andric MatcherRange.End = EndToken.Range.End; 5310b57cec5SDimitry Andric VariantMatcher Result = S->actOnMatcherExpression( 5320b57cec5SDimitry Andric *Ctor, MatcherRange, BindID, Args, Error); 5330b57cec5SDimitry Andric if (Result.isNull()) return false; 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric *Value = Result; 5360b57cec5SDimitry Andric return true; 5370b57cec5SDimitry Andric } 5380b57cec5SDimitry Andric 5390b57cec5SDimitry Andric // If the prefix of this completion matches the completion token, add it to 5400b57cec5SDimitry Andric // Completions minus the prefix. 
5410b57cec5SDimitry Andric void Parser::addCompletion(const TokenInfo &CompToken, 5420b57cec5SDimitry Andric const MatcherCompletion& Completion) { 5430b57cec5SDimitry Andric if (StringRef(Completion.TypedText).startswith(CompToken.Text) && 5440b57cec5SDimitry Andric Completion.Specificity > 0) { 5450b57cec5SDimitry Andric Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()), 5460b57cec5SDimitry Andric Completion.MatcherDecl, Completion.Specificity); 5470b57cec5SDimitry Andric } 5480b57cec5SDimitry Andric } 5490b57cec5SDimitry Andric 5500b57cec5SDimitry Andric std::vector<MatcherCompletion> Parser::getNamedValueCompletions( 5510b57cec5SDimitry Andric ArrayRef<ArgKind> AcceptedTypes) { 5520b57cec5SDimitry Andric if (!NamedValues) return std::vector<MatcherCompletion>(); 5530b57cec5SDimitry Andric std::vector<MatcherCompletion> Result; 5540b57cec5SDimitry Andric for (const auto &Entry : *NamedValues) { 5550b57cec5SDimitry Andric unsigned Specificity; 5560b57cec5SDimitry Andric if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) { 5570b57cec5SDimitry Andric std::string Decl = 5580b57cec5SDimitry Andric (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str(); 5590b57cec5SDimitry Andric Result.emplace_back(Entry.getKey(), Decl, Specificity); 5600b57cec5SDimitry Andric } 5610b57cec5SDimitry Andric } 5620b57cec5SDimitry Andric return Result; 5630b57cec5SDimitry Andric } 5640b57cec5SDimitry Andric 5650b57cec5SDimitry Andric void Parser::addExpressionCompletions() { 566*480093f4SDimitry Andric const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 5670b57cec5SDimitry Andric assert(CompToken.Kind == TokenInfo::TK_CodeCompletion); 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric // We cannot complete code if there is an invalid element on the context 5700b57cec5SDimitry Andric // stack. 
5710b57cec5SDimitry Andric for (ContextStackTy::iterator I = ContextStack.begin(), 5720b57cec5SDimitry Andric E = ContextStack.end(); 5730b57cec5SDimitry Andric I != E; ++I) { 5740b57cec5SDimitry Andric if (!I->first) 5750b57cec5SDimitry Andric return; 5760b57cec5SDimitry Andric } 5770b57cec5SDimitry Andric 5780b57cec5SDimitry Andric auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack); 5790b57cec5SDimitry Andric for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) { 5800b57cec5SDimitry Andric addCompletion(CompToken, Completion); 5810b57cec5SDimitry Andric } 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) { 5840b57cec5SDimitry Andric addCompletion(CompToken, Completion); 5850b57cec5SDimitry Andric } 5860b57cec5SDimitry Andric } 5870b57cec5SDimitry Andric 5880b57cec5SDimitry Andric /// Parse an <Expression> 5890b57cec5SDimitry Andric bool Parser::parseExpressionImpl(VariantValue *Value) { 5900b57cec5SDimitry Andric switch (Tokenizer->nextTokenKind()) { 5910b57cec5SDimitry Andric case TokenInfo::TK_Literal: 5920b57cec5SDimitry Andric *Value = Tokenizer->consumeNextToken().Value; 5930b57cec5SDimitry Andric return true; 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric case TokenInfo::TK_Ident: 5960b57cec5SDimitry Andric return parseIdentifierPrefixImpl(Value); 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric case TokenInfo::TK_CodeCompletion: 5990b57cec5SDimitry Andric addExpressionCompletions(); 6000b57cec5SDimitry Andric return false; 6010b57cec5SDimitry Andric 6020b57cec5SDimitry Andric case TokenInfo::TK_Eof: 6030b57cec5SDimitry Andric Error->addError(Tokenizer->consumeNextToken().Range, 6040b57cec5SDimitry Andric Error->ET_ParserNoCode); 6050b57cec5SDimitry Andric return false; 6060b57cec5SDimitry Andric 6070b57cec5SDimitry Andric case TokenInfo::TK_Error: 6080b57cec5SDimitry Andric // This error was already reported by the tokenizer. 
6090b57cec5SDimitry Andric return false; 610*480093f4SDimitry Andric case TokenInfo::TK_NewLine: 6110b57cec5SDimitry Andric case TokenInfo::TK_OpenParen: 6120b57cec5SDimitry Andric case TokenInfo::TK_CloseParen: 6130b57cec5SDimitry Andric case TokenInfo::TK_Comma: 6140b57cec5SDimitry Andric case TokenInfo::TK_Period: 6150b57cec5SDimitry Andric case TokenInfo::TK_InvalidChar: 6160b57cec5SDimitry Andric const TokenInfo Token = Tokenizer->consumeNextToken(); 617*480093f4SDimitry Andric Error->addError(Token.Range, Error->ET_ParserInvalidToken) 618*480093f4SDimitry Andric << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text); 6190b57cec5SDimitry Andric return false; 6200b57cec5SDimitry Andric } 6210b57cec5SDimitry Andric 6220b57cec5SDimitry Andric llvm_unreachable("Unknown token kind."); 6230b57cec5SDimitry Andric } 6240b57cec5SDimitry Andric 6250b57cec5SDimitry Andric static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema; 6260b57cec5SDimitry Andric 6270b57cec5SDimitry Andric Parser::Parser(CodeTokenizer *Tokenizer, Sema *S, 6280b57cec5SDimitry Andric const NamedValueMap *NamedValues, Diagnostics *Error) 6290b57cec5SDimitry Andric : Tokenizer(Tokenizer), S(S ? 
S : &*DefaultRegistrySema), 6300b57cec5SDimitry Andric NamedValues(NamedValues), Error(Error) {} 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric Parser::RegistrySema::~RegistrySema() = default; 6330b57cec5SDimitry Andric 6340b57cec5SDimitry Andric llvm::Optional<MatcherCtor> 6350b57cec5SDimitry Andric Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) { 6360b57cec5SDimitry Andric return Registry::lookupMatcherCtor(MatcherName); 6370b57cec5SDimitry Andric } 6380b57cec5SDimitry Andric 6390b57cec5SDimitry Andric VariantMatcher Parser::RegistrySema::actOnMatcherExpression( 6400b57cec5SDimitry Andric MatcherCtor Ctor, SourceRange NameRange, StringRef BindID, 6410b57cec5SDimitry Andric ArrayRef<ParserValue> Args, Diagnostics *Error) { 6420b57cec5SDimitry Andric if (BindID.empty()) { 6430b57cec5SDimitry Andric return Registry::constructMatcher(Ctor, NameRange, Args, Error); 6440b57cec5SDimitry Andric } else { 6450b57cec5SDimitry Andric return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args, 6460b57cec5SDimitry Andric Error); 6470b57cec5SDimitry Andric } 6480b57cec5SDimitry Andric } 6490b57cec5SDimitry Andric 6500b57cec5SDimitry Andric std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes( 6510b57cec5SDimitry Andric ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 6520b57cec5SDimitry Andric return Registry::getAcceptedCompletionTypes(Context); 6530b57cec5SDimitry Andric } 6540b57cec5SDimitry Andric 6550b57cec5SDimitry Andric std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions( 6560b57cec5SDimitry Andric ArrayRef<ArgKind> AcceptedTypes) { 6570b57cec5SDimitry Andric return Registry::getMatcherCompletions(AcceptedTypes); 6580b57cec5SDimitry Andric } 6590b57cec5SDimitry Andric 660*480093f4SDimitry Andric bool Parser::parseExpression(StringRef &Code, Sema *S, 6610b57cec5SDimitry Andric const NamedValueMap *NamedValues, 6620b57cec5SDimitry Andric VariantValue *Value, Diagnostics *Error) { 
6630b57cec5SDimitry Andric CodeTokenizer Tokenizer(Code, Error); 6640b57cec5SDimitry Andric if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value)) 6650b57cec5SDimitry Andric return false; 666*480093f4SDimitry Andric auto NT = Tokenizer.peekNextToken(); 667*480093f4SDimitry Andric if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) { 6680b57cec5SDimitry Andric Error->addError(Tokenizer.peekNextToken().Range, 6690b57cec5SDimitry Andric Error->ET_ParserTrailingCode); 6700b57cec5SDimitry Andric return false; 6710b57cec5SDimitry Andric } 6720b57cec5SDimitry Andric return true; 6730b57cec5SDimitry Andric } 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric std::vector<MatcherCompletion> 676*480093f4SDimitry Andric Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S, 6770b57cec5SDimitry Andric const NamedValueMap *NamedValues) { 6780b57cec5SDimitry Andric Diagnostics Error; 6790b57cec5SDimitry Andric CodeTokenizer Tokenizer(Code, &Error, CompletionOffset); 6800b57cec5SDimitry Andric Parser P(&Tokenizer, S, NamedValues, &Error); 6810b57cec5SDimitry Andric VariantValue Dummy; 6820b57cec5SDimitry Andric P.parseExpressionImpl(&Dummy); 6830b57cec5SDimitry Andric 6840b57cec5SDimitry Andric // Sort by specificity, then by name. 
6850b57cec5SDimitry Andric llvm::sort(P.Completions, 6860b57cec5SDimitry Andric [](const MatcherCompletion &A, const MatcherCompletion &B) { 6870b57cec5SDimitry Andric if (A.Specificity != B.Specificity) 6880b57cec5SDimitry Andric return A.Specificity > B.Specificity; 6890b57cec5SDimitry Andric return A.TypedText < B.TypedText; 6900b57cec5SDimitry Andric }); 6910b57cec5SDimitry Andric 6920b57cec5SDimitry Andric return P.Completions; 6930b57cec5SDimitry Andric } 6940b57cec5SDimitry Andric 6950b57cec5SDimitry Andric llvm::Optional<DynTypedMatcher> 696*480093f4SDimitry Andric Parser::parseMatcherExpression(StringRef &Code, Sema *S, 6970b57cec5SDimitry Andric const NamedValueMap *NamedValues, 6980b57cec5SDimitry Andric Diagnostics *Error) { 6990b57cec5SDimitry Andric VariantValue Value; 7000b57cec5SDimitry Andric if (!parseExpression(Code, S, NamedValues, &Value, Error)) 7010b57cec5SDimitry Andric return llvm::Optional<DynTypedMatcher>(); 7020b57cec5SDimitry Andric if (!Value.isMatcher()) { 7030b57cec5SDimitry Andric Error->addError(SourceRange(), Error->ET_ParserNotAMatcher); 7040b57cec5SDimitry Andric return llvm::Optional<DynTypedMatcher>(); 7050b57cec5SDimitry Andric } 7060b57cec5SDimitry Andric llvm::Optional<DynTypedMatcher> Result = 7070b57cec5SDimitry Andric Value.getMatcher().getSingleMatcher(); 7080b57cec5SDimitry Andric if (!Result.hasValue()) { 7090b57cec5SDimitry Andric Error->addError(SourceRange(), Error->ET_ParserOverloadedType) 7100b57cec5SDimitry Andric << Value.getTypeAsString(); 7110b57cec5SDimitry Andric } 7120b57cec5SDimitry Andric return Result; 7130b57cec5SDimitry Andric } 7140b57cec5SDimitry Andric 7150b57cec5SDimitry Andric } // namespace dynamic 7160b57cec5SDimitry Andric } // namespace ast_matchers 7170b57cec5SDimitry Andric } // namespace clang 718