1*0b57cec5SDimitry Andric //===- Parser.cpp - Matcher expression parser -----------------------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric /// 9*0b57cec5SDimitry Andric /// \file 10*0b57cec5SDimitry Andric /// Recursive parser implementation for the matcher expression grammar. 11*0b57cec5SDimitry Andric /// 12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 13*0b57cec5SDimitry Andric 14*0b57cec5SDimitry Andric #include "clang/ASTMatchers/Dynamic/Parser.h" 15*0b57cec5SDimitry Andric #include "clang/ASTMatchers/ASTMatchersInternal.h" 16*0b57cec5SDimitry Andric #include "clang/ASTMatchers/Dynamic/Diagnostics.h" 17*0b57cec5SDimitry Andric #include "clang/ASTMatchers/Dynamic/Registry.h" 18*0b57cec5SDimitry Andric #include "clang/Basic/CharInfo.h" 19*0b57cec5SDimitry Andric #include "llvm/ADT/Optional.h" 20*0b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 21*0b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 22*0b57cec5SDimitry Andric #include "llvm/Support/ManagedStatic.h" 23*0b57cec5SDimitry Andric #include <algorithm> 24*0b57cec5SDimitry Andric #include <cassert> 25*0b57cec5SDimitry Andric #include <cerrno> 26*0b57cec5SDimitry Andric #include <cstddef> 27*0b57cec5SDimitry Andric #include <cstdlib> 28*0b57cec5SDimitry Andric #include <string> 29*0b57cec5SDimitry Andric #include <utility> 30*0b57cec5SDimitry Andric #include <vector> 31*0b57cec5SDimitry Andric 32*0b57cec5SDimitry Andric namespace clang { 33*0b57cec5SDimitry Andric namespace ast_matchers { 34*0b57cec5SDimitry Andric namespace dynamic { 35*0b57cec5SDimitry Andric 36*0b57cec5SDimitry Andric /// Simple structure to hold information for one token from the parser. 37*0b57cec5SDimitry Andric struct Parser::TokenInfo { 38*0b57cec5SDimitry Andric /// Different possible tokens. 39*0b57cec5SDimitry Andric enum TokenKind { 40*0b57cec5SDimitry Andric TK_Eof, 41*0b57cec5SDimitry Andric TK_OpenParen, 42*0b57cec5SDimitry Andric TK_CloseParen, 43*0b57cec5SDimitry Andric TK_Comma, 44*0b57cec5SDimitry Andric TK_Period, 45*0b57cec5SDimitry Andric TK_Literal, 46*0b57cec5SDimitry Andric TK_Ident, 47*0b57cec5SDimitry Andric TK_InvalidChar, 48*0b57cec5SDimitry Andric TK_Error, 49*0b57cec5SDimitry Andric TK_CodeCompletion 50*0b57cec5SDimitry Andric }; 51*0b57cec5SDimitry Andric 52*0b57cec5SDimitry Andric /// Some known identifiers. 53*0b57cec5SDimitry Andric static const char* const ID_Bind; 54*0b57cec5SDimitry Andric 55*0b57cec5SDimitry Andric TokenInfo() = default; 56*0b57cec5SDimitry Andric 57*0b57cec5SDimitry Andric StringRef Text; 58*0b57cec5SDimitry Andric TokenKind Kind = TK_Eof; 59*0b57cec5SDimitry Andric SourceRange Range; 60*0b57cec5SDimitry Andric VariantValue Value; 61*0b57cec5SDimitry Andric }; 62*0b57cec5SDimitry Andric 63*0b57cec5SDimitry Andric const char* const Parser::TokenInfo::ID_Bind = "bind"; 64*0b57cec5SDimitry Andric 65*0b57cec5SDimitry Andric /// Simple tokenizer for the parser. 66*0b57cec5SDimitry Andric class Parser::CodeTokenizer { 67*0b57cec5SDimitry Andric public: 68*0b57cec5SDimitry Andric explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error) 69*0b57cec5SDimitry Andric : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) { 70*0b57cec5SDimitry Andric NextToken = getNextToken(); 71*0b57cec5SDimitry Andric } 72*0b57cec5SDimitry Andric 73*0b57cec5SDimitry Andric CodeTokenizer(StringRef MatcherCode, Diagnostics *Error, 74*0b57cec5SDimitry Andric unsigned CodeCompletionOffset) 75*0b57cec5SDimitry Andric : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error), 76*0b57cec5SDimitry Andric CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) { 77*0b57cec5SDimitry Andric NextToken = getNextToken(); 78*0b57cec5SDimitry Andric } 79*0b57cec5SDimitry Andric 80*0b57cec5SDimitry Andric /// Returns but doesn't consume the next token. 81*0b57cec5SDimitry Andric const TokenInfo &peekNextToken() const { return NextToken; } 82*0b57cec5SDimitry Andric 83*0b57cec5SDimitry Andric /// Consumes and returns the next token. 84*0b57cec5SDimitry Andric TokenInfo consumeNextToken() { 85*0b57cec5SDimitry Andric TokenInfo ThisToken = NextToken; 86*0b57cec5SDimitry Andric NextToken = getNextToken(); 87*0b57cec5SDimitry Andric return ThisToken; 88*0b57cec5SDimitry Andric } 89*0b57cec5SDimitry Andric 90*0b57cec5SDimitry Andric TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } 91*0b57cec5SDimitry Andric 92*0b57cec5SDimitry Andric private: 93*0b57cec5SDimitry Andric TokenInfo getNextToken() { 94*0b57cec5SDimitry Andric consumeWhitespace(); 95*0b57cec5SDimitry Andric TokenInfo Result; 96*0b57cec5SDimitry Andric Result.Range.Start = currentLocation(); 97*0b57cec5SDimitry Andric 98*0b57cec5SDimitry Andric if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) { 99*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_CodeCompletion; 100*0b57cec5SDimitry Andric Result.Text = StringRef(CodeCompletionLocation, 0); 101*0b57cec5SDimitry Andric CodeCompletionLocation = nullptr; 102*0b57cec5SDimitry Andric return Result; 103*0b57cec5SDimitry Andric } 104*0b57cec5SDimitry Andric 105*0b57cec5SDimitry Andric if (Code.empty()) { 106*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Eof; 107*0b57cec5SDimitry Andric Result.Text = ""; 108*0b57cec5SDimitry Andric return Result; 109*0b57cec5SDimitry Andric } 110*0b57cec5SDimitry Andric 111*0b57cec5SDimitry Andric switch (Code[0]) { 112*0b57cec5SDimitry Andric case '#': 113*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Eof; 114*0b57cec5SDimitry Andric Result.Text = ""; 115*0b57cec5SDimitry Andric return Result; 116*0b57cec5SDimitry Andric case ',': 117*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Comma; 118*0b57cec5SDimitry Andric Result.Text = Code.substr(0, 1); 119*0b57cec5SDimitry Andric Code = Code.drop_front(); 120*0b57cec5SDimitry Andric break; 121*0b57cec5SDimitry Andric case '.': 122*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Period; 123*0b57cec5SDimitry Andric Result.Text = Code.substr(0, 1); 124*0b57cec5SDimitry Andric Code = Code.drop_front(); 125*0b57cec5SDimitry Andric break; 126*0b57cec5SDimitry Andric case '(': 127*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_OpenParen; 128*0b57cec5SDimitry Andric Result.Text = Code.substr(0, 1); 129*0b57cec5SDimitry Andric Code = Code.drop_front(); 130*0b57cec5SDimitry Andric break; 131*0b57cec5SDimitry Andric case ')': 132*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_CloseParen; 133*0b57cec5SDimitry Andric Result.Text = Code.substr(0, 1); 134*0b57cec5SDimitry Andric Code = Code.drop_front(); 135*0b57cec5SDimitry Andric break; 136*0b57cec5SDimitry Andric 137*0b57cec5SDimitry Andric case '"': 138*0b57cec5SDimitry Andric case '\'': 139*0b57cec5SDimitry Andric // Parse a string literal. 140*0b57cec5SDimitry Andric consumeStringLiteral(&Result); 141*0b57cec5SDimitry Andric break; 142*0b57cec5SDimitry Andric 143*0b57cec5SDimitry Andric case '0': case '1': case '2': case '3': case '4': 144*0b57cec5SDimitry Andric case '5': case '6': case '7': case '8': case '9': 145*0b57cec5SDimitry Andric // Parse an unsigned and float literal. 146*0b57cec5SDimitry Andric consumeNumberLiteral(&Result); 147*0b57cec5SDimitry Andric break; 148*0b57cec5SDimitry Andric 149*0b57cec5SDimitry Andric default: 150*0b57cec5SDimitry Andric if (isAlphanumeric(Code[0])) { 151*0b57cec5SDimitry Andric // Parse an identifier 152*0b57cec5SDimitry Andric size_t TokenLength = 1; 153*0b57cec5SDimitry Andric while (true) { 154*0b57cec5SDimitry Andric // A code completion location in/immediately after an identifier will 155*0b57cec5SDimitry Andric // cause the portion of the identifier before the code completion 156*0b57cec5SDimitry Andric // location to become a code completion token. 157*0b57cec5SDimitry Andric if (CodeCompletionLocation == Code.data() + TokenLength) { 158*0b57cec5SDimitry Andric CodeCompletionLocation = nullptr; 159*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_CodeCompletion; 160*0b57cec5SDimitry Andric Result.Text = Code.substr(0, TokenLength); 161*0b57cec5SDimitry Andric Code = Code.drop_front(TokenLength); 162*0b57cec5SDimitry Andric return Result; 163*0b57cec5SDimitry Andric } 164*0b57cec5SDimitry Andric if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength])) 165*0b57cec5SDimitry Andric break; 166*0b57cec5SDimitry Andric ++TokenLength; 167*0b57cec5SDimitry Andric } 168*0b57cec5SDimitry Andric if (TokenLength == 4 && Code.startswith("true")) { 169*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Literal; 170*0b57cec5SDimitry Andric Result.Value = true; 171*0b57cec5SDimitry Andric } else if (TokenLength == 5 && Code.startswith("false")) { 172*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Literal; 173*0b57cec5SDimitry Andric Result.Value = false; 174*0b57cec5SDimitry Andric } else { 175*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_Ident; 176*0b57cec5SDimitry Andric Result.Text = Code.substr(0, TokenLength); 177*0b57cec5SDimitry Andric } 178*0b57cec5SDimitry Andric Code = Code.drop_front(TokenLength); 179*0b57cec5SDimitry Andric } else { 180*0b57cec5SDimitry Andric Result.Kind = TokenInfo::TK_InvalidChar; 181*0b57cec5SDimitry Andric Result.Text = Code.substr(0, 1); 182*0b57cec5SDimitry Andric Code = Code.drop_front(1); 183*0b57cec5SDimitry Andric } 184*0b57cec5SDimitry Andric break; 185*0b57cec5SDimitry Andric } 186*0b57cec5SDimitry Andric 187*0b57cec5SDimitry Andric Result.Range.End = currentLocation(); 188*0b57cec5SDimitry Andric return Result; 189*0b57cec5SDimitry Andric } 190*0b57cec5SDimitry Andric 191*0b57cec5SDimitry Andric /// Consume an unsigned and float literal. 192*0b57cec5SDimitry Andric void consumeNumberLiteral(TokenInfo *Result) { 193*0b57cec5SDimitry Andric bool isFloatingLiteral = false; 194*0b57cec5SDimitry Andric unsigned Length = 1; 195*0b57cec5SDimitry Andric if (Code.size() > 1) { 196*0b57cec5SDimitry Andric // Consume the 'x' or 'b' radix modifier, if present. 197*0b57cec5SDimitry Andric switch (toLowercase(Code[1])) { 198*0b57cec5SDimitry Andric case 'x': case 'b': Length = 2; 199*0b57cec5SDimitry Andric } 200*0b57cec5SDimitry Andric } 201*0b57cec5SDimitry Andric while (Length < Code.size() && isHexDigit(Code[Length])) 202*0b57cec5SDimitry Andric ++Length; 203*0b57cec5SDimitry Andric 204*0b57cec5SDimitry Andric // Try to recognize a floating point literal. 205*0b57cec5SDimitry Andric while (Length < Code.size()) { 206*0b57cec5SDimitry Andric char c = Code[Length]; 207*0b57cec5SDimitry Andric if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) { 208*0b57cec5SDimitry Andric isFloatingLiteral = true; 209*0b57cec5SDimitry Andric Length++; 210*0b57cec5SDimitry Andric } else { 211*0b57cec5SDimitry Andric break; 212*0b57cec5SDimitry Andric } 213*0b57cec5SDimitry Andric } 214*0b57cec5SDimitry Andric 215*0b57cec5SDimitry Andric Result->Text = Code.substr(0, Length); 216*0b57cec5SDimitry Andric Code = Code.drop_front(Length); 217*0b57cec5SDimitry Andric 218*0b57cec5SDimitry Andric if (isFloatingLiteral) { 219*0b57cec5SDimitry Andric char *end; 220*0b57cec5SDimitry Andric errno = 0; 221*0b57cec5SDimitry Andric std::string Text = Result->Text.str(); 222*0b57cec5SDimitry Andric double doubleValue = strtod(Text.c_str(), &end); 223*0b57cec5SDimitry Andric if (*end == 0 && errno == 0) { 224*0b57cec5SDimitry Andric Result->Kind = TokenInfo::TK_Literal; 225*0b57cec5SDimitry Andric Result->Value = doubleValue; 226*0b57cec5SDimitry Andric return; 227*0b57cec5SDimitry Andric } 228*0b57cec5SDimitry Andric } else { 229*0b57cec5SDimitry Andric unsigned Value; 230*0b57cec5SDimitry Andric if (!Result->Text.getAsInteger(0, Value)) { 231*0b57cec5SDimitry Andric Result->Kind = TokenInfo::TK_Literal; 232*0b57cec5SDimitry Andric Result->Value = Value; 233*0b57cec5SDimitry Andric return; 234*0b57cec5SDimitry Andric } 235*0b57cec5SDimitry Andric } 236*0b57cec5SDimitry Andric 237*0b57cec5SDimitry Andric SourceRange Range; 238*0b57cec5SDimitry Andric Range.Start = Result->Range.Start; 239*0b57cec5SDimitry Andric Range.End = currentLocation(); 240*0b57cec5SDimitry Andric Error->addError(Range, Error->ET_ParserNumberError) << Result->Text; 241*0b57cec5SDimitry Andric Result->Kind = TokenInfo::TK_Error; 242*0b57cec5SDimitry Andric } 243*0b57cec5SDimitry Andric 244*0b57cec5SDimitry Andric /// Consume a string literal. 245*0b57cec5SDimitry Andric /// 246*0b57cec5SDimitry Andric /// \c Code must be positioned at the start of the literal (the opening 247*0b57cec5SDimitry Andric /// quote). Consumed until it finds the same closing quote character. 248*0b57cec5SDimitry Andric void consumeStringLiteral(TokenInfo *Result) { 249*0b57cec5SDimitry Andric bool InEscape = false; 250*0b57cec5SDimitry Andric const char Marker = Code[0]; 251*0b57cec5SDimitry Andric for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { 252*0b57cec5SDimitry Andric if (InEscape) { 253*0b57cec5SDimitry Andric InEscape = false; 254*0b57cec5SDimitry Andric continue; 255*0b57cec5SDimitry Andric } 256*0b57cec5SDimitry Andric if (Code[Length] == '\\') { 257*0b57cec5SDimitry Andric InEscape = true; 258*0b57cec5SDimitry Andric continue; 259*0b57cec5SDimitry Andric } 260*0b57cec5SDimitry Andric if (Code[Length] == Marker) { 261*0b57cec5SDimitry Andric Result->Kind = TokenInfo::TK_Literal; 262*0b57cec5SDimitry Andric Result->Text = Code.substr(0, Length + 1); 263*0b57cec5SDimitry Andric Result->Value = Code.substr(1, Length - 1); 264*0b57cec5SDimitry Andric Code = Code.drop_front(Length + 1); 265*0b57cec5SDimitry Andric return; 266*0b57cec5SDimitry Andric } 267*0b57cec5SDimitry Andric } 268*0b57cec5SDimitry Andric 269*0b57cec5SDimitry Andric StringRef ErrorText = Code; 270*0b57cec5SDimitry Andric Code = Code.drop_front(Code.size()); 271*0b57cec5SDimitry Andric SourceRange Range; 272*0b57cec5SDimitry Andric Range.Start = Result->Range.Start; 273*0b57cec5SDimitry Andric Range.End = currentLocation(); 274*0b57cec5SDimitry Andric Error->addError(Range, Error->ET_ParserStringError) << ErrorText; 275*0b57cec5SDimitry Andric Result->Kind = TokenInfo::TK_Error; 276*0b57cec5SDimitry Andric } 277*0b57cec5SDimitry Andric 278*0b57cec5SDimitry Andric /// Consume all leading whitespace from \c Code. 279*0b57cec5SDimitry Andric void consumeWhitespace() { 280*0b57cec5SDimitry Andric while (!Code.empty() && isWhitespace(Code[0])) { 281*0b57cec5SDimitry Andric if (Code[0] == '\n') { 282*0b57cec5SDimitry Andric ++Line; 283*0b57cec5SDimitry Andric StartOfLine = Code.drop_front(); 284*0b57cec5SDimitry Andric } 285*0b57cec5SDimitry Andric Code = Code.drop_front(); 286*0b57cec5SDimitry Andric } 287*0b57cec5SDimitry Andric } 288*0b57cec5SDimitry Andric 289*0b57cec5SDimitry Andric SourceLocation currentLocation() { 290*0b57cec5SDimitry Andric SourceLocation Location; 291*0b57cec5SDimitry Andric Location.Line = Line; 292*0b57cec5SDimitry Andric Location.Column = Code.data() - StartOfLine.data() + 1; 293*0b57cec5SDimitry Andric return Location; 294*0b57cec5SDimitry Andric } 295*0b57cec5SDimitry Andric 296*0b57cec5SDimitry Andric StringRef Code; 297*0b57cec5SDimitry Andric StringRef StartOfLine; 298*0b57cec5SDimitry Andric unsigned Line = 1; 299*0b57cec5SDimitry Andric Diagnostics *Error; 300*0b57cec5SDimitry Andric TokenInfo NextToken; 301*0b57cec5SDimitry Andric const char *CodeCompletionLocation = nullptr; 302*0b57cec5SDimitry Andric }; 303*0b57cec5SDimitry Andric 304*0b57cec5SDimitry Andric Parser::Sema::~Sema() = default; 305*0b57cec5SDimitry Andric 306*0b57cec5SDimitry Andric std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes( 307*0b57cec5SDimitry Andric llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 308*0b57cec5SDimitry Andric return {}; 309*0b57cec5SDimitry Andric } 310*0b57cec5SDimitry Andric 311*0b57cec5SDimitry Andric std::vector<MatcherCompletion> 312*0b57cec5SDimitry Andric Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) { 313*0b57cec5SDimitry Andric return {}; 314*0b57cec5SDimitry Andric } 315*0b57cec5SDimitry Andric 316*0b57cec5SDimitry Andric struct Parser::ScopedContextEntry { 317*0b57cec5SDimitry Andric Parser *P; 318*0b57cec5SDimitry Andric 319*0b57cec5SDimitry Andric ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) { 320*0b57cec5SDimitry Andric P->ContextStack.push_back(std::make_pair(C, 0u)); 321*0b57cec5SDimitry Andric } 322*0b57cec5SDimitry Andric 323*0b57cec5SDimitry Andric ~ScopedContextEntry() { 324*0b57cec5SDimitry Andric P->ContextStack.pop_back(); 325*0b57cec5SDimitry Andric } 326*0b57cec5SDimitry Andric 327*0b57cec5SDimitry Andric void nextArg() { 328*0b57cec5SDimitry Andric ++P->ContextStack.back().second; 329*0b57cec5SDimitry Andric } 330*0b57cec5SDimitry Andric }; 331*0b57cec5SDimitry Andric 332*0b57cec5SDimitry Andric /// Parse expressions that start with an identifier. 333*0b57cec5SDimitry Andric /// 334*0b57cec5SDimitry Andric /// This function can parse named values and matchers. 335*0b57cec5SDimitry Andric /// In case of failure it will try to determine the user's intent to give 336*0b57cec5SDimitry Andric /// an appropriate error message. 337*0b57cec5SDimitry Andric bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { 338*0b57cec5SDimitry Andric const TokenInfo NameToken = Tokenizer->consumeNextToken(); 339*0b57cec5SDimitry Andric 340*0b57cec5SDimitry Andric if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { 341*0b57cec5SDimitry Andric // Parse as a named value. 342*0b57cec5SDimitry Andric if (const VariantValue NamedValue = 343*0b57cec5SDimitry Andric NamedValues ? NamedValues->lookup(NameToken.Text) 344*0b57cec5SDimitry Andric : VariantValue()) { 345*0b57cec5SDimitry Andric 346*0b57cec5SDimitry Andric if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) { 347*0b57cec5SDimitry Andric *Value = NamedValue; 348*0b57cec5SDimitry Andric return true; 349*0b57cec5SDimitry Andric } 350*0b57cec5SDimitry Andric 351*0b57cec5SDimitry Andric std::string BindID; 352*0b57cec5SDimitry Andric if (!parseBindID(BindID)) 353*0b57cec5SDimitry Andric return false; 354*0b57cec5SDimitry Andric 355*0b57cec5SDimitry Andric assert(NamedValue.isMatcher()); 356*0b57cec5SDimitry Andric llvm::Optional<DynTypedMatcher> Result = 357*0b57cec5SDimitry Andric NamedValue.getMatcher().getSingleMatcher(); 358*0b57cec5SDimitry Andric if (Result.hasValue()) { 359*0b57cec5SDimitry Andric llvm::Optional<DynTypedMatcher> Bound = Result->tryBind(BindID); 360*0b57cec5SDimitry Andric if (Bound.hasValue()) { 361*0b57cec5SDimitry Andric *Value = VariantMatcher::SingleMatcher(*Bound); 362*0b57cec5SDimitry Andric return true; 363*0b57cec5SDimitry Andric } 364*0b57cec5SDimitry Andric } 365*0b57cec5SDimitry Andric return false; 366*0b57cec5SDimitry Andric } 367*0b57cec5SDimitry Andric // If the syntax is correct and the name is not a matcher either, report 368*0b57cec5SDimitry Andric // unknown named value. 369*0b57cec5SDimitry Andric if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma || 370*0b57cec5SDimitry Andric Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen || 371*0b57cec5SDimitry Andric Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) && 372*0b57cec5SDimitry Andric !S->lookupMatcherCtor(NameToken.Text)) { 373*0b57cec5SDimitry Andric Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound) 374*0b57cec5SDimitry Andric << NameToken.Text; 375*0b57cec5SDimitry Andric return false; 376*0b57cec5SDimitry Andric } 377*0b57cec5SDimitry Andric // Otherwise, fallback to the matcher parser. 378*0b57cec5SDimitry Andric } 379*0b57cec5SDimitry Andric 380*0b57cec5SDimitry Andric // Parse as a matcher expression. 381*0b57cec5SDimitry Andric return parseMatcherExpressionImpl(NameToken, Value); 382*0b57cec5SDimitry Andric } 383*0b57cec5SDimitry Andric 384*0b57cec5SDimitry Andric bool Parser::parseBindID(std::string &BindID) { 385*0b57cec5SDimitry Andric // Parse .bind("foo") 386*0b57cec5SDimitry Andric assert(Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period); 387*0b57cec5SDimitry Andric Tokenizer->consumeNextToken(); // consume the period. 388*0b57cec5SDimitry Andric const TokenInfo BindToken = Tokenizer->consumeNextToken(); 389*0b57cec5SDimitry Andric if (BindToken.Kind == TokenInfo::TK_CodeCompletion) { 390*0b57cec5SDimitry Andric addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1)); 391*0b57cec5SDimitry Andric return false; 392*0b57cec5SDimitry Andric } 393*0b57cec5SDimitry Andric 394*0b57cec5SDimitry Andric const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 395*0b57cec5SDimitry Andric const TokenInfo IDToken = Tokenizer->consumeNextToken(); 396*0b57cec5SDimitry Andric const TokenInfo CloseToken = Tokenizer->consumeNextToken(); 397*0b57cec5SDimitry Andric 398*0b57cec5SDimitry Andric // TODO: We could use different error codes for each/some to be more 399*0b57cec5SDimitry Andric // explicit about the syntax error. 400*0b57cec5SDimitry Andric if (BindToken.Kind != TokenInfo::TK_Ident || 401*0b57cec5SDimitry Andric BindToken.Text != TokenInfo::ID_Bind) { 402*0b57cec5SDimitry Andric Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr); 403*0b57cec5SDimitry Andric return false; 404*0b57cec5SDimitry Andric } 405*0b57cec5SDimitry Andric if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 406*0b57cec5SDimitry Andric Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr); 407*0b57cec5SDimitry Andric return false; 408*0b57cec5SDimitry Andric } 409*0b57cec5SDimitry Andric if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { 410*0b57cec5SDimitry Andric Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr); 411*0b57cec5SDimitry Andric return false; 412*0b57cec5SDimitry Andric } 413*0b57cec5SDimitry Andric if (CloseToken.Kind != TokenInfo::TK_CloseParen) { 414*0b57cec5SDimitry Andric Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr); 415*0b57cec5SDimitry Andric return false; 416*0b57cec5SDimitry Andric } 417*0b57cec5SDimitry Andric BindID = IDToken.Value.getString(); 418*0b57cec5SDimitry Andric return true; 419*0b57cec5SDimitry Andric } 420*0b57cec5SDimitry Andric 421*0b57cec5SDimitry Andric /// Parse and validate a matcher expression. 422*0b57cec5SDimitry Andric /// \return \c true on success, in which case \c Value has the matcher parsed. 423*0b57cec5SDimitry Andric /// If the input is malformed, or some argument has an error, it 424*0b57cec5SDimitry Andric /// returns \c false. 425*0b57cec5SDimitry Andric bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, 426*0b57cec5SDimitry Andric VariantValue *Value) { 427*0b57cec5SDimitry Andric assert(NameToken.Kind == TokenInfo::TK_Ident); 428*0b57cec5SDimitry Andric const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 429*0b57cec5SDimitry Andric if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 430*0b57cec5SDimitry Andric Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen) 431*0b57cec5SDimitry Andric << OpenToken.Text; 432*0b57cec5SDimitry Andric return false; 433*0b57cec5SDimitry Andric } 434*0b57cec5SDimitry Andric 435*0b57cec5SDimitry Andric llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text); 436*0b57cec5SDimitry Andric 437*0b57cec5SDimitry Andric if (!Ctor) { 438*0b57cec5SDimitry Andric Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound) 439*0b57cec5SDimitry Andric << NameToken.Text; 440*0b57cec5SDimitry Andric // Do not return here. We need to continue to give completion suggestions. 441*0b57cec5SDimitry Andric } 442*0b57cec5SDimitry Andric 443*0b57cec5SDimitry Andric std::vector<ParserValue> Args; 444*0b57cec5SDimitry Andric TokenInfo EndToken; 445*0b57cec5SDimitry Andric 446*0b57cec5SDimitry Andric { 447*0b57cec5SDimitry Andric ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr); 448*0b57cec5SDimitry Andric 449*0b57cec5SDimitry Andric while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 450*0b57cec5SDimitry Andric if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 451*0b57cec5SDimitry Andric // End of args. 452*0b57cec5SDimitry Andric EndToken = Tokenizer->consumeNextToken(); 453*0b57cec5SDimitry Andric break; 454*0b57cec5SDimitry Andric } 455*0b57cec5SDimitry Andric if (!Args.empty()) { 456*0b57cec5SDimitry Andric // We must find a , token to continue. 457*0b57cec5SDimitry Andric const TokenInfo CommaToken = Tokenizer->consumeNextToken(); 458*0b57cec5SDimitry Andric if (CommaToken.Kind != TokenInfo::TK_Comma) { 459*0b57cec5SDimitry Andric Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 460*0b57cec5SDimitry Andric << CommaToken.Text; 461*0b57cec5SDimitry Andric return false; 462*0b57cec5SDimitry Andric } 463*0b57cec5SDimitry Andric } 464*0b57cec5SDimitry Andric 465*0b57cec5SDimitry Andric Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 466*0b57cec5SDimitry Andric NameToken.Text, NameToken.Range, 467*0b57cec5SDimitry Andric Args.size() + 1); 468*0b57cec5SDimitry Andric ParserValue ArgValue; 469*0b57cec5SDimitry Andric ArgValue.Text = Tokenizer->peekNextToken().Text; 470*0b57cec5SDimitry Andric ArgValue.Range = Tokenizer->peekNextToken().Range; 471*0b57cec5SDimitry Andric if (!parseExpressionImpl(&ArgValue.Value)) { 472*0b57cec5SDimitry Andric return false; 473*0b57cec5SDimitry Andric } 474*0b57cec5SDimitry Andric 475*0b57cec5SDimitry Andric Args.push_back(ArgValue); 476*0b57cec5SDimitry Andric SCE.nextArg(); 477*0b57cec5SDimitry Andric } 478*0b57cec5SDimitry Andric } 479*0b57cec5SDimitry Andric 480*0b57cec5SDimitry Andric if (EndToken.Kind == TokenInfo::TK_Eof) { 481*0b57cec5SDimitry Andric Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 482*0b57cec5SDimitry Andric return false; 483*0b57cec5SDimitry Andric } 484*0b57cec5SDimitry Andric 485*0b57cec5SDimitry Andric std::string BindID; 486*0b57cec5SDimitry Andric if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 487*0b57cec5SDimitry Andric if (!parseBindID(BindID)) 488*0b57cec5SDimitry Andric return false; 489*0b57cec5SDimitry Andric } 490*0b57cec5SDimitry Andric 491*0b57cec5SDimitry Andric if (!Ctor) 492*0b57cec5SDimitry Andric return false; 493*0b57cec5SDimitry Andric 494*0b57cec5SDimitry Andric // Merge the start and end infos. 495*0b57cec5SDimitry Andric Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 496*0b57cec5SDimitry Andric NameToken.Text, NameToken.Range); 497*0b57cec5SDimitry Andric SourceRange MatcherRange = NameToken.Range; 498*0b57cec5SDimitry Andric MatcherRange.End = EndToken.Range.End; 499*0b57cec5SDimitry Andric VariantMatcher Result = S->actOnMatcherExpression( 500*0b57cec5SDimitry Andric *Ctor, MatcherRange, BindID, Args, Error); 501*0b57cec5SDimitry Andric if (Result.isNull()) return false; 502*0b57cec5SDimitry Andric 503*0b57cec5SDimitry Andric *Value = Result; 504*0b57cec5SDimitry Andric return true; 505*0b57cec5SDimitry Andric } 506*0b57cec5SDimitry Andric 507*0b57cec5SDimitry Andric // If the prefix of this completion matches the completion token, add it to 508*0b57cec5SDimitry Andric // Completions minus the prefix. 509*0b57cec5SDimitry Andric void Parser::addCompletion(const TokenInfo &CompToken, 510*0b57cec5SDimitry Andric const MatcherCompletion& Completion) { 511*0b57cec5SDimitry Andric if (StringRef(Completion.TypedText).startswith(CompToken.Text) && 512*0b57cec5SDimitry Andric Completion.Specificity > 0) { 513*0b57cec5SDimitry Andric Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()), 514*0b57cec5SDimitry Andric Completion.MatcherDecl, Completion.Specificity); 515*0b57cec5SDimitry Andric } 516*0b57cec5SDimitry Andric } 517*0b57cec5SDimitry Andric 518*0b57cec5SDimitry Andric std::vector<MatcherCompletion> Parser::getNamedValueCompletions( 519*0b57cec5SDimitry Andric ArrayRef<ArgKind> AcceptedTypes) { 520*0b57cec5SDimitry Andric if (!NamedValues) return std::vector<MatcherCompletion>(); 521*0b57cec5SDimitry Andric std::vector<MatcherCompletion> Result; 522*0b57cec5SDimitry Andric for (const auto &Entry : *NamedValues) { 523*0b57cec5SDimitry Andric unsigned Specificity; 524*0b57cec5SDimitry Andric if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) { 525*0b57cec5SDimitry Andric std::string Decl = 526*0b57cec5SDimitry Andric (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str(); 527*0b57cec5SDimitry Andric Result.emplace_back(Entry.getKey(), Decl, Specificity); 528*0b57cec5SDimitry Andric } 529*0b57cec5SDimitry Andric } 530*0b57cec5SDimitry Andric return Result; 531*0b57cec5SDimitry Andric } 532*0b57cec5SDimitry Andric 533*0b57cec5SDimitry Andric void Parser::addExpressionCompletions() { 534*0b57cec5SDimitry Andric const TokenInfo CompToken = Tokenizer->consumeNextToken(); 535*0b57cec5SDimitry Andric assert(CompToken.Kind == TokenInfo::TK_CodeCompletion); 536*0b57cec5SDimitry Andric 537*0b57cec5SDimitry Andric // We cannot complete code if there is an invalid element on the context 538*0b57cec5SDimitry Andric // stack. 539*0b57cec5SDimitry Andric for (ContextStackTy::iterator I = ContextStack.begin(), 540*0b57cec5SDimitry Andric E = ContextStack.end(); 541*0b57cec5SDimitry Andric I != E; ++I) { 542*0b57cec5SDimitry Andric if (!I->first) 543*0b57cec5SDimitry Andric return; 544*0b57cec5SDimitry Andric } 545*0b57cec5SDimitry Andric 546*0b57cec5SDimitry Andric auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack); 547*0b57cec5SDimitry Andric for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) { 548*0b57cec5SDimitry Andric addCompletion(CompToken, Completion); 549*0b57cec5SDimitry Andric } 550*0b57cec5SDimitry Andric 551*0b57cec5SDimitry Andric for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) { 552*0b57cec5SDimitry Andric addCompletion(CompToken, Completion); 553*0b57cec5SDimitry Andric } 554*0b57cec5SDimitry Andric } 555*0b57cec5SDimitry Andric 556*0b57cec5SDimitry Andric /// Parse an <Expression> 557*0b57cec5SDimitry Andric bool Parser::parseExpressionImpl(VariantValue *Value) { 558*0b57cec5SDimitry Andric switch (Tokenizer->nextTokenKind()) { 559*0b57cec5SDimitry Andric case TokenInfo::TK_Literal: 560*0b57cec5SDimitry Andric *Value = Tokenizer->consumeNextToken().Value; 561*0b57cec5SDimitry Andric return true; 562*0b57cec5SDimitry Andric 563*0b57cec5SDimitry Andric case TokenInfo::TK_Ident: 564*0b57cec5SDimitry Andric return parseIdentifierPrefixImpl(Value); 565*0b57cec5SDimitry Andric 566*0b57cec5SDimitry Andric case TokenInfo::TK_CodeCompletion: 567*0b57cec5SDimitry Andric addExpressionCompletions(); 568*0b57cec5SDimitry Andric return false; 569*0b57cec5SDimitry Andric 570*0b57cec5SDimitry Andric case TokenInfo::TK_Eof: 571*0b57cec5SDimitry Andric Error->addError(Tokenizer->consumeNextToken().Range, 572*0b57cec5SDimitry Andric Error->ET_ParserNoCode); 573*0b57cec5SDimitry Andric return false; 574*0b57cec5SDimitry Andric 575*0b57cec5SDimitry Andric case TokenInfo::TK_Error: 576*0b57cec5SDimitry Andric // This error was already reported by the tokenizer. 577*0b57cec5SDimitry Andric return false; 578*0b57cec5SDimitry Andric 579*0b57cec5SDimitry Andric case TokenInfo::TK_OpenParen: 580*0b57cec5SDimitry Andric case TokenInfo::TK_CloseParen: 581*0b57cec5SDimitry Andric case TokenInfo::TK_Comma: 582*0b57cec5SDimitry Andric case TokenInfo::TK_Period: 583*0b57cec5SDimitry Andric case TokenInfo::TK_InvalidChar: 584*0b57cec5SDimitry Andric const TokenInfo Token = Tokenizer->consumeNextToken(); 585*0b57cec5SDimitry Andric Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text; 586*0b57cec5SDimitry Andric return false; 587*0b57cec5SDimitry Andric } 588*0b57cec5SDimitry Andric 589*0b57cec5SDimitry Andric llvm_unreachable("Unknown token kind."); 590*0b57cec5SDimitry Andric } 591*0b57cec5SDimitry Andric 592*0b57cec5SDimitry Andric static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema; 593*0b57cec5SDimitry Andric 594*0b57cec5SDimitry Andric Parser::Parser(CodeTokenizer *Tokenizer, Sema *S, 595*0b57cec5SDimitry Andric const NamedValueMap *NamedValues, Diagnostics *Error) 596*0b57cec5SDimitry Andric : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema), 597*0b57cec5SDimitry Andric NamedValues(NamedValues), Error(Error) {} 598*0b57cec5SDimitry Andric 599*0b57cec5SDimitry Andric Parser::RegistrySema::~RegistrySema() = default; 600*0b57cec5SDimitry Andric 601*0b57cec5SDimitry Andric llvm::Optional<MatcherCtor> 602*0b57cec5SDimitry Andric Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) { 603*0b57cec5SDimitry Andric return Registry::lookupMatcherCtor(MatcherName); 604*0b57cec5SDimitry Andric } 605*0b57cec5SDimitry Andric 606*0b57cec5SDimitry Andric VariantMatcher Parser::RegistrySema::actOnMatcherExpression( 607*0b57cec5SDimitry Andric MatcherCtor Ctor, SourceRange NameRange, StringRef BindID, 608*0b57cec5SDimitry Andric ArrayRef<ParserValue> Args, Diagnostics *Error) { 609*0b57cec5SDimitry Andric if (BindID.empty()) { 610*0b57cec5SDimitry Andric return Registry::constructMatcher(Ctor, NameRange, Args, Error); 611*0b57cec5SDimitry Andric } else { 612*0b57cec5SDimitry Andric return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args, 613*0b57cec5SDimitry Andric Error); 614*0b57cec5SDimitry Andric } 615*0b57cec5SDimitry Andric } 616*0b57cec5SDimitry Andric 617*0b57cec5SDimitry Andric std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes( 618*0b57cec5SDimitry Andric ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 619*0b57cec5SDimitry Andric return Registry::getAcceptedCompletionTypes(Context); 620*0b57cec5SDimitry Andric } 621*0b57cec5SDimitry Andric 622*0b57cec5SDimitry Andric std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions( 623*0b57cec5SDimitry Andric ArrayRef<ArgKind> AcceptedTypes) { 624*0b57cec5SDimitry Andric return Registry::getMatcherCompletions(AcceptedTypes); 625*0b57cec5SDimitry Andric } 626*0b57cec5SDimitry Andric 627*0b57cec5SDimitry Andric bool Parser::parseExpression(StringRef Code, Sema *S, 628*0b57cec5SDimitry Andric const NamedValueMap *NamedValues, 629*0b57cec5SDimitry Andric VariantValue *Value, Diagnostics *Error) { 630*0b57cec5SDimitry Andric CodeTokenizer Tokenizer(Code, Error); 631*0b57cec5SDimitry Andric if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value)) 632*0b57cec5SDimitry Andric return false; 633*0b57cec5SDimitry Andric if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) { 634*0b57cec5SDimitry Andric Error->addError(Tokenizer.peekNextToken().Range, 635*0b57cec5SDimitry Andric Error->ET_ParserTrailingCode); 636*0b57cec5SDimitry Andric return false; 637*0b57cec5SDimitry Andric } 638*0b57cec5SDimitry Andric return true; 639*0b57cec5SDimitry Andric } 640*0b57cec5SDimitry Andric 641*0b57cec5SDimitry Andric std::vector<MatcherCompletion> 642*0b57cec5SDimitry Andric Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S, 643*0b57cec5SDimitry Andric const NamedValueMap *NamedValues) { 644*0b57cec5SDimitry Andric Diagnostics Error; 645*0b57cec5SDimitry Andric CodeTokenizer Tokenizer(Code, &Error, CompletionOffset); 646*0b57cec5SDimitry Andric Parser P(&Tokenizer, S, NamedValues, &Error); 647*0b57cec5SDimitry Andric VariantValue Dummy; 648*0b57cec5SDimitry Andric P.parseExpressionImpl(&Dummy); 649*0b57cec5SDimitry Andric 650*0b57cec5SDimitry Andric // Sort by specificity, then by name. 651*0b57cec5SDimitry Andric llvm::sort(P.Completions, 652*0b57cec5SDimitry Andric [](const MatcherCompletion &A, const MatcherCompletion &B) { 653*0b57cec5SDimitry Andric if (A.Specificity != B.Specificity) 654*0b57cec5SDimitry Andric return A.Specificity > B.Specificity; 655*0b57cec5SDimitry Andric return A.TypedText < B.TypedText; 656*0b57cec5SDimitry Andric }); 657*0b57cec5SDimitry Andric 658*0b57cec5SDimitry Andric return P.Completions; 659*0b57cec5SDimitry Andric } 660*0b57cec5SDimitry Andric 661*0b57cec5SDimitry Andric llvm::Optional<DynTypedMatcher> 662*0b57cec5SDimitry Andric Parser::parseMatcherExpression(StringRef Code, Sema *S, 663*0b57cec5SDimitry Andric const NamedValueMap *NamedValues, 664*0b57cec5SDimitry Andric Diagnostics *Error) { 665*0b57cec5SDimitry Andric VariantValue Value; 666*0b57cec5SDimitry Andric if (!parseExpression(Code, S, NamedValues, &Value, Error)) 667*0b57cec5SDimitry Andric return llvm::Optional<DynTypedMatcher>(); 668*0b57cec5SDimitry Andric if (!Value.isMatcher()) { 669*0b57cec5SDimitry Andric Error->addError(SourceRange(), Error->ET_ParserNotAMatcher); 670*0b57cec5SDimitry Andric return llvm::Optional<DynTypedMatcher>(); 671*0b57cec5SDimitry Andric } 672*0b57cec5SDimitry Andric llvm::Optional<DynTypedMatcher> Result = 673*0b57cec5SDimitry Andric Value.getMatcher().getSingleMatcher(); 674*0b57cec5SDimitry Andric if (!Result.hasValue()) { 675*0b57cec5SDimitry Andric Error->addError(SourceRange(), Error->ET_ParserOverloadedType) 676*0b57cec5SDimitry Andric << Value.getTypeAsString(); 677*0b57cec5SDimitry Andric } 678*0b57cec5SDimitry Andric return Result; 679*0b57cec5SDimitry Andric } 680*0b57cec5SDimitry Andric 681*0b57cec5SDimitry Andric } // namespace dynamic 682*0b57cec5SDimitry Andric } // namespace ast_matchers 683*0b57cec5SDimitry Andric } // namespace clang 684