//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the declaration of the FormatToken, a wrapper
/// around Token with additional information related to formatting.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H

#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include <memory>
#include <unordered_set>

namespace clang {
namespace format {

// X-macro listing every token type. Each TYPE(X) entry is expanded by the
// user of the list; the TokenType enum below turns it into the enumerator
// TT_X.
#define LIST_TOKEN_TYPES \
  TYPE(ArrayInitializerLSquare) \
  TYPE(ArraySubscriptLSquare) \
  TYPE(AttributeColon) \
  TYPE(AttributeParen) \
  TYPE(AttributeSquare) \
  TYPE(BinaryOperator) \
  TYPE(BitFieldColon) \
  TYPE(BlockComment) \
  TYPE(CastRParen) \
  TYPE(ConditionalExpr) \
  TYPE(ConflictAlternative) \
  TYPE(ConflictEnd) \
  TYPE(ConflictStart) \
  TYPE(CtorInitializerColon) \
  TYPE(CtorInitializerComma) \
  TYPE(DesignatedInitializerLSquare) \
  TYPE(DesignatedInitializerPeriod) \
  TYPE(DictLiteral) \
  TYPE(ForEachMacro) \
  TYPE(FunctionAnnotationRParen) \
  TYPE(FunctionDeclarationName) \
  TYPE(FunctionLBrace) \
  TYPE(FunctionTypeLParen) \
  TYPE(ImplicitStringLiteral) \
  TYPE(InheritanceColon) \
  TYPE(InheritanceComma) \
  TYPE(InlineASMBrace) \
  TYPE(InlineASMColon) \
  TYPE(JavaAnnotation) \
  TYPE(JsComputedPropertyName) \
  TYPE(JsExponentiation) \
  TYPE(JsExponentiationEqual) \
  TYPE(JsFatArrow) \
  TYPE(JsNonNullAssertion) \
  TYPE(JsNullishCoalescingOperator) \
  TYPE(JsNullPropagatingOperator) \
  TYPE(JsPrivateIdentifier) \
  TYPE(JsTypeColon) \
  TYPE(JsTypeOperator) \
  TYPE(JsTypeOptionalQuestion) \
  TYPE(LambdaArrow) \
  TYPE(LambdaLBrace) \
  TYPE(LambdaLSquare) \
  TYPE(LeadingJavaAnnotation) \
  TYPE(LineComment) \
  TYPE(MacroBlockBegin) \
  TYPE(MacroBlockEnd) \
  TYPE(NamespaceMacro) \
  TYPE(ObjCBlockLBrace) \
  TYPE(ObjCBlockLParen) \
  TYPE(ObjCDecl) \
  TYPE(ObjCForIn) \
  TYPE(ObjCMethodExpr) \
  TYPE(ObjCMethodSpecifier) \
  TYPE(ObjCProperty) \
  TYPE(ObjCStringLiteral) \
  TYPE(OverloadedOperator) \
  TYPE(OverloadedOperatorLParen) \
  TYPE(PointerOrReference) \
  TYPE(PureVirtualSpecifier) \
  TYPE(RangeBasedForLoopColon) \
  TYPE(RegexLiteral) \
  TYPE(SelectorName) \
  TYPE(StartOfName) \
  TYPE(StatementMacro) \
  TYPE(StructuredBindingLSquare) \
  TYPE(TemplateCloser) \
  TYPE(TemplateOpener) \
  TYPE(TemplateString) \
  TYPE(ProtoExtensionLSquare) \
  TYPE(TrailingAnnotation) \
  TYPE(TrailingReturnArrow) \
  TYPE(TrailingUnaryOperator) \
  TYPE(TypenameMacro) \
  TYPE(UnaryOperator) \
  TYPE(CSharpStringLiteral) \
  TYPE(CSharpNullCoalescing) \
  TYPE(Unknown)

/// The token types known to the formatter, one TT_<Name> enumerator per
/// LIST_TOKEN_TYPES entry.
enum TokenType {
#define TYPE(X) TT_##X,
  LIST_TOKEN_TYPES
#undef TYPE
  // Follows the last list entry, so its value equals the number of entries
  // in LIST_TOKEN_TYPES.
  NUM_TOKEN_TYPES
};

/// Determines the name of a token type.
const char *getTokenTypeName(TokenType Type);

// Represents what type of block a set of braces opens.
enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit };

// The packing kind of a function's parameters.
enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };

// The formatting decision recorded for a token (see FormatToken::Decision).
enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break };

class TokenRole;
class AnnotatedLine;

/// A wrapper around a \c Token storing information about the
/// whitespace characters preceding it.
131 struct FormatToken { 132 FormatToken() {} 133 134 /// The \c Token. 135 Token Tok; 136 137 /// The number of newlines immediately before the \c Token. 138 /// 139 /// This can be used to determine what the user wrote in the original code 140 /// and thereby e.g. leave an empty line between two function definitions. 141 unsigned NewlinesBefore = 0; 142 143 /// Whether there is at least one unescaped newline before the \c 144 /// Token. 145 bool HasUnescapedNewline = false; 146 147 /// The range of the whitespace immediately preceding the \c Token. 148 SourceRange WhitespaceRange; 149 150 /// The offset just past the last '\n' in this token's leading 151 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 152 unsigned LastNewlineOffset = 0; 153 154 /// The width of the non-whitespace parts of the token (or its first 155 /// line for multi-line tokens) in columns. 156 /// We need this to correctly measure number of columns a token spans. 157 unsigned ColumnWidth = 0; 158 159 /// Contains the width in columns of the last line of a multi-line 160 /// token. 161 unsigned LastLineColumnWidth = 0; 162 163 /// Whether the token text contains newlines (escaped or not). 164 bool IsMultiline = false; 165 166 /// Indicates that this is the first token of the file. 167 bool IsFirst = false; 168 169 /// Whether there must be a line break before this token. 170 /// 171 /// This happens for example when a preprocessor directive ended directly 172 /// before the token. 173 bool MustBreakBefore = false; 174 175 /// The raw text of the token. 176 /// 177 /// Contains the raw token text without leading whitespace and without leading 178 /// escaped newlines. 179 StringRef TokenText; 180 181 /// Set to \c true if this token is an unterminated literal. 182 bool IsUnterminatedLiteral = 0; 183 184 /// Contains the kind of block if this token is a brace. 
185 BraceBlockKind BlockKind = BK_Unknown; 186 187 TokenType Type = TT_Unknown; 188 189 /// The number of spaces that should be inserted before this token. 190 unsigned SpacesRequiredBefore = 0; 191 192 /// \c true if it is allowed to break before this token. 193 bool CanBreakBefore = false; 194 195 /// \c true if this is the ">" of "template<..>". 196 bool ClosesTemplateDeclaration = false; 197 198 /// Number of parameters, if this is "(", "[" or "<". 199 unsigned ParameterCount = 0; 200 201 /// Number of parameters that are nested blocks, 202 /// if this is "(", "[" or "<". 203 unsigned BlockParameterCount = 0; 204 205 /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of 206 /// the surrounding bracket. 207 tok::TokenKind ParentBracket = tok::unknown; 208 209 /// A token can have a special role that can carry extra information 210 /// about the token's formatting. 211 std::unique_ptr<TokenRole> Role; 212 213 /// If this is an opening parenthesis, how are the parameters packed? 214 ParameterPackingKind PackingKind = PPK_Inconclusive; 215 216 /// The total length of the unwrapped line up to and including this 217 /// token. 218 unsigned TotalLength = 0; 219 220 /// The original 0-based column of this token, including expanded tabs. 221 /// The configured TabWidth is used as tab width. 222 unsigned OriginalColumn = 0; 223 224 /// The length of following tokens until the next natural split point, 225 /// or the next token that can be broken. 226 unsigned UnbreakableTailLength = 0; 227 228 // FIXME: Come up with a 'cleaner' concept. 229 /// The binding strength of a token. This is a combined value of 230 /// operator precedence, parenthesis nesting, etc. 231 unsigned BindingStrength = 0; 232 233 /// The nesting level of this token, i.e. the number of surrounding (), 234 /// [], {} or <>. 235 unsigned NestingLevel = 0; 236 237 /// The indent level of this token. Copied from the surrounding line. 
238 unsigned IndentLevel = 0; 239 240 /// Penalty for inserting a line break before this token. 241 unsigned SplitPenalty = 0; 242 243 /// If this is the first ObjC selector name in an ObjC method 244 /// definition or call, this contains the length of the longest name. 245 /// 246 /// This being set to 0 means that the selectors should not be colon-aligned, 247 /// e.g. because several of them are block-type. 248 unsigned LongestObjCSelectorName = 0; 249 250 /// If this is the first ObjC selector name in an ObjC method 251 /// definition or call, this contains the number of parts that the whole 252 /// selector consist of. 253 unsigned ObjCSelectorNameParts = 0; 254 255 /// The 0-based index of the parameter/argument. For ObjC it is set 256 /// for the selector name token. 257 /// For now calculated only for ObjC. 258 unsigned ParameterIndex = 0; 259 260 /// Stores the number of required fake parentheses and the 261 /// corresponding operator precedence. 262 /// 263 /// If multiple fake parentheses start at a token, this vector stores them in 264 /// reverse order, i.e. inner fake parenthesis first. 265 SmallVector<prec::Level, 4> FakeLParens; 266 /// Insert this many fake ) after this token for correct indentation. 267 unsigned FakeRParens = 0; 268 269 /// \c true if this token starts a binary expression, i.e. has at least 270 /// one fake l_paren with a precedence greater than prec::Unknown. 271 bool StartsBinaryExpression = false; 272 /// \c true if this token ends a binary expression. 273 bool EndsBinaryExpression = false; 274 275 /// If this is an operator (or "."/"->") in a sequence of operators 276 /// with the same precedence, contains the 0-based operator index. 277 unsigned OperatorIndex = 0; 278 279 /// If this is an operator (or "."/"->") in a sequence of operators 280 /// with the same precedence, points to the next operator. 281 FormatToken *NextOperator = nullptr; 282 283 /// Is this token part of a \c DeclStmt defining multiple variables? 
284 /// 285 /// Only set if \c Type == \c TT_StartOfName. 286 bool PartOfMultiVariableDeclStmt = false; 287 288 /// Does this line comment continue a line comment section? 289 /// 290 /// Only set to true if \c Type == \c TT_LineComment. 291 bool ContinuesLineCommentSection = false; 292 293 /// If this is a bracket, this points to the matching one. 294 FormatToken *MatchingParen = nullptr; 295 296 /// The previous token in the unwrapped line. 297 FormatToken *Previous = nullptr; 298 299 /// The next token in the unwrapped line. 300 FormatToken *Next = nullptr; 301 302 /// If this token starts a block, this contains all the unwrapped lines 303 /// in it. 304 SmallVector<AnnotatedLine *, 1> Children; 305 306 /// Stores the formatting decision for the token once it was made. 307 FormatDecision Decision = FD_Unformatted; 308 309 /// If \c true, this token has been fully formatted (indented and 310 /// potentially re-formatted inside), and we do not allow further formatting 311 /// changes. 312 bool Finalized = false; 313 314 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } 315 bool is(TokenType TT) const { return Type == TT; } 316 bool is(const IdentifierInfo *II) const { 317 return II && II == Tok.getIdentifierInfo(); 318 } 319 bool is(tok::PPKeywordKind Kind) const { 320 return Tok.getIdentifierInfo() && 321 Tok.getIdentifierInfo()->getPPKeywordID() == Kind; 322 } 323 template <typename A, typename B> bool isOneOf(A K1, B K2) const { 324 return is(K1) || is(K2); 325 } 326 template <typename A, typename B, typename... Ts> 327 bool isOneOf(A K1, B K2, Ts... 
Ks) const { 328 return is(K1) || isOneOf(K2, Ks...); 329 } 330 template <typename T> bool isNot(T Kind) const { return !is(Kind); } 331 332 bool isIf(bool AllowConstexprMacro = true) const { 333 return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) || 334 (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); 335 } 336 337 bool closesScopeAfterBlock() const { 338 if (BlockKind == BK_Block) 339 return true; 340 if (closesScope()) 341 return Previous->closesScopeAfterBlock(); 342 return false; 343 } 344 345 /// \c true if this token starts a sequence with the given tokens in order, 346 /// following the ``Next`` pointers, ignoring comments. 347 template <typename A, typename... Ts> 348 bool startsSequence(A K1, Ts... Tokens) const { 349 return startsSequenceInternal(K1, Tokens...); 350 } 351 352 /// \c true if this token ends a sequence with the given tokens in order, 353 /// following the ``Previous`` pointers, ignoring comments. 354 /// For example, given tokens [T1, T2, T3], the function returns true if 355 /// 3 tokens ending at this (ignoring comments) are [T3, T2, T1]. In other 356 /// words, the tokens passed to this function need to the reverse of the 357 /// order the tokens appear in code. 358 template <typename A, typename... Ts> 359 bool endsSequence(A K1, Ts... Tokens) const { 360 return endsSequenceInternal(K1, Tokens...); 361 } 362 363 bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } 364 365 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 366 return Tok.isObjCAtKeyword(Kind); 367 } 368 369 bool isAccessSpecifier(bool ColonRequired = true) const { 370 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 371 (!ColonRequired || (Next && Next->is(tok::colon))); 372 } 373 374 /// Determine whether the token is a simple-type-specifier. 
375 bool isSimpleTypeSpecifier() const; 376 377 bool isObjCAccessSpecifier() const { 378 return is(tok::at) && Next && 379 (Next->isObjCAtKeyword(tok::objc_public) || 380 Next->isObjCAtKeyword(tok::objc_protected) || 381 Next->isObjCAtKeyword(tok::objc_package) || 382 Next->isObjCAtKeyword(tok::objc_private)); 383 } 384 385 /// Returns whether \p Tok is ([{ or an opening < of a template or in 386 /// protos. 387 bool opensScope() const { 388 if (is(TT_TemplateString) && TokenText.endswith("${")) 389 return true; 390 if (is(TT_DictLiteral) && is(tok::less)) 391 return true; 392 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, 393 TT_TemplateOpener); 394 } 395 /// Returns whether \p Tok is )]} or a closing > of a template or in 396 /// protos. 397 bool closesScope() const { 398 if (is(TT_TemplateString) && TokenText.startswith("}")) 399 return true; 400 if (is(TT_DictLiteral) && is(tok::greater)) 401 return true; 402 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, 403 TT_TemplateCloser); 404 } 405 406 /// Returns \c true if this is a "." or "->" accessing a member. 407 bool isMemberAccess() const { 408 return isOneOf(tok::arrow, tok::period, tok::arrowstar) && 409 !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, 410 TT_LambdaArrow, TT_LeadingJavaAnnotation); 411 } 412 413 bool isUnaryOperator() const { 414 switch (Tok.getKind()) { 415 case tok::plus: 416 case tok::plusplus: 417 case tok::minus: 418 case tok::minusminus: 419 case tok::exclaim: 420 case tok::tilde: 421 case tok::kw_sizeof: 422 case tok::kw_alignof: 423 return true; 424 default: 425 return false; 426 } 427 } 428 429 bool isBinaryOperator() const { 430 // Comma is a binary operator, but does not behave as such wrt. formatting. 
431 return getPrecedence() > prec::Comma; 432 } 433 434 bool isTrailingComment() const { 435 return is(tok::comment) && 436 (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); 437 } 438 439 /// Returns \c true if this is a keyword that can be used 440 /// like a function call (e.g. sizeof, typeid, ...). 441 bool isFunctionLikeKeyword() const { 442 switch (Tok.getKind()) { 443 case tok::kw_throw: 444 case tok::kw_typeid: 445 case tok::kw_return: 446 case tok::kw_sizeof: 447 case tok::kw_alignof: 448 case tok::kw_alignas: 449 case tok::kw_decltype: 450 case tok::kw_noexcept: 451 case tok::kw_static_assert: 452 case tok::kw___attribute: 453 return true; 454 default: 455 return false; 456 } 457 } 458 459 /// Returns \c true if this is a string literal that's like a label, 460 /// e.g. ends with "=" or ":". 461 bool isLabelString() const { 462 if (!is(tok::string_literal)) 463 return false; 464 StringRef Content = TokenText; 465 if (Content.startswith("\"") || Content.startswith("'")) 466 Content = Content.drop_front(1); 467 if (Content.endswith("\"") || Content.endswith("'")) 468 Content = Content.drop_back(1); 469 Content = Content.trim(); 470 return Content.size() > 1 && 471 (Content.back() == ':' || Content.back() == '='); 472 } 473 474 /// Returns actual token start location without leading escaped 475 /// newlines and whitespace. 476 /// 477 /// This can be different to Tok.getLocation(), which includes leading escaped 478 /// newlines. 479 SourceLocation getStartOfNonWhitespace() const { 480 return WhitespaceRange.getEnd(); 481 } 482 483 prec::Level getPrecedence() const { 484 return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, 485 /*CPlusPlus11=*/true); 486 } 487 488 /// Returns the previous token ignoring comments. 
489 FormatToken *getPreviousNonComment() const { 490 FormatToken *Tok = Previous; 491 while (Tok && Tok->is(tok::comment)) 492 Tok = Tok->Previous; 493 return Tok; 494 } 495 496 /// Returns the next token ignoring comments. 497 const FormatToken *getNextNonComment() const { 498 const FormatToken *Tok = Next; 499 while (Tok && Tok->is(tok::comment)) 500 Tok = Tok->Next; 501 return Tok; 502 } 503 504 /// Returns \c true if this tokens starts a block-type list, i.e. a 505 /// list that should be indented with a block indent. 506 bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { 507 if (is(TT_TemplateString) && opensScope()) 508 return true; 509 return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || 510 (is(tok::l_brace) && 511 (BlockKind == BK_Block || is(TT_DictLiteral) || 512 (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || 513 (is(tok::less) && (Style.Language == FormatStyle::LK_Proto || 514 Style.Language == FormatStyle::LK_TextProto)); 515 } 516 517 /// Returns whether the token is the left square bracket of a C++ 518 /// structured binding declaration. 519 bool isCppStructuredBinding(const FormatStyle &Style) const { 520 if (!Style.isCpp() || isNot(tok::l_square)) 521 return false; 522 const FormatToken *T = this; 523 do { 524 T = T->getPreviousNonComment(); 525 } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, 526 tok::ampamp)); 527 return T && T->is(tok::kw_auto); 528 } 529 530 /// Same as opensBlockOrBlockTypeList, but for the closing token. 531 bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { 532 if (is(TT_TemplateString) && closesScope()) 533 return true; 534 return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); 535 } 536 537 /// Return the actual namespace token, if this token starts a namespace 538 /// block. 
539 const FormatToken *getNamespaceToken() const { 540 const FormatToken *NamespaceTok = this; 541 if (is(tok::comment)) 542 NamespaceTok = NamespaceTok->getNextNonComment(); 543 // Detect "(inline|export)? namespace" in the beginning of a line. 544 if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export)) 545 NamespaceTok = NamespaceTok->getNextNonComment(); 546 return NamespaceTok && 547 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) 548 ? NamespaceTok 549 : nullptr; 550 } 551 552 private: 553 // Disallow copying. 554 FormatToken(const FormatToken &) = delete; 555 void operator=(const FormatToken &) = delete; 556 557 template <typename A, typename... Ts> 558 bool startsSequenceInternal(A K1, Ts... Tokens) const { 559 if (is(tok::comment) && Next) 560 return Next->startsSequenceInternal(K1, Tokens...); 561 return is(K1) && Next && Next->startsSequenceInternal(Tokens...); 562 } 563 564 template <typename A> bool startsSequenceInternal(A K1) const { 565 if (is(tok::comment) && Next) 566 return Next->startsSequenceInternal(K1); 567 return is(K1); 568 } 569 570 template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const { 571 if (is(tok::comment) && Previous) 572 return Previous->endsSequenceInternal(K1); 573 return is(K1); 574 } 575 576 template <typename A, typename... Ts> 577 bool endsSequenceInternal(A K1, Ts... Tokens) const { 578 if (is(tok::comment) && Previous) 579 return Previous->endsSequenceInternal(K1, Tokens...); 580 return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); 581 } 582 }; 583 584 class ContinuationIndenter; 585 struct LineState; 586 587 class TokenRole { 588 public: 589 TokenRole(const FormatStyle &Style) : Style(Style) {} 590 virtual ~TokenRole(); 591 592 /// After the \c TokenAnnotator has finished annotating all the tokens, 593 /// this function precomputes required information for formatting. 
594 virtual void precomputeFormattingInfos(const FormatToken *Token); 595 596 /// Apply the special formatting that the given role demands. 597 /// 598 /// Assumes that the token having this role is already formatted. 599 /// 600 /// Continues formatting from \p State leaving indentation to \p Indenter and 601 /// returns the total penalty that this formatting incurs. 602 virtual unsigned formatFromToken(LineState &State, 603 ContinuationIndenter *Indenter, 604 bool DryRun) { 605 return 0; 606 } 607 608 /// Same as \c formatFromToken, but assumes that the first token has 609 /// already been set thereby deciding on the first line break. 610 virtual unsigned formatAfterToken(LineState &State, 611 ContinuationIndenter *Indenter, 612 bool DryRun) { 613 return 0; 614 } 615 616 /// Notifies the \c Role that a comma was found. 617 virtual void CommaFound(const FormatToken *Token) {} 618 619 virtual const FormatToken *lastComma() { return nullptr; } 620 621 protected: 622 const FormatStyle &Style; 623 }; 624 625 class CommaSeparatedList : public TokenRole { 626 public: 627 CommaSeparatedList(const FormatStyle &Style) 628 : TokenRole(Style), HasNestedBracedList(false) {} 629 630 void precomputeFormattingInfos(const FormatToken *Token) override; 631 632 unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, 633 bool DryRun) override; 634 635 unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, 636 bool DryRun) override; 637 638 /// Adds \p Token as the next comma to the \c CommaSeparated list. 639 void CommaFound(const FormatToken *Token) override { 640 Commas.push_back(Token); 641 } 642 643 const FormatToken *lastComma() override { 644 if (Commas.empty()) 645 return nullptr; 646 return Commas.back(); 647 } 648 649 private: 650 /// A struct that holds information on how to format a given list with 651 /// a specific number of columns. 652 struct ColumnFormat { 653 /// The number of columns to use. 
654 unsigned Columns; 655 656 /// The total width in characters. 657 unsigned TotalWidth; 658 659 /// The number of lines required for this format. 660 unsigned LineCount; 661 662 /// The size of each column in characters. 663 SmallVector<unsigned, 8> ColumnSizes; 664 }; 665 666 /// Calculate which \c ColumnFormat fits best into 667 /// \p RemainingCharacters. 668 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; 669 670 /// The ordered \c FormatTokens making up the commas of this list. 671 SmallVector<const FormatToken *, 8> Commas; 672 673 /// The length of each of the list's items in characters including the 674 /// trailing comma. 675 SmallVector<unsigned, 8> ItemLengths; 676 677 /// Precomputed formats that can be used for this list. 678 SmallVector<ColumnFormat, 4> Formats; 679 680 bool HasNestedBracedList; 681 }; 682 683 /// Encapsulates keywords that are context sensitive or for languages not 684 /// properly supported by Clang's lexer. 685 struct AdditionalKeywords { 686 AdditionalKeywords(IdentifierTable &IdentTable) { 687 kw_final = &IdentTable.get("final"); 688 kw_override = &IdentTable.get("override"); 689 kw_in = &IdentTable.get("in"); 690 kw_of = &IdentTable.get("of"); 691 kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM"); 692 kw_CF_ENUM = &IdentTable.get("CF_ENUM"); 693 kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); 694 kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM"); 695 kw_NS_ENUM = &IdentTable.get("NS_ENUM"); 696 kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); 697 698 kw_as = &IdentTable.get("as"); 699 kw_async = &IdentTable.get("async"); 700 kw_await = &IdentTable.get("await"); 701 kw_declare = &IdentTable.get("declare"); 702 kw_finally = &IdentTable.get("finally"); 703 kw_from = &IdentTable.get("from"); 704 kw_function = &IdentTable.get("function"); 705 kw_get = &IdentTable.get("get"); 706 kw_import = &IdentTable.get("import"); 707 kw_infer = &IdentTable.get("infer"); 708 kw_is = &IdentTable.get("is"); 709 kw_let 
= &IdentTable.get("let"); 710 kw_module = &IdentTable.get("module"); 711 kw_readonly = &IdentTable.get("readonly"); 712 kw_set = &IdentTable.get("set"); 713 kw_type = &IdentTable.get("type"); 714 kw_typeof = &IdentTable.get("typeof"); 715 kw_var = &IdentTable.get("var"); 716 kw_yield = &IdentTable.get("yield"); 717 718 kw_abstract = &IdentTable.get("abstract"); 719 kw_assert = &IdentTable.get("assert"); 720 kw_extends = &IdentTable.get("extends"); 721 kw_implements = &IdentTable.get("implements"); 722 kw_instanceof = &IdentTable.get("instanceof"); 723 kw_interface = &IdentTable.get("interface"); 724 kw_native = &IdentTable.get("native"); 725 kw_package = &IdentTable.get("package"); 726 kw_synchronized = &IdentTable.get("synchronized"); 727 kw_throws = &IdentTable.get("throws"); 728 kw___except = &IdentTable.get("__except"); 729 kw___has_include = &IdentTable.get("__has_include"); 730 kw___has_include_next = &IdentTable.get("__has_include_next"); 731 732 kw_mark = &IdentTable.get("mark"); 733 734 kw_extend = &IdentTable.get("extend"); 735 kw_option = &IdentTable.get("option"); 736 kw_optional = &IdentTable.get("optional"); 737 kw_repeated = &IdentTable.get("repeated"); 738 kw_required = &IdentTable.get("required"); 739 kw_returns = &IdentTable.get("returns"); 740 741 kw_signals = &IdentTable.get("signals"); 742 kw_qsignals = &IdentTable.get("Q_SIGNALS"); 743 kw_slots = &IdentTable.get("slots"); 744 kw_qslots = &IdentTable.get("Q_SLOTS"); 745 746 // C# keywords 747 kw_dollar = &IdentTable.get("dollar"); 748 kw_base = &IdentTable.get("base"); 749 kw_byte = &IdentTable.get("byte"); 750 kw_checked = &IdentTable.get("checked"); 751 kw_decimal = &IdentTable.get("decimal"); 752 kw_delegate = &IdentTable.get("delegate"); 753 kw_event = &IdentTable.get("event"); 754 kw_fixed = &IdentTable.get("fixed"); 755 kw_foreach = &IdentTable.get("foreach"); 756 kw_implicit = &IdentTable.get("implicit"); 757 kw_internal = &IdentTable.get("internal"); 758 kw_lock = 
&IdentTable.get("lock"); 759 kw_null = &IdentTable.get("null"); 760 kw_object = &IdentTable.get("object"); 761 kw_out = &IdentTable.get("out"); 762 kw_params = &IdentTable.get("params"); 763 kw_ref = &IdentTable.get("ref"); 764 kw_string = &IdentTable.get("string"); 765 kw_stackalloc = &IdentTable.get("stackalloc"); 766 kw_sbyte = &IdentTable.get("sbyte"); 767 kw_sealed = &IdentTable.get("sealed"); 768 kw_uint = &IdentTable.get("uint"); 769 kw_ulong = &IdentTable.get("ulong"); 770 kw_unchecked = &IdentTable.get("unchecked"); 771 kw_unsafe = &IdentTable.get("unsafe"); 772 kw_ushort = &IdentTable.get("ushort"); 773 774 // Keep this at the end of the constructor to make sure everything here 775 // is 776 // already initialized. 777 JsExtraKeywords = std::unordered_set<IdentifierInfo *>( 778 {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 779 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, 780 kw_set, kw_type, kw_typeof, kw_var, kw_yield, 781 // Keywords from the Java section. 782 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 783 784 CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>( 785 {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event, 786 kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal, 787 kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params, 788 kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed, 789 kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, 790 // Keywords from the JavaScript section. 791 kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 792 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, 793 kw_set, kw_type, kw_typeof, kw_var, kw_yield, 794 // Keywords from the Java section. 795 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 796 } 797 798 // Context sensitive keywords. 
799 IdentifierInfo *kw_final; 800 IdentifierInfo *kw_override; 801 IdentifierInfo *kw_in; 802 IdentifierInfo *kw_of; 803 IdentifierInfo *kw_CF_CLOSED_ENUM; 804 IdentifierInfo *kw_CF_ENUM; 805 IdentifierInfo *kw_CF_OPTIONS; 806 IdentifierInfo *kw_NS_CLOSED_ENUM; 807 IdentifierInfo *kw_NS_ENUM; 808 IdentifierInfo *kw_NS_OPTIONS; 809 IdentifierInfo *kw___except; 810 IdentifierInfo *kw___has_include; 811 IdentifierInfo *kw___has_include_next; 812 813 // JavaScript keywords. 814 IdentifierInfo *kw_as; 815 IdentifierInfo *kw_async; 816 IdentifierInfo *kw_await; 817 IdentifierInfo *kw_declare; 818 IdentifierInfo *kw_finally; 819 IdentifierInfo *kw_from; 820 IdentifierInfo *kw_function; 821 IdentifierInfo *kw_get; 822 IdentifierInfo *kw_import; 823 IdentifierInfo *kw_infer; 824 IdentifierInfo *kw_is; 825 IdentifierInfo *kw_let; 826 IdentifierInfo *kw_module; 827 IdentifierInfo *kw_readonly; 828 IdentifierInfo *kw_set; 829 IdentifierInfo *kw_type; 830 IdentifierInfo *kw_typeof; 831 IdentifierInfo *kw_var; 832 IdentifierInfo *kw_yield; 833 834 // Java keywords. 835 IdentifierInfo *kw_abstract; 836 IdentifierInfo *kw_assert; 837 IdentifierInfo *kw_extends; 838 IdentifierInfo *kw_implements; 839 IdentifierInfo *kw_instanceof; 840 IdentifierInfo *kw_interface; 841 IdentifierInfo *kw_native; 842 IdentifierInfo *kw_package; 843 IdentifierInfo *kw_synchronized; 844 IdentifierInfo *kw_throws; 845 846 // Pragma keywords. 847 IdentifierInfo *kw_mark; 848 849 // Proto keywords. 850 IdentifierInfo *kw_extend; 851 IdentifierInfo *kw_option; 852 IdentifierInfo *kw_optional; 853 IdentifierInfo *kw_repeated; 854 IdentifierInfo *kw_required; 855 IdentifierInfo *kw_returns; 856 857 // QT keywords. 
858 IdentifierInfo *kw_signals; 859 IdentifierInfo *kw_qsignals; 860 IdentifierInfo *kw_slots; 861 IdentifierInfo *kw_qslots; 862 863 // C# keywords 864 IdentifierInfo *kw_dollar; 865 IdentifierInfo *kw_base; 866 IdentifierInfo *kw_byte; 867 IdentifierInfo *kw_checked; 868 IdentifierInfo *kw_decimal; 869 IdentifierInfo *kw_delegate; 870 IdentifierInfo *kw_event; 871 IdentifierInfo *kw_fixed; 872 IdentifierInfo *kw_foreach; 873 IdentifierInfo *kw_implicit; 874 IdentifierInfo *kw_internal; 875 876 IdentifierInfo *kw_lock; 877 IdentifierInfo *kw_null; 878 IdentifierInfo *kw_object; 879 IdentifierInfo *kw_out; 880 881 IdentifierInfo *kw_params; 882 883 IdentifierInfo *kw_ref; 884 IdentifierInfo *kw_string; 885 IdentifierInfo *kw_stackalloc; 886 IdentifierInfo *kw_sbyte; 887 IdentifierInfo *kw_sealed; 888 IdentifierInfo *kw_uint; 889 IdentifierInfo *kw_ulong; 890 IdentifierInfo *kw_unchecked; 891 IdentifierInfo *kw_unsafe; 892 IdentifierInfo *kw_ushort; 893 894 /// Returns \c true if \p Tok is a true JavaScript identifier, returns 895 /// \c false if it is a keyword or a pseudo keyword. 896 bool IsJavaScriptIdentifier(const FormatToken &Tok) const { 897 return Tok.is(tok::identifier) && 898 JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == 899 JsExtraKeywords.end(); 900 } 901 902 /// Returns \c true if \p Tok is a C# keyword, returns 903 /// \c false if it is a anything else. 
904 bool isCSharpKeyword(const FormatToken &Tok) const { 905 switch (Tok.Tok.getKind()) { 906 case tok::kw_bool: 907 case tok::kw_break: 908 case tok::kw_case: 909 case tok::kw_catch: 910 case tok::kw_char: 911 case tok::kw_class: 912 case tok::kw_const: 913 case tok::kw_continue: 914 case tok::kw_default: 915 case tok::kw_do: 916 case tok::kw_double: 917 case tok::kw_else: 918 case tok::kw_enum: 919 case tok::kw_explicit: 920 case tok::kw_extern: 921 case tok::kw_false: 922 case tok::kw_float: 923 case tok::kw_for: 924 case tok::kw_goto: 925 case tok::kw_if: 926 case tok::kw_int: 927 case tok::kw_long: 928 case tok::kw_namespace: 929 case tok::kw_new: 930 case tok::kw_operator: 931 case tok::kw_private: 932 case tok::kw_protected: 933 case tok::kw_public: 934 case tok::kw_return: 935 case tok::kw_short: 936 case tok::kw_sizeof: 937 case tok::kw_static: 938 case tok::kw_struct: 939 case tok::kw_switch: 940 case tok::kw_this: 941 case tok::kw_throw: 942 case tok::kw_true: 943 case tok::kw_try: 944 case tok::kw_typeof: 945 case tok::kw_using: 946 case tok::kw_virtual: 947 case tok::kw_void: 948 case tok::kw_volatile: 949 case tok::kw_while: 950 return true; 951 default: 952 return Tok.is(tok::identifier) && 953 CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == 954 CSharpExtraKeywords.end(); 955 } 956 } 957 958 private: 959 /// The JavaScript keywords beyond the C++ keyword set. 960 std::unordered_set<IdentifierInfo *> JsExtraKeywords; 961 962 /// The C# keywords beyond the C++ keyword set 963 std::unordered_set<IdentifierInfo *> CSharpExtraKeywords; 964 }; 965 966 } // namespace format 967 } // namespace clang 968 969 #endif 970