1 //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the declaration of the FormatToken, a wrapper 11 /// around Token with additional information related to formatting. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H 16 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H 17 18 #include "clang/Basic/IdentifierTable.h" 19 #include "clang/Basic/OperatorPrecedence.h" 20 #include "clang/Format/Format.h" 21 #include "clang/Lex/Lexer.h" 22 #include <memory> 23 #include <unordered_set> 24 25 namespace clang { 26 namespace format { 27 28 #define LIST_TOKEN_TYPES \ 29 TYPE(ArrayInitializerLSquare) \ 30 TYPE(ArraySubscriptLSquare) \ 31 TYPE(AttributeColon) \ 32 TYPE(AttributeParen) \ 33 TYPE(AttributeSquare) \ 34 TYPE(BinaryOperator) \ 35 TYPE(BitFieldColon) \ 36 TYPE(BlockComment) \ 37 TYPE(CastRParen) \ 38 TYPE(ConditionalExpr) \ 39 TYPE(ConflictAlternative) \ 40 TYPE(ConflictEnd) \ 41 TYPE(ConflictStart) \ 42 TYPE(CtorInitializerColon) \ 43 TYPE(CtorInitializerComma) \ 44 TYPE(DesignatedInitializerLSquare) \ 45 TYPE(DesignatedInitializerPeriod) \ 46 TYPE(DictLiteral) \ 47 TYPE(ForEachMacro) \ 48 TYPE(FunctionAnnotationRParen) \ 49 TYPE(FunctionDeclarationName) \ 50 TYPE(FunctionLBrace) \ 51 TYPE(FunctionTypeLParen) \ 52 TYPE(ImplicitStringLiteral) \ 53 TYPE(InheritanceColon) \ 54 TYPE(InheritanceComma) \ 55 TYPE(InlineASMBrace) \ 56 TYPE(InlineASMColon) \ 57 TYPE(JavaAnnotation) \ 58 TYPE(JsComputedPropertyName) \ 59 TYPE(JsExponentiation) \ 60 TYPE(JsExponentiationEqual) \ 61 TYPE(JsFatArrow) \ 62 TYPE(JsNonNullAssertion) \ 63 TYPE(JsPrivateIdentifier) \ 64 TYPE(JsTypeColon) \ 65 TYPE(JsTypeOperator) \ 66 TYPE(JsTypeOptionalQuestion) \ 67 TYPE(LambdaArrow) \ 68 TYPE(LambdaLBrace) \ 69 TYPE(LambdaLSquare) \ 70 TYPE(LeadingJavaAnnotation) \ 71 TYPE(LineComment) \ 72 TYPE(MacroBlockBegin) \ 73 TYPE(MacroBlockEnd) \ 74 TYPE(NamespaceMacro) \ 75 TYPE(ObjCBlockLBrace) \ 76 TYPE(ObjCBlockLParen) \ 77 TYPE(ObjCDecl) \ 78 TYPE(ObjCForIn) \ 79 TYPE(ObjCMethodExpr) \ 80 TYPE(ObjCMethodSpecifier) \ 81 TYPE(ObjCProperty) \ 82 TYPE(ObjCStringLiteral) \ 83 TYPE(OverloadedOperator) \ 84 TYPE(OverloadedOperatorLParen) \ 85 TYPE(PointerOrReference) \ 86 TYPE(PureVirtualSpecifier) \ 87 TYPE(RangeBasedForLoopColon) \ 88 TYPE(RegexLiteral) \ 89 TYPE(SelectorName) \ 90 TYPE(StartOfName) \ 91 TYPE(StatementMacro) \ 92 TYPE(StructuredBindingLSquare) \ 93 TYPE(TemplateCloser) \ 94 TYPE(TemplateOpener) \ 95 TYPE(TemplateString) \ 96 TYPE(ProtoExtensionLSquare) \ 97 TYPE(TrailingAnnotation) \ 98 TYPE(TrailingReturnArrow) \ 99 TYPE(TrailingUnaryOperator) \ 100 TYPE(TypenameMacro) \ 101 TYPE(UnaryOperator) \ 102 TYPE(CSharpStringLiteral) \ 103 TYPE(CSharpNullCoalescing) \ 104 TYPE(Unknown) 105 106 enum TokenType { 107 #define TYPE(X) TT_##X, 108 LIST_TOKEN_TYPES 109 #undef TYPE 110 NUM_TOKEN_TYPES 111 }; 112 113 /// Determines the name of a token type. 114 const char *getTokenTypeName(TokenType Type); 115 116 // Represents what type of block a set of braces open. 117 enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit }; 118 119 // The packing kind of a function's parameters. 120 enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive }; 121 122 enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break }; 123 124 class TokenRole; 125 class AnnotatedLine; 126 127 /// A wrapper around a \c Token storing information about the 128 /// whitespace characters preceding it. 129 struct FormatToken { 130 FormatToken() {} 131 132 /// The \c Token. 133 Token Tok; 134 135 /// The number of newlines immediately before the \c Token. 136 /// 137 /// This can be used to determine what the user wrote in the original code 138 /// and thereby e.g. leave an empty line between two function definitions. 139 unsigned NewlinesBefore = 0; 140 141 /// Whether there is at least one unescaped newline before the \c 142 /// Token. 143 bool HasUnescapedNewline = false; 144 145 /// The range of the whitespace immediately preceding the \c Token. 146 SourceRange WhitespaceRange; 147 148 /// The offset just past the last '\n' in this token's leading 149 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 150 unsigned LastNewlineOffset = 0; 151 152 /// The width of the non-whitespace parts of the token (or its first 153 /// line for multi-line tokens) in columns. 154 /// We need this to correctly measure number of columns a token spans. 155 unsigned ColumnWidth = 0; 156 157 /// Contains the width in columns of the last line of a multi-line 158 /// token. 159 unsigned LastLineColumnWidth = 0; 160 161 /// Whether the token text contains newlines (escaped or not). 162 bool IsMultiline = false; 163 164 /// Indicates that this is the first token of the file. 165 bool IsFirst = false; 166 167 /// Whether there must be a line break before this token. 168 /// 169 /// This happens for example when a preprocessor directive ended directly 170 /// before the token. 171 bool MustBreakBefore = false; 172 173 /// The raw text of the token. 174 /// 175 /// Contains the raw token text without leading whitespace and without leading 176 /// escaped newlines. 177 StringRef TokenText; 178 179 /// Set to \c true if this token is an unterminated literal. 180 bool IsUnterminatedLiteral = 0; 181 182 /// Contains the kind of block if this token is a brace. 183 BraceBlockKind BlockKind = BK_Unknown; 184 185 TokenType Type = TT_Unknown; 186 187 /// The number of spaces that should be inserted before this token. 188 unsigned SpacesRequiredBefore = 0; 189 190 /// \c true if it is allowed to break before this token. 191 bool CanBreakBefore = false; 192 193 /// \c true if this is the ">" of "template<..>". 194 bool ClosesTemplateDeclaration = false; 195 196 /// Number of parameters, if this is "(", "[" or "<". 197 unsigned ParameterCount = 0; 198 199 /// Number of parameters that are nested blocks, 200 /// if this is "(", "[" or "<". 201 unsigned BlockParameterCount = 0; 202 203 /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of 204 /// the surrounding bracket. 205 tok::TokenKind ParentBracket = tok::unknown; 206 207 /// A token can have a special role that can carry extra information 208 /// about the token's formatting. 209 std::unique_ptr<TokenRole> Role; 210 211 /// If this is an opening parenthesis, how are the parameters packed? 212 ParameterPackingKind PackingKind = PPK_Inconclusive; 213 214 /// The total length of the unwrapped line up to and including this 215 /// token. 216 unsigned TotalLength = 0; 217 218 /// The original 0-based column of this token, including expanded tabs. 219 /// The configured TabWidth is used as tab width. 220 unsigned OriginalColumn = 0; 221 222 /// The length of following tokens until the next natural split point, 223 /// or the next token that can be broken. 224 unsigned UnbreakableTailLength = 0; 225 226 // FIXME: Come up with a 'cleaner' concept. 227 /// The binding strength of a token. This is a combined value of 228 /// operator precedence, parenthesis nesting, etc. 229 unsigned BindingStrength = 0; 230 231 /// The nesting level of this token, i.e. the number of surrounding (), 232 /// [], {} or <>. 233 unsigned NestingLevel = 0; 234 235 /// The indent level of this token. Copied from the surrounding line. 236 unsigned IndentLevel = 0; 237 238 /// Penalty for inserting a line break before this token. 239 unsigned SplitPenalty = 0; 240 241 /// If this is the first ObjC selector name in an ObjC method 242 /// definition or call, this contains the length of the longest name. 243 /// 244 /// This being set to 0 means that the selectors should not be colon-aligned, 245 /// e.g. because several of them are block-type. 246 unsigned LongestObjCSelectorName = 0; 247 248 /// If this is the first ObjC selector name in an ObjC method 249 /// definition or call, this contains the number of parts that the whole 250 /// selector consist of. 251 unsigned ObjCSelectorNameParts = 0; 252 253 /// The 0-based index of the parameter/argument. For ObjC it is set 254 /// for the selector name token. 255 /// For now calculated only for ObjC. 256 unsigned ParameterIndex = 0; 257 258 /// Stores the number of required fake parentheses and the 259 /// corresponding operator precedence. 260 /// 261 /// If multiple fake parentheses start at a token, this vector stores them in 262 /// reverse order, i.e. inner fake parenthesis first. 263 SmallVector<prec::Level, 4> FakeLParens; 264 /// Insert this many fake ) after this token for correct indentation. 265 unsigned FakeRParens = 0; 266 267 /// \c true if this token starts a binary expression, i.e. has at least 268 /// one fake l_paren with a precedence greater than prec::Unknown. 269 bool StartsBinaryExpression = false; 270 /// \c true if this token ends a binary expression. 271 bool EndsBinaryExpression = false; 272 273 /// If this is an operator (or "."/"->") in a sequence of operators 274 /// with the same precedence, contains the 0-based operator index. 275 unsigned OperatorIndex = 0; 276 277 /// If this is an operator (or "."/"->") in a sequence of operators 278 /// with the same precedence, points to the next operator. 279 FormatToken *NextOperator = nullptr; 280 281 /// Is this token part of a \c DeclStmt defining multiple variables? 282 /// 283 /// Only set if \c Type == \c TT_StartOfName. 284 bool PartOfMultiVariableDeclStmt = false; 285 286 /// Does this line comment continue a line comment section? 287 /// 288 /// Only set to true if \c Type == \c TT_LineComment. 289 bool ContinuesLineCommentSection = false; 290 291 /// If this is a bracket, this points to the matching one. 292 FormatToken *MatchingParen = nullptr; 293 294 /// The previous token in the unwrapped line. 295 FormatToken *Previous = nullptr; 296 297 /// The next token in the unwrapped line. 298 FormatToken *Next = nullptr; 299 300 /// If this token starts a block, this contains all the unwrapped lines 301 /// in it. 302 SmallVector<AnnotatedLine *, 1> Children; 303 304 /// Stores the formatting decision for the token once it was made. 305 FormatDecision Decision = FD_Unformatted; 306 307 /// If \c true, this token has been fully formatted (indented and 308 /// potentially re-formatted inside), and we do not allow further formatting 309 /// changes. 310 bool Finalized = false; 311 312 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } 313 bool is(TokenType TT) const { return Type == TT; } 314 bool is(const IdentifierInfo *II) const { 315 return II && II == Tok.getIdentifierInfo(); 316 } 317 bool is(tok::PPKeywordKind Kind) const { 318 return Tok.getIdentifierInfo() && 319 Tok.getIdentifierInfo()->getPPKeywordID() == Kind; 320 } 321 template <typename A, typename B> bool isOneOf(A K1, B K2) const { 322 return is(K1) || is(K2); 323 } 324 template <typename A, typename B, typename... Ts> 325 bool isOneOf(A K1, B K2, Ts... Ks) const { 326 return is(K1) || isOneOf(K2, Ks...); 327 } 328 template <typename T> bool isNot(T Kind) const { return !is(Kind); } 329 330 bool isIf(bool AllowConstexprMacro = true) const { 331 return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) || 332 (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); 333 } 334 335 bool closesScopeAfterBlock() const { 336 if (BlockKind == BK_Block) 337 return true; 338 if (closesScope()) 339 return Previous->closesScopeAfterBlock(); 340 return false; 341 } 342 343 /// \c true if this token starts a sequence with the given tokens in order, 344 /// following the ``Next`` pointers, ignoring comments. 345 template <typename A, typename... Ts> 346 bool startsSequence(A K1, Ts... Tokens) const { 347 return startsSequenceInternal(K1, Tokens...); 348 } 349 350 /// \c true if this token ends a sequence with the given tokens in order, 351 /// following the ``Previous`` pointers, ignoring comments. 352 /// For example, given tokens [T1, T2, T3], the function returns true if 353 /// 3 tokens ending at this (ignoring comments) are [T3, T2, T1]. In other 354 /// words, the tokens passed to this function need to the reverse of the 355 /// order the tokens appear in code. 356 template <typename A, typename... Ts> 357 bool endsSequence(A K1, Ts... Tokens) const { 358 return endsSequenceInternal(K1, Tokens...); 359 } 360 361 bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } 362 363 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 364 return Tok.isObjCAtKeyword(Kind); 365 } 366 367 bool isAccessSpecifier(bool ColonRequired = true) const { 368 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 369 (!ColonRequired || (Next && Next->is(tok::colon))); 370 } 371 372 /// Determine whether the token is a simple-type-specifier. 373 bool isSimpleTypeSpecifier() const; 374 375 bool isObjCAccessSpecifier() const { 376 return is(tok::at) && Next && 377 (Next->isObjCAtKeyword(tok::objc_public) || 378 Next->isObjCAtKeyword(tok::objc_protected) || 379 Next->isObjCAtKeyword(tok::objc_package) || 380 Next->isObjCAtKeyword(tok::objc_private)); 381 } 382 383 /// Returns whether \p Tok is ([{ or an opening < of a template or in 384 /// protos. 385 bool opensScope() const { 386 if (is(TT_TemplateString) && TokenText.endswith("${")) 387 return true; 388 if (is(TT_DictLiteral) && is(tok::less)) 389 return true; 390 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, 391 TT_TemplateOpener); 392 } 393 /// Returns whether \p Tok is )]} or a closing > of a template or in 394 /// protos. 395 bool closesScope() const { 396 if (is(TT_TemplateString) && TokenText.startswith("}")) 397 return true; 398 if (is(TT_DictLiteral) && is(tok::greater)) 399 return true; 400 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, 401 TT_TemplateCloser); 402 } 403 404 /// Returns \c true if this is a "." or "->" accessing a member. 405 bool isMemberAccess() const { 406 return isOneOf(tok::arrow, tok::period, tok::arrowstar) && 407 !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, 408 TT_LambdaArrow); 409 } 410 411 bool isUnaryOperator() const { 412 switch (Tok.getKind()) { 413 case tok::plus: 414 case tok::plusplus: 415 case tok::minus: 416 case tok::minusminus: 417 case tok::exclaim: 418 case tok::tilde: 419 case tok::kw_sizeof: 420 case tok::kw_alignof: 421 return true; 422 default: 423 return false; 424 } 425 } 426 427 bool isBinaryOperator() const { 428 // Comma is a binary operator, but does not behave as such wrt. formatting. 429 return getPrecedence() > prec::Comma; 430 } 431 432 bool isTrailingComment() const { 433 return is(tok::comment) && 434 (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); 435 } 436 437 /// Returns \c true if this is a keyword that can be used 438 /// like a function call (e.g. sizeof, typeid, ...). 439 bool isFunctionLikeKeyword() const { 440 switch (Tok.getKind()) { 441 case tok::kw_throw: 442 case tok::kw_typeid: 443 case tok::kw_return: 444 case tok::kw_sizeof: 445 case tok::kw_alignof: 446 case tok::kw_alignas: 447 case tok::kw_decltype: 448 case tok::kw_noexcept: 449 case tok::kw_static_assert: 450 case tok::kw___attribute: 451 return true; 452 default: 453 return false; 454 } 455 } 456 457 /// Returns \c true if this is a string literal that's like a label, 458 /// e.g. ends with "=" or ":". 459 bool isLabelString() const { 460 if (!is(tok::string_literal)) 461 return false; 462 StringRef Content = TokenText; 463 if (Content.startswith("\"") || Content.startswith("'")) 464 Content = Content.drop_front(1); 465 if (Content.endswith("\"") || Content.endswith("'")) 466 Content = Content.drop_back(1); 467 Content = Content.trim(); 468 return Content.size() > 1 && 469 (Content.back() == ':' || Content.back() == '='); 470 } 471 472 /// Returns actual token start location without leading escaped 473 /// newlines and whitespace. 474 /// 475 /// This can be different to Tok.getLocation(), which includes leading escaped 476 /// newlines. 477 SourceLocation getStartOfNonWhitespace() const { 478 return WhitespaceRange.getEnd(); 479 } 480 481 prec::Level getPrecedence() const { 482 return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, 483 /*CPlusPlus11=*/true); 484 } 485 486 /// Returns the previous token ignoring comments. 487 FormatToken *getPreviousNonComment() const { 488 FormatToken *Tok = Previous; 489 while (Tok && Tok->is(tok::comment)) 490 Tok = Tok->Previous; 491 return Tok; 492 } 493 494 /// Returns the next token ignoring comments. 495 const FormatToken *getNextNonComment() const { 496 const FormatToken *Tok = Next; 497 while (Tok && Tok->is(tok::comment)) 498 Tok = Tok->Next; 499 return Tok; 500 } 501 502 /// Returns \c true if this tokens starts a block-type list, i.e. a 503 /// list that should be indented with a block indent. 504 bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { 505 if (is(TT_TemplateString) && opensScope()) 506 return true; 507 return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || 508 (is(tok::l_brace) && 509 (BlockKind == BK_Block || is(TT_DictLiteral) || 510 (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || 511 (is(tok::less) && (Style.Language == FormatStyle::LK_Proto || 512 Style.Language == FormatStyle::LK_TextProto)); 513 } 514 515 /// Returns whether the token is the left square bracket of a C++ 516 /// structured binding declaration. 517 bool isCppStructuredBinding(const FormatStyle &Style) const { 518 if (!Style.isCpp() || isNot(tok::l_square)) 519 return false; 520 const FormatToken *T = this; 521 do { 522 T = T->getPreviousNonComment(); 523 } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, 524 tok::ampamp)); 525 return T && T->is(tok::kw_auto); 526 } 527 528 /// Same as opensBlockOrBlockTypeList, but for the closing token. 529 bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { 530 if (is(TT_TemplateString) && closesScope()) 531 return true; 532 return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); 533 } 534 535 /// Return the actual namespace token, if this token starts a namespace 536 /// block. 537 const FormatToken *getNamespaceToken() const { 538 const FormatToken *NamespaceTok = this; 539 if (is(tok::comment)) 540 NamespaceTok = NamespaceTok->getNextNonComment(); 541 // Detect "(inline|export)? namespace" in the beginning of a line. 542 if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export)) 543 NamespaceTok = NamespaceTok->getNextNonComment(); 544 return NamespaceTok && 545 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) 546 ? NamespaceTok 547 : nullptr; 548 } 549 550 private: 551 // Disallow copying. 552 FormatToken(const FormatToken &) = delete; 553 void operator=(const FormatToken &) = delete; 554 555 template <typename A, typename... Ts> 556 bool startsSequenceInternal(A K1, Ts... Tokens) const { 557 if (is(tok::comment) && Next) 558 return Next->startsSequenceInternal(K1, Tokens...); 559 return is(K1) && Next && Next->startsSequenceInternal(Tokens...); 560 } 561 562 template <typename A> bool startsSequenceInternal(A K1) const { 563 if (is(tok::comment) && Next) 564 return Next->startsSequenceInternal(K1); 565 return is(K1); 566 } 567 568 template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const { 569 if (is(tok::comment) && Previous) 570 return Previous->endsSequenceInternal(K1); 571 return is(K1); 572 } 573 574 template <typename A, typename... Ts> 575 bool endsSequenceInternal(A K1, Ts... Tokens) const { 576 if (is(tok::comment) && Previous) 577 return Previous->endsSequenceInternal(K1, Tokens...); 578 return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); 579 } 580 }; 581 582 class ContinuationIndenter; 583 struct LineState; 584 585 class TokenRole { 586 public: 587 TokenRole(const FormatStyle &Style) : Style(Style) {} 588 virtual ~TokenRole(); 589 590 /// After the \c TokenAnnotator has finished annotating all the tokens, 591 /// this function precomputes required information for formatting. 592 virtual void precomputeFormattingInfos(const FormatToken *Token); 593 594 /// Apply the special formatting that the given role demands. 595 /// 596 /// Assumes that the token having this role is already formatted. 597 /// 598 /// Continues formatting from \p State leaving indentation to \p Indenter and 599 /// returns the total penalty that this formatting incurs. 600 virtual unsigned formatFromToken(LineState &State, 601 ContinuationIndenter *Indenter, 602 bool DryRun) { 603 return 0; 604 } 605 606 /// Same as \c formatFromToken, but assumes that the first token has 607 /// already been set thereby deciding on the first line break. 608 virtual unsigned formatAfterToken(LineState &State, 609 ContinuationIndenter *Indenter, 610 bool DryRun) { 611 return 0; 612 } 613 614 /// Notifies the \c Role that a comma was found. 615 virtual void CommaFound(const FormatToken *Token) {} 616 617 virtual const FormatToken *lastComma() { return nullptr; } 618 619 protected: 620 const FormatStyle &Style; 621 }; 622 623 class CommaSeparatedList : public TokenRole { 624 public: 625 CommaSeparatedList(const FormatStyle &Style) 626 : TokenRole(Style), HasNestedBracedList(false) {} 627 628 void precomputeFormattingInfos(const FormatToken *Token) override; 629 630 unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, 631 bool DryRun) override; 632 633 unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, 634 bool DryRun) override; 635 636 /// Adds \p Token as the next comma to the \c CommaSeparated list. 637 void CommaFound(const FormatToken *Token) override { 638 Commas.push_back(Token); 639 } 640 641 const FormatToken *lastComma() override { 642 if (Commas.empty()) 643 return nullptr; 644 return Commas.back(); 645 } 646 647 private: 648 /// A struct that holds information on how to format a given list with 649 /// a specific number of columns. 650 struct ColumnFormat { 651 /// The number of columns to use. 652 unsigned Columns; 653 654 /// The total width in characters. 655 unsigned TotalWidth; 656 657 /// The number of lines required for this format. 658 unsigned LineCount; 659 660 /// The size of each column in characters. 661 SmallVector<unsigned, 8> ColumnSizes; 662 }; 663 664 /// Calculate which \c ColumnFormat fits best into 665 /// \p RemainingCharacters. 666 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; 667 668 /// The ordered \c FormatTokens making up the commas of this list. 669 SmallVector<const FormatToken *, 8> Commas; 670 671 /// The length of each of the list's items in characters including the 672 /// trailing comma. 673 SmallVector<unsigned, 8> ItemLengths; 674 675 /// Precomputed formats that can be used for this list. 676 SmallVector<ColumnFormat, 4> Formats; 677 678 bool HasNestedBracedList; 679 }; 680 681 /// Encapsulates keywords that are context sensitive or for languages not 682 /// properly supported by Clang's lexer. 683 struct AdditionalKeywords { 684 AdditionalKeywords(IdentifierTable &IdentTable) { 685 kw_final = &IdentTable.get("final"); 686 kw_override = &IdentTable.get("override"); 687 kw_in = &IdentTable.get("in"); 688 kw_of = &IdentTable.get("of"); 689 kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM"); 690 kw_CF_ENUM = &IdentTable.get("CF_ENUM"); 691 kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); 692 kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM"); 693 kw_NS_ENUM = &IdentTable.get("NS_ENUM"); 694 kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); 695 696 kw_as = &IdentTable.get("as"); 697 kw_async = &IdentTable.get("async"); 698 kw_await = &IdentTable.get("await"); 699 kw_declare = &IdentTable.get("declare"); 700 kw_finally = &IdentTable.get("finally"); 701 kw_from = &IdentTable.get("from"); 702 kw_function = &IdentTable.get("function"); 703 kw_get = &IdentTable.get("get"); 704 kw_import = &IdentTable.get("import"); 705 kw_infer = &IdentTable.get("infer"); 706 kw_is = &IdentTable.get("is"); 707 kw_let = &IdentTable.get("let"); 708 kw_module = &IdentTable.get("module"); 709 kw_readonly = &IdentTable.get("readonly"); 710 kw_set = &IdentTable.get("set"); 711 kw_type = &IdentTable.get("type"); 712 kw_typeof = &IdentTable.get("typeof"); 713 kw_var = &IdentTable.get("var"); 714 kw_yield = &IdentTable.get("yield"); 715 716 kw_abstract = &IdentTable.get("abstract"); 717 kw_assert = &IdentTable.get("assert"); 718 kw_extends = &IdentTable.get("extends"); 719 kw_implements = &IdentTable.get("implements"); 720 kw_instanceof = &IdentTable.get("instanceof"); 721 kw_interface = &IdentTable.get("interface"); 722 kw_native = &IdentTable.get("native"); 723 kw_package = &IdentTable.get("package"); 724 kw_synchronized = &IdentTable.get("synchronized"); 725 kw_throws = &IdentTable.get("throws"); 726 kw___except = &IdentTable.get("__except"); 727 kw___has_include = &IdentTable.get("__has_include"); 728 kw___has_include_next = &IdentTable.get("__has_include_next"); 729 730 kw_mark = &IdentTable.get("mark"); 731 732 kw_extend = &IdentTable.get("extend"); 733 kw_option = &IdentTable.get("option"); 734 kw_optional = &IdentTable.get("optional"); 735 kw_repeated = &IdentTable.get("repeated"); 736 kw_required = &IdentTable.get("required"); 737 kw_returns = &IdentTable.get("returns"); 738 739 kw_signals = &IdentTable.get("signals"); 740 kw_qsignals = &IdentTable.get("Q_SIGNALS"); 741 kw_slots = &IdentTable.get("slots"); 742 kw_qslots = &IdentTable.get("Q_SLOTS"); 743 744 // C# keywords 745 kw_dollar = &IdentTable.get("dollar"); 746 kw_base = &IdentTable.get("base"); 747 kw_byte = &IdentTable.get("byte"); 748 kw_checked = &IdentTable.get("checked"); 749 kw_decimal = &IdentTable.get("decimal"); 750 kw_delegate = &IdentTable.get("delegate"); 751 kw_event = &IdentTable.get("event"); 752 kw_fixed = &IdentTable.get("fixed"); 753 kw_foreach = &IdentTable.get("foreach"); 754 kw_implicit = &IdentTable.get("implicit"); 755 kw_internal = &IdentTable.get("internal"); 756 kw_lock = &IdentTable.get("lock"); 757 kw_null = &IdentTable.get("null"); 758 kw_object = &IdentTable.get("object"); 759 kw_out = &IdentTable.get("out"); 760 kw_params = &IdentTable.get("params"); 761 kw_ref = &IdentTable.get("ref"); 762 kw_string = &IdentTable.get("string"); 763 kw_stackalloc = &IdentTable.get("stackalloc"); 764 kw_sbyte = &IdentTable.get("sbyte"); 765 kw_sealed = &IdentTable.get("sealed"); 766 kw_uint = &IdentTable.get("uint"); 767 kw_ulong = &IdentTable.get("ulong"); 768 kw_unchecked = &IdentTable.get("unchecked"); 769 kw_unsafe = &IdentTable.get("unsafe"); 770 kw_ushort = &IdentTable.get("ushort"); 771 772 // Keep this at the end of the constructor to make sure everything here 773 // is 774 // already initialized. 775 JsExtraKeywords = std::unordered_set<IdentifierInfo *>( 776 {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 777 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, 778 kw_set, kw_type, kw_typeof, kw_var, kw_yield, 779 // Keywords from the Java section. 780 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 781 782 CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>( 783 {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event, 784 kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal, 785 kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params, 786 kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed, 787 kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, 788 // Keywords from the JavaScript section. 789 kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 790 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, 791 kw_set, kw_type, kw_typeof, kw_var, kw_yield, 792 // Keywords from the Java section. 793 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 794 } 795 796 // Context sensitive keywords. 797 IdentifierInfo *kw_final; 798 IdentifierInfo *kw_override; 799 IdentifierInfo *kw_in; 800 IdentifierInfo *kw_of; 801 IdentifierInfo *kw_CF_CLOSED_ENUM; 802 IdentifierInfo *kw_CF_ENUM; 803 IdentifierInfo *kw_CF_OPTIONS; 804 IdentifierInfo *kw_NS_CLOSED_ENUM; 805 IdentifierInfo *kw_NS_ENUM; 806 IdentifierInfo *kw_NS_OPTIONS; 807 IdentifierInfo *kw___except; 808 IdentifierInfo *kw___has_include; 809 IdentifierInfo *kw___has_include_next; 810 811 // JavaScript keywords. 812 IdentifierInfo *kw_as; 813 IdentifierInfo *kw_async; 814 IdentifierInfo *kw_await; 815 IdentifierInfo *kw_declare; 816 IdentifierInfo *kw_finally; 817 IdentifierInfo *kw_from; 818 IdentifierInfo *kw_function; 819 IdentifierInfo *kw_get; 820 IdentifierInfo *kw_import; 821 IdentifierInfo *kw_infer; 822 IdentifierInfo *kw_is; 823 IdentifierInfo *kw_let; 824 IdentifierInfo *kw_module; 825 IdentifierInfo *kw_readonly; 826 IdentifierInfo *kw_set; 827 IdentifierInfo *kw_type; 828 IdentifierInfo *kw_typeof; 829 IdentifierInfo *kw_var; 830 IdentifierInfo *kw_yield; 831 832 // Java keywords. 833 IdentifierInfo *kw_abstract; 834 IdentifierInfo *kw_assert; 835 IdentifierInfo *kw_extends; 836 IdentifierInfo *kw_implements; 837 IdentifierInfo *kw_instanceof; 838 IdentifierInfo *kw_interface; 839 IdentifierInfo *kw_native; 840 IdentifierInfo *kw_package; 841 IdentifierInfo *kw_synchronized; 842 IdentifierInfo *kw_throws; 843 844 // Pragma keywords. 845 IdentifierInfo *kw_mark; 846 847 // Proto keywords. 848 IdentifierInfo *kw_extend; 849 IdentifierInfo *kw_option; 850 IdentifierInfo *kw_optional; 851 IdentifierInfo *kw_repeated; 852 IdentifierInfo *kw_required; 853 IdentifierInfo *kw_returns; 854 855 // QT keywords. 856 IdentifierInfo *kw_signals; 857 IdentifierInfo *kw_qsignals; 858 IdentifierInfo *kw_slots; 859 IdentifierInfo *kw_qslots; 860 861 // C# keywords 862 IdentifierInfo *kw_dollar; 863 IdentifierInfo *kw_base; 864 IdentifierInfo *kw_byte; 865 IdentifierInfo *kw_checked; 866 IdentifierInfo *kw_decimal; 867 IdentifierInfo *kw_delegate; 868 IdentifierInfo *kw_event; 869 IdentifierInfo *kw_fixed; 870 IdentifierInfo *kw_foreach; 871 IdentifierInfo *kw_implicit; 872 IdentifierInfo *kw_internal; 873 874 IdentifierInfo *kw_lock; 875 IdentifierInfo *kw_null; 876 IdentifierInfo *kw_object; 877 IdentifierInfo *kw_out; 878 879 IdentifierInfo *kw_params; 880 881 IdentifierInfo *kw_ref; 882 IdentifierInfo *kw_string; 883 IdentifierInfo *kw_stackalloc; 884 IdentifierInfo *kw_sbyte; 885 IdentifierInfo *kw_sealed; 886 IdentifierInfo *kw_uint; 887 IdentifierInfo *kw_ulong; 888 IdentifierInfo *kw_unchecked; 889 IdentifierInfo *kw_unsafe; 890 IdentifierInfo *kw_ushort; 891 892 /// Returns \c true if \p Tok is a true JavaScript identifier, returns 893 /// \c false if it is a keyword or a pseudo keyword. 894 bool IsJavaScriptIdentifier(const FormatToken &Tok) const { 895 return Tok.is(tok::identifier) && 896 JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == 897 JsExtraKeywords.end(); 898 } 899 900 /// Returns \c true if \p Tok is a C# keyword, returns 901 /// \c false if it is a anything else. 902 bool isCSharpKeyword(const FormatToken &Tok) const { 903 switch (Tok.Tok.getKind()) { 904 case tok::kw_bool: 905 case tok::kw_break: 906 case tok::kw_case: 907 case tok::kw_catch: 908 case tok::kw_char: 909 case tok::kw_class: 910 case tok::kw_const: 911 case tok::kw_continue: 912 case tok::kw_default: 913 case tok::kw_do: 914 case tok::kw_double: 915 case tok::kw_else: 916 case tok::kw_enum: 917 case tok::kw_explicit: 918 case tok::kw_extern: 919 case tok::kw_false: 920 case tok::kw_float: 921 case tok::kw_for: 922 case tok::kw_goto: 923 case tok::kw_if: 924 case tok::kw_int: 925 case tok::kw_long: 926 case tok::kw_namespace: 927 case tok::kw_new: 928 case tok::kw_operator: 929 case tok::kw_private: 930 case tok::kw_protected: 931 case tok::kw_public: 932 case tok::kw_return: 933 case tok::kw_short: 934 case tok::kw_sizeof: 935 case tok::kw_static: 936 case tok::kw_struct: 937 case tok::kw_switch: 938 case tok::kw_this: 939 case tok::kw_throw: 940 case tok::kw_true: 941 case tok::kw_try: 942 case tok::kw_typeof: 943 case tok::kw_using: 944 case tok::kw_virtual: 945 case tok::kw_void: 946 case tok::kw_volatile: 947 case tok::kw_while: 948 return true; 949 default: 950 return Tok.is(tok::identifier) && 951 CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == 952 CSharpExtraKeywords.end(); 953 } 954 } 955 956 private: 957 /// The JavaScript keywords beyond the C++ keyword set. 958 std::unordered_set<IdentifierInfo *> JsExtraKeywords; 959 960 /// The C# keywords beyond the C++ keyword set 961 std::unordered_set<IdentifierInfo *> CSharpExtraKeywords; 962 }; 963 964 } // namespace format 965 } // namespace clang 966 967 #endif 968