//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the declaration of the FormatToken, a wrapper
/// around Token with additional information related to formatting.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H

#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include <memory>
#include <unordered_set>

namespace clang {
namespace format {

// The master list of semantic token types. Every entry is wrapped in TYPE(...);
// a user defines TYPE before expanding LIST_TOKEN_TYPES and undefines it
// afterwards (see enum TokenType below, which turns each entry into TT_<name>).
// The position of an entry determines the numeric value of its enumerator, so
// new entries change the values of everything listed after them.
#define LIST_TOKEN_TYPES                                                       \
  TYPE(ArrayInitializerLSquare)                                                \
  TYPE(ArraySubscriptLSquare)                                                  \
  TYPE(AttributeColon)                                                         \
  TYPE(AttributeParen)                                                         \
  TYPE(AttributeSquare)                                                        \
  TYPE(BinaryOperator)                                                         \
  TYPE(BitFieldColon)                                                          \
  TYPE(BlockComment)                                                           \
  TYPE(CastRParen)                                                             \
  TYPE(ConditionalExpr)                                                        \
  TYPE(ConflictAlternative)                                                    \
  TYPE(ConflictEnd)                                                            \
  TYPE(ConflictStart)                                                          \
  TYPE(CtorInitializerColon)                                                   \
  TYPE(CtorInitializerComma)                                                   \
  TYPE(DesignatedInitializerLSquare)                                           \
  TYPE(DesignatedInitializerPeriod)                                            \
  TYPE(DictLiteral)                                                            \
  TYPE(ForEachMacro)                                                           \
  TYPE(FunctionAnnotationRParen)                                               \
  TYPE(FunctionDeclarationName)                                                \
  TYPE(FunctionLBrace)                                                         \
  TYPE(FunctionTypeLParen)                                                     \
  TYPE(ImplicitStringLiteral)                                                  \
  TYPE(InheritanceColon)                                                       \
  TYPE(InheritanceComma)                                                       \
  TYPE(InlineASMBrace)                                                         \
  TYPE(InlineASMColon)                                                         \
  TYPE(InlineASMSymbolicNameLSquare)                                           \
  TYPE(JavaAnnotation)                                                         \
  TYPE(JsComputedPropertyName)                                                 \
  TYPE(JsExponentiation)                                                       \
  TYPE(JsExponentiationEqual)                                                  \
  TYPE(JsFatArrow)                                                             \
  TYPE(JsNonNullAssertion)                                                     \
  TYPE(JsNullishCoalescingOperator)                                            \
  TYPE(JsNullPropagatingOperator)                                              \
  TYPE(JsPrivateIdentifier)                                                    \
  TYPE(JsTypeColon)                                                            \
  TYPE(JsTypeOperator)                                                         \
  TYPE(JsTypeOptionalQuestion)                                                 \
  TYPE(LambdaArrow)                                                            \
  TYPE(LambdaLBrace)                                                           \
  TYPE(LambdaLSquare)                                                          \
  TYPE(LeadingJavaAnnotation)                                                  \
  TYPE(LineComment)                                                            \
  TYPE(MacroBlockBegin)                                                        \
  TYPE(MacroBlockEnd)                                                          \
  TYPE(NamespaceMacro)                                                         \
  TYPE(ObjCBlockLBrace)                                                        \
  TYPE(ObjCBlockLParen)                                                        \
  TYPE(ObjCDecl)                                                               \
  TYPE(ObjCForIn)                                                              \
  TYPE(ObjCMethodExpr)                                                         \
  TYPE(ObjCMethodSpecifier)                                                    \
  TYPE(ObjCProperty)                                                           \
  TYPE(ObjCStringLiteral)                                                      \
  TYPE(OverloadedOperator)                                                     \
  TYPE(OverloadedOperatorLParen)                                               \
  TYPE(PointerOrReference)                                                     \
  TYPE(PureVirtualSpecifier)                                                   \
  TYPE(RangeBasedForLoopColon)                                                 \
  TYPE(RegexLiteral)                                                           \
  TYPE(SelectorName)                                                           \
  TYPE(StartOfName)                                                            \
  TYPE(StatementMacro)                                                         \
  TYPE(StructuredBindingLSquare)                                               \
  TYPE(TemplateCloser)                                                         \
  TYPE(TemplateOpener)                                                         \
  TYPE(TemplateString)                                                         \
  TYPE(ProtoExtensionLSquare)                                                  \
  TYPE(TrailingAnnotation)                                                     \
  TYPE(TrailingReturnArrow)                                                    \
  TYPE(TrailingUnaryOperator)                                                  \
  TYPE(TypenameMacro)                                                          \
  TYPE(UnaryOperator)                                                          \
  TYPE(UntouchableMacroFunc)                                                   \
  TYPE(CSharpStringLiteral)                                                    \
  TYPE(CSharpNamedArgumentColon)                                               \
  TYPE(CSharpNullable)                                                         \
  TYPE(CSharpNullCoalescing)                                                   \
  TYPE(CSharpNullConditional)                                                  \
  TYPE(CSharpNullConditionalLSquare)                                           \
  TYPE(CSharpGenericTypeConstraint)                                            \
  TYPE(CSharpGenericTypeConstraintColon)                                       \
  TYPE(CSharpGenericTypeConstraintComma)                                       \
  TYPE(Unknown)

/// Determines the semantic type of a syntactic token, e.g. whether "<" is a
/// template opener or binary operator.
///
/// The enumerators are generated from LIST_TOKEN_TYPES with a TT_ prefix.
/// NUM_TOKEN_TYPES follows the last generated enumerator, so its value equals
/// the number of entries in the list.
enum TokenType {
#define TYPE(X) TT_##X,
  LIST_TOKEN_TYPES
#undef TYPE
      NUM_TOKEN_TYPES
};

/// Determines the name of a token type.
///
/// Only declared here; the definition is not part of this header.
const char *getTokenTypeName(TokenType Type);

// Represents what type of block a set of braces opens.
130 enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit }; 131 132 // The packing kind of a function's parameters. 133 enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive }; 134 135 enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break }; 136 137 class TokenRole; 138 class AnnotatedLine; 139 140 /// A wrapper around a \c Token storing information about the 141 /// whitespace characters preceding it. 142 struct FormatToken { 143 FormatToken() {} 144 145 /// The \c Token. 146 Token Tok; 147 148 /// The number of newlines immediately before the \c Token. 149 /// 150 /// This can be used to determine what the user wrote in the original code 151 /// and thereby e.g. leave an empty line between two function definitions. 152 unsigned NewlinesBefore = 0; 153 154 /// Whether there is at least one unescaped newline before the \c 155 /// Token. 156 bool HasUnescapedNewline = false; 157 158 /// The range of the whitespace immediately preceding the \c Token. 159 SourceRange WhitespaceRange; 160 161 /// The offset just past the last '\n' in this token's leading 162 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 163 unsigned LastNewlineOffset = 0; 164 165 /// The width of the non-whitespace parts of the token (or its first 166 /// line for multi-line tokens) in columns. 167 /// We need this to correctly measure number of columns a token spans. 168 unsigned ColumnWidth = 0; 169 170 /// Contains the width in columns of the last line of a multi-line 171 /// token. 172 unsigned LastLineColumnWidth = 0; 173 174 /// Whether the token text contains newlines (escaped or not). 175 bool IsMultiline = false; 176 177 /// Indicates that this is the first token of the file. 178 bool IsFirst = false; 179 180 /// Whether there must be a line break before this token. 181 /// 182 /// This happens for example when a preprocessor directive ended directly 183 /// before the token. 
184 bool MustBreakBefore = false; 185 186 /// Whether to not align across this token 187 /// 188 /// This happens for example when a preprocessor directive ended directly 189 /// before the token, but very rarely otherwise. 190 bool MustBreakAlignBefore = false; 191 192 /// The raw text of the token. 193 /// 194 /// Contains the raw token text without leading whitespace and without leading 195 /// escaped newlines. 196 StringRef TokenText; 197 198 /// Set to \c true if this token is an unterminated literal. 199 bool IsUnterminatedLiteral = 0; 200 201 /// Contains the kind of block if this token is a brace. 202 BraceBlockKind BlockKind = BK_Unknown; 203 204 /// Returns the token's type, e.g. whether "<" is a template opener or 205 /// binary operator. 206 TokenType getType() const { return Type; } 207 void setType(TokenType T) { Type = T; } 208 209 /// The number of spaces that should be inserted before this token. 210 unsigned SpacesRequiredBefore = 0; 211 212 /// \c true if it is allowed to break before this token. 213 bool CanBreakBefore = false; 214 215 /// \c true if this is the ">" of "template<..>". 216 bool ClosesTemplateDeclaration = false; 217 218 /// Number of parameters, if this is "(", "[" or "<". 219 unsigned ParameterCount = 0; 220 221 /// Number of parameters that are nested blocks, 222 /// if this is "(", "[" or "<". 223 unsigned BlockParameterCount = 0; 224 225 /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of 226 /// the surrounding bracket. 227 tok::TokenKind ParentBracket = tok::unknown; 228 229 /// A token can have a special role that can carry extra information 230 /// about the token's formatting. 231 std::unique_ptr<TokenRole> Role; 232 233 /// If this is an opening parenthesis, how are the parameters packed? 234 ParameterPackingKind PackingKind = PPK_Inconclusive; 235 236 /// The total length of the unwrapped line up to and including this 237 /// token. 
238 unsigned TotalLength = 0; 239 240 /// The original 0-based column of this token, including expanded tabs. 241 /// The configured TabWidth is used as tab width. 242 unsigned OriginalColumn = 0; 243 244 /// The length of following tokens until the next natural split point, 245 /// or the next token that can be broken. 246 unsigned UnbreakableTailLength = 0; 247 248 // FIXME: Come up with a 'cleaner' concept. 249 /// The binding strength of a token. This is a combined value of 250 /// operator precedence, parenthesis nesting, etc. 251 unsigned BindingStrength = 0; 252 253 /// The nesting level of this token, i.e. the number of surrounding (), 254 /// [], {} or <>. 255 unsigned NestingLevel = 0; 256 257 /// The indent level of this token. Copied from the surrounding line. 258 unsigned IndentLevel = 0; 259 260 /// Penalty for inserting a line break before this token. 261 unsigned SplitPenalty = 0; 262 263 /// If this is the first ObjC selector name in an ObjC method 264 /// definition or call, this contains the length of the longest name. 265 /// 266 /// This being set to 0 means that the selectors should not be colon-aligned, 267 /// e.g. because several of them are block-type. 268 unsigned LongestObjCSelectorName = 0; 269 270 /// If this is the first ObjC selector name in an ObjC method 271 /// definition or call, this contains the number of parts that the whole 272 /// selector consist of. 273 unsigned ObjCSelectorNameParts = 0; 274 275 /// The 0-based index of the parameter/argument. For ObjC it is set 276 /// for the selector name token. 277 /// For now calculated only for ObjC. 278 unsigned ParameterIndex = 0; 279 280 /// Stores the number of required fake parentheses and the 281 /// corresponding operator precedence. 282 /// 283 /// If multiple fake parentheses start at a token, this vector stores them in 284 /// reverse order, i.e. inner fake parenthesis first. 
285 SmallVector<prec::Level, 4> FakeLParens; 286 /// Insert this many fake ) after this token for correct indentation. 287 unsigned FakeRParens = 0; 288 289 /// \c true if this token starts a binary expression, i.e. has at least 290 /// one fake l_paren with a precedence greater than prec::Unknown. 291 bool StartsBinaryExpression = false; 292 /// \c true if this token ends a binary expression. 293 bool EndsBinaryExpression = false; 294 295 /// If this is an operator (or "."/"->") in a sequence of operators 296 /// with the same precedence, contains the 0-based operator index. 297 unsigned OperatorIndex = 0; 298 299 /// If this is an operator (or "."/"->") in a sequence of operators 300 /// with the same precedence, points to the next operator. 301 FormatToken *NextOperator = nullptr; 302 303 /// Is this token part of a \c DeclStmt defining multiple variables? 304 /// 305 /// Only set if \c Type == \c TT_StartOfName. 306 bool PartOfMultiVariableDeclStmt = false; 307 308 /// Does this line comment continue a line comment section? 309 /// 310 /// Only set to true if \c Type == \c TT_LineComment. 311 bool ContinuesLineCommentSection = false; 312 313 /// If this is a bracket, this points to the matching one. 314 FormatToken *MatchingParen = nullptr; 315 316 /// The previous token in the unwrapped line. 317 FormatToken *Previous = nullptr; 318 319 /// The next token in the unwrapped line. 320 FormatToken *Next = nullptr; 321 322 /// If this token starts a block, this contains all the unwrapped lines 323 /// in it. 324 SmallVector<AnnotatedLine *, 1> Children; 325 326 /// Stores the formatting decision for the token once it was made. 327 FormatDecision Decision = FD_Unformatted; 328 329 /// If \c true, this token has been fully formatted (indented and 330 /// potentially re-formatted inside), and we do not allow further formatting 331 /// changes. 
332 bool Finalized = false; 333 334 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } 335 bool is(TokenType TT) const { return Type == TT; } 336 bool is(const IdentifierInfo *II) const { 337 return II && II == Tok.getIdentifierInfo(); 338 } 339 bool is(tok::PPKeywordKind Kind) const { 340 return Tok.getIdentifierInfo() && 341 Tok.getIdentifierInfo()->getPPKeywordID() == Kind; 342 } 343 template <typename A, typename B> bool isOneOf(A K1, B K2) const { 344 return is(K1) || is(K2); 345 } 346 template <typename A, typename B, typename... Ts> 347 bool isOneOf(A K1, B K2, Ts... Ks) const { 348 return is(K1) || isOneOf(K2, Ks...); 349 } 350 template <typename T> bool isNot(T Kind) const { return !is(Kind); } 351 352 bool isIf(bool AllowConstexprMacro = true) const { 353 return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) || 354 (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); 355 } 356 357 bool closesScopeAfterBlock() const { 358 if (BlockKind == BK_Block) 359 return true; 360 if (closesScope()) 361 return Previous->closesScopeAfterBlock(); 362 return false; 363 } 364 365 /// \c true if this token starts a sequence with the given tokens in order, 366 /// following the ``Next`` pointers, ignoring comments. 367 template <typename A, typename... Ts> 368 bool startsSequence(A K1, Ts... Tokens) const { 369 return startsSequenceInternal(K1, Tokens...); 370 } 371 372 /// \c true if this token ends a sequence with the given tokens in order, 373 /// following the ``Previous`` pointers, ignoring comments. 374 /// For example, given tokens [T1, T2, T3], the function returns true if 375 /// 3 tokens ending at this (ignoring comments) are [T3, T2, T1]. In other 376 /// words, the tokens passed to this function need to the reverse of the 377 /// order the tokens appear in code. 378 template <typename A, typename... Ts> 379 bool endsSequence(A K1, Ts... 
Tokens) const { 380 return endsSequenceInternal(K1, Tokens...); 381 } 382 383 bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } 384 385 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 386 return Tok.isObjCAtKeyword(Kind); 387 } 388 389 bool isAccessSpecifier(bool ColonRequired = true) const { 390 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 391 (!ColonRequired || (Next && Next->is(tok::colon))); 392 } 393 394 /// Determine whether the token is a simple-type-specifier. 395 bool isSimpleTypeSpecifier() const; 396 397 bool isObjCAccessSpecifier() const { 398 return is(tok::at) && Next && 399 (Next->isObjCAtKeyword(tok::objc_public) || 400 Next->isObjCAtKeyword(tok::objc_protected) || 401 Next->isObjCAtKeyword(tok::objc_package) || 402 Next->isObjCAtKeyword(tok::objc_private)); 403 } 404 405 /// Returns whether \p Tok is ([{ or an opening < of a template or in 406 /// protos. 407 bool opensScope() const { 408 if (is(TT_TemplateString) && TokenText.endswith("${")) 409 return true; 410 if (is(TT_DictLiteral) && is(tok::less)) 411 return true; 412 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, 413 TT_TemplateOpener); 414 } 415 /// Returns whether \p Tok is )]} or a closing > of a template or in 416 /// protos. 417 bool closesScope() const { 418 if (is(TT_TemplateString) && TokenText.startswith("}")) 419 return true; 420 if (is(TT_DictLiteral) && is(tok::greater)) 421 return true; 422 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, 423 TT_TemplateCloser); 424 } 425 426 /// Returns \c true if this is a "." or "->" accessing a member. 
427 bool isMemberAccess() const { 428 return isOneOf(tok::arrow, tok::period, tok::arrowstar) && 429 !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, 430 TT_LambdaArrow, TT_LeadingJavaAnnotation); 431 } 432 433 bool isUnaryOperator() const { 434 switch (Tok.getKind()) { 435 case tok::plus: 436 case tok::plusplus: 437 case tok::minus: 438 case tok::minusminus: 439 case tok::exclaim: 440 case tok::tilde: 441 case tok::kw_sizeof: 442 case tok::kw_alignof: 443 return true; 444 default: 445 return false; 446 } 447 } 448 449 bool isBinaryOperator() const { 450 // Comma is a binary operator, but does not behave as such wrt. formatting. 451 return getPrecedence() > prec::Comma; 452 } 453 454 bool isTrailingComment() const { 455 return is(tok::comment) && 456 (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); 457 } 458 459 /// Returns \c true if this is a keyword that can be used 460 /// like a function call (e.g. sizeof, typeid, ...). 461 bool isFunctionLikeKeyword() const { 462 switch (Tok.getKind()) { 463 case tok::kw_throw: 464 case tok::kw_typeid: 465 case tok::kw_return: 466 case tok::kw_sizeof: 467 case tok::kw_alignof: 468 case tok::kw_alignas: 469 case tok::kw_decltype: 470 case tok::kw_noexcept: 471 case tok::kw_static_assert: 472 case tok::kw___attribute: 473 return true; 474 default: 475 return false; 476 } 477 } 478 479 /// Returns \c true if this is a string literal that's like a label, 480 /// e.g. ends with "=" or ":". 
481 bool isLabelString() const { 482 if (!is(tok::string_literal)) 483 return false; 484 StringRef Content = TokenText; 485 if (Content.startswith("\"") || Content.startswith("'")) 486 Content = Content.drop_front(1); 487 if (Content.endswith("\"") || Content.endswith("'")) 488 Content = Content.drop_back(1); 489 Content = Content.trim(); 490 return Content.size() > 1 && 491 (Content.back() == ':' || Content.back() == '='); 492 } 493 494 /// Returns actual token start location without leading escaped 495 /// newlines and whitespace. 496 /// 497 /// This can be different to Tok.getLocation(), which includes leading escaped 498 /// newlines. 499 SourceLocation getStartOfNonWhitespace() const { 500 return WhitespaceRange.getEnd(); 501 } 502 503 prec::Level getPrecedence() const { 504 return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, 505 /*CPlusPlus11=*/true); 506 } 507 508 /// Returns the previous token ignoring comments. 509 FormatToken *getPreviousNonComment() const { 510 FormatToken *Tok = Previous; 511 while (Tok && Tok->is(tok::comment)) 512 Tok = Tok->Previous; 513 return Tok; 514 } 515 516 /// Returns the next token ignoring comments. 517 const FormatToken *getNextNonComment() const { 518 const FormatToken *Tok = Next; 519 while (Tok && Tok->is(tok::comment)) 520 Tok = Tok->Next; 521 return Tok; 522 } 523 524 /// Returns \c true if this tokens starts a block-type list, i.e. a 525 /// list that should be indented with a block indent. 526 bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { 527 // C# Does not indent object initialisers as continuations. 
528 if (is(tok::l_brace) && BlockKind == BK_BracedInit && Style.isCSharp()) 529 return true; 530 if (is(TT_TemplateString) && opensScope()) 531 return true; 532 return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || 533 (is(tok::l_brace) && 534 (BlockKind == BK_Block || is(TT_DictLiteral) || 535 (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || 536 (is(tok::less) && (Style.Language == FormatStyle::LK_Proto || 537 Style.Language == FormatStyle::LK_TextProto)); 538 } 539 540 /// Returns whether the token is the left square bracket of a C++ 541 /// structured binding declaration. 542 bool isCppStructuredBinding(const FormatStyle &Style) const { 543 if (!Style.isCpp() || isNot(tok::l_square)) 544 return false; 545 const FormatToken *T = this; 546 do { 547 T = T->getPreviousNonComment(); 548 } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, 549 tok::ampamp)); 550 return T && T->is(tok::kw_auto); 551 } 552 553 /// Same as opensBlockOrBlockTypeList, but for the closing token. 554 bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { 555 if (is(TT_TemplateString) && closesScope()) 556 return true; 557 return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); 558 } 559 560 /// Return the actual namespace token, if this token starts a namespace 561 /// block. 562 const FormatToken *getNamespaceToken() const { 563 const FormatToken *NamespaceTok = this; 564 if (is(tok::comment)) 565 NamespaceTok = NamespaceTok->getNextNonComment(); 566 // Detect "(inline|export)? namespace" in the beginning of a line. 567 if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export)) 568 NamespaceTok = NamespaceTok->getNextNonComment(); 569 return NamespaceTok && 570 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) 571 ? NamespaceTok 572 : nullptr; 573 } 574 575 private: 576 // Disallow copying. 
577 FormatToken(const FormatToken &) = delete; 578 void operator=(const FormatToken &) = delete; 579 580 template <typename A, typename... Ts> 581 bool startsSequenceInternal(A K1, Ts... Tokens) const { 582 if (is(tok::comment) && Next) 583 return Next->startsSequenceInternal(K1, Tokens...); 584 return is(K1) && Next && Next->startsSequenceInternal(Tokens...); 585 } 586 587 template <typename A> bool startsSequenceInternal(A K1) const { 588 if (is(tok::comment) && Next) 589 return Next->startsSequenceInternal(K1); 590 return is(K1); 591 } 592 593 template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const { 594 if (is(tok::comment) && Previous) 595 return Previous->endsSequenceInternal(K1); 596 return is(K1); 597 } 598 599 template <typename A, typename... Ts> 600 bool endsSequenceInternal(A K1, Ts... Tokens) const { 601 if (is(tok::comment) && Previous) 602 return Previous->endsSequenceInternal(K1, Tokens...); 603 return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); 604 } 605 606 TokenType Type = TT_Unknown; 607 }; 608 609 class ContinuationIndenter; 610 struct LineState; 611 612 class TokenRole { 613 public: 614 TokenRole(const FormatStyle &Style) : Style(Style) {} 615 virtual ~TokenRole(); 616 617 /// After the \c TokenAnnotator has finished annotating all the tokens, 618 /// this function precomputes required information for formatting. 619 virtual void precomputeFormattingInfos(const FormatToken *Token); 620 621 /// Apply the special formatting that the given role demands. 622 /// 623 /// Assumes that the token having this role is already formatted. 624 /// 625 /// Continues formatting from \p State leaving indentation to \p Indenter and 626 /// returns the total penalty that this formatting incurs. 
627 virtual unsigned formatFromToken(LineState &State, 628 ContinuationIndenter *Indenter, 629 bool DryRun) { 630 return 0; 631 } 632 633 /// Same as \c formatFromToken, but assumes that the first token has 634 /// already been set thereby deciding on the first line break. 635 virtual unsigned formatAfterToken(LineState &State, 636 ContinuationIndenter *Indenter, 637 bool DryRun) { 638 return 0; 639 } 640 641 /// Notifies the \c Role that a comma was found. 642 virtual void CommaFound(const FormatToken *Token) {} 643 644 virtual const FormatToken *lastComma() { return nullptr; } 645 646 protected: 647 const FormatStyle &Style; 648 }; 649 650 class CommaSeparatedList : public TokenRole { 651 public: 652 CommaSeparatedList(const FormatStyle &Style) 653 : TokenRole(Style), HasNestedBracedList(false) {} 654 655 void precomputeFormattingInfos(const FormatToken *Token) override; 656 657 unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, 658 bool DryRun) override; 659 660 unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, 661 bool DryRun) override; 662 663 /// Adds \p Token as the next comma to the \c CommaSeparated list. 664 void CommaFound(const FormatToken *Token) override { 665 Commas.push_back(Token); 666 } 667 668 const FormatToken *lastComma() override { 669 if (Commas.empty()) 670 return nullptr; 671 return Commas.back(); 672 } 673 674 private: 675 /// A struct that holds information on how to format a given list with 676 /// a specific number of columns. 677 struct ColumnFormat { 678 /// The number of columns to use. 679 unsigned Columns; 680 681 /// The total width in characters. 682 unsigned TotalWidth; 683 684 /// The number of lines required for this format. 685 unsigned LineCount; 686 687 /// The size of each column in characters. 688 SmallVector<unsigned, 8> ColumnSizes; 689 }; 690 691 /// Calculate which \c ColumnFormat fits best into 692 /// \p RemainingCharacters. 
693 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; 694 695 /// The ordered \c FormatTokens making up the commas of this list. 696 SmallVector<const FormatToken *, 8> Commas; 697 698 /// The length of each of the list's items in characters including the 699 /// trailing comma. 700 SmallVector<unsigned, 8> ItemLengths; 701 702 /// Precomputed formats that can be used for this list. 703 SmallVector<ColumnFormat, 4> Formats; 704 705 bool HasNestedBracedList; 706 }; 707 708 /// Encapsulates keywords that are context sensitive or for languages not 709 /// properly supported by Clang's lexer. 710 struct AdditionalKeywords { 711 AdditionalKeywords(IdentifierTable &IdentTable) { 712 kw_final = &IdentTable.get("final"); 713 kw_override = &IdentTable.get("override"); 714 kw_in = &IdentTable.get("in"); 715 kw_of = &IdentTable.get("of"); 716 kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM"); 717 kw_CF_ENUM = &IdentTable.get("CF_ENUM"); 718 kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); 719 kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM"); 720 kw_NS_ENUM = &IdentTable.get("NS_ENUM"); 721 kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); 722 723 kw_as = &IdentTable.get("as"); 724 kw_async = &IdentTable.get("async"); 725 kw_await = &IdentTable.get("await"); 726 kw_declare = &IdentTable.get("declare"); 727 kw_finally = &IdentTable.get("finally"); 728 kw_from = &IdentTable.get("from"); 729 kw_function = &IdentTable.get("function"); 730 kw_get = &IdentTable.get("get"); 731 kw_import = &IdentTable.get("import"); 732 kw_infer = &IdentTable.get("infer"); 733 kw_is = &IdentTable.get("is"); 734 kw_let = &IdentTable.get("let"); 735 kw_module = &IdentTable.get("module"); 736 kw_readonly = &IdentTable.get("readonly"); 737 kw_set = &IdentTable.get("set"); 738 kw_type = &IdentTable.get("type"); 739 kw_typeof = &IdentTable.get("typeof"); 740 kw_var = &IdentTable.get("var"); 741 kw_yield = &IdentTable.get("yield"); 742 743 kw_abstract = 
&IdentTable.get("abstract"); 744 kw_assert = &IdentTable.get("assert"); 745 kw_extends = &IdentTable.get("extends"); 746 kw_implements = &IdentTable.get("implements"); 747 kw_instanceof = &IdentTable.get("instanceof"); 748 kw_interface = &IdentTable.get("interface"); 749 kw_native = &IdentTable.get("native"); 750 kw_package = &IdentTable.get("package"); 751 kw_synchronized = &IdentTable.get("synchronized"); 752 kw_throws = &IdentTable.get("throws"); 753 kw___except = &IdentTable.get("__except"); 754 kw___has_include = &IdentTable.get("__has_include"); 755 kw___has_include_next = &IdentTable.get("__has_include_next"); 756 757 kw_mark = &IdentTable.get("mark"); 758 759 kw_extend = &IdentTable.get("extend"); 760 kw_option = &IdentTable.get("option"); 761 kw_optional = &IdentTable.get("optional"); 762 kw_repeated = &IdentTable.get("repeated"); 763 kw_required = &IdentTable.get("required"); 764 kw_returns = &IdentTable.get("returns"); 765 766 kw_signals = &IdentTable.get("signals"); 767 kw_qsignals = &IdentTable.get("Q_SIGNALS"); 768 kw_slots = &IdentTable.get("slots"); 769 kw_qslots = &IdentTable.get("Q_SLOTS"); 770 771 // C# keywords 772 kw_dollar = &IdentTable.get("dollar"); 773 kw_base = &IdentTable.get("base"); 774 kw_byte = &IdentTable.get("byte"); 775 kw_checked = &IdentTable.get("checked"); 776 kw_decimal = &IdentTable.get("decimal"); 777 kw_delegate = &IdentTable.get("delegate"); 778 kw_event = &IdentTable.get("event"); 779 kw_fixed = &IdentTable.get("fixed"); 780 kw_foreach = &IdentTable.get("foreach"); 781 kw_implicit = &IdentTable.get("implicit"); 782 kw_internal = &IdentTable.get("internal"); 783 kw_lock = &IdentTable.get("lock"); 784 kw_null = &IdentTable.get("null"); 785 kw_object = &IdentTable.get("object"); 786 kw_out = &IdentTable.get("out"); 787 kw_params = &IdentTable.get("params"); 788 kw_ref = &IdentTable.get("ref"); 789 kw_string = &IdentTable.get("string"); 790 kw_stackalloc = &IdentTable.get("stackalloc"); 791 kw_sbyte = 
&IdentTable.get("sbyte"); 792 kw_sealed = &IdentTable.get("sealed"); 793 kw_uint = &IdentTable.get("uint"); 794 kw_ulong = &IdentTable.get("ulong"); 795 kw_unchecked = &IdentTable.get("unchecked"); 796 kw_unsafe = &IdentTable.get("unsafe"); 797 kw_ushort = &IdentTable.get("ushort"); 798 kw_when = &IdentTable.get("when"); 799 kw_where = &IdentTable.get("where"); 800 801 // Keep this at the end of the constructor to make sure everything here 802 // is 803 // already initialized. 804 JsExtraKeywords = std::unordered_set<IdentifierInfo *>( 805 {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 806 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, 807 kw_set, kw_type, kw_typeof, kw_var, kw_yield, 808 // Keywords from the Java section. 809 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 810 811 CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>( 812 {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event, 813 kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal, 814 kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params, 815 kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed, 816 kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, kw_when, 817 kw_where, 818 // Keywords from the JavaScript section. 819 kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 820 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, 821 kw_set, kw_type, kw_typeof, kw_var, kw_yield, 822 // Keywords from the Java section. 823 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 824 } 825 826 // Context sensitive keywords. 
827 IdentifierInfo *kw_final; 828 IdentifierInfo *kw_override; 829 IdentifierInfo *kw_in; 830 IdentifierInfo *kw_of; 831 IdentifierInfo *kw_CF_CLOSED_ENUM; 832 IdentifierInfo *kw_CF_ENUM; 833 IdentifierInfo *kw_CF_OPTIONS; 834 IdentifierInfo *kw_NS_CLOSED_ENUM; 835 IdentifierInfo *kw_NS_ENUM; 836 IdentifierInfo *kw_NS_OPTIONS; 837 IdentifierInfo *kw___except; 838 IdentifierInfo *kw___has_include; 839 IdentifierInfo *kw___has_include_next; 840 841 // JavaScript keywords. 842 IdentifierInfo *kw_as; 843 IdentifierInfo *kw_async; 844 IdentifierInfo *kw_await; 845 IdentifierInfo *kw_declare; 846 IdentifierInfo *kw_finally; 847 IdentifierInfo *kw_from; 848 IdentifierInfo *kw_function; 849 IdentifierInfo *kw_get; 850 IdentifierInfo *kw_import; 851 IdentifierInfo *kw_infer; 852 IdentifierInfo *kw_is; 853 IdentifierInfo *kw_let; 854 IdentifierInfo *kw_module; 855 IdentifierInfo *kw_readonly; 856 IdentifierInfo *kw_set; 857 IdentifierInfo *kw_type; 858 IdentifierInfo *kw_typeof; 859 IdentifierInfo *kw_var; 860 IdentifierInfo *kw_yield; 861 862 // Java keywords. 863 IdentifierInfo *kw_abstract; 864 IdentifierInfo *kw_assert; 865 IdentifierInfo *kw_extends; 866 IdentifierInfo *kw_implements; 867 IdentifierInfo *kw_instanceof; 868 IdentifierInfo *kw_interface; 869 IdentifierInfo *kw_native; 870 IdentifierInfo *kw_package; 871 IdentifierInfo *kw_synchronized; 872 IdentifierInfo *kw_throws; 873 874 // Pragma keywords. 875 IdentifierInfo *kw_mark; 876 877 // Proto keywords. 878 IdentifierInfo *kw_extend; 879 IdentifierInfo *kw_option; 880 IdentifierInfo *kw_optional; 881 IdentifierInfo *kw_repeated; 882 IdentifierInfo *kw_required; 883 IdentifierInfo *kw_returns; 884 885 // QT keywords. 
886 IdentifierInfo *kw_signals; 887 IdentifierInfo *kw_qsignals; 888 IdentifierInfo *kw_slots; 889 IdentifierInfo *kw_qslots; 890 891 // C# keywords 892 IdentifierInfo *kw_dollar; 893 IdentifierInfo *kw_base; 894 IdentifierInfo *kw_byte; 895 IdentifierInfo *kw_checked; 896 IdentifierInfo *kw_decimal; 897 IdentifierInfo *kw_delegate; 898 IdentifierInfo *kw_event; 899 IdentifierInfo *kw_fixed; 900 IdentifierInfo *kw_foreach; 901 IdentifierInfo *kw_implicit; 902 IdentifierInfo *kw_internal; 903 904 IdentifierInfo *kw_lock; 905 IdentifierInfo *kw_null; 906 IdentifierInfo *kw_object; 907 IdentifierInfo *kw_out; 908 909 IdentifierInfo *kw_params; 910 911 IdentifierInfo *kw_ref; 912 IdentifierInfo *kw_string; 913 IdentifierInfo *kw_stackalloc; 914 IdentifierInfo *kw_sbyte; 915 IdentifierInfo *kw_sealed; 916 IdentifierInfo *kw_uint; 917 IdentifierInfo *kw_ulong; 918 IdentifierInfo *kw_unchecked; 919 IdentifierInfo *kw_unsafe; 920 IdentifierInfo *kw_ushort; 921 IdentifierInfo *kw_when; 922 IdentifierInfo *kw_where; 923 924 /// Returns \c true if \p Tok is a true JavaScript identifier, returns 925 /// \c false if it is a keyword or a pseudo keyword. 926 /// If \c AcceptIdentifierName is true, returns true not only for keywords, 927 // but also for IdentifierName tokens (aka pseudo-keywords), such as 928 // ``yield``. 
929 bool IsJavaScriptIdentifier(const FormatToken &Tok, 930 bool AcceptIdentifierName = true) const { 931 // Based on the list of JavaScript & TypeScript keywords here: 932 // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74 933 switch (Tok.Tok.getKind()) { 934 case tok::kw_break: 935 case tok::kw_case: 936 case tok::kw_catch: 937 case tok::kw_class: 938 case tok::kw_continue: 939 case tok::kw_const: 940 case tok::kw_default: 941 case tok::kw_delete: 942 case tok::kw_do: 943 case tok::kw_else: 944 case tok::kw_enum: 945 case tok::kw_export: 946 case tok::kw_false: 947 case tok::kw_for: 948 case tok::kw_if: 949 case tok::kw_import: 950 case tok::kw_module: 951 case tok::kw_new: 952 case tok::kw_private: 953 case tok::kw_protected: 954 case tok::kw_public: 955 case tok::kw_return: 956 case tok::kw_static: 957 case tok::kw_switch: 958 case tok::kw_this: 959 case tok::kw_throw: 960 case tok::kw_true: 961 case tok::kw_try: 962 case tok::kw_typeof: 963 case tok::kw_void: 964 case tok::kw_while: 965 // These are JS keywords that are lexed by LLVM/clang as keywords. 966 return false; 967 case tok::identifier: { 968 // For identifiers, make sure they are true identifiers, excluding the 969 // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords). 970 bool IsPseudoKeyword = 971 JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) != 972 JsExtraKeywords.end(); 973 return AcceptIdentifierName || !IsPseudoKeyword; 974 } 975 default: 976 // Other keywords are handled in the switch below, to avoid problems due 977 // to duplicate case labels when using the #include trick. 978 break; 979 } 980 981 switch (Tok.Tok.getKind()) { 982 // Handle C++ keywords not included above: these are all JS identifiers. 
983 #define KEYWORD(X, Y) case tok::kw_##X: 984 #include "clang/Basic/TokenKinds.def" 985 // #undef KEYWORD is not needed -- it's #undef-ed at the end of 986 // TokenKinds.def 987 return true; 988 default: 989 // All other tokens (punctuation etc) are not JS identifiers. 990 return false; 991 } 992 } 993 994 /// Returns \c true if \p Tok is a C# keyword, returns 995 /// \c false if it is a anything else. 996 bool isCSharpKeyword(const FormatToken &Tok) const { 997 switch (Tok.Tok.getKind()) { 998 case tok::kw_bool: 999 case tok::kw_break: 1000 case tok::kw_case: 1001 case tok::kw_catch: 1002 case tok::kw_char: 1003 case tok::kw_class: 1004 case tok::kw_const: 1005 case tok::kw_continue: 1006 case tok::kw_default: 1007 case tok::kw_do: 1008 case tok::kw_double: 1009 case tok::kw_else: 1010 case tok::kw_enum: 1011 case tok::kw_explicit: 1012 case tok::kw_extern: 1013 case tok::kw_false: 1014 case tok::kw_float: 1015 case tok::kw_for: 1016 case tok::kw_goto: 1017 case tok::kw_if: 1018 case tok::kw_int: 1019 case tok::kw_long: 1020 case tok::kw_namespace: 1021 case tok::kw_new: 1022 case tok::kw_operator: 1023 case tok::kw_private: 1024 case tok::kw_protected: 1025 case tok::kw_public: 1026 case tok::kw_return: 1027 case tok::kw_short: 1028 case tok::kw_sizeof: 1029 case tok::kw_static: 1030 case tok::kw_struct: 1031 case tok::kw_switch: 1032 case tok::kw_this: 1033 case tok::kw_throw: 1034 case tok::kw_true: 1035 case tok::kw_try: 1036 case tok::kw_typeof: 1037 case tok::kw_using: 1038 case tok::kw_virtual: 1039 case tok::kw_void: 1040 case tok::kw_volatile: 1041 case tok::kw_while: 1042 return true; 1043 default: 1044 return Tok.is(tok::identifier) && 1045 CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == 1046 CSharpExtraKeywords.end(); 1047 } 1048 } 1049 1050 private: 1051 /// The JavaScript keywords beyond the C++ keyword set. 
1052 std::unordered_set<IdentifierInfo *> JsExtraKeywords; 1053 1054 /// The C# keywords beyond the C++ keyword set 1055 std::unordered_set<IdentifierInfo *> CSharpExtraKeywords; 1056 }; 1057 1058 } // namespace format 1059 } // namespace clang 1060 1061 #endif 1062