//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the declaration of the FormatToken, a wrapper
/// around Token with additional information related to formatting.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H

#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include <memory>
#include <unordered_set>

namespace clang {
namespace format {

// X-macro listing the name of every token type. A user defines TYPE(X) to the
// desired expansion, expands LIST_TOKEN_TYPES, and undefines TYPE again (see
// enum TokenType below, which turns every entry X into an enumerator TT_X).
// The order of the entries determines the enumerator values.
#define LIST_TOKEN_TYPES \
  TYPE(ArrayInitializerLSquare) \
  TYPE(ArraySubscriptLSquare) \
  TYPE(AttributeColon) \
  TYPE(AttributeMacro) \
  TYPE(AttributeParen) \
  TYPE(AttributeSquare) \
  TYPE(BinaryOperator) \
  TYPE(BitFieldColon) \
  TYPE(BlockComment) \
  TYPE(BracedListLBrace) \
  TYPE(CastRParen) \
  TYPE(ClassLBrace) \
  TYPE(CompoundRequirementLBrace) \
  TYPE(ConditionalExpr) \
  TYPE(ConflictAlternative) \
  TYPE(ConflictEnd) \
  TYPE(ConflictStart) \
  /* l_brace of if/for/while */ \
  TYPE(ControlStatementLBrace) \
  TYPE(CppCastLParen) \
  TYPE(CSharpGenericTypeConstraint) \
  TYPE(CSharpGenericTypeConstraintColon) \
  TYPE(CSharpGenericTypeConstraintComma) \
  TYPE(CSharpNamedArgumentColon) \
  TYPE(CSharpNullable) \
  TYPE(CSharpNullConditionalLSquare) \
  TYPE(CSharpStringLiteral) \
  TYPE(CtorInitializerColon) \
  TYPE(CtorInitializerComma) \
  TYPE(DesignatedInitializerLSquare) \
  TYPE(DesignatedInitializerPeriod) \
  TYPE(DictLiteral) \
  TYPE(ElseLBrace) \
  TYPE(EnumLBrace) \
  TYPE(FatArrow) \
  TYPE(ForEachMacro) \
  TYPE(FunctionAnnotationRParen) \
  TYPE(FunctionDeclarationName) \
  TYPE(FunctionLBrace) \
  TYPE(FunctionLikeOrFreestandingMacro) \
  TYPE(FunctionTypeLParen) \
  TYPE(IfMacro) \
  TYPE(ImplicitStringLiteral) \
  TYPE(InheritanceColon) \
  TYPE(InheritanceComma) \
  TYPE(InlineASMBrace) \
  TYPE(InlineASMColon) \
  TYPE(InlineASMSymbolicNameLSquare) \
  TYPE(JavaAnnotation) \
  TYPE(JsAndAndEqual) \
  TYPE(JsComputedPropertyName) \
  TYPE(JsExponentiation) \
  TYPE(JsExponentiationEqual) \
  TYPE(JsPipePipeEqual) \
  TYPE(JsPrivateIdentifier) \
  TYPE(JsTypeColon) \
  TYPE(JsTypeOperator) \
  TYPE(JsTypeOptionalQuestion) \
  TYPE(LambdaArrow) \
  TYPE(LambdaLBrace) \
  TYPE(LambdaLSquare) \
  TYPE(LeadingJavaAnnotation) \
  TYPE(LineComment) \
  TYPE(MacroBlockBegin) \
  TYPE(MacroBlockEnd) \
  TYPE(ModulePartitionColon) \
  TYPE(NamespaceMacro) \
  TYPE(NonNullAssertion) \
  TYPE(NullCoalescingEqual) \
  TYPE(NullCoalescingOperator) \
  TYPE(NullPropagatingOperator) \
  TYPE(ObjCBlockLBrace) \
  TYPE(ObjCBlockLParen) \
  TYPE(ObjCDecl) \
  TYPE(ObjCForIn) \
  TYPE(ObjCMethodExpr) \
  TYPE(ObjCMethodSpecifier) \
  TYPE(ObjCProperty) \
  TYPE(ObjCStringLiteral) \
  TYPE(OverloadedOperator) \
  TYPE(OverloadedOperatorLParen) \
  TYPE(PointerOrReference) \
  TYPE(ProtoExtensionLSquare) \
  TYPE(PureVirtualSpecifier) \
  TYPE(RangeBasedForLoopColon) \
  TYPE(RecordLBrace) \
  TYPE(RegexLiteral) \
  TYPE(RequiresClause) \
  TYPE(RequiresClauseInARequiresExpression) \
  TYPE(RequiresExpression) \
  TYPE(RequiresExpressionLBrace) \
  TYPE(RequiresExpressionLParen) \
  TYPE(SelectorName) \
  TYPE(StartOfName) \
  TYPE(StatementAttributeLikeMacro) \
  TYPE(StatementMacro) \
  TYPE(StructLBrace) \
  TYPE(StructuredBindingLSquare) \
  TYPE(TemplateCloser) \
  TYPE(TemplateOpener) \
  TYPE(TemplateString) \
  TYPE(TrailingAnnotation) \
  TYPE(TrailingReturnArrow) \
  TYPE(TrailingUnaryOperator) \
  TYPE(TypeDeclarationParen) \
  TYPE(TypenameMacro) \
  TYPE(UnaryOperator) \
  TYPE(UnionLBrace) \
  TYPE(UntouchableMacroFunc) \
  TYPE(Unknown)

/// Determines the semantic type of a syntactic token, e.g. whether "<" is a
/// template opener or binary operator.
///
/// The enumerators are generated from \c LIST_TOKEN_TYPES: every entry \c X
/// becomes \c TT_X, in list order.
enum TokenType : uint8_t {
#define TYPE(X) TT_##X,
  LIST_TOKEN_TYPES
#undef TYPE
  // Not a token type itself: as the last enumerator, its value equals the
  // number of entries in LIST_TOKEN_TYPES.
  NUM_TOKEN_TYPES
};

/// Determines the name of a token type.
const char *getTokenTypeName(TokenType Type);

// Represents what type of block a set of braces open.
enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit };

// The packing kind of a function's parameters.
enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };

// The formatting decision stored for a token (see FormatToken::getDecision()).
// FD_Unformatted is the value a default-constructed FormatToken starts with.
enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break };

/// Roles a token can take in a configured macro expansion.
enum MacroRole {
  /// The token was expanded from a macro argument when formatting the expanded
  /// token sequence.
  MR_ExpandedArg,
  /// The token is part of a macro argument that was previously formatted as
  /// expansion when formatting the unexpanded macro call.
  MR_UnexpandedArg,
  /// The token was expanded from a macro definition, and is not visible as part
  /// of the macro call.
  MR_Hidden,
};

struct FormatToken;

/// Contains information on the token's role in a macro expansion.
///
/// Given the following definitions:
/// A(X) = [ X ]
/// B(X) = < X >
/// C(X) = X
///
/// Consider the macro call:
/// A({B(C(C(x)))}) -> [{<x>}]
///
/// In this case, the tokens of the unexpanded macro call will have the
/// following relevant entries in their macro context (note that formatting
/// the unexpanded macro call happens *after* formatting the expanded macro
/// call):
///                   A( { B( C( C(x) ) ) } )
/// Role:             NN U NN NN NNUN N N U N  (N=None, U=UnexpandedArg)
///
///                   [  { <       x    > } ]
/// Role:             H  E H       E    H E H  (H=Hidden, E=ExpandedArg)
/// ExpandedFrom[0]:  A  A A       A    A A A
/// ExpandedFrom[1]:       B       B    B
/// ExpandedFrom[2]:               C
/// ExpandedFrom[3]:               C
/// StartOfExpansion: 1  0 1       2    0 0 0
/// EndOfExpansion:   0  0 0       2    1 0 1
struct MacroExpansion {
  /// Creates the macro context for a token with the given \p Role; the
  /// expansion stack starts out empty and both counters start at 0.
  MacroExpansion(MacroRole Role) : Role(Role) {}

  /// The token's role in the macro expansion.
  /// When formatting an expanded macro, all tokens that are part of macro
  /// arguments will be MR_ExpandedArg, while all tokens that are not visible in
  /// the macro call will be MR_Hidden.
  /// When formatting an unexpanded macro call, all tokens that are part of
  /// macro arguments will be MR_UnexpandedArg.
  MacroRole Role;

  /// The stack of macro call identifier tokens this token was expanded from.
  llvm::SmallVector<FormatToken *, 1> ExpandedFrom;

  /// The number of expansions of which this token is the first entry.
  unsigned StartOfExpansion = 0;

  /// The number of currently open expansions in \c ExpandedFrom this token is
  /// the last token in.
  unsigned EndOfExpansion = 0;
};

class TokenRole;
class AnnotatedLine;

/// A wrapper around a \c Token storing information about the
/// whitespace characters preceding it.
227 struct FormatToken { 228 FormatToken() 229 : HasUnescapedNewline(false), IsMultiline(false), IsFirst(false), 230 MustBreakBefore(false), IsUnterminatedLiteral(false), 231 CanBreakBefore(false), ClosesTemplateDeclaration(false), 232 StartsBinaryExpression(false), EndsBinaryExpression(false), 233 PartOfMultiVariableDeclStmt(false), ContinuesLineCommentSection(false), 234 Finalized(false), ClosesRequiresClause(false), BlockKind(BK_Unknown), 235 Decision(FD_Unformatted), PackingKind(PPK_Inconclusive), 236 TypeIsFinalized(false), Type(TT_Unknown) {} 237 238 /// The \c Token. 239 Token Tok; 240 241 /// The raw text of the token. 242 /// 243 /// Contains the raw token text without leading whitespace and without leading 244 /// escaped newlines. 245 StringRef TokenText; 246 247 /// A token can have a special role that can carry extra information 248 /// about the token's formatting. 249 /// FIXME: Make FormatToken for parsing and AnnotatedToken two different 250 /// classes and make this a unique_ptr in the AnnotatedToken class. 251 std::shared_ptr<TokenRole> Role; 252 253 /// The range of the whitespace immediately preceding the \c Token. 254 SourceRange WhitespaceRange; 255 256 /// Whether there is at least one unescaped newline before the \c 257 /// Token. 258 unsigned HasUnescapedNewline : 1; 259 260 /// Whether the token text contains newlines (escaped or not). 261 unsigned IsMultiline : 1; 262 263 /// Indicates that this is the first token of the file. 264 unsigned IsFirst : 1; 265 266 /// Whether there must be a line break before this token. 267 /// 268 /// This happens for example when a preprocessor directive ended directly 269 /// before the token. 270 unsigned MustBreakBefore : 1; 271 272 /// Set to \c true if this token is an unterminated literal. 273 unsigned IsUnterminatedLiteral : 1; 274 275 /// \c true if it is allowed to break before this token. 276 unsigned CanBreakBefore : 1; 277 278 /// \c true if this is the ">" of "template<..>". 
279 unsigned ClosesTemplateDeclaration : 1; 280 281 /// \c true if this token starts a binary expression, i.e. has at least 282 /// one fake l_paren with a precedence greater than prec::Unknown. 283 unsigned StartsBinaryExpression : 1; 284 /// \c true if this token ends a binary expression. 285 unsigned EndsBinaryExpression : 1; 286 287 /// Is this token part of a \c DeclStmt defining multiple variables? 288 /// 289 /// Only set if \c Type == \c TT_StartOfName. 290 unsigned PartOfMultiVariableDeclStmt : 1; 291 292 /// Does this line comment continue a line comment section? 293 /// 294 /// Only set to true if \c Type == \c TT_LineComment. 295 unsigned ContinuesLineCommentSection : 1; 296 297 /// If \c true, this token has been fully formatted (indented and 298 /// potentially re-formatted inside), and we do not allow further formatting 299 /// changes. 300 unsigned Finalized : 1; 301 302 /// \c true if this is the last token within requires clause. 303 unsigned ClosesRequiresClause : 1; 304 305 private: 306 /// Contains the kind of block if this token is a brace. 307 unsigned BlockKind : 2; 308 309 public: 310 BraceBlockKind getBlockKind() const { 311 return static_cast<BraceBlockKind>(BlockKind); 312 } 313 void setBlockKind(BraceBlockKind BBK) { 314 BlockKind = BBK; 315 assert(getBlockKind() == BBK && "BraceBlockKind overflow!"); 316 } 317 318 private: 319 /// Stores the formatting decision for the token once it was made. 320 unsigned Decision : 2; 321 322 public: 323 FormatDecision getDecision() const { 324 return static_cast<FormatDecision>(Decision); 325 } 326 void setDecision(FormatDecision D) { 327 Decision = D; 328 assert(getDecision() == D && "FormatDecision overflow!"); 329 } 330 331 private: 332 /// If this is an opening parenthesis, how are the parameters packed? 
333 unsigned PackingKind : 2; 334 335 public: 336 ParameterPackingKind getPackingKind() const { 337 return static_cast<ParameterPackingKind>(PackingKind); 338 } 339 void setPackingKind(ParameterPackingKind K) { 340 PackingKind = K; 341 assert(getPackingKind() == K && "ParameterPackingKind overflow!"); 342 } 343 344 private: 345 unsigned TypeIsFinalized : 1; 346 TokenType Type; 347 348 public: 349 /// Returns the token's type, e.g. whether "<" is a template opener or 350 /// binary operator. 351 TokenType getType() const { return Type; } 352 void setType(TokenType T) { 353 assert((!TypeIsFinalized || T == Type) && 354 "Please use overwriteFixedType to change a fixed type."); 355 Type = T; 356 } 357 /// Sets the type and also the finalized flag. This prevents the type to be 358 /// reset in TokenAnnotator::resetTokenMetadata(). If the type needs to be set 359 /// to another one please use overwriteFixedType, or even better remove the 360 /// need to reassign the type. 361 void setFinalizedType(TokenType T) { 362 Type = T; 363 TypeIsFinalized = true; 364 } 365 void overwriteFixedType(TokenType T) { 366 TypeIsFinalized = false; 367 setType(T); 368 } 369 bool isTypeFinalized() const { return TypeIsFinalized; } 370 371 /// The number of newlines immediately before the \c Token. 372 /// 373 /// This can be used to determine what the user wrote in the original code 374 /// and thereby e.g. leave an empty line between two function definitions. 375 unsigned NewlinesBefore = 0; 376 377 /// The offset just past the last '\n' in this token's leading 378 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 379 unsigned LastNewlineOffset = 0; 380 381 /// The width of the non-whitespace parts of the token (or its first 382 /// line for multi-line tokens) in columns. 383 /// We need this to correctly measure number of columns a token spans. 384 unsigned ColumnWidth = 0; 385 386 /// Contains the width in columns of the last line of a multi-line 387 /// token. 
388 unsigned LastLineColumnWidth = 0; 389 390 /// The number of spaces that should be inserted before this token. 391 unsigned SpacesRequiredBefore = 0; 392 393 /// Number of parameters, if this is "(", "[" or "<". 394 unsigned ParameterCount = 0; 395 396 /// Number of parameters that are nested blocks, 397 /// if this is "(", "[" or "<". 398 unsigned BlockParameterCount = 0; 399 400 /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of 401 /// the surrounding bracket. 402 tok::TokenKind ParentBracket = tok::unknown; 403 404 /// The total length of the unwrapped line up to and including this 405 /// token. 406 unsigned TotalLength = 0; 407 408 /// The original 0-based column of this token, including expanded tabs. 409 /// The configured TabWidth is used as tab width. 410 unsigned OriginalColumn = 0; 411 412 /// The length of following tokens until the next natural split point, 413 /// or the next token that can be broken. 414 unsigned UnbreakableTailLength = 0; 415 416 // FIXME: Come up with a 'cleaner' concept. 417 /// The binding strength of a token. This is a combined value of 418 /// operator precedence, parenthesis nesting, etc. 419 unsigned BindingStrength = 0; 420 421 /// The nesting level of this token, i.e. the number of surrounding (), 422 /// [], {} or <>. 423 unsigned NestingLevel = 0; 424 425 /// The indent level of this token. Copied from the surrounding line. 426 unsigned IndentLevel = 0; 427 428 /// Penalty for inserting a line break before this token. 429 unsigned SplitPenalty = 0; 430 431 /// If this is the first ObjC selector name in an ObjC method 432 /// definition or call, this contains the length of the longest name. 433 /// 434 /// This being set to 0 means that the selectors should not be colon-aligned, 435 /// e.g. because several of them are block-type. 
436 unsigned LongestObjCSelectorName = 0; 437 438 /// If this is the first ObjC selector name in an ObjC method 439 /// definition or call, this contains the number of parts that the whole 440 /// selector consist of. 441 unsigned ObjCSelectorNameParts = 0; 442 443 /// The 0-based index of the parameter/argument. For ObjC it is set 444 /// for the selector name token. 445 /// For now calculated only for ObjC. 446 unsigned ParameterIndex = 0; 447 448 /// Stores the number of required fake parentheses and the 449 /// corresponding operator precedence. 450 /// 451 /// If multiple fake parentheses start at a token, this vector stores them in 452 /// reverse order, i.e. inner fake parenthesis first. 453 SmallVector<prec::Level, 4> FakeLParens; 454 /// Insert this many fake ) after this token for correct indentation. 455 unsigned FakeRParens = 0; 456 457 /// If this is an operator (or "."/"->") in a sequence of operators 458 /// with the same precedence, contains the 0-based operator index. 459 unsigned OperatorIndex = 0; 460 461 /// If this is an operator (or "."/"->") in a sequence of operators 462 /// with the same precedence, points to the next operator. 463 FormatToken *NextOperator = nullptr; 464 465 /// If this is a bracket, this points to the matching one. 466 FormatToken *MatchingParen = nullptr; 467 468 /// The previous token in the unwrapped line. 469 FormatToken *Previous = nullptr; 470 471 /// The next token in the unwrapped line. 472 FormatToken *Next = nullptr; 473 474 /// The first token in set of column elements. 475 bool StartsColumn = false; 476 477 /// This notes the start of the line of an array initializer. 478 bool ArrayInitializerLineStart = false; 479 480 /// This starts an array initializer. 481 bool IsArrayInitializer = false; 482 483 /// Is optional and can be removed. 
484 bool Optional = false; 485 486 /// Number of optional braces to be inserted after this token: 487 /// -1: a single left brace 488 /// 0: no braces 489 /// >0: number of right braces 490 int8_t BraceCount = 0; 491 492 /// If this token starts a block, this contains all the unwrapped lines 493 /// in it. 494 SmallVector<AnnotatedLine *, 1> Children; 495 496 // Contains all attributes related to how this token takes part 497 // in a configured macro expansion. 498 llvm::Optional<MacroExpansion> MacroCtx; 499 500 /// When macro expansion introduces nodes with children, those are marked as 501 /// \c MacroParent. 502 /// FIXME: The formatting code currently hard-codes the assumption that 503 /// child nodes are introduced by blocks following an opening brace. 504 /// This is deeply baked into the code and disentangling this will require 505 /// signficant refactorings. \c MacroParent allows us to special-case the 506 /// cases in which we treat parents as block-openers for now. 507 bool MacroParent = false; 508 509 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } 510 bool is(TokenType TT) const { return getType() == TT; } 511 bool is(const IdentifierInfo *II) const { 512 return II && II == Tok.getIdentifierInfo(); 513 } 514 bool is(tok::PPKeywordKind Kind) const { 515 return Tok.getIdentifierInfo() && 516 Tok.getIdentifierInfo()->getPPKeywordID() == Kind; 517 } 518 bool is(BraceBlockKind BBK) const { return getBlockKind() == BBK; } 519 bool is(ParameterPackingKind PPK) const { return getPackingKind() == PPK; } 520 521 template <typename A, typename B> bool isOneOf(A K1, B K2) const { 522 return is(K1) || is(K2); 523 } 524 template <typename A, typename B, typename... Ts> 525 bool isOneOf(A K1, B K2, Ts... 
Ks) const { 526 return is(K1) || isOneOf(K2, Ks...); 527 } 528 template <typename T> bool isNot(T Kind) const { return !is(Kind); } 529 530 bool isIf(bool AllowConstexprMacro = true) const { 531 return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) || 532 (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); 533 } 534 535 bool closesScopeAfterBlock() const { 536 if (getBlockKind() == BK_Block) 537 return true; 538 if (closesScope()) 539 return Previous->closesScopeAfterBlock(); 540 return false; 541 } 542 543 /// \c true if this token starts a sequence with the given tokens in order, 544 /// following the ``Next`` pointers, ignoring comments. 545 template <typename A, typename... Ts> 546 bool startsSequence(A K1, Ts... Tokens) const { 547 return startsSequenceInternal(K1, Tokens...); 548 } 549 550 /// \c true if this token ends a sequence with the given tokens in order, 551 /// following the ``Previous`` pointers, ignoring comments. 552 /// For example, given tokens [T1, T2, T3], the function returns true if 553 /// 3 tokens ending at this (ignoring comments) are [T3, T2, T1]. In other 554 /// words, the tokens passed to this function need to the reverse of the 555 /// order the tokens appear in code. 556 template <typename A, typename... Ts> 557 bool endsSequence(A K1, Ts... 
Tokens) const { 558 return endsSequenceInternal(K1, Tokens...); 559 } 560 561 bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } 562 563 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 564 return Tok.isObjCAtKeyword(Kind); 565 } 566 567 bool isAccessSpecifier(bool ColonRequired = true) const { 568 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 569 (!ColonRequired || (Next && Next->is(tok::colon))); 570 } 571 572 bool canBePointerOrReferenceQualifier() const { 573 return isOneOf(tok::kw_const, tok::kw_restrict, tok::kw_volatile, 574 tok::kw___attribute, tok::kw__Nonnull, tok::kw__Nullable, 575 tok::kw__Null_unspecified, tok::kw___ptr32, tok::kw___ptr64, 576 TT_AttributeMacro); 577 } 578 579 /// Determine whether the token is a simple-type-specifier. 580 LLVM_NODISCARD bool isSimpleTypeSpecifier() const; 581 582 LLVM_NODISCARD bool isTypeOrIdentifier() const; 583 584 bool isObjCAccessSpecifier() const { 585 return is(tok::at) && Next && 586 (Next->isObjCAtKeyword(tok::objc_public) || 587 Next->isObjCAtKeyword(tok::objc_protected) || 588 Next->isObjCAtKeyword(tok::objc_package) || 589 Next->isObjCAtKeyword(tok::objc_private)); 590 } 591 592 /// Returns whether \p Tok is ([{ or an opening < of a template or in 593 /// protos. 594 bool opensScope() const { 595 if (is(TT_TemplateString) && TokenText.endswith("${")) 596 return true; 597 if (is(TT_DictLiteral) && is(tok::less)) 598 return true; 599 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, 600 TT_TemplateOpener); 601 } 602 /// Returns whether \p Tok is )]} or a closing > of a template or in 603 /// protos. 604 bool closesScope() const { 605 if (is(TT_TemplateString) && TokenText.startswith("}")) 606 return true; 607 if (is(TT_DictLiteral) && is(tok::greater)) 608 return true; 609 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, 610 TT_TemplateCloser); 611 } 612 613 /// Returns \c true if this is a "." or "->" accessing a member. 
614 bool isMemberAccess() const { 615 return isOneOf(tok::arrow, tok::period, tok::arrowstar) && 616 !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, 617 TT_LambdaArrow, TT_LeadingJavaAnnotation); 618 } 619 620 bool isUnaryOperator() const { 621 switch (Tok.getKind()) { 622 case tok::plus: 623 case tok::plusplus: 624 case tok::minus: 625 case tok::minusminus: 626 case tok::exclaim: 627 case tok::tilde: 628 case tok::kw_sizeof: 629 case tok::kw_alignof: 630 return true; 631 default: 632 return false; 633 } 634 } 635 636 bool isBinaryOperator() const { 637 // Comma is a binary operator, but does not behave as such wrt. formatting. 638 return getPrecedence() > prec::Comma; 639 } 640 641 bool isTrailingComment() const { 642 return is(tok::comment) && 643 (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); 644 } 645 646 /// Returns \c true if this is a keyword that can be used 647 /// like a function call (e.g. sizeof, typeid, ...). 648 bool isFunctionLikeKeyword() const { 649 switch (Tok.getKind()) { 650 case tok::kw_throw: 651 case tok::kw_typeid: 652 case tok::kw_return: 653 case tok::kw_sizeof: 654 case tok::kw_alignof: 655 case tok::kw_alignas: 656 case tok::kw_decltype: 657 case tok::kw_noexcept: 658 case tok::kw_static_assert: 659 case tok::kw__Atomic: 660 case tok::kw___attribute: 661 case tok::kw___underlying_type: 662 case tok::kw_requires: 663 return true; 664 default: 665 return false; 666 } 667 } 668 669 /// Returns \c true if this is a string literal that's like a label, 670 /// e.g. ends with "=" or ":". 
671 bool isLabelString() const { 672 if (!is(tok::string_literal)) 673 return false; 674 StringRef Content = TokenText; 675 if (Content.startswith("\"") || Content.startswith("'")) 676 Content = Content.drop_front(1); 677 if (Content.endswith("\"") || Content.endswith("'")) 678 Content = Content.drop_back(1); 679 Content = Content.trim(); 680 return Content.size() > 1 && 681 (Content.back() == ':' || Content.back() == '='); 682 } 683 684 /// Returns actual token start location without leading escaped 685 /// newlines and whitespace. 686 /// 687 /// This can be different to Tok.getLocation(), which includes leading escaped 688 /// newlines. 689 SourceLocation getStartOfNonWhitespace() const { 690 return WhitespaceRange.getEnd(); 691 } 692 693 /// Returns \c true if the range of whitespace immediately preceding the \c 694 /// Token is not empty. 695 bool hasWhitespaceBefore() const { 696 return WhitespaceRange.getBegin() != WhitespaceRange.getEnd(); 697 } 698 699 prec::Level getPrecedence() const { 700 return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, 701 /*CPlusPlus11=*/true); 702 } 703 704 /// Returns the previous token ignoring comments. 705 LLVM_NODISCARD FormatToken *getPreviousNonComment() const { 706 FormatToken *Tok = Previous; 707 while (Tok && Tok->is(tok::comment)) 708 Tok = Tok->Previous; 709 return Tok; 710 } 711 712 /// Returns the next token ignoring comments. 713 LLVM_NODISCARD const FormatToken *getNextNonComment() const { 714 const FormatToken *Tok = Next; 715 while (Tok && Tok->is(tok::comment)) 716 Tok = Tok->Next; 717 return Tok; 718 } 719 720 /// Returns \c true if this tokens starts a block-type list, i.e. a 721 /// list that should be indented with a block indent. 722 LLVM_NODISCARD bool opensBlockOrBlockTypeList(const FormatStyle &Style) const; 723 724 /// Returns whether the token is the left square bracket of a C++ 725 /// structured binding declaration. 
726 bool isCppStructuredBinding(const FormatStyle &Style) const { 727 if (!Style.isCpp() || isNot(tok::l_square)) 728 return false; 729 const FormatToken *T = this; 730 do { 731 T = T->getPreviousNonComment(); 732 } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, 733 tok::ampamp)); 734 return T && T->is(tok::kw_auto); 735 } 736 737 /// Same as opensBlockOrBlockTypeList, but for the closing token. 738 bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { 739 if (is(TT_TemplateString) && closesScope()) 740 return true; 741 return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); 742 } 743 744 /// Return the actual namespace token, if this token starts a namespace 745 /// block. 746 const FormatToken *getNamespaceToken() const { 747 const FormatToken *NamespaceTok = this; 748 if (is(tok::comment)) 749 NamespaceTok = NamespaceTok->getNextNonComment(); 750 // Detect "(inline|export)? namespace" in the beginning of a line. 751 if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export)) 752 NamespaceTok = NamespaceTok->getNextNonComment(); 753 return NamespaceTok && 754 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) 755 ? NamespaceTok 756 : nullptr; 757 } 758 759 void copyFrom(const FormatToken &Tok) { *this = Tok; } 760 761 private: 762 // Only allow copying via the explicit copyFrom method. 763 FormatToken(const FormatToken &) = delete; 764 FormatToken &operator=(const FormatToken &) = default; 765 766 template <typename A, typename... Ts> 767 bool startsSequenceInternal(A K1, Ts... Tokens) const { 768 if (is(tok::comment) && Next) 769 return Next->startsSequenceInternal(K1, Tokens...); 770 return is(K1) && Next && Next->startsSequenceInternal(Tokens...); 771 } 772 773 template <typename A> bool startsSequenceInternal(A K1) const { 774 if (is(tok::comment) && Next) 775 return Next->startsSequenceInternal(K1); 776 return is(K1); 777 } 778 779 template <typename A, typename... 
Ts> bool endsSequenceInternal(A K1) const { 780 if (is(tok::comment) && Previous) 781 return Previous->endsSequenceInternal(K1); 782 return is(K1); 783 } 784 785 template <typename A, typename... Ts> 786 bool endsSequenceInternal(A K1, Ts... Tokens) const { 787 if (is(tok::comment) && Previous) 788 return Previous->endsSequenceInternal(K1, Tokens...); 789 return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); 790 } 791 }; 792 793 class ContinuationIndenter; 794 struct LineState; 795 796 class TokenRole { 797 public: 798 TokenRole(const FormatStyle &Style) : Style(Style) {} 799 virtual ~TokenRole(); 800 801 /// After the \c TokenAnnotator has finished annotating all the tokens, 802 /// this function precomputes required information for formatting. 803 virtual void precomputeFormattingInfos(const FormatToken *Token); 804 805 /// Apply the special formatting that the given role demands. 806 /// 807 /// Assumes that the token having this role is already formatted. 808 /// 809 /// Continues formatting from \p State leaving indentation to \p Indenter and 810 /// returns the total penalty that this formatting incurs. 811 virtual unsigned formatFromToken(LineState &State, 812 ContinuationIndenter *Indenter, 813 bool DryRun) { 814 return 0; 815 } 816 817 /// Same as \c formatFromToken, but assumes that the first token has 818 /// already been set thereby deciding on the first line break. 819 virtual unsigned formatAfterToken(LineState &State, 820 ContinuationIndenter *Indenter, 821 bool DryRun) { 822 return 0; 823 } 824 825 /// Notifies the \c Role that a comma was found. 
  virtual void CommaFound(const FormatToken *Token) {}

  /// Returns the last comma known to this role; the base implementation
  /// records no commas and always returns \c nullptr.
  virtual const FormatToken *lastComma() { return nullptr; }

protected:
  const FormatStyle &Style;
};

/// A \c TokenRole that records the commas reported through \c CommaFound and
/// provides role-specific overrides of the formatting hooks.
class CommaSeparatedList : public TokenRole {
public:
  CommaSeparatedList(const FormatStyle &Style)
      : TokenRole(Style), HasNestedBracedList(false) {}

  void precomputeFormattingInfos(const FormatToken *Token) override;

  unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
                            bool DryRun) override;

  unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
                           bool DryRun) override;

  /// Adds \p Token as the next comma to the \c CommaSeparated list.
  void CommaFound(const FormatToken *Token) override {
    Commas.push_back(Token);
  }

  /// Returns the most recently added comma, or \c nullptr if no comma has been
  /// added yet.
  const FormatToken *lastComma() override {
    if (Commas.empty())
      return nullptr;
    return Commas.back();
  }

private:
  /// A struct that holds information on how to format a given list with
  /// a specific number of columns.
  struct ColumnFormat {
    /// The number of columns to use.
    unsigned Columns;

    /// The total width in characters.
    unsigned TotalWidth;

    /// The number of lines required for this format.
    unsigned LineCount;

    /// The size of each column in characters.
    SmallVector<unsigned, 8> ColumnSizes;
  };

  /// Calculate which \c ColumnFormat fits best into
  /// \p RemainingCharacters.
  const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;

  /// The ordered \c FormatTokens making up the commas of this list.
  SmallVector<const FormatToken *, 8> Commas;

  /// The length of each of the list's items in characters including the
  /// trailing comma.
  SmallVector<unsigned, 8> ItemLengths;

  /// Precomputed formats that can be used for this list.
  SmallVector<ColumnFormat, 4> Formats;

  // Initialized to false by the constructor.
  // NOTE(review): presumably set while precomputing the formatting infos when
  // an item is itself a braced list - confirm against the implementation file.
  bool HasNestedBracedList;
};

/// Encapsulates keywords that are context sensitive or for languages not
/// properly supported by Clang's lexer.
struct AdditionalKeywords {
  AdditionalKeywords(IdentifierTable &IdentTable) {
    kw_final = &IdentTable.get("final");
    kw_override = &IdentTable.get("override");
    kw_in = &IdentTable.get("in");
    kw_of = &IdentTable.get("of");
    kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM");
    kw_CF_ENUM = &IdentTable.get("CF_ENUM");
    kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
    kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM");
    kw_NS_ENUM = &IdentTable.get("NS_ENUM");
    kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");

    kw_as = &IdentTable.get("as");
    kw_async = &IdentTable.get("async");
    kw_await = &IdentTable.get("await");
    kw_declare = &IdentTable.get("declare");
    kw_finally = &IdentTable.get("finally");
    kw_from = &IdentTable.get("from");
    kw_function = &IdentTable.get("function");
    kw_get = &IdentTable.get("get");
    kw_import = &IdentTable.get("import");
    kw_infer = &IdentTable.get("infer");
    kw_is = &IdentTable.get("is");
    kw_let = &IdentTable.get("let");
    kw_module = &IdentTable.get("module");
    kw_readonly = &IdentTable.get("readonly");
    kw_set = &IdentTable.get("set");
    kw_type = &IdentTable.get("type");
    kw_typeof = &IdentTable.get("typeof");
    kw_var = &IdentTable.get("var");
    kw_yield = &IdentTable.get("yield");

    kw_abstract = &IdentTable.get("abstract");
    kw_assert = &IdentTable.get("assert");
    kw_extends = &IdentTable.get("extends");
    kw_implements = &IdentTable.get("implements");
    kw_instanceof = &IdentTable.get("instanceof");
    kw_interface = &IdentTable.get("interface");
    kw_native = &IdentTable.get("native");
    kw_package = &IdentTable.get("package");
    kw_synchronized = &IdentTable.get("synchronized");
    kw_throws =
&IdentTable.get("throws"); 937 kw___except = &IdentTable.get("__except"); 938 kw___has_include = &IdentTable.get("__has_include"); 939 kw___has_include_next = &IdentTable.get("__has_include_next"); 940 941 kw_mark = &IdentTable.get("mark"); 942 kw_region = &IdentTable.get("region"); 943 944 kw_extend = &IdentTable.get("extend"); 945 kw_option = &IdentTable.get("option"); 946 kw_optional = &IdentTable.get("optional"); 947 kw_repeated = &IdentTable.get("repeated"); 948 kw_required = &IdentTable.get("required"); 949 kw_returns = &IdentTable.get("returns"); 950 951 kw_signals = &IdentTable.get("signals"); 952 kw_qsignals = &IdentTable.get("Q_SIGNALS"); 953 kw_slots = &IdentTable.get("slots"); 954 kw_qslots = &IdentTable.get("Q_SLOTS"); 955 956 // For internal clang-format use. 957 kw_internal_ident_after_define = 958 &IdentTable.get("__CLANG_FORMAT_INTERNAL_IDENT_AFTER_DEFINE__"); 959 960 // C# keywords 961 kw_dollar = &IdentTable.get("dollar"); 962 kw_base = &IdentTable.get("base"); 963 kw_byte = &IdentTable.get("byte"); 964 kw_checked = &IdentTable.get("checked"); 965 kw_decimal = &IdentTable.get("decimal"); 966 kw_delegate = &IdentTable.get("delegate"); 967 kw_event = &IdentTable.get("event"); 968 kw_fixed = &IdentTable.get("fixed"); 969 kw_foreach = &IdentTable.get("foreach"); 970 kw_init = &IdentTable.get("init"); 971 kw_implicit = &IdentTable.get("implicit"); 972 kw_internal = &IdentTable.get("internal"); 973 kw_lock = &IdentTable.get("lock"); 974 kw_null = &IdentTable.get("null"); 975 kw_object = &IdentTable.get("object"); 976 kw_out = &IdentTable.get("out"); 977 kw_params = &IdentTable.get("params"); 978 kw_ref = &IdentTable.get("ref"); 979 kw_string = &IdentTable.get("string"); 980 kw_stackalloc = &IdentTable.get("stackalloc"); 981 kw_sbyte = &IdentTable.get("sbyte"); 982 kw_sealed = &IdentTable.get("sealed"); 983 kw_uint = &IdentTable.get("uint"); 984 kw_ulong = &IdentTable.get("ulong"); 985 kw_unchecked = &IdentTable.get("unchecked"); 986 kw_unsafe = 
&IdentTable.get("unsafe"); 987 kw_ushort = &IdentTable.get("ushort"); 988 kw_when = &IdentTable.get("when"); 989 kw_where = &IdentTable.get("where"); 990 991 kw_always = &IdentTable.get("always"); 992 kw_always_comb = &IdentTable.get("always_comb"); 993 kw_always_ff = &IdentTable.get("always_ff"); 994 kw_always_latch = &IdentTable.get("always_latch"); 995 kw_assign = &IdentTable.get("assign"); 996 kw_assume = &IdentTable.get("assume"); 997 kw_automatic = &IdentTable.get("automatic"); 998 kw_before = &IdentTable.get("before"); 999 kw_begin = &IdentTable.get("begin"); 1000 kw_begin_keywords = &IdentTable.get("begin_keywords"); 1001 kw_bins = &IdentTable.get("bins"); 1002 kw_binsof = &IdentTable.get("binsof"); 1003 kw_casex = &IdentTable.get("casex"); 1004 kw_casez = &IdentTable.get("casez"); 1005 kw_celldefine = &IdentTable.get("celldefine"); 1006 kw_checker = &IdentTable.get("checker"); 1007 kw_clocking = &IdentTable.get("clocking"); 1008 kw_constraint = &IdentTable.get("constraint"); 1009 kw_cover = &IdentTable.get("cover"); 1010 kw_covergroup = &IdentTable.get("covergroup"); 1011 kw_coverpoint = &IdentTable.get("coverpoint"); 1012 kw_default_decay_time = &IdentTable.get("default_decay_time"); 1013 kw_default_nettype = &IdentTable.get("default_nettype"); 1014 kw_default_trireg_strength = &IdentTable.get("default_trireg_strength"); 1015 kw_delay_mode_distributed = &IdentTable.get("delay_mode_distributed"); 1016 kw_delay_mode_path = &IdentTable.get("delay_mode_path"); 1017 kw_delay_mode_unit = &IdentTable.get("delay_mode_unit"); 1018 kw_delay_mode_zero = &IdentTable.get("delay_mode_zero"); 1019 kw_disable = &IdentTable.get("disable"); 1020 kw_dist = &IdentTable.get("dist"); 1021 kw_elsif = &IdentTable.get("elsif"); 1022 kw_end = &IdentTable.get("end"); 1023 kw_end_keywords = &IdentTable.get("end_keywords"); 1024 kw_endcase = &IdentTable.get("endcase"); 1025 kw_endcelldefine = &IdentTable.get("endcelldefine"); 1026 kw_endchecker = &IdentTable.get("endchecker"); 1027 
kw_endclass = &IdentTable.get("endclass"); 1028 kw_endclocking = &IdentTable.get("endclocking"); 1029 kw_endfunction = &IdentTable.get("endfunction"); 1030 kw_endgenerate = &IdentTable.get("endgenerate"); 1031 kw_endgroup = &IdentTable.get("endgroup"); 1032 kw_endinterface = &IdentTable.get("endinterface"); 1033 kw_endmodule = &IdentTable.get("endmodule"); 1034 kw_endpackage = &IdentTable.get("endpackage"); 1035 kw_endprimitive = &IdentTable.get("endprimitive"); 1036 kw_endprogram = &IdentTable.get("endprogram"); 1037 kw_endproperty = &IdentTable.get("endproperty"); 1038 kw_endsequence = &IdentTable.get("endsequence"); 1039 kw_endspecify = &IdentTable.get("endspecify"); 1040 kw_endtable = &IdentTable.get("endtable"); 1041 kw_endtask = &IdentTable.get("endtask"); 1042 kw_forever = &IdentTable.get("forever"); 1043 kw_fork = &IdentTable.get("fork"); 1044 kw_generate = &IdentTable.get("generate"); 1045 kw_highz0 = &IdentTable.get("highz0"); 1046 kw_highz1 = &IdentTable.get("highz1"); 1047 kw_iff = &IdentTable.get("iff"); 1048 kw_ifnone = &IdentTable.get("ifnone"); 1049 kw_ignore_bins = &IdentTable.get("ignore_bins"); 1050 kw_illegal_bins = &IdentTable.get("illegal_bins"); 1051 kw_initial = &IdentTable.get("initial"); 1052 kw_inout = &IdentTable.get("inout"); 1053 kw_input = &IdentTable.get("input"); 1054 kw_inside = &IdentTable.get("inside"); 1055 kw_interconnect = &IdentTable.get("interconnect"); 1056 kw_intersect = &IdentTable.get("intersect"); 1057 kw_join = &IdentTable.get("join"); 1058 kw_join_any = &IdentTable.get("join_any"); 1059 kw_join_none = &IdentTable.get("join_none"); 1060 kw_large = &IdentTable.get("large"); 1061 kw_local = &IdentTable.get("local"); 1062 kw_localparam = &IdentTable.get("localparam"); 1063 kw_macromodule = &IdentTable.get("macromodule"); 1064 kw_matches = &IdentTable.get("matches"); 1065 kw_medium = &IdentTable.get("medium"); 1066 kw_nounconnected_drive = &IdentTable.get("nounconnected_drive"); 1067 kw_output = &IdentTable.get("output"); 
1068 kw_packed = &IdentTable.get("packed"); 1069 kw_parameter = &IdentTable.get("parameter"); 1070 kw_primitive = &IdentTable.get("primitive"); 1071 kw_priority = &IdentTable.get("priority"); 1072 kw_program = &IdentTable.get("program"); 1073 kw_property = &IdentTable.get("property"); 1074 kw_pull0 = &IdentTable.get("pull0"); 1075 kw_pull1 = &IdentTable.get("pull1"); 1076 kw_pure = &IdentTable.get("pure"); 1077 kw_rand = &IdentTable.get("rand"); 1078 kw_randc = &IdentTable.get("randc"); 1079 kw_randcase = &IdentTable.get("randcase"); 1080 kw_randsequence = &IdentTable.get("randsequence"); 1081 kw_repeat = &IdentTable.get("repeat"); 1082 kw_resetall = &IdentTable.get("resetall"); 1083 kw_sample = &IdentTable.get("sample"); 1084 kw_scalared = &IdentTable.get("scalared"); 1085 kw_sequence = &IdentTable.get("sequence"); 1086 kw_small = &IdentTable.get("small"); 1087 kw_soft = &IdentTable.get("soft"); 1088 kw_solve = &IdentTable.get("solve"); 1089 kw_specify = &IdentTable.get("specify"); 1090 kw_specparam = &IdentTable.get("specparam"); 1091 kw_strong0 = &IdentTable.get("strong0"); 1092 kw_strong1 = &IdentTable.get("strong1"); 1093 kw_supply0 = &IdentTable.get("supply0"); 1094 kw_supply1 = &IdentTable.get("supply1"); 1095 kw_table = &IdentTable.get("table"); 1096 kw_tagged = &IdentTable.get("tagged"); 1097 kw_task = &IdentTable.get("task"); 1098 kw_timescale = &IdentTable.get("timescale"); 1099 kw_tri = &IdentTable.get("tri"); 1100 kw_tri0 = &IdentTable.get("tri0"); 1101 kw_tri1 = &IdentTable.get("tri1"); 1102 kw_triand = &IdentTable.get("triand"); 1103 kw_trior = &IdentTable.get("trior"); 1104 kw_trireg = &IdentTable.get("trireg"); 1105 kw_unconnected_drive = &IdentTable.get("unconnected_drive"); 1106 kw_undefineall = &IdentTable.get("undefineall"); 1107 kw_unique = &IdentTable.get("unique"); 1108 kw_unique0 = &IdentTable.get("unique0"); 1109 kw_uwire = &IdentTable.get("uwire"); 1110 kw_vectored = &IdentTable.get("vectored"); 1111 kw_wand = &IdentTable.get("wand"); 
1112 kw_weak0 = &IdentTable.get("weak0"); 1113 kw_weak1 = &IdentTable.get("weak1"); 1114 kw_wildcard = &IdentTable.get("wildcard"); 1115 kw_wire = &IdentTable.get("wire"); 1116 kw_with = &IdentTable.get("with"); 1117 kw_wor = &IdentTable.get("wor"); 1118 1119 // Symbols that are treated as keywords. 1120 kw_verilogHash = &IdentTable.get("#"); 1121 kw_verilogHashHash = &IdentTable.get("##"); 1122 1123 // Keep this at the end of the constructor to make sure everything here 1124 // is 1125 // already initialized. 1126 JsExtraKeywords = std::unordered_set<IdentifierInfo *>( 1127 {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 1128 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_override, 1129 kw_readonly, kw_set, kw_type, kw_typeof, kw_var, kw_yield, 1130 // Keywords from the Java section. 1131 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 1132 1133 CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>( 1134 {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event, 1135 kw_fixed, kw_foreach, kw_implicit, kw_in, kw_init, kw_interface, 1136 kw_internal, kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, 1137 kw_params, kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, 1138 kw_sealed, kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, 1139 kw_when, kw_where, 1140 // Keywords from the JavaScript section. 1141 kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 1142 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, 1143 kw_set, kw_type, kw_typeof, kw_var, kw_yield, 1144 // Keywords from the Java section. 1145 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 1146 1147 // Some keywords are not included here because they don't need special 1148 // treatment like `showcancelled` or they should be treated as identifiers 1149 // like `int` and `logic`. 
1150 VerilogExtraKeywords = 1151 std::unordered_set<IdentifierInfo *>({kw_always, 1152 kw_always_comb, 1153 kw_always_ff, 1154 kw_always_latch, 1155 kw_assert, 1156 kw_assign, 1157 kw_assume, 1158 kw_automatic, 1159 kw_before, 1160 kw_begin, 1161 kw_bins, 1162 kw_binsof, 1163 kw_casex, 1164 kw_casez, 1165 kw_celldefine, 1166 kw_checker, 1167 kw_clocking, 1168 kw_constraint, 1169 kw_cover, 1170 kw_covergroup, 1171 kw_coverpoint, 1172 kw_disable, 1173 kw_dist, 1174 kw_end, 1175 kw_endcase, 1176 kw_endchecker, 1177 kw_endclass, 1178 kw_endclocking, 1179 kw_endfunction, 1180 kw_endgenerate, 1181 kw_endgroup, 1182 kw_endinterface, 1183 kw_endmodule, 1184 kw_endpackage, 1185 kw_endprimitive, 1186 kw_endprogram, 1187 kw_endproperty, 1188 kw_endsequence, 1189 kw_endspecify, 1190 kw_endtable, 1191 kw_endtask, 1192 kw_extends, 1193 kw_final, 1194 kw_foreach, 1195 kw_forever, 1196 kw_fork, 1197 kw_function, 1198 kw_generate, 1199 kw_highz0, 1200 kw_highz1, 1201 kw_iff, 1202 kw_ifnone, 1203 kw_ignore_bins, 1204 kw_illegal_bins, 1205 kw_implements, 1206 kw_import, 1207 kw_initial, 1208 kw_inout, 1209 kw_input, 1210 kw_inside, 1211 kw_interconnect, 1212 kw_interface, 1213 kw_intersect, 1214 kw_join, 1215 kw_join_any, 1216 kw_join_none, 1217 kw_large, 1218 kw_let, 1219 kw_local, 1220 kw_localparam, 1221 kw_macromodule, 1222 kw_matches, 1223 kw_medium, 1224 kw_output, 1225 kw_package, 1226 kw_packed, 1227 kw_parameter, 1228 kw_primitive, 1229 kw_priority, 1230 kw_program, 1231 kw_property, 1232 kw_pull0, 1233 kw_pull1, 1234 kw_pure, 1235 kw_rand, 1236 kw_randc, 1237 kw_randcase, 1238 kw_randsequence, 1239 kw_ref, 1240 kw_repeat, 1241 kw_sample, 1242 kw_scalared, 1243 kw_sequence, 1244 kw_small, 1245 kw_soft, 1246 kw_solve, 1247 kw_specify, 1248 kw_specparam, 1249 kw_strong0, 1250 kw_strong1, 1251 kw_supply0, 1252 kw_supply1, 1253 kw_table, 1254 kw_tagged, 1255 kw_task, 1256 kw_tri, 1257 kw_tri0, 1258 kw_tri1, 1259 kw_triand, 1260 kw_trior, 1261 kw_trireg, 1262 kw_unique, 1263 
kw_unique0, 1264 kw_uwire, 1265 kw_var, 1266 kw_vectored, 1267 kw_wand, 1268 kw_weak0, 1269 kw_weak1, 1270 kw_wildcard, 1271 kw_wire, 1272 kw_with, 1273 kw_wor, 1274 kw_verilogHash, 1275 kw_verilogHashHash}); 1276 } 1277 1278 // Context sensitive keywords. 1279 IdentifierInfo *kw_final; 1280 IdentifierInfo *kw_override; 1281 IdentifierInfo *kw_in; 1282 IdentifierInfo *kw_of; 1283 IdentifierInfo *kw_CF_CLOSED_ENUM; 1284 IdentifierInfo *kw_CF_ENUM; 1285 IdentifierInfo *kw_CF_OPTIONS; 1286 IdentifierInfo *kw_NS_CLOSED_ENUM; 1287 IdentifierInfo *kw_NS_ENUM; 1288 IdentifierInfo *kw_NS_OPTIONS; 1289 IdentifierInfo *kw___except; 1290 IdentifierInfo *kw___has_include; 1291 IdentifierInfo *kw___has_include_next; 1292 1293 // JavaScript keywords. 1294 IdentifierInfo *kw_as; 1295 IdentifierInfo *kw_async; 1296 IdentifierInfo *kw_await; 1297 IdentifierInfo *kw_declare; 1298 IdentifierInfo *kw_finally; 1299 IdentifierInfo *kw_from; 1300 IdentifierInfo *kw_function; 1301 IdentifierInfo *kw_get; 1302 IdentifierInfo *kw_import; 1303 IdentifierInfo *kw_infer; 1304 IdentifierInfo *kw_is; 1305 IdentifierInfo *kw_let; 1306 IdentifierInfo *kw_module; 1307 IdentifierInfo *kw_readonly; 1308 IdentifierInfo *kw_set; 1309 IdentifierInfo *kw_type; 1310 IdentifierInfo *kw_typeof; 1311 IdentifierInfo *kw_var; 1312 IdentifierInfo *kw_yield; 1313 1314 // Java keywords. 1315 IdentifierInfo *kw_abstract; 1316 IdentifierInfo *kw_assert; 1317 IdentifierInfo *kw_extends; 1318 IdentifierInfo *kw_implements; 1319 IdentifierInfo *kw_instanceof; 1320 IdentifierInfo *kw_interface; 1321 IdentifierInfo *kw_native; 1322 IdentifierInfo *kw_package; 1323 IdentifierInfo *kw_synchronized; 1324 IdentifierInfo *kw_throws; 1325 1326 // Pragma keywords. 1327 IdentifierInfo *kw_mark; 1328 IdentifierInfo *kw_region; 1329 1330 // Proto keywords. 
1331 IdentifierInfo *kw_extend; 1332 IdentifierInfo *kw_option; 1333 IdentifierInfo *kw_optional; 1334 IdentifierInfo *kw_repeated; 1335 IdentifierInfo *kw_required; 1336 IdentifierInfo *kw_returns; 1337 1338 // QT keywords. 1339 IdentifierInfo *kw_signals; 1340 IdentifierInfo *kw_qsignals; 1341 IdentifierInfo *kw_slots; 1342 IdentifierInfo *kw_qslots; 1343 1344 // For internal use by clang-format. 1345 IdentifierInfo *kw_internal_ident_after_define; 1346 1347 // C# keywords 1348 IdentifierInfo *kw_dollar; 1349 IdentifierInfo *kw_base; 1350 IdentifierInfo *kw_byte; 1351 IdentifierInfo *kw_checked; 1352 IdentifierInfo *kw_decimal; 1353 IdentifierInfo *kw_delegate; 1354 IdentifierInfo *kw_event; 1355 IdentifierInfo *kw_fixed; 1356 IdentifierInfo *kw_foreach; 1357 IdentifierInfo *kw_implicit; 1358 IdentifierInfo *kw_init; 1359 IdentifierInfo *kw_internal; 1360 1361 IdentifierInfo *kw_lock; 1362 IdentifierInfo *kw_null; 1363 IdentifierInfo *kw_object; 1364 IdentifierInfo *kw_out; 1365 1366 IdentifierInfo *kw_params; 1367 1368 IdentifierInfo *kw_ref; 1369 IdentifierInfo *kw_string; 1370 IdentifierInfo *kw_stackalloc; 1371 IdentifierInfo *kw_sbyte; 1372 IdentifierInfo *kw_sealed; 1373 IdentifierInfo *kw_uint; 1374 IdentifierInfo *kw_ulong; 1375 IdentifierInfo *kw_unchecked; 1376 IdentifierInfo *kw_unsafe; 1377 IdentifierInfo *kw_ushort; 1378 IdentifierInfo *kw_when; 1379 IdentifierInfo *kw_where; 1380 1381 // Verilog keywords 1382 IdentifierInfo *kw_always; 1383 IdentifierInfo *kw_always_comb; 1384 IdentifierInfo *kw_always_ff; 1385 IdentifierInfo *kw_always_latch; 1386 IdentifierInfo *kw_assign; 1387 IdentifierInfo *kw_assume; 1388 IdentifierInfo *kw_automatic; 1389 IdentifierInfo *kw_before; 1390 IdentifierInfo *kw_begin; 1391 IdentifierInfo *kw_begin_keywords; 1392 IdentifierInfo *kw_bins; 1393 IdentifierInfo *kw_binsof; 1394 IdentifierInfo *kw_casex; 1395 IdentifierInfo *kw_casez; 1396 IdentifierInfo *kw_celldefine; 1397 IdentifierInfo *kw_checker; 1398 
IdentifierInfo *kw_clocking; 1399 IdentifierInfo *kw_constraint; 1400 IdentifierInfo *kw_cover; 1401 IdentifierInfo *kw_covergroup; 1402 IdentifierInfo *kw_coverpoint; 1403 IdentifierInfo *kw_default_decay_time; 1404 IdentifierInfo *kw_default_nettype; 1405 IdentifierInfo *kw_default_trireg_strength; 1406 IdentifierInfo *kw_delay_mode_distributed; 1407 IdentifierInfo *kw_delay_mode_path; 1408 IdentifierInfo *kw_delay_mode_unit; 1409 IdentifierInfo *kw_delay_mode_zero; 1410 IdentifierInfo *kw_disable; 1411 IdentifierInfo *kw_dist; 1412 IdentifierInfo *kw_elsif; 1413 IdentifierInfo *kw_end; 1414 IdentifierInfo *kw_end_keywords; 1415 IdentifierInfo *kw_endcase; 1416 IdentifierInfo *kw_endcelldefine; 1417 IdentifierInfo *kw_endchecker; 1418 IdentifierInfo *kw_endclass; 1419 IdentifierInfo *kw_endclocking; 1420 IdentifierInfo *kw_endfunction; 1421 IdentifierInfo *kw_endgenerate; 1422 IdentifierInfo *kw_endgroup; 1423 IdentifierInfo *kw_endinterface; 1424 IdentifierInfo *kw_endmodule; 1425 IdentifierInfo *kw_endpackage; 1426 IdentifierInfo *kw_endprimitive; 1427 IdentifierInfo *kw_endprogram; 1428 IdentifierInfo *kw_endproperty; 1429 IdentifierInfo *kw_endsequence; 1430 IdentifierInfo *kw_endspecify; 1431 IdentifierInfo *kw_endtable; 1432 IdentifierInfo *kw_endtask; 1433 IdentifierInfo *kw_forever; 1434 IdentifierInfo *kw_fork; 1435 IdentifierInfo *kw_generate; 1436 IdentifierInfo *kw_highz0; 1437 IdentifierInfo *kw_highz1; 1438 IdentifierInfo *kw_iff; 1439 IdentifierInfo *kw_ifnone; 1440 IdentifierInfo *kw_ignore_bins; 1441 IdentifierInfo *kw_illegal_bins; 1442 IdentifierInfo *kw_initial; 1443 IdentifierInfo *kw_inout; 1444 IdentifierInfo *kw_input; 1445 IdentifierInfo *kw_inside; 1446 IdentifierInfo *kw_interconnect; 1447 IdentifierInfo *kw_intersect; 1448 IdentifierInfo *kw_join; 1449 IdentifierInfo *kw_join_any; 1450 IdentifierInfo *kw_join_none; 1451 IdentifierInfo *kw_large; 1452 IdentifierInfo *kw_local; 1453 IdentifierInfo *kw_localparam; 1454 IdentifierInfo 
*kw_macromodule; 1455 IdentifierInfo *kw_matches; 1456 IdentifierInfo *kw_medium; 1457 IdentifierInfo *kw_nounconnected_drive; 1458 IdentifierInfo *kw_output; 1459 IdentifierInfo *kw_packed; 1460 IdentifierInfo *kw_parameter; 1461 IdentifierInfo *kw_primitive; 1462 IdentifierInfo *kw_priority; 1463 IdentifierInfo *kw_program; 1464 IdentifierInfo *kw_property; 1465 IdentifierInfo *kw_pull0; 1466 IdentifierInfo *kw_pull1; 1467 IdentifierInfo *kw_pure; 1468 IdentifierInfo *kw_rand; 1469 IdentifierInfo *kw_randc; 1470 IdentifierInfo *kw_randcase; 1471 IdentifierInfo *kw_randsequence; 1472 IdentifierInfo *kw_repeat; 1473 IdentifierInfo *kw_resetall; 1474 IdentifierInfo *kw_sample; 1475 IdentifierInfo *kw_scalared; 1476 IdentifierInfo *kw_sequence; 1477 IdentifierInfo *kw_small; 1478 IdentifierInfo *kw_soft; 1479 IdentifierInfo *kw_solve; 1480 IdentifierInfo *kw_specify; 1481 IdentifierInfo *kw_specparam; 1482 IdentifierInfo *kw_strong0; 1483 IdentifierInfo *kw_strong1; 1484 IdentifierInfo *kw_supply0; 1485 IdentifierInfo *kw_supply1; 1486 IdentifierInfo *kw_table; 1487 IdentifierInfo *kw_tagged; 1488 IdentifierInfo *kw_task; 1489 IdentifierInfo *kw_timescale; 1490 IdentifierInfo *kw_tri0; 1491 IdentifierInfo *kw_tri1; 1492 IdentifierInfo *kw_tri; 1493 IdentifierInfo *kw_triand; 1494 IdentifierInfo *kw_trior; 1495 IdentifierInfo *kw_trireg; 1496 IdentifierInfo *kw_unconnected_drive; 1497 IdentifierInfo *kw_undefineall; 1498 IdentifierInfo *kw_unique; 1499 IdentifierInfo *kw_unique0; 1500 IdentifierInfo *kw_uwire; 1501 IdentifierInfo *kw_vectored; 1502 IdentifierInfo *kw_wand; 1503 IdentifierInfo *kw_weak0; 1504 IdentifierInfo *kw_weak1; 1505 IdentifierInfo *kw_wildcard; 1506 IdentifierInfo *kw_wire; 1507 IdentifierInfo *kw_with; 1508 IdentifierInfo *kw_wor; 1509 1510 // Workaround for hashes and backticks in Verilog. 1511 IdentifierInfo *kw_verilogHash; 1512 IdentifierInfo *kw_verilogHashHash; 1513 1514 /// Returns \c true if \p Tok is a keyword or an identifier. 
1515 bool isWordLike(const FormatToken &Tok) const { 1516 // getIdentifierinfo returns non-null for keywords as well as identifiers. 1517 return Tok.Tok.getIdentifierInfo() != nullptr && 1518 !Tok.isOneOf(kw_verilogHash, kw_verilogHashHash); 1519 } 1520 1521 /// Returns \c true if \p Tok is a true JavaScript identifier, returns 1522 /// \c false if it is a keyword or a pseudo keyword. 1523 /// If \c AcceptIdentifierName is true, returns true not only for keywords, 1524 // but also for IdentifierName tokens (aka pseudo-keywords), such as 1525 // ``yield``. 1526 bool IsJavaScriptIdentifier(const FormatToken &Tok, 1527 bool AcceptIdentifierName = true) const { 1528 // Based on the list of JavaScript & TypeScript keywords here: 1529 // https://github.com/microsoft/TypeScript/blob/main/src/compiler/scanner.ts#L74 1530 switch (Tok.Tok.getKind()) { 1531 case tok::kw_break: 1532 case tok::kw_case: 1533 case tok::kw_catch: 1534 case tok::kw_class: 1535 case tok::kw_continue: 1536 case tok::kw_const: 1537 case tok::kw_default: 1538 case tok::kw_delete: 1539 case tok::kw_do: 1540 case tok::kw_else: 1541 case tok::kw_enum: 1542 case tok::kw_export: 1543 case tok::kw_false: 1544 case tok::kw_for: 1545 case tok::kw_if: 1546 case tok::kw_import: 1547 case tok::kw_module: 1548 case tok::kw_new: 1549 case tok::kw_private: 1550 case tok::kw_protected: 1551 case tok::kw_public: 1552 case tok::kw_return: 1553 case tok::kw_static: 1554 case tok::kw_switch: 1555 case tok::kw_this: 1556 case tok::kw_throw: 1557 case tok::kw_true: 1558 case tok::kw_try: 1559 case tok::kw_typeof: 1560 case tok::kw_void: 1561 case tok::kw_while: 1562 // These are JS keywords that are lexed by LLVM/clang as keywords. 1563 return false; 1564 case tok::identifier: { 1565 // For identifiers, make sure they are true identifiers, excluding the 1566 // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords). 
1567 bool IsPseudoKeyword = 1568 JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) != 1569 JsExtraKeywords.end(); 1570 return AcceptIdentifierName || !IsPseudoKeyword; 1571 } 1572 default: 1573 // Other keywords are handled in the switch below, to avoid problems due 1574 // to duplicate case labels when using the #include trick. 1575 break; 1576 } 1577 1578 switch (Tok.Tok.getKind()) { 1579 // Handle C++ keywords not included above: these are all JS identifiers. 1580 #define KEYWORD(X, Y) case tok::kw_##X: 1581 #include "clang/Basic/TokenKinds.def" 1582 // #undef KEYWORD is not needed -- it's #undef-ed at the end of 1583 // TokenKinds.def 1584 return true; 1585 default: 1586 // All other tokens (punctuation etc) are not JS identifiers. 1587 return false; 1588 } 1589 } 1590 1591 /// Returns \c true if \p Tok is a C# keyword, returns 1592 /// \c false if it is a anything else. 1593 bool isCSharpKeyword(const FormatToken &Tok) const { 1594 switch (Tok.Tok.getKind()) { 1595 case tok::kw_bool: 1596 case tok::kw_break: 1597 case tok::kw_case: 1598 case tok::kw_catch: 1599 case tok::kw_char: 1600 case tok::kw_class: 1601 case tok::kw_const: 1602 case tok::kw_continue: 1603 case tok::kw_default: 1604 case tok::kw_do: 1605 case tok::kw_double: 1606 case tok::kw_else: 1607 case tok::kw_enum: 1608 case tok::kw_explicit: 1609 case tok::kw_extern: 1610 case tok::kw_false: 1611 case tok::kw_float: 1612 case tok::kw_for: 1613 case tok::kw_goto: 1614 case tok::kw_if: 1615 case tok::kw_int: 1616 case tok::kw_long: 1617 case tok::kw_namespace: 1618 case tok::kw_new: 1619 case tok::kw_operator: 1620 case tok::kw_private: 1621 case tok::kw_protected: 1622 case tok::kw_public: 1623 case tok::kw_return: 1624 case tok::kw_short: 1625 case tok::kw_sizeof: 1626 case tok::kw_static: 1627 case tok::kw_struct: 1628 case tok::kw_switch: 1629 case tok::kw_this: 1630 case tok::kw_throw: 1631 case tok::kw_true: 1632 case tok::kw_try: 1633 case tok::kw_typeof: 1634 case tok::kw_using: 1635 case 
tok::kw_virtual: 1636 case tok::kw_void: 1637 case tok::kw_volatile: 1638 case tok::kw_while: 1639 return true; 1640 default: 1641 return Tok.is(tok::identifier) && 1642 CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == 1643 CSharpExtraKeywords.end(); 1644 } 1645 } 1646 1647 bool isVerilogIdentifier(const FormatToken &Tok) const { 1648 switch (Tok.Tok.getKind()) { 1649 case tok::kw_case: 1650 case tok::kw_class: 1651 case tok::kw_const: 1652 case tok::kw_continue: 1653 case tok::kw_default: 1654 case tok::kw_do: 1655 case tok::kw_extern: 1656 case tok::kw_else: 1657 case tok::kw_enum: 1658 case tok::kw_for: 1659 case tok::kw_if: 1660 case tok::kw_restrict: 1661 case tok::kw_signed: 1662 case tok::kw_static: 1663 case tok::kw_struct: 1664 case tok::kw_typedef: 1665 case tok::kw_union: 1666 case tok::kw_unsigned: 1667 case tok::kw_virtual: 1668 case tok::kw_while: 1669 return false; 1670 case tok::identifier: 1671 return VerilogExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == 1672 VerilogExtraKeywords.end(); 1673 default: 1674 // getIdentifierInfo returns non-null for both identifiers and keywords. 1675 return Tok.Tok.getIdentifierInfo() != nullptr; 1676 } 1677 } 1678 1679 /// Returns whether \p Tok is a Verilog preprocessor directive. This is 1680 /// needed because macro expansions start with a backtick as well and they 1681 /// need to be treated differently. 
1682 bool isVerilogPPDirective(const FormatToken &Tok) const { 1683 auto Info = Tok.Tok.getIdentifierInfo(); 1684 if (!Info) 1685 return false; 1686 switch (Info->getPPKeywordID()) { 1687 case tok::pp_define: 1688 case tok::pp_else: 1689 case tok::pp_endif: 1690 case tok::pp_ifdef: 1691 case tok::pp_ifndef: 1692 case tok::pp_include: 1693 case tok::pp_line: 1694 case tok::pp_pragma: 1695 case tok::pp_undef: 1696 return true; 1697 default: 1698 return Tok.isOneOf(kw_begin_keywords, kw_celldefine, 1699 kw_default_decay_time, kw_default_nettype, 1700 kw_default_trireg_strength, kw_delay_mode_distributed, 1701 kw_delay_mode_path, kw_delay_mode_unit, 1702 kw_delay_mode_zero, kw_elsif, kw_end_keywords, 1703 kw_endcelldefine, kw_nounconnected_drive, kw_resetall, 1704 kw_timescale, kw_unconnected_drive, kw_undefineall); 1705 } 1706 } 1707 1708 /// Returns whether \p Tok is a Verilog keyword that opens a block. 1709 bool isVerilogBegin(const FormatToken &Tok) const { 1710 // `table` is not included since it needs to be treated specially. 1711 return !Tok.endsSequence(kw_fork, kw_disable) && 1712 Tok.isOneOf(kw_begin, kw_fork, kw_generate, kw_specify); 1713 } 1714 1715 /// Returns whether \p Tok is a Verilog keyword that closes a block. 1716 bool isVerilogEnd(const FormatToken &Tok) const { 1717 return !Tok.endsSequence(kw_join, kw_rand) && 1718 Tok.isOneOf(TT_MacroBlockEnd, kw_end, kw_endcase, kw_endclass, 1719 kw_endclocking, kw_endchecker, kw_endfunction, 1720 kw_endgenerate, kw_endgroup, kw_endinterface, 1721 kw_endmodule, kw_endpackage, kw_endprimitive, 1722 kw_endprogram, kw_endproperty, kw_endsequence, 1723 kw_endspecify, kw_endtable, kw_endtask, kw_join, 1724 kw_join_any, kw_join_none); 1725 } 1726 1727 /// Whether the token begins a block. 1728 bool isBlockBegin(const FormatToken &Tok, const FormatStyle &Style) const { 1729 return Tok.is(TT_MacroBlockBegin) || 1730 (Style.isVerilog() ? 
isVerilogBegin(Tok) : Tok.is(tok::l_brace)); 1731 } 1732 1733 private: 1734 /// The JavaScript keywords beyond the C++ keyword set. 1735 std::unordered_set<IdentifierInfo *> JsExtraKeywords; 1736 1737 /// The C# keywords beyond the C++ keyword set 1738 std::unordered_set<IdentifierInfo *> CSharpExtraKeywords; 1739 1740 /// The Verilog keywords beyond the C++ keyword set. 1741 std::unordered_set<IdentifierInfo *> VerilogExtraKeywords; 1742 }; 1743 1744 } // namespace format 1745 } // namespace clang 1746 1747 #endif 1748