xref: /freebsd/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp (revision 3bd749dbd90cc3b95719b65393df5ca8a0fe919d)
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #include <algorithm>
29 #include <utility>
30 
31 #define DEBUG_TYPE "format-parser"
32 
33 namespace clang {
34 namespace format {
35 
36 namespace {
37 
38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39                StringRef Prefix = "", bool PrintText = false) {
40   OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41      << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42   bool NewLine = false;
43   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44                                                     E = Line.Tokens.end();
45        I != E; ++I) {
46     if (NewLine) {
47       OS << Prefix;
48       NewLine = false;
49     }
50     OS << I->Tok->Tok.getName() << "["
51        << "T=" << (unsigned)I->Tok->getType()
52        << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53        << "\"] ";
54     for (SmallVectorImpl<UnwrappedLine>::const_iterator
55              CI = I->Children.begin(),
56              CE = I->Children.end();
57          CI != CE; ++CI) {
58       OS << "\n";
59       printLine(OS, *CI, (Prefix + "  ").str());
60       NewLine = true;
61     }
62   }
63   if (!NewLine)
64     OS << "\n";
65 }
66 
67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
68   printLine(llvm::dbgs(), Line);
69 }
70 
71 class ScopedDeclarationState {
72 public:
73   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74                          bool MustBeDeclaration)
75       : Line(Line), Stack(Stack) {
76     Line.MustBeDeclaration = MustBeDeclaration;
77     Stack.push_back(MustBeDeclaration);
78   }
79   ~ScopedDeclarationState() {
80     Stack.pop_back();
81     if (!Stack.empty())
82       Line.MustBeDeclaration = Stack.back();
83     else
84       Line.MustBeDeclaration = true;
85   }
86 
87 private:
88   UnwrappedLine &Line;
89   llvm::BitVector &Stack;
90 };
91 
92 } // end anonymous namespace
93 
94 class ScopedLineState {
95 public:
96   ScopedLineState(UnwrappedLineParser &Parser,
97                   bool SwitchToPreprocessorLines = false)
98       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
99     if (SwitchToPreprocessorLines)
100       Parser.CurrentLines = &Parser.PreprocessorDirectives;
101     else if (!Parser.Line->Tokens.empty())
102       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
103     PreBlockLine = std::move(Parser.Line);
104     Parser.Line = std::make_unique<UnwrappedLine>();
105     Parser.Line->Level = PreBlockLine->Level;
106     Parser.Line->PPLevel = PreBlockLine->PPLevel;
107     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
108     Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
109   }
110 
111   ~ScopedLineState() {
112     if (!Parser.Line->Tokens.empty())
113       Parser.addUnwrappedLine();
114     assert(Parser.Line->Tokens.empty());
115     Parser.Line = std::move(PreBlockLine);
116     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
117       Parser.MustBreakBeforeNextToken = true;
118     Parser.CurrentLines = OriginalLines;
119   }
120 
121 private:
122   UnwrappedLineParser &Parser;
123 
124   std::unique_ptr<UnwrappedLine> PreBlockLine;
125   SmallVectorImpl<UnwrappedLine> *OriginalLines;
126 };
127 
128 class CompoundStatementIndenter {
129 public:
130   CompoundStatementIndenter(UnwrappedLineParser *Parser,
131                             const FormatStyle &Style, unsigned &LineLevel)
132       : CompoundStatementIndenter(Parser, LineLevel,
133                                   Style.BraceWrapping.AfterControlStatement,
134                                   Style.BraceWrapping.IndentBraces) {}
135   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
136                             bool WrapBrace, bool IndentBrace)
137       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
138     if (WrapBrace)
139       Parser->addUnwrappedLine();
140     if (IndentBrace)
141       ++LineLevel;
142   }
143   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
144 
145 private:
146   unsigned &LineLevel;
147   unsigned OldLineLevel;
148 };
149 
150 UnwrappedLineParser::UnwrappedLineParser(
151     SourceManager &SourceMgr, const FormatStyle &Style,
152     const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
153     ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
154     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
155     IdentifierTable &IdentTable)
156     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
157       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
158       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
159       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
160       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
161                        ? IG_Rejected
162                        : IG_Inited),
163       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
164       Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
165 
166 void UnwrappedLineParser::reset() {
167   PPBranchLevel = -1;
168   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
169                      ? IG_Rejected
170                      : IG_Inited;
171   IncludeGuardToken = nullptr;
172   Line.reset(new UnwrappedLine);
173   CommentsBeforeNextToken.clear();
174   FormatTok = nullptr;
175   MustBreakBeforeNextToken = false;
176   PreprocessorDirectives.clear();
177   CurrentLines = &Lines;
178   DeclarationScopeStack.clear();
179   NestedTooDeep.clear();
180   PPStack.clear();
181   Line->FirstStartColumn = FirstStartColumn;
182 
183   if (!Unexpanded.empty())
184     for (FormatToken *Token : AllTokens)
185       Token->MacroCtx.reset();
186   CurrentExpandedLines.clear();
187   ExpandedLines.clear();
188   Unexpanded.clear();
189   InExpansion = false;
190   Reconstruct.reset();
191 }
192 
193 void UnwrappedLineParser::parse() {
194   IndexedTokenSource TokenSource(AllTokens);
195   Line->FirstStartColumn = FirstStartColumn;
196   do {
197     LLVM_DEBUG(llvm::dbgs() << "----\n");
198     reset();
199     Tokens = &TokenSource;
200     TokenSource.reset();
201 
202     readToken();
203     parseFile();
204 
205     // If we found an include guard then all preprocessor directives (other than
206     // the guard) are over-indented by one.
207     if (IncludeGuard == IG_Found) {
208       for (auto &Line : Lines)
209         if (Line.InPPDirective && Line.Level > 0)
210           --Line.Level;
211     }
212 
213     // Create line with eof token.
214     assert(FormatTok->is(tok::eof));
215     pushToken(FormatTok);
216     addUnwrappedLine();
217 
218     // In a first run, format everything with the lines containing macro calls
219     // replaced by the expansion.
220     if (!ExpandedLines.empty()) {
221       LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
222       for (const auto &Line : Lines) {
223         if (!Line.Tokens.empty()) {
224           auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
225           if (it != ExpandedLines.end()) {
226             for (const auto &Expanded : it->second) {
227               LLVM_DEBUG(printDebugInfo(Expanded));
228               Callback.consumeUnwrappedLine(Expanded);
229             }
230             continue;
231           }
232         }
233         LLVM_DEBUG(printDebugInfo(Line));
234         Callback.consumeUnwrappedLine(Line);
235       }
236       Callback.finishRun();
237     }
238 
239     LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
240     for (const UnwrappedLine &Line : Lines) {
241       LLVM_DEBUG(printDebugInfo(Line));
242       Callback.consumeUnwrappedLine(Line);
243     }
244     Callback.finishRun();
245     Lines.clear();
246     while (!PPLevelBranchIndex.empty() &&
247            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
248       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
249       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
250     }
251     if (!PPLevelBranchIndex.empty()) {
252       ++PPLevelBranchIndex.back();
253       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
254       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
255     }
256   } while (!PPLevelBranchIndex.empty());
257 }
258 
259 void UnwrappedLineParser::parseFile() {
260   // The top-level context in a file always has declarations, except for pre-
261   // processor directives and JavaScript files.
262   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
263   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
264                                           MustBeDeclaration);
265   if (Style.Language == FormatStyle::LK_TextProto)
266     parseBracedList();
267   else
268     parseLevel();
269   // Make sure to format the remaining tokens.
270   //
271   // LK_TextProto is special since its top-level is parsed as the body of a
272   // braced list, which does not necessarily have natural line separators such
273   // as a semicolon. Comments after the last entry that have been determined to
274   // not belong to that line, as in:
275   //   key: value
276   //   // endfile comment
277   // do not have a chance to be put on a line of their own until this point.
278   // Here we add this newline before end-of-file comments.
279   if (Style.Language == FormatStyle::LK_TextProto &&
280       !CommentsBeforeNextToken.empty()) {
281     addUnwrappedLine();
282   }
283   flushComments(true);
284   addUnwrappedLine();
285 }
286 
287 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
288   do {
289     switch (FormatTok->Tok.getKind()) {
290     case tok::l_brace:
291       return;
292     default:
293       if (FormatTok->is(Keywords.kw_where)) {
294         addUnwrappedLine();
295         nextToken();
296         parseCSharpGenericTypeConstraint();
297         break;
298       }
299       nextToken();
300       break;
301     }
302   } while (!eof());
303 }
304 
305 void UnwrappedLineParser::parseCSharpAttribute() {
306   int UnpairedSquareBrackets = 1;
307   do {
308     switch (FormatTok->Tok.getKind()) {
309     case tok::r_square:
310       nextToken();
311       --UnpairedSquareBrackets;
312       if (UnpairedSquareBrackets == 0) {
313         addUnwrappedLine();
314         return;
315       }
316       break;
317     case tok::l_square:
318       ++UnpairedSquareBrackets;
319       nextToken();
320       break;
321     default:
322       nextToken();
323       break;
324     }
325   } while (!eof());
326 }
327 
328 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
329   if (!Lines.empty() && Lines.back().InPPDirective)
330     return true;
331 
332   const FormatToken *Previous = Tokens->getPreviousToken();
333   return Previous && Previous->is(tok::comment) &&
334          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
335 }
336 
337 /// \brief Parses a level, that is ???.
338 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
339 /// \param CanContainBracedList If the content can contain (at any level) a
340 /// braced list.
341 /// \param NextLBracesType The type for left brace found in this level.
342 /// \param IfKind The \p if statement kind in the level.
343 /// \param IfLeftBrace The left brace of the \p if block in the level.
344 /// \returns true if a simple block of if/else/for/while, or false otherwise.
345 /// (A simple block has a single statement.)
346 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
347                                      bool CanContainBracedList,
348                                      TokenType NextLBracesType,
349                                      IfStmtKind *IfKind,
350                                      FormatToken **IfLeftBrace) {
351   auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
352                                   ? TT_BracedListLBrace
353                                   : TT_Unknown;
354   const bool IsPrecededByCommentOrPPDirective =
355       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
356   FormatToken *IfLBrace = nullptr;
357   bool HasDoWhile = false;
358   bool HasLabel = false;
359   unsigned StatementCount = 0;
360   bool SwitchLabelEncountered = false;
361 
362   do {
363     if (FormatTok->getType() == TT_AttributeMacro) {
364       nextToken();
365       continue;
366     }
367     tok::TokenKind kind = FormatTok->Tok.getKind();
368     if (FormatTok->getType() == TT_MacroBlockBegin)
369       kind = tok::l_brace;
370     else if (FormatTok->getType() == TT_MacroBlockEnd)
371       kind = tok::r_brace;
372 
373     auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
374                          &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
375       parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
376                              &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
377                              HasLabel ? nullptr : &HasLabel);
378       ++StatementCount;
379       assert(StatementCount > 0 && "StatementCount overflow!");
380     };
381 
382     switch (kind) {
383     case tok::comment:
384       nextToken();
385       addUnwrappedLine();
386       break;
387     case tok::l_brace:
388       if (NextLBracesType != TT_Unknown) {
389         FormatTok->setFinalizedType(NextLBracesType);
390       } else if (FormatTok->Previous &&
391                  FormatTok->Previous->ClosesRequiresClause) {
392         // We need the 'default' case here to correctly parse a function
393         // l_brace.
394         ParseDefault();
395         continue;
396       }
397       if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
398           tryToParseBracedList()) {
399         continue;
400       }
401       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
402                  /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
403                  /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
404                  NextLBracesType);
405       ++StatementCount;
406       assert(StatementCount > 0 && "StatementCount overflow!");
407       addUnwrappedLine();
408       break;
409     case tok::r_brace:
410       if (OpeningBrace) {
411         if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
412             !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
413           return false;
414         }
415         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
416             HasDoWhile || IsPrecededByCommentOrPPDirective ||
417             precededByCommentOrPPDirective()) {
418           return false;
419         }
420         const FormatToken *Next = Tokens->peekNextToken();
421         if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
422           return false;
423         if (IfLeftBrace)
424           *IfLeftBrace = IfLBrace;
425         return true;
426       }
427       nextToken();
428       addUnwrappedLine();
429       break;
430     case tok::kw_default: {
431       unsigned StoredPosition = Tokens->getPosition();
432       FormatToken *Next;
433       do {
434         Next = Tokens->getNextToken();
435         assert(Next);
436       } while (Next->is(tok::comment));
437       FormatTok = Tokens->setPosition(StoredPosition);
438       if (Next->isNot(tok::colon)) {
439         // default not followed by ':' is not a case label; treat it like
440         // an identifier.
441         parseStructuralElement();
442         break;
443       }
444       // Else, if it is 'default:', fall through to the case handling.
445       [[fallthrough]];
446     }
447     case tok::kw_case:
448       if (Style.isProto() || Style.isVerilog() ||
449           (Style.isJavaScript() && Line->MustBeDeclaration)) {
450         // Proto: there are no switch/case statements
451         // Verilog: Case labels don't have this word. We handle case
452         // labels including default in TokenAnnotator.
453         // JavaScript: A 'case: string' style field declaration.
454         ParseDefault();
455         break;
456       }
457       if (!SwitchLabelEncountered &&
458           (Style.IndentCaseLabels ||
459            (Line->InPPDirective && Line->Level == 1))) {
460         ++Line->Level;
461       }
462       SwitchLabelEncountered = true;
463       parseStructuralElement();
464       break;
465     case tok::l_square:
466       if (Style.isCSharp()) {
467         nextToken();
468         parseCSharpAttribute();
469         break;
470       }
471       if (handleCppAttributes())
472         break;
473       [[fallthrough]];
474     default:
475       ParseDefault();
476       break;
477     }
478   } while (!eof());
479 
480   return false;
481 }
482 
483 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
484   // We'll parse forward through the tokens until we hit
485   // a closing brace or eof - note that getNextToken() will
486   // parse macros, so this will magically work inside macro
487   // definitions, too.
488   unsigned StoredPosition = Tokens->getPosition();
489   FormatToken *Tok = FormatTok;
490   const FormatToken *PrevTok = Tok->Previous;
491   // Keep a stack of positions of lbrace tokens. We will
492   // update information about whether an lbrace starts a
493   // braced init list or a different block during the loop.
494   struct StackEntry {
495     FormatToken *Tok;
496     const FormatToken *PrevTok;
497   };
498   SmallVector<StackEntry, 8> LBraceStack;
499   assert(Tok->is(tok::l_brace));
500   do {
501     // Get next non-comment token.
502     FormatToken *NextTok;
503     do {
504       NextTok = Tokens->getNextToken();
505     } while (NextTok->is(tok::comment));
506 
507     switch (Tok->Tok.getKind()) {
508     case tok::l_brace:
509       if (Style.isJavaScript() && PrevTok) {
510         if (PrevTok->isOneOf(tok::colon, tok::less)) {
511           // A ':' indicates this code is in a type, or a braced list
512           // following a label in an object literal ({a: {b: 1}}).
513           // A '<' could be an object used in a comparison, but that is nonsense
514           // code (can never return true), so more likely it is a generic type
515           // argument (`X<{a: string; b: number}>`).
516           // The code below could be confused by semicolons between the
517           // individual members in a type member list, which would normally
518           // trigger BK_Block. In both cases, this must be parsed as an inline
519           // braced init.
520           Tok->setBlockKind(BK_BracedInit);
521         } else if (PrevTok->is(tok::r_paren)) {
522           // `) { }` can only occur in function or method declarations in JS.
523           Tok->setBlockKind(BK_Block);
524         }
525       } else {
526         Tok->setBlockKind(BK_Unknown);
527       }
528       LBraceStack.push_back({Tok, PrevTok});
529       break;
530     case tok::r_brace:
531       if (LBraceStack.empty())
532         break;
533       if (LBraceStack.back().Tok->is(BK_Unknown)) {
534         bool ProbablyBracedList = false;
535         if (Style.Language == FormatStyle::LK_Proto) {
536           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
537         } else {
538           // Skip NextTok over preprocessor lines, otherwise we may not
539           // properly diagnose the block as a braced intializer
540           // if the comma separator appears after the pp directive.
541           while (NextTok->is(tok::hash)) {
542             ScopedMacroState MacroState(*Line, Tokens, NextTok);
543             do {
544               NextTok = Tokens->getNextToken();
545             } while (NextTok->isNot(tok::eof));
546           }
547 
548           // Using OriginalColumn to distinguish between ObjC methods and
549           // binary operators is a bit hacky.
550           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
551                                   NextTok->OriginalColumn == 0;
552 
553           // Try to detect a braced list. Note that regardless how we mark inner
554           // braces here, we will overwrite the BlockKind later if we parse a
555           // braced list (where all blocks inside are by default braced lists),
556           // or when we explicitly detect blocks (for example while parsing
557           // lambdas).
558 
559           // If we already marked the opening brace as braced list, the closing
560           // must also be part of it.
561           ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
562 
563           ProbablyBracedList = ProbablyBracedList ||
564                                (Style.isJavaScript() &&
565                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
566                                                  Keywords.kw_as));
567           ProbablyBracedList = ProbablyBracedList ||
568                                (Style.isCpp() && NextTok->is(tok::l_paren));
569 
570           // If there is a comma, semicolon or right paren after the closing
571           // brace, we assume this is a braced initializer list.
572           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
573           // braced list in JS.
574           ProbablyBracedList =
575               ProbablyBracedList ||
576               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
577                                tok::r_paren, tok::r_square, tok::ellipsis);
578 
579           // Distinguish between braced list in a constructor initializer list
580           // followed by constructor body, or just adjacent blocks.
581           ProbablyBracedList =
582               ProbablyBracedList ||
583               (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
584                LBraceStack.back().PrevTok->isOneOf(tok::identifier,
585                                                    tok::greater));
586 
587           ProbablyBracedList =
588               ProbablyBracedList ||
589               (NextTok->is(tok::identifier) &&
590                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
591 
592           ProbablyBracedList = ProbablyBracedList ||
593                                (NextTok->is(tok::semi) &&
594                                 (!ExpectClassBody || LBraceStack.size() != 1));
595 
596           ProbablyBracedList =
597               ProbablyBracedList ||
598               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
599 
600           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
601             // We can have an array subscript after a braced init
602             // list, but C++11 attributes are expected after blocks.
603             NextTok = Tokens->getNextToken();
604             ProbablyBracedList = NextTok->isNot(tok::l_square);
605           }
606         }
607         if (ProbablyBracedList) {
608           Tok->setBlockKind(BK_BracedInit);
609           LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
610         } else {
611           Tok->setBlockKind(BK_Block);
612           LBraceStack.back().Tok->setBlockKind(BK_Block);
613         }
614       }
615       LBraceStack.pop_back();
616       break;
617     case tok::identifier:
618       if (!Tok->is(TT_StatementMacro))
619         break;
620       [[fallthrough]];
621     case tok::at:
622     case tok::semi:
623     case tok::kw_if:
624     case tok::kw_while:
625     case tok::kw_for:
626     case tok::kw_switch:
627     case tok::kw_try:
628     case tok::kw___try:
629       if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
630         LBraceStack.back().Tok->setBlockKind(BK_Block);
631       break;
632     default:
633       break;
634     }
635     PrevTok = Tok;
636     Tok = NextTok;
637   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
638 
639   // Assume other blocks for all unclosed opening braces.
640   for (const auto &Entry : LBraceStack)
641     if (Entry.Tok->is(BK_Unknown))
642       Entry.Tok->setBlockKind(BK_Block);
643 
644   FormatTok = Tokens->setPosition(StoredPosition);
645 }
646 
/// Mixes the std::hash of \p v into \p seed in place.
/// \param seed Running hash value; updated by this call.
/// \param v Value whose hash is folded into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  // The mix is computed from the old seed before it is xor-ed in.
  const std::size_t Mixed = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
652 
653 size_t UnwrappedLineParser::computePPHash() const {
654   size_t h = 0;
655   for (const auto &i : PPStack) {
656     hash_combine(h, size_t(i.Kind));
657     hash_combine(h, i.Line);
658   }
659   return h;
660 }
661 
662 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
663 // is not null, subtracts its length (plus the preceding space) when computing
664 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
665 // running the token annotator on it so that we can restore them afterward.
666 bool UnwrappedLineParser::mightFitOnOneLine(
667     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
668   const auto ColumnLimit = Style.ColumnLimit;
669   if (ColumnLimit == 0)
670     return true;
671 
672   auto &Tokens = ParsedLine.Tokens;
673   assert(!Tokens.empty());
674 
675   const auto *LastToken = Tokens.back().Tok;
676   assert(LastToken);
677 
678   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
679 
680   int Index = 0;
681   for (const auto &Token : Tokens) {
682     assert(Token.Tok);
683     auto &SavedToken = SavedTokens[Index++];
684     SavedToken.Tok = new FormatToken;
685     SavedToken.Tok->copyFrom(*Token.Tok);
686     SavedToken.Children = std::move(Token.Children);
687   }
688 
689   AnnotatedLine Line(ParsedLine);
690   assert(Line.Last == LastToken);
691 
692   TokenAnnotator Annotator(Style, Keywords);
693   Annotator.annotate(Line);
694   Annotator.calculateFormattingInformation(Line);
695 
696   auto Length = LastToken->TotalLength;
697   if (OpeningBrace) {
698     assert(OpeningBrace != Tokens.front().Tok);
699     if (auto Prev = OpeningBrace->Previous;
700         Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
701       Length -= ColumnLimit;
702     }
703     Length -= OpeningBrace->TokenText.size() + 1;
704   }
705 
706   if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
707     assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
708     Length -= FirstToken->TokenText.size() + 1;
709   }
710 
711   Index = 0;
712   for (auto &Token : Tokens) {
713     const auto &SavedToken = SavedTokens[Index++];
714     Token.Tok->copyFrom(*SavedToken.Tok);
715     Token.Children = std::move(SavedToken.Children);
716     delete SavedToken.Tok;
717   }
718 
719   // If these change PPLevel needs to be used for get correct indentation.
720   assert(!Line.InMacroBody);
721   assert(!Line.InPPDirective);
722   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
723 }
724 
725 FormatToken *UnwrappedLineParser::parseBlock(
726     bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
727     IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
728     bool CanContainBracedList, TokenType NextLBracesType) {
729   auto HandleVerilogBlockLabel = [this]() {
730     // ":" name
731     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
732       nextToken();
733       if (Keywords.isVerilogIdentifier(*FormatTok))
734         nextToken();
735     }
736   };
737 
738   // Whether this is a Verilog-specific block that has a special header like a
739   // module.
740   const bool VerilogHierarchy =
741       Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
742   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
743           (Style.isVerilog() &&
744            (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
745          "'{' or macro block token expected");
746   FormatToken *Tok = FormatTok;
747   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
748   auto Index = CurrentLines->size();
749   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
750   FormatTok->setBlockKind(BK_Block);
751 
752   // For Whitesmiths mode, jump to the next level prior to skipping over the
753   // braces.
754   if (!VerilogHierarchy && AddLevels > 0 &&
755       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
756     ++Line->Level;
757   }
758 
759   size_t PPStartHash = computePPHash();
760 
761   const unsigned InitialLevel = Line->Level;
762   if (VerilogHierarchy) {
763     AddLevels += parseVerilogHierarchyHeader();
764   } else {
765     nextToken(/*LevelDifference=*/AddLevels);
766     HandleVerilogBlockLabel();
767   }
768 
769   // Bail out if there are too many levels. Otherwise, the stack might overflow.
770   if (Line->Level > 300)
771     return nullptr;
772 
773   if (MacroBlock && FormatTok->is(tok::l_paren))
774     parseParens();
775 
776   size_t NbPreprocessorDirectives =
777       !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
778   addUnwrappedLine();
779   size_t OpeningLineIndex =
780       CurrentLines->empty()
781           ? (UnwrappedLine::kInvalidIndex)
782           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
783 
784   // Whitesmiths is weird here. The brace needs to be indented for the namespace
785   // block, but the block itself may not be indented depending on the style
786   // settings. This allows the format to back up one level in those cases.
787   if (UnindentWhitesmithsBraces)
788     --Line->Level;
789 
790   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
791                                           MustBeDeclaration);
792   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
793     Line->Level += AddLevels;
794 
795   FormatToken *IfLBrace = nullptr;
796   const bool SimpleBlock =
797       parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);
798 
799   if (eof())
800     return IfLBrace;
801 
802   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
803                  : !FormatTok->is(tok::r_brace)) {
804     Line->Level = InitialLevel;
805     FormatTok->setBlockKind(BK_Block);
806     return IfLBrace;
807   }
808 
809   const bool IsFunctionRBrace =
810       FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
811 
812   auto RemoveBraces = [=]() mutable {
813     if (!SimpleBlock)
814       return false;
815     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
816     assert(FormatTok->is(tok::r_brace));
817     const bool WrappedOpeningBrace = !Tok->Previous;
818     if (WrappedOpeningBrace && FollowedByComment)
819       return false;
820     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
821     if (KeepBraces && !HasRequiredIfBraces)
822       return false;
823     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
824       const FormatToken *Previous = Tokens->getPreviousToken();
825       assert(Previous);
826       if (Previous->is(tok::r_brace) && !Previous->Optional)
827         return false;
828     }
829     assert(!CurrentLines->empty());
830     auto &LastLine = CurrentLines->back();
831     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
832       return false;
833     if (Tok->is(TT_ElseLBrace))
834       return true;
835     if (WrappedOpeningBrace) {
836       assert(Index > 0);
837       --Index; // The line above the wrapped l_brace.
838       Tok = nullptr;
839     }
840     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
841   };
842   if (RemoveBraces()) {
843     Tok->MatchingParen = FormatTok;
844     FormatTok->MatchingParen = Tok;
845   }
846 
847   size_t PPEndHash = computePPHash();
848 
849   // Munch the closing brace.
850   nextToken(/*LevelDifference=*/-AddLevels);
851 
852   // When this is a function block and there is an unnecessary semicolon
853   // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
854   // it later).
855   if (Style.RemoveSemicolon && IsFunctionRBrace) {
856     while (FormatTok->is(tok::semi)) {
857       FormatTok->Optional = true;
858       nextToken();
859     }
860   }
861 
862   HandleVerilogBlockLabel();
863 
864   if (MacroBlock && FormatTok->is(tok::l_paren))
865     parseParens();
866 
867   Line->Level = InitialLevel;
868 
869   if (FormatTok->is(tok::kw_noexcept)) {
870     // A noexcept in a requires expression.
871     nextToken();
872   }
873 
874   if (FormatTok->is(tok::arrow)) {
875     // Following the } or noexcept we can find a trailing return type arrow
876     // as part of an implicit conversion constraint.
877     nextToken();
878     parseStructuralElement();
879   }
880 
881   if (MunchSemi && FormatTok->is(tok::semi))
882     nextToken();
883 
884   if (PPStartHash == PPEndHash) {
885     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
886     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
887       // Update the opening line to add the forward reference as well
888       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
889           CurrentLines->size() - 1;
890     }
891   }
892 
893   return IfLBrace;
894 }
895 
896 static bool isGoogScope(const UnwrappedLine &Line) {
897   // FIXME: Closure-library specific stuff should not be hard-coded but be
898   // configurable.
899   if (Line.Tokens.size() < 4)
900     return false;
901   auto I = Line.Tokens.begin();
902   if (I->Tok->TokenText != "goog")
903     return false;
904   ++I;
905   if (I->Tok->isNot(tok::period))
906     return false;
907   ++I;
908   if (I->Tok->TokenText != "scope")
909     return false;
910   ++I;
911   return I->Tok->is(tok::l_paren);
912 }
913 
914 static bool isIIFE(const UnwrappedLine &Line,
915                    const AdditionalKeywords &Keywords) {
916   // Look for the start of an immediately invoked anonymous function.
917   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
918   // This is commonly done in JavaScript to create a new, anonymous scope.
919   // Example: (function() { ... })()
920   if (Line.Tokens.size() < 3)
921     return false;
922   auto I = Line.Tokens.begin();
923   if (I->Tok->isNot(tok::l_paren))
924     return false;
925   ++I;
926   if (I->Tok->isNot(Keywords.kw_function))
927     return false;
928   ++I;
929   return I->Tok->is(tok::l_paren);
930 }
931 
932 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
933                                    const FormatToken &InitialToken) {
934   tok::TokenKind Kind = InitialToken.Tok.getKind();
935   if (InitialToken.is(TT_NamespaceMacro))
936     Kind = tok::kw_namespace;
937 
938   switch (Kind) {
939   case tok::kw_namespace:
940     return Style.BraceWrapping.AfterNamespace;
941   case tok::kw_class:
942     return Style.BraceWrapping.AfterClass;
943   case tok::kw_union:
944     return Style.BraceWrapping.AfterUnion;
945   case tok::kw_struct:
946     return Style.BraceWrapping.AfterStruct;
947   case tok::kw_enum:
948     return Style.BraceWrapping.AfterEnum;
949   default:
950     return false;
951   }
952 }
953 
954 void UnwrappedLineParser::parseChildBlock(
955     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
956   assert(FormatTok->is(tok::l_brace));
957   FormatTok->setBlockKind(BK_Block);
958   const FormatToken *OpeningBrace = FormatTok;
959   nextToken();
960   {
961     bool SkipIndent = (Style.isJavaScript() &&
962                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
963     ScopedLineState LineState(*this);
964     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
965                                             /*MustBeDeclaration=*/false);
966     Line->Level += SkipIndent ? 0 : 1;
967     parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
968     flushComments(isOnNewLine(*FormatTok));
969     Line->Level -= SkipIndent ? 0 : 1;
970   }
971   nextToken();
972 }
973 
974 void UnwrappedLineParser::parsePPDirective() {
975   assert(FormatTok->is(tok::hash) && "'#' expected");
976   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
977 
978   nextToken();
979 
980   if (!FormatTok->Tok.getIdentifierInfo()) {
981     parsePPUnknown();
982     return;
983   }
984 
985   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
986   case tok::pp_define:
987     parsePPDefine();
988     return;
989   case tok::pp_if:
990     parsePPIf(/*IfDef=*/false);
991     break;
992   case tok::pp_ifdef:
993   case tok::pp_ifndef:
994     parsePPIf(/*IfDef=*/true);
995     break;
996   case tok::pp_else:
997   case tok::pp_elifdef:
998   case tok::pp_elifndef:
999   case tok::pp_elif:
1000     parsePPElse();
1001     break;
1002   case tok::pp_endif:
1003     parsePPEndIf();
1004     break;
1005   case tok::pp_pragma:
1006     parsePPPragma();
1007     break;
1008   default:
1009     parsePPUnknown();
1010     break;
1011   }
1012 }
1013 
1014 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1015   size_t Line = CurrentLines->size();
1016   if (CurrentLines == &PreprocessorDirectives)
1017     Line += Lines.size();
1018 
1019   if (Unreachable ||
1020       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1021     PPStack.push_back({PP_Unreachable, Line});
1022   } else {
1023     PPStack.push_back({PP_Conditional, Line});
1024   }
1025 }
1026 
1027 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1028   ++PPBranchLevel;
1029   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1030   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1031     PPLevelBranchIndex.push_back(0);
1032     PPLevelBranchCount.push_back(0);
1033   }
1034   PPChainBranchIndex.push(Unreachable ? -1 : 0);
1035   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1036   conditionalCompilationCondition(Unreachable || Skip);
1037 }
1038 
1039 void UnwrappedLineParser::conditionalCompilationAlternative() {
1040   if (!PPStack.empty())
1041     PPStack.pop_back();
1042   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1043   if (!PPChainBranchIndex.empty())
1044     ++PPChainBranchIndex.top();
1045   conditionalCompilationCondition(
1046       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1047       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1048 }
1049 
1050 void UnwrappedLineParser::conditionalCompilationEnd() {
1051   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1052   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1053     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1054       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1055   }
1056   // Guard against #endif's without #if.
1057   if (PPBranchLevel > -1)
1058     --PPBranchLevel;
1059   if (!PPChainBranchIndex.empty())
1060     PPChainBranchIndex.pop();
1061   if (!PPStack.empty())
1062     PPStack.pop_back();
1063 }
1064 
1065 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1066   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1067   nextToken();
1068   bool Unreachable = false;
1069   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1070     Unreachable = true;
1071   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1072     Unreachable = true;
1073   conditionalCompilationStart(Unreachable);
1074   FormatToken *IfCondition = FormatTok;
1075   // If there's a #ifndef on the first line, and the only lines before it are
1076   // comments, it could be an include guard.
1077   bool MaybeIncludeGuard = IfNDef;
1078   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1079     for (auto &Line : Lines) {
1080       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1081         MaybeIncludeGuard = false;
1082         IncludeGuard = IG_Rejected;
1083         break;
1084       }
1085     }
1086   }
1087   --PPBranchLevel;
1088   parsePPUnknown();
1089   ++PPBranchLevel;
1090   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1091     IncludeGuard = IG_IfNdefed;
1092     IncludeGuardToken = IfCondition;
1093   }
1094 }
1095 
1096 void UnwrappedLineParser::parsePPElse() {
1097   // If a potential include guard has an #else, it's not an include guard.
1098   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1099     IncludeGuard = IG_Rejected;
1100   // Don't crash when there is an #else without an #if.
1101   assert(PPBranchLevel >= -1);
1102   if (PPBranchLevel == -1)
1103     conditionalCompilationStart(/*Unreachable=*/true);
1104   conditionalCompilationAlternative();
1105   --PPBranchLevel;
1106   parsePPUnknown();
1107   ++PPBranchLevel;
1108 }
1109 
1110 void UnwrappedLineParser::parsePPEndIf() {
1111   conditionalCompilationEnd();
1112   parsePPUnknown();
1113   // If the #endif of a potential include guard is the last thing in the file,
1114   // then we found an include guard.
1115   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1116       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1117     IncludeGuard = IG_Found;
1118   }
1119 }
1120 
// Parses a #define directive. Called by parsePPDirective() with FormatTok on
// the `define` keyword. Updates the include-guard detection state, emits the
// `#define NAME[(params)]` part as its own unwrapped line and then parses the
// remainder of the directive via parseFile() as the macro body.
void UnwrappedLineParser::parsePPDefine() {
  // Step onto the token that should be the macro name.
  nextToken();

  // Without identifier info there is no usable macro name: give up on
  // include-guard detection and treat the directive as an unknown one.
  if (!FormatTok->Tok.getIdentifierInfo()) {
    IncludeGuard = IG_Rejected;
    IncludeGuardToken = nullptr;
    parsePPUnknown();
    return;
  }

  // A #define whose name matches the token remembered from the preceding
  // #ifndef completes the "#ifndef X / #define X" pair. The candidate is
  // rejected if any line collected so far starts with something other than a
  // comment or a '#'.
  if (IncludeGuard == IG_IfNdefed &&
      IncludeGuardToken->TokenText == FormatTok->TokenText) {
    IncludeGuard = IG_Defined;
    IncludeGuardToken = nullptr;
    for (auto &Line : Lines) {
      if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
        IncludeGuard = IG_Rejected;
        break;
      }
    }
  }

  // In the context of a define, even keywords should be treated as normal
  // identifiers. Setting the kind to identifier is not enough, because we need
  // to treat additional keywords like __except as well, which are already
  // identifiers. Setting the identifier info to null interferes with include
  // guard processing above, and changes preprocessing nesting.
  FormatTok->Tok.setKind(tok::identifier);
  FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
  nextToken();
  // A '(' with no whitespace before it directly follows the macro name; parse
  // it (the parameter list) as part of the first line.
  if (FormatTok->Tok.getKind() == tok::l_paren &&
      !FormatTok->hasWhitespaceBefore()) {
    parseParens();
  }
  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
    Line->Level += PPBranchLevel + 1;
  addUnwrappedLine();
  // Everything parsed from here on is one level deeper than the line that was
  // just added.
  ++Line->Level;

  // The PP level is not increased for a #define that is part of an include
  // guard candidate (IncludeGuard == IG_Defined).
  Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
  assert((int)Line->PPLevel >= 0);
  Line->InMacroBody = true;

  // Errors during a preprocessor directive can only affect the layout of the
  // preprocessor directive, and thus we ignore them. An alternative approach
  // would be to use the same approach we use on the file level (no
  // re-indentation if there was a structural error) within the macro
  // definition.
  parseFile();
}
1171 
// Parses a #pragma directive: flags the current line as being inside a pragma
// directive and then consumes it like any other unrecognized directive.
void UnwrappedLineParser::parsePPPragma() {
  // Set before parsePPUnknown() adds the line.
  Line->InPragmaDirective = true;
  parsePPUnknown();
}
1176 
1177 void UnwrappedLineParser::parsePPUnknown() {
1178   do {
1179     nextToken();
1180   } while (!eof());
1181   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1182     Line->Level += PPBranchLevel + 1;
1183   addUnwrappedLine();
1184 }
1185 
1186 // Here we exclude certain tokens that are not usually the first token in an
1187 // unwrapped line. This is used in attempt to distinguish macro calls without
1188 // trailing semicolons from other constructs split to several lines.
1189 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1190   // Semicolon can be a null-statement, l_square can be a start of a macro or
1191   // a C++11 attribute, but this doesn't seem to be common.
1192   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1193          Tok.isNot(TT_AttributeSquare) &&
1194          // Tokens that can only be used as binary operators and a part of
1195          // overloaded operator names.
1196          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1197          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1198          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1199          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1200          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1201          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1202          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1203          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1204          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1205          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1206          Tok.isNot(tok::lesslessequal) &&
1207          // Colon is used in labels, base class lists, initializer lists,
1208          // range-based for loops, ternary operator, but should never be the
1209          // first token in an unwrapped line.
1210          Tok.isNot(tok::colon) &&
1211          // 'noexcept' is a trailing annotation.
1212          Tok.isNot(tok::kw_noexcept);
1213 }
1214 
1215 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1216                           const FormatToken *FormatTok) {
1217   // FIXME: This returns true for C/C++ keywords like 'struct'.
1218   return FormatTok->is(tok::identifier) &&
1219          (!FormatTok->Tok.getIdentifierInfo() ||
1220           !FormatTok->isOneOf(
1221               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1222               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1223               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1224               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1225               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1226               Keywords.kw_instanceof, Keywords.kw_interface,
1227               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1228 }
1229 
1230 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1231                                  const FormatToken *FormatTok) {
1232   return FormatTok->Tok.isLiteral() ||
1233          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1234          mustBeJSIdent(Keywords, FormatTok);
1235 }
1236 
1237 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1238 // when encountered after a value (see mustBeJSIdentOrValue).
1239 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1240                            const FormatToken *FormatTok) {
1241   return FormatTok->isOneOf(
1242       tok::kw_return, Keywords.kw_yield,
1243       // conditionals
1244       tok::kw_if, tok::kw_else,
1245       // loops
1246       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1247       // switch/case
1248       tok::kw_switch, tok::kw_case,
1249       // exceptions
1250       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1251       // declaration
1252       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1253       Keywords.kw_async, Keywords.kw_function,
1254       // import/export
1255       Keywords.kw_import, tok::kw_export);
1256 }
1257 
1258 // Checks whether a token is a type in K&R C (aka C78).
1259 static bool isC78Type(const FormatToken &Tok) {
1260   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1261                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1262                      tok::identifier);
1263 }
1264 
1265 // This function checks whether a token starts the first parameter declaration
1266 // in a K&R C (aka C78) function definition, e.g.:
1267 //   int f(a, b)
1268 //   short a, b;
1269 //   {
1270 //      return a + b;
1271 //   }
1272 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1273                                const FormatToken *FuncName) {
1274   assert(Tok);
1275   assert(Next);
1276   assert(FuncName);
1277 
1278   if (FuncName->isNot(tok::identifier))
1279     return false;
1280 
1281   const FormatToken *Prev = FuncName->Previous;
1282   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1283     return false;
1284 
1285   if (!isC78Type(*Tok) &&
1286       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1287     return false;
1288   }
1289 
1290   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1291     return false;
1292 
1293   Tok = Tok->Previous;
1294   if (!Tok || Tok->isNot(tok::r_paren))
1295     return false;
1296 
1297   Tok = Tok->Previous;
1298   if (!Tok || Tok->isNot(tok::identifier))
1299     return false;
1300 
1301   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1302 }
1303 
1304 bool UnwrappedLineParser::parseModuleImport() {
1305   assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1306 
1307   if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1308       !Token->Tok.getIdentifierInfo() &&
1309       !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1310     return false;
1311   }
1312 
1313   nextToken();
1314   while (!eof()) {
1315     if (FormatTok->is(tok::colon)) {
1316       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1317     }
1318     // Handle import <foo/bar.h> as we would an include statement.
1319     else if (FormatTok->is(tok::less)) {
1320       nextToken();
1321       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1322         // Mark tokens up to the trailing line comments as implicit string
1323         // literals.
1324         if (FormatTok->isNot(tok::comment) &&
1325             !FormatTok->TokenText.startswith("//")) {
1326           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1327         }
1328         nextToken();
1329       }
1330     }
1331     if (FormatTok->is(tok::semi)) {
1332       nextToken();
1333       break;
1334     }
1335     nextToken();
1336   }
1337 
1338   addUnwrappedLine();
1339   return true;
1340 }
1341 
// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // No line is ever terminated unless a newline separates the two tokens. If
  // comments precede the next token, the first comment's newline count is
  // what decides.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  // A template string token ending in "${" opens an embedded expression.
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A '!' on a new line after a value starts a new line.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  // A template string token starting with "}" closes an embedded expression.
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on a new line, outside of a template expression boundary, that
  // follows a value, ']', ')', '++' or '--' starts a new line.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus))) {
    return addUnwrappedLine();
  }
  // A declaration or statement keyword on a new line after a value or ')'
  // starts a new line.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next)) {
    return addUnwrappedLine();
  }
}
1389 
1390 void UnwrappedLineParser::parseStructuralElement(
1391     bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
1392     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1393   if (Style.Language == FormatStyle::LK_TableGen &&
1394       FormatTok->is(tok::pp_include)) {
1395     nextToken();
1396     if (FormatTok->is(tok::string_literal))
1397       nextToken();
1398     addUnwrappedLine();
1399     return;
1400   }
1401 
1402   if (Style.isVerilog()) {
1403     if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1404       parseForOrWhileLoop(/*HasParens=*/false);
1405       return;
1406     }
1407     if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1408       parseForOrWhileLoop();
1409       return;
1410     }
1411     if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1412                            Keywords.kw_assume, Keywords.kw_cover)) {
1413       parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1414       return;
1415     }
1416 
1417     // Skip things that can exist before keywords like 'if' and 'case'.
1418     while (true) {
1419       if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1420                              Keywords.kw_unique0)) {
1421         nextToken();
1422       } else if (FormatTok->is(tok::l_paren) &&
1423                  Tokens->peekNextToken()->is(tok::star)) {
1424         parseParens();
1425       } else {
1426         break;
1427       }
1428     }
1429   }
1430 
1431   // Tokens that only make sense at the beginning of a line.
1432   switch (FormatTok->Tok.getKind()) {
1433   case tok::kw_asm:
1434     nextToken();
1435     if (FormatTok->is(tok::l_brace)) {
1436       FormatTok->setFinalizedType(TT_InlineASMBrace);
1437       nextToken();
1438       while (FormatTok && !eof()) {
1439         if (FormatTok->is(tok::r_brace)) {
1440           FormatTok->setFinalizedType(TT_InlineASMBrace);
1441           nextToken();
1442           addUnwrappedLine();
1443           break;
1444         }
1445         FormatTok->Finalized = true;
1446         nextToken();
1447       }
1448     }
1449     break;
1450   case tok::kw_namespace:
1451     parseNamespace();
1452     return;
1453   case tok::kw_public:
1454   case tok::kw_protected:
1455   case tok::kw_private:
1456     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1457         Style.isCSharp()) {
1458       nextToken();
1459     } else {
1460       parseAccessSpecifier();
1461     }
1462     return;
1463   case tok::kw_if: {
1464     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1465       // field/method declaration.
1466       break;
1467     }
1468     FormatToken *Tok = parseIfThenElse(IfKind);
1469     if (IfLeftBrace)
1470       *IfLeftBrace = Tok;
1471     return;
1472   }
1473   case tok::kw_for:
1474   case tok::kw_while:
1475     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1476       // field/method declaration.
1477       break;
1478     }
1479     parseForOrWhileLoop();
1480     return;
1481   case tok::kw_do:
1482     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1483       // field/method declaration.
1484       break;
1485     }
1486     parseDoWhile();
1487     if (HasDoWhile)
1488       *HasDoWhile = true;
1489     return;
1490   case tok::kw_switch:
1491     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1492       // 'switch: string' field declaration.
1493       break;
1494     }
1495     parseSwitch();
1496     return;
1497   case tok::kw_default:
1498     // In Verilog default along with other labels are handled in the next loop.
1499     if (Style.isVerilog())
1500       break;
1501     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1502       // 'default: string' field declaration.
1503       break;
1504     }
1505     nextToken();
1506     if (FormatTok->is(tok::colon)) {
1507       FormatTok->setFinalizedType(TT_CaseLabelColon);
1508       parseLabel();
1509       return;
1510     }
1511     // e.g. "default void f() {}" in a Java interface.
1512     break;
1513   case tok::kw_case:
1514     // Proto: there are no switch/case statements.
1515     if (Style.isProto()) {
1516       nextToken();
1517       return;
1518     }
1519     if (Style.isVerilog()) {
1520       parseBlock();
1521       addUnwrappedLine();
1522       return;
1523     }
1524     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1525       // 'case: string' field declaration.
1526       nextToken();
1527       break;
1528     }
1529     parseCaseLabel();
1530     return;
1531   case tok::kw_try:
1532   case tok::kw___try:
1533     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1534       // field/method declaration.
1535       break;
1536     }
1537     parseTryCatch();
1538     return;
1539   case tok::kw_extern:
1540     nextToken();
1541     if (Style.isVerilog()) {
1542       // In Verilog and extern module declaration looks like a start of module.
1543       // But there is no body and endmodule. So we handle it separately.
1544       if (Keywords.isVerilogHierarchy(*FormatTok)) {
1545         parseVerilogHierarchyHeader();
1546         return;
1547       }
1548     } else if (FormatTok->is(tok::string_literal)) {
1549       nextToken();
1550       if (FormatTok->is(tok::l_brace)) {
1551         if (Style.BraceWrapping.AfterExternBlock)
1552           addUnwrappedLine();
1553         // Either we indent or for backwards compatibility we follow the
1554         // AfterExternBlock style.
1555         unsigned AddLevels =
1556             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1557                     (Style.BraceWrapping.AfterExternBlock &&
1558                      Style.IndentExternBlock ==
1559                          FormatStyle::IEBS_AfterExternBlock)
1560                 ? 1u
1561                 : 0u;
1562         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1563         addUnwrappedLine();
1564         return;
1565       }
1566     }
1567     break;
1568   case tok::kw_export:
1569     if (Style.isJavaScript()) {
1570       parseJavaScriptEs6ImportExport();
1571       return;
1572     }
1573     if (Style.isCpp()) {
1574       nextToken();
1575       if (FormatTok->is(tok::kw_namespace)) {
1576         parseNamespace();
1577         return;
1578       }
1579       if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1580         return;
1581     }
1582     break;
1583   case tok::kw_inline:
1584     nextToken();
1585     if (FormatTok->is(tok::kw_namespace)) {
1586       parseNamespace();
1587       return;
1588     }
1589     break;
1590   case tok::identifier:
1591     if (FormatTok->is(TT_ForEachMacro)) {
1592       parseForOrWhileLoop();
1593       return;
1594     }
1595     if (FormatTok->is(TT_MacroBlockBegin)) {
1596       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1597                  /*MunchSemi=*/false);
1598       return;
1599     }
1600     if (FormatTok->is(Keywords.kw_import)) {
1601       if (Style.isJavaScript()) {
1602         parseJavaScriptEs6ImportExport();
1603         return;
1604       }
1605       if (Style.Language == FormatStyle::LK_Proto) {
1606         nextToken();
1607         if (FormatTok->is(tok::kw_public))
1608           nextToken();
1609         if (!FormatTok->is(tok::string_literal))
1610           return;
1611         nextToken();
1612         if (FormatTok->is(tok::semi))
1613           nextToken();
1614         addUnwrappedLine();
1615         return;
1616       }
1617       if (Style.isCpp() && parseModuleImport())
1618         return;
1619     }
1620     if (Style.isCpp() &&
1621         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1622                            Keywords.kw_slots, Keywords.kw_qslots)) {
1623       nextToken();
1624       if (FormatTok->is(tok::colon)) {
1625         nextToken();
1626         addUnwrappedLine();
1627         return;
1628       }
1629     }
1630     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1631       parseStatementMacro();
1632       return;
1633     }
1634     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1635       parseNamespace();
1636       return;
1637     }
1638     // In all other cases, parse the declaration.
1639     break;
1640   default:
1641     break;
1642   }
1643   do {
1644     const FormatToken *Previous = FormatTok->Previous;
1645     switch (FormatTok->Tok.getKind()) {
1646     case tok::at:
1647       nextToken();
1648       if (FormatTok->is(tok::l_brace)) {
1649         nextToken();
1650         parseBracedList();
1651         break;
1652       } else if (Style.Language == FormatStyle::LK_Java &&
1653                  FormatTok->is(Keywords.kw_interface)) {
1654         nextToken();
1655         break;
1656       }
1657       switch (FormatTok->Tok.getObjCKeywordID()) {
1658       case tok::objc_public:
1659       case tok::objc_protected:
1660       case tok::objc_package:
1661       case tok::objc_private:
1662         return parseAccessSpecifier();
1663       case tok::objc_interface:
1664       case tok::objc_implementation:
1665         return parseObjCInterfaceOrImplementation();
1666       case tok::objc_protocol:
1667         if (parseObjCProtocol())
1668           return;
1669         break;
1670       case tok::objc_end:
1671         return; // Handled by the caller.
1672       case tok::objc_optional:
1673       case tok::objc_required:
1674         nextToken();
1675         addUnwrappedLine();
1676         return;
1677       case tok::objc_autoreleasepool:
1678         nextToken();
1679         if (FormatTok->is(tok::l_brace)) {
1680           if (Style.BraceWrapping.AfterControlStatement ==
1681               FormatStyle::BWACS_Always) {
1682             addUnwrappedLine();
1683           }
1684           parseBlock();
1685         }
1686         addUnwrappedLine();
1687         return;
1688       case tok::objc_synchronized:
1689         nextToken();
1690         if (FormatTok->is(tok::l_paren)) {
1691           // Skip synchronization object
1692           parseParens();
1693         }
1694         if (FormatTok->is(tok::l_brace)) {
1695           if (Style.BraceWrapping.AfterControlStatement ==
1696               FormatStyle::BWACS_Always) {
1697             addUnwrappedLine();
1698           }
1699           parseBlock();
1700         }
1701         addUnwrappedLine();
1702         return;
1703       case tok::objc_try:
1704         // This branch isn't strictly necessary (the kw_try case below would
1705         // do this too after the tok::at is parsed above).  But be explicit.
1706         parseTryCatch();
1707         return;
1708       default:
1709         break;
1710       }
1711       break;
1712     case tok::kw_requires: {
1713       if (Style.isCpp()) {
1714         bool ParsedClause = parseRequires();
1715         if (ParsedClause)
1716           return;
1717       } else {
1718         nextToken();
1719       }
1720       break;
1721     }
1722     case tok::kw_enum:
1723       // Ignore if this is part of "template <enum ...".
1724       if (Previous && Previous->is(tok::less)) {
1725         nextToken();
1726         break;
1727       }
1728 
1729       // parseEnum falls through and does not yet add an unwrapped line as an
1730       // enum definition can start a structural element.
1731       if (!parseEnum())
1732         break;
1733       // This only applies to C++ and Verilog.
1734       if (!Style.isCpp() && !Style.isVerilog()) {
1735         addUnwrappedLine();
1736         return;
1737       }
1738       break;
1739     case tok::kw_typedef:
1740       nextToken();
1741       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1742                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1743                              Keywords.kw_CF_CLOSED_ENUM,
1744                              Keywords.kw_NS_CLOSED_ENUM)) {
1745         parseEnum();
1746       }
1747       break;
1748     case tok::kw_class:
1749       if (Style.isVerilog()) {
1750         parseBlock();
1751         addUnwrappedLine();
1752         return;
1753       }
1754       [[fallthrough]];
1755     case tok::kw_struct:
1756     case tok::kw_union:
1757       if (parseStructLike())
1758         return;
1759       break;
1760     case tok::period:
1761       nextToken();
1762       // In Java, classes have an implicit static member "class".
1763       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1764           FormatTok->is(tok::kw_class)) {
1765         nextToken();
1766       }
1767       if (Style.isJavaScript() && FormatTok &&
1768           FormatTok->Tok.getIdentifierInfo()) {
1769         // JavaScript only has pseudo keywords, all keywords are allowed to
1770         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1771         nextToken();
1772       }
1773       break;
1774     case tok::semi:
1775       nextToken();
1776       addUnwrappedLine();
1777       return;
1778     case tok::r_brace:
1779       addUnwrappedLine();
1780       return;
1781     case tok::l_paren: {
1782       parseParens();
1783       // Break the unwrapped line if a K&R C function definition has a parameter
1784       // declaration.
1785       if (!IsTopLevel || !Style.isCpp() || !Previous || eof())
1786         break;
1787       if (isC78ParameterDecl(FormatTok,
1788                              Tokens->peekNextToken(/*SkipComment=*/true),
1789                              Previous)) {
1790         addUnwrappedLine();
1791         return;
1792       }
1793       break;
1794     }
1795     case tok::kw_operator:
1796       nextToken();
1797       if (FormatTok->isBinaryOperator())
1798         nextToken();
1799       break;
1800     case tok::caret:
1801       nextToken();
1802       // Block return type.
1803       if (FormatTok->Tok.isAnyIdentifier() ||
1804           FormatTok->isSimpleTypeSpecifier()) {
1805         nextToken();
1806         // Return types: pointers are ok too.
1807         while (FormatTok->is(tok::star))
1808           nextToken();
1809       }
1810       // Block argument list.
1811       if (FormatTok->is(tok::l_paren))
1812         parseParens();
1813       // Block body.
1814       if (FormatTok->is(tok::l_brace))
1815         parseChildBlock();
1816       break;
1817     case tok::l_brace:
1818       if (NextLBracesType != TT_Unknown)
1819         FormatTok->setFinalizedType(NextLBracesType);
1820       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1821         // A block outside of parentheses must be the last part of a
1822         // structural element.
1823         // FIXME: Figure out cases where this is not true, and add projections
1824         // for them (the one we know is missing are lambdas).
1825         if (Style.Language == FormatStyle::LK_Java &&
1826             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1827           // If necessary, we could set the type to something different than
1828           // TT_FunctionLBrace.
1829           if (Style.BraceWrapping.AfterControlStatement ==
1830               FormatStyle::BWACS_Always) {
1831             addUnwrappedLine();
1832           }
1833         } else if (Style.BraceWrapping.AfterFunction) {
1834           addUnwrappedLine();
1835         }
1836         FormatTok->setFinalizedType(TT_FunctionLBrace);
1837         parseBlock();
1838         addUnwrappedLine();
1839         return;
1840       }
1841       // Otherwise this was a braced init list, and the structural
1842       // element continues.
1843       break;
1844     case tok::kw_try:
1845       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1846         // field/method declaration.
1847         nextToken();
1848         break;
1849       }
1850       // We arrive here when parsing function-try blocks.
1851       if (Style.BraceWrapping.AfterFunction)
1852         addUnwrappedLine();
1853       parseTryCatch();
1854       return;
1855     case tok::identifier: {
1856       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1857           Line->MustBeDeclaration) {
1858         addUnwrappedLine();
1859         parseCSharpGenericTypeConstraint();
1860         break;
1861       }
1862       if (FormatTok->is(TT_MacroBlockEnd)) {
1863         addUnwrappedLine();
1864         return;
1865       }
1866 
1867       // Function declarations (as opposed to function expressions) are parsed
1868       // on their own unwrapped line by continuing this loop. Function
1869       // expressions (functions that are not on their own line) must not create
1870       // a new unwrapped line, so they are special cased below.
1871       size_t TokenCount = Line->Tokens.size();
1872       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1873           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1874                                                      Keywords.kw_async)))) {
1875         tryToParseJSFunction();
1876         break;
1877       }
1878       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1879           FormatTok->is(Keywords.kw_interface)) {
1880         if (Style.isJavaScript()) {
1881           // In JavaScript/TypeScript, "interface" can be used as a standalone
1882           // identifier, e.g. in `var interface = 1;`. If "interface" is
1883           // followed by another identifier, it is very like to be an actual
1884           // interface declaration.
1885           unsigned StoredPosition = Tokens->getPosition();
1886           FormatToken *Next = Tokens->getNextToken();
1887           FormatTok = Tokens->setPosition(StoredPosition);
1888           if (!mustBeJSIdent(Keywords, Next)) {
1889             nextToken();
1890             break;
1891           }
1892         }
1893         parseRecord();
1894         addUnwrappedLine();
1895         return;
1896       }
1897 
1898       if (Style.isVerilog()) {
1899         if (FormatTok->is(Keywords.kw_table)) {
1900           parseVerilogTable();
1901           return;
1902         }
1903         if (Keywords.isVerilogBegin(*FormatTok) ||
1904             Keywords.isVerilogHierarchy(*FormatTok)) {
1905           parseBlock();
1906           addUnwrappedLine();
1907           return;
1908         }
1909       }
1910 
1911       if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1912         if (parseStructLike())
1913           return;
1914         break;
1915       }
1916 
1917       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1918         parseStatementMacro();
1919         return;
1920       }
1921 
1922       // See if the following token should start a new unwrapped line.
1923       StringRef Text = FormatTok->TokenText;
1924 
1925       FormatToken *PreviousToken = FormatTok;
1926       nextToken();
1927 
1928       // JS doesn't have macros, and within classes colons indicate fields, not
1929       // labels.
1930       if (Style.isJavaScript())
1931         break;
1932 
1933       auto OneTokenSoFar = [&]() {
1934         auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1935         while (I != E && I->Tok->is(tok::comment))
1936           ++I;
1937         while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
1938           ++I;
1939         return I != E && (++I == E);
1940       };
1941       if (OneTokenSoFar()) {
1942         // In Verilog labels can be any expression, so we don't do them here.
1943         if (!Style.isVerilog() && FormatTok->is(tok::colon) &&
1944             !Line->MustBeDeclaration) {
1945           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1946           FormatTok->setFinalizedType(TT_GotoLabelColon);
1947           parseLabel(!Style.IndentGotoLabels);
1948           if (HasLabel)
1949             *HasLabel = true;
1950           return;
1951         }
1952         // Recognize function-like macro usages without trailing semicolon as
1953         // well as free-standing macros like Q_OBJECT.
1954         bool FunctionLike = FormatTok->is(tok::l_paren);
1955         if (FunctionLike)
1956           parseParens();
1957 
1958         bool FollowedByNewline =
1959             CommentsBeforeNextToken.empty()
1960                 ? FormatTok->NewlinesBefore > 0
1961                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1962 
1963         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1964             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1965           if (PreviousToken->isNot(TT_UntouchableMacroFunc))
1966             PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1967           addUnwrappedLine();
1968           return;
1969         }
1970       }
1971       break;
1972     }
1973     case tok::equal:
1974       if ((Style.isJavaScript() || Style.isCSharp()) &&
1975           FormatTok->is(TT_FatArrow)) {
1976         tryToParseChildBlock();
1977         break;
1978       }
1979 
1980       nextToken();
1981       if (FormatTok->is(tok::l_brace)) {
1982         // Block kind should probably be set to BK_BracedInit for any language.
1983         // C# needs this change to ensure that array initialisers and object
1984         // initialisers are indented the same way.
1985         if (Style.isCSharp())
1986           FormatTok->setBlockKind(BK_BracedInit);
1987         nextToken();
1988         parseBracedList();
1989       } else if (Style.Language == FormatStyle::LK_Proto &&
1990                  FormatTok->is(tok::less)) {
1991         nextToken();
1992         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1993                         /*ClosingBraceKind=*/tok::greater);
1994       }
1995       break;
1996     case tok::l_square:
1997       parseSquare();
1998       break;
1999     case tok::kw_new:
2000       parseNew();
2001       break;
2002     case tok::kw_case:
2003       // Proto: there are no switch/case statements.
2004       if (Style.isProto()) {
2005         nextToken();
2006         return;
2007       }
2008       // In Verilog switch is called case.
2009       if (Style.isVerilog()) {
2010         parseBlock();
2011         addUnwrappedLine();
2012         return;
2013       }
2014       if (Style.isJavaScript() && Line->MustBeDeclaration) {
2015         // 'case: string' field declaration.
2016         nextToken();
2017         break;
2018       }
2019       parseCaseLabel();
2020       break;
2021     case tok::kw_default:
2022       nextToken();
2023       if (Style.isVerilog()) {
2024         if (FormatTok->is(tok::colon)) {
2025           // The label will be handled in the next iteration.
2026           break;
2027         }
2028         if (FormatTok->is(Keywords.kw_clocking)) {
2029           // A default clocking block.
2030           parseBlock();
2031           addUnwrappedLine();
2032           return;
2033         }
2034         parseVerilogCaseLabel();
2035         return;
2036       }
2037       break;
2038     case tok::colon:
2039       nextToken();
2040       if (Style.isVerilog()) {
2041         parseVerilogCaseLabel();
2042         return;
2043       }
2044       break;
2045     default:
2046       nextToken();
2047       break;
2048     }
2049   } while (!eof());
2050 }
2051 
2052 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2053   assert(FormatTok->is(tok::l_brace));
2054   if (!Style.isCSharp())
2055     return false;
2056   // See if it's a property accessor.
2057   if (FormatTok->Previous->isNot(tok::identifier))
2058     return false;
2059 
2060   // See if we are inside a property accessor.
2061   //
2062   // Record the current tokenPosition so that we can advance and
2063   // reset the current token. `Next` is not set yet so we need
2064   // another way to advance along the token stream.
2065   unsigned int StoredPosition = Tokens->getPosition();
2066   FormatToken *Tok = Tokens->getNextToken();
2067 
2068   // A trivial property accessor is of the form:
2069   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2070   // Track these as they do not require line breaks to be introduced.
2071   bool HasSpecialAccessor = false;
2072   bool IsTrivialPropertyAccessor = true;
2073   while (!eof()) {
2074     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2075                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2076                      Keywords.kw_init, Keywords.kw_set)) {
2077       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2078         HasSpecialAccessor = true;
2079       Tok = Tokens->getNextToken();
2080       continue;
2081     }
2082     if (Tok->isNot(tok::r_brace))
2083       IsTrivialPropertyAccessor = false;
2084     break;
2085   }
2086 
2087   if (!HasSpecialAccessor) {
2088     Tokens->setPosition(StoredPosition);
2089     return false;
2090   }
2091 
2092   // Try to parse the property accessor:
2093   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2094   Tokens->setPosition(StoredPosition);
2095   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2096     addUnwrappedLine();
2097   nextToken();
2098   do {
2099     switch (FormatTok->Tok.getKind()) {
2100     case tok::r_brace:
2101       nextToken();
2102       if (FormatTok->is(tok::equal)) {
2103         while (!eof() && FormatTok->isNot(tok::semi))
2104           nextToken();
2105         nextToken();
2106       }
2107       addUnwrappedLine();
2108       return true;
2109     case tok::l_brace:
2110       ++Line->Level;
2111       parseBlock(/*MustBeDeclaration=*/true);
2112       addUnwrappedLine();
2113       --Line->Level;
2114       break;
2115     case tok::equal:
2116       if (FormatTok->is(TT_FatArrow)) {
2117         ++Line->Level;
2118         do {
2119           nextToken();
2120         } while (!eof() && FormatTok->isNot(tok::semi));
2121         nextToken();
2122         addUnwrappedLine();
2123         --Line->Level;
2124         break;
2125       }
2126       nextToken();
2127       break;
2128     default:
2129       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2130                              Keywords.kw_set) &&
2131           !IsTrivialPropertyAccessor) {
2132         // Non-trivial get/set needs to be on its own line.
2133         addUnwrappedLine();
2134       }
2135       nextToken();
2136     }
2137   } while (!eof());
2138 
2139   // Unreachable for well-formed code (paired '{' and '}').
2140   return true;
2141 }
2142 
2143 bool UnwrappedLineParser::tryToParseLambda() {
2144   assert(FormatTok->is(tok::l_square));
2145   if (!Style.isCpp()) {
2146     nextToken();
2147     return false;
2148   }
2149   FormatToken &LSquare = *FormatTok;
2150   if (!tryToParseLambdaIntroducer())
2151     return false;
2152 
2153   bool SeenArrow = false;
2154   bool InTemplateParameterList = false;
2155 
2156   while (FormatTok->isNot(tok::l_brace)) {
2157     if (FormatTok->isSimpleTypeSpecifier()) {
2158       nextToken();
2159       continue;
2160     }
2161     switch (FormatTok->Tok.getKind()) {
2162     case tok::l_brace:
2163       break;
2164     case tok::l_paren:
2165       parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2166       break;
2167     case tok::l_square:
2168       parseSquare();
2169       break;
2170     case tok::less:
2171       assert(FormatTok->Previous);
2172       if (FormatTok->Previous->is(tok::r_square))
2173         InTemplateParameterList = true;
2174       nextToken();
2175       break;
2176     case tok::kw_auto:
2177     case tok::kw_class:
2178     case tok::kw_template:
2179     case tok::kw_typename:
2180     case tok::amp:
2181     case tok::star:
2182     case tok::kw_const:
2183     case tok::kw_constexpr:
2184     case tok::kw_consteval:
2185     case tok::comma:
2186     case tok::greater:
2187     case tok::identifier:
2188     case tok::numeric_constant:
2189     case tok::coloncolon:
2190     case tok::kw_mutable:
2191     case tok::kw_noexcept:
2192     case tok::kw_static:
2193       nextToken();
2194       break;
2195     // Specialization of a template with an integer parameter can contain
2196     // arithmetic, logical, comparison and ternary operators.
2197     //
2198     // FIXME: This also accepts sequences of operators that are not in the scope
2199     // of a template argument list.
2200     //
2201     // In a C++ lambda a template type can only occur after an arrow. We use
2202     // this as an heuristic to distinguish between Objective-C expressions
2203     // followed by an `a->b` expression, such as:
2204     // ([obj func:arg] + a->b)
2205     // Otherwise the code below would parse as a lambda.
2206     //
2207     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2208     // explicit template lists: []<bool b = true && false>(U &&u){}
2209     case tok::plus:
2210     case tok::minus:
2211     case tok::exclaim:
2212     case tok::tilde:
2213     case tok::slash:
2214     case tok::percent:
2215     case tok::lessless:
2216     case tok::pipe:
2217     case tok::pipepipe:
2218     case tok::ampamp:
2219     case tok::caret:
2220     case tok::equalequal:
2221     case tok::exclaimequal:
2222     case tok::greaterequal:
2223     case tok::lessequal:
2224     case tok::question:
2225     case tok::colon:
2226     case tok::ellipsis:
2227     case tok::kw_true:
2228     case tok::kw_false:
2229       if (SeenArrow || InTemplateParameterList) {
2230         nextToken();
2231         break;
2232       }
2233       return true;
2234     case tok::arrow:
2235       // This might or might not actually be a lambda arrow (this could be an
2236       // ObjC method invocation followed by a dereferencing arrow). We might
2237       // reset this back to TT_Unknown in TokenAnnotator.
2238       FormatTok->setFinalizedType(TT_LambdaArrow);
2239       SeenArrow = true;
2240       nextToken();
2241       break;
2242     case tok::kw_requires: {
2243       auto *RequiresToken = FormatTok;
2244       nextToken();
2245       parseRequiresClause(RequiresToken);
2246       break;
2247     }
2248     default:
2249       return true;
2250     }
2251   }
2252   FormatTok->setFinalizedType(TT_LambdaLBrace);
2253   LSquare.setFinalizedType(TT_LambdaLSquare);
2254   parseChildBlock();
2255   return true;
2256 }
2257 
2258 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2259   const FormatToken *Previous = FormatTok->Previous;
2260   const FormatToken *LeftSquare = FormatTok;
2261   nextToken();
2262   if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2263                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2264                                         tok::kw_co_yield, tok::kw_co_return)) ||
2265                     Previous->closesScope())) ||
2266       LeftSquare->isCppStructuredBinding(Style)) {
2267     return false;
2268   }
2269   if (FormatTok->is(tok::l_square))
2270     return false;
2271   if (FormatTok->is(tok::r_square)) {
2272     const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2273     if (Next->is(tok::greater))
2274       return false;
2275   }
2276   parseSquare(/*LambdaIntroducer=*/true);
2277   return true;
2278 }
2279 
2280 void UnwrappedLineParser::tryToParseJSFunction() {
2281   assert(FormatTok->is(Keywords.kw_function) ||
2282          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2283   if (FormatTok->is(Keywords.kw_async))
2284     nextToken();
2285   // Consume "function".
2286   nextToken();
2287 
2288   // Consume * (generator function). Treat it like C++'s overloaded operators.
2289   if (FormatTok->is(tok::star)) {
2290     FormatTok->setFinalizedType(TT_OverloadedOperator);
2291     nextToken();
2292   }
2293 
2294   // Consume function name.
2295   if (FormatTok->is(tok::identifier))
2296     nextToken();
2297 
2298   if (FormatTok->isNot(tok::l_paren))
2299     return;
2300 
2301   // Parse formal parameter list.
2302   parseParens();
2303 
2304   if (FormatTok->is(tok::colon)) {
2305     // Parse a type definition.
2306     nextToken();
2307 
2308     // Eat the type declaration. For braced inline object types, balance braces,
2309     // otherwise just parse until finding an l_brace for the function body.
2310     if (FormatTok->is(tok::l_brace))
2311       tryToParseBracedList();
2312     else
2313       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2314         nextToken();
2315   }
2316 
2317   if (FormatTok->is(tok::semi))
2318     return;
2319 
2320   parseChildBlock();
2321 }
2322 
2323 bool UnwrappedLineParser::tryToParseBracedList() {
2324   if (FormatTok->is(BK_Unknown))
2325     calculateBraceTypes();
2326   assert(FormatTok->isNot(BK_Unknown));
2327   if (FormatTok->is(BK_Block))
2328     return false;
2329   nextToken();
2330   parseBracedList();
2331   return true;
2332 }
2333 
2334 bool UnwrappedLineParser::tryToParseChildBlock() {
2335   assert(Style.isJavaScript() || Style.isCSharp());
2336   assert(FormatTok->is(TT_FatArrow));
2337   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2338   // They always start an expression or a child block if followed by a curly
2339   // brace.
2340   nextToken();
2341   if (FormatTok->isNot(tok::l_brace))
2342     return false;
2343   parseChildBlock();
2344   return true;
2345 }
2346 
2347 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2348                                           bool IsEnum,
2349                                           tok::TokenKind ClosingBraceKind) {
2350   bool HasError = false;
2351 
2352   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2353   // replace this by using parseAssignmentExpression() inside.
2354   do {
2355     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2356         tryToParseChildBlock()) {
2357       continue;
2358     }
2359     if (Style.isJavaScript()) {
2360       if (FormatTok->is(Keywords.kw_function) ||
2361           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2362         tryToParseJSFunction();
2363         continue;
2364       }
2365       if (FormatTok->is(tok::l_brace)) {
2366         // Could be a method inside of a braced list `{a() { return 1; }}`.
2367         if (tryToParseBracedList())
2368           continue;
2369         parseChildBlock();
2370       }
2371     }
2372     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2373       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2374         addUnwrappedLine();
2375       nextToken();
2376       return !HasError;
2377     }
2378     switch (FormatTok->Tok.getKind()) {
2379     case tok::l_square:
2380       if (Style.isCSharp())
2381         parseSquare();
2382       else
2383         tryToParseLambda();
2384       break;
2385     case tok::l_paren:
2386       parseParens();
2387       // JavaScript can just have free standing methods and getters/setters in
2388       // object literals. Detect them by a "{" following ")".
2389       if (Style.isJavaScript()) {
2390         if (FormatTok->is(tok::l_brace))
2391           parseChildBlock();
2392         break;
2393       }
2394       break;
2395     case tok::l_brace:
2396       // Assume there are no blocks inside a braced init list apart
2397       // from the ones we explicitly parse out (like lambdas).
2398       FormatTok->setBlockKind(BK_BracedInit);
2399       nextToken();
2400       parseBracedList();
2401       break;
2402     case tok::less:
2403       if (Style.Language == FormatStyle::LK_Proto ||
2404           ClosingBraceKind == tok::greater) {
2405         nextToken();
2406         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2407                         /*ClosingBraceKind=*/tok::greater);
2408       } else {
2409         nextToken();
2410       }
2411       break;
2412     case tok::semi:
2413       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2414       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2415       // used for error recovery if we have otherwise determined that this is
2416       // a braced list.
2417       if (Style.isJavaScript()) {
2418         nextToken();
2419         break;
2420       }
2421       HasError = true;
2422       if (!ContinueOnSemicolons)
2423         return !HasError;
2424       nextToken();
2425       break;
2426     case tok::comma:
2427       nextToken();
2428       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2429         addUnwrappedLine();
2430       break;
2431     default:
2432       nextToken();
2433       break;
2434     }
2435   } while (!eof());
2436   return false;
2437 }
2438 
2439 /// \brief Parses a pair of parentheses (and everything between them).
2440 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2441 /// double ampersands. This applies for all nested scopes as well.
2442 ///
2443 /// Returns whether there is a `=` token between the parentheses.
2444 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2445   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2446   auto *LeftParen = FormatTok;
2447   bool SeenEqual = false;
2448   const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2449   nextToken();
2450   do {
2451     switch (FormatTok->Tok.getKind()) {
2452     case tok::l_paren:
2453       if (parseParens(AmpAmpTokenType))
2454         SeenEqual = true;
2455       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2456         parseChildBlock();
2457       break;
2458     case tok::r_paren:
2459       if (!MightBeStmtExpr &&
2460           Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2461         const auto *Prev = LeftParen->Previous;
2462         const auto *Next = Tokens->peekNextToken();
2463         const bool DoubleParens =
2464             Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2465         const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2466         const bool Blacklisted =
2467             PrevPrev &&
2468             (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2469              (SeenEqual &&
2470               (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2471                PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2472         const bool ReturnParens =
2473             Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2474             Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2475             Next->is(tok::semi);
2476         if ((DoubleParens && !Blacklisted) || ReturnParens) {
2477           LeftParen->Optional = true;
2478           FormatTok->Optional = true;
2479         }
2480       }
2481       nextToken();
2482       return SeenEqual;
2483     case tok::r_brace:
2484       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2485       return SeenEqual;
2486     case tok::l_square:
2487       tryToParseLambda();
2488       break;
2489     case tok::l_brace:
2490       if (!tryToParseBracedList())
2491         parseChildBlock();
2492       break;
2493     case tok::at:
2494       nextToken();
2495       if (FormatTok->is(tok::l_brace)) {
2496         nextToken();
2497         parseBracedList();
2498       }
2499       break;
2500     case tok::equal:
2501       SeenEqual = true;
2502       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2503         tryToParseChildBlock();
2504       else
2505         nextToken();
2506       break;
2507     case tok::kw_class:
2508       if (Style.isJavaScript())
2509         parseRecord(/*ParseAsExpr=*/true);
2510       else
2511         nextToken();
2512       break;
2513     case tok::identifier:
2514       if (Style.isJavaScript() &&
2515           (FormatTok->is(Keywords.kw_function) ||
2516            FormatTok->startsSequence(Keywords.kw_async,
2517                                      Keywords.kw_function))) {
2518         tryToParseJSFunction();
2519       } else {
2520         nextToken();
2521       }
2522       break;
2523     case tok::kw_requires: {
2524       auto RequiresToken = FormatTok;
2525       nextToken();
2526       parseRequiresExpression(RequiresToken);
2527       break;
2528     }
2529     case tok::ampamp:
2530       if (AmpAmpTokenType != TT_Unknown)
2531         FormatTok->setFinalizedType(AmpAmpTokenType);
2532       [[fallthrough]];
2533     default:
2534       nextToken();
2535       break;
2536     }
2537   } while (!eof());
2538   return SeenEqual;
2539 }
2540 
2541 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2542   if (!LambdaIntroducer) {
2543     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2544     if (tryToParseLambda())
2545       return;
2546   }
2547   do {
2548     switch (FormatTok->Tok.getKind()) {
2549     case tok::l_paren:
2550       parseParens();
2551       break;
2552     case tok::r_square:
2553       nextToken();
2554       return;
2555     case tok::r_brace:
2556       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2557       return;
2558     case tok::l_square:
2559       parseSquare();
2560       break;
2561     case tok::l_brace: {
2562       if (!tryToParseBracedList())
2563         parseChildBlock();
2564       break;
2565     }
2566     case tok::at:
2567       nextToken();
2568       if (FormatTok->is(tok::l_brace)) {
2569         nextToken();
2570         parseBracedList();
2571       }
2572       break;
2573     default:
2574       nextToken();
2575       break;
2576     }
2577   } while (!eof());
2578 }
2579 
2580 void UnwrappedLineParser::keepAncestorBraces() {
2581   if (!Style.RemoveBracesLLVM)
2582     return;
2583 
2584   const int MaxNestingLevels = 2;
2585   const int Size = NestedTooDeep.size();
2586   if (Size >= MaxNestingLevels)
2587     NestedTooDeep[Size - MaxNestingLevels] = true;
2588   NestedTooDeep.push_back(false);
2589 }
2590 
2591 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2592   for (const auto &Token : llvm::reverse(Line.Tokens))
2593     if (Token.Tok->isNot(tok::comment))
2594       return Token.Tok;
2595 
2596   return nullptr;
2597 }
2598 
2599 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2600   FormatToken *Tok = nullptr;
2601 
2602   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2603       PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2604     Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2605               ? getLastNonComment(*Line)
2606               : Line->Tokens.back().Tok;
2607     assert(Tok);
2608     if (Tok->BraceCount < 0) {
2609       assert(Tok->BraceCount == -1);
2610       Tok = nullptr;
2611     } else {
2612       Tok->BraceCount = -1;
2613     }
2614   }
2615 
2616   addUnwrappedLine();
2617   ++Line->Level;
2618   parseStructuralElement();
2619 
2620   if (Tok) {
2621     assert(!Line->InPPDirective);
2622     Tok = nullptr;
2623     for (const auto &L : llvm::reverse(*CurrentLines)) {
2624       if (!L.InPPDirective && getLastNonComment(L)) {
2625         Tok = L.Tokens.back().Tok;
2626         break;
2627       }
2628     }
2629     assert(Tok);
2630     ++Tok->BraceCount;
2631   }
2632 
2633   if (CheckEOF && eof())
2634     addUnwrappedLine();
2635 
2636   --Line->Level;
2637 }
2638 
2639 static void markOptionalBraces(FormatToken *LeftBrace) {
2640   if (!LeftBrace)
2641     return;
2642 
2643   assert(LeftBrace->is(tok::l_brace));
2644 
2645   FormatToken *RightBrace = LeftBrace->MatchingParen;
2646   if (!RightBrace) {
2647     assert(!LeftBrace->Optional);
2648     return;
2649   }
2650 
2651   assert(RightBrace->is(tok::r_brace));
2652   assert(RightBrace->MatchingParen == LeftBrace);
2653   assert(LeftBrace->Optional == RightBrace->Optional);
2654 
2655   LeftBrace->Optional = true;
2656   RightBrace->Optional = true;
2657 }
2658 
2659 void UnwrappedLineParser::handleAttributes() {
2660   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2661   if (FormatTok->is(TT_AttributeMacro))
2662     nextToken();
2663   if (FormatTok->is(tok::l_square))
2664     handleCppAttributes();
2665 }
2666 
2667 bool UnwrappedLineParser::handleCppAttributes() {
2668   // Handle [[likely]] / [[unlikely]] attributes.
2669   assert(FormatTok->is(tok::l_square));
2670   if (!tryToParseSimpleAttribute())
2671     return false;
2672   parseSquare();
2673   return true;
2674 }
2675 
2676 /// Returns whether \c Tok begins a block.
2677 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2678   // FIXME: rename the function or make
2679   // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2680   return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2681                            : Tok.is(tok::l_brace);
2682 }
2683 
/// Parses an `if` statement together with any `else` / `else if` branches.
///
/// When formatting Verilog, this is also entered on `restrict`, `assert`,
/// `assume` and `cover` (see the assertion at the top).
///
/// \param IfKind If non-null and RemoveBracesLLVM is enabled, receives the
/// shape of the parsed statement (IfOnly, IfElse or IfElseIf). It is not
/// written when RemoveBracesLLVM is disabled or on the early Verilog return.
/// \param KeepBraces If true, the braces of this statement are kept even when
/// RemoveBracesLLVM is enabled.
/// \param IsVerilogAssert Enables the Verilog assertion-statement forms
/// (`assert #0`, `assert final`, optional then-action).
/// \returns The left brace of the `if` body when RemoveBracesLLVM is enabled
/// (nullptr if the body is unbraced); nullptr in all other cases.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // `if consteval` always keeps its braces; otherwise they are kept unless
  // RemoveBracesLLVM is on and the caller did not ask to keep them.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or a single identifier between `if` and the condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  // Set when the line holding the closing brace of the `if` block still has to
  // be emitted, i.e. a following `else` may stay on the same line.
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    // Verilog assertion with no then-action, directly followed by `else`.
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if parseBlock() did not pair them up, if this statement
    // was flagged as nested too deep, or if the block reported an `if` without
    // a final `else` (IfOnly / IfElseIf).
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start the `else` branch with a clean too-deep flag.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      // Peek before parsing: does the else block start with `if`?
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if (...) {...} }` whose inner braces are not optional: mark
        // the else braces optional right away and keep the `if` braces.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      // `else if`: parse the nested `if` recursively.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        // A comment sits between `else` and `if`; start the `if` on its own
        // line, one level deeper.
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack while the nested `if` is
        // parsed; it is pushed back right afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No `else`: keep the braces if the block reported an if-else statement.
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is bookkeeping for brace removal only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  // Balances the push done by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: drop the pairing between the `if` braces.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2846 
2847 void UnwrappedLineParser::parseTryCatch() {
2848   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2849   nextToken();
2850   bool NeedsUnwrappedLine = false;
2851   if (FormatTok->is(tok::colon)) {
2852     // We are in a function try block, what comes is an initializer list.
2853     nextToken();
2854 
2855     // In case identifiers were removed by clang-tidy, what might follow is
2856     // multiple commas in sequence - before the first identifier.
2857     while (FormatTok->is(tok::comma))
2858       nextToken();
2859 
2860     while (FormatTok->is(tok::identifier)) {
2861       nextToken();
2862       if (FormatTok->is(tok::l_paren))
2863         parseParens();
2864       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2865           FormatTok->is(tok::l_brace)) {
2866         do {
2867           nextToken();
2868         } while (!FormatTok->is(tok::r_brace));
2869         nextToken();
2870       }
2871 
2872       // In case identifiers were removed by clang-tidy, what might follow is
2873       // multiple commas in sequence - after the first identifier.
2874       while (FormatTok->is(tok::comma))
2875         nextToken();
2876     }
2877   }
2878   // Parse try with resource.
2879   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2880     parseParens();
2881 
2882   keepAncestorBraces();
2883 
2884   if (FormatTok->is(tok::l_brace)) {
2885     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2886     parseBlock();
2887     if (Style.BraceWrapping.BeforeCatch)
2888       addUnwrappedLine();
2889     else
2890       NeedsUnwrappedLine = true;
2891   } else if (!FormatTok->is(tok::kw_catch)) {
2892     // The C++ standard requires a compound-statement after a try.
2893     // If there's none, we try to assume there's a structuralElement
2894     // and try to continue.
2895     addUnwrappedLine();
2896     ++Line->Level;
2897     parseStructuralElement();
2898     --Line->Level;
2899   }
2900   while (true) {
2901     if (FormatTok->is(tok::at))
2902       nextToken();
2903     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2904                              tok::kw___finally) ||
2905           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2906            FormatTok->is(Keywords.kw_finally)) ||
2907           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2908            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2909       break;
2910     }
2911     nextToken();
2912     while (FormatTok->isNot(tok::l_brace)) {
2913       if (FormatTok->is(tok::l_paren)) {
2914         parseParens();
2915         continue;
2916       }
2917       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2918         if (Style.RemoveBracesLLVM)
2919           NestedTooDeep.pop_back();
2920         return;
2921       }
2922       nextToken();
2923     }
2924     NeedsUnwrappedLine = false;
2925     Line->MustBeDeclaration = false;
2926     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2927     parseBlock();
2928     if (Style.BraceWrapping.BeforeCatch)
2929       addUnwrappedLine();
2930     else
2931       NeedsUnwrappedLine = true;
2932   }
2933 
2934   if (Style.RemoveBracesLLVM)
2935     NestedTooDeep.pop_back();
2936 
2937   if (NeedsUnwrappedLine)
2938     addUnwrappedLine();
2939 }
2940 
2941 void UnwrappedLineParser::parseNamespace() {
2942   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2943          "'namespace' expected");
2944 
2945   const FormatToken &InitialToken = *FormatTok;
2946   nextToken();
2947   if (InitialToken.is(TT_NamespaceMacro)) {
2948     parseParens();
2949   } else {
2950     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2951                               tok::l_square, tok::period, tok::l_paren) ||
2952            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2953       if (FormatTok->is(tok::l_square))
2954         parseSquare();
2955       else if (FormatTok->is(tok::l_paren))
2956         parseParens();
2957       else
2958         nextToken();
2959     }
2960   }
2961   if (FormatTok->is(tok::l_brace)) {
2962     if (ShouldBreakBeforeBrace(Style, InitialToken))
2963       addUnwrappedLine();
2964 
2965     unsigned AddLevels =
2966         Style.NamespaceIndentation == FormatStyle::NI_All ||
2967                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2968                  DeclarationScopeStack.size() > 1)
2969             ? 1u
2970             : 0u;
2971     bool ManageWhitesmithsBraces =
2972         AddLevels == 0u &&
2973         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2974 
2975     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2976     // the whole block.
2977     if (ManageWhitesmithsBraces)
2978       ++Line->Level;
2979 
2980     // Munch the semicolon after a namespace. This is more common than one would
2981     // think. Putting the semicolon into its own line is very ugly.
2982     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2983                /*KeepBraces=*/true, /*IfKind=*/nullptr,
2984                ManageWhitesmithsBraces);
2985 
2986     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2987 
2988     if (ManageWhitesmithsBraces)
2989       --Line->Level;
2990   }
2991   // FIXME: Add error handling.
2992 }
2993 
2994 void UnwrappedLineParser::parseNew() {
2995   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2996   nextToken();
2997 
2998   if (Style.isCSharp()) {
2999     do {
3000       // Handle constructor invocation, e.g. `new(field: value)`.
3001       if (FormatTok->is(tok::l_paren))
3002         parseParens();
3003 
3004       // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3005       if (FormatTok->is(tok::l_brace))
3006         parseBracedList();
3007 
3008       if (FormatTok->isOneOf(tok::semi, tok::comma))
3009         return;
3010 
3011       nextToken();
3012     } while (!eof());
3013   }
3014 
3015   if (Style.Language != FormatStyle::LK_Java)
3016     return;
3017 
3018   // In Java, we can parse everything up to the parens, which aren't optional.
3019   do {
3020     // There should not be a ;, { or } before the new's open paren.
3021     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3022       return;
3023 
3024     // Consume the parens.
3025     if (FormatTok->is(tok::l_paren)) {
3026       parseParens();
3027 
3028       // If there is a class body of an anonymous class, consume that as child.
3029       if (FormatTok->is(tok::l_brace))
3030         parseChildBlock();
3031       return;
3032     }
3033     nextToken();
3034   } while (!eof());
3035 }
3036 
3037 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3038   keepAncestorBraces();
3039 
3040   if (isBlockBegin(*FormatTok)) {
3041     if (!KeepBraces)
3042       FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3043     FormatToken *LeftBrace = FormatTok;
3044     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3045     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3046                /*MunchSemi=*/true, KeepBraces);
3047     if (!KeepBraces) {
3048       assert(!NestedTooDeep.empty());
3049       if (!NestedTooDeep.back())
3050         markOptionalBraces(LeftBrace);
3051     }
3052     if (WrapRightBrace)
3053       addUnwrappedLine();
3054   } else {
3055     parseUnbracedBody();
3056   }
3057 
3058   if (!KeepBraces)
3059     NestedTooDeep.pop_back();
3060 }
3061 
3062 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3063   assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3064           (Style.isVerilog() &&
3065            FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3066                               Keywords.kw_always_ff, Keywords.kw_always_latch,
3067                               Keywords.kw_final, Keywords.kw_initial,
3068                               Keywords.kw_foreach, Keywords.kw_forever,
3069                               Keywords.kw_repeat))) &&
3070          "'for', 'while' or foreach macro expected");
3071   const bool KeepBraces = !Style.RemoveBracesLLVM ||
3072                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3073 
3074   nextToken();
3075   // JS' for await ( ...
3076   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3077     nextToken();
3078   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3079     nextToken();
3080   if (HasParens && FormatTok->is(tok::l_paren)) {
3081     // The type is only set for Verilog basically because we were afraid to
3082     // change the existing behavior for loops. See the discussion on D121756 for
3083     // details.
3084     if (Style.isVerilog())
3085       FormatTok->setFinalizedType(TT_ConditionLParen);
3086     parseParens();
3087   }
3088   // Event control.
3089   if (Style.isVerilog())
3090     parseVerilogSensitivityList();
3091 
3092   handleAttributes();
3093   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3094 }
3095 
3096 void UnwrappedLineParser::parseDoWhile() {
3097   assert(FormatTok->is(tok::kw_do) && "'do' expected");
3098   nextToken();
3099 
3100   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3101 
3102   // FIXME: Add error handling.
3103   if (!FormatTok->is(tok::kw_while)) {
3104     addUnwrappedLine();
3105     return;
3106   }
3107 
3108   // If in Whitesmiths mode, the line with the while() needs to be indented
3109   // to the same level as the block.
3110   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3111     ++Line->Level;
3112 
3113   nextToken();
3114   parseStructuralElement();
3115 }
3116 
3117 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3118   nextToken();
3119   unsigned OldLineLevel = Line->Level;
3120   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3121     --Line->Level;
3122   if (LeftAlignLabel)
3123     Line->Level = 0;
3124 
3125   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3126       FormatTok->is(tok::l_brace)) {
3127 
3128     CompoundStatementIndenter Indenter(this, Line->Level,
3129                                        Style.BraceWrapping.AfterCaseLabel,
3130                                        Style.BraceWrapping.IndentBraces);
3131     parseBlock();
3132     if (FormatTok->is(tok::kw_break)) {
3133       if (Style.BraceWrapping.AfterControlStatement ==
3134           FormatStyle::BWACS_Always) {
3135         addUnwrappedLine();
3136         if (!Style.IndentCaseBlocks &&
3137             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3138           ++Line->Level;
3139         }
3140       }
3141       parseStructuralElement();
3142     }
3143     addUnwrappedLine();
3144   } else {
3145     if (FormatTok->is(tok::semi))
3146       nextToken();
3147     addUnwrappedLine();
3148   }
3149   Line->Level = OldLineLevel;
3150   if (FormatTok->isNot(tok::l_brace)) {
3151     parseStructuralElement();
3152     addUnwrappedLine();
3153   }
3154 }
3155 
3156 void UnwrappedLineParser::parseCaseLabel() {
3157   assert(FormatTok->is(tok::kw_case) && "'case' expected");
3158 
3159   // FIXME: fix handling of complex expressions here.
3160   do {
3161     nextToken();
3162     if (FormatTok->is(tok::colon)) {
3163       FormatTok->setFinalizedType(TT_CaseLabelColon);
3164       break;
3165     }
3166   } while (!eof());
3167   parseLabel();
3168 }
3169 
3170 void UnwrappedLineParser::parseSwitch() {
3171   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3172   nextToken();
3173   if (FormatTok->is(tok::l_paren))
3174     parseParens();
3175 
3176   keepAncestorBraces();
3177 
3178   if (FormatTok->is(tok::l_brace)) {
3179     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3180     parseBlock();
3181     addUnwrappedLine();
3182   } else {
3183     addUnwrappedLine();
3184     ++Line->Level;
3185     parseStructuralElement();
3186     --Line->Level;
3187   }
3188 
3189   if (Style.RemoveBracesLLVM)
3190     NestedTooDeep.pop_back();
3191 }
3192 
3193 // Operators that can follow a C variable.
3194 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3195   switch (kind) {
3196   case tok::ampamp:
3197   case tok::ampequal:
3198   case tok::arrow:
3199   case tok::caret:
3200   case tok::caretequal:
3201   case tok::comma:
3202   case tok::ellipsis:
3203   case tok::equal:
3204   case tok::equalequal:
3205   case tok::exclaim:
3206   case tok::exclaimequal:
3207   case tok::greater:
3208   case tok::greaterequal:
3209   case tok::greatergreater:
3210   case tok::greatergreaterequal:
3211   case tok::l_paren:
3212   case tok::l_square:
3213   case tok::less:
3214   case tok::lessequal:
3215   case tok::lessless:
3216   case tok::lesslessequal:
3217   case tok::minus:
3218   case tok::minusequal:
3219   case tok::minusminus:
3220   case tok::percent:
3221   case tok::percentequal:
3222   case tok::period:
3223   case tok::pipe:
3224   case tok::pipeequal:
3225   case tok::pipepipe:
3226   case tok::plus:
3227   case tok::plusequal:
3228   case tok::plusplus:
3229   case tok::question:
3230   case tok::r_brace:
3231   case tok::r_paren:
3232   case tok::r_square:
3233   case tok::semi:
3234   case tok::slash:
3235   case tok::slashequal:
3236   case tok::star:
3237   case tok::starequal:
3238     return true;
3239   default:
3240     return false;
3241   }
3242 }
3243 
3244 void UnwrappedLineParser::parseAccessSpecifier() {
3245   FormatToken *AccessSpecifierCandidate = FormatTok;
3246   nextToken();
3247   // Understand Qt's slots.
3248   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3249     nextToken();
3250   // Otherwise, we don't know what it is, and we'd better keep the next token.
3251   if (FormatTok->is(tok::colon)) {
3252     nextToken();
3253     addUnwrappedLine();
3254   } else if (!FormatTok->is(tok::coloncolon) &&
3255              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3256     // Not a variable name nor namespace name.
3257     addUnwrappedLine();
3258   } else if (AccessSpecifierCandidate) {
3259     // Consider the access specifier to be a C identifier.
3260     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3261   }
3262 }
3263 
/// \brief Parses a requires, decides if it is a clause or an expression.
/// \pre The current token has to be the requires keyword.
/// \returns true if it parsed a clause.
///
/// The decision is made in three steps: by the token right after `requires`,
/// then by the non-comment token in front of it, and finally by a bounded
/// lookahead into the parenthesized part. The lookahead always restores the
/// token position before the actual parsing starts.
bool clang::format::UnwrappedLineParser::parseRequires() {
  assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
  auto RequiresToken = FormatTok;

  // We try to guess if it is a requires clause, or a requires expression. For
  // that we first consume the keyword and check the next token.
  nextToken();

  switch (FormatTok->Tok.getKind()) {
  case tok::l_brace:
    // This can only be an expression, never a clause.
    parseRequiresExpression(RequiresToken);
    return false;
  case tok::l_paren:
    // Clauses and expression can start with a paren, it's unclear what we have.
    break;
  default:
    // All other tokens can only be a clause.
    parseRequiresClause(RequiresToken);
    return true;
  }

  // Looking forward we would have to decide if there are function declaration
  // like arguments to the requires expression:
  // requires (T t) {
  // Or there is a constraint expression for the requires clause:
  // requires (C<T> && ...

  // But first let's look behind.
  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();

  if (!PreviousNonComment ||
      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
    // If there is no token, or an expression left brace, we are a requires
    // clause within a requires expression.
    parseRequiresClause(RequiresToken);
    return true;
  }

  switch (PreviousNonComment->Tok.getKind()) {
  case tok::greater:
  case tok::r_paren:
  case tok::kw_noexcept:
  case tok::kw_const:
    // This is a requires clause.
    parseRequiresClause(RequiresToken);
    return true;
  case tok::amp:
  case tok::ampamp: {
    // This can be either:
    // if (... && requires (T t) ...)
    // Or
    // void member(...) && requires (C<T> ...
    // We check the one token before that for a const:
    // void member(...) const && requires (C<T> ...
    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
      parseRequiresClause(RequiresToken);
      return true;
    }
    // Still undecided: fall out of the switch into the lookahead below.
    break;
  }
  default:
    if (PreviousNonComment->isTypeOrIdentifier()) {
      // This is a requires clause.
      parseRequiresClause(RequiresToken);
      return true;
    }
    // It's an expression.
    parseRequiresExpression(RequiresToken);
    return false;
  }

  // Now we look forward and try to check if the paren content is a parameter
  // list. The parameters can be cv-qualified and contain references or
  // pointers.
  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
  // of stuff: typename, const, *, &, &&, ::, identifiers.

  // Remember where we are so that every exit below can rewind the token source.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *NextToken = Tokens->getNextToken();
  int Lookahead = 0;
  // Advances the lookahead by one token.
  auto PeekNext = [&Lookahead, &NextToken, this] {
    ++Lookahead;
    NextToken = Tokens->getNextToken();
  };

  // FoundType: an identifier has been seen that may name a type.
  // LastWasColonColon: the previous token of interest was `::`, so the next
  // identifier continues a qualified name.
  // OpenAngles: count of `<` seen minus count of `>` seen.
  bool FoundType = false;
  bool LastWasColonColon = false;
  int OpenAngles = 0;

  // Inspect at most 50 tokens.
  for (; Lookahead < 50; PeekNext()) {
    switch (NextToken->Tok.getKind()) {
    case tok::kw_volatile:
    case tok::kw_const:
    case tok::comma:
      // Treated as part of a parameter list: a requires expression.
      FormatTok = Tokens->setPosition(StoredPosition);
      parseRequiresExpression(RequiresToken);
      return false;
    case tok::r_paren:
    case tok::pipepipe:
      // Nothing parameter-like was found up to here: a requires clause.
      FormatTok = Tokens->setPosition(StoredPosition);
      parseRequiresClause(RequiresToken);
      return true;
    case tok::eof:
      // Break out of the loop.
      Lookahead = 50;
      break;
    case tok::coloncolon:
      LastWasColonColon = true;
      break;
    case tok::identifier:
      // A second identifier outside of angle brackets and not continuing a
      // qualified name is taken as the parameter name in `TYPE NAME`.
      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      FoundType = true;
      LastWasColonColon = false;
      break;
    case tok::less:
      ++OpenAngles;
      break;
    case tok::greater:
      --OpenAngles;
      break;
    default:
      // A simple type specifier is taken as the start of a parameter.
      if (NextToken->isSimpleTypeSpecifier()) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    }
  }
  // This seems to be a complicated expression, just assume it's a clause.
  FormatTok = Tokens->setPosition(StoredPosition);
  parseRequiresClause(RequiresToken);
  return true;
}
3407 
3408 /// \brief Parses a requires clause.
3409 /// \param RequiresToken The requires keyword token, which starts this clause.
3410 /// \pre We need to be on the next token after the requires keyword.
3411 /// \sa parseRequiresExpression
3412 ///
3413 /// Returns if it either has finished parsing the clause, or it detects, that
3414 /// the clause is incorrect.
3415 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3416   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3417   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3418 
3419   // If there is no previous token, we are within a requires expression,
3420   // otherwise we will always have the template or function declaration in front
3421   // of it.
3422   bool InRequiresExpression =
3423       !RequiresToken->Previous ||
3424       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3425 
3426   RequiresToken->setFinalizedType(InRequiresExpression
3427                                       ? TT_RequiresClauseInARequiresExpression
3428                                       : TT_RequiresClause);
3429 
3430   // NOTE: parseConstraintExpression is only ever called from this function.
3431   // It could be inlined into here.
3432   parseConstraintExpression();
3433 
3434   if (!InRequiresExpression)
3435     FormatTok->Previous->ClosesRequiresClause = true;
3436 }
3437 
3438 /// \brief Parses a requires expression.
3439 /// \param RequiresToken The requires keyword token, which starts this clause.
3440 /// \pre We need to be on the next token after the requires keyword.
3441 /// \sa parseRequiresClause
3442 ///
3443 /// Returns if it either has finished parsing the expression, or it detects,
3444 /// that the expression is incorrect.
3445 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3446   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3447   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3448 
3449   RequiresToken->setFinalizedType(TT_RequiresExpression);
3450 
3451   if (FormatTok->is(tok::l_paren)) {
3452     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3453     parseParens();
3454   }
3455 
3456   if (FormatTok->is(tok::l_brace)) {
3457     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3458     parseChildBlock(/*CanContainBracedList=*/false,
3459                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3460   }
3461 }
3462 
/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns, when the parsing is
/// complete, or the expression is incorrect.
///
/// The function loops over tokens and dispatches on the token kind. Tokens
/// that can be part of a constraint expression are consumed; any token that
/// cannot continue the expression makes the function return without
/// consuming it.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // A lambda is only allowed in this iteration if the previous iteration
    // explicitly re-enabled it; the flag is reset to false for the next one.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      if (!TopLevelParensAllowed)
        return;
      // '&&' inside the parentheses is annotated as a binary operator.
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      // Either a lambda (only where one is expected) or the end of the
      // constraint expression, e.g. a following attribute.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      // These tokens end the constraint expression.
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      // After a logical operator a new operand starts, so both a lambda and
      // grouping parentheses may follow.
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Carry the current lambda permission over to the next token.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      nextToken();
      // A named cast must be followed by its '<type>' argument.
      if (!FormatTok->is(tok::less))
        return;

      nextToken();
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}
3612 
/// Parses an enum declaration or definition starting at the current token.
///
/// Returns false if the tokens turn out not to form an enum: "enum" used as a
/// property name in JavaScript/TypeScript, as a field name in protobuf, or two
/// identifiers in a row in C++. Returns true in all other cases, including a
/// declaration without a body.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Consume everything between the keyword and the body: the name, a base
  // type, template arguments, macros and attributes.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    if (Style.isVerilog()) {
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
      nextToken();
      // In Verilog the base type can have dimensions.
      while (FormatTok->is(tok::l_square))
        parseSquare();
    } else {
      nextToken();
    }
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(TT_AttributeSquare)) {
      parseSquare();
      // Consume the closing TT_AttributeSquare.
      if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
        nextToken();
    }
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    // Proto enum bodies are parsed as a regular declaration block.
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    // Put the enumerators on their own lines, one level deeper.
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    // The braced list did not parse cleanly: end the line here, after
    // consuming a semicolon if one is present.
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3706 
3707 bool UnwrappedLineParser::parseStructLike() {
3708   // parseRecord falls through and does not yet add an unwrapped line as a
3709   // record declaration or definition can start a structural element.
3710   parseRecord();
3711   // This does not apply to Java, JavaScript and C#.
3712   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3713       Style.isCSharp()) {
3714     if (FormatTok->is(tok::semi))
3715       nextToken();
3716     addUnwrappedLine();
3717     return true;
3718   }
3719   return false;
3720 }
3721 
3722 namespace {
3723 // A class used to set and restore the Token position when peeking
3724 // ahead in the token source.
3725 class ScopedTokenPosition {
3726   unsigned StoredPosition;
3727   FormatTokenSource *Tokens;
3728 
3729 public:
3730   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3731     assert(Tokens && "Tokens expected to not be null");
3732     StoredPosition = Tokens->getPosition();
3733   }
3734 
3735   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3736 };
3737 } // namespace
3738 
3739 // Look to see if we have [[ by looking ahead, if
3740 // its not then rewind to the original position.
3741 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3742   ScopedTokenPosition AutoPosition(Tokens);
3743   FormatToken *Tok = Tokens->getNextToken();
3744   // We already read the first [ check for the second.
3745   if (!Tok->is(tok::l_square))
3746     return false;
3747   // Double check that the attribute is just something
3748   // fairly simple.
3749   while (Tok->isNot(tok::eof)) {
3750     if (Tok->is(tok::r_square))
3751       break;
3752     Tok = Tokens->getNextToken();
3753   }
3754   if (Tok->is(tok::eof))
3755     return false;
3756   Tok = Tokens->getNextToken();
3757   if (!Tok->is(tok::r_square))
3758     return false;
3759   Tok = Tokens->getNextToken();
3760   if (Tok->is(tok::semi))
3761     return false;
3762   return true;
3763 }
3764 
3765 void UnwrappedLineParser::parseJavaEnumBody() {
3766   assert(FormatTok->is(tok::l_brace));
3767   const FormatToken *OpeningBrace = FormatTok;
3768 
3769   // Determine whether the enum is simple, i.e. does not have a semicolon or
3770   // constants with class bodies. Simple enums can be formatted like braced
3771   // lists, contracted to a single line, etc.
3772   unsigned StoredPosition = Tokens->getPosition();
3773   bool IsSimple = true;
3774   FormatToken *Tok = Tokens->getNextToken();
3775   while (!Tok->is(tok::eof)) {
3776     if (Tok->is(tok::r_brace))
3777       break;
3778     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3779       IsSimple = false;
3780       break;
3781     }
3782     // FIXME: This will also mark enums with braces in the arguments to enum
3783     // constants as "not simple". This is probably fine in practice, though.
3784     Tok = Tokens->getNextToken();
3785   }
3786   FormatTok = Tokens->setPosition(StoredPosition);
3787 
3788   if (IsSimple) {
3789     nextToken();
3790     parseBracedList();
3791     addUnwrappedLine();
3792     return;
3793   }
3794 
3795   // Parse the body of a more complex enum.
3796   // First add a line for everything up to the "{".
3797   nextToken();
3798   addUnwrappedLine();
3799   ++Line->Level;
3800 
3801   // Parse the enum constants.
3802   while (!eof()) {
3803     if (FormatTok->is(tok::l_brace)) {
3804       // Parse the constant's class body.
3805       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3806                  /*MunchSemi=*/false);
3807     } else if (FormatTok->is(tok::l_paren)) {
3808       parseParens();
3809     } else if (FormatTok->is(tok::comma)) {
3810       nextToken();
3811       addUnwrappedLine();
3812     } else if (FormatTok->is(tok::semi)) {
3813       nextToken();
3814       addUnwrappedLine();
3815       break;
3816     } else if (FormatTok->is(tok::r_brace)) {
3817       addUnwrappedLine();
3818       break;
3819     } else {
3820       nextToken();
3821     }
3822   }
3823 
3824   // Parse the class body after the enum's ";" if any.
3825   parseLevel(OpeningBrace);
3826   nextToken();
3827   --Line->Level;
3828   addUnwrappedLine();
3829 }
3830 
/// Parses a record (class, struct, union or similar) starting at its
/// introducing keyword.
///
/// \param ParseAsExpr If true, the record body is parsed as a child block
///        instead of a regular block.
///
/// No unwrapped line is added after the body, so that trailing declarators
/// stay on the same line as the closing brace.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    // An identifier that is not written entirely in upper case is treated as
    // a regular name rather than a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip over the base clause / template arguments until the record body,
    // a semicolon or the end of the file is reached.
    do {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // Stop at the first '{' that is not a braced list.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      // A semicolon means this was only a declaration.
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // C# generic type constraints go on their own line.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the record keyword to the token type used for its opening brace.
  auto GetBraceType = [](const FormatToken &RecordTok) {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return TT_ClassLBrace;
    case tok::kw_struct:
      return TT_StructLBrace;
    case tok::kw_union:
      return TT_UnionLBrace;
    default:
      // Useful for e.g. interface.
      return TT_RecordLBrace;
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(GetBraceType(InitialToken));
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is indented by two levels.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3936 
3937 void UnwrappedLineParser::parseObjCMethod() {
3938   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3939          "'(' or identifier expected.");
3940   do {
3941     if (FormatTok->is(tok::semi)) {
3942       nextToken();
3943       addUnwrappedLine();
3944       return;
3945     } else if (FormatTok->is(tok::l_brace)) {
3946       if (Style.BraceWrapping.AfterFunction)
3947         addUnwrappedLine();
3948       parseBlock();
3949       addUnwrappedLine();
3950       return;
3951     } else {
3952       nextToken();
3953     }
3954   } while (!eof());
3955 }
3956 
3957 void UnwrappedLineParser::parseObjCProtocolList() {
3958   assert(FormatTok->is(tok::less) && "'<' expected.");
3959   do {
3960     nextToken();
3961     // Early exit in case someone forgot a close angle.
3962     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3963         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3964       return;
3965     }
3966   } while (!eof() && FormatTok->isNot(tok::greater));
3967   nextToken(); // Skip '>'.
3968 }
3969 
3970 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3971   do {
3972     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3973       nextToken();
3974       addUnwrappedLine();
3975       break;
3976     }
3977     if (FormatTok->is(tok::l_brace)) {
3978       parseBlock();
3979       // In ObjC interfaces, nothing should be following the "}".
3980       addUnwrappedLine();
3981     } else if (FormatTok->is(tok::r_brace)) {
3982       // Ignore stray "}". parseStructuralElement doesn't consume them.
3983       nextToken();
3984       addUnwrappedLine();
3985     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3986       nextToken();
3987       parseObjCMethod();
3988     } else {
3989       parseStructuralElement();
3990     }
3991   } while (!eof());
3992 }
3993 
3994 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3995   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3996          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3997   nextToken();
3998   nextToken(); // interface name
3999 
4000   // @interface can be followed by a lightweight generic
4001   // specialization list, then either a base class or a category.
4002   if (FormatTok->is(tok::less))
4003     parseObjCLightweightGenerics();
4004   if (FormatTok->is(tok::colon)) {
4005     nextToken();
4006     nextToken(); // base class name
4007     // The base class can also have lightweight generics applied to it.
4008     if (FormatTok->is(tok::less))
4009       parseObjCLightweightGenerics();
4010   } else if (FormatTok->is(tok::l_paren)) {
4011     // Skip category, if present.
4012     parseParens();
4013   }
4014 
4015   if (FormatTok->is(tok::less))
4016     parseObjCProtocolList();
4017 
4018   if (FormatTok->is(tok::l_brace)) {
4019     if (Style.BraceWrapping.AfterObjCDeclaration)
4020       addUnwrappedLine();
4021     parseBlock(/*MustBeDeclaration=*/true);
4022   }
4023 
4024   // With instance variables, this puts '}' on its own line.  Without instance
4025   // variables, this ends the @interface line.
4026   addUnwrappedLine();
4027 
4028   parseObjCUntilAtEnd();
4029 }
4030 
4031 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4032   assert(FormatTok->is(tok::less));
4033   // Unlike protocol lists, generic parameterizations support
4034   // nested angles:
4035   //
4036   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4037   //     NSObject <NSCopying, NSSecureCoding>
4038   //
4039   // so we need to count how many open angles we have left.
4040   unsigned NumOpenAngles = 1;
4041   do {
4042     nextToken();
4043     // Early exit in case someone forgot a close angle.
4044     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4045         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4046       break;
4047     }
4048     if (FormatTok->is(tok::less)) {
4049       ++NumOpenAngles;
4050     } else if (FormatTok->is(tok::greater)) {
4051       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4052       --NumOpenAngles;
4053     }
4054   } while (!eof() && NumOpenAngles != 0);
4055   nextToken(); // Skip '>'.
4056 }
4057 
4058 // Returns true for the declaration/definition form of @protocol,
4059 // false for the expression form.
4060 bool UnwrappedLineParser::parseObjCProtocol() {
4061   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4062   nextToken();
4063 
4064   if (FormatTok->is(tok::l_paren)) {
4065     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4066     return false;
4067   }
4068 
4069   // The definition/declaration form,
4070   // @protocol Foo
4071   // - (int)someMethod;
4072   // @end
4073 
4074   nextToken(); // protocol name
4075 
4076   if (FormatTok->is(tok::less))
4077     parseObjCProtocolList();
4078 
4079   // Check for protocol declaration.
4080   if (FormatTok->is(tok::semi)) {
4081     nextToken();
4082     addUnwrappedLine();
4083     return true;
4084   }
4085 
4086   addUnwrappedLine();
4087   parseObjCUntilAtEnd();
4088   return true;
4089 }
4090 
4091 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4092   bool IsImport = FormatTok->is(Keywords.kw_import);
4093   assert(IsImport || FormatTok->is(tok::kw_export));
4094   nextToken();
4095 
4096   // Consume the "default" in "export default class/function".
4097   if (FormatTok->is(tok::kw_default))
4098     nextToken();
4099 
4100   // Consume "async function", "function" and "default function", so that these
4101   // get parsed as free-standing JS functions, i.e. do not require a trailing
4102   // semicolon.
4103   if (FormatTok->is(Keywords.kw_async))
4104     nextToken();
4105   if (FormatTok->is(Keywords.kw_function)) {
4106     nextToken();
4107     return;
4108   }
4109 
4110   // For imports, `export *`, `export {...}`, consume the rest of the line up
4111   // to the terminating `;`. For everything else, just return and continue
4112   // parsing the structural element, i.e. the declaration or expression for
4113   // `export default`.
4114   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4115       !FormatTok->isStringLiteral() &&
4116       !(FormatTok->is(Keywords.kw_type) &&
4117         Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4118     return;
4119   }
4120 
4121   while (!eof()) {
4122     if (FormatTok->is(tok::semi))
4123       return;
4124     if (Line->Tokens.empty()) {
4125       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4126       // import statement should terminate.
4127       return;
4128     }
4129     if (FormatTok->is(tok::l_brace)) {
4130       FormatTok->setBlockKind(BK_Block);
4131       nextToken();
4132       parseBracedList();
4133     } else {
4134       nextToken();
4135     }
4136   }
4137 }
4138 
4139 void UnwrappedLineParser::parseStatementMacro() {
4140   nextToken();
4141   if (FormatTok->is(tok::l_paren))
4142     parseParens();
4143   if (FormatTok->is(tok::semi))
4144     nextToken();
4145   addUnwrappedLine();
4146 }
4147 
4148 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4149   // consume things like a::`b.c[d:e] or a::*
4150   while (true) {
4151     if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4152                            tok::coloncolon, tok::hash) ||
4153         Keywords.isVerilogIdentifier(*FormatTok)) {
4154       nextToken();
4155     } else if (FormatTok->is(tok::l_square)) {
4156       parseSquare();
4157     } else {
4158       break;
4159     }
4160   }
4161 }
4162 
4163 void UnwrappedLineParser::parseVerilogSensitivityList() {
4164   if (!FormatTok->is(tok::at))
4165     return;
4166   nextToken();
4167   // A block event expression has 2 at signs.
4168   if (FormatTok->is(tok::at))
4169     nextToken();
4170   switch (FormatTok->Tok.getKind()) {
4171   case tok::star:
4172     nextToken();
4173     break;
4174   case tok::l_paren:
4175     parseParens();
4176     break;
4177   default:
4178     parseVerilogHierarchyIdentifier();
4179     break;
4180   }
4181 }
4182 
/// Parses the header of a Verilog hierarchy block, starting at its keyword.
///
/// Three forms are handled: a `clocking` header, a case-like header (`case`,
/// `casex`, `casez`, `randcase`, `randsequence`), and everything else (e.g.
/// `module`), whose header is split into continuation lines for imports,
/// parameters, ports, `extends`/`implements` and a coverage event.
///
/// \returns The number of extra indentation levels for the block body: 1 for
///          a case-like header when Style.IndentCaseLabels is set, otherwise
///          0.
unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
  unsigned AddLevels = 0;

  if (FormatTok->is(Keywords.kw_clocking)) {
    nextToken();
    // Optional clocking block name.
    if (Keywords.isVerilogIdentifier(*FormatTok))
      nextToken();
    parseVerilogSensitivityList();
    if (FormatTok->is(tok::semi))
      nextToken();
  } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
                                Keywords.kw_casez, Keywords.kw_randcase,
                                Keywords.kw_randsequence)) {
    if (Style.IndentCaseLabels)
      AddLevels++;
    nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
    if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
      nextToken();
    // The case header has no semicolon.
  } else {
    // "module" etc.
    nextToken();
    // all the words like the name of the module and specifiers like
    // "automatic" and the width of function return type
    while (true) {
      if (FormatTok->is(tok::l_square)) {
        // An identifier directly followed by dimensions is a type name.
        auto Prev = FormatTok->getPreviousNonComment();
        if (Prev && Keywords.isVerilogIdentifier(*Prev))
          Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
        parseSquare();
      } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
                 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
        nextToken();
      } else {
        break;
      }
    }

    // Ends the current unwrapped line and marks the next one as a
    // continuation of it.
    auto NewLine = [this]() {
      addUnwrappedLine();
      Line->IsContinuation = true;
    };

    // package imports
    while (FormatTok->is(Keywords.kw_import)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(tok::semi))
        nextToken();
    }

    // parameters and ports
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      NewLine();
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
        parseParens();
      }
    }
    if (FormatTok->is(tok::l_paren)) {
      NewLine();
      FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
      parseParens();
    }

    // extends and implements
    if (FormatTok->is(Keywords.kw_extends)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(tok::l_paren))
        parseParens();
    }
    if (FormatTok->is(Keywords.kw_implements)) {
      NewLine();
      // Comma-separated list of identifiers.
      do {
        nextToken();
        parseVerilogHierarchyIdentifier();
      } while (FormatTok->is(tok::comma));
    }

    // Coverage event for cover groups.
    if (FormatTok->is(tok::at)) {
      NewLine();
      parseVerilogSensitivityList();
    }

    // The header ends at the semicolon; only this form ends the unwrapped
    // line here.
    if (FormatTok->is(tok::semi))
      nextToken(/*LevelDifference=*/1);
    addUnwrappedLine();
  }

  return AddLevels;
}
4283 
4284 void UnwrappedLineParser::parseVerilogTable() {
4285   assert(FormatTok->is(Keywords.kw_table));
4286   nextToken(/*LevelDifference=*/1);
4287   addUnwrappedLine();
4288 
4289   auto InitialLevel = Line->Level++;
4290   while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4291     FormatToken *Tok = FormatTok;
4292     nextToken();
4293     if (Tok->is(tok::semi))
4294       addUnwrappedLine();
4295     else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4296       Tok->setFinalizedType(TT_VerilogTableItem);
4297   }
4298   Line->Level = InitialLevel;
4299   nextToken(/*LevelDifference=*/-1);
4300   addUnwrappedLine();
4301 }
4302 
4303 void UnwrappedLineParser::parseVerilogCaseLabel() {
4304   // The label will get unindented in AnnotatingParser. If there are no leading
4305   // spaces, indent the rest here so that things inside the block will be
4306   // indented relative to things outside. We don't use parseLabel because we
4307   // don't know whether this colon is a label or a ternary expression at this
4308   // point.
4309   auto OrigLevel = Line->Level;
4310   auto FirstLine = CurrentLines->size();
4311   if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4312     ++Line->Level;
4313   else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4314     --Line->Level;
4315   parseStructuralElement();
4316   // Restore the indentation in both the new line and the line that has the
4317   // label.
4318   if (CurrentLines->size() > FirstLine)
4319     (*CurrentLines)[FirstLine].Level = OrigLevel;
4320   Line->Level = OrigLevel;
4321 }
4322 
4323 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4324   for (const auto &N : Line.Tokens) {
4325     if (N.Tok->MacroCtx)
4326       return true;
4327     for (const UnwrappedLine &Child : N.Children)
4328       if (containsExpansion(Child))
4329         return true;
4330   }
4331   return false;
4332 }
4333 
/// Finishes the current unwrapped line and starts a new, empty one.
///
/// Does nothing if the current line has no tokens. Otherwise the line is
/// either appended to the current line list, or — when it contains tokens
/// from a macro expansion — fed into the macro call reconstruction. After
/// that, the per-line state is reset and pending preprocessor directive lines
/// are flushed.
///
/// \param AdjustLevel If LineLevel::Remove and the line closes a block in
///        Whitesmiths style, the level is decreased after the line is added.
void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
  if (Line->Tokens.empty())
    return;
  LLVM_DEBUG({
    if (!parsingPPDirective()) {
      llvm::dbgs() << "Adding unwrapped line:\n";
      printDebugInfo(*Line);
    }
  });

  // If this line closes a block when in Whitesmiths mode, remember that
  // information so that the level can be decreased after the line is added.
  // This has to happen after the addition of the line since the line itself
  // needs to be indented.
  bool ClosesWhitesmithsBlock =
      Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

  // If the current line was expanded from a macro call, we use it to
  // reconstruct an unwrapped line from the structure of the expanded unwrapped
  // line and the unexpanded token stream.
  if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
    // Start a new reconstruction if none is in progress.
    if (!Reconstruct)
      Reconstruct.emplace(Line->Level, Unexpanded);
    Reconstruct->addLine(*Line);

    // While the reconstructed unexpanded lines are stored in the normal
    // flow of lines, the expanded lines are stored on the side to be analyzed
    // in an extra step.
    CurrentExpandedLines.push_back(std::move(*Line));

    if (Reconstruct->finished()) {
      UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
      assert(!Reconstructed.Tokens.empty() &&
             "Reconstructed must at least contain the macro identifier.");
      assert(!parsingPPDirective());
      LLVM_DEBUG({
        llvm::dbgs() << "Adding unexpanded line:\n";
        printDebugInfo(Reconstructed);
      });
      // Key the expanded lines by the first token of the reconstructed line.
      ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
      Lines.push_back(std::move(Reconstructed));
      CurrentExpandedLines.clear();
      Reconstruct.reset();
    }
  } else {
    // At the top level we only get here when no unexpansion is going on, or
    // when conditional formatting led to unfinished macro reconstructions.
    assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
    CurrentLines->push_back(std::move(*Line));
  }
  // Reset the per-line state; *Line has been moved from above.
  Line->Tokens.clear();
  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
  Line->FirstStartColumn = 0;
  Line->IsContinuation = false;

  if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
    --Line->Level;
  // Outside of a preprocessor directive, append the directive lines collected
  // so far after the line that was just added.
  if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
    CurrentLines->append(
        std::make_move_iterator(PreprocessorDirectives.begin()),
        std::make_move_iterator(PreprocessorDirectives.end()));
    PreprocessorDirectives.clear();
  }
  // Disconnect the current token from the last token on the previous line.
  FormatTok->Previous = nullptr;
}
4401 
4402 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4403 
4404 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4405   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4406          FormatTok.NewlinesBefore > 0;
4407 }
4408 
4409 // Checks if \p FormatTok is a line comment that continues the line comment
4410 // section on \p Line.
4411 static bool
4412 continuesLineCommentSection(const FormatToken &FormatTok,
4413                             const UnwrappedLine &Line,
4414                             const llvm::Regex &CommentPragmasRegex) {
4415   if (Line.Tokens.empty())
4416     return false;
4417 
4418   StringRef IndentContent = FormatTok.TokenText;
4419   if (FormatTok.TokenText.startswith("//") ||
4420       FormatTok.TokenText.startswith("/*")) {
4421     IndentContent = FormatTok.TokenText.substr(2);
4422   }
4423   if (CommentPragmasRegex.match(IndentContent))
4424     return false;
4425 
4426   // If Line starts with a line comment, then FormatTok continues the comment
4427   // section if its original column is greater or equal to the original start
4428   // column of the line.
4429   //
4430   // Define the min column token of a line as follows: if a line ends in '{' or
4431   // contains a '{' followed by a line comment, then the min column token is
4432   // that '{'. Otherwise, the min column token of the line is the first token of
4433   // the line.
4434   //
4435   // If Line starts with a token other than a line comment, then FormatTok
4436   // continues the comment section if its original column is greater than the
4437   // original start column of the min column token of the line.
4438   //
4439   // For example, the second line comment continues the first in these cases:
4440   //
4441   // // first line
4442   // // second line
4443   //
4444   // and:
4445   //
4446   // // first line
4447   //  // second line
4448   //
4449   // and:
4450   //
4451   // int i; // first line
4452   //  // second line
4453   //
4454   // and:
4455   //
4456   // do { // first line
4457   //      // second line
4458   //   int i;
4459   // } while (true);
4460   //
4461   // and:
4462   //
4463   // enum {
4464   //   a, // first line
4465   //    // second line
4466   //   b
4467   // };
4468   //
4469   // The second line comment doesn't continue the first in these cases:
4470   //
4471   //   // first line
4472   //  // second line
4473   //
4474   // and:
4475   //
4476   // int i; // first line
4477   // // second line
4478   //
4479   // and:
4480   //
4481   // do { // first line
4482   //   // second line
4483   //   int i;
4484   // } while (true);
4485   //
4486   // and:
4487   //
4488   // enum {
4489   //   a, // first line
4490   //   // second line
4491   // };
4492   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4493 
4494   // Scan for '{//'. If found, use the column of '{' as a min column for line
4495   // comment section continuation.
4496   const FormatToken *PreviousToken = nullptr;
4497   for (const UnwrappedLineNode &Node : Line.Tokens) {
4498     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4499         isLineComment(*Node.Tok)) {
4500       MinColumnToken = PreviousToken;
4501       break;
4502     }
4503     PreviousToken = Node.Tok;
4504 
4505     // Grab the last newline preceding a token in this unwrapped line.
4506     if (Node.Tok->NewlinesBefore > 0)
4507       MinColumnToken = Node.Tok;
4508   }
4509   if (PreviousToken && PreviousToken->is(tok::l_brace))
4510     MinColumnToken = PreviousToken;
4511 
4512   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4513                               MinColumnToken);
4514 }
4515 
4516 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4517   bool JustComments = Line->Tokens.empty();
4518   for (FormatToken *Tok : CommentsBeforeNextToken) {
4519     // Line comments that belong to the same line comment section are put on the
4520     // same line since later we might want to reflow content between them.
4521     // Additional fine-grained breaking of line comment sections is controlled
4522     // by the class BreakableLineCommentSection in case it is desirable to keep
4523     // several line comment sections in the same unwrapped line.
4524     //
4525     // FIXME: Consider putting separate line comment sections as children to the
4526     // unwrapped line instead.
4527     Tok->ContinuesLineCommentSection =
4528         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4529     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4530       addUnwrappedLine();
4531     pushToken(Tok);
4532   }
4533   if (NewlineBeforeNext && JustComments)
4534     addUnwrappedLine();
4535   CommentsBeforeNextToken.clear();
4536 }
4537 
4538 void UnwrappedLineParser::nextToken(int LevelDifference) {
4539   if (eof())
4540     return;
4541   flushComments(isOnNewLine(*FormatTok));
4542   pushToken(FormatTok);
4543   FormatToken *Previous = FormatTok;
4544   if (!Style.isJavaScript())
4545     readToken(LevelDifference);
4546   else
4547     readTokenWithJavaScriptASI();
4548   FormatTok->Previous = Previous;
4549   if (Style.isVerilog()) {
4550     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4551     // keywords like `begin`, we can't treat them the same as left braces
4552     // because some contexts require one of them.  For example structs use
4553     // braces and if blocks use keywords, and a left brace can occur in an if
4554     // statement, but it is not a block.  For keywords like `end`, we simply
4555     // treat them the same as right braces.
4556     if (Keywords.isVerilogEnd(*FormatTok))
4557       FormatTok->Tok.setKind(tok::r_brace);
4558   }
4559 }
4560 
4561 void UnwrappedLineParser::distributeComments(
4562     const SmallVectorImpl<FormatToken *> &Comments,
4563     const FormatToken *NextTok) {
4564   // Whether or not a line comment token continues a line is controlled by
4565   // the method continuesLineCommentSection, with the following caveat:
4566   //
4567   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4568   // that each comment line from the trail is aligned with the next token, if
4569   // the next token exists. If a trail exists, the beginning of the maximal
4570   // trail is marked as a start of a new comment section.
4571   //
4572   // For example in this code:
4573   //
4574   // int a; // line about a
4575   //   // line 1 about b
4576   //   // line 2 about b
4577   //   int b;
4578   //
4579   // the two lines about b form a maximal trail, so there are two sections, the
4580   // first one consisting of the single comment "// line about a" and the
4581   // second one consisting of the next two comments.
4582   if (Comments.empty())
4583     return;
4584   bool ShouldPushCommentsInCurrentLine = true;
4585   bool HasTrailAlignedWithNextToken = false;
4586   unsigned StartOfTrailAlignedWithNextToken = 0;
4587   if (NextTok) {
4588     // We are skipping the first element intentionally.
4589     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4590       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4591         HasTrailAlignedWithNextToken = true;
4592         StartOfTrailAlignedWithNextToken = i;
4593       }
4594     }
4595   }
4596   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4597     FormatToken *FormatTok = Comments[i];
4598     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4599       FormatTok->ContinuesLineCommentSection = false;
4600     } else {
4601       FormatTok->ContinuesLineCommentSection =
4602           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4603     }
4604     if (!FormatTok->ContinuesLineCommentSection &&
4605         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4606       ShouldPushCommentsInCurrentLine = false;
4607     }
4608     if (ShouldPushCommentsInCurrentLine)
4609       pushToken(FormatTok);
4610     else
4611       CommentsBeforeNextToken.push_back(FormatTok);
4612   }
4613 }
4614 
4615 void UnwrappedLineParser::readToken(int LevelDifference) {
4616   SmallVector<FormatToken *, 1> Comments;
4617   bool PreviousWasComment = false;
4618   bool FirstNonCommentOnLine = false;
4619   do {
4620     FormatTok = Tokens->getNextToken();
4621     assert(FormatTok);
4622     while (FormatTok->getType() == TT_ConflictStart ||
4623            FormatTok->getType() == TT_ConflictEnd ||
4624            FormatTok->getType() == TT_ConflictAlternative) {
4625       if (FormatTok->getType() == TT_ConflictStart)
4626         conditionalCompilationStart(/*Unreachable=*/false);
4627       else if (FormatTok->getType() == TT_ConflictAlternative)
4628         conditionalCompilationAlternative();
4629       else if (FormatTok->getType() == TT_ConflictEnd)
4630         conditionalCompilationEnd();
4631       FormatTok = Tokens->getNextToken();
4632       FormatTok->MustBreakBefore = true;
4633     }
4634 
4635     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4636                                       const FormatToken &Tok,
4637                                       bool PreviousWasComment) {
4638       auto IsFirstOnLine = [](const FormatToken &Tok) {
4639         return Tok.HasUnescapedNewline || Tok.IsFirst;
4640       };
4641 
4642       // Consider preprocessor directives preceded by block comments as first
4643       // on line.
4644       if (PreviousWasComment)
4645         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4646       return IsFirstOnLine(Tok);
4647     };
4648 
4649     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4650         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4651     PreviousWasComment = FormatTok->is(tok::comment);
4652 
4653     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4654            (!Style.isVerilog() ||
4655             Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4656            FirstNonCommentOnLine) {
4657       distributeComments(Comments, FormatTok);
4658       Comments.clear();
4659       // If there is an unfinished unwrapped line, we flush the preprocessor
4660       // directives only after that unwrapped line was finished later.
4661       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4662       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4663       assert((LevelDifference >= 0 ||
4664               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4665              "LevelDifference makes Line->Level negative");
4666       Line->Level += LevelDifference;
4667       // Comments stored before the preprocessor directive need to be output
4668       // before the preprocessor directive, at the same level as the
4669       // preprocessor directive, as we consider them to apply to the directive.
4670       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4671           PPBranchLevel > 0) {
4672         Line->Level += PPBranchLevel;
4673       }
4674       flushComments(isOnNewLine(*FormatTok));
4675       parsePPDirective();
4676       PreviousWasComment = FormatTok->is(tok::comment);
4677       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4678           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4679     }
4680 
4681     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4682         !Line->InPPDirective) {
4683       continue;
4684     }
4685 
4686     if (FormatTok->is(tok::identifier) &&
4687         Macros.defined(FormatTok->TokenText) &&
4688         // FIXME: Allow expanding macros in preprocessor directives.
4689         !Line->InPPDirective) {
4690       FormatToken *ID = FormatTok;
4691       unsigned Position = Tokens->getPosition();
4692 
4693       // To correctly parse the code, we need to replace the tokens of the macro
4694       // call with its expansion.
4695       auto PreCall = std::move(Line);
4696       Line.reset(new UnwrappedLine);
4697       bool OldInExpansion = InExpansion;
4698       InExpansion = true;
4699       // We parse the macro call into a new line.
4700       auto Args = parseMacroCall();
4701       InExpansion = OldInExpansion;
4702       assert(Line->Tokens.front().Tok == ID);
4703       // And remember the unexpanded macro call tokens.
4704       auto UnexpandedLine = std::move(Line);
4705       // Reset to the old line.
4706       Line = std::move(PreCall);
4707 
4708       LLVM_DEBUG({
4709         llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4710         if (Args) {
4711           llvm::dbgs() << "(";
4712           for (const auto &Arg : Args.value())
4713             for (const auto &T : Arg)
4714               llvm::dbgs() << T->TokenText << " ";
4715           llvm::dbgs() << ")";
4716         }
4717         llvm::dbgs() << "\n";
4718       });
4719       if (Macros.objectLike(ID->TokenText) && Args &&
4720           !Macros.hasArity(ID->TokenText, Args->size())) {
4721         // The macro is either
4722         // - object-like, but we got argumnets, or
4723         // - overloaded to be both object-like and function-like, but none of
4724         //   the function-like arities match the number of arguments.
4725         // Thus, expand as object-like macro.
4726         LLVM_DEBUG(llvm::dbgs()
4727                    << "Macro \"" << ID->TokenText
4728                    << "\" not overloaded for arity " << Args->size()
4729                    << "or not function-like, using object-like overload.");
4730         Args.reset();
4731         UnexpandedLine->Tokens.resize(1);
4732         Tokens->setPosition(Position);
4733         nextToken();
4734         assert(!Args && Macros.objectLike(ID->TokenText));
4735       }
4736       if ((!Args && Macros.objectLike(ID->TokenText)) ||
4737           (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4738         // Next, we insert the expanded tokens in the token stream at the
4739         // current position, and continue parsing.
4740         Unexpanded[ID] = std::move(UnexpandedLine);
4741         SmallVector<FormatToken *, 8> Expansion =
4742             Macros.expand(ID, std::move(Args));
4743         if (!Expansion.empty())
4744           FormatTok = Tokens->insertTokens(Expansion);
4745 
4746         LLVM_DEBUG({
4747           llvm::dbgs() << "Expanded: ";
4748           for (const auto &T : Expansion)
4749             llvm::dbgs() << T->TokenText << " ";
4750           llvm::dbgs() << "\n";
4751         });
4752       } else {
4753         LLVM_DEBUG({
4754           llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4755                        << "\", because it was used ";
4756           if (Args)
4757             llvm::dbgs() << "with " << Args->size();
4758           else
4759             llvm::dbgs() << "without";
4760           llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4761         });
4762         Tokens->setPosition(Position);
4763         FormatTok = ID;
4764       }
4765     }
4766 
4767     if (!FormatTok->is(tok::comment)) {
4768       distributeComments(Comments, FormatTok);
4769       Comments.clear();
4770       return;
4771     }
4772 
4773     Comments.push_back(FormatTok);
4774   } while (!eof());
4775 
4776   distributeComments(Comments, nullptr);
4777   Comments.clear();
4778 }
4779 
4780 namespace {
4781 template <typename Iterator>
4782 void pushTokens(Iterator Begin, Iterator End,
4783                 llvm::SmallVectorImpl<FormatToken *> &Into) {
4784   for (auto I = Begin; I != End; ++I) {
4785     Into.push_back(I->Tok);
4786     for (const auto &Child : I->Children)
4787       pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4788   }
4789 }
4790 } // namespace
4791 
4792 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4793 UnwrappedLineParser::parseMacroCall() {
4794   std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4795   assert(Line->Tokens.empty());
4796   nextToken();
4797   if (!FormatTok->is(tok::l_paren))
4798     return Args;
4799   unsigned Position = Tokens->getPosition();
4800   FormatToken *Tok = FormatTok;
4801   nextToken();
4802   Args.emplace();
4803   auto ArgStart = std::prev(Line->Tokens.end());
4804 
4805   int Parens = 0;
4806   do {
4807     switch (FormatTok->Tok.getKind()) {
4808     case tok::l_paren:
4809       ++Parens;
4810       nextToken();
4811       break;
4812     case tok::r_paren: {
4813       if (Parens > 0) {
4814         --Parens;
4815         nextToken();
4816         break;
4817       }
4818       Args->push_back({});
4819       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4820       nextToken();
4821       return Args;
4822     }
4823     case tok::comma: {
4824       if (Parens > 0) {
4825         nextToken();
4826         break;
4827       }
4828       Args->push_back({});
4829       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4830       nextToken();
4831       ArgStart = std::prev(Line->Tokens.end());
4832       break;
4833     }
4834     default:
4835       nextToken();
4836       break;
4837     }
4838   } while (!eof());
4839   Line->Tokens.resize(1);
4840   Tokens->setPosition(Position);
4841   FormatTok = Tok;
4842   return {};
4843 }
4844 
4845 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4846   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4847   if (MustBreakBeforeNextToken) {
4848     Line->Tokens.back().Tok->MustBreakBefore = true;
4849     MustBreakBeforeNextToken = false;
4850   }
4851 }
4852 
4853 } // end namespace format
4854 } // end namespace clang
4855