xref: /freebsd/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #include <algorithm>
29 #include <utility>
30 
31 #define DEBUG_TYPE "format-parser"
32 
33 namespace clang {
34 namespace format {
35 
36 namespace {
37 
38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39                StringRef Prefix = "", bool PrintText = false) {
40   OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41      << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42   bool NewLine = false;
43   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44                                                     E = Line.Tokens.end();
45        I != E; ++I) {
46     if (NewLine) {
47       OS << Prefix;
48       NewLine = false;
49     }
50     OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType()
51        << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
52        << "\"] ";
53     for (SmallVectorImpl<UnwrappedLine>::const_iterator
54              CI = I->Children.begin(),
55              CE = I->Children.end();
56          CI != CE; ++CI) {
57       OS << "\n";
58       printLine(OS, *CI, (Prefix + "  ").str());
59       NewLine = true;
60     }
61   }
62   if (!NewLine)
63     OS << "\n";
64 }
65 
66 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
67   printLine(llvm::dbgs(), Line);
68 }
69 
70 class ScopedDeclarationState {
71 public:
72   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
73                          bool MustBeDeclaration)
74       : Line(Line), Stack(Stack) {
75     Line.MustBeDeclaration = MustBeDeclaration;
76     Stack.push_back(MustBeDeclaration);
77   }
78   ~ScopedDeclarationState() {
79     Stack.pop_back();
80     if (!Stack.empty())
81       Line.MustBeDeclaration = Stack.back();
82     else
83       Line.MustBeDeclaration = true;
84   }
85 
86 private:
87   UnwrappedLine &Line;
88   llvm::BitVector &Stack;
89 };
90 
91 } // end anonymous namespace
92 
93 class ScopedLineState {
94 public:
95   ScopedLineState(UnwrappedLineParser &Parser,
96                   bool SwitchToPreprocessorLines = false)
97       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
98     if (SwitchToPreprocessorLines)
99       Parser.CurrentLines = &Parser.PreprocessorDirectives;
100     else if (!Parser.Line->Tokens.empty())
101       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
102     PreBlockLine = std::move(Parser.Line);
103     Parser.Line = std::make_unique<UnwrappedLine>();
104     Parser.Line->Level = PreBlockLine->Level;
105     Parser.Line->PPLevel = PreBlockLine->PPLevel;
106     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
107     Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
108   }
109 
110   ~ScopedLineState() {
111     if (!Parser.Line->Tokens.empty())
112       Parser.addUnwrappedLine();
113     assert(Parser.Line->Tokens.empty());
114     Parser.Line = std::move(PreBlockLine);
115     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
116       Parser.MustBreakBeforeNextToken = true;
117     Parser.CurrentLines = OriginalLines;
118   }
119 
120 private:
121   UnwrappedLineParser &Parser;
122 
123   std::unique_ptr<UnwrappedLine> PreBlockLine;
124   SmallVectorImpl<UnwrappedLine> *OriginalLines;
125 };
126 
127 class CompoundStatementIndenter {
128 public:
129   CompoundStatementIndenter(UnwrappedLineParser *Parser,
130                             const FormatStyle &Style, unsigned &LineLevel)
131       : CompoundStatementIndenter(Parser, LineLevel,
132                                   Style.BraceWrapping.AfterControlStatement,
133                                   Style.BraceWrapping.IndentBraces) {}
134   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
135                             bool WrapBrace, bool IndentBrace)
136       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
137     if (WrapBrace)
138       Parser->addUnwrappedLine();
139     if (IndentBrace)
140       ++LineLevel;
141   }
142   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
143 
144 private:
145   unsigned &LineLevel;
146   unsigned OldLineLevel;
147 };
148 
149 UnwrappedLineParser::UnwrappedLineParser(
150     SourceManager &SourceMgr, const FormatStyle &Style,
151     const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
152     ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
153     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
154     IdentifierTable &IdentTable)
155     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
156       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
157       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
158       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
159       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
160                        ? IG_Rejected
161                        : IG_Inited),
162       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
163       Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
164 
165 void UnwrappedLineParser::reset() {
166   PPBranchLevel = -1;
167   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
168                      ? IG_Rejected
169                      : IG_Inited;
170   IncludeGuardToken = nullptr;
171   Line.reset(new UnwrappedLine);
172   CommentsBeforeNextToken.clear();
173   FormatTok = nullptr;
174   MustBreakBeforeNextToken = false;
175   IsDecltypeAutoFunction = false;
176   PreprocessorDirectives.clear();
177   CurrentLines = &Lines;
178   DeclarationScopeStack.clear();
179   NestedTooDeep.clear();
180   NestedLambdas.clear();
181   PPStack.clear();
182   Line->FirstStartColumn = FirstStartColumn;
183 
184   if (!Unexpanded.empty())
185     for (FormatToken *Token : AllTokens)
186       Token->MacroCtx.reset();
187   CurrentExpandedLines.clear();
188   ExpandedLines.clear();
189   Unexpanded.clear();
190   InExpansion = false;
191   Reconstruct.reset();
192 }
193 
194 void UnwrappedLineParser::parse() {
195   IndexedTokenSource TokenSource(AllTokens);
196   Line->FirstStartColumn = FirstStartColumn;
197   do {
198     LLVM_DEBUG(llvm::dbgs() << "----\n");
199     reset();
200     Tokens = &TokenSource;
201     TokenSource.reset();
202 
203     readToken();
204     parseFile();
205 
206     // If we found an include guard then all preprocessor directives (other than
207     // the guard) are over-indented by one.
208     if (IncludeGuard == IG_Found) {
209       for (auto &Line : Lines)
210         if (Line.InPPDirective && Line.Level > 0)
211           --Line.Level;
212     }
213 
214     // Create line with eof token.
215     assert(eof());
216     pushToken(FormatTok);
217     addUnwrappedLine();
218 
219     // In a first run, format everything with the lines containing macro calls
220     // replaced by the expansion.
221     if (!ExpandedLines.empty()) {
222       LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
223       for (const auto &Line : Lines) {
224         if (!Line.Tokens.empty()) {
225           auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
226           if (it != ExpandedLines.end()) {
227             for (const auto &Expanded : it->second) {
228               LLVM_DEBUG(printDebugInfo(Expanded));
229               Callback.consumeUnwrappedLine(Expanded);
230             }
231             continue;
232           }
233         }
234         LLVM_DEBUG(printDebugInfo(Line));
235         Callback.consumeUnwrappedLine(Line);
236       }
237       Callback.finishRun();
238     }
239 
240     LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
241     for (const UnwrappedLine &Line : Lines) {
242       LLVM_DEBUG(printDebugInfo(Line));
243       Callback.consumeUnwrappedLine(Line);
244     }
245     Callback.finishRun();
246     Lines.clear();
247     while (!PPLevelBranchIndex.empty() &&
248            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
249       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
250       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
251     }
252     if (!PPLevelBranchIndex.empty()) {
253       ++PPLevelBranchIndex.back();
254       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
255       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
256     }
257   } while (!PPLevelBranchIndex.empty());
258 }
259 
260 void UnwrappedLineParser::parseFile() {
261   // The top-level context in a file always has declarations, except for pre-
262   // processor directives and JavaScript files.
263   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
264   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
265                                           MustBeDeclaration);
266   if (Style.Language == FormatStyle::LK_TextProto)
267     parseBracedList();
268   else
269     parseLevel();
270   // Make sure to format the remaining tokens.
271   //
272   // LK_TextProto is special since its top-level is parsed as the body of a
273   // braced list, which does not necessarily have natural line separators such
274   // as a semicolon. Comments after the last entry that have been determined to
275   // not belong to that line, as in:
276   //   key: value
277   //   // endfile comment
278   // do not have a chance to be put on a line of their own until this point.
279   // Here we add this newline before end-of-file comments.
280   if (Style.Language == FormatStyle::LK_TextProto &&
281       !CommentsBeforeNextToken.empty()) {
282     addUnwrappedLine();
283   }
284   flushComments(true);
285   addUnwrappedLine();
286 }
287 
288 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289   do {
290     switch (FormatTok->Tok.getKind()) {
291     case tok::l_brace:
292       return;
293     default:
294       if (FormatTok->is(Keywords.kw_where)) {
295         addUnwrappedLine();
296         nextToken();
297         parseCSharpGenericTypeConstraint();
298         break;
299       }
300       nextToken();
301       break;
302     }
303   } while (!eof());
304 }
305 
306 void UnwrappedLineParser::parseCSharpAttribute() {
307   int UnpairedSquareBrackets = 1;
308   do {
309     switch (FormatTok->Tok.getKind()) {
310     case tok::r_square:
311       nextToken();
312       --UnpairedSquareBrackets;
313       if (UnpairedSquareBrackets == 0) {
314         addUnwrappedLine();
315         return;
316       }
317       break;
318     case tok::l_square:
319       ++UnpairedSquareBrackets;
320       nextToken();
321       break;
322     default:
323       nextToken();
324       break;
325     }
326   } while (!eof());
327 }
328 
329 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
330   if (!Lines.empty() && Lines.back().InPPDirective)
331     return true;
332 
333   const FormatToken *Previous = Tokens->getPreviousToken();
334   return Previous && Previous->is(tok::comment) &&
335          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
336 }
337 
/// \brief Parses the structural elements of one nesting level, i.e. everything
/// up to end of file or, when \p OpeningBrace is given, up to the closing
/// brace of that level (which is left unconsumed).
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Evaluated once on entry; it is always true when brace removal is off.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    if (FormatTok->isAttribute()) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched like braces below.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. The
    // HasDoWhile/HasLabel out-pointers are only passed while the respective
    // flag is still false.
    auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
                             HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (InRequiresExpression) {
        FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // This closes the level. The brace itself is not consumed here; the
        // return value only tells the caller whether the block is "simple".
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // FormatTok may be a TT_MacroBlockEnd token mapped to r_brace above,
        // hence the explicit check for a real brace.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment on the same line as the closing brace disqualifies the
        // block as well.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // Closing brace without an opening brace at this level: consume it and
      // finish the line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments for the token following 'default', then
      // rewind to the stored position.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      [[fallthrough]];
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
        // Proto: there are no switch/case statements
        // Verilog: Case labels don't have this word. We handle case
        // labels including default in TokenAnnotator.
        // JavaScript: A 'case: string' style field declaration.
        ParseDefault();
        break;
      }
      // The level is raised at most once per parseLevel() call, on the first
      // label encountered.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      [[fallthrough]];
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}
474 
/// Looks ahead from the opening brace at the current token and decides, for
/// it and for every brace nested inside it, whether the brace pair is a block
/// (BK_Block) or a braced initializer list (BK_BracedInit). The decision is
/// stored on the brace tokens via setBlockKind(); the token position is
/// restored before returning.
///
/// \param ExpectClassBody If true, a ';' following the closing brace of the
/// outermost brace pair does not count as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  struct StackEntry {
    FormatToken *Tok;
    const FormatToken *PrevTok;
  };
  SmallVector<StackEntry, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment, non-preprocessor token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));
    // Outside of macro bodies, skip a directive starting with '#': advance
    // until a non-comment token that starts a new line (or eof) is reached.
    while (NextTok->is(tok::hash) && !Line->InMacroBody) {
      NextTok = Tokens->getNextToken();
      do {
        NextTok = Tokens->getNextToken();
      } while (NextTok->is(tok::comment) ||
               (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)));
    }

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back({Tok, PrevTok});
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only braces whose kind is still undecided are classified here, based
      // on the token that follows the closing brace.
      if (LBraceStack.back().Tok->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::ellipsis);

          // Distinguish between braced list in a constructor initializer list
          // followed by constructor body, or just adjacent blocks.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
                                                   tok::greater));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the verdict on both the closing and the matching opening
        // brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back().Tok->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (Tok->isNot(TT_StatementMacro))
        break;
      [[fallthrough]];
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-undecided brace marks it as a
      // block.
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
        LBraceStack.back().Tok->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (const auto &Entry : LBraceStack)
    if (Entry.Tok->is(BK_Unknown))
      Entry.Tok->setBlockKind(BK_Block);

  // Rewind to where the look-ahead started.
  FormatTok = Tokens->setPosition(StoredPosition);
}
645 
646 // Sets the token type of the directly previous right brace.
647 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
648   if (auto Prev = FormatTok->getPreviousNonComment();
649       Prev && Prev->is(tok::r_brace)) {
650     Prev->setFinalizedType(Type);
651   }
652 }
653 
/// Mixes the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  // The whole right-hand side is computed from the old seed before the XOR.
  const std::size_t Mix =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mix;
}
659 
660 size_t UnwrappedLineParser::computePPHash() const {
661   size_t h = 0;
662   for (const auto &i : PPStack) {
663     hash_combine(h, size_t(i.Kind));
664     hash_combine(h, i.Line);
665   }
666   return h;
667 }
668 
669 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
670 // is not null, subtracts its length (plus the preceding space) when computing
671 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
672 // running the token annotator on it so that we can restore them afterward.
673 bool UnwrappedLineParser::mightFitOnOneLine(
674     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
675   const auto ColumnLimit = Style.ColumnLimit;
676   if (ColumnLimit == 0)
677     return true;
678 
679   auto &Tokens = ParsedLine.Tokens;
680   assert(!Tokens.empty());
681 
682   const auto *LastToken = Tokens.back().Tok;
683   assert(LastToken);
684 
685   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
686 
687   int Index = 0;
688   for (const auto &Token : Tokens) {
689     assert(Token.Tok);
690     auto &SavedToken = SavedTokens[Index++];
691     SavedToken.Tok = new FormatToken;
692     SavedToken.Tok->copyFrom(*Token.Tok);
693     SavedToken.Children = std::move(Token.Children);
694   }
695 
696   AnnotatedLine Line(ParsedLine);
697   assert(Line.Last == LastToken);
698 
699   TokenAnnotator Annotator(Style, Keywords);
700   Annotator.annotate(Line);
701   Annotator.calculateFormattingInformation(Line);
702 
703   auto Length = LastToken->TotalLength;
704   if (OpeningBrace) {
705     assert(OpeningBrace != Tokens.front().Tok);
706     if (auto Prev = OpeningBrace->Previous;
707         Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
708       Length -= ColumnLimit;
709     }
710     Length -= OpeningBrace->TokenText.size() + 1;
711   }
712 
713   if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
714     assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
715     Length -= FirstToken->TokenText.size() + 1;
716   }
717 
718   Index = 0;
719   for (auto &Token : Tokens) {
720     const auto &SavedToken = SavedTokens[Index++];
721     Token.Tok->copyFrom(*SavedToken.Tok);
722     Token.Children = std::move(SavedToken.Children);
723     delete SavedToken.Tok;
724   }
725 
726   // If these change PPLevel needs to be used for get correct indentation.
727   assert(!Line.InMacroBody);
728   assert(!Line.InPPDirective);
729   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
730 }
731 
732 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
733                                              unsigned AddLevels, bool MunchSemi,
734                                              bool KeepBraces,
735                                              IfStmtKind *IfKind,
736                                              bool UnindentWhitesmithsBraces) {
737   auto HandleVerilogBlockLabel = [this]() {
738     // ":" name
739     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
740       nextToken();
741       if (Keywords.isVerilogIdentifier(*FormatTok))
742         nextToken();
743     }
744   };
745 
746   // Whether this is a Verilog-specific block that has a special header like a
747   // module.
748   const bool VerilogHierarchy =
749       Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
750   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
751           (Style.isVerilog() &&
752            (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
753          "'{' or macro block token expected");
754   FormatToken *Tok = FormatTok;
755   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
756   auto Index = CurrentLines->size();
757   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
758   FormatTok->setBlockKind(BK_Block);
759 
760   // For Whitesmiths mode, jump to the next level prior to skipping over the
761   // braces.
762   if (!VerilogHierarchy && AddLevels > 0 &&
763       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
764     ++Line->Level;
765   }
766 
767   size_t PPStartHash = computePPHash();
768 
769   const unsigned InitialLevel = Line->Level;
770   if (VerilogHierarchy) {
771     AddLevels += parseVerilogHierarchyHeader();
772   } else {
773     nextToken(/*LevelDifference=*/AddLevels);
774     HandleVerilogBlockLabel();
775   }
776 
777   // Bail out if there are too many levels. Otherwise, the stack might overflow.
778   if (Line->Level > 300)
779     return nullptr;
780 
781   if (MacroBlock && FormatTok->is(tok::l_paren))
782     parseParens();
783 
784   size_t NbPreprocessorDirectives =
785       !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
786   addUnwrappedLine();
787   size_t OpeningLineIndex =
788       CurrentLines->empty()
789           ? (UnwrappedLine::kInvalidIndex)
790           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
791 
792   // Whitesmiths is weird here. The brace needs to be indented for the namespace
793   // block, but the block itself may not be indented depending on the style
794   // settings. This allows the format to back up one level in those cases.
795   if (UnindentWhitesmithsBraces)
796     --Line->Level;
797 
798   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
799                                           MustBeDeclaration);
800   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
801     Line->Level += AddLevels;
802 
803   FormatToken *IfLBrace = nullptr;
804   const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
805 
806   if (eof())
807     return IfLBrace;
808 
809   if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
810                  : FormatTok->isNot(tok::r_brace)) {
811     Line->Level = InitialLevel;
812     FormatTok->setBlockKind(BK_Block);
813     return IfLBrace;
814   }
815 
816   if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
817     FormatTok->setFinalizedType(TT_NamespaceRBrace);
818 
819   const bool IsFunctionRBrace =
820       FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
821 
822   auto RemoveBraces = [=]() mutable {
823     if (!SimpleBlock)
824       return false;
825     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
826     assert(FormatTok->is(tok::r_brace));
827     const bool WrappedOpeningBrace = !Tok->Previous;
828     if (WrappedOpeningBrace && FollowedByComment)
829       return false;
830     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
831     if (KeepBraces && !HasRequiredIfBraces)
832       return false;
833     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
834       const FormatToken *Previous = Tokens->getPreviousToken();
835       assert(Previous);
836       if (Previous->is(tok::r_brace) && !Previous->Optional)
837         return false;
838     }
839     assert(!CurrentLines->empty());
840     auto &LastLine = CurrentLines->back();
841     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
842       return false;
843     if (Tok->is(TT_ElseLBrace))
844       return true;
845     if (WrappedOpeningBrace) {
846       assert(Index > 0);
847       --Index; // The line above the wrapped l_brace.
848       Tok = nullptr;
849     }
850     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
851   };
852   if (RemoveBraces()) {
853     Tok->MatchingParen = FormatTok;
854     FormatTok->MatchingParen = Tok;
855   }
856 
857   size_t PPEndHash = computePPHash();
858 
859   // Munch the closing brace.
860   nextToken(/*LevelDifference=*/-AddLevels);
861 
862   // When this is a function block and there is an unnecessary semicolon
863   // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
864   // it later).
865   if (Style.RemoveSemicolon && IsFunctionRBrace) {
866     while (FormatTok->is(tok::semi)) {
867       FormatTok->Optional = true;
868       nextToken();
869     }
870   }
871 
872   HandleVerilogBlockLabel();
873 
874   if (MacroBlock && FormatTok->is(tok::l_paren))
875     parseParens();
876 
877   Line->Level = InitialLevel;
878 
879   if (FormatTok->is(tok::kw_noexcept)) {
880     // A noexcept in a requires expression.
881     nextToken();
882   }
883 
884   if (FormatTok->is(tok::arrow)) {
885     // Following the } or noexcept we can find a trailing return type arrow
886     // as part of an implicit conversion constraint.
887     nextToken();
888     parseStructuralElement();
889   }
890 
891   if (MunchSemi && FormatTok->is(tok::semi))
892     nextToken();
893 
894   if (PPStartHash == PPEndHash) {
895     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
896     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
897       // Update the opening line to add the forward reference as well
898       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
899           CurrentLines->size() - 1;
900     }
901   }
902 
903   return IfLBrace;
904 }
905 
906 static bool isGoogScope(const UnwrappedLine &Line) {
907   // FIXME: Closure-library specific stuff should not be hard-coded but be
908   // configurable.
909   if (Line.Tokens.size() < 4)
910     return false;
911   auto I = Line.Tokens.begin();
912   if (I->Tok->TokenText != "goog")
913     return false;
914   ++I;
915   if (I->Tok->isNot(tok::period))
916     return false;
917   ++I;
918   if (I->Tok->TokenText != "scope")
919     return false;
920   ++I;
921   return I->Tok->is(tok::l_paren);
922 }
923 
924 static bool isIIFE(const UnwrappedLine &Line,
925                    const AdditionalKeywords &Keywords) {
926   // Look for the start of an immediately invoked anonymous function.
927   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
928   // This is commonly done in JavaScript to create a new, anonymous scope.
929   // Example: (function() { ... })()
930   if (Line.Tokens.size() < 3)
931     return false;
932   auto I = Line.Tokens.begin();
933   if (I->Tok->isNot(tok::l_paren))
934     return false;
935   ++I;
936   if (I->Tok->isNot(Keywords.kw_function))
937     return false;
938   ++I;
939   return I->Tok->is(tok::l_paren);
940 }
941 
942 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
943                                    const FormatToken &InitialToken) {
944   tok::TokenKind Kind = InitialToken.Tok.getKind();
945   if (InitialToken.is(TT_NamespaceMacro))
946     Kind = tok::kw_namespace;
947 
948   switch (Kind) {
949   case tok::kw_namespace:
950     return Style.BraceWrapping.AfterNamespace;
951   case tok::kw_class:
952     return Style.BraceWrapping.AfterClass;
953   case tok::kw_union:
954     return Style.BraceWrapping.AfterUnion;
955   case tok::kw_struct:
956     return Style.BraceWrapping.AfterStruct;
957   case tok::kw_enum:
958     return Style.BraceWrapping.AfterEnum;
959   default:
960     return false;
961   }
962 }
963 
964 void UnwrappedLineParser::parseChildBlock() {
965   assert(FormatTok->is(tok::l_brace));
966   FormatTok->setBlockKind(BK_Block);
967   const FormatToken *OpeningBrace = FormatTok;
968   nextToken();
969   {
970     bool SkipIndent = (Style.isJavaScript() &&
971                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
972     ScopedLineState LineState(*this);
973     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
974                                             /*MustBeDeclaration=*/false);
975     Line->Level += SkipIndent ? 0 : 1;
976     parseLevel(OpeningBrace);
977     flushComments(isOnNewLine(*FormatTok));
978     Line->Level -= SkipIndent ? 0 : 1;
979   }
980   nextToken();
981 }
982 
983 void UnwrappedLineParser::parsePPDirective() {
984   assert(FormatTok->is(tok::hash) && "'#' expected");
985   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
986 
987   nextToken();
988 
989   if (!FormatTok->Tok.getIdentifierInfo()) {
990     parsePPUnknown();
991     return;
992   }
993 
994   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
995   case tok::pp_define:
996     parsePPDefine();
997     return;
998   case tok::pp_if:
999     parsePPIf(/*IfDef=*/false);
1000     break;
1001   case tok::pp_ifdef:
1002   case tok::pp_ifndef:
1003     parsePPIf(/*IfDef=*/true);
1004     break;
1005   case tok::pp_else:
1006   case tok::pp_elifdef:
1007   case tok::pp_elifndef:
1008   case tok::pp_elif:
1009     parsePPElse();
1010     break;
1011   case tok::pp_endif:
1012     parsePPEndIf();
1013     break;
1014   case tok::pp_pragma:
1015     parsePPPragma();
1016     break;
1017   default:
1018     parsePPUnknown();
1019     break;
1020   }
1021 }
1022 
1023 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1024   size_t Line = CurrentLines->size();
1025   if (CurrentLines == &PreprocessorDirectives)
1026     Line += Lines.size();
1027 
1028   if (Unreachable ||
1029       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1030     PPStack.push_back({PP_Unreachable, Line});
1031   } else {
1032     PPStack.push_back({PP_Conditional, Line});
1033   }
1034 }
1035 
1036 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1037   ++PPBranchLevel;
1038   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1039   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1040     PPLevelBranchIndex.push_back(0);
1041     PPLevelBranchCount.push_back(0);
1042   }
1043   PPChainBranchIndex.push(Unreachable ? -1 : 0);
1044   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1045   conditionalCompilationCondition(Unreachable || Skip);
1046 }
1047 
1048 void UnwrappedLineParser::conditionalCompilationAlternative() {
1049   if (!PPStack.empty())
1050     PPStack.pop_back();
1051   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1052   if (!PPChainBranchIndex.empty())
1053     ++PPChainBranchIndex.top();
1054   conditionalCompilationCondition(
1055       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1056       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1057 }
1058 
1059 void UnwrappedLineParser::conditionalCompilationEnd() {
1060   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1061   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1062     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1063       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1064   }
1065   // Guard against #endif's without #if.
1066   if (PPBranchLevel > -1)
1067     --PPBranchLevel;
1068   if (!PPChainBranchIndex.empty())
1069     PPChainBranchIndex.pop();
1070   if (!PPStack.empty())
1071     PPStack.pop_back();
1072 }
1073 
1074 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1075   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1076   nextToken();
1077   bool Unreachable = false;
1078   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1079     Unreachable = true;
1080   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1081     Unreachable = true;
1082   conditionalCompilationStart(Unreachable);
1083   FormatToken *IfCondition = FormatTok;
1084   // If there's a #ifndef on the first line, and the only lines before it are
1085   // comments, it could be an include guard.
1086   bool MaybeIncludeGuard = IfNDef;
1087   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1088     for (auto &Line : Lines) {
1089       if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1090         MaybeIncludeGuard = false;
1091         IncludeGuard = IG_Rejected;
1092         break;
1093       }
1094     }
1095   }
1096   --PPBranchLevel;
1097   parsePPUnknown();
1098   ++PPBranchLevel;
1099   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1100     IncludeGuard = IG_IfNdefed;
1101     IncludeGuardToken = IfCondition;
1102   }
1103 }
1104 
1105 void UnwrappedLineParser::parsePPElse() {
1106   // If a potential include guard has an #else, it's not an include guard.
1107   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1108     IncludeGuard = IG_Rejected;
1109   // Don't crash when there is an #else without an #if.
1110   assert(PPBranchLevel >= -1);
1111   if (PPBranchLevel == -1)
1112     conditionalCompilationStart(/*Unreachable=*/true);
1113   conditionalCompilationAlternative();
1114   --PPBranchLevel;
1115   parsePPUnknown();
1116   ++PPBranchLevel;
1117 }
1118 
1119 void UnwrappedLineParser::parsePPEndIf() {
1120   conditionalCompilationEnd();
1121   parsePPUnknown();
1122   // If the #endif of a potential include guard is the last thing in the file,
1123   // then we found an include guard.
1124   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1125       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1126     IncludeGuard = IG_Found;
1127   }
1128 }
1129 
1130 void UnwrappedLineParser::parsePPDefine() {
1131   nextToken();
1132 
1133   if (!FormatTok->Tok.getIdentifierInfo()) {
1134     IncludeGuard = IG_Rejected;
1135     IncludeGuardToken = nullptr;
1136     parsePPUnknown();
1137     return;
1138   }
1139 
1140   if (IncludeGuard == IG_IfNdefed &&
1141       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1142     IncludeGuard = IG_Defined;
1143     IncludeGuardToken = nullptr;
1144     for (auto &Line : Lines) {
1145       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1146         IncludeGuard = IG_Rejected;
1147         break;
1148       }
1149     }
1150   }
1151 
1152   // In the context of a define, even keywords should be treated as normal
1153   // identifiers. Setting the kind to identifier is not enough, because we need
1154   // to treat additional keywords like __except as well, which are already
1155   // identifiers. Setting the identifier info to null interferes with include
1156   // guard processing above, and changes preprocessing nesting.
1157   FormatTok->Tok.setKind(tok::identifier);
1158   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1159   nextToken();
1160   if (FormatTok->Tok.getKind() == tok::l_paren &&
1161       !FormatTok->hasWhitespaceBefore()) {
1162     parseParens();
1163   }
1164   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1165     Line->Level += PPBranchLevel + 1;
1166   addUnwrappedLine();
1167   ++Line->Level;
1168 
1169   Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1170   assert((int)Line->PPLevel >= 0);
1171   Line->InMacroBody = true;
1172 
1173   if (FormatTok->is(tok::identifier) &&
1174       Tokens->peekNextToken()->is(tok::colon)) {
1175     nextToken();
1176     nextToken();
1177   }
1178 
1179   // Errors during a preprocessor directive can only affect the layout of the
1180   // preprocessor directive, and thus we ignore them. An alternative approach
1181   // would be to use the same approach we use on the file level (no
1182   // re-indentation if there was a structural error) within the macro
1183   // definition.
1184   parseFile();
1185 }
1186 
1187 void UnwrappedLineParser::parsePPPragma() {
1188   Line->InPragmaDirective = true;
1189   parsePPUnknown();
1190 }
1191 
1192 void UnwrappedLineParser::parsePPUnknown() {
1193   do {
1194     nextToken();
1195   } while (!eof());
1196   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1197     Line->Level += PPBranchLevel + 1;
1198   addUnwrappedLine();
1199 }
1200 
1201 // Here we exclude certain tokens that are not usually the first token in an
1202 // unwrapped line. This is used in attempt to distinguish macro calls without
1203 // trailing semicolons from other constructs split to several lines.
1204 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1205   // Semicolon can be a null-statement, l_square can be a start of a macro or
1206   // a C++11 attribute, but this doesn't seem to be common.
1207   assert(Tok.isNot(TT_AttributeSquare));
1208   return !Tok.isOneOf(tok::semi, tok::l_brace,
1209                       // Tokens that can only be used as binary operators and a
1210                       // part of overloaded operator names.
1211                       tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1212                       tok::less, tok::greater, tok::slash, tok::percent,
1213                       tok::lessless, tok::greatergreater, tok::equal,
1214                       tok::plusequal, tok::minusequal, tok::starequal,
1215                       tok::slashequal, tok::percentequal, tok::ampequal,
1216                       tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1217                       tok::lesslessequal,
1218                       // Colon is used in labels, base class lists, initializer
1219                       // lists, range-based for loops, ternary operator, but
1220                       // should never be the first token in an unwrapped line.
1221                       tok::colon,
1222                       // 'noexcept' is a trailing annotation.
1223                       tok::kw_noexcept);
1224 }
1225 
1226 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1227                           const FormatToken *FormatTok) {
1228   // FIXME: This returns true for C/C++ keywords like 'struct'.
1229   return FormatTok->is(tok::identifier) &&
1230          (!FormatTok->Tok.getIdentifierInfo() ||
1231           !FormatTok->isOneOf(
1232               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1233               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1234               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1235               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1236               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1237               Keywords.kw_instanceof, Keywords.kw_interface,
1238               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1239 }
1240 
1241 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1242                                  const FormatToken *FormatTok) {
1243   return FormatTok->Tok.isLiteral() ||
1244          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1245          mustBeJSIdent(Keywords, FormatTok);
1246 }
1247 
1248 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1249 // when encountered after a value (see mustBeJSIdentOrValue).
1250 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1251                            const FormatToken *FormatTok) {
1252   return FormatTok->isOneOf(
1253       tok::kw_return, Keywords.kw_yield,
1254       // conditionals
1255       tok::kw_if, tok::kw_else,
1256       // loops
1257       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1258       // switch/case
1259       tok::kw_switch, tok::kw_case,
1260       // exceptions
1261       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1262       // declaration
1263       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1264       Keywords.kw_async, Keywords.kw_function,
1265       // import/export
1266       Keywords.kw_import, tok::kw_export);
1267 }
1268 
1269 // Checks whether a token is a type in K&R C (aka C78).
1270 static bool isC78Type(const FormatToken &Tok) {
1271   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1272                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1273                      tok::identifier);
1274 }
1275 
1276 // This function checks whether a token starts the first parameter declaration
1277 // in a K&R C (aka C78) function definition, e.g.:
1278 //   int f(a, b)
1279 //   short a, b;
1280 //   {
1281 //      return a + b;
1282 //   }
1283 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1284                                const FormatToken *FuncName) {
1285   assert(Tok);
1286   assert(Next);
1287   assert(FuncName);
1288 
1289   if (FuncName->isNot(tok::identifier))
1290     return false;
1291 
1292   const FormatToken *Prev = FuncName->Previous;
1293   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1294     return false;
1295 
1296   if (!isC78Type(*Tok) &&
1297       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1298     return false;
1299   }
1300 
1301   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1302     return false;
1303 
1304   Tok = Tok->Previous;
1305   if (!Tok || Tok->isNot(tok::r_paren))
1306     return false;
1307 
1308   Tok = Tok->Previous;
1309   if (!Tok || Tok->isNot(tok::identifier))
1310     return false;
1311 
1312   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1313 }
1314 
1315 bool UnwrappedLineParser::parseModuleImport() {
1316   assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1317 
1318   if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1319       !Token->Tok.getIdentifierInfo() &&
1320       !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1321     return false;
1322   }
1323 
1324   nextToken();
1325   while (!eof()) {
1326     if (FormatTok->is(tok::colon)) {
1327       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1328     }
1329     // Handle import <foo/bar.h> as we would an include statement.
1330     else if (FormatTok->is(tok::less)) {
1331       nextToken();
1332       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1333         // Mark tokens up to the trailing line comments as implicit string
1334         // literals.
1335         if (FormatTok->isNot(tok::comment) &&
1336             !FormatTok->TokenText.starts_with("//")) {
1337           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1338         }
1339         nextToken();
1340       }
1341     }
1342     if (FormatTok->is(tok::semi)) {
1343       nextToken();
1344       break;
1345     }
1346     nextToken();
1347   }
1348 
1349   addUnwrappedLine();
1350   return true;
1351 }
1352 
1353 // readTokenWithJavaScriptASI reads the next token and terminates the current
1354 // line if JavaScript Automatic Semicolon Insertion must
1355 // happen between the current token and the next token.
1356 //
1357 // This method is conservative - it cannot cover all edge cases of JavaScript,
1358 // but only aims to correctly handle certain well known cases. It *must not*
1359 // return true in speculative cases.
1360 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1361   FormatToken *Previous = FormatTok;
1362   readToken();
1363   FormatToken *Next = FormatTok;
1364 
1365   bool IsOnSameLine =
1366       CommentsBeforeNextToken.empty()
1367           ? Next->NewlinesBefore == 0
1368           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1369   if (IsOnSameLine)
1370     return;
1371 
1372   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1373   bool PreviousStartsTemplateExpr =
1374       Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1375   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1376     // If the line contains an '@' sign, the previous token might be an
1377     // annotation, which can precede another identifier/value.
1378     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1379       return LineNode.Tok->is(tok::at);
1380     });
1381     if (HasAt)
1382       return;
1383   }
1384   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1385     return addUnwrappedLine();
1386   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1387   bool NextEndsTemplateExpr =
1388       Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1389   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1390       (PreviousMustBeValue ||
1391        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1392                          tok::minusminus))) {
1393     return addUnwrappedLine();
1394   }
1395   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1396       isJSDeclOrStmt(Keywords, Next)) {
1397     return addUnwrappedLine();
1398   }
1399 }
1400 
1401 void UnwrappedLineParser::parseStructuralElement(
1402     const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1403     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1404   if (Style.Language == FormatStyle::LK_TableGen &&
1405       FormatTok->is(tok::pp_include)) {
1406     nextToken();
1407     if (FormatTok->is(tok::string_literal))
1408       nextToken();
1409     addUnwrappedLine();
1410     return;
1411   }
1412 
1413   if (Style.isCpp()) {
1414     while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1415     }
1416   } else if (Style.isVerilog()) {
1417     if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1418       parseForOrWhileLoop(/*HasParens=*/false);
1419       return;
1420     }
1421     if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1422       parseForOrWhileLoop();
1423       return;
1424     }
1425     if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1426                            Keywords.kw_assume, Keywords.kw_cover)) {
1427       parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1428       return;
1429     }
1430 
1431     // Skip things that can exist before keywords like 'if' and 'case'.
1432     while (true) {
1433       if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1434                              Keywords.kw_unique0)) {
1435         nextToken();
1436       } else if (FormatTok->is(tok::l_paren) &&
1437                  Tokens->peekNextToken()->is(tok::star)) {
1438         parseParens();
1439       } else {
1440         break;
1441       }
1442     }
1443   }
1444 
1445   // Tokens that only make sense at the beginning of a line.
1446   switch (FormatTok->Tok.getKind()) {
1447   case tok::kw_asm:
1448     nextToken();
1449     if (FormatTok->is(tok::l_brace)) {
1450       FormatTok->setFinalizedType(TT_InlineASMBrace);
1451       nextToken();
1452       while (FormatTok && !eof()) {
1453         if (FormatTok->is(tok::r_brace)) {
1454           FormatTok->setFinalizedType(TT_InlineASMBrace);
1455           nextToken();
1456           addUnwrappedLine();
1457           break;
1458         }
1459         FormatTok->Finalized = true;
1460         nextToken();
1461       }
1462     }
1463     break;
1464   case tok::kw_namespace:
1465     parseNamespace();
1466     return;
1467   case tok::kw_public:
1468   case tok::kw_protected:
1469   case tok::kw_private:
1470     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1471         Style.isCSharp()) {
1472       nextToken();
1473     } else {
1474       parseAccessSpecifier();
1475     }
1476     return;
1477   case tok::kw_if: {
1478     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1479       // field/method declaration.
1480       break;
1481     }
1482     FormatToken *Tok = parseIfThenElse(IfKind);
1483     if (IfLeftBrace)
1484       *IfLeftBrace = Tok;
1485     return;
1486   }
1487   case tok::kw_for:
1488   case tok::kw_while:
1489     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1490       // field/method declaration.
1491       break;
1492     }
1493     parseForOrWhileLoop();
1494     return;
1495   case tok::kw_do:
1496     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497       // field/method declaration.
1498       break;
1499     }
1500     parseDoWhile();
1501     if (HasDoWhile)
1502       *HasDoWhile = true;
1503     return;
1504   case tok::kw_switch:
1505     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1506       // 'switch: string' field declaration.
1507       break;
1508     }
1509     parseSwitch();
1510     return;
1511   case tok::kw_default:
1512     // In Verilog default along with other labels are handled in the next loop.
1513     if (Style.isVerilog())
1514       break;
1515     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1516       // 'default: string' field declaration.
1517       break;
1518     }
1519     nextToken();
1520     if (FormatTok->is(tok::colon)) {
1521       FormatTok->setFinalizedType(TT_CaseLabelColon);
1522       parseLabel();
1523       return;
1524     }
1525     // e.g. "default void f() {}" in a Java interface.
1526     break;
1527   case tok::kw_case:
1528     // Proto: there are no switch/case statements.
1529     if (Style.Language == FormatStyle::LK_Proto) {
1530       nextToken();
1531       return;
1532     }
1533     if (Style.isVerilog()) {
1534       parseBlock();
1535       addUnwrappedLine();
1536       return;
1537     }
1538     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1539       // 'case: string' field declaration.
1540       nextToken();
1541       break;
1542     }
1543     parseCaseLabel();
1544     return;
1545   case tok::kw_try:
1546   case tok::kw___try:
1547     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1548       // field/method declaration.
1549       break;
1550     }
1551     parseTryCatch();
1552     return;
1553   case tok::kw_extern:
1554     nextToken();
1555     if (Style.isVerilog()) {
1556       // In Verilog and extern module declaration looks like a start of module.
1557       // But there is no body and endmodule. So we handle it separately.
1558       if (Keywords.isVerilogHierarchy(*FormatTok)) {
1559         parseVerilogHierarchyHeader();
1560         return;
1561       }
1562     } else if (FormatTok->is(tok::string_literal)) {
1563       nextToken();
1564       if (FormatTok->is(tok::l_brace)) {
1565         if (Style.BraceWrapping.AfterExternBlock)
1566           addUnwrappedLine();
1567         // Either we indent or for backwards compatibility we follow the
1568         // AfterExternBlock style.
1569         unsigned AddLevels =
1570             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1571                     (Style.BraceWrapping.AfterExternBlock &&
1572                      Style.IndentExternBlock ==
1573                          FormatStyle::IEBS_AfterExternBlock)
1574                 ? 1u
1575                 : 0u;
1576         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1577         addUnwrappedLine();
1578         return;
1579       }
1580     }
1581     break;
1582   case tok::kw_export:
1583     if (Style.isJavaScript()) {
1584       parseJavaScriptEs6ImportExport();
1585       return;
1586     }
1587     if (Style.isCpp()) {
1588       nextToken();
1589       if (FormatTok->is(tok::kw_namespace)) {
1590         parseNamespace();
1591         return;
1592       }
1593       if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1594         return;
1595     }
1596     break;
1597   case tok::kw_inline:
1598     nextToken();
1599     if (FormatTok->is(tok::kw_namespace)) {
1600       parseNamespace();
1601       return;
1602     }
1603     break;
1604   case tok::identifier:
1605     if (FormatTok->is(TT_ForEachMacro)) {
1606       parseForOrWhileLoop();
1607       return;
1608     }
1609     if (FormatTok->is(TT_MacroBlockBegin)) {
1610       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1611                  /*MunchSemi=*/false);
1612       return;
1613     }
1614     if (FormatTok->is(Keywords.kw_import)) {
1615       if (Style.isJavaScript()) {
1616         parseJavaScriptEs6ImportExport();
1617         return;
1618       }
1619       if (Style.Language == FormatStyle::LK_Proto) {
1620         nextToken();
1621         if (FormatTok->is(tok::kw_public))
1622           nextToken();
1623         if (FormatTok->isNot(tok::string_literal))
1624           return;
1625         nextToken();
1626         if (FormatTok->is(tok::semi))
1627           nextToken();
1628         addUnwrappedLine();
1629         return;
1630       }
1631       if (Style.isCpp() && parseModuleImport())
1632         return;
1633     }
1634     if (Style.isCpp() &&
1635         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1636                            Keywords.kw_slots, Keywords.kw_qslots)) {
1637       nextToken();
1638       if (FormatTok->is(tok::colon)) {
1639         nextToken();
1640         addUnwrappedLine();
1641         return;
1642       }
1643     }
1644     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1645       parseStatementMacro();
1646       return;
1647     }
1648     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1649       parseNamespace();
1650       return;
1651     }
1652     // In Verilog labels can be any expression, so we don't do them here.
1653     // JS doesn't have macros, and within classes colons indicate fields, not
1654     // labels.
1655     if (!Style.isJavaScript() && !Style.isVerilog() &&
1656         Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1657       nextToken();
1658       Line->Tokens.begin()->Tok->MustBreakBefore = true;
1659       FormatTok->setFinalizedType(TT_GotoLabelColon);
1660       parseLabel(!Style.IndentGotoLabels);
1661       if (HasLabel)
1662         *HasLabel = true;
1663       return;
1664     }
1665     // In all other cases, parse the declaration.
1666     break;
1667   default:
1668     break;
1669   }
1670 
1671   const bool InRequiresExpression =
1672       OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1673   do {
1674     const FormatToken *Previous = FormatTok->Previous;
1675     switch (FormatTok->Tok.getKind()) {
1676     case tok::at:
1677       nextToken();
1678       if (FormatTok->is(tok::l_brace)) {
1679         nextToken();
1680         parseBracedList();
1681         break;
1682       } else if (Style.Language == FormatStyle::LK_Java &&
1683                  FormatTok->is(Keywords.kw_interface)) {
1684         nextToken();
1685         break;
1686       }
1687       switch (FormatTok->Tok.getObjCKeywordID()) {
1688       case tok::objc_public:
1689       case tok::objc_protected:
1690       case tok::objc_package:
1691       case tok::objc_private:
1692         return parseAccessSpecifier();
1693       case tok::objc_interface:
1694       case tok::objc_implementation:
1695         return parseObjCInterfaceOrImplementation();
1696       case tok::objc_protocol:
1697         if (parseObjCProtocol())
1698           return;
1699         break;
1700       case tok::objc_end:
1701         return; // Handled by the caller.
1702       case tok::objc_optional:
1703       case tok::objc_required:
1704         nextToken();
1705         addUnwrappedLine();
1706         return;
1707       case tok::objc_autoreleasepool:
1708         nextToken();
1709         if (FormatTok->is(tok::l_brace)) {
1710           if (Style.BraceWrapping.AfterControlStatement ==
1711               FormatStyle::BWACS_Always) {
1712             addUnwrappedLine();
1713           }
1714           parseBlock();
1715         }
1716         addUnwrappedLine();
1717         return;
1718       case tok::objc_synchronized:
1719         nextToken();
1720         if (FormatTok->is(tok::l_paren)) {
1721           // Skip synchronization object
1722           parseParens();
1723         }
1724         if (FormatTok->is(tok::l_brace)) {
1725           if (Style.BraceWrapping.AfterControlStatement ==
1726               FormatStyle::BWACS_Always) {
1727             addUnwrappedLine();
1728           }
1729           parseBlock();
1730         }
1731         addUnwrappedLine();
1732         return;
1733       case tok::objc_try:
1734         // This branch isn't strictly necessary (the kw_try case below would
1735         // do this too after the tok::at is parsed above).  But be explicit.
1736         parseTryCatch();
1737         return;
1738       default:
1739         break;
1740       }
1741       break;
1742     case tok::kw_requires: {
1743       if (Style.isCpp()) {
1744         bool ParsedClause = parseRequires();
1745         if (ParsedClause)
1746           return;
1747       } else {
1748         nextToken();
1749       }
1750       break;
1751     }
1752     case tok::kw_enum:
1753       // Ignore if this is part of "template <enum ...".
1754       if (Previous && Previous->is(tok::less)) {
1755         nextToken();
1756         break;
1757       }
1758 
1759       // parseEnum falls through and does not yet add an unwrapped line as an
1760       // enum definition can start a structural element.
1761       if (!parseEnum())
1762         break;
1763       // This only applies to C++ and Verilog.
1764       if (!Style.isCpp() && !Style.isVerilog()) {
1765         addUnwrappedLine();
1766         return;
1767       }
1768       break;
1769     case tok::kw_typedef:
1770       nextToken();
1771       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1772                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1773                              Keywords.kw_CF_CLOSED_ENUM,
1774                              Keywords.kw_NS_CLOSED_ENUM)) {
1775         parseEnum();
1776       }
1777       break;
1778     case tok::kw_class:
1779       if (Style.isVerilog()) {
1780         parseBlock();
1781         addUnwrappedLine();
1782         return;
1783       }
1784       [[fallthrough]];
1785     case tok::kw_struct:
1786     case tok::kw_union:
1787       if (parseStructLike())
1788         return;
1789       break;
1790     case tok::kw_decltype:
1791       nextToken();
1792       if (FormatTok->is(tok::l_paren)) {
1793         parseParens();
1794         assert(FormatTok->Previous);
1795         if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1796                                               tok::l_paren)) {
1797           Line->SeenDecltypeAuto = true;
1798         }
1799       }
1800       break;
1801     case tok::period:
1802       nextToken();
1803       // In Java, classes have an implicit static member "class".
1804       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1805           FormatTok->is(tok::kw_class)) {
1806         nextToken();
1807       }
1808       if (Style.isJavaScript() && FormatTok &&
1809           FormatTok->Tok.getIdentifierInfo()) {
1810         // JavaScript only has pseudo keywords, all keywords are allowed to
1811         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1812         nextToken();
1813       }
1814       break;
1815     case tok::semi:
1816       nextToken();
1817       addUnwrappedLine();
1818       return;
1819     case tok::r_brace:
1820       addUnwrappedLine();
1821       return;
1822     case tok::l_paren: {
1823       parseParens();
1824       // Break the unwrapped line if a K&R C function definition has a parameter
1825       // declaration.
1826       if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1827         break;
1828       if (isC78ParameterDecl(FormatTok,
1829                              Tokens->peekNextToken(/*SkipComment=*/true),
1830                              Previous)) {
1831         addUnwrappedLine();
1832         return;
1833       }
1834       break;
1835     }
1836     case tok::kw_operator:
1837       nextToken();
1838       if (FormatTok->isBinaryOperator())
1839         nextToken();
1840       break;
1841     case tok::caret:
1842       nextToken();
1843       // Block return type.
1844       if (FormatTok->Tok.isAnyIdentifier() ||
1845           FormatTok->isSimpleTypeSpecifier()) {
1846         nextToken();
1847         // Return types: pointers are ok too.
1848         while (FormatTok->is(tok::star))
1849           nextToken();
1850       }
1851       // Block argument list.
1852       if (FormatTok->is(tok::l_paren))
1853         parseParens();
1854       // Block body.
1855       if (FormatTok->is(tok::l_brace))
1856         parseChildBlock();
1857       break;
1858     case tok::l_brace:
1859       if (InRequiresExpression)
1860         FormatTok->setFinalizedType(TT_BracedListLBrace);
1861       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1862         IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1863         // A block outside of parentheses must be the last part of a
1864         // structural element.
1865         // FIXME: Figure out cases where this is not true, and add projections
1866         // for them (the one we know is missing are lambdas).
1867         if (Style.Language == FormatStyle::LK_Java &&
1868             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1869           // If necessary, we could set the type to something different than
1870           // TT_FunctionLBrace.
1871           if (Style.BraceWrapping.AfterControlStatement ==
1872               FormatStyle::BWACS_Always) {
1873             addUnwrappedLine();
1874           }
1875         } else if (Style.BraceWrapping.AfterFunction) {
1876           addUnwrappedLine();
1877         }
1878         FormatTok->setFinalizedType(TT_FunctionLBrace);
1879         parseBlock();
1880         IsDecltypeAutoFunction = false;
1881         addUnwrappedLine();
1882         return;
1883       }
1884       // Otherwise this was a braced init list, and the structural
1885       // element continues.
1886       break;
1887     case tok::kw_try:
1888       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1889         // field/method declaration.
1890         nextToken();
1891         break;
1892       }
1893       // We arrive here when parsing function-try blocks.
1894       if (Style.BraceWrapping.AfterFunction)
1895         addUnwrappedLine();
1896       parseTryCatch();
1897       return;
1898     case tok::identifier: {
1899       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1900           Line->MustBeDeclaration) {
1901         addUnwrappedLine();
1902         parseCSharpGenericTypeConstraint();
1903         break;
1904       }
1905       if (FormatTok->is(TT_MacroBlockEnd)) {
1906         addUnwrappedLine();
1907         return;
1908       }
1909 
1910       // Function declarations (as opposed to function expressions) are parsed
1911       // on their own unwrapped line by continuing this loop. Function
1912       // expressions (functions that are not on their own line) must not create
1913       // a new unwrapped line, so they are special cased below.
1914       size_t TokenCount = Line->Tokens.size();
1915       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1916           (TokenCount > 1 ||
1917            (TokenCount == 1 &&
1918             Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1919         tryToParseJSFunction();
1920         break;
1921       }
1922       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1923           FormatTok->is(Keywords.kw_interface)) {
1924         if (Style.isJavaScript()) {
1925           // In JavaScript/TypeScript, "interface" can be used as a standalone
1926           // identifier, e.g. in `var interface = 1;`. If "interface" is
1927           // followed by another identifier, it is very like to be an actual
1928           // interface declaration.
1929           unsigned StoredPosition = Tokens->getPosition();
1930           FormatToken *Next = Tokens->getNextToken();
1931           FormatTok = Tokens->setPosition(StoredPosition);
1932           if (!mustBeJSIdent(Keywords, Next)) {
1933             nextToken();
1934             break;
1935           }
1936         }
1937         parseRecord();
1938         addUnwrappedLine();
1939         return;
1940       }
1941 
1942       if (Style.isVerilog()) {
1943         if (FormatTok->is(Keywords.kw_table)) {
1944           parseVerilogTable();
1945           return;
1946         }
1947         if (Keywords.isVerilogBegin(*FormatTok) ||
1948             Keywords.isVerilogHierarchy(*FormatTok)) {
1949           parseBlock();
1950           addUnwrappedLine();
1951           return;
1952         }
1953       }
1954 
1955       if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1956         if (parseStructLike())
1957           return;
1958         break;
1959       }
1960 
1961       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1962         parseStatementMacro();
1963         return;
1964       }
1965 
1966       // See if the following token should start a new unwrapped line.
1967       StringRef Text = FormatTok->TokenText;
1968 
1969       FormatToken *PreviousToken = FormatTok;
1970       nextToken();
1971 
1972       // JS doesn't have macros, and within classes colons indicate fields, not
1973       // labels.
1974       if (Style.isJavaScript())
1975         break;
1976 
1977       auto OneTokenSoFar = [&]() {
1978         auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1979         while (I != E && I->Tok->is(tok::comment))
1980           ++I;
1981         if (Style.isVerilog())
1982           while (I != E && I->Tok->is(tok::hash))
1983             ++I;
1984         return I != E && (++I == E);
1985       };
1986       if (OneTokenSoFar()) {
1987         // Recognize function-like macro usages without trailing semicolon as
1988         // well as free-standing macros like Q_OBJECT.
1989         bool FunctionLike = FormatTok->is(tok::l_paren);
1990         if (FunctionLike)
1991           parseParens();
1992 
1993         bool FollowedByNewline =
1994             CommentsBeforeNextToken.empty()
1995                 ? FormatTok->NewlinesBefore > 0
1996                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1997 
1998         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1999             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2000           if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2001             PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2002           addUnwrappedLine();
2003           return;
2004         }
2005       }
2006       break;
2007     }
2008     case tok::equal:
2009       if ((Style.isJavaScript() || Style.isCSharp()) &&
2010           FormatTok->is(TT_FatArrow)) {
2011         tryToParseChildBlock();
2012         break;
2013       }
2014 
2015       nextToken();
2016       if (FormatTok->is(tok::l_brace)) {
2017         // Block kind should probably be set to BK_BracedInit for any language.
2018         // C# needs this change to ensure that array initialisers and object
2019         // initialisers are indented the same way.
2020         if (Style.isCSharp())
2021           FormatTok->setBlockKind(BK_BracedInit);
2022         nextToken();
2023         parseBracedList();
2024       } else if (Style.Language == FormatStyle::LK_Proto &&
2025                  FormatTok->is(tok::less)) {
2026         nextToken();
2027         parseBracedList(/*IsAngleBracket=*/true);
2028       }
2029       break;
2030     case tok::l_square:
2031       parseSquare();
2032       break;
2033     case tok::kw_new:
2034       parseNew();
2035       break;
2036     case tok::kw_case:
2037       // Proto: there are no switch/case statements.
2038       if (Style.Language == FormatStyle::LK_Proto) {
2039         nextToken();
2040         return;
2041       }
2042       // In Verilog switch is called case.
2043       if (Style.isVerilog()) {
2044         parseBlock();
2045         addUnwrappedLine();
2046         return;
2047       }
2048       if (Style.isJavaScript() && Line->MustBeDeclaration) {
2049         // 'case: string' field declaration.
2050         nextToken();
2051         break;
2052       }
2053       parseCaseLabel();
2054       break;
2055     case tok::kw_default:
2056       nextToken();
2057       if (Style.isVerilog()) {
2058         if (FormatTok->is(tok::colon)) {
2059           // The label will be handled in the next iteration.
2060           break;
2061         }
2062         if (FormatTok->is(Keywords.kw_clocking)) {
2063           // A default clocking block.
2064           parseBlock();
2065           addUnwrappedLine();
2066           return;
2067         }
2068         parseVerilogCaseLabel();
2069         return;
2070       }
2071       break;
2072     case tok::colon:
2073       nextToken();
2074       if (Style.isVerilog()) {
2075         parseVerilogCaseLabel();
2076         return;
2077       }
2078       break;
2079     default:
2080       nextToken();
2081       break;
2082     }
2083   } while (!eof());
2084 }
2085 
/// Tries to parse a C# property accessor block starting at the current `{`.
///
/// Returns false, leaving the current token unchanged, when the style is not
/// C#, when the token before the `{` is not an identifier, or when the
/// lookahead below finds no `get`, `init` or `set` keyword. Otherwise the
/// accessor block is consumed, unwrapped lines are added as needed, and true
/// is returned.
2086 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2087   assert(FormatTok->is(tok::l_brace));
2088   if (!Style.isCSharp())
2089     return false;
2090   // See if it's a property accessor.
       // NOTE(review): Previous is dereferenced without a null check; presumably
       // a `{` reaching this point always has a preceding token - confirm.
2091   if (FormatTok->Previous->isNot(tok::identifier))
2092     return false;
2093 
2094   // See if we are inside a property accessor.
2095   //
2096   // Record the current tokenPosition so that we can advance and
2097   // reset the current token. `Next` is not set yet so we need
2098   // another way to advance along the token stream.
2099   unsigned int StoredPosition = Tokens->getPosition();
2100   FormatToken *Tok = Tokens->getNextToken();
2101 
2102   // A trivial property accessor is of the form:
2103   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2104   // Track these as they do not require line breaks to be introduced.
2105   bool HasSpecialAccessor = false;
2106   bool IsTrivialPropertyAccessor = true;
       // Lookahead only: this loop advances the local `Tok`, not FormatTok. It
       // keeps going while it sees `;`, access specifiers or accessor keywords
       // and leaves through the `break` at the first other token.
2107   while (!eof()) {
2108     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2109                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2110                      Keywords.kw_init, Keywords.kw_set)) {
2111       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2112         HasSpecialAccessor = true;
2113       Tok = Tokens->getNextToken();
2114       continue;
2115     }
         // Anything other than the closing `}` here means the accessor has a
         // body or other content, so it is not of the trivial form above.
2116     if (Tok->isNot(tok::r_brace))
2117       IsTrivialPropertyAccessor = false;
2118     break;
2119   }
2120 
       // No get/init/set seen: not an accessor. Rewind the token source and let
       // the caller treat the `{` as something else.
2121   if (!HasSpecialAccessor) {
2122     Tokens->setPosition(StoredPosition);
2123     return false;
2124   }
2125 
2126   // Try to parse the property accessor:
2127   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2128   Tokens->setPosition(StoredPosition);
2129   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2130     addUnwrappedLine();
       // Consume the opening `{` of the accessor block.
2131   nextToken();
2132   do {
2133     switch (FormatTok->Tok.getKind()) {
2134     case tok::r_brace:
           // End of the accessor block. If `=` follows the `}`, everything up to
           // and including the next `;` is kept on the same unwrapped line
           // (presumably a property initializer - confirm).
2135       nextToken();
2136       if (FormatTok->is(tok::equal)) {
2137         while (!eof() && FormatTok->isNot(tok::semi))
2138           nextToken();
2139         nextToken();
2140       }
2141       addUnwrappedLine();
2142       return true;
2143     case tok::l_brace:
           // Accessor with a block body: parse it one level deeper.
2144       ++Line->Level;
2145       parseBlock(/*MustBeDeclaration=*/true);
2146       addUnwrappedLine();
2147       --Line->Level;
2148       break;
2149     case tok::equal:
           // Accessor written with `=>`: consume through the terminating `;` one
           // level deeper and end the line there.
2150       if (FormatTok->is(TT_FatArrow)) {
2151         ++Line->Level;
2152         do {
2153           nextToken();
2154         } while (!eof() && FormatTok->isNot(tok::semi));
2155         nextToken();
2156         addUnwrappedLine();
2157         --Line->Level;
2158         break;
2159       }
2160       nextToken();
2161       break;
2162     default:
2163       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2164                              Keywords.kw_set) &&
2165           !IsTrivialPropertyAccessor) {
2166         // Non-trivial get/set needs to be on its own line.
2167         addUnwrappedLine();
2168       }
2169       nextToken();
2170     }
2171   } while (!eof());
2172 
2173   // Unreachable for well-formed code (paired '{' and '}').
2174   return true;
2175 }
2176 
/// Tries to parse a C++ lambda that begins at the current `[`.
///
/// For non-C++ styles the `[` is consumed and false is returned. False is also
/// returned when tryToParseLambdaIntroducer() rejects the introducer. In all
/// other cases true is returned. Note that true is also returned when the scan
/// between the introducer and the body stops at a token that is not accepted
/// there; in that case nothing is marked as a lambda and no body is parsed.
2177 bool UnwrappedLineParser::tryToParseLambda() {
2178   assert(FormatTok->is(tok::l_square));
2179   if (!Style.isCpp()) {
2180     nextToken();
2181     return false;
2182   }
       // Remember the `[` so it can be typed once a body `{` has been found.
2183   FormatToken &LSquare = *FormatTok;
2184   if (!tryToParseLambdaIntroducer())
2185     return false;
2186 
       // SeenArrow: a `->` has been consumed since the introducer.
       // InTemplateParameterList: a `<` directly following `]` has been seen.
2187   bool SeenArrow = false;
2188   bool InTemplateParameterList = false;
2189 
       // Consume everything between the introducer and the body's `{`.
2190   while (FormatTok->isNot(tok::l_brace)) {
2191     if (FormatTok->isSimpleTypeSpecifier()) {
2192       nextToken();
2193       continue;
2194     }
2195     switch (FormatTok->Tok.getKind()) {
         // NOTE(review): this case looks unreachable because the loop condition
         // already excludes an l_brace - confirm.
2196     case tok::l_brace:
2197       break;
2198     case tok::l_paren:
2199       parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2200       break;
2201     case tok::l_square:
2202       parseSquare();
2203       break;
2204     case tok::less:
2205       assert(FormatTok->Previous);
2206       if (FormatTok->Previous->is(tok::r_square))
2207         InTemplateParameterList = true;
2208       nextToken();
2209       break;
         // Tokens that are always accepted between the introducer and the body.
2210     case tok::kw_auto:
2211     case tok::kw_class:
2212     case tok::kw_template:
2213     case tok::kw_typename:
2214     case tok::amp:
2215     case tok::star:
2216     case tok::kw_const:
2217     case tok::kw_constexpr:
2218     case tok::kw_consteval:
2219     case tok::comma:
2220     case tok::greater:
2221     case tok::identifier:
2222     case tok::numeric_constant:
2223     case tok::coloncolon:
2224     case tok::kw_mutable:
2225     case tok::kw_noexcept:
2226     case tok::kw_static:
2227       nextToken();
2228       break;
2229     // Specialization of a template with an integer parameter can contain
2230     // arithmetic, logical, comparison and ternary operators.
2231     //
2232     // FIXME: This also accepts sequences of operators that are not in the scope
2233     // of a template argument list.
2234     //
2235     // In a C++ lambda a template type can only occur after an arrow. We use
2236     // this as an heuristic to distinguish between Objective-C expressions
2237     // followed by an `a->b` expression, such as:
2238     // ([obj func:arg] + a->b)
2239     // Otherwise the code below would parse as a lambda.
2240     case tok::plus:
2241     case tok::minus:
2242     case tok::exclaim:
2243     case tok::tilde:
2244     case tok::slash:
2245     case tok::percent:
2246     case tok::lessless:
2247     case tok::pipe:
2248     case tok::pipepipe:
2249     case tok::ampamp:
2250     case tok::caret:
2251     case tok::equalequal:
2252     case tok::exclaimequal:
2253     case tok::greaterequal:
2254     case tok::lessequal:
2255     case tok::question:
2256     case tok::colon:
2257     case tok::ellipsis:
2258     case tok::kw_true:
2259     case tok::kw_false:
2260       if (SeenArrow || InTemplateParameterList) {
2261         nextToken();
2262         break;
2263       }
           // Operator outside a return type or template parameter list: stop
           // here without marking anything as a lambda.
2264       return true;
2265     case tok::arrow:
2266       // This might or might not actually be a lambda arrow (this could be an
2267       // ObjC method invocation followed by a dereferencing arrow). We might
2268       // reset this back to TT_Unknown in TokenAnnotator.
2269       FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2270       SeenArrow = true;
2271       nextToken();
2272       break;
2273     case tok::kw_requires: {
2274       auto *RequiresToken = FormatTok;
2275       nextToken();
2276       parseRequiresClause(RequiresToken);
2277       break;
2278     }
2279     case tok::equal:
           // `=` is only accepted inside a template parameter list.
2280       if (!InTemplateParameterList)
2281         return true;
2282       nextToken();
2283       break;
2284     default:
2285       return true;
2286     }
2287   }
2288 
       // A body `{` was reached: type the brace and the introducer's `[`.
2289   FormatTok->setFinalizedType(TT_LambdaLBrace);
2290   LSquare.setFinalizedType(TT_LambdaLSquare);
2291 
       // While the body is parsed, NestedLambdas.back() holds the value that
       // Line->SeenDecltypeAuto had when the body started.
2292   NestedLambdas.push_back(Line->SeenDecltypeAuto);
2293   parseChildBlock();
2294   assert(!NestedLambdas.empty());
2295   NestedLambdas.pop_back();
2296 
2297   return true;
2298 }
2299 
2300 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2301   const FormatToken *Previous = FormatTok->Previous;
2302   const FormatToken *LeftSquare = FormatTok;
2303   nextToken();
2304   if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2305                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2306                                         tok::kw_co_yield, tok::kw_co_return)) ||
2307                     Previous->closesScope())) ||
2308       LeftSquare->isCppStructuredBinding(Style)) {
2309     return false;
2310   }
2311   if (FormatTok->is(tok::l_square))
2312     return false;
2313   if (FormatTok->is(tok::r_square)) {
2314     const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2315     if (Next->is(tok::greater))
2316       return false;
2317   }
2318   parseSquare(/*LambdaIntroducer=*/true);
2319   return true;
2320 }
2321 
2322 void UnwrappedLineParser::tryToParseJSFunction() {
2323   assert(FormatTok->is(Keywords.kw_function));
2324   if (FormatTok->is(Keywords.kw_async))
2325     nextToken();
2326   // Consume "function".
2327   nextToken();
2328 
2329   // Consume * (generator function). Treat it like C++'s overloaded operators.
2330   if (FormatTok->is(tok::star)) {
2331     FormatTok->setFinalizedType(TT_OverloadedOperator);
2332     nextToken();
2333   }
2334 
2335   // Consume function name.
2336   if (FormatTok->is(tok::identifier))
2337     nextToken();
2338 
2339   if (FormatTok->isNot(tok::l_paren))
2340     return;
2341 
2342   // Parse formal parameter list.
2343   parseParens();
2344 
2345   if (FormatTok->is(tok::colon)) {
2346     // Parse a type definition.
2347     nextToken();
2348 
2349     // Eat the type declaration. For braced inline object types, balance braces,
2350     // otherwise just parse until finding an l_brace for the function body.
2351     if (FormatTok->is(tok::l_brace))
2352       tryToParseBracedList();
2353     else
2354       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2355         nextToken();
2356   }
2357 
2358   if (FormatTok->is(tok::semi))
2359     return;
2360 
2361   parseChildBlock();
2362 }
2363 
2364 bool UnwrappedLineParser::tryToParseBracedList() {
2365   if (FormatTok->is(BK_Unknown))
2366     calculateBraceTypes();
2367   assert(FormatTok->isNot(BK_Unknown));
2368   if (FormatTok->is(BK_Block))
2369     return false;
2370   nextToken();
2371   parseBracedList();
2372   return true;
2373 }
2374 
2375 bool UnwrappedLineParser::tryToParseChildBlock() {
2376   assert(Style.isJavaScript() || Style.isCSharp());
2377   assert(FormatTok->is(TT_FatArrow));
2378   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2379   // They always start an expression or a child block if followed by a curly
2380   // brace.
2381   nextToken();
2382   if (FormatTok->isNot(tok::l_brace))
2383     return false;
2384   parseChildBlock();
2385   return true;
2386 }
2387 
/// Parses the contents of a braced list up to and including its closing token.
/// The call sites visible in this file consume the opening token before
/// calling this function.
///
/// \param IsAngleBracket The list is closed by `>` instead of `}`.
/// \param IsEnum When set and Style.AllowShortEnumsOnASingleLine is off, an
/// unwrapped line is added after every comma and before the closing token. A
/// `;` inside the list is then recorded as an error but does not stop parsing.
///
/// \returns true if the closing token was found and no `;` error was recorded;
/// false if a `;` aborted parsing, if an error was recorded, or if the loop
/// ended because eof() became true.
2388 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2389   bool HasError = false;
2390 
2391   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2392   // replace this by using parseAssignmentExpression() inside.
2393   do {
2394     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2395         tryToParseChildBlock()) {
2396       continue;
2397     }
2398     if (Style.isJavaScript()) {
2399       if (FormatTok->is(Keywords.kw_function)) {
2400         tryToParseJSFunction();
2401         continue;
2402       }
2403       if (FormatTok->is(tok::l_brace)) {
2404         // Could be a method inside of a braced list `{a() { return 1; }}`.
2405         if (tryToParseBracedList())
2406           continue;
2407         parseChildBlock();
2408       }
2409     }
         // Closing token of this list: consume it and report the result.
2410     if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2411       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2412         addUnwrappedLine();
2413       nextToken();
2414       return !HasError;
2415     }
2416     switch (FormatTok->Tok.getKind()) {
2417     case tok::l_square:
2418       if (Style.isCSharp())
2419         parseSquare();
2420       else
2421         tryToParseLambda();
2422       break;
2423     case tok::l_paren:
2424       parseParens();
2425       // JavaScript can just have free standing methods and getters/setters in
2426       // object literals. Detect them by a "{" following ")".
2427       if (Style.isJavaScript()) {
2428         if (FormatTok->is(tok::l_brace))
2429           parseChildBlock();
2430         break;
2431       }
2432       break;
2433     case tok::l_brace:
2434       // Assume there are no blocks inside a braced init list apart
2435       // from the ones we explicitly parse out (like lambdas).
2436       FormatTok->setBlockKind(BK_BracedInit);
2437       nextToken();
2438       parseBracedList();
2439       break;
2440     case tok::less:
           // A `<` opens a nested list only inside an angle-bracketed list;
           // otherwise it is consumed like any other token.
2441       nextToken();
2442       if (IsAngleBracket)
2443         parseBracedList(/*IsAngleBracket=*/true);
2444       break;
2445     case tok::semi:
2446       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2447       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2448       // used for error recovery if we have otherwise determined that this is
2449       // a braced list.
2450       if (Style.isJavaScript()) {
2451         nextToken();
2452         break;
2453       }
           // Elsewhere a `;` is an error. Outside an enum parsing stops right
           // here without consuming the `;`; inside an enum it is skipped.
2454       HasError = true;
2455       if (!IsEnum)
2456         return false;
2457       nextToken();
2458       break;
2459     case tok::comma:
2460       nextToken();
2461       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2462         addUnwrappedLine();
2463       break;
2464     default:
2465       nextToken();
2466       break;
2467     }
2468   } while (!eof());
2469   return false;
2470 }
2471 
2472 /// \brief Parses a pair of parentheses (and everything between them).
2473 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2474 /// double ampersands. This applies for all nested scopes as well.
2475 ///
2476 /// Returns whether there is a `=` token between the parentheses.
/// The same flag is returned when parsing stops early at a `}` or when the
/// loop ends because eof() became true.
2477 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2478   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2479   auto *LeftParen = FormatTok;
2480   bool SeenEqual = false;
       // `(` directly followed by `{`; such parentheses are never marked as
       // optional below (presumably a statement expression - confirm).
2481   const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2482   nextToken();
2483   do {
2484     switch (FormatTok->Tok.getKind()) {
2485     case tok::l_paren:
           // Nested parentheses: a `=` found inside counts for this level too.
2486       if (parseParens(AmpAmpTokenType))
2487         SeenEqual = true;
2488       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2489         parseChildBlock();
2490       break;
2491     case tok::r_paren:
           // Before consuming the matching `)`, decide whether this pair should
           // be flagged as Optional. This only happens when
           // Style.RemoveParentheses is above RPS_Leave.
2492       if (!MightBeStmtExpr &&
2493           Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2494         const auto *Prev = LeftParen->Previous;
2495         const auto *Next = Tokens->peekNextToken();
             // This pair sits directly inside another pair: `((` ... `))`.
2496         const bool DoubleParens =
2497             Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2498         const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
             // Double parentheses are kept after `__attribute__` and `decltype`,
             // and, when a `=` was seen inside, after `if`, `while` and
             // `if constexpr`.
2499         const bool Blacklisted =
2500             PrevPrev &&
2501             (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2502              (SeenEqual &&
2503               (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2504                PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
             // `return (...);` / `co_return (...);` under RPS_ReturnStatement.
             // Skipped when IsDecltypeAutoFunction is set (outside lambdas) or
             // when the innermost NestedLambdas entry is true.
2505         const bool ReturnParens =
2506             Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2507             ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2508              (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2509             Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2510             Next->is(tok::semi);
2511         if ((DoubleParens && !Blacklisted) || ReturnParens) {
2512           LeftParen->Optional = true;
2513           FormatTok->Optional = true;
2514         }
2515       }
2516       nextToken();
2517       return SeenEqual;
2518     case tok::r_brace:
2519       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2520       return SeenEqual;
2521     case tok::l_square:
2522       tryToParseLambda();
2523       break;
2524     case tok::l_brace:
2525       if (!tryToParseBracedList())
2526         parseChildBlock();
2527       break;
2528     case tok::at:
           // `@{` is parsed as a braced list.
2529       nextToken();
2530       if (FormatTok->is(tok::l_brace)) {
2531         nextToken();
2532         parseBracedList();
2533       }
2534       break;
2535     case tok::equal:
           // Fat arrows also have kind tok::equal, so they set SeenEqual too.
2536       SeenEqual = true;
2537       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2538         tryToParseChildBlock();
2539       else
2540         nextToken();
2541       break;
2542     case tok::kw_class:
2543       if (Style.isJavaScript())
2544         parseRecord(/*ParseAsExpr=*/true);
2545       else
2546         nextToken();
2547       break;
2548     case tok::identifier:
2549       if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2550         tryToParseJSFunction();
2551       else
2552         nextToken();
2553       break;
2554     case tok::kw_requires: {
2555       auto RequiresToken = FormatTok;
2556       nextToken();
2557       parseRequiresExpression(RequiresToken);
2558       break;
2559     }
2560     case tok::ampamp:
2561       if (AmpAmpTokenType != TT_Unknown)
2562         FormatTok->setFinalizedType(AmpAmpTokenType);
2563       [[fallthrough]];
2564     default:
2565       nextToken();
2566       break;
2567     }
2568   } while (!eof());
2569   return SeenEqual;
2570 }
2571 
/// Parses a pair of square brackets and everything between them.
///
/// \param LambdaIntroducer When false, the current token must be `[` and a
/// lambda parse is attempted first; if that succeeds nothing else is done.
/// When true, that attempt is skipped and parsing starts at the current token
/// (the only visible caller passing true has already consumed the `[`).
///
/// Returns after consuming the matching `]`, at a `}` (which is left
/// unconsumed), or when the loop ends because eof() became true.
2572 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2573   if (!LambdaIntroducer) {
2574     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2575     if (tryToParseLambda())
2576       return;
2577   }
2578   do {
2579     switch (FormatTok->Tok.getKind()) {
2580     case tok::l_paren:
2581       parseParens();
2582       break;
2583     case tok::r_square:
2584       nextToken();
2585       return;
2586     case tok::r_brace:
2587       // A "}" inside brackets is an error if there wasn't a matching "{".
2588       return;
2589     case tok::l_square:
2590       parseSquare();
2591       break;
2592     case tok::l_brace: {
2593       if (!tryToParseBracedList())
2594         parseChildBlock();
2595       break;
2596     }
2597     case tok::at:
           // `@{` is parsed as a braced list.
2598       nextToken();
2599       if (FormatTok->is(tok::l_brace)) {
2600         nextToken();
2601         parseBracedList();
2602       }
2603       break;
2604     default:
2605       nextToken();
2606       break;
2607     }
2608   } while (!eof());
2609 }
2610 
2611 void UnwrappedLineParser::keepAncestorBraces() {
2612   if (!Style.RemoveBracesLLVM)
2613     return;
2614 
2615   const int MaxNestingLevels = 2;
2616   const int Size = NestedTooDeep.size();
2617   if (Size >= MaxNestingLevels)
2618     NestedTooDeep[Size - MaxNestingLevels] = true;
2619   NestedTooDeep.push_back(false);
2620 }
2621 
2622 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2623   for (const auto &Token : llvm::reverse(Line.Tokens))
2624     if (Token.Tok->isNot(tok::comment))
2625       return Token.Tok;
2626 
2627   return nullptr;
2628 }
2629 
2630 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2631   FormatToken *Tok = nullptr;
2632 
2633   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2634       PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2635     Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2636               ? getLastNonComment(*Line)
2637               : Line->Tokens.back().Tok;
2638     assert(Tok);
2639     if (Tok->BraceCount < 0) {
2640       assert(Tok->BraceCount == -1);
2641       Tok = nullptr;
2642     } else {
2643       Tok->BraceCount = -1;
2644     }
2645   }
2646 
2647   addUnwrappedLine();
2648   ++Line->Level;
2649   parseStructuralElement();
2650 
2651   if (Tok) {
2652     assert(!Line->InPPDirective);
2653     Tok = nullptr;
2654     for (const auto &L : llvm::reverse(*CurrentLines)) {
2655       if (!L.InPPDirective && getLastNonComment(L)) {
2656         Tok = L.Tokens.back().Tok;
2657         break;
2658       }
2659     }
2660     assert(Tok);
2661     ++Tok->BraceCount;
2662   }
2663 
2664   if (CheckEOF && eof())
2665     addUnwrappedLine();
2666 
2667   --Line->Level;
2668 }
2669 
2670 static void markOptionalBraces(FormatToken *LeftBrace) {
2671   if (!LeftBrace)
2672     return;
2673 
2674   assert(LeftBrace->is(tok::l_brace));
2675 
2676   FormatToken *RightBrace = LeftBrace->MatchingParen;
2677   if (!RightBrace) {
2678     assert(!LeftBrace->Optional);
2679     return;
2680   }
2681 
2682   assert(RightBrace->is(tok::r_brace));
2683   assert(RightBrace->MatchingParen == LeftBrace);
2684   assert(LeftBrace->Optional == RightBrace->Optional);
2685 
2686   LeftBrace->Optional = true;
2687   RightBrace->Optional = true;
2688 }
2689 
2690 void UnwrappedLineParser::handleAttributes() {
2691   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2692   if (FormatTok->isAttribute())
2693     nextToken();
2694   else if (FormatTok->is(tok::l_square))
2695     handleCppAttributes();
2696 }
2697 
2698 bool UnwrappedLineParser::handleCppAttributes() {
2699   // Handle [[likely]] / [[unlikely]] attributes.
2700   assert(FormatTok->is(tok::l_square));
2701   if (!tryToParseSimpleAttribute())
2702     return false;
2703   parseSquare();
2704   return true;
2705 }
2706 
2707 /// Returns whether \c Tok begins a block.
2708 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2709   // FIXME: rename the function or make
2710   // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2711   return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2712                            : Tok.is(tok::l_brace);
2713 }
2714 
2715 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2716                                                   bool KeepBraces,
2717                                                   bool IsVerilogAssert) {
2718   assert((FormatTok->is(tok::kw_if) ||
2719           (Style.isVerilog() &&
2720            FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2721                               Keywords.kw_assume, Keywords.kw_cover))) &&
2722          "'if' expected");
2723   nextToken();
2724 
2725   if (IsVerilogAssert) {
2726     // Handle `assert #0` and `assert final`.
2727     if (FormatTok->is(Keywords.kw_verilogHash)) {
2728       nextToken();
2729       if (FormatTok->is(tok::numeric_constant))
2730         nextToken();
2731     } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2732                                   Keywords.kw_sequence)) {
2733       nextToken();
2734     }
2735   }
2736 
2737   // Handle `if !consteval`.
2738   if (FormatTok->is(tok::exclaim))
2739     nextToken();
2740 
2741   bool KeepIfBraces = true;
2742   if (FormatTok->is(tok::kw_consteval)) {
2743     nextToken();
2744   } else {
2745     KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2746     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2747       nextToken();
2748     if (FormatTok->is(tok::l_paren)) {
2749       FormatTok->setFinalizedType(TT_ConditionLParen);
2750       parseParens();
2751     }
2752   }
2753   handleAttributes();
2754   // The then action is optional in Verilog assert statements.
2755   if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2756     nextToken();
2757     addUnwrappedLine();
2758     return nullptr;
2759   }
2760 
2761   bool NeedsUnwrappedLine = false;
2762   keepAncestorBraces();
2763 
2764   FormatToken *IfLeftBrace = nullptr;
2765   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2766 
2767   if (isBlockBegin(*FormatTok)) {
2768     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2769     IfLeftBrace = FormatTok;
2770     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2771     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2772                /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2773     setPreviousRBraceType(TT_ControlStatementRBrace);
2774     if (Style.BraceWrapping.BeforeElse)
2775       addUnwrappedLine();
2776     else
2777       NeedsUnwrappedLine = true;
2778   } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2779     addUnwrappedLine();
2780   } else {
2781     parseUnbracedBody();
2782   }
2783 
2784   if (Style.RemoveBracesLLVM) {
2785     assert(!NestedTooDeep.empty());
2786     KeepIfBraces = KeepIfBraces ||
2787                    (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2788                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2789                    IfBlockKind == IfStmtKind::IfElseIf;
2790   }
2791 
2792   bool KeepElseBraces = KeepIfBraces;
2793   FormatToken *ElseLeftBrace = nullptr;
2794   IfStmtKind Kind = IfStmtKind::IfOnly;
2795 
2796   if (FormatTok->is(tok::kw_else)) {
2797     if (Style.RemoveBracesLLVM) {
2798       NestedTooDeep.back() = false;
2799       Kind = IfStmtKind::IfElse;
2800     }
2801     nextToken();
2802     handleAttributes();
2803     if (isBlockBegin(*FormatTok)) {
2804       const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2805       FormatTok->setFinalizedType(TT_ElseLBrace);
2806       ElseLeftBrace = FormatTok;
2807       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2808       IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2809       FormatToken *IfLBrace =
2810           parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2811                      /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2812       setPreviousRBraceType(TT_ElseRBrace);
2813       if (FormatTok->is(tok::kw_else)) {
2814         KeepElseBraces = KeepElseBraces ||
2815                          ElseBlockKind == IfStmtKind::IfOnly ||
2816                          ElseBlockKind == IfStmtKind::IfElseIf;
2817       } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2818         KeepElseBraces = true;
2819         assert(ElseLeftBrace->MatchingParen);
2820         markOptionalBraces(ElseLeftBrace);
2821       }
2822       addUnwrappedLine();
2823     } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2824       const FormatToken *Previous = Tokens->getPreviousToken();
2825       assert(Previous);
2826       const bool IsPrecededByComment = Previous->is(tok::comment);
2827       if (IsPrecededByComment) {
2828         addUnwrappedLine();
2829         ++Line->Level;
2830       }
2831       bool TooDeep = true;
2832       if (Style.RemoveBracesLLVM) {
2833         Kind = IfStmtKind::IfElseIf;
2834         TooDeep = NestedTooDeep.pop_back_val();
2835       }
2836       ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2837       if (Style.RemoveBracesLLVM)
2838         NestedTooDeep.push_back(TooDeep);
2839       if (IsPrecededByComment)
2840         --Line->Level;
2841     } else {
2842       parseUnbracedBody(/*CheckEOF=*/true);
2843     }
2844   } else {
2845     KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2846     if (NeedsUnwrappedLine)
2847       addUnwrappedLine();
2848   }
2849 
2850   if (!Style.RemoveBracesLLVM)
2851     return nullptr;
2852 
2853   assert(!NestedTooDeep.empty());
2854   KeepElseBraces = KeepElseBraces ||
2855                    (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2856                    NestedTooDeep.back();
2857 
2858   NestedTooDeep.pop_back();
2859 
2860   if (!KeepIfBraces && !KeepElseBraces) {
2861     markOptionalBraces(IfLeftBrace);
2862     markOptionalBraces(ElseLeftBrace);
2863   } else if (IfLeftBrace) {
2864     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2865     if (IfRightBrace) {
2866       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2867       assert(!IfLeftBrace->Optional);
2868       assert(!IfRightBrace->Optional);
2869       IfLeftBrace->MatchingParen = nullptr;
2870       IfRightBrace->MatchingParen = nullptr;
2871     }
2872   }
2873 
2874   if (IfKind)
2875     *IfKind = Kind;
2876 
2877   return IfLeftBrace;
2878 }
2879 
2880 void UnwrappedLineParser::parseTryCatch() {
2881   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2882   nextToken();
2883   bool NeedsUnwrappedLine = false;
2884   if (FormatTok->is(tok::colon)) {
2885     // We are in a function try block, what comes is an initializer list.
2886     nextToken();
2887 
2888     // In case identifiers were removed by clang-tidy, what might follow is
2889     // multiple commas in sequence - before the first identifier.
2890     while (FormatTok->is(tok::comma))
2891       nextToken();
2892 
2893     while (FormatTok->is(tok::identifier)) {
2894       nextToken();
2895       if (FormatTok->is(tok::l_paren))
2896         parseParens();
2897       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2898           FormatTok->is(tok::l_brace)) {
2899         do {
2900           nextToken();
2901         } while (FormatTok->isNot(tok::r_brace));
2902         nextToken();
2903       }
2904 
2905       // In case identifiers were removed by clang-tidy, what might follow is
2906       // multiple commas in sequence - after the first identifier.
2907       while (FormatTok->is(tok::comma))
2908         nextToken();
2909     }
2910   }
2911   // Parse try with resource.
2912   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2913     parseParens();
2914 
2915   keepAncestorBraces();
2916 
2917   if (FormatTok->is(tok::l_brace)) {
2918     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2919     parseBlock();
2920     if (Style.BraceWrapping.BeforeCatch)
2921       addUnwrappedLine();
2922     else
2923       NeedsUnwrappedLine = true;
2924   } else if (FormatTok->isNot(tok::kw_catch)) {
2925     // The C++ standard requires a compound-statement after a try.
2926     // If there's none, we try to assume there's a structuralElement
2927     // and try to continue.
2928     addUnwrappedLine();
2929     ++Line->Level;
2930     parseStructuralElement();
2931     --Line->Level;
2932   }
2933   while (true) {
2934     if (FormatTok->is(tok::at))
2935       nextToken();
2936     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2937                              tok::kw___finally) ||
2938           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2939            FormatTok->is(Keywords.kw_finally)) ||
2940           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2941            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2942       break;
2943     }
2944     nextToken();
2945     while (FormatTok->isNot(tok::l_brace)) {
2946       if (FormatTok->is(tok::l_paren)) {
2947         parseParens();
2948         continue;
2949       }
2950       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2951         if (Style.RemoveBracesLLVM)
2952           NestedTooDeep.pop_back();
2953         return;
2954       }
2955       nextToken();
2956     }
2957     NeedsUnwrappedLine = false;
2958     Line->MustBeDeclaration = false;
2959     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2960     parseBlock();
2961     if (Style.BraceWrapping.BeforeCatch)
2962       addUnwrappedLine();
2963     else
2964       NeedsUnwrappedLine = true;
2965   }
2966 
2967   if (Style.RemoveBracesLLVM)
2968     NestedTooDeep.pop_back();
2969 
2970   if (NeedsUnwrappedLine)
2971     addUnwrappedLine();
2972 }
2973 
2974 void UnwrappedLineParser::parseNamespace() {
2975   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2976          "'namespace' expected");
2977 
2978   const FormatToken &InitialToken = *FormatTok;
2979   nextToken();
2980   if (InitialToken.is(TT_NamespaceMacro)) {
2981     parseParens();
2982   } else {
2983     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2984                               tok::l_square, tok::period, tok::l_paren) ||
2985            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2986       if (FormatTok->is(tok::l_square))
2987         parseSquare();
2988       else if (FormatTok->is(tok::l_paren))
2989         parseParens();
2990       else
2991         nextToken();
2992     }
2993   }
2994   if (FormatTok->is(tok::l_brace)) {
2995     FormatTok->setFinalizedType(TT_NamespaceLBrace);
2996 
2997     if (ShouldBreakBeforeBrace(Style, InitialToken))
2998       addUnwrappedLine();
2999 
3000     unsigned AddLevels =
3001         Style.NamespaceIndentation == FormatStyle::NI_All ||
3002                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3003                  DeclarationScopeStack.size() > 1)
3004             ? 1u
3005             : 0u;
3006     bool ManageWhitesmithsBraces =
3007         AddLevels == 0u &&
3008         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3009 
3010     // If we're in Whitesmiths mode, indent the brace if we're not indenting
3011     // the whole block.
3012     if (ManageWhitesmithsBraces)
3013       ++Line->Level;
3014 
3015     // Munch the semicolon after a namespace. This is more common than one would
3016     // think. Putting the semicolon into its own line is very ugly.
3017     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3018                /*KeepBraces=*/true, /*IfKind=*/nullptr,
3019                ManageWhitesmithsBraces);
3020 
3021     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3022 
3023     if (ManageWhitesmithsBraces)
3024       --Line->Level;
3025   }
3026   // FIXME: Add error handling.
3027 }
3028 
3029 void UnwrappedLineParser::parseNew() {
3030   assert(FormatTok->is(tok::kw_new) && "'new' expected");
3031   nextToken();
3032 
3033   if (Style.isCSharp()) {
3034     do {
3035       // Handle constructor invocation, e.g. `new(field: value)`.
3036       if (FormatTok->is(tok::l_paren))
3037         parseParens();
3038 
3039       // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3040       if (FormatTok->is(tok::l_brace))
3041         parseBracedList();
3042 
3043       if (FormatTok->isOneOf(tok::semi, tok::comma))
3044         return;
3045 
3046       nextToken();
3047     } while (!eof());
3048   }
3049 
3050   if (Style.Language != FormatStyle::LK_Java)
3051     return;
3052 
3053   // In Java, we can parse everything up to the parens, which aren't optional.
3054   do {
3055     // There should not be a ;, { or } before the new's open paren.
3056     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3057       return;
3058 
3059     // Consume the parens.
3060     if (FormatTok->is(tok::l_paren)) {
3061       parseParens();
3062 
3063       // If there is a class body of an anonymous class, consume that as child.
3064       if (FormatTok->is(tok::l_brace))
3065         parseChildBlock();
3066       return;
3067     }
3068     nextToken();
3069   } while (!eof());
3070 }
3071 
3072 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3073   keepAncestorBraces();
3074 
3075   if (isBlockBegin(*FormatTok)) {
3076     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3077     FormatToken *LeftBrace = FormatTok;
3078     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3079     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3080                /*MunchSemi=*/true, KeepBraces);
3081     setPreviousRBraceType(TT_ControlStatementRBrace);
3082     if (!KeepBraces) {
3083       assert(!NestedTooDeep.empty());
3084       if (!NestedTooDeep.back())
3085         markOptionalBraces(LeftBrace);
3086     }
3087     if (WrapRightBrace)
3088       addUnwrappedLine();
3089   } else {
3090     parseUnbracedBody();
3091   }
3092 
3093   if (!KeepBraces)
3094     NestedTooDeep.pop_back();
3095 }
3096 
3097 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3098   assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3099           (Style.isVerilog() &&
3100            FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3101                               Keywords.kw_always_ff, Keywords.kw_always_latch,
3102                               Keywords.kw_final, Keywords.kw_initial,
3103                               Keywords.kw_foreach, Keywords.kw_forever,
3104                               Keywords.kw_repeat))) &&
3105          "'for', 'while' or foreach macro expected");
3106   const bool KeepBraces = !Style.RemoveBracesLLVM ||
3107                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3108 
3109   nextToken();
3110   // JS' for await ( ...
3111   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3112     nextToken();
3113   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3114     nextToken();
3115   if (HasParens && FormatTok->is(tok::l_paren)) {
3116     // The type is only set for Verilog basically because we were afraid to
3117     // change the existing behavior for loops. See the discussion on D121756 for
3118     // details.
3119     if (Style.isVerilog())
3120       FormatTok->setFinalizedType(TT_ConditionLParen);
3121     parseParens();
3122   }
3123 
3124   if (Style.isVerilog()) {
3125     // Event control.
3126     parseVerilogSensitivityList();
3127   } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3128              Tokens->getPreviousToken()->is(tok::r_paren)) {
3129     nextToken();
3130     addUnwrappedLine();
3131     return;
3132   }
3133 
3134   handleAttributes();
3135   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3136 }
3137 
3138 void UnwrappedLineParser::parseDoWhile() {
3139   assert(FormatTok->is(tok::kw_do) && "'do' expected");
3140   nextToken();
3141 
3142   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3143 
3144   // FIXME: Add error handling.
3145   if (FormatTok->isNot(tok::kw_while)) {
3146     addUnwrappedLine();
3147     return;
3148   }
3149 
3150   FormatTok->setFinalizedType(TT_DoWhile);
3151 
3152   // If in Whitesmiths mode, the line with the while() needs to be indented
3153   // to the same level as the block.
3154   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3155     ++Line->Level;
3156 
3157   nextToken();
3158   parseStructuralElement();
3159 }
3160 
3161 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3162   nextToken();
3163   unsigned OldLineLevel = Line->Level;
3164   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3165     --Line->Level;
3166   if (LeftAlignLabel)
3167     Line->Level = 0;
3168 
3169   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3170       FormatTok->is(tok::l_brace)) {
3171 
3172     CompoundStatementIndenter Indenter(this, Line->Level,
3173                                        Style.BraceWrapping.AfterCaseLabel,
3174                                        Style.BraceWrapping.IndentBraces);
3175     parseBlock();
3176     if (FormatTok->is(tok::kw_break)) {
3177       if (Style.BraceWrapping.AfterControlStatement ==
3178           FormatStyle::BWACS_Always) {
3179         addUnwrappedLine();
3180         if (!Style.IndentCaseBlocks &&
3181             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3182           ++Line->Level;
3183         }
3184       }
3185       parseStructuralElement();
3186     }
3187     addUnwrappedLine();
3188   } else {
3189     if (FormatTok->is(tok::semi))
3190       nextToken();
3191     addUnwrappedLine();
3192   }
3193   Line->Level = OldLineLevel;
3194   if (FormatTok->isNot(tok::l_brace)) {
3195     parseStructuralElement();
3196     addUnwrappedLine();
3197   }
3198 }
3199 
3200 void UnwrappedLineParser::parseCaseLabel() {
3201   assert(FormatTok->is(tok::kw_case) && "'case' expected");
3202 
3203   // FIXME: fix handling of complex expressions here.
3204   do {
3205     nextToken();
3206     if (FormatTok->is(tok::colon)) {
3207       FormatTok->setFinalizedType(TT_CaseLabelColon);
3208       break;
3209     }
3210   } while (!eof());
3211   parseLabel();
3212 }
3213 
3214 void UnwrappedLineParser::parseSwitch() {
3215   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3216   nextToken();
3217   if (FormatTok->is(tok::l_paren))
3218     parseParens();
3219 
3220   keepAncestorBraces();
3221 
3222   if (FormatTok->is(tok::l_brace)) {
3223     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3224     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3225     parseBlock();
3226     setPreviousRBraceType(TT_ControlStatementRBrace);
3227     addUnwrappedLine();
3228   } else {
3229     addUnwrappedLine();
3230     ++Line->Level;
3231     parseStructuralElement();
3232     --Line->Level;
3233   }
3234 
3235   if (Style.RemoveBracesLLVM)
3236     NestedTooDeep.pop_back();
3237 }
3238 
3239 // Operators that can follow a C variable.
3240 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3241   switch (kind) {
3242   case tok::ampamp:
3243   case tok::ampequal:
3244   case tok::arrow:
3245   case tok::caret:
3246   case tok::caretequal:
3247   case tok::comma:
3248   case tok::ellipsis:
3249   case tok::equal:
3250   case tok::equalequal:
3251   case tok::exclaim:
3252   case tok::exclaimequal:
3253   case tok::greater:
3254   case tok::greaterequal:
3255   case tok::greatergreater:
3256   case tok::greatergreaterequal:
3257   case tok::l_paren:
3258   case tok::l_square:
3259   case tok::less:
3260   case tok::lessequal:
3261   case tok::lessless:
3262   case tok::lesslessequal:
3263   case tok::minus:
3264   case tok::minusequal:
3265   case tok::minusminus:
3266   case tok::percent:
3267   case tok::percentequal:
3268   case tok::period:
3269   case tok::pipe:
3270   case tok::pipeequal:
3271   case tok::pipepipe:
3272   case tok::plus:
3273   case tok::plusequal:
3274   case tok::plusplus:
3275   case tok::question:
3276   case tok::r_brace:
3277   case tok::r_paren:
3278   case tok::r_square:
3279   case tok::semi:
3280   case tok::slash:
3281   case tok::slashequal:
3282   case tok::star:
3283   case tok::starequal:
3284     return true;
3285   default:
3286     return false;
3287   }
3288 }
3289 
3290 void UnwrappedLineParser::parseAccessSpecifier() {
3291   FormatToken *AccessSpecifierCandidate = FormatTok;
3292   nextToken();
3293   // Understand Qt's slots.
3294   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3295     nextToken();
3296   // Otherwise, we don't know what it is, and we'd better keep the next token.
3297   if (FormatTok->is(tok::colon)) {
3298     nextToken();
3299     addUnwrappedLine();
3300   } else if (FormatTok->isNot(tok::coloncolon) &&
3301              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3302     // Not a variable name nor namespace name.
3303     addUnwrappedLine();
3304   } else if (AccessSpecifierCandidate) {
3305     // Consider the access specifier to be a C identifier.
3306     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3307   }
3308 }
3309 
3310 /// \brief Parses a requires, decides if it is a clause or an expression.
3311 /// \pre The current token has to be the requires keyword.
3312 /// \returns true if it parsed a clause.
3313 bool clang::format::UnwrappedLineParser::parseRequires() {
3314   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3315   auto RequiresToken = FormatTok;
3316 
3317   // We try to guess if it is a requires clause, or a requires expression. For
3318   // that we first consume the keyword and check the next token.
3319   nextToken();
3320 
3321   switch (FormatTok->Tok.getKind()) {
3322   case tok::l_brace:
3323     // This can only be an expression, never a clause.
3324     parseRequiresExpression(RequiresToken);
3325     return false;
3326   case tok::l_paren:
3327     // Clauses and expression can start with a paren, it's unclear what we have.
3328     break;
3329   default:
3330     // All other tokens can only be a clause.
3331     parseRequiresClause(RequiresToken);
3332     return true;
3333   }
3334 
3335   // Looking forward we would have to decide if there are function declaration
3336   // like arguments to the requires expression:
3337   // requires (T t) {
3338   // Or there is a constraint expression for the requires clause:
3339   // requires (C<T> && ...
3340 
3341   // But first let's look behind.
3342   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3343 
3344   if (!PreviousNonComment ||
3345       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3346     // If there is no token, or an expression left brace, we are a requires
3347     // clause within a requires expression.
3348     parseRequiresClause(RequiresToken);
3349     return true;
3350   }
3351 
3352   switch (PreviousNonComment->Tok.getKind()) {
3353   case tok::greater:
3354   case tok::r_paren:
3355   case tok::kw_noexcept:
3356   case tok::kw_const:
3357     // This is a requires clause.
3358     parseRequiresClause(RequiresToken);
3359     return true;
3360   case tok::amp:
3361   case tok::ampamp: {
3362     // This can be either:
3363     // if (... && requires (T t) ...)
3364     // Or
3365     // void member(...) && requires (C<T> ...
3366     // We check the one token before that for a const:
3367     // void member(...) const && requires (C<T> ...
3368     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3369     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3370       parseRequiresClause(RequiresToken);
3371       return true;
3372     }
3373     break;
3374   }
3375   default:
3376     if (PreviousNonComment->isTypeOrIdentifier()) {
3377       // This is a requires clause.
3378       parseRequiresClause(RequiresToken);
3379       return true;
3380     }
3381     // It's an expression.
3382     parseRequiresExpression(RequiresToken);
3383     return false;
3384   }
3385 
3386   // Now we look forward and try to check if the paren content is a parameter
3387   // list. The parameters can be cv-qualified and contain references or
3388   // pointers.
3389   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3390   // of stuff: typename, const, *, &, &&, ::, identifiers.
3391 
3392   unsigned StoredPosition = Tokens->getPosition();
3393   FormatToken *NextToken = Tokens->getNextToken();
3394   int Lookahead = 0;
3395   auto PeekNext = [&Lookahead, &NextToken, this] {
3396     ++Lookahead;
3397     NextToken = Tokens->getNextToken();
3398   };
3399 
3400   bool FoundType = false;
3401   bool LastWasColonColon = false;
3402   int OpenAngles = 0;
3403 
3404   for (; Lookahead < 50; PeekNext()) {
3405     switch (NextToken->Tok.getKind()) {
3406     case tok::kw_volatile:
3407     case tok::kw_const:
3408     case tok::comma:
3409       if (OpenAngles == 0) {
3410         FormatTok = Tokens->setPosition(StoredPosition);
3411         parseRequiresExpression(RequiresToken);
3412         return false;
3413       }
3414       break;
3415     case tok::r_paren:
3416     case tok::pipepipe:
3417       FormatTok = Tokens->setPosition(StoredPosition);
3418       parseRequiresClause(RequiresToken);
3419       return true;
3420     case tok::eof:
3421       // Break out of the loop.
3422       Lookahead = 50;
3423       break;
3424     case tok::coloncolon:
3425       LastWasColonColon = true;
3426       break;
3427     case tok::identifier:
3428       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3429         FormatTok = Tokens->setPosition(StoredPosition);
3430         parseRequiresExpression(RequiresToken);
3431         return false;
3432       }
3433       FoundType = true;
3434       LastWasColonColon = false;
3435       break;
3436     case tok::less:
3437       ++OpenAngles;
3438       break;
3439     case tok::greater:
3440       --OpenAngles;
3441       break;
3442     default:
3443       if (NextToken->isSimpleTypeSpecifier()) {
3444         FormatTok = Tokens->setPosition(StoredPosition);
3445         parseRequiresExpression(RequiresToken);
3446         return false;
3447       }
3448       break;
3449     }
3450   }
3451   // This seems to be a complicated expression, just assume it's a clause.
3452   FormatTok = Tokens->setPosition(StoredPosition);
3453   parseRequiresClause(RequiresToken);
3454   return true;
3455 }
3456 
3457 /// \brief Parses a requires clause.
3458 /// \param RequiresToken The requires keyword token, which starts this clause.
3459 /// \pre We need to be on the next token after the requires keyword.
3460 /// \sa parseRequiresExpression
3461 ///
3462 /// Returns if it either has finished parsing the clause, or it detects, that
3463 /// the clause is incorrect.
3464 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3465   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3466   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3467 
3468   // If there is no previous token, we are within a requires expression,
3469   // otherwise we will always have the template or function declaration in front
3470   // of it.
3471   bool InRequiresExpression =
3472       !RequiresToken->Previous ||
3473       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3474 
3475   RequiresToken->setFinalizedType(InRequiresExpression
3476                                       ? TT_RequiresClauseInARequiresExpression
3477                                       : TT_RequiresClause);
3478 
3479   // NOTE: parseConstraintExpression is only ever called from this function.
3480   // It could be inlined into here.
3481   parseConstraintExpression();
3482 
3483   if (!InRequiresExpression)
3484     FormatTok->Previous->ClosesRequiresClause = true;
3485 }
3486 
3487 /// \brief Parses a requires expression.
3488 /// \param RequiresToken The requires keyword token, which starts this clause.
3489 /// \pre We need to be on the next token after the requires keyword.
3490 /// \sa parseRequiresClause
3491 ///
3492 /// Returns if it either has finished parsing the expression, or it detects,
3493 /// that the expression is incorrect.
3494 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3495   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3496   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3497 
3498   RequiresToken->setFinalizedType(TT_RequiresExpression);
3499 
3500   if (FormatTok->is(tok::l_paren)) {
3501     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3502     parseParens();
3503   }
3504 
3505   if (FormatTok->is(tok::l_brace)) {
3506     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3507     parseChildBlock();
3508   }
3509 }
3510 
3511 /// \brief Parses a constraint expression.
3512 ///
3513 /// This is the body of a requires clause. It returns, when the parsing is
3514 /// complete, or the expression is incorrect.
3515 void UnwrappedLineParser::parseConstraintExpression() {
3516   // The special handling for lambdas is needed since tryToParseLambda() eats a
3517   // token and if a requires expression is the last part of a requires clause
3518   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3519   // not set on the correct token. Thus we need to be aware if we even expect a
3520   // lambda to be possible.
3521   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3522   bool LambdaNextTimeAllowed = true;
3523 
3524   // Within lambda declarations, it is permitted to put a requires clause after
3525   // its template parameter list, which would place the requires clause right
3526   // before the parentheses of the parameters of the lambda declaration. Thus,
3527   // we track if we expect to see grouping parentheses at all.
3528   // Without this check, `requires foo<T> (T t)` in the below example would be
3529   // seen as the whole requires clause, accidentally eating the parameters of
3530   // the lambda.
3531   // [&]<typename T> requires foo<T> (T t) { ... };
3532   bool TopLevelParensAllowed = true;
3533 
3534   do {
3535     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3536 
3537     switch (FormatTok->Tok.getKind()) {
3538     case tok::kw_requires: {
3539       auto RequiresToken = FormatTok;
3540       nextToken();
3541       parseRequiresExpression(RequiresToken);
3542       break;
3543     }
3544 
3545     case tok::l_paren:
3546       if (!TopLevelParensAllowed)
3547         return;
3548       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3549       TopLevelParensAllowed = false;
3550       break;
3551 
3552     case tok::l_square:
3553       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3554         return;
3555       break;
3556 
3557     case tok::kw_const:
3558     case tok::semi:
3559     case tok::kw_class:
3560     case tok::kw_struct:
3561     case tok::kw_union:
3562       return;
3563 
3564     case tok::l_brace:
3565       // Potential function body.
3566       return;
3567 
3568     case tok::ampamp:
3569     case tok::pipepipe:
3570       FormatTok->setFinalizedType(TT_BinaryOperator);
3571       nextToken();
3572       LambdaNextTimeAllowed = true;
3573       TopLevelParensAllowed = true;
3574       break;
3575 
3576     case tok::comma:
3577     case tok::comment:
3578       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3579       nextToken();
3580       break;
3581 
3582     case tok::kw_sizeof:
3583     case tok::greater:
3584     case tok::greaterequal:
3585     case tok::greatergreater:
3586     case tok::less:
3587     case tok::lessequal:
3588     case tok::lessless:
3589     case tok::equalequal:
3590     case tok::exclaim:
3591     case tok::exclaimequal:
3592     case tok::plus:
3593     case tok::minus:
3594     case tok::star:
3595     case tok::slash:
3596       LambdaNextTimeAllowed = true;
3597       TopLevelParensAllowed = true;
3598       // Just eat them.
3599       nextToken();
3600       break;
3601 
3602     case tok::numeric_constant:
3603     case tok::coloncolon:
3604     case tok::kw_true:
3605     case tok::kw_false:
3606       TopLevelParensAllowed = false;
3607       // Just eat them.
3608       nextToken();
3609       break;
3610 
3611     case tok::kw_static_cast:
3612     case tok::kw_const_cast:
3613     case tok::kw_reinterpret_cast:
3614     case tok::kw_dynamic_cast:
3615       nextToken();
3616       if (FormatTok->isNot(tok::less))
3617         return;
3618 
3619       nextToken();
3620       parseBracedList(/*IsAngleBracket=*/true);
3621       break;
3622 
3623     default:
3624       if (!FormatTok->Tok.getIdentifierInfo()) {
3625         // Identifiers are part of the default case, we check for more then
3626         // tok::identifier to handle builtin type traits.
3627         return;
3628       }
3629 
3630       // We need to differentiate identifiers for a template deduction guide,
3631       // variables, or function return types (the constraint expression has
3632       // ended before that), and basically all other cases. But it's easier to
3633       // check the other way around.
3634       assert(FormatTok->Previous);
3635       switch (FormatTok->Previous->Tok.getKind()) {
3636       case tok::coloncolon:  // Nested identifier.
3637       case tok::ampamp:      // Start of a function or variable for the
3638       case tok::pipepipe:    // constraint expression. (binary)
3639       case tok::exclaim:     // The same as above, but unary.
3640       case tok::kw_requires: // Initial identifier of a requires clause.
3641       case tok::equal:       // Initial identifier of a concept declaration.
3642         break;
3643       default:
3644         return;
3645       }
3646 
3647       // Read identifier with optional template declaration.
3648       nextToken();
3649       if (FormatTok->is(tok::less)) {
3650         nextToken();
3651         parseBracedList(/*IsAngleBracket=*/true);
3652       }
3653       TopLevelParensAllowed = false;
3654       break;
3655     }
3656   } while (!eof());
3657 }
3658 
3659 bool UnwrappedLineParser::parseEnum() {
3660   const FormatToken &InitialToken = *FormatTok;
3661 
3662   // Won't be 'enum' for NS_ENUMs.
3663   if (FormatTok->is(tok::kw_enum))
3664     nextToken();
3665 
3666   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3667   // declarations. An "enum" keyword followed by a colon would be a syntax
3668   // error and thus assume it is just an identifier.
3669   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3670     return false;
3671 
3672   // In protobuf, "enum" can be used as a field name.
3673   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3674     return false;
3675 
3676   // Eat up enum class ...
3677   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3678     nextToken();
3679 
3680   while (FormatTok->Tok.getIdentifierInfo() ||
3681          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3682                             tok::greater, tok::comma, tok::question,
3683                             tok::l_square, tok::r_square)) {
3684     if (Style.isVerilog()) {
3685       FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3686       nextToken();
3687       // In Verilog the base type can have dimensions.
3688       while (FormatTok->is(tok::l_square))
3689         parseSquare();
3690     } else {
3691       nextToken();
3692     }
3693     // We can have macros or attributes in between 'enum' and the enum name.
3694     if (FormatTok->is(tok::l_paren))
3695       parseParens();
3696     assert(FormatTok->isNot(TT_AttributeSquare));
3697     if (FormatTok->is(tok::identifier)) {
3698       nextToken();
3699       // If there are two identifiers in a row, this is likely an elaborate
3700       // return type. In Java, this can be "implements", etc.
3701       if (Style.isCpp() && FormatTok->is(tok::identifier))
3702         return false;
3703     }
3704   }
3705 
3706   // Just a declaration or something is wrong.
3707   if (FormatTok->isNot(tok::l_brace))
3708     return true;
3709   FormatTok->setFinalizedType(TT_EnumLBrace);
3710   FormatTok->setBlockKind(BK_Block);
3711 
3712   if (Style.Language == FormatStyle::LK_Java) {
3713     // Java enums are different.
3714     parseJavaEnumBody();
3715     return true;
3716   }
3717   if (Style.Language == FormatStyle::LK_Proto) {
3718     parseBlock(/*MustBeDeclaration=*/true);
3719     return true;
3720   }
3721 
3722   if (!Style.AllowShortEnumsOnASingleLine &&
3723       ShouldBreakBeforeBrace(Style, InitialToken)) {
3724     addUnwrappedLine();
3725   }
3726   // Parse enum body.
3727   nextToken();
3728   if (!Style.AllowShortEnumsOnASingleLine) {
3729     addUnwrappedLine();
3730     Line->Level += 1;
3731   }
3732   bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3733   if (!Style.AllowShortEnumsOnASingleLine)
3734     Line->Level -= 1;
3735   if (HasError) {
3736     if (FormatTok->is(tok::semi))
3737       nextToken();
3738     addUnwrappedLine();
3739   }
3740   setPreviousRBraceType(TT_EnumRBrace);
3741   return true;
3742 
3743   // There is no addUnwrappedLine() here so that we fall through to parsing a
3744   // structural element afterwards. Thus, in "enum A {} n, m;",
3745   // "} n, m;" will end up in one unwrapped line.
3746 }
3747 
3748 bool UnwrappedLineParser::parseStructLike() {
3749   // parseRecord falls through and does not yet add an unwrapped line as a
3750   // record declaration or definition can start a structural element.
3751   parseRecord();
3752   // This does not apply to Java, JavaScript and C#.
3753   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3754       Style.isCSharp()) {
3755     if (FormatTok->is(tok::semi))
3756       nextToken();
3757     addUnwrappedLine();
3758     return true;
3759   }
3760   return false;
3761 }
3762 
3763 namespace {
3764 // A class used to set and restore the Token position when peeking
3765 // ahead in the token source.
3766 class ScopedTokenPosition {
3767   unsigned StoredPosition;
3768   FormatTokenSource *Tokens;
3769 
3770 public:
3771   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3772     assert(Tokens && "Tokens expected to not be null");
3773     StoredPosition = Tokens->getPosition();
3774   }
3775 
3776   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3777 };
3778 } // namespace
3779 
3780 // Look to see if we have [[ by looking ahead, if
3781 // its not then rewind to the original position.
3782 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3783   ScopedTokenPosition AutoPosition(Tokens);
3784   FormatToken *Tok = Tokens->getNextToken();
3785   // We already read the first [ check for the second.
3786   if (Tok->isNot(tok::l_square))
3787     return false;
3788   // Double check that the attribute is just something
3789   // fairly simple.
3790   while (Tok->isNot(tok::eof)) {
3791     if (Tok->is(tok::r_square))
3792       break;
3793     Tok = Tokens->getNextToken();
3794   }
3795   if (Tok->is(tok::eof))
3796     return false;
3797   Tok = Tokens->getNextToken();
3798   if (Tok->isNot(tok::r_square))
3799     return false;
3800   Tok = Tokens->getNextToken();
3801   if (Tok->is(tok::semi))
3802     return false;
3803   return true;
3804 }
3805 
3806 void UnwrappedLineParser::parseJavaEnumBody() {
3807   assert(FormatTok->is(tok::l_brace));
3808   const FormatToken *OpeningBrace = FormatTok;
3809 
3810   // Determine whether the enum is simple, i.e. does not have a semicolon or
3811   // constants with class bodies. Simple enums can be formatted like braced
3812   // lists, contracted to a single line, etc.
3813   unsigned StoredPosition = Tokens->getPosition();
3814   bool IsSimple = true;
3815   FormatToken *Tok = Tokens->getNextToken();
3816   while (Tok->isNot(tok::eof)) {
3817     if (Tok->is(tok::r_brace))
3818       break;
3819     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3820       IsSimple = false;
3821       break;
3822     }
3823     // FIXME: This will also mark enums with braces in the arguments to enum
3824     // constants as "not simple". This is probably fine in practice, though.
3825     Tok = Tokens->getNextToken();
3826   }
3827   FormatTok = Tokens->setPosition(StoredPosition);
3828 
3829   if (IsSimple) {
3830     nextToken();
3831     parseBracedList();
3832     addUnwrappedLine();
3833     return;
3834   }
3835 
3836   // Parse the body of a more complex enum.
3837   // First add a line for everything up to the "{".
3838   nextToken();
3839   addUnwrappedLine();
3840   ++Line->Level;
3841 
3842   // Parse the enum constants.
3843   while (!eof()) {
3844     if (FormatTok->is(tok::l_brace)) {
3845       // Parse the constant's class body.
3846       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3847                  /*MunchSemi=*/false);
3848     } else if (FormatTok->is(tok::l_paren)) {
3849       parseParens();
3850     } else if (FormatTok->is(tok::comma)) {
3851       nextToken();
3852       addUnwrappedLine();
3853     } else if (FormatTok->is(tok::semi)) {
3854       nextToken();
3855       addUnwrappedLine();
3856       break;
3857     } else if (FormatTok->is(tok::r_brace)) {
3858       addUnwrappedLine();
3859       break;
3860     } else {
3861       nextToken();
3862     }
3863   }
3864 
3865   // Parse the class body after the enum's ";" if any.
3866   parseLevel(OpeningBrace);
3867   nextToken();
3868   --Line->Level;
3869   addUnwrappedLine();
3870 }
3871 
3872 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3873   const FormatToken &InitialToken = *FormatTok;
3874   nextToken();
3875 
3876   // The actual identifier can be a nested name specifier, and in macros
3877   // it is often token-pasted.
3878   // An [[attribute]] can be before the identifier.
3879   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3880                             tok::kw_alignas, tok::l_square) ||
3881          FormatTok->isAttribute() ||
3882          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3883           FormatTok->isOneOf(tok::period, tok::comma))) {
3884     if (Style.isJavaScript() &&
3885         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3886       // JavaScript/TypeScript supports inline object types in
3887       // extends/implements positions:
3888       //     class Foo implements {bar: number} { }
3889       nextToken();
3890       if (FormatTok->is(tok::l_brace)) {
3891         tryToParseBracedList();
3892         continue;
3893       }
3894     }
3895     if (FormatTok->is(tok::l_square) && handleCppAttributes())
3896       continue;
3897     bool IsNonMacroIdentifier =
3898         FormatTok->is(tok::identifier) &&
3899         FormatTok->TokenText != FormatTok->TokenText.upper();
3900     nextToken();
3901     // We can have macros in between 'class' and the class name.
3902     if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
3903       parseParens();
3904   }
3905 
3906   // Note that parsing away template declarations here leads to incorrectly
3907   // accepting function declarations as record declarations.
3908   // In general, we cannot solve this problem. Consider:
3909   // class A<int> B() {}
3910   // which can be a function definition or a class definition when B() is a
3911   // macro. If we find enough real-world cases where this is a problem, we
3912   // can parse for the 'template' keyword in the beginning of the statement,
3913   // and thus rule out the record production in case there is no template
3914   // (this would still leave us with an ambiguity between template function
3915   // and class declarations).
3916   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3917     do {
3918       if (FormatTok->is(tok::l_brace)) {
3919         calculateBraceTypes(/*ExpectClassBody=*/true);
3920         if (!tryToParseBracedList())
3921           break;
3922       }
3923       if (FormatTok->is(tok::l_square)) {
3924         FormatToken *Previous = FormatTok->Previous;
3925         if (!Previous ||
3926             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3927           // Don't try parsing a lambda if we had a closing parenthesis before,
3928           // it was probably a pointer to an array: int (*)[].
3929           if (!tryToParseLambda())
3930             continue;
3931         } else {
3932           parseSquare();
3933           continue;
3934         }
3935       }
3936       if (FormatTok->is(tok::semi))
3937         return;
3938       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3939         addUnwrappedLine();
3940         nextToken();
3941         parseCSharpGenericTypeConstraint();
3942         break;
3943       }
3944       nextToken();
3945     } while (!eof());
3946   }
3947 
3948   auto GetBraceTypes =
3949       [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
3950     switch (RecordTok.Tok.getKind()) {
3951     case tok::kw_class:
3952       return {TT_ClassLBrace, TT_ClassRBrace};
3953     case tok::kw_struct:
3954       return {TT_StructLBrace, TT_StructRBrace};
3955     case tok::kw_union:
3956       return {TT_UnionLBrace, TT_UnionRBrace};
3957     default:
3958       // Useful for e.g. interface.
3959       return {TT_RecordLBrace, TT_RecordRBrace};
3960     }
3961   };
3962   if (FormatTok->is(tok::l_brace)) {
3963     auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
3964     FormatTok->setFinalizedType(OpenBraceType);
3965     if (ParseAsExpr) {
3966       parseChildBlock();
3967     } else {
3968       if (ShouldBreakBeforeBrace(Style, InitialToken))
3969         addUnwrappedLine();
3970 
3971       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3972       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3973     }
3974     setPreviousRBraceType(ClosingBraceType);
3975   }
3976   // There is no addUnwrappedLine() here so that we fall through to parsing a
3977   // structural element afterwards. Thus, in "class A {} n, m;",
3978   // "} n, m;" will end up in one unwrapped line.
3979 }
3980 
3981 void UnwrappedLineParser::parseObjCMethod() {
3982   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3983          "'(' or identifier expected.");
3984   do {
3985     if (FormatTok->is(tok::semi)) {
3986       nextToken();
3987       addUnwrappedLine();
3988       return;
3989     } else if (FormatTok->is(tok::l_brace)) {
3990       if (Style.BraceWrapping.AfterFunction)
3991         addUnwrappedLine();
3992       parseBlock();
3993       addUnwrappedLine();
3994       return;
3995     } else {
3996       nextToken();
3997     }
3998   } while (!eof());
3999 }
4000 
4001 void UnwrappedLineParser::parseObjCProtocolList() {
4002   assert(FormatTok->is(tok::less) && "'<' expected.");
4003   do {
4004     nextToken();
4005     // Early exit in case someone forgot a close angle.
4006     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4007         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4008       return;
4009     }
4010   } while (!eof() && FormatTok->isNot(tok::greater));
4011   nextToken(); // Skip '>'.
4012 }
4013 
4014 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4015   do {
4016     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4017       nextToken();
4018       addUnwrappedLine();
4019       break;
4020     }
4021     if (FormatTok->is(tok::l_brace)) {
4022       parseBlock();
4023       // In ObjC interfaces, nothing should be following the "}".
4024       addUnwrappedLine();
4025     } else if (FormatTok->is(tok::r_brace)) {
4026       // Ignore stray "}". parseStructuralElement doesn't consume them.
4027       nextToken();
4028       addUnwrappedLine();
4029     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4030       nextToken();
4031       parseObjCMethod();
4032     } else {
4033       parseStructuralElement();
4034     }
4035   } while (!eof());
4036 }
4037 
4038 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4039   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4040          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4041   nextToken();
4042   nextToken(); // interface name
4043 
4044   // @interface can be followed by a lightweight generic
4045   // specialization list, then either a base class or a category.
4046   if (FormatTok->is(tok::less))
4047     parseObjCLightweightGenerics();
4048   if (FormatTok->is(tok::colon)) {
4049     nextToken();
4050     nextToken(); // base class name
4051     // The base class can also have lightweight generics applied to it.
4052     if (FormatTok->is(tok::less))
4053       parseObjCLightweightGenerics();
4054   } else if (FormatTok->is(tok::l_paren)) {
4055     // Skip category, if present.
4056     parseParens();
4057   }
4058 
4059   if (FormatTok->is(tok::less))
4060     parseObjCProtocolList();
4061 
4062   if (FormatTok->is(tok::l_brace)) {
4063     if (Style.BraceWrapping.AfterObjCDeclaration)
4064       addUnwrappedLine();
4065     parseBlock(/*MustBeDeclaration=*/true);
4066   }
4067 
4068   // With instance variables, this puts '}' on its own line.  Without instance
4069   // variables, this ends the @interface line.
4070   addUnwrappedLine();
4071 
4072   parseObjCUntilAtEnd();
4073 }
4074 
4075 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4076   assert(FormatTok->is(tok::less));
4077   // Unlike protocol lists, generic parameterizations support
4078   // nested angles:
4079   //
4080   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4081   //     NSObject <NSCopying, NSSecureCoding>
4082   //
4083   // so we need to count how many open angles we have left.
4084   unsigned NumOpenAngles = 1;
4085   do {
4086     nextToken();
4087     // Early exit in case someone forgot a close angle.
4088     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4089         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4090       break;
4091     }
4092     if (FormatTok->is(tok::less)) {
4093       ++NumOpenAngles;
4094     } else if (FormatTok->is(tok::greater)) {
4095       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4096       --NumOpenAngles;
4097     }
4098   } while (!eof() && NumOpenAngles != 0);
4099   nextToken(); // Skip '>'.
4100 }
4101 
4102 // Returns true for the declaration/definition form of @protocol,
4103 // false for the expression form.
4104 bool UnwrappedLineParser::parseObjCProtocol() {
4105   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4106   nextToken();
4107 
4108   if (FormatTok->is(tok::l_paren)) {
4109     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4110     return false;
4111   }
4112 
4113   // The definition/declaration form,
4114   // @protocol Foo
4115   // - (int)someMethod;
4116   // @end
4117 
4118   nextToken(); // protocol name
4119 
4120   if (FormatTok->is(tok::less))
4121     parseObjCProtocolList();
4122 
4123   // Check for protocol declaration.
4124   if (FormatTok->is(tok::semi)) {
4125     nextToken();
4126     addUnwrappedLine();
4127     return true;
4128   }
4129 
4130   addUnwrappedLine();
4131   parseObjCUntilAtEnd();
4132   return true;
4133 }
4134 
4135 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4136   bool IsImport = FormatTok->is(Keywords.kw_import);
4137   assert(IsImport || FormatTok->is(tok::kw_export));
4138   nextToken();
4139 
4140   // Consume the "default" in "export default class/function".
4141   if (FormatTok->is(tok::kw_default))
4142     nextToken();
4143 
4144   // Consume "async function", "function" and "default function", so that these
4145   // get parsed as free-standing JS functions, i.e. do not require a trailing
4146   // semicolon.
4147   if (FormatTok->is(Keywords.kw_async))
4148     nextToken();
4149   if (FormatTok->is(Keywords.kw_function)) {
4150     nextToken();
4151     return;
4152   }
4153 
4154   // For imports, `export *`, `export {...}`, consume the rest of the line up
4155   // to the terminating `;`. For everything else, just return and continue
4156   // parsing the structural element, i.e. the declaration or expression for
4157   // `export default`.
4158   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4159       !FormatTok->isStringLiteral() &&
4160       !(FormatTok->is(Keywords.kw_type) &&
4161         Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4162     return;
4163   }
4164 
4165   while (!eof()) {
4166     if (FormatTok->is(tok::semi))
4167       return;
4168     if (Line->Tokens.empty()) {
4169       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4170       // import statement should terminate.
4171       return;
4172     }
4173     if (FormatTok->is(tok::l_brace)) {
4174       FormatTok->setBlockKind(BK_Block);
4175       nextToken();
4176       parseBracedList();
4177     } else {
4178       nextToken();
4179     }
4180   }
4181 }
4182 
4183 void UnwrappedLineParser::parseStatementMacro() {
4184   nextToken();
4185   if (FormatTok->is(tok::l_paren))
4186     parseParens();
4187   if (FormatTok->is(tok::semi))
4188     nextToken();
4189   addUnwrappedLine();
4190 }
4191 
4192 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4193   // consume things like a::`b.c[d:e] or a::*
4194   while (true) {
4195     if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4196                            tok::coloncolon, tok::hash) ||
4197         Keywords.isVerilogIdentifier(*FormatTok)) {
4198       nextToken();
4199     } else if (FormatTok->is(tok::l_square)) {
4200       parseSquare();
4201     } else {
4202       break;
4203     }
4204   }
4205 }
4206 
4207 void UnwrappedLineParser::parseVerilogSensitivityList() {
4208   if (FormatTok->isNot(tok::at))
4209     return;
4210   nextToken();
4211   // A block event expression has 2 at signs.
4212   if (FormatTok->is(tok::at))
4213     nextToken();
4214   switch (FormatTok->Tok.getKind()) {
4215   case tok::star:
4216     nextToken();
4217     break;
4218   case tok::l_paren:
4219     parseParens();
4220     break;
4221   default:
4222     parseVerilogHierarchyIdentifier();
4223     break;
4224   }
4225 }
4226 
4227 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4228   unsigned AddLevels = 0;
4229 
4230   if (FormatTok->is(Keywords.kw_clocking)) {
4231     nextToken();
4232     if (Keywords.isVerilogIdentifier(*FormatTok))
4233       nextToken();
4234     parseVerilogSensitivityList();
4235     if (FormatTok->is(tok::semi))
4236       nextToken();
4237   } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4238                                 Keywords.kw_casez, Keywords.kw_randcase,
4239                                 Keywords.kw_randsequence)) {
4240     if (Style.IndentCaseLabels)
4241       AddLevels++;
4242     nextToken();
4243     if (FormatTok->is(tok::l_paren)) {
4244       FormatTok->setFinalizedType(TT_ConditionLParen);
4245       parseParens();
4246     }
4247     if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4248       nextToken();
4249     // The case header has no semicolon.
4250   } else {
4251     // "module" etc.
4252     nextToken();
4253     // all the words like the name of the module and specifiers like
4254     // "automatic" and the width of function return type
4255     while (true) {
4256       if (FormatTok->is(tok::l_square)) {
4257         auto Prev = FormatTok->getPreviousNonComment();
4258         if (Prev && Keywords.isVerilogIdentifier(*Prev))
4259           Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4260         parseSquare();
4261       } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4262                  FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4263         nextToken();
4264       } else {
4265         break;
4266       }
4267     }
4268 
4269     auto NewLine = [this]() {
4270       addUnwrappedLine();
4271       Line->IsContinuation = true;
4272     };
4273 
4274     // package imports
4275     while (FormatTok->is(Keywords.kw_import)) {
4276       NewLine();
4277       nextToken();
4278       parseVerilogHierarchyIdentifier();
4279       if (FormatTok->is(tok::semi))
4280         nextToken();
4281     }
4282 
4283     // parameters and ports
4284     if (FormatTok->is(Keywords.kw_verilogHash)) {
4285       NewLine();
4286       nextToken();
4287       if (FormatTok->is(tok::l_paren)) {
4288         FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4289         parseParens();
4290       }
4291     }
4292     if (FormatTok->is(tok::l_paren)) {
4293       NewLine();
4294       FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4295       parseParens();
4296     }
4297 
4298     // extends and implements
4299     if (FormatTok->is(Keywords.kw_extends)) {
4300       NewLine();
4301       nextToken();
4302       parseVerilogHierarchyIdentifier();
4303       if (FormatTok->is(tok::l_paren))
4304         parseParens();
4305     }
4306     if (FormatTok->is(Keywords.kw_implements)) {
4307       NewLine();
4308       do {
4309         nextToken();
4310         parseVerilogHierarchyIdentifier();
4311       } while (FormatTok->is(tok::comma));
4312     }
4313 
4314     // Coverage event for cover groups.
4315     if (FormatTok->is(tok::at)) {
4316       NewLine();
4317       parseVerilogSensitivityList();
4318     }
4319 
4320     if (FormatTok->is(tok::semi))
4321       nextToken(/*LevelDifference=*/1);
4322     addUnwrappedLine();
4323   }
4324 
4325   return AddLevels;
4326 }
4327 
4328 void UnwrappedLineParser::parseVerilogTable() {
4329   assert(FormatTok->is(Keywords.kw_table));
4330   nextToken(/*LevelDifference=*/1);
4331   addUnwrappedLine();
4332 
4333   auto InitialLevel = Line->Level++;
4334   while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4335     FormatToken *Tok = FormatTok;
4336     nextToken();
4337     if (Tok->is(tok::semi))
4338       addUnwrappedLine();
4339     else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4340       Tok->setFinalizedType(TT_VerilogTableItem);
4341   }
4342   Line->Level = InitialLevel;
4343   nextToken(/*LevelDifference=*/-1);
4344   addUnwrappedLine();
4345 }
4346 
4347 void UnwrappedLineParser::parseVerilogCaseLabel() {
4348   // The label will get unindented in AnnotatingParser. If there are no leading
4349   // spaces, indent the rest here so that things inside the block will be
4350   // indented relative to things outside. We don't use parseLabel because we
4351   // don't know whether this colon is a label or a ternary expression at this
4352   // point.
4353   auto OrigLevel = Line->Level;
4354   auto FirstLine = CurrentLines->size();
4355   if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4356     ++Line->Level;
4357   else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4358     --Line->Level;
4359   parseStructuralElement();
4360   // Restore the indentation in both the new line and the line that has the
4361   // label.
4362   if (CurrentLines->size() > FirstLine)
4363     (*CurrentLines)[FirstLine].Level = OrigLevel;
4364   Line->Level = OrigLevel;
4365 }
4366 
4367 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4368   for (const auto &N : Line.Tokens) {
4369     if (N.Tok->MacroCtx)
4370       return true;
4371     for (const UnwrappedLine &Child : N.Children)
4372       if (containsExpansion(Child))
4373         return true;
4374   }
4375   return false;
4376 }
4377 
4378 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4379   if (Line->Tokens.empty())
4380     return;
4381   LLVM_DEBUG({
4382     if (!parsingPPDirective()) {
4383       llvm::dbgs() << "Adding unwrapped line:\n";
4384       printDebugInfo(*Line);
4385     }
4386   });
4387 
4388   // If this line closes a block when in Whitesmiths mode, remember that
4389   // information so that the level can be decreased after the line is added.
4390   // This has to happen after the addition of the line since the line itself
4391   // needs to be indented.
4392   bool ClosesWhitesmithsBlock =
4393       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4394       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4395 
4396   // If the current line was expanded from a macro call, we use it to
4397   // reconstruct an unwrapped line from the structure of the expanded unwrapped
4398   // line and the unexpanded token stream.
4399   if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4400     if (!Reconstruct)
4401       Reconstruct.emplace(Line->Level, Unexpanded);
4402     Reconstruct->addLine(*Line);
4403 
4404     // While the reconstructed unexpanded lines are stored in the normal
4405     // flow of lines, the expanded lines are stored on the side to be analyzed
4406     // in an extra step.
4407     CurrentExpandedLines.push_back(std::move(*Line));
4408 
4409     if (Reconstruct->finished()) {
4410       UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4411       assert(!Reconstructed.Tokens.empty() &&
4412              "Reconstructed must at least contain the macro identifier.");
4413       assert(!parsingPPDirective());
4414       LLVM_DEBUG({
4415         llvm::dbgs() << "Adding unexpanded line:\n";
4416         printDebugInfo(Reconstructed);
4417       });
4418       ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4419       Lines.push_back(std::move(Reconstructed));
4420       CurrentExpandedLines.clear();
4421       Reconstruct.reset();
4422     }
4423   } else {
4424     // At the top level we only get here when no unexpansion is going on, or
4425     // when conditional formatting led to unfinished macro reconstructions.
4426     assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4427     CurrentLines->push_back(std::move(*Line));
4428   }
4429   Line->Tokens.clear();
4430   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4431   Line->FirstStartColumn = 0;
4432   Line->IsContinuation = false;
4433   Line->SeenDecltypeAuto = false;
4434 
4435   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4436     --Line->Level;
4437   if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4438     CurrentLines->append(
4439         std::make_move_iterator(PreprocessorDirectives.begin()),
4440         std::make_move_iterator(PreprocessorDirectives.end()));
4441     PreprocessorDirectives.clear();
4442   }
4443   // Disconnect the current token from the last token on the previous line.
4444   FormatTok->Previous = nullptr;
4445 }
4446 
4447 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4448 
4449 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4450   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4451          FormatTok.NewlinesBefore > 0;
4452 }
4453 
4454 // Checks if \p FormatTok is a line comment that continues the line comment
4455 // section on \p Line.
4456 static bool
4457 continuesLineCommentSection(const FormatToken &FormatTok,
4458                             const UnwrappedLine &Line,
4459                             const llvm::Regex &CommentPragmasRegex) {
4460   if (Line.Tokens.empty())
4461     return false;
4462 
4463   StringRef IndentContent = FormatTok.TokenText;
4464   if (FormatTok.TokenText.starts_with("//") ||
4465       FormatTok.TokenText.starts_with("/*")) {
4466     IndentContent = FormatTok.TokenText.substr(2);
4467   }
4468   if (CommentPragmasRegex.match(IndentContent))
4469     return false;
4470 
4471   // If Line starts with a line comment, then FormatTok continues the comment
4472   // section if its original column is greater or equal to the original start
4473   // column of the line.
4474   //
4475   // Define the min column token of a line as follows: if a line ends in '{' or
4476   // contains a '{' followed by a line comment, then the min column token is
4477   // that '{'. Otherwise, the min column token of the line is the first token of
4478   // the line.
4479   //
4480   // If Line starts with a token other than a line comment, then FormatTok
4481   // continues the comment section if its original column is greater than the
4482   // original start column of the min column token of the line.
4483   //
4484   // For example, the second line comment continues the first in these cases:
4485   //
4486   // // first line
4487   // // second line
4488   //
4489   // and:
4490   //
4491   // // first line
4492   //  // second line
4493   //
4494   // and:
4495   //
4496   // int i; // first line
4497   //  // second line
4498   //
4499   // and:
4500   //
4501   // do { // first line
4502   //      // second line
4503   //   int i;
4504   // } while (true);
4505   //
4506   // and:
4507   //
4508   // enum {
4509   //   a, // first line
4510   //    // second line
4511   //   b
4512   // };
4513   //
4514   // The second line comment doesn't continue the first in these cases:
4515   //
4516   //   // first line
4517   //  // second line
4518   //
4519   // and:
4520   //
4521   // int i; // first line
4522   // // second line
4523   //
4524   // and:
4525   //
4526   // do { // first line
4527   //   // second line
4528   //   int i;
4529   // } while (true);
4530   //
4531   // and:
4532   //
4533   // enum {
4534   //   a, // first line
4535   //   // second line
4536   // };
4537   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4538 
4539   // Scan for '{//'. If found, use the column of '{' as a min column for line
4540   // comment section continuation.
4541   const FormatToken *PreviousToken = nullptr;
4542   for (const UnwrappedLineNode &Node : Line.Tokens) {
4543     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4544         isLineComment(*Node.Tok)) {
4545       MinColumnToken = PreviousToken;
4546       break;
4547     }
4548     PreviousToken = Node.Tok;
4549 
4550     // Grab the last newline preceding a token in this unwrapped line.
4551     if (Node.Tok->NewlinesBefore > 0)
4552       MinColumnToken = Node.Tok;
4553   }
4554   if (PreviousToken && PreviousToken->is(tok::l_brace))
4555     MinColumnToken = PreviousToken;
4556 
4557   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4558                               MinColumnToken);
4559 }
4560 
4561 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4562   bool JustComments = Line->Tokens.empty();
4563   for (FormatToken *Tok : CommentsBeforeNextToken) {
4564     // Line comments that belong to the same line comment section are put on the
4565     // same line since later we might want to reflow content between them.
4566     // Additional fine-grained breaking of line comment sections is controlled
4567     // by the class BreakableLineCommentSection in case it is desirable to keep
4568     // several line comment sections in the same unwrapped line.
4569     //
4570     // FIXME: Consider putting separate line comment sections as children to the
4571     // unwrapped line instead.
4572     Tok->ContinuesLineCommentSection =
4573         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4574     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4575       addUnwrappedLine();
4576     pushToken(Tok);
4577   }
4578   if (NewlineBeforeNext && JustComments)
4579     addUnwrappedLine();
4580   CommentsBeforeNextToken.clear();
4581 }
4582 
4583 void UnwrappedLineParser::nextToken(int LevelDifference) {
4584   if (eof())
4585     return;
4586   flushComments(isOnNewLine(*FormatTok));
4587   pushToken(FormatTok);
4588   FormatToken *Previous = FormatTok;
4589   if (!Style.isJavaScript())
4590     readToken(LevelDifference);
4591   else
4592     readTokenWithJavaScriptASI();
4593   FormatTok->Previous = Previous;
4594   if (Style.isVerilog()) {
4595     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4596     // keywords like `begin`, we can't treat them the same as left braces
4597     // because some contexts require one of them.  For example structs use
4598     // braces and if blocks use keywords, and a left brace can occur in an if
4599     // statement, but it is not a block.  For keywords like `end`, we simply
4600     // treat them the same as right braces.
4601     if (Keywords.isVerilogEnd(*FormatTok))
4602       FormatTok->Tok.setKind(tok::r_brace);
4603   }
4604 }
4605 
4606 void UnwrappedLineParser::distributeComments(
4607     const SmallVectorImpl<FormatToken *> &Comments,
4608     const FormatToken *NextTok) {
4609   // Whether or not a line comment token continues a line is controlled by
4610   // the method continuesLineCommentSection, with the following caveat:
4611   //
4612   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4613   // that each comment line from the trail is aligned with the next token, if
4614   // the next token exists. If a trail exists, the beginning of the maximal
4615   // trail is marked as a start of a new comment section.
4616   //
4617   // For example in this code:
4618   //
4619   // int a; // line about a
4620   //   // line 1 about b
4621   //   // line 2 about b
4622   //   int b;
4623   //
4624   // the two lines about b form a maximal trail, so there are two sections, the
4625   // first one consisting of the single comment "// line about a" and the
4626   // second one consisting of the next two comments.
4627   if (Comments.empty())
4628     return;
4629   bool ShouldPushCommentsInCurrentLine = true;
4630   bool HasTrailAlignedWithNextToken = false;
4631   unsigned StartOfTrailAlignedWithNextToken = 0;
4632   if (NextTok) {
4633     // We are skipping the first element intentionally.
4634     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4635       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4636         HasTrailAlignedWithNextToken = true;
4637         StartOfTrailAlignedWithNextToken = i;
4638       }
4639     }
4640   }
4641   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4642     FormatToken *FormatTok = Comments[i];
4643     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4644       FormatTok->ContinuesLineCommentSection = false;
4645     } else {
4646       FormatTok->ContinuesLineCommentSection =
4647           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4648     }
4649     if (!FormatTok->ContinuesLineCommentSection &&
4650         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4651       ShouldPushCommentsInCurrentLine = false;
4652     }
4653     if (ShouldPushCommentsInCurrentLine)
4654       pushToken(FormatTok);
4655     else
4656       CommentsBeforeNextToken.push_back(FormatTok);
4657   }
4658 }
4659 
4660 void UnwrappedLineParser::readToken(int LevelDifference) {
4661   SmallVector<FormatToken *, 1> Comments;
4662   bool PreviousWasComment = false;
4663   bool FirstNonCommentOnLine = false;
4664   do {
4665     FormatTok = Tokens->getNextToken();
4666     assert(FormatTok);
4667     while (FormatTok->getType() == TT_ConflictStart ||
4668            FormatTok->getType() == TT_ConflictEnd ||
4669            FormatTok->getType() == TT_ConflictAlternative) {
4670       if (FormatTok->getType() == TT_ConflictStart)
4671         conditionalCompilationStart(/*Unreachable=*/false);
4672       else if (FormatTok->getType() == TT_ConflictAlternative)
4673         conditionalCompilationAlternative();
4674       else if (FormatTok->getType() == TT_ConflictEnd)
4675         conditionalCompilationEnd();
4676       FormatTok = Tokens->getNextToken();
4677       FormatTok->MustBreakBefore = true;
4678     }
4679 
4680     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4681                                       const FormatToken &Tok,
4682                                       bool PreviousWasComment) {
4683       auto IsFirstOnLine = [](const FormatToken &Tok) {
4684         return Tok.HasUnescapedNewline || Tok.IsFirst;
4685       };
4686 
4687       // Consider preprocessor directives preceded by block comments as first
4688       // on line.
4689       if (PreviousWasComment)
4690         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4691       return IsFirstOnLine(Tok);
4692     };
4693 
4694     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4695         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4696     PreviousWasComment = FormatTok->is(tok::comment);
4697 
4698     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4699            (!Style.isVerilog() ||
4700             Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4701            FirstNonCommentOnLine) {
4702       distributeComments(Comments, FormatTok);
4703       Comments.clear();
4704       // If there is an unfinished unwrapped line, we flush the preprocessor
4705       // directives only after that unwrapped line was finished later.
4706       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4707       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4708       assert((LevelDifference >= 0 ||
4709               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4710              "LevelDifference makes Line->Level negative");
4711       Line->Level += LevelDifference;
4712       // Comments stored before the preprocessor directive need to be output
4713       // before the preprocessor directive, at the same level as the
4714       // preprocessor directive, as we consider them to apply to the directive.
4715       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4716           PPBranchLevel > 0) {
4717         Line->Level += PPBranchLevel;
4718       }
4719       flushComments(isOnNewLine(*FormatTok));
4720       parsePPDirective();
4721       PreviousWasComment = FormatTok->is(tok::comment);
4722       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4723           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4724     }
4725 
4726     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4727         !Line->InPPDirective) {
4728       continue;
4729     }
4730 
4731     if (FormatTok->is(tok::identifier) &&
4732         Macros.defined(FormatTok->TokenText) &&
4733         // FIXME: Allow expanding macros in preprocessor directives.
4734         !Line->InPPDirective) {
4735       FormatToken *ID = FormatTok;
4736       unsigned Position = Tokens->getPosition();
4737 
4738       // To correctly parse the code, we need to replace the tokens of the macro
4739       // call with its expansion.
4740       auto PreCall = std::move(Line);
4741       Line.reset(new UnwrappedLine);
4742       bool OldInExpansion = InExpansion;
4743       InExpansion = true;
4744       // We parse the macro call into a new line.
4745       auto Args = parseMacroCall();
4746       InExpansion = OldInExpansion;
4747       assert(Line->Tokens.front().Tok == ID);
4748       // And remember the unexpanded macro call tokens.
4749       auto UnexpandedLine = std::move(Line);
4750       // Reset to the old line.
4751       Line = std::move(PreCall);
4752 
4753       LLVM_DEBUG({
4754         llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4755         if (Args) {
4756           llvm::dbgs() << "(";
4757           for (const auto &Arg : Args.value())
4758             for (const auto &T : Arg)
4759               llvm::dbgs() << T->TokenText << " ";
4760           llvm::dbgs() << ")";
4761         }
4762         llvm::dbgs() << "\n";
4763       });
4764       if (Macros.objectLike(ID->TokenText) && Args &&
4765           !Macros.hasArity(ID->TokenText, Args->size())) {
4766         // The macro is either
4767         // - object-like, but we got argumnets, or
4768         // - overloaded to be both object-like and function-like, but none of
4769         //   the function-like arities match the number of arguments.
4770         // Thus, expand as object-like macro.
4771         LLVM_DEBUG(llvm::dbgs()
4772                    << "Macro \"" << ID->TokenText
4773                    << "\" not overloaded for arity " << Args->size()
4774                    << "or not function-like, using object-like overload.");
4775         Args.reset();
4776         UnexpandedLine->Tokens.resize(1);
4777         Tokens->setPosition(Position);
4778         nextToken();
4779         assert(!Args && Macros.objectLike(ID->TokenText));
4780       }
4781       if ((!Args && Macros.objectLike(ID->TokenText)) ||
4782           (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4783         // Next, we insert the expanded tokens in the token stream at the
4784         // current position, and continue parsing.
4785         Unexpanded[ID] = std::move(UnexpandedLine);
4786         SmallVector<FormatToken *, 8> Expansion =
4787             Macros.expand(ID, std::move(Args));
4788         if (!Expansion.empty())
4789           FormatTok = Tokens->insertTokens(Expansion);
4790 
4791         LLVM_DEBUG({
4792           llvm::dbgs() << "Expanded: ";
4793           for (const auto &T : Expansion)
4794             llvm::dbgs() << T->TokenText << " ";
4795           llvm::dbgs() << "\n";
4796         });
4797       } else {
4798         LLVM_DEBUG({
4799           llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4800                        << "\", because it was used ";
4801           if (Args)
4802             llvm::dbgs() << "with " << Args->size();
4803           else
4804             llvm::dbgs() << "without";
4805           llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4806         });
4807         Tokens->setPosition(Position);
4808         FormatTok = ID;
4809       }
4810     }
4811 
4812     if (FormatTok->isNot(tok::comment)) {
4813       distributeComments(Comments, FormatTok);
4814       Comments.clear();
4815       return;
4816     }
4817 
4818     Comments.push_back(FormatTok);
4819   } while (!eof());
4820 
4821   distributeComments(Comments, nullptr);
4822   Comments.clear();
4823 }
4824 
4825 namespace {
4826 template <typename Iterator>
4827 void pushTokens(Iterator Begin, Iterator End,
4828                 llvm::SmallVectorImpl<FormatToken *> &Into) {
4829   for (auto I = Begin; I != End; ++I) {
4830     Into.push_back(I->Tok);
4831     for (const auto &Child : I->Children)
4832       pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4833   }
4834 }
4835 } // namespace
4836 
4837 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4838 UnwrappedLineParser::parseMacroCall() {
4839   std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4840   assert(Line->Tokens.empty());
4841   nextToken();
4842   if (FormatTok->isNot(tok::l_paren))
4843     return Args;
4844   unsigned Position = Tokens->getPosition();
4845   FormatToken *Tok = FormatTok;
4846   nextToken();
4847   Args.emplace();
4848   auto ArgStart = std::prev(Line->Tokens.end());
4849 
4850   int Parens = 0;
4851   do {
4852     switch (FormatTok->Tok.getKind()) {
4853     case tok::l_paren:
4854       ++Parens;
4855       nextToken();
4856       break;
4857     case tok::r_paren: {
4858       if (Parens > 0) {
4859         --Parens;
4860         nextToken();
4861         break;
4862       }
4863       Args->push_back({});
4864       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4865       nextToken();
4866       return Args;
4867     }
4868     case tok::comma: {
4869       if (Parens > 0) {
4870         nextToken();
4871         break;
4872       }
4873       Args->push_back({});
4874       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4875       nextToken();
4876       ArgStart = std::prev(Line->Tokens.end());
4877       break;
4878     }
4879     default:
4880       nextToken();
4881       break;
4882     }
4883   } while (!eof());
4884   Line->Tokens.resize(1);
4885   Tokens->setPosition(Position);
4886   FormatTok = Tok;
4887   return {};
4888 }
4889 
4890 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4891   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4892   if (MustBreakBeforeNextToken) {
4893     Line->Tokens.back().Tok->MustBreakBefore = true;
4894     MustBreakBeforeNextToken = false;
4895   }
4896 }
4897 
4898 } // end namespace format
4899 } // end namespace clang
4900