xref: /freebsd/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp (revision 4c2d3b022a1d543dbbff75a0c53e8d3d7242216d)
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #include <algorithm>
29 #include <utility>
30 
31 #define DEBUG_TYPE "format-parser"
32 
33 namespace clang {
34 namespace format {
35 
36 namespace {
37 
38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39                StringRef Prefix = "", bool PrintText = false) {
40   OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41      << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42   bool NewLine = false;
43   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44                                                     E = Line.Tokens.end();
45        I != E; ++I) {
46     if (NewLine) {
47       OS << Prefix;
48       NewLine = false;
49     }
50     OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType()
51        << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
52        << "\"] ";
53     for (SmallVectorImpl<UnwrappedLine>::const_iterator
54              CI = I->Children.begin(),
55              CE = I->Children.end();
56          CI != CE; ++CI) {
57       OS << "\n";
58       printLine(OS, *CI, (Prefix + "  ").str());
59       NewLine = true;
60     }
61   }
62   if (!NewLine)
63     OS << "\n";
64 }
65 
66 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
67   printLine(llvm::dbgs(), Line);
68 }
69 
70 class ScopedDeclarationState {
71 public:
72   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
73                          bool MustBeDeclaration)
74       : Line(Line), Stack(Stack) {
75     Line.MustBeDeclaration = MustBeDeclaration;
76     Stack.push_back(MustBeDeclaration);
77   }
78   ~ScopedDeclarationState() {
79     Stack.pop_back();
80     if (!Stack.empty())
81       Line.MustBeDeclaration = Stack.back();
82     else
83       Line.MustBeDeclaration = true;
84   }
85 
86 private:
87   UnwrappedLine &Line;
88   llvm::BitVector &Stack;
89 };
90 
91 } // end anonymous namespace
92 
93 class ScopedLineState {
94 public:
95   ScopedLineState(UnwrappedLineParser &Parser,
96                   bool SwitchToPreprocessorLines = false)
97       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
98     if (SwitchToPreprocessorLines)
99       Parser.CurrentLines = &Parser.PreprocessorDirectives;
100     else if (!Parser.Line->Tokens.empty())
101       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
102     PreBlockLine = std::move(Parser.Line);
103     Parser.Line = std::make_unique<UnwrappedLine>();
104     Parser.Line->Level = PreBlockLine->Level;
105     Parser.Line->PPLevel = PreBlockLine->PPLevel;
106     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
107     Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
108   }
109 
110   ~ScopedLineState() {
111     if (!Parser.Line->Tokens.empty())
112       Parser.addUnwrappedLine();
113     assert(Parser.Line->Tokens.empty());
114     Parser.Line = std::move(PreBlockLine);
115     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
116       Parser.MustBreakBeforeNextToken = true;
117     Parser.CurrentLines = OriginalLines;
118   }
119 
120 private:
121   UnwrappedLineParser &Parser;
122 
123   std::unique_ptr<UnwrappedLine> PreBlockLine;
124   SmallVectorImpl<UnwrappedLine> *OriginalLines;
125 };
126 
127 class CompoundStatementIndenter {
128 public:
129   CompoundStatementIndenter(UnwrappedLineParser *Parser,
130                             const FormatStyle &Style, unsigned &LineLevel)
131       : CompoundStatementIndenter(Parser, LineLevel,
132                                   Style.BraceWrapping.AfterControlStatement,
133                                   Style.BraceWrapping.IndentBraces) {}
134   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
135                             bool WrapBrace, bool IndentBrace)
136       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
137     if (WrapBrace)
138       Parser->addUnwrappedLine();
139     if (IndentBrace)
140       ++LineLevel;
141   }
142   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
143 
144 private:
145   unsigned &LineLevel;
146   unsigned OldLineLevel;
147 };
148 
149 UnwrappedLineParser::UnwrappedLineParser(
150     SourceManager &SourceMgr, const FormatStyle &Style,
151     const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
152     ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
153     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
154     IdentifierTable &IdentTable)
155     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
156       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
157       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
158       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
159       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
160                        ? IG_Rejected
161                        : IG_Inited),
162       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
163       Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
164 
165 void UnwrappedLineParser::reset() {
166   PPBranchLevel = -1;
167   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
168                      ? IG_Rejected
169                      : IG_Inited;
170   IncludeGuardToken = nullptr;
171   Line.reset(new UnwrappedLine);
172   CommentsBeforeNextToken.clear();
173   FormatTok = nullptr;
174   MustBreakBeforeNextToken = false;
175   IsDecltypeAutoFunction = false;
176   PreprocessorDirectives.clear();
177   CurrentLines = &Lines;
178   DeclarationScopeStack.clear();
179   NestedTooDeep.clear();
180   NestedLambdas.clear();
181   PPStack.clear();
182   Line->FirstStartColumn = FirstStartColumn;
183 
184   if (!Unexpanded.empty())
185     for (FormatToken *Token : AllTokens)
186       Token->MacroCtx.reset();
187   CurrentExpandedLines.clear();
188   ExpandedLines.clear();
189   Unexpanded.clear();
190   InExpansion = false;
191   Reconstruct.reset();
192 }
193 
194 void UnwrappedLineParser::parse() {
195   IndexedTokenSource TokenSource(AllTokens);
196   Line->FirstStartColumn = FirstStartColumn;
197   do {
198     LLVM_DEBUG(llvm::dbgs() << "----\n");
199     reset();
200     Tokens = &TokenSource;
201     TokenSource.reset();
202 
203     readToken();
204     parseFile();
205 
206     // If we found an include guard then all preprocessor directives (other than
207     // the guard) are over-indented by one.
208     if (IncludeGuard == IG_Found) {
209       for (auto &Line : Lines)
210         if (Line.InPPDirective && Line.Level > 0)
211           --Line.Level;
212     }
213 
214     // Create line with eof token.
215     assert(eof());
216     pushToken(FormatTok);
217     addUnwrappedLine();
218 
219     // In a first run, format everything with the lines containing macro calls
220     // replaced by the expansion.
221     if (!ExpandedLines.empty()) {
222       LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
223       for (const auto &Line : Lines) {
224         if (!Line.Tokens.empty()) {
225           auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
226           if (it != ExpandedLines.end()) {
227             for (const auto &Expanded : it->second) {
228               LLVM_DEBUG(printDebugInfo(Expanded));
229               Callback.consumeUnwrappedLine(Expanded);
230             }
231             continue;
232           }
233         }
234         LLVM_DEBUG(printDebugInfo(Line));
235         Callback.consumeUnwrappedLine(Line);
236       }
237       Callback.finishRun();
238     }
239 
240     LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
241     for (const UnwrappedLine &Line : Lines) {
242       LLVM_DEBUG(printDebugInfo(Line));
243       Callback.consumeUnwrappedLine(Line);
244     }
245     Callback.finishRun();
246     Lines.clear();
247     while (!PPLevelBranchIndex.empty() &&
248            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
249       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
250       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
251     }
252     if (!PPLevelBranchIndex.empty()) {
253       ++PPLevelBranchIndex.back();
254       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
255       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
256     }
257   } while (!PPLevelBranchIndex.empty());
258 }
259 
260 void UnwrappedLineParser::parseFile() {
261   // The top-level context in a file always has declarations, except for pre-
262   // processor directives and JavaScript files.
263   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
264   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
265                                           MustBeDeclaration);
266   if (Style.Language == FormatStyle::LK_TextProto)
267     parseBracedList();
268   else
269     parseLevel();
270   // Make sure to format the remaining tokens.
271   //
272   // LK_TextProto is special since its top-level is parsed as the body of a
273   // braced list, which does not necessarily have natural line separators such
274   // as a semicolon. Comments after the last entry that have been determined to
275   // not belong to that line, as in:
276   //   key: value
277   //   // endfile comment
278   // do not have a chance to be put on a line of their own until this point.
279   // Here we add this newline before end-of-file comments.
280   if (Style.Language == FormatStyle::LK_TextProto &&
281       !CommentsBeforeNextToken.empty()) {
282     addUnwrappedLine();
283   }
284   flushComments(true);
285   addUnwrappedLine();
286 }
287 
288 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289   do {
290     switch (FormatTok->Tok.getKind()) {
291     case tok::l_brace:
292       return;
293     default:
294       if (FormatTok->is(Keywords.kw_where)) {
295         addUnwrappedLine();
296         nextToken();
297         parseCSharpGenericTypeConstraint();
298         break;
299       }
300       nextToken();
301       break;
302     }
303   } while (!eof());
304 }
305 
306 void UnwrappedLineParser::parseCSharpAttribute() {
307   int UnpairedSquareBrackets = 1;
308   do {
309     switch (FormatTok->Tok.getKind()) {
310     case tok::r_square:
311       nextToken();
312       --UnpairedSquareBrackets;
313       if (UnpairedSquareBrackets == 0) {
314         addUnwrappedLine();
315         return;
316       }
317       break;
318     case tok::l_square:
319       ++UnpairedSquareBrackets;
320       nextToken();
321       break;
322     default:
323       nextToken();
324       break;
325     }
326   } while (!eof());
327 }
328 
329 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
330   if (!Lines.empty() && Lines.back().InPPDirective)
331     return true;
332 
333   const FormatToken *Previous = Tokens->getPreviousToken();
334   return Previous && Previous->is(tok::comment) &&
335          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
336 }
337 
/// \brief Parses a level, that is, the run of structural elements up to the
/// token that closes \p OpeningBrace, or up to end of file.
///
/// When \p OpeningBrace is non-null, parsing stops in front of the closing
/// token (a right brace or a macro-block-end token), which is left for the
/// caller to consume. When it is null, a stray closing token is consumed and
/// parsing continues until end of file.
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Always true when brace removal is disabled, which makes the "simple
  // block" test below fail without looking at the tokens again.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  // Number of statements parsed at this level so far.
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    // Attribute tokens are skipped without starting a statement.
    if (FormatTok->isAttribute()) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched as if they were braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it. Once a do-while or a
    // label has been seen, nullptr is passed for the corresponding
    // out-parameter so the recorded flag is not touched again.
    auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
                             HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this position gets a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (InRequiresExpression) {
        FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      // Outside requires expressions, a real '{' may start a braced list
      // rather than a block.
      if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // This token closes the level. The remaining checks only decide
        // whether the block counts as "simple".
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // The isNot(r_brace) test rejects macro-block-end tokens, which
        // reach this case through the kind remapping above.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment on the same line right after the closing brace also
        // disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // No opening brace at this level: consume the stray closing token and
      // keep going.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments for the token after 'default', then rewind
      // to the stored position.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      [[fallthrough]];
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
        // Proto: there are no switch/case statements
        // Verilog: Case labels don't have this word. We handle case
        // labels including default in TokenAnnotator.
        // JavaScript: A 'case: string' style field declaration.
        ParseDefault();
        break;
      }
      // Only the first label seen at this level may raise the line level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        // Consume the '[' here; parseCSharpAttribute() handles the rest.
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      [[fallthrough]];
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}
474 
/// Looks ahead from the current '{' and assigns a block kind (BK_Block or
/// BK_BracedInit) to it and to the braces nested inside it.
///
/// The scan runs until the brace that opened it has been closed or end of
/// file is reached. Braces that are still unclassified at that point are
/// marked as blocks. The token position is restored before returning, so no
/// token is consumed.
/// \param ExpectClassBody When set, a ';' directly after the closing brace of
/// the outermost brace pair does not count as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  struct StackEntry {
    // The '{' token itself.
    FormatToken *Tok;
    // The token that was processed immediately before the '{'.
    const FormatToken *PrevTok;
  };
  SmallVector<StackEntry, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment, non-preprocessor token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));
    // Outside macro bodies, skip whole preprocessor lines: everything after
    // the '#' up to the first non-comment token that starts a new line.
    while (NextTok->is(tok::hash) && !Line->InMacroBody) {
      NextTok = Tokens->getNextToken();
      do {
        NextTok = Tokens->getNextToken();
      } while (NextTok->is(tok::comment) ||
               (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)));
    }

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back({Tok, PrevTok});
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only braces that are still undecided are classified here.
      if (LBraceStack.back().Tok->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::ellipsis);

          // Distinguish between braced list in a constructor initializer list
          // followed by constructor body, or just adjacent blocks.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
                                                   tok::greater));

          // An identifier after '}' points to a braced list unless the brace
          // pair was empty or its last element ended in ';' or '}'.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // A ';' after '}' counts, except after the outermost brace pair
          // when a class body is expected.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            // Note: this overrides every decision made above and advances
            // NextTok past the first '['.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Both braces of the pair always get the same kind.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back().Tok->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only identifiers annotated as statement macros are treated like the
      // statement tokens below.
      if (Tok->isNot(TT_StatementMacro))
        break;
      [[fallthrough]];
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these inside an undecided brace pair makes it a block.
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
        LBraceStack.back().Tok->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (const auto &Entry : LBraceStack)
    if (Entry.Tok->is(BK_Unknown))
      Entry.Tok->setBlockKind(BK_Block);

  // Rewind so that the look-ahead consumed nothing.
  FormatTok = Tokens->setPosition(StoredPosition);
}
645 
646 // Sets the token type of the directly previous right brace.
647 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
648   if (auto Prev = FormatTok->getPreviousNonComment();
649       Prev && Prev->is(tok::r_brace)) {
650     Prev->setFinalizedType(Type);
651   }
652 }
653 
/// Mixes the std::hash of \p v into the running hash \p seed.
///
/// \param seed Accumulated hash; updated in place.
/// \param v    Value whose hash is folded into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  // The shifted copies of the old seed make the result depend on the order
  // in which values are combined.
  const std::size_t Mix = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mix;
}
659 
660 size_t UnwrappedLineParser::computePPHash() const {
661   size_t h = 0;
662   for (const auto &i : PPStack) {
663     hash_combine(h, size_t(i.Kind));
664     hash_combine(h, i.Line);
665   }
666   return h;
667 }
668 
669 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
670 // is not null, subtracts its length (plus the preceding space) when computing
671 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
672 // running the token annotator on it so that we can restore them afterward.
673 bool UnwrappedLineParser::mightFitOnOneLine(
674     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
675   const auto ColumnLimit = Style.ColumnLimit;
676   if (ColumnLimit == 0)
677     return true;
678 
679   auto &Tokens = ParsedLine.Tokens;
680   assert(!Tokens.empty());
681 
682   const auto *LastToken = Tokens.back().Tok;
683   assert(LastToken);
684 
685   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
686 
687   int Index = 0;
688   for (const auto &Token : Tokens) {
689     assert(Token.Tok);
690     auto &SavedToken = SavedTokens[Index++];
691     SavedToken.Tok = new FormatToken;
692     SavedToken.Tok->copyFrom(*Token.Tok);
693     SavedToken.Children = std::move(Token.Children);
694   }
695 
696   AnnotatedLine Line(ParsedLine);
697   assert(Line.Last == LastToken);
698 
699   TokenAnnotator Annotator(Style, Keywords);
700   Annotator.annotate(Line);
701   Annotator.calculateFormattingInformation(Line);
702 
703   auto Length = LastToken->TotalLength;
704   if (OpeningBrace) {
705     assert(OpeningBrace != Tokens.front().Tok);
706     if (auto Prev = OpeningBrace->Previous;
707         Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
708       Length -= ColumnLimit;
709     }
710     Length -= OpeningBrace->TokenText.size() + 1;
711   }
712 
713   if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
714     assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
715     Length -= FirstToken->TokenText.size() + 1;
716   }
717 
718   Index = 0;
719   for (auto &Token : Tokens) {
720     const auto &SavedToken = SavedTokens[Index++];
721     Token.Tok->copyFrom(*SavedToken.Tok);
722     Token.Children = std::move(SavedToken.Children);
723     delete SavedToken.Tok;
724   }
725 
726   // If these change PPLevel needs to be used for get correct indentation.
727   assert(!Line.InMacroBody);
728   assert(!Line.InPPDirective);
729   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
730 }
731 
732 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
733                                              unsigned AddLevels, bool MunchSemi,
734                                              bool KeepBraces,
735                                              IfStmtKind *IfKind,
736                                              bool UnindentWhitesmithsBraces) {
737   auto HandleVerilogBlockLabel = [this]() {
738     // ":" name
739     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
740       nextToken();
741       if (Keywords.isVerilogIdentifier(*FormatTok))
742         nextToken();
743     }
744   };
745 
746   // Whether this is a Verilog-specific block that has a special header like a
747   // module.
748   const bool VerilogHierarchy =
749       Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
750   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
751           (Style.isVerilog() &&
752            (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
753          "'{' or macro block token expected");
754   FormatToken *Tok = FormatTok;
755   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
756   auto Index = CurrentLines->size();
757   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
758   FormatTok->setBlockKind(BK_Block);
759 
760   // For Whitesmiths mode, jump to the next level prior to skipping over the
761   // braces.
762   if (!VerilogHierarchy && AddLevels > 0 &&
763       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
764     ++Line->Level;
765   }
766 
767   size_t PPStartHash = computePPHash();
768 
769   const unsigned InitialLevel = Line->Level;
770   if (VerilogHierarchy) {
771     AddLevels += parseVerilogHierarchyHeader();
772   } else {
773     nextToken(/*LevelDifference=*/AddLevels);
774     HandleVerilogBlockLabel();
775   }
776 
777   // Bail out if there are too many levels. Otherwise, the stack might overflow.
778   if (Line->Level > 300)
779     return nullptr;
780 
781   if (MacroBlock && FormatTok->is(tok::l_paren))
782     parseParens();
783 
784   size_t NbPreprocessorDirectives =
785       !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
786   addUnwrappedLine();
787   size_t OpeningLineIndex =
788       CurrentLines->empty()
789           ? (UnwrappedLine::kInvalidIndex)
790           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
791 
792   // Whitesmiths is weird here. The brace needs to be indented for the namespace
793   // block, but the block itself may not be indented depending on the style
794   // settings. This allows the format to back up one level in those cases.
795   if (UnindentWhitesmithsBraces)
796     --Line->Level;
797 
798   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
799                                           MustBeDeclaration);
800   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
801     Line->Level += AddLevels;
802 
803   FormatToken *IfLBrace = nullptr;
804   const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
805 
806   if (eof())
807     return IfLBrace;
808 
809   if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
810                  : FormatTok->isNot(tok::r_brace)) {
811     Line->Level = InitialLevel;
812     FormatTok->setBlockKind(BK_Block);
813     return IfLBrace;
814   }
815 
816   if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
817     FormatTok->setFinalizedType(TT_NamespaceRBrace);
818 
819   const bool IsFunctionRBrace =
820       FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
821 
822   auto RemoveBraces = [=]() mutable {
823     if (!SimpleBlock)
824       return false;
825     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
826     assert(FormatTok->is(tok::r_brace));
827     const bool WrappedOpeningBrace = !Tok->Previous;
828     if (WrappedOpeningBrace && FollowedByComment)
829       return false;
830     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
831     if (KeepBraces && !HasRequiredIfBraces)
832       return false;
833     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
834       const FormatToken *Previous = Tokens->getPreviousToken();
835       assert(Previous);
836       if (Previous->is(tok::r_brace) && !Previous->Optional)
837         return false;
838     }
839     assert(!CurrentLines->empty());
840     auto &LastLine = CurrentLines->back();
841     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
842       return false;
843     if (Tok->is(TT_ElseLBrace))
844       return true;
845     if (WrappedOpeningBrace) {
846       assert(Index > 0);
847       --Index; // The line above the wrapped l_brace.
848       Tok = nullptr;
849     }
850     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
851   };
852   if (RemoveBraces()) {
853     Tok->MatchingParen = FormatTok;
854     FormatTok->MatchingParen = Tok;
855   }
856 
857   size_t PPEndHash = computePPHash();
858 
859   // Munch the closing brace.
860   nextToken(/*LevelDifference=*/-AddLevels);
861 
862   // When this is a function block and there is an unnecessary semicolon
863   // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
864   // it later).
865   if (Style.RemoveSemicolon && IsFunctionRBrace) {
866     while (FormatTok->is(tok::semi)) {
867       FormatTok->Optional = true;
868       nextToken();
869     }
870   }
871 
872   HandleVerilogBlockLabel();
873 
874   if (MacroBlock && FormatTok->is(tok::l_paren))
875     parseParens();
876 
877   Line->Level = InitialLevel;
878 
879   if (FormatTok->is(tok::kw_noexcept)) {
880     // A noexcept in a requires expression.
881     nextToken();
882   }
883 
884   if (FormatTok->is(tok::arrow)) {
885     // Following the } or noexcept we can find a trailing return type arrow
886     // as part of an implicit conversion constraint.
887     nextToken();
888     parseStructuralElement();
889   }
890 
891   if (MunchSemi && FormatTok->is(tok::semi))
892     nextToken();
893 
894   if (PPStartHash == PPEndHash) {
895     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
896     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
897       // Update the opening line to add the forward reference as well
898       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
899           CurrentLines->size() - 1;
900     }
901   }
902 
903   return IfLBrace;
904 }
905 
906 static bool isGoogScope(const UnwrappedLine &Line) {
907   // FIXME: Closure-library specific stuff should not be hard-coded but be
908   // configurable.
909   if (Line.Tokens.size() < 4)
910     return false;
911   auto I = Line.Tokens.begin();
912   if (I->Tok->TokenText != "goog")
913     return false;
914   ++I;
915   if (I->Tok->isNot(tok::period))
916     return false;
917   ++I;
918   if (I->Tok->TokenText != "scope")
919     return false;
920   ++I;
921   return I->Tok->is(tok::l_paren);
922 }
923 
924 static bool isIIFE(const UnwrappedLine &Line,
925                    const AdditionalKeywords &Keywords) {
926   // Look for the start of an immediately invoked anonymous function.
927   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
928   // This is commonly done in JavaScript to create a new, anonymous scope.
929   // Example: (function() { ... })()
930   if (Line.Tokens.size() < 3)
931     return false;
932   auto I = Line.Tokens.begin();
933   if (I->Tok->isNot(tok::l_paren))
934     return false;
935   ++I;
936   if (I->Tok->isNot(Keywords.kw_function))
937     return false;
938   ++I;
939   return I->Tok->is(tok::l_paren);
940 }
941 
942 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
943                                    const FormatToken &InitialToken) {
944   tok::TokenKind Kind = InitialToken.Tok.getKind();
945   if (InitialToken.is(TT_NamespaceMacro))
946     Kind = tok::kw_namespace;
947 
948   switch (Kind) {
949   case tok::kw_namespace:
950     return Style.BraceWrapping.AfterNamespace;
951   case tok::kw_class:
952     return Style.BraceWrapping.AfterClass;
953   case tok::kw_union:
954     return Style.BraceWrapping.AfterUnion;
955   case tok::kw_struct:
956     return Style.BraceWrapping.AfterStruct;
957   case tok::kw_enum:
958     return Style.BraceWrapping.AfterEnum;
959   default:
960     return false;
961   }
962 }
963 
964 void UnwrappedLineParser::parseChildBlock() {
965   assert(FormatTok->is(tok::l_brace));
966   FormatTok->setBlockKind(BK_Block);
967   const FormatToken *OpeningBrace = FormatTok;
968   nextToken();
969   {
970     bool SkipIndent = (Style.isJavaScript() &&
971                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
972     ScopedLineState LineState(*this);
973     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
974                                             /*MustBeDeclaration=*/false);
975     Line->Level += SkipIndent ? 0 : 1;
976     parseLevel(OpeningBrace);
977     flushComments(isOnNewLine(*FormatTok));
978     Line->Level -= SkipIndent ? 0 : 1;
979   }
980   nextToken();
981 }
982 
983 void UnwrappedLineParser::parsePPDirective() {
984   assert(FormatTok->is(tok::hash) && "'#' expected");
985   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
986 
987   nextToken();
988 
989   if (!FormatTok->Tok.getIdentifierInfo()) {
990     parsePPUnknown();
991     return;
992   }
993 
994   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
995   case tok::pp_define:
996     parsePPDefine();
997     return;
998   case tok::pp_if:
999     parsePPIf(/*IfDef=*/false);
1000     break;
1001   case tok::pp_ifdef:
1002   case tok::pp_ifndef:
1003     parsePPIf(/*IfDef=*/true);
1004     break;
1005   case tok::pp_else:
1006   case tok::pp_elifdef:
1007   case tok::pp_elifndef:
1008   case tok::pp_elif:
1009     parsePPElse();
1010     break;
1011   case tok::pp_endif:
1012     parsePPEndIf();
1013     break;
1014   case tok::pp_pragma:
1015     parsePPPragma();
1016     break;
1017   default:
1018     parsePPUnknown();
1019     break;
1020   }
1021 }
1022 
1023 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1024   size_t Line = CurrentLines->size();
1025   if (CurrentLines == &PreprocessorDirectives)
1026     Line += Lines.size();
1027 
1028   if (Unreachable ||
1029       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1030     PPStack.push_back({PP_Unreachable, Line});
1031   } else {
1032     PPStack.push_back({PP_Conditional, Line});
1033   }
1034 }
1035 
1036 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1037   ++PPBranchLevel;
1038   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1039   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1040     PPLevelBranchIndex.push_back(0);
1041     PPLevelBranchCount.push_back(0);
1042   }
1043   PPChainBranchIndex.push(Unreachable ? -1 : 0);
1044   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1045   conditionalCompilationCondition(Unreachable || Skip);
1046 }
1047 
1048 void UnwrappedLineParser::conditionalCompilationAlternative() {
1049   if (!PPStack.empty())
1050     PPStack.pop_back();
1051   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1052   if (!PPChainBranchIndex.empty())
1053     ++PPChainBranchIndex.top();
1054   conditionalCompilationCondition(
1055       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1056       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1057 }
1058 
1059 void UnwrappedLineParser::conditionalCompilationEnd() {
1060   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1061   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1062     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1063       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1064   }
1065   // Guard against #endif's without #if.
1066   if (PPBranchLevel > -1)
1067     --PPBranchLevel;
1068   if (!PPChainBranchIndex.empty())
1069     PPChainBranchIndex.pop();
1070   if (!PPStack.empty())
1071     PPStack.pop_back();
1072 }
1073 
1074 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1075   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1076   nextToken();
1077   bool Unreachable = false;
1078   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1079     Unreachable = true;
1080   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1081     Unreachable = true;
1082   conditionalCompilationStart(Unreachable);
1083   FormatToken *IfCondition = FormatTok;
1084   // If there's a #ifndef on the first line, and the only lines before it are
1085   // comments, it could be an include guard.
1086   bool MaybeIncludeGuard = IfNDef;
1087   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1088     for (auto &Line : Lines) {
1089       if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1090         MaybeIncludeGuard = false;
1091         IncludeGuard = IG_Rejected;
1092         break;
1093       }
1094     }
1095   }
1096   --PPBranchLevel;
1097   parsePPUnknown();
1098   ++PPBranchLevel;
1099   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1100     IncludeGuard = IG_IfNdefed;
1101     IncludeGuardToken = IfCondition;
1102   }
1103 }
1104 
1105 void UnwrappedLineParser::parsePPElse() {
1106   // If a potential include guard has an #else, it's not an include guard.
1107   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1108     IncludeGuard = IG_Rejected;
1109   // Don't crash when there is an #else without an #if.
1110   assert(PPBranchLevel >= -1);
1111   if (PPBranchLevel == -1)
1112     conditionalCompilationStart(/*Unreachable=*/true);
1113   conditionalCompilationAlternative();
1114   --PPBranchLevel;
1115   parsePPUnknown();
1116   ++PPBranchLevel;
1117 }
1118 
1119 void UnwrappedLineParser::parsePPEndIf() {
1120   conditionalCompilationEnd();
1121   parsePPUnknown();
1122   // If the #endif of a potential include guard is the last thing in the file,
1123   // then we found an include guard.
1124   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1125       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1126     IncludeGuard = IG_Found;
1127   }
1128 }
1129 
1130 void UnwrappedLineParser::parsePPDefine() {
1131   nextToken();
1132 
1133   if (!FormatTok->Tok.getIdentifierInfo()) {
1134     IncludeGuard = IG_Rejected;
1135     IncludeGuardToken = nullptr;
1136     parsePPUnknown();
1137     return;
1138   }
1139 
1140   if (IncludeGuard == IG_IfNdefed &&
1141       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1142     IncludeGuard = IG_Defined;
1143     IncludeGuardToken = nullptr;
1144     for (auto &Line : Lines) {
1145       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1146         IncludeGuard = IG_Rejected;
1147         break;
1148       }
1149     }
1150   }
1151 
1152   // In the context of a define, even keywords should be treated as normal
1153   // identifiers. Setting the kind to identifier is not enough, because we need
1154   // to treat additional keywords like __except as well, which are already
1155   // identifiers. Setting the identifier info to null interferes with include
1156   // guard processing above, and changes preprocessing nesting.
1157   FormatTok->Tok.setKind(tok::identifier);
1158   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1159   nextToken();
1160   if (FormatTok->Tok.getKind() == tok::l_paren &&
1161       !FormatTok->hasWhitespaceBefore()) {
1162     parseParens();
1163   }
1164   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1165     Line->Level += PPBranchLevel + 1;
1166   addUnwrappedLine();
1167   ++Line->Level;
1168 
1169   Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1170   assert((int)Line->PPLevel >= 0);
1171   Line->InMacroBody = true;
1172 
1173   if (Style.SkipMacroDefinitionBody) {
1174     do {
1175       FormatTok->Finalized = true;
1176       nextToken();
1177     } while (!eof());
1178     addUnwrappedLine();
1179     return;
1180   }
1181 
1182   if (FormatTok->is(tok::identifier) &&
1183       Tokens->peekNextToken()->is(tok::colon)) {
1184     nextToken();
1185     nextToken();
1186   }
1187 
1188   // Errors during a preprocessor directive can only affect the layout of the
1189   // preprocessor directive, and thus we ignore them. An alternative approach
1190   // would be to use the same approach we use on the file level (no
1191   // re-indentation if there was a structural error) within the macro
1192   // definition.
1193   parseFile();
1194 }
1195 
1196 void UnwrappedLineParser::parsePPPragma() {
1197   Line->InPragmaDirective = true;
1198   parsePPUnknown();
1199 }
1200 
1201 void UnwrappedLineParser::parsePPUnknown() {
1202   do {
1203     nextToken();
1204   } while (!eof());
1205   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1206     Line->Level += PPBranchLevel + 1;
1207   addUnwrappedLine();
1208 }
1209 
1210 // Here we exclude certain tokens that are not usually the first token in an
1211 // unwrapped line. This is used in attempt to distinguish macro calls without
1212 // trailing semicolons from other constructs split to several lines.
1213 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1214   // Semicolon can be a null-statement, l_square can be a start of a macro or
1215   // a C++11 attribute, but this doesn't seem to be common.
1216   assert(Tok.isNot(TT_AttributeSquare));
1217   return !Tok.isOneOf(tok::semi, tok::l_brace,
1218                       // Tokens that can only be used as binary operators and a
1219                       // part of overloaded operator names.
1220                       tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1221                       tok::less, tok::greater, tok::slash, tok::percent,
1222                       tok::lessless, tok::greatergreater, tok::equal,
1223                       tok::plusequal, tok::minusequal, tok::starequal,
1224                       tok::slashequal, tok::percentequal, tok::ampequal,
1225                       tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1226                       tok::lesslessequal,
1227                       // Colon is used in labels, base class lists, initializer
1228                       // lists, range-based for loops, ternary operator, but
1229                       // should never be the first token in an unwrapped line.
1230                       tok::colon,
1231                       // 'noexcept' is a trailing annotation.
1232                       tok::kw_noexcept);
1233 }
1234 
1235 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1236                           const FormatToken *FormatTok) {
1237   // FIXME: This returns true for C/C++ keywords like 'struct'.
1238   return FormatTok->is(tok::identifier) &&
1239          (!FormatTok->Tok.getIdentifierInfo() ||
1240           !FormatTok->isOneOf(
1241               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1242               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1243               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1244               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1245               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1246               Keywords.kw_instanceof, Keywords.kw_interface,
1247               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1248 }
1249 
1250 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1251                                  const FormatToken *FormatTok) {
1252   return FormatTok->Tok.isLiteral() ||
1253          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1254          mustBeJSIdent(Keywords, FormatTok);
1255 }
1256 
1257 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1258 // when encountered after a value (see mustBeJSIdentOrValue).
1259 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1260                            const FormatToken *FormatTok) {
1261   return FormatTok->isOneOf(
1262       tok::kw_return, Keywords.kw_yield,
1263       // conditionals
1264       tok::kw_if, tok::kw_else,
1265       // loops
1266       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1267       // switch/case
1268       tok::kw_switch, tok::kw_case,
1269       // exceptions
1270       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1271       // declaration
1272       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1273       Keywords.kw_async, Keywords.kw_function,
1274       // import/export
1275       Keywords.kw_import, tok::kw_export);
1276 }
1277 
1278 // Checks whether a token is a type in K&R C (aka C78).
1279 static bool isC78Type(const FormatToken &Tok) {
1280   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1281                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1282                      tok::identifier);
1283 }
1284 
1285 // This function checks whether a token starts the first parameter declaration
1286 // in a K&R C (aka C78) function definition, e.g.:
1287 //   int f(a, b)
1288 //   short a, b;
1289 //   {
1290 //      return a + b;
1291 //   }
1292 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1293                                const FormatToken *FuncName) {
1294   assert(Tok);
1295   assert(Next);
1296   assert(FuncName);
1297 
1298   if (FuncName->isNot(tok::identifier))
1299     return false;
1300 
1301   const FormatToken *Prev = FuncName->Previous;
1302   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1303     return false;
1304 
1305   if (!isC78Type(*Tok) &&
1306       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1307     return false;
1308   }
1309 
1310   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1311     return false;
1312 
1313   Tok = Tok->Previous;
1314   if (!Tok || Tok->isNot(tok::r_paren))
1315     return false;
1316 
1317   Tok = Tok->Previous;
1318   if (!Tok || Tok->isNot(tok::identifier))
1319     return false;
1320 
1321   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1322 }
1323 
1324 bool UnwrappedLineParser::parseModuleImport() {
1325   assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1326 
1327   if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1328       !Token->Tok.getIdentifierInfo() &&
1329       !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1330     return false;
1331   }
1332 
1333   nextToken();
1334   while (!eof()) {
1335     if (FormatTok->is(tok::colon)) {
1336       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1337     }
1338     // Handle import <foo/bar.h> as we would an include statement.
1339     else if (FormatTok->is(tok::less)) {
1340       nextToken();
1341       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1342         // Mark tokens up to the trailing line comments as implicit string
1343         // literals.
1344         if (FormatTok->isNot(tok::comment) &&
1345             !FormatTok->TokenText.starts_with("//")) {
1346           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1347         }
1348         nextToken();
1349       }
1350     }
1351     if (FormatTok->is(tok::semi)) {
1352       nextToken();
1353       break;
1354     }
1355     nextToken();
1356   }
1357 
1358   addUnwrappedLine();
1359   return true;
1360 }
1361 
1362 // readTokenWithJavaScriptASI reads the next token and terminates the current
1363 // line if JavaScript Automatic Semicolon Insertion must
1364 // happen between the current token and the next token.
1365 //
1366 // This method is conservative - it cannot cover all edge cases of JavaScript,
1367 // but only aims to correctly handle certain well known cases. It *must not*
1368 // return true in speculative cases.
1369 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1370   FormatToken *Previous = FormatTok;
1371   readToken();
1372   FormatToken *Next = FormatTok;
1373 
1374   bool IsOnSameLine =
1375       CommentsBeforeNextToken.empty()
1376           ? Next->NewlinesBefore == 0
1377           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1378   if (IsOnSameLine)
1379     return;
1380 
1381   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1382   bool PreviousStartsTemplateExpr =
1383       Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1384   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1385     // If the line contains an '@' sign, the previous token might be an
1386     // annotation, which can precede another identifier/value.
1387     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1388       return LineNode.Tok->is(tok::at);
1389     });
1390     if (HasAt)
1391       return;
1392   }
1393   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1394     return addUnwrappedLine();
1395   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1396   bool NextEndsTemplateExpr =
1397       Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1398   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1399       (PreviousMustBeValue ||
1400        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1401                          tok::minusminus))) {
1402     return addUnwrappedLine();
1403   }
1404   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1405       isJSDeclOrStmt(Keywords, Next)) {
1406     return addUnwrappedLine();
1407   }
1408 }
1409 
1410 void UnwrappedLineParser::parseStructuralElement(
1411     const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1412     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1413   if (Style.Language == FormatStyle::LK_TableGen &&
1414       FormatTok->is(tok::pp_include)) {
1415     nextToken();
1416     if (FormatTok->is(tok::string_literal))
1417       nextToken();
1418     addUnwrappedLine();
1419     return;
1420   }
1421 
1422   if (Style.isCpp()) {
1423     while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1424     }
1425   } else if (Style.isVerilog()) {
1426     if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1427       parseForOrWhileLoop(/*HasParens=*/false);
1428       return;
1429     }
1430     if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1431       parseForOrWhileLoop();
1432       return;
1433     }
1434     if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1435                            Keywords.kw_assume, Keywords.kw_cover)) {
1436       parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1437       return;
1438     }
1439 
1440     // Skip things that can exist before keywords like 'if' and 'case'.
1441     while (true) {
1442       if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1443                              Keywords.kw_unique0)) {
1444         nextToken();
1445       } else if (FormatTok->is(tok::l_paren) &&
1446                  Tokens->peekNextToken()->is(tok::star)) {
1447         parseParens();
1448       } else {
1449         break;
1450       }
1451     }
1452   }
1453 
1454   // Tokens that only make sense at the beginning of a line.
1455   switch (FormatTok->Tok.getKind()) {
1456   case tok::kw_asm:
1457     nextToken();
1458     if (FormatTok->is(tok::l_brace)) {
1459       FormatTok->setFinalizedType(TT_InlineASMBrace);
1460       nextToken();
1461       while (FormatTok && !eof()) {
1462         if (FormatTok->is(tok::r_brace)) {
1463           FormatTok->setFinalizedType(TT_InlineASMBrace);
1464           nextToken();
1465           addUnwrappedLine();
1466           break;
1467         }
1468         FormatTok->Finalized = true;
1469         nextToken();
1470       }
1471     }
1472     break;
1473   case tok::kw_namespace:
1474     parseNamespace();
1475     return;
1476   case tok::kw_public:
1477   case tok::kw_protected:
1478   case tok::kw_private:
1479     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1480         Style.isCSharp()) {
1481       nextToken();
1482     } else {
1483       parseAccessSpecifier();
1484     }
1485     return;
1486   case tok::kw_if: {
1487     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1488       // field/method declaration.
1489       break;
1490     }
1491     FormatToken *Tok = parseIfThenElse(IfKind);
1492     if (IfLeftBrace)
1493       *IfLeftBrace = Tok;
1494     return;
1495   }
1496   case tok::kw_for:
1497   case tok::kw_while:
1498     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1499       // field/method declaration.
1500       break;
1501     }
1502     parseForOrWhileLoop();
1503     return;
1504   case tok::kw_do:
1505     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1506       // field/method declaration.
1507       break;
1508     }
1509     parseDoWhile();
1510     if (HasDoWhile)
1511       *HasDoWhile = true;
1512     return;
1513   case tok::kw_switch:
1514     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1515       // 'switch: string' field declaration.
1516       break;
1517     }
1518     parseSwitch();
1519     return;
1520   case tok::kw_default:
1521     // In Verilog default along with other labels are handled in the next loop.
1522     if (Style.isVerilog())
1523       break;
1524     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1525       // 'default: string' field declaration.
1526       break;
1527     }
1528     nextToken();
1529     if (FormatTok->is(tok::colon)) {
1530       FormatTok->setFinalizedType(TT_CaseLabelColon);
1531       parseLabel();
1532       return;
1533     }
1534     // e.g. "default void f() {}" in a Java interface.
1535     break;
1536   case tok::kw_case:
1537     // Proto: there are no switch/case statements.
1538     if (Style.Language == FormatStyle::LK_Proto) {
1539       nextToken();
1540       return;
1541     }
1542     if (Style.isVerilog()) {
1543       parseBlock();
1544       addUnwrappedLine();
1545       return;
1546     }
1547     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1548       // 'case: string' field declaration.
1549       nextToken();
1550       break;
1551     }
1552     parseCaseLabel();
1553     return;
1554   case tok::kw_try:
1555   case tok::kw___try:
1556     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1557       // field/method declaration.
1558       break;
1559     }
1560     parseTryCatch();
1561     return;
1562   case tok::kw_extern:
1563     nextToken();
1564     if (Style.isVerilog()) {
1565       // In Verilog and extern module declaration looks like a start of module.
1566       // But there is no body and endmodule. So we handle it separately.
1567       if (Keywords.isVerilogHierarchy(*FormatTok)) {
1568         parseVerilogHierarchyHeader();
1569         return;
1570       }
1571     } else if (FormatTok->is(tok::string_literal)) {
1572       nextToken();
1573       if (FormatTok->is(tok::l_brace)) {
1574         if (Style.BraceWrapping.AfterExternBlock)
1575           addUnwrappedLine();
1576         // Either we indent or for backwards compatibility we follow the
1577         // AfterExternBlock style.
1578         unsigned AddLevels =
1579             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1580                     (Style.BraceWrapping.AfterExternBlock &&
1581                      Style.IndentExternBlock ==
1582                          FormatStyle::IEBS_AfterExternBlock)
1583                 ? 1u
1584                 : 0u;
1585         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1586         addUnwrappedLine();
1587         return;
1588       }
1589     }
1590     break;
1591   case tok::kw_export:
1592     if (Style.isJavaScript()) {
1593       parseJavaScriptEs6ImportExport();
1594       return;
1595     }
1596     if (Style.isCpp()) {
1597       nextToken();
1598       if (FormatTok->is(tok::kw_namespace)) {
1599         parseNamespace();
1600         return;
1601       }
1602       if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1603         return;
1604     }
1605     break;
1606   case tok::kw_inline:
1607     nextToken();
1608     if (FormatTok->is(tok::kw_namespace)) {
1609       parseNamespace();
1610       return;
1611     }
1612     break;
1613   case tok::identifier:
1614     if (FormatTok->is(TT_ForEachMacro)) {
1615       parseForOrWhileLoop();
1616       return;
1617     }
1618     if (FormatTok->is(TT_MacroBlockBegin)) {
1619       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1620                  /*MunchSemi=*/false);
1621       return;
1622     }
1623     if (FormatTok->is(Keywords.kw_import)) {
1624       if (Style.isJavaScript()) {
1625         parseJavaScriptEs6ImportExport();
1626         return;
1627       }
1628       if (Style.Language == FormatStyle::LK_Proto) {
1629         nextToken();
1630         if (FormatTok->is(tok::kw_public))
1631           nextToken();
1632         if (FormatTok->isNot(tok::string_literal))
1633           return;
1634         nextToken();
1635         if (FormatTok->is(tok::semi))
1636           nextToken();
1637         addUnwrappedLine();
1638         return;
1639       }
1640       if (Style.isCpp() && parseModuleImport())
1641         return;
1642     }
1643     if (Style.isCpp() &&
1644         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1645                            Keywords.kw_slots, Keywords.kw_qslots)) {
1646       nextToken();
1647       if (FormatTok->is(tok::colon)) {
1648         nextToken();
1649         addUnwrappedLine();
1650         return;
1651       }
1652     }
1653     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1654       parseStatementMacro();
1655       return;
1656     }
1657     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1658       parseNamespace();
1659       return;
1660     }
1661     // In Verilog labels can be any expression, so we don't do them here.
1662     // JS doesn't have macros, and within classes colons indicate fields, not
1663     // labels.
1664     // TableGen doesn't have labels.
1665     if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1666         Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1667       nextToken();
1668       Line->Tokens.begin()->Tok->MustBreakBefore = true;
1669       FormatTok->setFinalizedType(TT_GotoLabelColon);
1670       parseLabel(!Style.IndentGotoLabels);
1671       if (HasLabel)
1672         *HasLabel = true;
1673       return;
1674     }
1675     // In all other cases, parse the declaration.
1676     break;
1677   default:
1678     break;
1679   }
1680 
1681   const bool InRequiresExpression =
1682       OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1683   do {
1684     const FormatToken *Previous = FormatTok->Previous;
1685     switch (FormatTok->Tok.getKind()) {
1686     case tok::at:
1687       nextToken();
1688       if (FormatTok->is(tok::l_brace)) {
1689         nextToken();
1690         parseBracedList();
1691         break;
1692       } else if (Style.Language == FormatStyle::LK_Java &&
1693                  FormatTok->is(Keywords.kw_interface)) {
1694         nextToken();
1695         break;
1696       }
1697       switch (FormatTok->Tok.getObjCKeywordID()) {
1698       case tok::objc_public:
1699       case tok::objc_protected:
1700       case tok::objc_package:
1701       case tok::objc_private:
1702         return parseAccessSpecifier();
1703       case tok::objc_interface:
1704       case tok::objc_implementation:
1705         return parseObjCInterfaceOrImplementation();
1706       case tok::objc_protocol:
1707         if (parseObjCProtocol())
1708           return;
1709         break;
1710       case tok::objc_end:
1711         return; // Handled by the caller.
1712       case tok::objc_optional:
1713       case tok::objc_required:
1714         nextToken();
1715         addUnwrappedLine();
1716         return;
1717       case tok::objc_autoreleasepool:
1718         nextToken();
1719         if (FormatTok->is(tok::l_brace)) {
1720           if (Style.BraceWrapping.AfterControlStatement ==
1721               FormatStyle::BWACS_Always) {
1722             addUnwrappedLine();
1723           }
1724           parseBlock();
1725         }
1726         addUnwrappedLine();
1727         return;
1728       case tok::objc_synchronized:
1729         nextToken();
1730         if (FormatTok->is(tok::l_paren)) {
1731           // Skip synchronization object
1732           parseParens();
1733         }
1734         if (FormatTok->is(tok::l_brace)) {
1735           if (Style.BraceWrapping.AfterControlStatement ==
1736               FormatStyle::BWACS_Always) {
1737             addUnwrappedLine();
1738           }
1739           parseBlock();
1740         }
1741         addUnwrappedLine();
1742         return;
1743       case tok::objc_try:
1744         // This branch isn't strictly necessary (the kw_try case below would
1745         // do this too after the tok::at is parsed above).  But be explicit.
1746         parseTryCatch();
1747         return;
1748       default:
1749         break;
1750       }
1751       break;
1752     case tok::kw_requires: {
1753       if (Style.isCpp()) {
1754         bool ParsedClause = parseRequires();
1755         if (ParsedClause)
1756           return;
1757       } else {
1758         nextToken();
1759       }
1760       break;
1761     }
1762     case tok::kw_enum:
1763       // Ignore if this is part of "template <enum ...".
1764       if (Previous && Previous->is(tok::less)) {
1765         nextToken();
1766         break;
1767       }
1768 
1769       // parseEnum falls through and does not yet add an unwrapped line as an
1770       // enum definition can start a structural element.
1771       if (!parseEnum())
1772         break;
1773       // This only applies to C++ and Verilog.
1774       if (!Style.isCpp() && !Style.isVerilog()) {
1775         addUnwrappedLine();
1776         return;
1777       }
1778       break;
1779     case tok::kw_typedef:
1780       nextToken();
1781       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1782                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1783                              Keywords.kw_CF_CLOSED_ENUM,
1784                              Keywords.kw_NS_CLOSED_ENUM)) {
1785         parseEnum();
1786       }
1787       break;
1788     case tok::kw_class:
1789       if (Style.isVerilog()) {
1790         parseBlock();
1791         addUnwrappedLine();
1792         return;
1793       }
1794       if (Style.isTableGen()) {
1795         // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1796         // This is same as def and so on.
1797         nextToken();
1798         break;
1799       }
1800       [[fallthrough]];
1801     case tok::kw_struct:
1802     case tok::kw_union:
1803       if (parseStructLike())
1804         return;
1805       break;
1806     case tok::kw_decltype:
1807       nextToken();
1808       if (FormatTok->is(tok::l_paren)) {
1809         parseParens();
1810         assert(FormatTok->Previous);
1811         if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1812                                               tok::l_paren)) {
1813           Line->SeenDecltypeAuto = true;
1814         }
1815       }
1816       break;
1817     case tok::period:
1818       nextToken();
1819       // In Java, classes have an implicit static member "class".
1820       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1821           FormatTok->is(tok::kw_class)) {
1822         nextToken();
1823       }
1824       if (Style.isJavaScript() && FormatTok &&
1825           FormatTok->Tok.getIdentifierInfo()) {
1826         // JavaScript only has pseudo keywords, all keywords are allowed to
1827         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1828         nextToken();
1829       }
1830       break;
1831     case tok::semi:
1832       nextToken();
1833       addUnwrappedLine();
1834       return;
1835     case tok::r_brace:
1836       addUnwrappedLine();
1837       return;
1838     case tok::l_paren: {
1839       parseParens();
1840       // Break the unwrapped line if a K&R C function definition has a parameter
1841       // declaration.
1842       if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1843         break;
1844       if (isC78ParameterDecl(FormatTok,
1845                              Tokens->peekNextToken(/*SkipComment=*/true),
1846                              Previous)) {
1847         addUnwrappedLine();
1848         return;
1849       }
1850       break;
1851     }
1852     case tok::kw_operator:
1853       nextToken();
1854       if (FormatTok->isBinaryOperator())
1855         nextToken();
1856       break;
1857     case tok::caret:
1858       nextToken();
1859       // Block return type.
1860       if (FormatTok->Tok.isAnyIdentifier() ||
1861           FormatTok->isSimpleTypeSpecifier()) {
1862         nextToken();
1863         // Return types: pointers are ok too.
1864         while (FormatTok->is(tok::star))
1865           nextToken();
1866       }
1867       // Block argument list.
1868       if (FormatTok->is(tok::l_paren))
1869         parseParens();
1870       // Block body.
1871       if (FormatTok->is(tok::l_brace))
1872         parseChildBlock();
1873       break;
1874     case tok::l_brace:
1875       if (InRequiresExpression)
1876         FormatTok->setFinalizedType(TT_BracedListLBrace);
1877       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1878         IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1879         // A block outside of parentheses must be the last part of a
1880         // structural element.
1881         // FIXME: Figure out cases where this is not true, and add projections
1882         // for them (the one we know is missing are lambdas).
1883         if (Style.Language == FormatStyle::LK_Java &&
1884             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1885           // If necessary, we could set the type to something different than
1886           // TT_FunctionLBrace.
1887           if (Style.BraceWrapping.AfterControlStatement ==
1888               FormatStyle::BWACS_Always) {
1889             addUnwrappedLine();
1890           }
1891         } else if (Style.BraceWrapping.AfterFunction) {
1892           addUnwrappedLine();
1893         }
1894         FormatTok->setFinalizedType(TT_FunctionLBrace);
1895         parseBlock();
1896         IsDecltypeAutoFunction = false;
1897         addUnwrappedLine();
1898         return;
1899       }
1900       // Otherwise this was a braced init list, and the structural
1901       // element continues.
1902       break;
1903     case tok::kw_try:
1904       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1905         // field/method declaration.
1906         nextToken();
1907         break;
1908       }
1909       // We arrive here when parsing function-try blocks.
1910       if (Style.BraceWrapping.AfterFunction)
1911         addUnwrappedLine();
1912       parseTryCatch();
1913       return;
1914     case tok::identifier: {
1915       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1916           Line->MustBeDeclaration) {
1917         addUnwrappedLine();
1918         parseCSharpGenericTypeConstraint();
1919         break;
1920       }
1921       if (FormatTok->is(TT_MacroBlockEnd)) {
1922         addUnwrappedLine();
1923         return;
1924       }
1925 
1926       // Function declarations (as opposed to function expressions) are parsed
1927       // on their own unwrapped line by continuing this loop. Function
1928       // expressions (functions that are not on their own line) must not create
1929       // a new unwrapped line, so they are special cased below.
1930       size_t TokenCount = Line->Tokens.size();
1931       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1932           (TokenCount > 1 ||
1933            (TokenCount == 1 &&
1934             Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1935         tryToParseJSFunction();
1936         break;
1937       }
1938       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1939           FormatTok->is(Keywords.kw_interface)) {
1940         if (Style.isJavaScript()) {
1941           // In JavaScript/TypeScript, "interface" can be used as a standalone
1942           // identifier, e.g. in `var interface = 1;`. If "interface" is
1943           // followed by another identifier, it is very likely to be an actual
1944           // interface declaration.
1945           unsigned StoredPosition = Tokens->getPosition();
1946           FormatToken *Next = Tokens->getNextToken();
1947           FormatTok = Tokens->setPosition(StoredPosition);
1948           if (!mustBeJSIdent(Keywords, Next)) {
1949             nextToken();
1950             break;
1951           }
1952         }
1953         parseRecord();
1954         addUnwrappedLine();
1955         return;
1956       }
1957 
1958       if (Style.isVerilog()) {
1959         if (FormatTok->is(Keywords.kw_table)) {
1960           parseVerilogTable();
1961           return;
1962         }
1963         if (Keywords.isVerilogBegin(*FormatTok) ||
1964             Keywords.isVerilogHierarchy(*FormatTok)) {
1965           parseBlock();
1966           addUnwrappedLine();
1967           return;
1968         }
1969       }
1970 
1971       if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1972         if (parseStructLike())
1973           return;
1974         break;
1975       }
1976 
1977       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1978         parseStatementMacro();
1979         return;
1980       }
1981 
1982       // See if the following token should start a new unwrapped line.
1983       StringRef Text = FormatTok->TokenText;
1984 
1985       FormatToken *PreviousToken = FormatTok;
1986       nextToken();
1987 
1988       // JS doesn't have macros, and within classes colons indicate fields, not
1989       // labels.
1990       if (Style.isJavaScript())
1991         break;
1992 
1993       auto OneTokenSoFar = [&]() {
1994         auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1995         while (I != E && I->Tok->is(tok::comment))
1996           ++I;
1997         if (Style.isVerilog())
1998           while (I != E && I->Tok->is(tok::hash))
1999             ++I;
2000         return I != E && (++I == E);
2001       };
2002       if (OneTokenSoFar()) {
2003         // Recognize function-like macro usages without trailing semicolon as
2004         // well as free-standing macros like Q_OBJECT.
2005         bool FunctionLike = FormatTok->is(tok::l_paren);
2006         if (FunctionLike)
2007           parseParens();
2008 
2009         bool FollowedByNewline =
2010             CommentsBeforeNextToken.empty()
2011                 ? FormatTok->NewlinesBefore > 0
2012                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2013 
2014         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2015             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2016           if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2017             PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2018           addUnwrappedLine();
2019           return;
2020         }
2021       }
2022       break;
2023     }
2024     case tok::equal:
2025       if ((Style.isJavaScript() || Style.isCSharp()) &&
2026           FormatTok->is(TT_FatArrow)) {
2027         tryToParseChildBlock();
2028         break;
2029       }
2030 
2031       nextToken();
2032       if (FormatTok->is(tok::l_brace)) {
2033         // Block kind should probably be set to BK_BracedInit for any language.
2034         // C# needs this change to ensure that array initialisers and object
2035         // initialisers are indented the same way.
2036         if (Style.isCSharp())
2037           FormatTok->setBlockKind(BK_BracedInit);
2038         // TableGen's defset statement has syntax of the form,
2039         // `defset <type> <name> = { <statement>... }`
2040         if (Style.isTableGen() &&
2041             Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2042           FormatTok->setFinalizedType(TT_FunctionLBrace);
2043           parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2044                      /*MunchSemi=*/false);
2045           addUnwrappedLine();
2046           break;
2047         }
2048         nextToken();
2049         parseBracedList();
2050       } else if (Style.Language == FormatStyle::LK_Proto &&
2051                  FormatTok->is(tok::less)) {
2052         nextToken();
2053         parseBracedList(/*IsAngleBracket=*/true);
2054       }
2055       break;
2056     case tok::l_square:
2057       parseSquare();
2058       break;
2059     case tok::kw_new:
2060       parseNew();
2061       break;
2062     case tok::kw_case:
2063       // Proto: there are no switch/case statements.
2064       if (Style.Language == FormatStyle::LK_Proto) {
2065         nextToken();
2066         return;
2067       }
2068       // In Verilog switch is called case.
2069       if (Style.isVerilog()) {
2070         parseBlock();
2071         addUnwrappedLine();
2072         return;
2073       }
2074       if (Style.isJavaScript() && Line->MustBeDeclaration) {
2075         // 'case: string' field declaration.
2076         nextToken();
2077         break;
2078       }
2079       parseCaseLabel();
2080       break;
2081     case tok::kw_default:
2082       nextToken();
2083       if (Style.isVerilog()) {
2084         if (FormatTok->is(tok::colon)) {
2085           // The label will be handled in the next iteration.
2086           break;
2087         }
2088         if (FormatTok->is(Keywords.kw_clocking)) {
2089           // A default clocking block.
2090           parseBlock();
2091           addUnwrappedLine();
2092           return;
2093         }
2094         parseVerilogCaseLabel();
2095         return;
2096       }
2097       break;
2098     case tok::colon:
2099       nextToken();
2100       if (Style.isVerilog()) {
2101         parseVerilogCaseLabel();
2102         return;
2103       }
2104       break;
2105     default:
2106       nextToken();
2107       break;
2108     }
2109   } while (!eof());
2110 }
2111 
2112 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2113   assert(FormatTok->is(tok::l_brace));
2114   if (!Style.isCSharp())
2115     return false;
2116   // See if it's a property accessor.
2117   if (FormatTok->Previous->isNot(tok::identifier))
2118     return false;
2119 
2120   // See if we are inside a property accessor.
2121   //
2122   // Record the current tokenPosition so that we can advance and
2123   // reset the current token. `Next` is not set yet so we need
2124   // another way to advance along the token stream.
2125   unsigned int StoredPosition = Tokens->getPosition();
2126   FormatToken *Tok = Tokens->getNextToken();
2127 
2128   // A trivial property accessor is of the form:
2129   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2130   // Track these as they do not require line breaks to be introduced.
2131   bool HasSpecialAccessor = false;
2132   bool IsTrivialPropertyAccessor = true;
2133   while (!eof()) {
2134     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2135                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2136                      Keywords.kw_init, Keywords.kw_set)) {
2137       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2138         HasSpecialAccessor = true;
2139       Tok = Tokens->getNextToken();
2140       continue;
2141     }
2142     if (Tok->isNot(tok::r_brace))
2143       IsTrivialPropertyAccessor = false;
2144     break;
2145   }
2146 
2147   if (!HasSpecialAccessor) {
2148     Tokens->setPosition(StoredPosition);
2149     return false;
2150   }
2151 
2152   // Try to parse the property accessor:
2153   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2154   Tokens->setPosition(StoredPosition);
2155   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2156     addUnwrappedLine();
2157   nextToken();
2158   do {
2159     switch (FormatTok->Tok.getKind()) {
2160     case tok::r_brace:
2161       nextToken();
2162       if (FormatTok->is(tok::equal)) {
2163         while (!eof() && FormatTok->isNot(tok::semi))
2164           nextToken();
2165         nextToken();
2166       }
2167       addUnwrappedLine();
2168       return true;
2169     case tok::l_brace:
2170       ++Line->Level;
2171       parseBlock(/*MustBeDeclaration=*/true);
2172       addUnwrappedLine();
2173       --Line->Level;
2174       break;
2175     case tok::equal:
2176       if (FormatTok->is(TT_FatArrow)) {
2177         ++Line->Level;
2178         do {
2179           nextToken();
2180         } while (!eof() && FormatTok->isNot(tok::semi));
2181         nextToken();
2182         addUnwrappedLine();
2183         --Line->Level;
2184         break;
2185       }
2186       nextToken();
2187       break;
2188     default:
2189       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2190                              Keywords.kw_set) &&
2191           !IsTrivialPropertyAccessor) {
2192         // Non-trivial get/set needs to be on its own line.
2193         addUnwrappedLine();
2194       }
2195       nextToken();
2196     }
2197   } while (!eof());
2198 
2199   // Unreachable for well-formed code (paired '{' and '}').
2200   return true;
2201 }
2202 
2203 bool UnwrappedLineParser::tryToParseLambda() {
2204   assert(FormatTok->is(tok::l_square));
2205   if (!Style.isCpp()) {
2206     nextToken();
2207     return false;
2208   }
2209   FormatToken &LSquare = *FormatTok;
2210   if (!tryToParseLambdaIntroducer())
2211     return false;
2212 
2213   bool SeenArrow = false;
2214   bool InTemplateParameterList = false;
2215 
2216   while (FormatTok->isNot(tok::l_brace)) {
2217     if (FormatTok->isSimpleTypeSpecifier()) {
2218       nextToken();
2219       continue;
2220     }
2221     switch (FormatTok->Tok.getKind()) {
2222     case tok::l_brace:
2223       break;
2224     case tok::l_paren:
2225       parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2226       break;
2227     case tok::l_square:
2228       parseSquare();
2229       break;
2230     case tok::less:
2231       assert(FormatTok->Previous);
2232       if (FormatTok->Previous->is(tok::r_square))
2233         InTemplateParameterList = true;
2234       nextToken();
2235       break;
2236     case tok::kw_auto:
2237     case tok::kw_class:
2238     case tok::kw_template:
2239     case tok::kw_typename:
2240     case tok::amp:
2241     case tok::star:
2242     case tok::kw_const:
2243     case tok::kw_constexpr:
2244     case tok::kw_consteval:
2245     case tok::comma:
2246     case tok::greater:
2247     case tok::identifier:
2248     case tok::numeric_constant:
2249     case tok::coloncolon:
2250     case tok::kw_mutable:
2251     case tok::kw_noexcept:
2252     case tok::kw_static:
2253       nextToken();
2254       break;
2255     // Specialization of a template with an integer parameter can contain
2256     // arithmetic, logical, comparison and ternary operators.
2257     //
2258     // FIXME: This also accepts sequences of operators that are not in the scope
2259     // of a template argument list.
2260     //
2261     // In a C++ lambda a template type can only occur after an arrow. We use
2262     // this as an heuristic to distinguish between Objective-C expressions
2263     // followed by an `a->b` expression, such as:
2264     // ([obj func:arg] + a->b)
2265     // Otherwise the code below would parse as a lambda.
2266     case tok::plus:
2267     case tok::minus:
2268     case tok::exclaim:
2269     case tok::tilde:
2270     case tok::slash:
2271     case tok::percent:
2272     case tok::lessless:
2273     case tok::pipe:
2274     case tok::pipepipe:
2275     case tok::ampamp:
2276     case tok::caret:
2277     case tok::equalequal:
2278     case tok::exclaimequal:
2279     case tok::greaterequal:
2280     case tok::lessequal:
2281     case tok::question:
2282     case tok::colon:
2283     case tok::ellipsis:
2284     case tok::kw_true:
2285     case tok::kw_false:
2286       if (SeenArrow || InTemplateParameterList) {
2287         nextToken();
2288         break;
2289       }
2290       return true;
2291     case tok::arrow:
2292       // This might or might not actually be a lambda arrow (this could be an
2293       // ObjC method invocation followed by a dereferencing arrow). We might
2294       // reset this back to TT_Unknown in TokenAnnotator.
2295       FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2296       SeenArrow = true;
2297       nextToken();
2298       break;
2299     case tok::kw_requires: {
2300       auto *RequiresToken = FormatTok;
2301       nextToken();
2302       parseRequiresClause(RequiresToken);
2303       break;
2304     }
2305     case tok::equal:
2306       if (!InTemplateParameterList)
2307         return true;
2308       nextToken();
2309       break;
2310     default:
2311       return true;
2312     }
2313   }
2314 
2315   FormatTok->setFinalizedType(TT_LambdaLBrace);
2316   LSquare.setFinalizedType(TT_LambdaLSquare);
2317 
2318   NestedLambdas.push_back(Line->SeenDecltypeAuto);
2319   parseChildBlock();
2320   assert(!NestedLambdas.empty());
2321   NestedLambdas.pop_back();
2322 
2323   return true;
2324 }
2325 
2326 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2327   const FormatToken *Previous = FormatTok->Previous;
2328   const FormatToken *LeftSquare = FormatTok;
2329   nextToken();
2330   if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2331                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2332                                         tok::kw_co_yield, tok::kw_co_return)) ||
2333                     Previous->closesScope())) ||
2334       LeftSquare->isCppStructuredBinding(Style)) {
2335     return false;
2336   }
2337   if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2338     return false;
2339   if (FormatTok->is(tok::r_square)) {
2340     const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2341     if (Next->is(tok::greater))
2342       return false;
2343   }
2344   parseSquare(/*LambdaIntroducer=*/true);
2345   return true;
2346 }
2347 
2348 void UnwrappedLineParser::tryToParseJSFunction() {
2349   assert(FormatTok->is(Keywords.kw_function));
2350   if (FormatTok->is(Keywords.kw_async))
2351     nextToken();
2352   // Consume "function".
2353   nextToken();
2354 
2355   // Consume * (generator function). Treat it like C++'s overloaded operators.
2356   if (FormatTok->is(tok::star)) {
2357     FormatTok->setFinalizedType(TT_OverloadedOperator);
2358     nextToken();
2359   }
2360 
2361   // Consume function name.
2362   if (FormatTok->is(tok::identifier))
2363     nextToken();
2364 
2365   if (FormatTok->isNot(tok::l_paren))
2366     return;
2367 
2368   // Parse formal parameter list.
2369   parseParens();
2370 
2371   if (FormatTok->is(tok::colon)) {
2372     // Parse a type definition.
2373     nextToken();
2374 
2375     // Eat the type declaration. For braced inline object types, balance braces,
2376     // otherwise just parse until finding an l_brace for the function body.
2377     if (FormatTok->is(tok::l_brace))
2378       tryToParseBracedList();
2379     else
2380       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2381         nextToken();
2382   }
2383 
2384   if (FormatTok->is(tok::semi))
2385     return;
2386 
2387   parseChildBlock();
2388 }
2389 
2390 bool UnwrappedLineParser::tryToParseBracedList() {
2391   if (FormatTok->is(BK_Unknown))
2392     calculateBraceTypes();
2393   assert(FormatTok->isNot(BK_Unknown));
2394   if (FormatTok->is(BK_Block))
2395     return false;
2396   nextToken();
2397   parseBracedList();
2398   return true;
2399 }
2400 
2401 bool UnwrappedLineParser::tryToParseChildBlock() {
2402   assert(Style.isJavaScript() || Style.isCSharp());
2403   assert(FormatTok->is(TT_FatArrow));
2404   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2405   // They always start an expression or a child block if followed by a curly
2406   // brace.
2407   nextToken();
2408   if (FormatTok->isNot(tok::l_brace))
2409     return false;
2410   parseChildBlock();
2411   return true;
2412 }
2413 
2414 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2415   bool HasError = false;
2416 
2417   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2418   // replace this by using parseAssignmentExpression() inside.
2419   do {
2420     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2421         tryToParseChildBlock()) {
2422       continue;
2423     }
2424     if (Style.isJavaScript()) {
2425       if (FormatTok->is(Keywords.kw_function)) {
2426         tryToParseJSFunction();
2427         continue;
2428       }
2429       if (FormatTok->is(tok::l_brace)) {
2430         // Could be a method inside of a braced list `{a() { return 1; }}`.
2431         if (tryToParseBracedList())
2432           continue;
2433         parseChildBlock();
2434       }
2435     }
2436     if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2437       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2438         addUnwrappedLine();
2439       nextToken();
2440       return !HasError;
2441     }
2442     switch (FormatTok->Tok.getKind()) {
2443     case tok::l_square:
2444       if (Style.isCSharp())
2445         parseSquare();
2446       else
2447         tryToParseLambda();
2448       break;
2449     case tok::l_paren:
2450       parseParens();
2451       // JavaScript can just have free standing methods and getters/setters in
2452       // object literals. Detect them by a "{" following ")".
2453       if (Style.isJavaScript()) {
2454         if (FormatTok->is(tok::l_brace))
2455           parseChildBlock();
2456         break;
2457       }
2458       break;
2459     case tok::l_brace:
2460       // Assume there are no blocks inside a braced init list apart
2461       // from the ones we explicitly parse out (like lambdas).
2462       FormatTok->setBlockKind(BK_BracedInit);
2463       nextToken();
2464       parseBracedList();
2465       break;
2466     case tok::less:
2467       nextToken();
2468       if (IsAngleBracket)
2469         parseBracedList(/*IsAngleBracket=*/true);
2470       break;
2471     case tok::semi:
2472       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2473       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2474       // used for error recovery if we have otherwise determined that this is
2475       // a braced list.
2476       if (Style.isJavaScript()) {
2477         nextToken();
2478         break;
2479       }
2480       HasError = true;
2481       if (!IsEnum)
2482         return false;
2483       nextToken();
2484       break;
2485     case tok::comma:
2486       nextToken();
2487       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2488         addUnwrappedLine();
2489       break;
2490     default:
2491       nextToken();
2492       break;
2493     }
2494   } while (!eof());
2495   return false;
2496 }
2497 
2498 /// \brief Parses a pair of parentheses (and everything between them).
2499 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2500 /// double ampersands. This applies for all nested scopes as well.
2501 ///
2502 /// Returns whether there is a `=` token between the parentheses.
2503 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2504   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2505   auto *LeftParen = FormatTok;
2506   bool SeenEqual = false;
2507   const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2508   nextToken();
2509   do {
2510     switch (FormatTok->Tok.getKind()) {
2511     case tok::l_paren:
2512       if (parseParens(AmpAmpTokenType))
2513         SeenEqual = true;
2514       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2515         parseChildBlock();
2516       break;
2517     case tok::r_paren:
2518       if (!MightBeStmtExpr && !Line->InMacroBody &&
2519           Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2520         const auto *Prev = LeftParen->Previous;
2521         const auto *Next = Tokens->peekNextToken();
2522         const bool DoubleParens =
2523             Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2524         const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2525         const bool Blacklisted =
2526             PrevPrev &&
2527             (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2528              (SeenEqual &&
2529               (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2530                PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2531         const bool ReturnParens =
2532             Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2533             ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2534              (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2535             Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2536             Next->is(tok::semi);
2537         if ((DoubleParens && !Blacklisted) || ReturnParens) {
2538           LeftParen->Optional = true;
2539           FormatTok->Optional = true;
2540         }
2541       }
2542       nextToken();
2543       return SeenEqual;
2544     case tok::r_brace:
2545       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2546       return SeenEqual;
2547     case tok::l_square:
2548       tryToParseLambda();
2549       break;
2550     case tok::l_brace:
2551       if (!tryToParseBracedList())
2552         parseChildBlock();
2553       break;
2554     case tok::at:
2555       nextToken();
2556       if (FormatTok->is(tok::l_brace)) {
2557         nextToken();
2558         parseBracedList();
2559       }
2560       break;
2561     case tok::equal:
2562       SeenEqual = true;
2563       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2564         tryToParseChildBlock();
2565       else
2566         nextToken();
2567       break;
2568     case tok::kw_class:
2569       if (Style.isJavaScript())
2570         parseRecord(/*ParseAsExpr=*/true);
2571       else
2572         nextToken();
2573       break;
2574     case tok::identifier:
2575       if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2576         tryToParseJSFunction();
2577       else
2578         nextToken();
2579       break;
2580     case tok::kw_requires: {
2581       auto RequiresToken = FormatTok;
2582       nextToken();
2583       parseRequiresExpression(RequiresToken);
2584       break;
2585     }
2586     case tok::ampamp:
2587       if (AmpAmpTokenType != TT_Unknown)
2588         FormatTok->setFinalizedType(AmpAmpTokenType);
2589       [[fallthrough]];
2590     default:
2591       nextToken();
2592       break;
2593     }
2594   } while (!eof());
2595   return SeenEqual;
2596 }
2597 
2598 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2599   if (!LambdaIntroducer) {
2600     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2601     if (tryToParseLambda())
2602       return;
2603   }
2604   do {
2605     switch (FormatTok->Tok.getKind()) {
2606     case tok::l_paren:
2607       parseParens();
2608       break;
2609     case tok::r_square:
2610       nextToken();
2611       return;
2612     case tok::r_brace:
2613       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2614       return;
2615     case tok::l_square:
2616       parseSquare();
2617       break;
2618     case tok::l_brace: {
2619       if (!tryToParseBracedList())
2620         parseChildBlock();
2621       break;
2622     }
2623     case tok::at:
2624       nextToken();
2625       if (FormatTok->is(tok::l_brace)) {
2626         nextToken();
2627         parseBracedList();
2628       }
2629       break;
2630     default:
2631       nextToken();
2632       break;
2633     }
2634   } while (!eof());
2635 }
2636 
2637 void UnwrappedLineParser::keepAncestorBraces() {
2638   if (!Style.RemoveBracesLLVM)
2639     return;
2640 
2641   const int MaxNestingLevels = 2;
2642   const int Size = NestedTooDeep.size();
2643   if (Size >= MaxNestingLevels)
2644     NestedTooDeep[Size - MaxNestingLevels] = true;
2645   NestedTooDeep.push_back(false);
2646 }
2647 
2648 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2649   for (const auto &Token : llvm::reverse(Line.Tokens))
2650     if (Token.Tok->isNot(tok::comment))
2651       return Token.Tok;
2652 
2653   return nullptr;
2654 }
2655 
2656 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2657   FormatToken *Tok = nullptr;
2658 
2659   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2660       PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2661     Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2662               ? getLastNonComment(*Line)
2663               : Line->Tokens.back().Tok;
2664     assert(Tok);
2665     if (Tok->BraceCount < 0) {
2666       assert(Tok->BraceCount == -1);
2667       Tok = nullptr;
2668     } else {
2669       Tok->BraceCount = -1;
2670     }
2671   }
2672 
2673   addUnwrappedLine();
2674   ++Line->Level;
2675   parseStructuralElement();
2676 
2677   if (Tok) {
2678     assert(!Line->InPPDirective);
2679     Tok = nullptr;
2680     for (const auto &L : llvm::reverse(*CurrentLines)) {
2681       if (!L.InPPDirective && getLastNonComment(L)) {
2682         Tok = L.Tokens.back().Tok;
2683         break;
2684       }
2685     }
2686     assert(Tok);
2687     ++Tok->BraceCount;
2688   }
2689 
2690   if (CheckEOF && eof())
2691     addUnwrappedLine();
2692 
2693   --Line->Level;
2694 }
2695 
2696 static void markOptionalBraces(FormatToken *LeftBrace) {
2697   if (!LeftBrace)
2698     return;
2699 
2700   assert(LeftBrace->is(tok::l_brace));
2701 
2702   FormatToken *RightBrace = LeftBrace->MatchingParen;
2703   if (!RightBrace) {
2704     assert(!LeftBrace->Optional);
2705     return;
2706   }
2707 
2708   assert(RightBrace->is(tok::r_brace));
2709   assert(RightBrace->MatchingParen == LeftBrace);
2710   assert(LeftBrace->Optional == RightBrace->Optional);
2711 
2712   LeftBrace->Optional = true;
2713   RightBrace->Optional = true;
2714 }
2715 
2716 void UnwrappedLineParser::handleAttributes() {
2717   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2718   if (FormatTok->isAttribute())
2719     nextToken();
2720   else if (FormatTok->is(tok::l_square))
2721     handleCppAttributes();
2722 }
2723 
2724 bool UnwrappedLineParser::handleCppAttributes() {
2725   // Handle [[likely]] / [[unlikely]] attributes.
2726   assert(FormatTok->is(tok::l_square));
2727   if (!tryToParseSimpleAttribute())
2728     return false;
2729   parseSquare();
2730   return true;
2731 }
2732 
2733 /// Returns whether \c Tok begins a block.
2734 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2735   // FIXME: rename the function or make
2736   // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2737   return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2738                            : Tok.is(tok::l_brace);
2739 }
2740 
2741 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2742                                                   bool KeepBraces,
2743                                                   bool IsVerilogAssert) {
2744   assert((FormatTok->is(tok::kw_if) ||
2745           (Style.isVerilog() &&
2746            FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2747                               Keywords.kw_assume, Keywords.kw_cover))) &&
2748          "'if' expected");
2749   nextToken();
2750 
2751   if (IsVerilogAssert) {
2752     // Handle `assert #0` and `assert final`.
2753     if (FormatTok->is(Keywords.kw_verilogHash)) {
2754       nextToken();
2755       if (FormatTok->is(tok::numeric_constant))
2756         nextToken();
2757     } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2758                                   Keywords.kw_sequence)) {
2759       nextToken();
2760     }
2761   }
2762 
2763   // TableGen's if statement has the form of `if <cond> then { ... }`.
2764   if (Style.isTableGen()) {
2765     while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
2766       // Simply skip until then. This range only contains a value.
2767       nextToken();
2768     }
2769   }
2770 
2771   // Handle `if !consteval`.
2772   if (FormatTok->is(tok::exclaim))
2773     nextToken();
2774 
2775   bool KeepIfBraces = true;
2776   if (FormatTok->is(tok::kw_consteval)) {
2777     nextToken();
2778   } else {
2779     KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2780     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2781       nextToken();
2782     if (FormatTok->is(tok::l_paren)) {
2783       FormatTok->setFinalizedType(TT_ConditionLParen);
2784       parseParens();
2785     }
2786   }
2787   handleAttributes();
2788   // The then action is optional in Verilog assert statements.
2789   if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2790     nextToken();
2791     addUnwrappedLine();
2792     return nullptr;
2793   }
2794 
2795   bool NeedsUnwrappedLine = false;
2796   keepAncestorBraces();
2797 
2798   FormatToken *IfLeftBrace = nullptr;
2799   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2800 
2801   if (isBlockBegin(*FormatTok)) {
2802     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2803     IfLeftBrace = FormatTok;
2804     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2805     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2806                /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2807     setPreviousRBraceType(TT_ControlStatementRBrace);
2808     if (Style.BraceWrapping.BeforeElse)
2809       addUnwrappedLine();
2810     else
2811       NeedsUnwrappedLine = true;
2812   } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2813     addUnwrappedLine();
2814   } else {
2815     parseUnbracedBody();
2816   }
2817 
2818   if (Style.RemoveBracesLLVM) {
2819     assert(!NestedTooDeep.empty());
2820     KeepIfBraces = KeepIfBraces ||
2821                    (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2822                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2823                    IfBlockKind == IfStmtKind::IfElseIf;
2824   }
2825 
2826   bool KeepElseBraces = KeepIfBraces;
2827   FormatToken *ElseLeftBrace = nullptr;
2828   IfStmtKind Kind = IfStmtKind::IfOnly;
2829 
2830   if (FormatTok->is(tok::kw_else)) {
2831     if (Style.RemoveBracesLLVM) {
2832       NestedTooDeep.back() = false;
2833       Kind = IfStmtKind::IfElse;
2834     }
2835     nextToken();
2836     handleAttributes();
2837     if (isBlockBegin(*FormatTok)) {
2838       const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2839       FormatTok->setFinalizedType(TT_ElseLBrace);
2840       ElseLeftBrace = FormatTok;
2841       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2842       IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2843       FormatToken *IfLBrace =
2844           parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2845                      /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2846       setPreviousRBraceType(TT_ElseRBrace);
2847       if (FormatTok->is(tok::kw_else)) {
2848         KeepElseBraces = KeepElseBraces ||
2849                          ElseBlockKind == IfStmtKind::IfOnly ||
2850                          ElseBlockKind == IfStmtKind::IfElseIf;
2851       } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2852         KeepElseBraces = true;
2853         assert(ElseLeftBrace->MatchingParen);
2854         markOptionalBraces(ElseLeftBrace);
2855       }
2856       addUnwrappedLine();
2857     } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2858       const FormatToken *Previous = Tokens->getPreviousToken();
2859       assert(Previous);
2860       const bool IsPrecededByComment = Previous->is(tok::comment);
2861       if (IsPrecededByComment) {
2862         addUnwrappedLine();
2863         ++Line->Level;
2864       }
2865       bool TooDeep = true;
2866       if (Style.RemoveBracesLLVM) {
2867         Kind = IfStmtKind::IfElseIf;
2868         TooDeep = NestedTooDeep.pop_back_val();
2869       }
2870       ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2871       if (Style.RemoveBracesLLVM)
2872         NestedTooDeep.push_back(TooDeep);
2873       if (IsPrecededByComment)
2874         --Line->Level;
2875     } else {
2876       parseUnbracedBody(/*CheckEOF=*/true);
2877     }
2878   } else {
2879     KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2880     if (NeedsUnwrappedLine)
2881       addUnwrappedLine();
2882   }
2883 
2884   if (!Style.RemoveBracesLLVM)
2885     return nullptr;
2886 
2887   assert(!NestedTooDeep.empty());
2888   KeepElseBraces = KeepElseBraces ||
2889                    (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2890                    NestedTooDeep.back();
2891 
2892   NestedTooDeep.pop_back();
2893 
2894   if (!KeepIfBraces && !KeepElseBraces) {
2895     markOptionalBraces(IfLeftBrace);
2896     markOptionalBraces(ElseLeftBrace);
2897   } else if (IfLeftBrace) {
2898     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2899     if (IfRightBrace) {
2900       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2901       assert(!IfLeftBrace->Optional);
2902       assert(!IfRightBrace->Optional);
2903       IfLeftBrace->MatchingParen = nullptr;
2904       IfRightBrace->MatchingParen = nullptr;
2905     }
2906   }
2907 
2908   if (IfKind)
2909     *IfKind = Kind;
2910 
2911   return IfLeftBrace;
2912 }
2913 
2914 void UnwrappedLineParser::parseTryCatch() {
2915   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2916   nextToken();
2917   bool NeedsUnwrappedLine = false;
2918   if (FormatTok->is(tok::colon)) {
2919     // We are in a function try block, what comes is an initializer list.
2920     nextToken();
2921 
2922     // In case identifiers were removed by clang-tidy, what might follow is
2923     // multiple commas in sequence - before the first identifier.
2924     while (FormatTok->is(tok::comma))
2925       nextToken();
2926 
2927     while (FormatTok->is(tok::identifier)) {
2928       nextToken();
2929       if (FormatTok->is(tok::l_paren))
2930         parseParens();
2931       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2932           FormatTok->is(tok::l_brace)) {
2933         do {
2934           nextToken();
2935         } while (FormatTok->isNot(tok::r_brace));
2936         nextToken();
2937       }
2938 
2939       // In case identifiers were removed by clang-tidy, what might follow is
2940       // multiple commas in sequence - after the first identifier.
2941       while (FormatTok->is(tok::comma))
2942         nextToken();
2943     }
2944   }
2945   // Parse try with resource.
2946   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2947     parseParens();
2948 
2949   keepAncestorBraces();
2950 
2951   if (FormatTok->is(tok::l_brace)) {
2952     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2953     parseBlock();
2954     if (Style.BraceWrapping.BeforeCatch)
2955       addUnwrappedLine();
2956     else
2957       NeedsUnwrappedLine = true;
2958   } else if (FormatTok->isNot(tok::kw_catch)) {
2959     // The C++ standard requires a compound-statement after a try.
2960     // If there's none, we try to assume there's a structuralElement
2961     // and try to continue.
2962     addUnwrappedLine();
2963     ++Line->Level;
2964     parseStructuralElement();
2965     --Line->Level;
2966   }
2967   while (true) {
2968     if (FormatTok->is(tok::at))
2969       nextToken();
2970     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2971                              tok::kw___finally) ||
2972           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2973            FormatTok->is(Keywords.kw_finally)) ||
2974           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2975            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2976       break;
2977     }
2978     nextToken();
2979     while (FormatTok->isNot(tok::l_brace)) {
2980       if (FormatTok->is(tok::l_paren)) {
2981         parseParens();
2982         continue;
2983       }
2984       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2985         if (Style.RemoveBracesLLVM)
2986           NestedTooDeep.pop_back();
2987         return;
2988       }
2989       nextToken();
2990     }
2991     NeedsUnwrappedLine = false;
2992     Line->MustBeDeclaration = false;
2993     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2994     parseBlock();
2995     if (Style.BraceWrapping.BeforeCatch)
2996       addUnwrappedLine();
2997     else
2998       NeedsUnwrappedLine = true;
2999   }
3000 
3001   if (Style.RemoveBracesLLVM)
3002     NestedTooDeep.pop_back();
3003 
3004   if (NeedsUnwrappedLine)
3005     addUnwrappedLine();
3006 }
3007 
3008 void UnwrappedLineParser::parseNamespace() {
3009   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3010          "'namespace' expected");
3011 
3012   const FormatToken &InitialToken = *FormatTok;
3013   nextToken();
3014   if (InitialToken.is(TT_NamespaceMacro)) {
3015     parseParens();
3016   } else {
3017     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3018                               tok::l_square, tok::period, tok::l_paren) ||
3019            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3020       if (FormatTok->is(tok::l_square))
3021         parseSquare();
3022       else if (FormatTok->is(tok::l_paren))
3023         parseParens();
3024       else
3025         nextToken();
3026     }
3027   }
3028   if (FormatTok->is(tok::l_brace)) {
3029     FormatTok->setFinalizedType(TT_NamespaceLBrace);
3030 
3031     if (ShouldBreakBeforeBrace(Style, InitialToken))
3032       addUnwrappedLine();
3033 
3034     unsigned AddLevels =
3035         Style.NamespaceIndentation == FormatStyle::NI_All ||
3036                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3037                  DeclarationScopeStack.size() > 1)
3038             ? 1u
3039             : 0u;
3040     bool ManageWhitesmithsBraces =
3041         AddLevels == 0u &&
3042         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3043 
3044     // If we're in Whitesmiths mode, indent the brace if we're not indenting
3045     // the whole block.
3046     if (ManageWhitesmithsBraces)
3047       ++Line->Level;
3048 
3049     // Munch the semicolon after a namespace. This is more common than one would
3050     // think. Putting the semicolon into its own line is very ugly.
3051     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3052                /*KeepBraces=*/true, /*IfKind=*/nullptr,
3053                ManageWhitesmithsBraces);
3054 
3055     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3056 
3057     if (ManageWhitesmithsBraces)
3058       --Line->Level;
3059   }
3060   // FIXME: Add error handling.
3061 }
3062 
3063 void UnwrappedLineParser::parseNew() {
3064   assert(FormatTok->is(tok::kw_new) && "'new' expected");
3065   nextToken();
3066 
3067   if (Style.isCSharp()) {
3068     do {
3069       // Handle constructor invocation, e.g. `new(field: value)`.
3070       if (FormatTok->is(tok::l_paren))
3071         parseParens();
3072 
3073       // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3074       if (FormatTok->is(tok::l_brace))
3075         parseBracedList();
3076 
3077       if (FormatTok->isOneOf(tok::semi, tok::comma))
3078         return;
3079 
3080       nextToken();
3081     } while (!eof());
3082   }
3083 
3084   if (Style.Language != FormatStyle::LK_Java)
3085     return;
3086 
3087   // In Java, we can parse everything up to the parens, which aren't optional.
3088   do {
3089     // There should not be a ;, { or } before the new's open paren.
3090     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3091       return;
3092 
3093     // Consume the parens.
3094     if (FormatTok->is(tok::l_paren)) {
3095       parseParens();
3096 
3097       // If there is a class body of an anonymous class, consume that as child.
3098       if (FormatTok->is(tok::l_brace))
3099         parseChildBlock();
3100       return;
3101     }
3102     nextToken();
3103   } while (!eof());
3104 }
3105 
3106 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3107   keepAncestorBraces();
3108 
3109   if (isBlockBegin(*FormatTok)) {
3110     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3111     FormatToken *LeftBrace = FormatTok;
3112     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3113     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3114                /*MunchSemi=*/true, KeepBraces);
3115     setPreviousRBraceType(TT_ControlStatementRBrace);
3116     if (!KeepBraces) {
3117       assert(!NestedTooDeep.empty());
3118       if (!NestedTooDeep.back())
3119         markOptionalBraces(LeftBrace);
3120     }
3121     if (WrapRightBrace)
3122       addUnwrappedLine();
3123   } else {
3124     parseUnbracedBody();
3125   }
3126 
3127   if (!KeepBraces)
3128     NestedTooDeep.pop_back();
3129 }
3130 
3131 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3132   assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3133           (Style.isVerilog() &&
3134            FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3135                               Keywords.kw_always_ff, Keywords.kw_always_latch,
3136                               Keywords.kw_final, Keywords.kw_initial,
3137                               Keywords.kw_foreach, Keywords.kw_forever,
3138                               Keywords.kw_repeat))) &&
3139          "'for', 'while' or foreach macro expected");
3140   const bool KeepBraces = !Style.RemoveBracesLLVM ||
3141                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3142 
3143   nextToken();
3144   // JS' for await ( ...
3145   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3146     nextToken();
3147   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3148     nextToken();
3149   if (HasParens && FormatTok->is(tok::l_paren)) {
3150     // The type is only set for Verilog basically because we were afraid to
3151     // change the existing behavior for loops. See the discussion on D121756 for
3152     // details.
3153     if (Style.isVerilog())
3154       FormatTok->setFinalizedType(TT_ConditionLParen);
3155     parseParens();
3156   }
3157 
3158   if (Style.isVerilog()) {
3159     // Event control.
3160     parseVerilogSensitivityList();
3161   } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3162              Tokens->getPreviousToken()->is(tok::r_paren)) {
3163     nextToken();
3164     addUnwrappedLine();
3165     return;
3166   }
3167 
3168   handleAttributes();
3169   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3170 }
3171 
3172 void UnwrappedLineParser::parseDoWhile() {
3173   assert(FormatTok->is(tok::kw_do) && "'do' expected");
3174   nextToken();
3175 
3176   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3177 
3178   // FIXME: Add error handling.
3179   if (FormatTok->isNot(tok::kw_while)) {
3180     addUnwrappedLine();
3181     return;
3182   }
3183 
3184   FormatTok->setFinalizedType(TT_DoWhile);
3185 
3186   // If in Whitesmiths mode, the line with the while() needs to be indented
3187   // to the same level as the block.
3188   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3189     ++Line->Level;
3190 
3191   nextToken();
3192   parseStructuralElement();
3193 }
3194 
3195 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3196   nextToken();
3197   unsigned OldLineLevel = Line->Level;
3198   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3199     --Line->Level;
3200   if (LeftAlignLabel)
3201     Line->Level = 0;
3202 
3203   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3204       FormatTok->is(tok::l_brace)) {
3205 
3206     CompoundStatementIndenter Indenter(this, Line->Level,
3207                                        Style.BraceWrapping.AfterCaseLabel,
3208                                        Style.BraceWrapping.IndentBraces);
3209     parseBlock();
3210     if (FormatTok->is(tok::kw_break)) {
3211       if (Style.BraceWrapping.AfterControlStatement ==
3212           FormatStyle::BWACS_Always) {
3213         addUnwrappedLine();
3214         if (!Style.IndentCaseBlocks &&
3215             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3216           ++Line->Level;
3217         }
3218       }
3219       parseStructuralElement();
3220     }
3221     addUnwrappedLine();
3222   } else {
3223     if (FormatTok->is(tok::semi))
3224       nextToken();
3225     addUnwrappedLine();
3226   }
3227   Line->Level = OldLineLevel;
3228   if (FormatTok->isNot(tok::l_brace)) {
3229     parseStructuralElement();
3230     addUnwrappedLine();
3231   }
3232 }
3233 
3234 void UnwrappedLineParser::parseCaseLabel() {
3235   assert(FormatTok->is(tok::kw_case) && "'case' expected");
3236 
3237   // FIXME: fix handling of complex expressions here.
3238   do {
3239     nextToken();
3240     if (FormatTok->is(tok::colon)) {
3241       FormatTok->setFinalizedType(TT_CaseLabelColon);
3242       break;
3243     }
3244   } while (!eof());
3245   parseLabel();
3246 }
3247 
3248 void UnwrappedLineParser::parseSwitch() {
3249   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3250   nextToken();
3251   if (FormatTok->is(tok::l_paren))
3252     parseParens();
3253 
3254   keepAncestorBraces();
3255 
3256   if (FormatTok->is(tok::l_brace)) {
3257     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3258     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3259     parseBlock();
3260     setPreviousRBraceType(TT_ControlStatementRBrace);
3261     addUnwrappedLine();
3262   } else {
3263     addUnwrappedLine();
3264     ++Line->Level;
3265     parseStructuralElement();
3266     --Line->Level;
3267   }
3268 
3269   if (Style.RemoveBracesLLVM)
3270     NestedTooDeep.pop_back();
3271 }
3272 
3273 // Operators that can follow a C variable.
3274 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3275   switch (kind) {
3276   case tok::ampamp:
3277   case tok::ampequal:
3278   case tok::arrow:
3279   case tok::caret:
3280   case tok::caretequal:
3281   case tok::comma:
3282   case tok::ellipsis:
3283   case tok::equal:
3284   case tok::equalequal:
3285   case tok::exclaim:
3286   case tok::exclaimequal:
3287   case tok::greater:
3288   case tok::greaterequal:
3289   case tok::greatergreater:
3290   case tok::greatergreaterequal:
3291   case tok::l_paren:
3292   case tok::l_square:
3293   case tok::less:
3294   case tok::lessequal:
3295   case tok::lessless:
3296   case tok::lesslessequal:
3297   case tok::minus:
3298   case tok::minusequal:
3299   case tok::minusminus:
3300   case tok::percent:
3301   case tok::percentequal:
3302   case tok::period:
3303   case tok::pipe:
3304   case tok::pipeequal:
3305   case tok::pipepipe:
3306   case tok::plus:
3307   case tok::plusequal:
3308   case tok::plusplus:
3309   case tok::question:
3310   case tok::r_brace:
3311   case tok::r_paren:
3312   case tok::r_square:
3313   case tok::semi:
3314   case tok::slash:
3315   case tok::slashequal:
3316   case tok::star:
3317   case tok::starequal:
3318     return true;
3319   default:
3320     return false;
3321   }
3322 }
3323 
3324 void UnwrappedLineParser::parseAccessSpecifier() {
3325   FormatToken *AccessSpecifierCandidate = FormatTok;
3326   nextToken();
3327   // Understand Qt's slots.
3328   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3329     nextToken();
3330   // Otherwise, we don't know what it is, and we'd better keep the next token.
3331   if (FormatTok->is(tok::colon)) {
3332     nextToken();
3333     addUnwrappedLine();
3334   } else if (FormatTok->isNot(tok::coloncolon) &&
3335              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3336     // Not a variable name nor namespace name.
3337     addUnwrappedLine();
3338   } else if (AccessSpecifierCandidate) {
3339     // Consider the access specifier to be a C identifier.
3340     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3341   }
3342 }
3343 
3344 /// \brief Parses a requires, decides if it is a clause or an expression.
3345 /// \pre The current token has to be the requires keyword.
3346 /// \returns true if it parsed a clause.
3347 bool clang::format::UnwrappedLineParser::parseRequires() {
3348   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3349   auto RequiresToken = FormatTok;
3350 
3351   // We try to guess if it is a requires clause, or a requires expression. For
3352   // that we first consume the keyword and check the next token.
3353   nextToken();
3354 
3355   switch (FormatTok->Tok.getKind()) {
3356   case tok::l_brace:
3357     // This can only be an expression, never a clause.
3358     parseRequiresExpression(RequiresToken);
3359     return false;
3360   case tok::l_paren:
3361     // Clauses and expression can start with a paren, it's unclear what we have.
3362     break;
3363   default:
3364     // All other tokens can only be a clause.
3365     parseRequiresClause(RequiresToken);
3366     return true;
3367   }
3368 
3369   // Looking forward we would have to decide if there are function declaration
3370   // like arguments to the requires expression:
3371   // requires (T t) {
3372   // Or there is a constraint expression for the requires clause:
3373   // requires (C<T> && ...
3374 
3375   // But first let's look behind.
3376   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3377 
3378   if (!PreviousNonComment ||
3379       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3380     // If there is no token, or an expression left brace, we are a requires
3381     // clause within a requires expression.
3382     parseRequiresClause(RequiresToken);
3383     return true;
3384   }
3385 
3386   switch (PreviousNonComment->Tok.getKind()) {
3387   case tok::greater:
3388   case tok::r_paren:
3389   case tok::kw_noexcept:
3390   case tok::kw_const:
3391     // This is a requires clause.
3392     parseRequiresClause(RequiresToken);
3393     return true;
3394   case tok::amp:
3395   case tok::ampamp: {
3396     // This can be either:
3397     // if (... && requires (T t) ...)
3398     // Or
3399     // void member(...) && requires (C<T> ...
3400     // We check the one token before that for a const:
3401     // void member(...) const && requires (C<T> ...
3402     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3403     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3404       parseRequiresClause(RequiresToken);
3405       return true;
3406     }
3407     break;
3408   }
3409   default:
3410     if (PreviousNonComment->isTypeOrIdentifier()) {
3411       // This is a requires clause.
3412       parseRequiresClause(RequiresToken);
3413       return true;
3414     }
3415     // It's an expression.
3416     parseRequiresExpression(RequiresToken);
3417     return false;
3418   }
3419 
3420   // Now we look forward and try to check if the paren content is a parameter
3421   // list. The parameters can be cv-qualified and contain references or
3422   // pointers.
3423   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3424   // of stuff: typename, const, *, &, &&, ::, identifiers.
3425 
3426   unsigned StoredPosition = Tokens->getPosition();
3427   FormatToken *NextToken = Tokens->getNextToken();
3428   int Lookahead = 0;
3429   auto PeekNext = [&Lookahead, &NextToken, this] {
3430     ++Lookahead;
3431     NextToken = Tokens->getNextToken();
3432   };
3433 
3434   bool FoundType = false;
3435   bool LastWasColonColon = false;
3436   int OpenAngles = 0;
3437 
3438   for (; Lookahead < 50; PeekNext()) {
3439     switch (NextToken->Tok.getKind()) {
3440     case tok::kw_volatile:
3441     case tok::kw_const:
3442     case tok::comma:
3443       if (OpenAngles == 0) {
3444         FormatTok = Tokens->setPosition(StoredPosition);
3445         parseRequiresExpression(RequiresToken);
3446         return false;
3447       }
3448       break;
3449     case tok::r_paren:
3450     case tok::pipepipe:
3451       FormatTok = Tokens->setPosition(StoredPosition);
3452       parseRequiresClause(RequiresToken);
3453       return true;
3454     case tok::eof:
3455       // Break out of the loop.
3456       Lookahead = 50;
3457       break;
3458     case tok::coloncolon:
3459       LastWasColonColon = true;
3460       break;
3461     case tok::identifier:
3462       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3463         FormatTok = Tokens->setPosition(StoredPosition);
3464         parseRequiresExpression(RequiresToken);
3465         return false;
3466       }
3467       FoundType = true;
3468       LastWasColonColon = false;
3469       break;
3470     case tok::less:
3471       ++OpenAngles;
3472       break;
3473     case tok::greater:
3474       --OpenAngles;
3475       break;
3476     default:
3477       if (NextToken->isSimpleTypeSpecifier()) {
3478         FormatTok = Tokens->setPosition(StoredPosition);
3479         parseRequiresExpression(RequiresToken);
3480         return false;
3481       }
3482       break;
3483     }
3484   }
3485   // This seems to be a complicated expression, just assume it's a clause.
3486   FormatTok = Tokens->setPosition(StoredPosition);
3487   parseRequiresClause(RequiresToken);
3488   return true;
3489 }
3490 
3491 /// \brief Parses a requires clause.
3492 /// \param RequiresToken The requires keyword token, which starts this clause.
3493 /// \pre We need to be on the next token after the requires keyword.
3494 /// \sa parseRequiresExpression
3495 ///
3496 /// Returns if it either has finished parsing the clause, or it detects, that
3497 /// the clause is incorrect.
3498 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3499   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3500   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3501 
3502   // If there is no previous token, we are within a requires expression,
3503   // otherwise we will always have the template or function declaration in front
3504   // of it.
3505   bool InRequiresExpression =
3506       !RequiresToken->Previous ||
3507       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3508 
3509   RequiresToken->setFinalizedType(InRequiresExpression
3510                                       ? TT_RequiresClauseInARequiresExpression
3511                                       : TT_RequiresClause);
3512 
3513   // NOTE: parseConstraintExpression is only ever called from this function.
3514   // It could be inlined into here.
3515   parseConstraintExpression();
3516 
3517   if (!InRequiresExpression)
3518     FormatTok->Previous->ClosesRequiresClause = true;
3519 }
3520 
3521 /// \brief Parses a requires expression.
3522 /// \param RequiresToken The requires keyword token, which starts this clause.
3523 /// \pre We need to be on the next token after the requires keyword.
3524 /// \sa parseRequiresClause
3525 ///
3526 /// Returns if it either has finished parsing the expression, or it detects,
3527 /// that the expression is incorrect.
3528 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3529   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3530   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3531 
3532   RequiresToken->setFinalizedType(TT_RequiresExpression);
3533 
3534   if (FormatTok->is(tok::l_paren)) {
3535     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3536     parseParens();
3537   }
3538 
3539   if (FormatTok->is(tok::l_brace)) {
3540     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3541     parseChildBlock();
3542   }
3543 }
3544 
3545 /// \brief Parses a constraint expression.
3546 ///
3547 /// This is the body of a requires clause. It returns, when the parsing is
3548 /// complete, or the expression is incorrect.
3549 void UnwrappedLineParser::parseConstraintExpression() {
3550   // The special handling for lambdas is needed since tryToParseLambda() eats a
3551   // token and if a requires expression is the last part of a requires clause
3552   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3553   // not set on the correct token. Thus we need to be aware if we even expect a
3554   // lambda to be possible.
3555   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3556   bool LambdaNextTimeAllowed = true;
3557 
3558   // Within lambda declarations, it is permitted to put a requires clause after
3559   // its template parameter list, which would place the requires clause right
3560   // before the parentheses of the parameters of the lambda declaration. Thus,
3561   // we track if we expect to see grouping parentheses at all.
3562   // Without this check, `requires foo<T> (T t)` in the below example would be
3563   // seen as the whole requires clause, accidentally eating the parameters of
3564   // the lambda.
3565   // [&]<typename T> requires foo<T> (T t) { ... };
3566   bool TopLevelParensAllowed = true;
3567 
3568   do {
3569     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3570 
3571     switch (FormatTok->Tok.getKind()) {
3572     case tok::kw_requires: {
3573       auto RequiresToken = FormatTok;
3574       nextToken();
3575       parseRequiresExpression(RequiresToken);
3576       break;
3577     }
3578 
3579     case tok::l_paren:
3580       if (!TopLevelParensAllowed)
3581         return;
3582       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3583       TopLevelParensAllowed = false;
3584       break;
3585 
3586     case tok::l_square:
3587       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3588         return;
3589       break;
3590 
3591     case tok::kw_const:
3592     case tok::semi:
3593     case tok::kw_class:
3594     case tok::kw_struct:
3595     case tok::kw_union:
3596       return;
3597 
3598     case tok::l_brace:
3599       // Potential function body.
3600       return;
3601 
3602     case tok::ampamp:
3603     case tok::pipepipe:
3604       FormatTok->setFinalizedType(TT_BinaryOperator);
3605       nextToken();
3606       LambdaNextTimeAllowed = true;
3607       TopLevelParensAllowed = true;
3608       break;
3609 
3610     case tok::comma:
3611     case tok::comment:
3612       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3613       nextToken();
3614       break;
3615 
3616     case tok::kw_sizeof:
3617     case tok::greater:
3618     case tok::greaterequal:
3619     case tok::greatergreater:
3620     case tok::less:
3621     case tok::lessequal:
3622     case tok::lessless:
3623     case tok::equalequal:
3624     case tok::exclaim:
3625     case tok::exclaimequal:
3626     case tok::plus:
3627     case tok::minus:
3628     case tok::star:
3629     case tok::slash:
3630       LambdaNextTimeAllowed = true;
3631       TopLevelParensAllowed = true;
3632       // Just eat them.
3633       nextToken();
3634       break;
3635 
3636     case tok::numeric_constant:
3637     case tok::coloncolon:
3638     case tok::kw_true:
3639     case tok::kw_false:
3640       TopLevelParensAllowed = false;
3641       // Just eat them.
3642       nextToken();
3643       break;
3644 
3645     case tok::kw_static_cast:
3646     case tok::kw_const_cast:
3647     case tok::kw_reinterpret_cast:
3648     case tok::kw_dynamic_cast:
3649       nextToken();
3650       if (FormatTok->isNot(tok::less))
3651         return;
3652 
3653       nextToken();
3654       parseBracedList(/*IsAngleBracket=*/true);
3655       break;
3656 
3657     default:
3658       if (!FormatTok->Tok.getIdentifierInfo()) {
3659         // Identifiers are part of the default case, we check for more then
3660         // tok::identifier to handle builtin type traits.
3661         return;
3662       }
3663 
3664       // We need to differentiate identifiers for a template deduction guide,
3665       // variables, or function return types (the constraint expression has
3666       // ended before that), and basically all other cases. But it's easier to
3667       // check the other way around.
3668       assert(FormatTok->Previous);
3669       switch (FormatTok->Previous->Tok.getKind()) {
3670       case tok::coloncolon:  // Nested identifier.
3671       case tok::ampamp:      // Start of a function or variable for the
3672       case tok::pipepipe:    // constraint expression. (binary)
3673       case tok::exclaim:     // The same as above, but unary.
3674       case tok::kw_requires: // Initial identifier of a requires clause.
3675       case tok::equal:       // Initial identifier of a concept declaration.
3676         break;
3677       default:
3678         return;
3679       }
3680 
3681       // Read identifier with optional template declaration.
3682       nextToken();
3683       if (FormatTok->is(tok::less)) {
3684         nextToken();
3685         parseBracedList(/*IsAngleBracket=*/true);
3686       }
3687       TopLevelParensAllowed = false;
3688       break;
3689     }
3690   } while (!eof());
3691 }
3692 
3693 bool UnwrappedLineParser::parseEnum() {
3694   const FormatToken &InitialToken = *FormatTok;
3695 
3696   // Won't be 'enum' for NS_ENUMs.
3697   if (FormatTok->is(tok::kw_enum))
3698     nextToken();
3699 
3700   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3701   // declarations. An "enum" keyword followed by a colon would be a syntax
3702   // error and thus assume it is just an identifier.
3703   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3704     return false;
3705 
3706   // In protobuf, "enum" can be used as a field name.
3707   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3708     return false;
3709 
3710   // Eat up enum class ...
3711   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3712     nextToken();
3713 
3714   while (FormatTok->Tok.getIdentifierInfo() ||
3715          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3716                             tok::greater, tok::comma, tok::question,
3717                             tok::l_square, tok::r_square)) {
3718     if (Style.isVerilog()) {
3719       FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3720       nextToken();
3721       // In Verilog the base type can have dimensions.
3722       while (FormatTok->is(tok::l_square))
3723         parseSquare();
3724     } else {
3725       nextToken();
3726     }
3727     // We can have macros or attributes in between 'enum' and the enum name.
3728     if (FormatTok->is(tok::l_paren))
3729       parseParens();
3730     assert(FormatTok->isNot(TT_AttributeSquare));
3731     if (FormatTok->is(tok::identifier)) {
3732       nextToken();
3733       // If there are two identifiers in a row, this is likely an elaborate
3734       // return type. In Java, this can be "implements", etc.
3735       if (Style.isCpp() && FormatTok->is(tok::identifier))
3736         return false;
3737     }
3738   }
3739 
3740   // Just a declaration or something is wrong.
3741   if (FormatTok->isNot(tok::l_brace))
3742     return true;
3743   FormatTok->setFinalizedType(TT_EnumLBrace);
3744   FormatTok->setBlockKind(BK_Block);
3745 
3746   if (Style.Language == FormatStyle::LK_Java) {
3747     // Java enums are different.
3748     parseJavaEnumBody();
3749     return true;
3750   }
3751   if (Style.Language == FormatStyle::LK_Proto) {
3752     parseBlock(/*MustBeDeclaration=*/true);
3753     return true;
3754   }
3755 
3756   if (!Style.AllowShortEnumsOnASingleLine &&
3757       ShouldBreakBeforeBrace(Style, InitialToken)) {
3758     addUnwrappedLine();
3759   }
3760   // Parse enum body.
3761   nextToken();
3762   if (!Style.AllowShortEnumsOnASingleLine) {
3763     addUnwrappedLine();
3764     Line->Level += 1;
3765   }
3766   bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3767   if (!Style.AllowShortEnumsOnASingleLine)
3768     Line->Level -= 1;
3769   if (HasError) {
3770     if (FormatTok->is(tok::semi))
3771       nextToken();
3772     addUnwrappedLine();
3773   }
3774   setPreviousRBraceType(TT_EnumRBrace);
3775   return true;
3776 
3777   // There is no addUnwrappedLine() here so that we fall through to parsing a
3778   // structural element afterwards. Thus, in "enum A {} n, m;",
3779   // "} n, m;" will end up in one unwrapped line.
3780 }
3781 
3782 bool UnwrappedLineParser::parseStructLike() {
3783   // parseRecord falls through and does not yet add an unwrapped line as a
3784   // record declaration or definition can start a structural element.
3785   parseRecord();
3786   // This does not apply to Java, JavaScript and C#.
3787   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3788       Style.isCSharp()) {
3789     if (FormatTok->is(tok::semi))
3790       nextToken();
3791     addUnwrappedLine();
3792     return true;
3793   }
3794   return false;
3795 }
3796 
3797 namespace {
3798 // A class used to set and restore the Token position when peeking
3799 // ahead in the token source.
3800 class ScopedTokenPosition {
3801   unsigned StoredPosition;
3802   FormatTokenSource *Tokens;
3803 
3804 public:
3805   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3806     assert(Tokens && "Tokens expected to not be null");
3807     StoredPosition = Tokens->getPosition();
3808   }
3809 
3810   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3811 };
3812 } // namespace
3813 
3814 // Look to see if we have [[ by looking ahead, if
3815 // its not then rewind to the original position.
3816 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3817   ScopedTokenPosition AutoPosition(Tokens);
3818   FormatToken *Tok = Tokens->getNextToken();
3819   // We already read the first [ check for the second.
3820   if (Tok->isNot(tok::l_square))
3821     return false;
3822   // Double check that the attribute is just something
3823   // fairly simple.
3824   while (Tok->isNot(tok::eof)) {
3825     if (Tok->is(tok::r_square))
3826       break;
3827     Tok = Tokens->getNextToken();
3828   }
3829   if (Tok->is(tok::eof))
3830     return false;
3831   Tok = Tokens->getNextToken();
3832   if (Tok->isNot(tok::r_square))
3833     return false;
3834   Tok = Tokens->getNextToken();
3835   if (Tok->is(tok::semi))
3836     return false;
3837   return true;
3838 }
3839 
3840 void UnwrappedLineParser::parseJavaEnumBody() {
3841   assert(FormatTok->is(tok::l_brace));
3842   const FormatToken *OpeningBrace = FormatTok;
3843 
3844   // Determine whether the enum is simple, i.e. does not have a semicolon or
3845   // constants with class bodies. Simple enums can be formatted like braced
3846   // lists, contracted to a single line, etc.
3847   unsigned StoredPosition = Tokens->getPosition();
3848   bool IsSimple = true;
3849   FormatToken *Tok = Tokens->getNextToken();
3850   while (Tok->isNot(tok::eof)) {
3851     if (Tok->is(tok::r_brace))
3852       break;
3853     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3854       IsSimple = false;
3855       break;
3856     }
3857     // FIXME: This will also mark enums with braces in the arguments to enum
3858     // constants as "not simple". This is probably fine in practice, though.
3859     Tok = Tokens->getNextToken();
3860   }
3861   FormatTok = Tokens->setPosition(StoredPosition);
3862 
3863   if (IsSimple) {
3864     nextToken();
3865     parseBracedList();
3866     addUnwrappedLine();
3867     return;
3868   }
3869 
3870   // Parse the body of a more complex enum.
3871   // First add a line for everything up to the "{".
3872   nextToken();
3873   addUnwrappedLine();
3874   ++Line->Level;
3875 
3876   // Parse the enum constants.
3877   while (!eof()) {
3878     if (FormatTok->is(tok::l_brace)) {
3879       // Parse the constant's class body.
3880       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3881                  /*MunchSemi=*/false);
3882     } else if (FormatTok->is(tok::l_paren)) {
3883       parseParens();
3884     } else if (FormatTok->is(tok::comma)) {
3885       nextToken();
3886       addUnwrappedLine();
3887     } else if (FormatTok->is(tok::semi)) {
3888       nextToken();
3889       addUnwrappedLine();
3890       break;
3891     } else if (FormatTok->is(tok::r_brace)) {
3892       addUnwrappedLine();
3893       break;
3894     } else {
3895       nextToken();
3896     }
3897   }
3898 
3899   // Parse the class body after the enum's ";" if any.
3900   parseLevel(OpeningBrace);
3901   nextToken();
3902   --Line->Level;
3903   addUnwrappedLine();
3904 }
3905 
3906 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3907   const FormatToken &InitialToken = *FormatTok;
3908   nextToken();
3909 
3910   auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
3911     return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
3912   };
3913   // The actual identifier can be a nested name specifier, and in macros
3914   // it is often token-pasted.
3915   // An [[attribute]] can be before the identifier.
3916   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3917                             tok::kw_alignas, tok::l_square) ||
3918          FormatTok->isAttribute() ||
3919          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3920           FormatTok->isOneOf(tok::period, tok::comma))) {
3921     if (Style.isJavaScript() &&
3922         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3923       // JavaScript/TypeScript supports inline object types in
3924       // extends/implements positions:
3925       //     class Foo implements {bar: number} { }
3926       nextToken();
3927       if (FormatTok->is(tok::l_brace)) {
3928         tryToParseBracedList();
3929         continue;
3930       }
3931     }
3932     if (FormatTok->is(tok::l_square) && handleCppAttributes())
3933       continue;
3934     nextToken();
3935     // We can have macros in between 'class' and the class name.
3936     if (!IsNonMacroIdentifier(FormatTok->Previous) &&
3937         FormatTok->is(tok::l_paren)) {
3938       parseParens();
3939     }
3940   }
3941 
3942   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3943     int AngleNestingLevel = 0;
3944     do {
3945       if (FormatTok->is(tok::less))
3946         ++AngleNestingLevel;
3947       else if (FormatTok->is(tok::greater))
3948         --AngleNestingLevel;
3949 
3950       if (AngleNestingLevel == 0 && FormatTok->is(tok::l_paren) &&
3951           IsNonMacroIdentifier(FormatTok->Previous)) {
3952         break;
3953       }
3954       if (FormatTok->is(tok::l_brace)) {
3955         calculateBraceTypes(/*ExpectClassBody=*/true);
3956         if (!tryToParseBracedList())
3957           break;
3958       }
3959       if (FormatTok->is(tok::l_square)) {
3960         FormatToken *Previous = FormatTok->Previous;
3961         if (!Previous ||
3962             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3963           // Don't try parsing a lambda if we had a closing parenthesis before,
3964           // it was probably a pointer to an array: int (*)[].
3965           if (!tryToParseLambda())
3966             continue;
3967         } else {
3968           parseSquare();
3969           continue;
3970         }
3971       }
3972       if (FormatTok->is(tok::semi))
3973         return;
3974       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3975         addUnwrappedLine();
3976         nextToken();
3977         parseCSharpGenericTypeConstraint();
3978         break;
3979       }
3980       nextToken();
3981     } while (!eof());
3982   }
3983 
3984   auto GetBraceTypes =
3985       [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
3986     switch (RecordTok.Tok.getKind()) {
3987     case tok::kw_class:
3988       return {TT_ClassLBrace, TT_ClassRBrace};
3989     case tok::kw_struct:
3990       return {TT_StructLBrace, TT_StructRBrace};
3991     case tok::kw_union:
3992       return {TT_UnionLBrace, TT_UnionRBrace};
3993     default:
3994       // Useful for e.g. interface.
3995       return {TT_RecordLBrace, TT_RecordRBrace};
3996     }
3997   };
3998   if (FormatTok->is(tok::l_brace)) {
3999     auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4000     FormatTok->setFinalizedType(OpenBraceType);
4001     if (ParseAsExpr) {
4002       parseChildBlock();
4003     } else {
4004       if (ShouldBreakBeforeBrace(Style, InitialToken))
4005         addUnwrappedLine();
4006 
4007       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4008       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4009     }
4010     setPreviousRBraceType(ClosingBraceType);
4011   }
4012   // There is no addUnwrappedLine() here so that we fall through to parsing a
4013   // structural element afterwards. Thus, in "class A {} n, m;",
4014   // "} n, m;" will end up in one unwrapped line.
4015 }
4016 
4017 void UnwrappedLineParser::parseObjCMethod() {
4018   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4019          "'(' or identifier expected.");
4020   do {
4021     if (FormatTok->is(tok::semi)) {
4022       nextToken();
4023       addUnwrappedLine();
4024       return;
4025     } else if (FormatTok->is(tok::l_brace)) {
4026       if (Style.BraceWrapping.AfterFunction)
4027         addUnwrappedLine();
4028       parseBlock();
4029       addUnwrappedLine();
4030       return;
4031     } else {
4032       nextToken();
4033     }
4034   } while (!eof());
4035 }
4036 
4037 void UnwrappedLineParser::parseObjCProtocolList() {
4038   assert(FormatTok->is(tok::less) && "'<' expected.");
4039   do {
4040     nextToken();
4041     // Early exit in case someone forgot a close angle.
4042     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4043         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4044       return;
4045     }
4046   } while (!eof() && FormatTok->isNot(tok::greater));
4047   nextToken(); // Skip '>'.
4048 }
4049 
4050 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4051   do {
4052     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4053       nextToken();
4054       addUnwrappedLine();
4055       break;
4056     }
4057     if (FormatTok->is(tok::l_brace)) {
4058       parseBlock();
4059       // In ObjC interfaces, nothing should be following the "}".
4060       addUnwrappedLine();
4061     } else if (FormatTok->is(tok::r_brace)) {
4062       // Ignore stray "}". parseStructuralElement doesn't consume them.
4063       nextToken();
4064       addUnwrappedLine();
4065     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4066       nextToken();
4067       parseObjCMethod();
4068     } else {
4069       parseStructuralElement();
4070     }
4071   } while (!eof());
4072 }
4073 
4074 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4075   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4076          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4077   nextToken();
4078   nextToken(); // interface name
4079 
4080   // @interface can be followed by a lightweight generic
4081   // specialization list, then either a base class or a category.
4082   if (FormatTok->is(tok::less))
4083     parseObjCLightweightGenerics();
4084   if (FormatTok->is(tok::colon)) {
4085     nextToken();
4086     nextToken(); // base class name
4087     // The base class can also have lightweight generics applied to it.
4088     if (FormatTok->is(tok::less))
4089       parseObjCLightweightGenerics();
4090   } else if (FormatTok->is(tok::l_paren)) {
4091     // Skip category, if present.
4092     parseParens();
4093   }
4094 
4095   if (FormatTok->is(tok::less))
4096     parseObjCProtocolList();
4097 
4098   if (FormatTok->is(tok::l_brace)) {
4099     if (Style.BraceWrapping.AfterObjCDeclaration)
4100       addUnwrappedLine();
4101     parseBlock(/*MustBeDeclaration=*/true);
4102   }
4103 
4104   // With instance variables, this puts '}' on its own line.  Without instance
4105   // variables, this ends the @interface line.
4106   addUnwrappedLine();
4107 
4108   parseObjCUntilAtEnd();
4109 }
4110 
4111 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4112   assert(FormatTok->is(tok::less));
4113   // Unlike protocol lists, generic parameterizations support
4114   // nested angles:
4115   //
4116   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4117   //     NSObject <NSCopying, NSSecureCoding>
4118   //
4119   // so we need to count how many open angles we have left.
4120   unsigned NumOpenAngles = 1;
4121   do {
4122     nextToken();
4123     // Early exit in case someone forgot a close angle.
4124     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4125         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4126       break;
4127     }
4128     if (FormatTok->is(tok::less)) {
4129       ++NumOpenAngles;
4130     } else if (FormatTok->is(tok::greater)) {
4131       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4132       --NumOpenAngles;
4133     }
4134   } while (!eof() && NumOpenAngles != 0);
4135   nextToken(); // Skip '>'.
4136 }
4137 
4138 // Returns true for the declaration/definition form of @protocol,
4139 // false for the expression form.
4140 bool UnwrappedLineParser::parseObjCProtocol() {
4141   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4142   nextToken();
4143 
4144   if (FormatTok->is(tok::l_paren)) {
4145     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4146     return false;
4147   }
4148 
4149   // The definition/declaration form,
4150   // @protocol Foo
4151   // - (int)someMethod;
4152   // @end
4153 
4154   nextToken(); // protocol name
4155 
4156   if (FormatTok->is(tok::less))
4157     parseObjCProtocolList();
4158 
4159   // Check for protocol declaration.
4160   if (FormatTok->is(tok::semi)) {
4161     nextToken();
4162     addUnwrappedLine();
4163     return true;
4164   }
4165 
4166   addUnwrappedLine();
4167   parseObjCUntilAtEnd();
4168   return true;
4169 }
4170 
4171 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4172   bool IsImport = FormatTok->is(Keywords.kw_import);
4173   assert(IsImport || FormatTok->is(tok::kw_export));
4174   nextToken();
4175 
4176   // Consume the "default" in "export default class/function".
4177   if (FormatTok->is(tok::kw_default))
4178     nextToken();
4179 
4180   // Consume "async function", "function" and "default function", so that these
4181   // get parsed as free-standing JS functions, i.e. do not require a trailing
4182   // semicolon.
4183   if (FormatTok->is(Keywords.kw_async))
4184     nextToken();
4185   if (FormatTok->is(Keywords.kw_function)) {
4186     nextToken();
4187     return;
4188   }
4189 
4190   // For imports, `export *`, `export {...}`, consume the rest of the line up
4191   // to the terminating `;`. For everything else, just return and continue
4192   // parsing the structural element, i.e. the declaration or expression for
4193   // `export default`.
4194   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4195       !FormatTok->isStringLiteral() &&
4196       !(FormatTok->is(Keywords.kw_type) &&
4197         Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4198     return;
4199   }
4200 
4201   while (!eof()) {
4202     if (FormatTok->is(tok::semi))
4203       return;
4204     if (Line->Tokens.empty()) {
4205       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4206       // import statement should terminate.
4207       return;
4208     }
4209     if (FormatTok->is(tok::l_brace)) {
4210       FormatTok->setBlockKind(BK_Block);
4211       nextToken();
4212       parseBracedList();
4213     } else {
4214       nextToken();
4215     }
4216   }
4217 }
4218 
4219 void UnwrappedLineParser::parseStatementMacro() {
4220   nextToken();
4221   if (FormatTok->is(tok::l_paren))
4222     parseParens();
4223   if (FormatTok->is(tok::semi))
4224     nextToken();
4225   addUnwrappedLine();
4226 }
4227 
4228 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4229   // consume things like a::`b.c[d:e] or a::*
4230   while (true) {
4231     if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4232                            tok::coloncolon, tok::hash) ||
4233         Keywords.isVerilogIdentifier(*FormatTok)) {
4234       nextToken();
4235     } else if (FormatTok->is(tok::l_square)) {
4236       parseSquare();
4237     } else {
4238       break;
4239     }
4240   }
4241 }
4242 
4243 void UnwrappedLineParser::parseVerilogSensitivityList() {
4244   if (FormatTok->isNot(tok::at))
4245     return;
4246   nextToken();
4247   // A block event expression has 2 at signs.
4248   if (FormatTok->is(tok::at))
4249     nextToken();
4250   switch (FormatTok->Tok.getKind()) {
4251   case tok::star:
4252     nextToken();
4253     break;
4254   case tok::l_paren:
4255     parseParens();
4256     break;
4257   default:
4258     parseVerilogHierarchyIdentifier();
4259     break;
4260   }
4261 }
4262 
4263 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4264   unsigned AddLevels = 0;
4265 
4266   if (FormatTok->is(Keywords.kw_clocking)) {
4267     nextToken();
4268     if (Keywords.isVerilogIdentifier(*FormatTok))
4269       nextToken();
4270     parseVerilogSensitivityList();
4271     if (FormatTok->is(tok::semi))
4272       nextToken();
4273   } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4274                                 Keywords.kw_casez, Keywords.kw_randcase,
4275                                 Keywords.kw_randsequence)) {
4276     if (Style.IndentCaseLabels)
4277       AddLevels++;
4278     nextToken();
4279     if (FormatTok->is(tok::l_paren)) {
4280       FormatTok->setFinalizedType(TT_ConditionLParen);
4281       parseParens();
4282     }
4283     if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4284       nextToken();
4285     // The case header has no semicolon.
4286   } else {
4287     // "module" etc.
4288     nextToken();
4289     // all the words like the name of the module and specifiers like
4290     // "automatic" and the width of function return type
4291     while (true) {
4292       if (FormatTok->is(tok::l_square)) {
4293         auto Prev = FormatTok->getPreviousNonComment();
4294         if (Prev && Keywords.isVerilogIdentifier(*Prev))
4295           Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4296         parseSquare();
4297       } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4298                  FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4299         nextToken();
4300       } else {
4301         break;
4302       }
4303     }
4304 
4305     auto NewLine = [this]() {
4306       addUnwrappedLine();
4307       Line->IsContinuation = true;
4308     };
4309 
4310     // package imports
4311     while (FormatTok->is(Keywords.kw_import)) {
4312       NewLine();
4313       nextToken();
4314       parseVerilogHierarchyIdentifier();
4315       if (FormatTok->is(tok::semi))
4316         nextToken();
4317     }
4318 
4319     // parameters and ports
4320     if (FormatTok->is(Keywords.kw_verilogHash)) {
4321       NewLine();
4322       nextToken();
4323       if (FormatTok->is(tok::l_paren)) {
4324         FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4325         parseParens();
4326       }
4327     }
4328     if (FormatTok->is(tok::l_paren)) {
4329       NewLine();
4330       FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4331       parseParens();
4332     }
4333 
4334     // extends and implements
4335     if (FormatTok->is(Keywords.kw_extends)) {
4336       NewLine();
4337       nextToken();
4338       parseVerilogHierarchyIdentifier();
4339       if (FormatTok->is(tok::l_paren))
4340         parseParens();
4341     }
4342     if (FormatTok->is(Keywords.kw_implements)) {
4343       NewLine();
4344       do {
4345         nextToken();
4346         parseVerilogHierarchyIdentifier();
4347       } while (FormatTok->is(tok::comma));
4348     }
4349 
4350     // Coverage event for cover groups.
4351     if (FormatTok->is(tok::at)) {
4352       NewLine();
4353       parseVerilogSensitivityList();
4354     }
4355 
4356     if (FormatTok->is(tok::semi))
4357       nextToken(/*LevelDifference=*/1);
4358     addUnwrappedLine();
4359   }
4360 
4361   return AddLevels;
4362 }
4363 
4364 void UnwrappedLineParser::parseVerilogTable() {
4365   assert(FormatTok->is(Keywords.kw_table));
4366   nextToken(/*LevelDifference=*/1);
4367   addUnwrappedLine();
4368 
4369   auto InitialLevel = Line->Level++;
4370   while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4371     FormatToken *Tok = FormatTok;
4372     nextToken();
4373     if (Tok->is(tok::semi))
4374       addUnwrappedLine();
4375     else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4376       Tok->setFinalizedType(TT_VerilogTableItem);
4377   }
4378   Line->Level = InitialLevel;
4379   nextToken(/*LevelDifference=*/-1);
4380   addUnwrappedLine();
4381 }
4382 
4383 void UnwrappedLineParser::parseVerilogCaseLabel() {
4384   // The label will get unindented in AnnotatingParser. If there are no leading
4385   // spaces, indent the rest here so that things inside the block will be
4386   // indented relative to things outside. We don't use parseLabel because we
4387   // don't know whether this colon is a label or a ternary expression at this
4388   // point.
4389   auto OrigLevel = Line->Level;
4390   auto FirstLine = CurrentLines->size();
4391   if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4392     ++Line->Level;
4393   else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4394     --Line->Level;
4395   parseStructuralElement();
4396   // Restore the indentation in both the new line and the line that has the
4397   // label.
4398   if (CurrentLines->size() > FirstLine)
4399     (*CurrentLines)[FirstLine].Level = OrigLevel;
4400   Line->Level = OrigLevel;
4401 }
4402 
4403 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4404   for (const auto &N : Line.Tokens) {
4405     if (N.Tok->MacroCtx)
4406       return true;
4407     for (const UnwrappedLine &Child : N.Children)
4408       if (containsExpansion(Child))
4409         return true;
4410   }
4411   return false;
4412 }
4413 
4414 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4415   if (Line->Tokens.empty())
4416     return;
4417   LLVM_DEBUG({
4418     if (!parsingPPDirective()) {
4419       llvm::dbgs() << "Adding unwrapped line:\n";
4420       printDebugInfo(*Line);
4421     }
4422   });
4423 
4424   // If this line closes a block when in Whitesmiths mode, remember that
4425   // information so that the level can be decreased after the line is added.
4426   // This has to happen after the addition of the line since the line itself
4427   // needs to be indented.
4428   bool ClosesWhitesmithsBlock =
4429       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4430       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4431 
4432   // If the current line was expanded from a macro call, we use it to
4433   // reconstruct an unwrapped line from the structure of the expanded unwrapped
4434   // line and the unexpanded token stream.
4435   if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4436     if (!Reconstruct)
4437       Reconstruct.emplace(Line->Level, Unexpanded);
4438     Reconstruct->addLine(*Line);
4439 
4440     // While the reconstructed unexpanded lines are stored in the normal
4441     // flow of lines, the expanded lines are stored on the side to be analyzed
4442     // in an extra step.
4443     CurrentExpandedLines.push_back(std::move(*Line));
4444 
4445     if (Reconstruct->finished()) {
4446       UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4447       assert(!Reconstructed.Tokens.empty() &&
4448              "Reconstructed must at least contain the macro identifier.");
4449       assert(!parsingPPDirective());
4450       LLVM_DEBUG({
4451         llvm::dbgs() << "Adding unexpanded line:\n";
4452         printDebugInfo(Reconstructed);
4453       });
4454       ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4455       Lines.push_back(std::move(Reconstructed));
4456       CurrentExpandedLines.clear();
4457       Reconstruct.reset();
4458     }
4459   } else {
4460     // At the top level we only get here when no unexpansion is going on, or
4461     // when conditional formatting led to unfinished macro reconstructions.
4462     assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4463     CurrentLines->push_back(std::move(*Line));
4464   }
4465   Line->Tokens.clear();
4466   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4467   Line->FirstStartColumn = 0;
4468   Line->IsContinuation = false;
4469   Line->SeenDecltypeAuto = false;
4470 
4471   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4472     --Line->Level;
4473   if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4474     CurrentLines->append(
4475         std::make_move_iterator(PreprocessorDirectives.begin()),
4476         std::make_move_iterator(PreprocessorDirectives.end()));
4477     PreprocessorDirectives.clear();
4478   }
4479   // Disconnect the current token from the last token on the previous line.
4480   FormatTok->Previous = nullptr;
4481 }
4482 
4483 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4484 
4485 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4486   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4487          FormatTok.NewlinesBefore > 0;
4488 }
4489 
4490 // Checks if \p FormatTok is a line comment that continues the line comment
4491 // section on \p Line.
4492 static bool
4493 continuesLineCommentSection(const FormatToken &FormatTok,
4494                             const UnwrappedLine &Line,
4495                             const llvm::Regex &CommentPragmasRegex) {
4496   if (Line.Tokens.empty())
4497     return false;
4498 
4499   StringRef IndentContent = FormatTok.TokenText;
4500   if (FormatTok.TokenText.starts_with("//") ||
4501       FormatTok.TokenText.starts_with("/*")) {
4502     IndentContent = FormatTok.TokenText.substr(2);
4503   }
4504   if (CommentPragmasRegex.match(IndentContent))
4505     return false;
4506 
4507   // If Line starts with a line comment, then FormatTok continues the comment
4508   // section if its original column is greater or equal to the original start
4509   // column of the line.
4510   //
4511   // Define the min column token of a line as follows: if a line ends in '{' or
4512   // contains a '{' followed by a line comment, then the min column token is
4513   // that '{'. Otherwise, the min column token of the line is the first token of
4514   // the line.
4515   //
4516   // If Line starts with a token other than a line comment, then FormatTok
4517   // continues the comment section if its original column is greater than the
4518   // original start column of the min column token of the line.
4519   //
4520   // For example, the second line comment continues the first in these cases:
4521   //
4522   // // first line
4523   // // second line
4524   //
4525   // and:
4526   //
4527   // // first line
4528   //  // second line
4529   //
4530   // and:
4531   //
4532   // int i; // first line
4533   //  // second line
4534   //
4535   // and:
4536   //
4537   // do { // first line
4538   //      // second line
4539   //   int i;
4540   // } while (true);
4541   //
4542   // and:
4543   //
4544   // enum {
4545   //   a, // first line
4546   //    // second line
4547   //   b
4548   // };
4549   //
4550   // The second line comment doesn't continue the first in these cases:
4551   //
4552   //   // first line
4553   //  // second line
4554   //
4555   // and:
4556   //
4557   // int i; // first line
4558   // // second line
4559   //
4560   // and:
4561   //
4562   // do { // first line
4563   //   // second line
4564   //   int i;
4565   // } while (true);
4566   //
4567   // and:
4568   //
4569   // enum {
4570   //   a, // first line
4571   //   // second line
4572   // };
4573   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4574 
4575   // Scan for '{//'. If found, use the column of '{' as a min column for line
4576   // comment section continuation.
4577   const FormatToken *PreviousToken = nullptr;
4578   for (const UnwrappedLineNode &Node : Line.Tokens) {
4579     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4580         isLineComment(*Node.Tok)) {
4581       MinColumnToken = PreviousToken;
4582       break;
4583     }
4584     PreviousToken = Node.Tok;
4585 
4586     // Grab the last newline preceding a token in this unwrapped line.
4587     if (Node.Tok->NewlinesBefore > 0)
4588       MinColumnToken = Node.Tok;
4589   }
4590   if (PreviousToken && PreviousToken->is(tok::l_brace))
4591     MinColumnToken = PreviousToken;
4592 
4593   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4594                               MinColumnToken);
4595 }
4596 
4597 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4598   bool JustComments = Line->Tokens.empty();
4599   for (FormatToken *Tok : CommentsBeforeNextToken) {
4600     // Line comments that belong to the same line comment section are put on the
4601     // same line since later we might want to reflow content between them.
4602     // Additional fine-grained breaking of line comment sections is controlled
4603     // by the class BreakableLineCommentSection in case it is desirable to keep
4604     // several line comment sections in the same unwrapped line.
4605     //
4606     // FIXME: Consider putting separate line comment sections as children to the
4607     // unwrapped line instead.
4608     Tok->ContinuesLineCommentSection =
4609         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4610     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4611       addUnwrappedLine();
4612     pushToken(Tok);
4613   }
4614   if (NewlineBeforeNext && JustComments)
4615     addUnwrappedLine();
4616   CommentsBeforeNextToken.clear();
4617 }
4618 
4619 void UnwrappedLineParser::nextToken(int LevelDifference) {
4620   if (eof())
4621     return;
4622   flushComments(isOnNewLine(*FormatTok));
4623   pushToken(FormatTok);
4624   FormatToken *Previous = FormatTok;
4625   if (!Style.isJavaScript())
4626     readToken(LevelDifference);
4627   else
4628     readTokenWithJavaScriptASI();
4629   FormatTok->Previous = Previous;
4630   if (Style.isVerilog()) {
4631     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4632     // keywords like `begin`, we can't treat them the same as left braces
4633     // because some contexts require one of them.  For example structs use
4634     // braces and if blocks use keywords, and a left brace can occur in an if
4635     // statement, but it is not a block.  For keywords like `end`, we simply
4636     // treat them the same as right braces.
4637     if (Keywords.isVerilogEnd(*FormatTok))
4638       FormatTok->Tok.setKind(tok::r_brace);
4639   }
4640 }
4641 
4642 void UnwrappedLineParser::distributeComments(
4643     const SmallVectorImpl<FormatToken *> &Comments,
4644     const FormatToken *NextTok) {
4645   // Whether or not a line comment token continues a line is controlled by
4646   // the method continuesLineCommentSection, with the following caveat:
4647   //
4648   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4649   // that each comment line from the trail is aligned with the next token, if
4650   // the next token exists. If a trail exists, the beginning of the maximal
4651   // trail is marked as a start of a new comment section.
4652   //
4653   // For example in this code:
4654   //
4655   // int a; // line about a
4656   //   // line 1 about b
4657   //   // line 2 about b
4658   //   int b;
4659   //
4660   // the two lines about b form a maximal trail, so there are two sections, the
4661   // first one consisting of the single comment "// line about a" and the
4662   // second one consisting of the next two comments.
4663   if (Comments.empty())
4664     return;
4665   bool ShouldPushCommentsInCurrentLine = true;
4666   bool HasTrailAlignedWithNextToken = false;
4667   unsigned StartOfTrailAlignedWithNextToken = 0;
4668   if (NextTok) {
4669     // We are skipping the first element intentionally.
4670     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4671       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4672         HasTrailAlignedWithNextToken = true;
4673         StartOfTrailAlignedWithNextToken = i;
4674       }
4675     }
4676   }
4677   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4678     FormatToken *FormatTok = Comments[i];
4679     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4680       FormatTok->ContinuesLineCommentSection = false;
4681     } else {
4682       FormatTok->ContinuesLineCommentSection =
4683           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4684     }
4685     if (!FormatTok->ContinuesLineCommentSection &&
4686         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4687       ShouldPushCommentsInCurrentLine = false;
4688     }
4689     if (ShouldPushCommentsInCurrentLine)
4690       pushToken(FormatTok);
4691     else
4692       CommentsBeforeNextToken.push_back(FormatTok);
4693   }
4694 }
4695 
4696 void UnwrappedLineParser::readToken(int LevelDifference) {
4697   SmallVector<FormatToken *, 1> Comments;
4698   bool PreviousWasComment = false;
4699   bool FirstNonCommentOnLine = false;
4700   do {
4701     FormatTok = Tokens->getNextToken();
4702     assert(FormatTok);
4703     while (FormatTok->getType() == TT_ConflictStart ||
4704            FormatTok->getType() == TT_ConflictEnd ||
4705            FormatTok->getType() == TT_ConflictAlternative) {
4706       if (FormatTok->getType() == TT_ConflictStart)
4707         conditionalCompilationStart(/*Unreachable=*/false);
4708       else if (FormatTok->getType() == TT_ConflictAlternative)
4709         conditionalCompilationAlternative();
4710       else if (FormatTok->getType() == TT_ConflictEnd)
4711         conditionalCompilationEnd();
4712       FormatTok = Tokens->getNextToken();
4713       FormatTok->MustBreakBefore = true;
4714       FormatTok->MustBreakBeforeFinalized = true;
4715     }
4716 
4717     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4718                                       const FormatToken &Tok,
4719                                       bool PreviousWasComment) {
4720       auto IsFirstOnLine = [](const FormatToken &Tok) {
4721         return Tok.HasUnescapedNewline || Tok.IsFirst;
4722       };
4723 
4724       // Consider preprocessor directives preceded by block comments as first
4725       // on line.
4726       if (PreviousWasComment)
4727         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4728       return IsFirstOnLine(Tok);
4729     };
4730 
4731     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4732         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4733     PreviousWasComment = FormatTok->is(tok::comment);
4734 
4735     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4736            (!Style.isVerilog() ||
4737             Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4738            FirstNonCommentOnLine) {
4739       distributeComments(Comments, FormatTok);
4740       Comments.clear();
4741       // If there is an unfinished unwrapped line, we flush the preprocessor
4742       // directives only after that unwrapped line was finished later.
4743       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4744       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4745       assert((LevelDifference >= 0 ||
4746               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4747              "LevelDifference makes Line->Level negative");
4748       Line->Level += LevelDifference;
4749       // Comments stored before the preprocessor directive need to be output
4750       // before the preprocessor directive, at the same level as the
4751       // preprocessor directive, as we consider them to apply to the directive.
4752       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4753           PPBranchLevel > 0) {
4754         Line->Level += PPBranchLevel;
4755       }
4756       flushComments(isOnNewLine(*FormatTok));
4757       parsePPDirective();
4758       PreviousWasComment = FormatTok->is(tok::comment);
4759       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4760           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4761     }
4762 
4763     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4764         !Line->InPPDirective) {
4765       continue;
4766     }
4767 
4768     if (FormatTok->is(tok::identifier) &&
4769         Macros.defined(FormatTok->TokenText) &&
4770         // FIXME: Allow expanding macros in preprocessor directives.
4771         !Line->InPPDirective) {
4772       FormatToken *ID = FormatTok;
4773       unsigned Position = Tokens->getPosition();
4774 
4775       // To correctly parse the code, we need to replace the tokens of the macro
4776       // call with its expansion.
4777       auto PreCall = std::move(Line);
4778       Line.reset(new UnwrappedLine);
4779       bool OldInExpansion = InExpansion;
4780       InExpansion = true;
4781       // We parse the macro call into a new line.
4782       auto Args = parseMacroCall();
4783       InExpansion = OldInExpansion;
4784       assert(Line->Tokens.front().Tok == ID);
4785       // And remember the unexpanded macro call tokens.
4786       auto UnexpandedLine = std::move(Line);
4787       // Reset to the old line.
4788       Line = std::move(PreCall);
4789 
4790       LLVM_DEBUG({
4791         llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4792         if (Args) {
4793           llvm::dbgs() << "(";
4794           for (const auto &Arg : Args.value())
4795             for (const auto &T : Arg)
4796               llvm::dbgs() << T->TokenText << " ";
4797           llvm::dbgs() << ")";
4798         }
4799         llvm::dbgs() << "\n";
4800       });
4801       if (Macros.objectLike(ID->TokenText) && Args &&
4802           !Macros.hasArity(ID->TokenText, Args->size())) {
4803         // The macro is either
4804         // - object-like, but we got argumnets, or
4805         // - overloaded to be both object-like and function-like, but none of
4806         //   the function-like arities match the number of arguments.
4807         // Thus, expand as object-like macro.
4808         LLVM_DEBUG(llvm::dbgs()
4809                    << "Macro \"" << ID->TokenText
4810                    << "\" not overloaded for arity " << Args->size()
4811                    << "or not function-like, using object-like overload.");
4812         Args.reset();
4813         UnexpandedLine->Tokens.resize(1);
4814         Tokens->setPosition(Position);
4815         nextToken();
4816         assert(!Args && Macros.objectLike(ID->TokenText));
4817       }
4818       if ((!Args && Macros.objectLike(ID->TokenText)) ||
4819           (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4820         // Next, we insert the expanded tokens in the token stream at the
4821         // current position, and continue parsing.
4822         Unexpanded[ID] = std::move(UnexpandedLine);
4823         SmallVector<FormatToken *, 8> Expansion =
4824             Macros.expand(ID, std::move(Args));
4825         if (!Expansion.empty())
4826           FormatTok = Tokens->insertTokens(Expansion);
4827 
4828         LLVM_DEBUG({
4829           llvm::dbgs() << "Expanded: ";
4830           for (const auto &T : Expansion)
4831             llvm::dbgs() << T->TokenText << " ";
4832           llvm::dbgs() << "\n";
4833         });
4834       } else {
4835         LLVM_DEBUG({
4836           llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4837                        << "\", because it was used ";
4838           if (Args)
4839             llvm::dbgs() << "with " << Args->size();
4840           else
4841             llvm::dbgs() << "without";
4842           llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4843         });
4844         Tokens->setPosition(Position);
4845         FormatTok = ID;
4846       }
4847     }
4848 
4849     if (FormatTok->isNot(tok::comment)) {
4850       distributeComments(Comments, FormatTok);
4851       Comments.clear();
4852       return;
4853     }
4854 
4855     Comments.push_back(FormatTok);
4856   } while (!eof());
4857 
4858   distributeComments(Comments, nullptr);
4859   Comments.clear();
4860 }
4861 
4862 namespace {
4863 template <typename Iterator>
4864 void pushTokens(Iterator Begin, Iterator End,
4865                 llvm::SmallVectorImpl<FormatToken *> &Into) {
4866   for (auto I = Begin; I != End; ++I) {
4867     Into.push_back(I->Tok);
4868     for (const auto &Child : I->Children)
4869       pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4870   }
4871 }
4872 } // namespace
4873 
4874 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4875 UnwrappedLineParser::parseMacroCall() {
4876   std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4877   assert(Line->Tokens.empty());
4878   nextToken();
4879   if (FormatTok->isNot(tok::l_paren))
4880     return Args;
4881   unsigned Position = Tokens->getPosition();
4882   FormatToken *Tok = FormatTok;
4883   nextToken();
4884   Args.emplace();
4885   auto ArgStart = std::prev(Line->Tokens.end());
4886 
4887   int Parens = 0;
4888   do {
4889     switch (FormatTok->Tok.getKind()) {
4890     case tok::l_paren:
4891       ++Parens;
4892       nextToken();
4893       break;
4894     case tok::r_paren: {
4895       if (Parens > 0) {
4896         --Parens;
4897         nextToken();
4898         break;
4899       }
4900       Args->push_back({});
4901       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4902       nextToken();
4903       return Args;
4904     }
4905     case tok::comma: {
4906       if (Parens > 0) {
4907         nextToken();
4908         break;
4909       }
4910       Args->push_back({});
4911       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4912       nextToken();
4913       ArgStart = std::prev(Line->Tokens.end());
4914       break;
4915     }
4916     default:
4917       nextToken();
4918       break;
4919     }
4920   } while (!eof());
4921   Line->Tokens.resize(1);
4922   Tokens->setPosition(Position);
4923   FormatTok = Tok;
4924   return {};
4925 }
4926 
4927 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4928   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4929   if (MustBreakBeforeNextToken) {
4930     Line->Tokens.back().Tok->MustBreakBefore = true;
4931     Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
4932     MustBreakBeforeNextToken = false;
4933   }
4934 }
4935 
4936 } // end namespace format
4937 } // end namespace clang
4938