xref: /freebsd/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp (revision 415efcecd8b80f68e76376ef2b854cb6f5c84b5a)
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #include <algorithm>
29 #include <utility>
30 
31 #define DEBUG_TYPE "format-parser"
32 
33 namespace clang {
34 namespace format {
35 
36 namespace {
37 
printLine(llvm::raw_ostream & OS,const UnwrappedLine & Line,StringRef Prefix="",bool PrintText=false)38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39                StringRef Prefix = "", bool PrintText = false) {
40   OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41      << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42   bool NewLine = false;
43   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44                                                     E = Line.Tokens.end();
45        I != E; ++I) {
46     if (NewLine) {
47       OS << Prefix;
48       NewLine = false;
49     }
50     OS << I->Tok->Tok.getName() << "["
51        << "T=" << (unsigned)I->Tok->getType()
52        << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53        << "\"] ";
54     for (SmallVectorImpl<UnwrappedLine>::const_iterator
55              CI = I->Children.begin(),
56              CE = I->Children.end();
57          CI != CE; ++CI) {
58       OS << "\n";
59       printLine(OS, *CI, (Prefix + "  ").str());
60       NewLine = true;
61     }
62   }
63   if (!NewLine)
64     OS << "\n";
65 }
66 
printDebugInfo(const UnwrappedLine & Line)67 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
68   printLine(llvm::dbgs(), Line);
69 }
70 
71 class ScopedDeclarationState {
72 public:
ScopedDeclarationState(UnwrappedLine & Line,llvm::BitVector & Stack,bool MustBeDeclaration)73   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74                          bool MustBeDeclaration)
75       : Line(Line), Stack(Stack) {
76     Line.MustBeDeclaration = MustBeDeclaration;
77     Stack.push_back(MustBeDeclaration);
78   }
~ScopedDeclarationState()79   ~ScopedDeclarationState() {
80     Stack.pop_back();
81     if (!Stack.empty())
82       Line.MustBeDeclaration = Stack.back();
83     else
84       Line.MustBeDeclaration = true;
85   }
86 
87 private:
88   UnwrappedLine &Line;
89   llvm::BitVector &Stack;
90 };
91 
92 } // end anonymous namespace
93 
operator <<(std::ostream & Stream,const UnwrappedLine & Line)94 std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
95   llvm::raw_os_ostream OS(Stream);
96   printLine(OS, Line);
97   return Stream;
98 }
99 
100 class ScopedLineState {
101 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)102   ScopedLineState(UnwrappedLineParser &Parser,
103                   bool SwitchToPreprocessorLines = false)
104       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
105     if (SwitchToPreprocessorLines)
106       Parser.CurrentLines = &Parser.PreprocessorDirectives;
107     else if (!Parser.Line->Tokens.empty())
108       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
109     PreBlockLine = std::move(Parser.Line);
110     Parser.Line = std::make_unique<UnwrappedLine>();
111     Parser.Line->Level = PreBlockLine->Level;
112     Parser.Line->PPLevel = PreBlockLine->PPLevel;
113     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
114     Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
115     Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
116   }
117 
~ScopedLineState()118   ~ScopedLineState() {
119     if (!Parser.Line->Tokens.empty())
120       Parser.addUnwrappedLine();
121     assert(Parser.Line->Tokens.empty());
122     Parser.Line = std::move(PreBlockLine);
123     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
124       Parser.MustBreakBeforeNextToken = true;
125     Parser.CurrentLines = OriginalLines;
126   }
127 
128 private:
129   UnwrappedLineParser &Parser;
130 
131   std::unique_ptr<UnwrappedLine> PreBlockLine;
132   SmallVectorImpl<UnwrappedLine> *OriginalLines;
133 };
134 
135 class CompoundStatementIndenter {
136 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)137   CompoundStatementIndenter(UnwrappedLineParser *Parser,
138                             const FormatStyle &Style, unsigned &LineLevel)
139       : CompoundStatementIndenter(Parser, LineLevel,
140                                   Style.BraceWrapping.AfterControlStatement,
141                                   Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)142   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
143                             bool WrapBrace, bool IndentBrace)
144       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
145     if (WrapBrace)
146       Parser->addUnwrappedLine();
147     if (IndentBrace)
148       ++LineLevel;
149   }
~CompoundStatementIndenter()150   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
151 
152 private:
153   unsigned &LineLevel;
154   unsigned OldLineLevel;
155 };
156 
UnwrappedLineParser(SourceManager & SourceMgr,const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned FirstStartColumn,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback,llvm::SpecificBumpPtrAllocator<FormatToken> & Allocator,IdentifierTable & IdentTable)157 UnwrappedLineParser::UnwrappedLineParser(
158     SourceManager &SourceMgr, const FormatStyle &Style,
159     const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
160     ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
161     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
162     IdentifierTable &IdentTable)
163     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
164       CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
165       LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
166       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
167       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
168       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
169                        ? IG_Rejected
170                        : IG_Inited),
171       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
172       Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
173   assert(IsCpp == LangOpts.CXXOperatorNames);
174 }
175 
reset()176 void UnwrappedLineParser::reset() {
177   PPBranchLevel = -1;
178   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
179                      ? IG_Rejected
180                      : IG_Inited;
181   IncludeGuardToken = nullptr;
182   Line.reset(new UnwrappedLine);
183   CommentsBeforeNextToken.clear();
184   FormatTok = nullptr;
185   MustBreakBeforeNextToken = false;
186   IsDecltypeAutoFunction = false;
187   PreprocessorDirectives.clear();
188   CurrentLines = &Lines;
189   DeclarationScopeStack.clear();
190   NestedTooDeep.clear();
191   NestedLambdas.clear();
192   PPStack.clear();
193   Line->FirstStartColumn = FirstStartColumn;
194 
195   if (!Unexpanded.empty())
196     for (FormatToken *Token : AllTokens)
197       Token->MacroCtx.reset();
198   CurrentExpandedLines.clear();
199   ExpandedLines.clear();
200   Unexpanded.clear();
201   InExpansion = false;
202   Reconstruct.reset();
203 }
204 
parse()205 void UnwrappedLineParser::parse() {
206   IndexedTokenSource TokenSource(AllTokens);
207   Line->FirstStartColumn = FirstStartColumn;
208   do {
209     LLVM_DEBUG(llvm::dbgs() << "----\n");
210     reset();
211     Tokens = &TokenSource;
212     TokenSource.reset();
213 
214     readToken();
215     parseFile();
216 
217     // If we found an include guard then all preprocessor directives (other than
218     // the guard) are over-indented by one.
219     if (IncludeGuard == IG_Found) {
220       for (auto &Line : Lines)
221         if (Line.InPPDirective && Line.Level > 0)
222           --Line.Level;
223     }
224 
225     // Create line with eof token.
226     assert(eof());
227     pushToken(FormatTok);
228     addUnwrappedLine();
229 
230     // In a first run, format everything with the lines containing macro calls
231     // replaced by the expansion.
232     if (!ExpandedLines.empty()) {
233       LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
234       for (const auto &Line : Lines) {
235         if (!Line.Tokens.empty()) {
236           auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
237           if (it != ExpandedLines.end()) {
238             for (const auto &Expanded : it->second) {
239               LLVM_DEBUG(printDebugInfo(Expanded));
240               Callback.consumeUnwrappedLine(Expanded);
241             }
242             continue;
243           }
244         }
245         LLVM_DEBUG(printDebugInfo(Line));
246         Callback.consumeUnwrappedLine(Line);
247       }
248       Callback.finishRun();
249     }
250 
251     LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
252     for (const UnwrappedLine &Line : Lines) {
253       LLVM_DEBUG(printDebugInfo(Line));
254       Callback.consumeUnwrappedLine(Line);
255     }
256     Callback.finishRun();
257     Lines.clear();
258     while (!PPLevelBranchIndex.empty() &&
259            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
260       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
261       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
262     }
263     if (!PPLevelBranchIndex.empty()) {
264       ++PPLevelBranchIndex.back();
265       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
266       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
267     }
268   } while (!PPLevelBranchIndex.empty());
269 }
270 
parseFile()271 void UnwrappedLineParser::parseFile() {
272   // The top-level context in a file always has declarations, except for pre-
273   // processor directives and JavaScript files.
274   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
275   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
276                                           MustBeDeclaration);
277   if (Style.Language == FormatStyle::LK_TextProto)
278     parseBracedList();
279   else
280     parseLevel();
281   // Make sure to format the remaining tokens.
282   //
283   // LK_TextProto is special since its top-level is parsed as the body of a
284   // braced list, which does not necessarily have natural line separators such
285   // as a semicolon. Comments after the last entry that have been determined to
286   // not belong to that line, as in:
287   //   key: value
288   //   // endfile comment
289   // do not have a chance to be put on a line of their own until this point.
290   // Here we add this newline before end-of-file comments.
291   if (Style.Language == FormatStyle::LK_TextProto &&
292       !CommentsBeforeNextToken.empty()) {
293     addUnwrappedLine();
294   }
295   flushComments(true);
296   addUnwrappedLine();
297 }
298 
parseCSharpGenericTypeConstraint()299 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
300   do {
301     switch (FormatTok->Tok.getKind()) {
302     case tok::l_brace:
303       return;
304     default:
305       if (FormatTok->is(Keywords.kw_where)) {
306         addUnwrappedLine();
307         nextToken();
308         parseCSharpGenericTypeConstraint();
309         break;
310       }
311       nextToken();
312       break;
313     }
314   } while (!eof());
315 }
316 
parseCSharpAttribute()317 void UnwrappedLineParser::parseCSharpAttribute() {
318   int UnpairedSquareBrackets = 1;
319   do {
320     switch (FormatTok->Tok.getKind()) {
321     case tok::r_square:
322       nextToken();
323       --UnpairedSquareBrackets;
324       if (UnpairedSquareBrackets == 0) {
325         addUnwrappedLine();
326         return;
327       }
328       break;
329     case tok::l_square:
330       ++UnpairedSquareBrackets;
331       nextToken();
332       break;
333     default:
334       nextToken();
335       break;
336     }
337   } while (!eof());
338 }
339 
precededByCommentOrPPDirective() const340 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
341   if (!Lines.empty() && Lines.back().InPPDirective)
342     return true;
343 
344   const FormatToken *Previous = Tokens->getPreviousToken();
345   return Previous && Previous->is(tok::comment) &&
346          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
347 }
348 
349 /// \brief Parses a level: the run of comments, nested blocks, case labels,
/// attributes and other structural elements up to the closing brace of this
/// level (when \p OpeningBrace is set) or the end of the file.
350 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
351 /// \param IfKind The \p if statement kind in the level.
352 /// \param IfLeftBrace The left brace of the \p if block in the level.
353 /// \returns true if a simple block of if/else/for/while, or false otherwise.
354 /// (A simple block has a single statement.)
parseLevel(const FormatToken * OpeningBrace,IfStmtKind * IfKind,FormatToken ** IfLeftBrace)355 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
356                                      IfStmtKind *IfKind,
357                                      FormatToken **IfLeftBrace) {
358   const bool InRequiresExpression =
359       OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // True if brace removal is disabled or the level starts right after a
  // comment or preprocessor directive; in both cases the r_brace handling
  // below does not report a simple block.
360   const bool IsPrecededByCommentOrPPDirective =
361       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
362   FormatToken *IfLBrace = nullptr;
363   bool HasDoWhile = false;
364   bool HasLabel = false;
365   unsigned StatementCount = 0;
366   bool SwitchLabelEncountered = false;
367 
368   do {
    // Skip an attribute token together with its parenthesized argument list.
369     if (FormatTok->isAttribute()) {
370       nextToken();
371       if (FormatTok->is(tok::l_paren))
372         parseParens();
373       continue;
374     }
    // Macro block begin/end tokens are dispatched like '{' and '}'.
375     tok::TokenKind Kind = FormatTok->Tok.getKind();
376     if (FormatTok->is(TT_MacroBlockBegin))
377       Kind = tok::l_brace;
378     else if (FormatTok->is(TT_MacroBlockEnd))
379       Kind = tok::r_brace;
380 
    // Parses one structural element and counts it as a statement. Once
    // HasDoWhile / HasLabel are true, nullptr is passed instead of their
    // address.
381     auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
382                          &HasLabel, &StatementCount] {
383       parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
384                              HasDoWhile ? nullptr : &HasDoWhile,
385                              HasLabel ? nullptr : &HasLabel);
386       ++StatementCount;
387       assert(StatementCount > 0 && "StatementCount overflow!");
388     };
389 
390     switch (Kind) {
391     case tok::comment:
392       nextToken();
393       addUnwrappedLine();
394       break;
395     case tok::l_brace:
396       if (InRequiresExpression) {
397         FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
398       } else if (FormatTok->Previous &&
399                  FormatTok->Previous->ClosesRequiresClause) {
400         // We need the 'default' case here to correctly parse a function
401         // l_brace.
402         ParseDefault();
403         continue;
404       }
405       if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
406         if (tryToParseBracedList())
407           continue;
408         FormatTok->setFinalizedType(TT_BlockLBrace);
409       }
410       parseBlock();
411       ++StatementCount;
412       assert(StatementCount > 0 && "StatementCount overflow!");
413       addUnwrappedLine();
414       break;
415     case tok::r_brace:
      // With an opening brace, the closing brace ends this level and is left
      // for the caller; the return value says whether the block is simple.
416       if (OpeningBrace) {
417         if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
418             !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
419           return false;
420         }
421         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
422             HasDoWhile || IsPrecededByCommentOrPPDirective ||
423             precededByCommentOrPPDirective()) {
424           return false;
425         }
        // A comment on the same line right after the '}' also prevents the
        // block from being reported as simple.
426         const FormatToken *Next = Tokens->peekNextToken();
427         if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
428           return false;
429         if (IfLeftBrace)
430           *IfLeftBrace = IfLBrace;
431         return true;
432       }
      // No opening brace for this level: consume the '}' and end the line.
433       nextToken();
434       addUnwrappedLine();
435       break;
436     case tok::kw_default: {
      // Peek at the next non-comment token, then rewind to the saved position.
437       unsigned StoredPosition = Tokens->getPosition();
438       auto *Next = Tokens->getNextNonComment();
439       FormatTok = Tokens->setPosition(StoredPosition);
440       if (!Next->isOneOf(tok::colon, tok::arrow)) {
441         // default not followed by `:` or `->` is not a case label; treat it
442         // like an identifier.
443         parseStructuralElement();
444         break;
445       }
446       // Else, if it is 'default:', fall through to the case handling.
447       [[fallthrough]];
448     }
449     case tok::kw_case:
450       if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
451           (Style.isJavaScript() && Line->MustBeDeclaration)) {
452         // Proto: there are no switch/case statements
453         // Verilog: Case labels don't have this word. We handle case
454         // labels including default in TokenAnnotator.
455         // JavaScript: A 'case: string' style field declaration.
456         ParseDefault();
457         break;
458       }
      // The level is bumped at most once per level, on the first label seen.
459       if (!SwitchLabelEncountered &&
460           (Style.IndentCaseLabels ||
461            (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
462            (Line->InPPDirective && Line->Level == 1))) {
463         ++Line->Level;
464       }
465       SwitchLabelEncountered = true;
466       parseStructuralElement();
467       break;
468     case tok::l_square:
469       if (Style.isCSharp()) {
470         nextToken();
471         parseCSharpAttribute();
472         break;
473       }
474       if (handleCppAttributes())
475         break;
476       [[fallthrough]];
477     default:
478       ParseDefault();
479       break;
480     }
481   } while (!eof());
482 
483   return false;
484 }
485 
/// Classifies braces starting at the current token (which must be a '{') as
/// either a block (BK_Block) or a braced init list (BK_BracedInit).
///
/// Scans forward through the token stream, keeping a stack of open left
/// braces, until the stack is empty again or end of file is reached. The kind
/// of each brace pair is decided when its closing brace is seen, mostly from
/// the token following the closing brace and the token preceding it. Braces
/// that are still open at the end are marked as blocks. The token position is
/// restored before returning.
///
/// \param ExpectClassBody If true, a ';' following the closing brace of the
/// outermost brace pair does not by itself mark that pair as a braced list.
calculateBraceTypes(bool ExpectClassBody)486 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
487   // We'll parse forward through the tokens until we hit
488   // a closing brace or eof - note that getNextToken() will
489   // parse macros, so this will magically work inside macro
490   // definitions, too.
491   unsigned StoredPosition = Tokens->getPosition();
492   FormatToken *Tok = FormatTok;
493   const FormatToken *PrevTok = Tok->Previous;
494   // Keep a stack of positions of lbrace tokens. We will
495   // update information about whether an lbrace starts a
496   // braced init list or a different block during the loop.
497   struct StackEntry {
498     FormatToken *Tok;
499     const FormatToken *PrevTok;
500   };
501   SmallVector<StackEntry, 8> LBraceStack;
502   assert(Tok->is(tok::l_brace));
503 
  // Loop invariant: Tok is the token being classified, PrevTok the one
  // processed before it, NextTok the following non-comment token.
504   do {
505     auto *NextTok = Tokens->getNextNonComment();
506 
507     if (!Line->InMacroBody && !Style.isTableGen()) {
508       // Skip PPDirective lines and comments.
509       while (NextTok->is(tok::hash)) {
510         NextTok = Tokens->getNextToken();
511         if (NextTok->is(tok::pp_not_keyword))
512           break;
513         do {
514           NextTok = Tokens->getNextToken();
515         } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));
516 
517         while (NextTok->is(tok::comment))
518           NextTok = Tokens->getNextToken();
519       }
520     }
521 
522     switch (Tok->Tok.getKind()) {
523     case tok::l_brace:
524       if (Style.isJavaScript() && PrevTok) {
525         if (PrevTok->isOneOf(tok::colon, tok::less)) {
526           // A ':' indicates this code is in a type, or a braced list
527           // following a label in an object literal ({a: {b: 1}}).
528           // A '<' could be an object used in a comparison, but that is nonsense
529           // code (can never return true), so more likely it is a generic type
530           // argument (`X<{a: string; b: number}>`).
531           // The code below could be confused by semicolons between the
532           // individual members in a type member list, which would normally
533           // trigger BK_Block. In both cases, this must be parsed as an inline
534           // braced init.
535           Tok->setBlockKind(BK_BracedInit);
536         } else if (PrevTok->is(tok::r_paren)) {
537           // `) { }` can only occur in function or method declarations in JS.
538           Tok->setBlockKind(BK_Block);
539         }
540       } else {
541         Tok->setBlockKind(BK_Unknown);
542       }
543       LBraceStack.push_back({Tok, PrevTok});
544       break;
545     case tok::r_brace:
546       if (LBraceStack.empty())
547         break;
      // Only decide here if the matching '{' has not been classified yet.
548       if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
549         bool ProbablyBracedList = false;
550         if (Style.Language == FormatStyle::LK_Proto) {
551           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
552         } else if (LBrace->isNot(TT_EnumLBrace)) {
553           // Using OriginalColumn to distinguish between ObjC methods and
554           // binary operators is a bit hacky.
555           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
556                                   NextTok->OriginalColumn == 0;
557 
558           // Try to detect a braced list. Note that regardless how we mark inner
559           // braces here, we will overwrite the BlockKind later if we parse a
560           // braced list (where all blocks inside are by default braced lists),
561           // or when we explicitly detect blocks (for example while parsing
562           // lambdas).
563 
564           // If we already marked the opening brace as braced list, the closing
565           // must also be part of it.
566           ProbablyBracedList = LBrace->is(TT_BracedListLBrace);
567 
568           ProbablyBracedList = ProbablyBracedList ||
569                                (Style.isJavaScript() &&
570                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
571                                                  Keywords.kw_as));
572           ProbablyBracedList =
573               ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
574                                                NextTok->is(tok::l_paren)));
575 
576           // If there is a comma, semicolon or right paren after the closing
577           // brace, we assume this is a braced initializer list.
578           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
579           // braced list in JS.
580           ProbablyBracedList =
581               ProbablyBracedList ||
582               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
583                                tok::r_paren, tok::r_square, tok::ellipsis);
584 
585           // Distinguish between braced list in a constructor initializer list
586           // followed by constructor body, or just adjacent blocks.
587           ProbablyBracedList =
588               ProbablyBracedList ||
589               (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
590                LBraceStack.back().PrevTok->isOneOf(tok::identifier,
591                                                    tok::greater));
592 
593           ProbablyBracedList =
594               ProbablyBracedList ||
595               (NextTok->is(tok::identifier) &&
596                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
597 
598           ProbablyBracedList = ProbablyBracedList ||
599                                (NextTok->is(tok::semi) &&
600                                 (!ExpectClassBody || LBraceStack.size() != 1));
601 
602           ProbablyBracedList =
603               ProbablyBracedList ||
604               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
605 
          // NOTE: this branch overwrites the decision made so far and also
          // advances NextTok past the '['.
606           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
607             // We can have an array subscript after a braced init
608             // list, but C++11 attributes are expected after blocks.
609             NextTok = Tokens->getNextToken();
610             ProbablyBracedList = NextTok->isNot(tok::l_square);
611           }
612 
613           // Cpp macro definition body that is a nonempty braced list or block:
614           if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
615               !FormatTok->Previous && NextTok->is(tok::eof) &&
616               // A statement can end with only `;` (simple statement), a block
617               // closing brace (compound statement), or `:` (label statement).
618               // If PrevTok is a block opening brace, Tok ends an empty block.
619               !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
620             ProbablyBracedList = true;
621           }
622         }
        // Both braces of the pair always get the same kind.
623         const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
624         Tok->setBlockKind(BlockKind);
625         LBrace->setBlockKind(BlockKind);
626       }
627       LBraceStack.pop_back();
628       break;
629     case tok::identifier:
630       if (Tok->isNot(TT_StatementMacro))
631         break;
632       [[fallthrough]];
    // Any of these tokens inside a still-unclassified brace marks the
    // innermost open brace as a block.
633     case tok::at:
634     case tok::semi:
635     case tok::kw_if:
636     case tok::kw_while:
637     case tok::kw_for:
638     case tok::kw_switch:
639     case tok::kw_try:
640     case tok::kw___try:
641       if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
642         LBraceStack.back().Tok->setBlockKind(BK_Block);
643       break;
644     default:
645       break;
646     }
647 
648     PrevTok = Tok;
649     Tok = NextTok;
650   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
651 
652   // Assume other blocks for all unclosed opening braces.
653   for (const auto &Entry : LBraceStack)
654     if (Entry.Tok->is(BK_Unknown))
655       Entry.Tok->setBlockKind(BK_Block);
656 
  // Rewind the token source to where the scan started.
657   FormatTok = Tokens->setPosition(StoredPosition);
658 }
659 
660 // Sets the token type of the directly previous right brace.
setPreviousRBraceType(TokenType Type)661 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
662   if (auto Prev = FormatTok->getPreviousNonComment();
663       Prev && Prev->is(tok::r_brace)) {
664     Prev->setFinalizedType(Type);
665   }
666 }
667 
/// Folds the std::hash value of \p v into \p seed, so that a sequence of
/// calls yields a hash that depends on every value and on their order of
/// combination.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
673 
computePPHash() const674 size_t UnwrappedLineParser::computePPHash() const {
675   size_t h = 0;
676   for (const auto &i : PPStack) {
677     hash_combine(h, size_t(i.Kind));
678     hash_combine(h, i.Line);
679   }
680   return h;
681 }
682 
683 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
684 // is not null, subtracts its length (plus the preceding space) when computing
685 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
686 // running the token annotator on it so that we can restore them afterward.
mightFitOnOneLine(UnwrappedLine & ParsedLine,const FormatToken * OpeningBrace) const687 bool UnwrappedLineParser::mightFitOnOneLine(
688     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
689   const auto ColumnLimit = Style.ColumnLimit;
690   if (ColumnLimit == 0)
691     return true;
692 
693   auto &Tokens = ParsedLine.Tokens;
694   assert(!Tokens.empty());
695 
696   const auto *LastToken = Tokens.back().Tok;
697   assert(LastToken);
698 
699   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
700 
701   int Index = 0;
702   for (const auto &Token : Tokens) {
703     assert(Token.Tok);
704     auto &SavedToken = SavedTokens[Index++];
705     SavedToken.Tok = new FormatToken;
706     SavedToken.Tok->copyFrom(*Token.Tok);
707     SavedToken.Children = std::move(Token.Children);
708   }
709 
710   AnnotatedLine Line(ParsedLine);
711   assert(Line.Last == LastToken);
712 
713   TokenAnnotator Annotator(Style, Keywords);
714   Annotator.annotate(Line);
715   Annotator.calculateFormattingInformation(Line);
716 
717   auto Length = LastToken->TotalLength;
718   if (OpeningBrace) {
719     assert(OpeningBrace != Tokens.front().Tok);
720     if (auto Prev = OpeningBrace->Previous;
721         Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
722       Length -= ColumnLimit;
723     }
724     Length -= OpeningBrace->TokenText.size() + 1;
725   }
726 
727   if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
728     assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
729     Length -= FirstToken->TokenText.size() + 1;
730   }
731 
732   Index = 0;
733   for (auto &Token : Tokens) {
734     const auto &SavedToken = SavedTokens[Index++];
735     Token.Tok->copyFrom(*SavedToken.Tok);
736     Token.Children = std::move(SavedToken.Children);
737     delete SavedToken.Tok;
738   }
739 
740   // If these change PPLevel needs to be used for get correct indentation.
741   assert(!Line.InMacroBody);
742   assert(!Line.InPPDirective);
743   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
744 }
745 
parseBlock(bool MustBeDeclaration,unsigned AddLevels,bool MunchSemi,bool KeepBraces,IfStmtKind * IfKind,bool UnindentWhitesmithsBraces)746 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
747                                              unsigned AddLevels, bool MunchSemi,
748                                              bool KeepBraces,
749                                              IfStmtKind *IfKind,
750                                              bool UnindentWhitesmithsBraces) {
751   auto HandleVerilogBlockLabel = [this]() {
752     // ":" name
753     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
754       nextToken();
755       if (Keywords.isVerilogIdentifier(*FormatTok))
756         nextToken();
757     }
758   };
759 
760   // Whether this is a Verilog-specific block that has a special header like a
761   // module.
762   const bool VerilogHierarchy =
763       Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
764   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
765           (Style.isVerilog() &&
766            (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
767          "'{' or macro block token expected");
768   FormatToken *Tok = FormatTok;
769   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
770   auto Index = CurrentLines->size();
771   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
772   FormatTok->setBlockKind(BK_Block);
773 
774   // For Whitesmiths mode, jump to the next level prior to skipping over the
775   // braces.
776   if (!VerilogHierarchy && AddLevels > 0 &&
777       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
778     ++Line->Level;
779   }
780 
781   size_t PPStartHash = computePPHash();
782 
783   const unsigned InitialLevel = Line->Level;
784   if (VerilogHierarchy) {
785     AddLevels += parseVerilogHierarchyHeader();
786   } else {
787     nextToken(/*LevelDifference=*/AddLevels);
788     HandleVerilogBlockLabel();
789   }
790 
791   // Bail out if there are too many levels. Otherwise, the stack might overflow.
792   if (Line->Level > 300)
793     return nullptr;
794 
795   if (MacroBlock && FormatTok->is(tok::l_paren))
796     parseParens();
797 
798   size_t NbPreprocessorDirectives =
799       !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
800   addUnwrappedLine();
801   size_t OpeningLineIndex =
802       CurrentLines->empty()
803           ? (UnwrappedLine::kInvalidIndex)
804           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
805 
806   // Whitesmiths is weird here. The brace needs to be indented for the namespace
807   // block, but the block itself may not be indented depending on the style
808   // settings. This allows the format to back up one level in those cases.
809   if (UnindentWhitesmithsBraces)
810     --Line->Level;
811 
812   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
813                                           MustBeDeclaration);
814   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
815     Line->Level += AddLevels;
816 
817   FormatToken *IfLBrace = nullptr;
818   const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
819 
820   if (eof())
821     return IfLBrace;
822 
823   if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
824                  : FormatTok->isNot(tok::r_brace)) {
825     Line->Level = InitialLevel;
826     FormatTok->setBlockKind(BK_Block);
827     return IfLBrace;
828   }
829 
830   if (FormatTok->is(tok::r_brace)) {
831     FormatTok->setBlockKind(BK_Block);
832     if (Tok->is(TT_NamespaceLBrace))
833       FormatTok->setFinalizedType(TT_NamespaceRBrace);
834   }
835 
836   const bool IsFunctionRBrace =
837       FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
838 
839   auto RemoveBraces = [=]() mutable {
840     if (!SimpleBlock)
841       return false;
842     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
843     assert(FormatTok->is(tok::r_brace));
844     const bool WrappedOpeningBrace = !Tok->Previous;
845     if (WrappedOpeningBrace && FollowedByComment)
846       return false;
847     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
848     if (KeepBraces && !HasRequiredIfBraces)
849       return false;
850     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
851       const FormatToken *Previous = Tokens->getPreviousToken();
852       assert(Previous);
853       if (Previous->is(tok::r_brace) && !Previous->Optional)
854         return false;
855     }
856     assert(!CurrentLines->empty());
857     auto &LastLine = CurrentLines->back();
858     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
859       return false;
860     if (Tok->is(TT_ElseLBrace))
861       return true;
862     if (WrappedOpeningBrace) {
863       assert(Index > 0);
864       --Index; // The line above the wrapped l_brace.
865       Tok = nullptr;
866     }
867     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
868   };
869   if (RemoveBraces()) {
870     Tok->MatchingParen = FormatTok;
871     FormatTok->MatchingParen = Tok;
872   }
873 
874   size_t PPEndHash = computePPHash();
875 
876   // Munch the closing brace.
877   nextToken(/*LevelDifference=*/-AddLevels);
878 
879   // When this is a function block and there is an unnecessary semicolon
880   // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
881   // it later).
882   if (Style.RemoveSemicolon && IsFunctionRBrace) {
883     while (FormatTok->is(tok::semi)) {
884       FormatTok->Optional = true;
885       nextToken();
886     }
887   }
888 
889   HandleVerilogBlockLabel();
890 
891   if (MacroBlock && FormatTok->is(tok::l_paren))
892     parseParens();
893 
894   Line->Level = InitialLevel;
895 
896   if (FormatTok->is(tok::kw_noexcept)) {
897     // A noexcept in a requires expression.
898     nextToken();
899   }
900 
901   if (FormatTok->is(tok::arrow)) {
902     // Following the } or noexcept we can find a trailing return type arrow
903     // as part of an implicit conversion constraint.
904     nextToken();
905     parseStructuralElement();
906   }
907 
908   if (MunchSemi && FormatTok->is(tok::semi))
909     nextToken();
910 
911   if (PPStartHash == PPEndHash) {
912     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
913     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
914       // Update the opening line to add the forward reference as well
915       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
916           CurrentLines->size() - 1;
917     }
918   }
919 
920   return IfLBrace;
921 }
922 
isGoogScope(const UnwrappedLine & Line)923 static bool isGoogScope(const UnwrappedLine &Line) {
924   // FIXME: Closure-library specific stuff should not be hard-coded but be
925   // configurable.
926   if (Line.Tokens.size() < 4)
927     return false;
928   auto I = Line.Tokens.begin();
929   if (I->Tok->TokenText != "goog")
930     return false;
931   ++I;
932   if (I->Tok->isNot(tok::period))
933     return false;
934   ++I;
935   if (I->Tok->TokenText != "scope")
936     return false;
937   ++I;
938   return I->Tok->is(tok::l_paren);
939 }
940 
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)941 static bool isIIFE(const UnwrappedLine &Line,
942                    const AdditionalKeywords &Keywords) {
943   // Look for the start of an immediately invoked anonymous function.
944   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
945   // This is commonly done in JavaScript to create a new, anonymous scope.
946   // Example: (function() { ... })()
947   if (Line.Tokens.size() < 3)
948     return false;
949   auto I = Line.Tokens.begin();
950   if (I->Tok->isNot(tok::l_paren))
951     return false;
952   ++I;
953   if (I->Tok->isNot(Keywords.kw_function))
954     return false;
955   ++I;
956   return I->Tok->is(tok::l_paren);
957 }
958 
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)959 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
960                                    const FormatToken &InitialToken) {
961   tok::TokenKind Kind = InitialToken.Tok.getKind();
962   if (InitialToken.is(TT_NamespaceMacro))
963     Kind = tok::kw_namespace;
964 
965   switch (Kind) {
966   case tok::kw_namespace:
967     return Style.BraceWrapping.AfterNamespace;
968   case tok::kw_class:
969     return Style.BraceWrapping.AfterClass;
970   case tok::kw_union:
971     return Style.BraceWrapping.AfterUnion;
972   case tok::kw_struct:
973     return Style.BraceWrapping.AfterStruct;
974   case tok::kw_enum:
975     return Style.BraceWrapping.AfterEnum;
976   default:
977     return false;
978   }
979 }
980 
parseChildBlock()981 void UnwrappedLineParser::parseChildBlock() {
982   assert(FormatTok->is(tok::l_brace));
983   FormatTok->setBlockKind(BK_Block);
984   const FormatToken *OpeningBrace = FormatTok;
985   nextToken();
986   {
987     bool SkipIndent = (Style.isJavaScript() &&
988                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
989     ScopedLineState LineState(*this);
990     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
991                                             /*MustBeDeclaration=*/false);
992     Line->Level += SkipIndent ? 0 : 1;
993     parseLevel(OpeningBrace);
994     flushComments(isOnNewLine(*FormatTok));
995     Line->Level -= SkipIndent ? 0 : 1;
996   }
997   nextToken();
998 }
999 
parsePPDirective()1000 void UnwrappedLineParser::parsePPDirective() {
1001   assert(FormatTok->is(tok::hash) && "'#' expected");
1002   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1003 
1004   nextToken();
1005 
1006   if (!FormatTok->Tok.getIdentifierInfo()) {
1007     parsePPUnknown();
1008     return;
1009   }
1010 
1011   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1012   case tok::pp_define:
1013     parsePPDefine();
1014     return;
1015   case tok::pp_if:
1016     parsePPIf(/*IfDef=*/false);
1017     break;
1018   case tok::pp_ifdef:
1019   case tok::pp_ifndef:
1020     parsePPIf(/*IfDef=*/true);
1021     break;
1022   case tok::pp_else:
1023   case tok::pp_elifdef:
1024   case tok::pp_elifndef:
1025   case tok::pp_elif:
1026     parsePPElse();
1027     break;
1028   case tok::pp_endif:
1029     parsePPEndIf();
1030     break;
1031   case tok::pp_pragma:
1032     parsePPPragma();
1033     break;
1034   default:
1035     parsePPUnknown();
1036     break;
1037   }
1038 }
1039 
conditionalCompilationCondition(bool Unreachable)1040 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1041   size_t Line = CurrentLines->size();
1042   if (CurrentLines == &PreprocessorDirectives)
1043     Line += Lines.size();
1044 
1045   if (Unreachable ||
1046       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1047     PPStack.push_back({PP_Unreachable, Line});
1048   } else {
1049     PPStack.push_back({PP_Conditional, Line});
1050   }
1051 }
1052 
conditionalCompilationStart(bool Unreachable)1053 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1054   ++PPBranchLevel;
1055   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1056   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1057     PPLevelBranchIndex.push_back(0);
1058     PPLevelBranchCount.push_back(0);
1059   }
1060   PPChainBranchIndex.push(Unreachable ? -1 : 0);
1061   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1062   conditionalCompilationCondition(Unreachable || Skip);
1063 }
1064 
conditionalCompilationAlternative()1065 void UnwrappedLineParser::conditionalCompilationAlternative() {
1066   if (!PPStack.empty())
1067     PPStack.pop_back();
1068   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1069   if (!PPChainBranchIndex.empty())
1070     ++PPChainBranchIndex.top();
1071   conditionalCompilationCondition(
1072       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1073       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1074 }
1075 
conditionalCompilationEnd()1076 void UnwrappedLineParser::conditionalCompilationEnd() {
1077   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1078   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1079     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1080       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1081   }
1082   // Guard against #endif's without #if.
1083   if (PPBranchLevel > -1)
1084     --PPBranchLevel;
1085   if (!PPChainBranchIndex.empty())
1086     PPChainBranchIndex.pop();
1087   if (!PPStack.empty())
1088     PPStack.pop_back();
1089 }
1090 
parsePPIf(bool IfDef)1091 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1092   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1093   nextToken();
1094   bool Unreachable = false;
1095   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1096     Unreachable = true;
1097   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1098     Unreachable = true;
1099   conditionalCompilationStart(Unreachable);
1100   FormatToken *IfCondition = FormatTok;
1101   // If there's a #ifndef on the first line, and the only lines before it are
1102   // comments, it could be an include guard.
1103   bool MaybeIncludeGuard = IfNDef;
1104   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1105     for (auto &Line : Lines) {
1106       if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1107         MaybeIncludeGuard = false;
1108         IncludeGuard = IG_Rejected;
1109         break;
1110       }
1111     }
1112   }
1113   --PPBranchLevel;
1114   parsePPUnknown();
1115   ++PPBranchLevel;
1116   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1117     IncludeGuard = IG_IfNdefed;
1118     IncludeGuardToken = IfCondition;
1119   }
1120 }
1121 
parsePPElse()1122 void UnwrappedLineParser::parsePPElse() {
1123   // If a potential include guard has an #else, it's not an include guard.
1124   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1125     IncludeGuard = IG_Rejected;
1126   // Don't crash when there is an #else without an #if.
1127   assert(PPBranchLevel >= -1);
1128   if (PPBranchLevel == -1)
1129     conditionalCompilationStart(/*Unreachable=*/true);
1130   conditionalCompilationAlternative();
1131   --PPBranchLevel;
1132   parsePPUnknown();
1133   ++PPBranchLevel;
1134 }
1135 
parsePPEndIf()1136 void UnwrappedLineParser::parsePPEndIf() {
1137   conditionalCompilationEnd();
1138   parsePPUnknown();
1139   // If the #endif of a potential include guard is the last thing in the file,
1140   // then we found an include guard.
1141   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1142       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1143     IncludeGuard = IG_Found;
1144   }
1145 }
1146 
parsePPDefine()1147 void UnwrappedLineParser::parsePPDefine() {
1148   nextToken();
1149 
1150   if (!FormatTok->Tok.getIdentifierInfo()) {
1151     IncludeGuard = IG_Rejected;
1152     IncludeGuardToken = nullptr;
1153     parsePPUnknown();
1154     return;
1155   }
1156 
1157   if (IncludeGuard == IG_IfNdefed &&
1158       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1159     IncludeGuard = IG_Defined;
1160     IncludeGuardToken = nullptr;
1161     for (auto &Line : Lines) {
1162       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1163         IncludeGuard = IG_Rejected;
1164         break;
1165       }
1166     }
1167   }
1168 
1169   // In the context of a define, even keywords should be treated as normal
1170   // identifiers. Setting the kind to identifier is not enough, because we need
1171   // to treat additional keywords like __except as well, which are already
1172   // identifiers. Setting the identifier info to null interferes with include
1173   // guard processing above, and changes preprocessing nesting.
1174   FormatTok->Tok.setKind(tok::identifier);
1175   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1176   nextToken();
1177   if (FormatTok->Tok.getKind() == tok::l_paren &&
1178       !FormatTok->hasWhitespaceBefore()) {
1179     parseParens();
1180   }
1181   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1182     Line->Level += PPBranchLevel + 1;
1183   addUnwrappedLine();
1184   ++Line->Level;
1185 
1186   Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1187   assert((int)Line->PPLevel >= 0);
1188   Line->InMacroBody = true;
1189 
1190   if (Style.SkipMacroDefinitionBody) {
1191     while (!eof()) {
1192       FormatTok->Finalized = true;
1193       FormatTok = Tokens->getNextToken();
1194     }
1195     addUnwrappedLine();
1196     return;
1197   }
1198 
1199   // Errors during a preprocessor directive can only affect the layout of the
1200   // preprocessor directive, and thus we ignore them. An alternative approach
1201   // would be to use the same approach we use on the file level (no
1202   // re-indentation if there was a structural error) within the macro
1203   // definition.
1204   parseFile();
1205 }
1206 
parsePPPragma()1207 void UnwrappedLineParser::parsePPPragma() {
1208   Line->InPragmaDirective = true;
1209   parsePPUnknown();
1210 }
1211 
parsePPUnknown()1212 void UnwrappedLineParser::parsePPUnknown() {
1213   do {
1214     nextToken();
1215   } while (!eof());
1216   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1217     Line->Level += PPBranchLevel + 1;
1218   addUnwrappedLine();
1219 }
1220 
1221 // Here we exclude certain tokens that are not usually the first token in an
1222 // unwrapped line. This is used in attempt to distinguish macro calls without
1223 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)1224 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1225   // Semicolon can be a null-statement, l_square can be a start of a macro or
1226   // a C++11 attribute, but this doesn't seem to be common.
1227   return !Tok.isOneOf(tok::semi, tok::l_brace,
1228                       // Tokens that can only be used as binary operators and a
1229                       // part of overloaded operator names.
1230                       tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1231                       tok::less, tok::greater, tok::slash, tok::percent,
1232                       tok::lessless, tok::greatergreater, tok::equal,
1233                       tok::plusequal, tok::minusequal, tok::starequal,
1234                       tok::slashequal, tok::percentequal, tok::ampequal,
1235                       tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1236                       tok::lesslessequal,
1237                       // Colon is used in labels, base class lists, initializer
1238                       // lists, range-based for loops, ternary operator, but
1239                       // should never be the first token in an unwrapped line.
1240                       tok::colon,
1241                       // 'noexcept' is a trailing annotation.
1242                       tok::kw_noexcept);
1243 }
1244 
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1245 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1246                           const FormatToken *FormatTok) {
1247   // FIXME: This returns true for C/C++ keywords like 'struct'.
1248   return FormatTok->is(tok::identifier) &&
1249          (!FormatTok->Tok.getIdentifierInfo() ||
1250           !FormatTok->isOneOf(
1251               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1252               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1253               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1254               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1255               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1256               Keywords.kw_instanceof, Keywords.kw_interface,
1257               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1258 }
1259 
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1260 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1261                                  const FormatToken *FormatTok) {
1262   return FormatTok->Tok.isLiteral() ||
1263          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1264          mustBeJSIdent(Keywords, FormatTok);
1265 }
1266 
1267 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1268 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1269 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1270                            const FormatToken *FormatTok) {
1271   return FormatTok->isOneOf(
1272       tok::kw_return, Keywords.kw_yield,
1273       // conditionals
1274       tok::kw_if, tok::kw_else,
1275       // loops
1276       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1277       // switch/case
1278       tok::kw_switch, tok::kw_case,
1279       // exceptions
1280       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1281       // declaration
1282       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1283       Keywords.kw_async, Keywords.kw_function,
1284       // import/export
1285       Keywords.kw_import, tok::kw_export);
1286 }
1287 
1288 // Checks whether a token is a type in K&R C (aka C78).
isC78Type(const FormatToken & Tok)1289 static bool isC78Type(const FormatToken &Tok) {
1290   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1291                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1292                      tok::identifier);
1293 }
1294 
1295 // This function checks whether a token starts the first parameter declaration
1296 // in a K&R C (aka C78) function definition, e.g.:
1297 //   int f(a, b)
1298 //   short a, b;
1299 //   {
1300 //      return a + b;
1301 //   }
isC78ParameterDecl(const FormatToken * Tok,const FormatToken * Next,const FormatToken * FuncName)1302 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1303                                const FormatToken *FuncName) {
1304   assert(Tok);
1305   assert(Next);
1306   assert(FuncName);
1307 
1308   if (FuncName->isNot(tok::identifier))
1309     return false;
1310 
1311   const FormatToken *Prev = FuncName->Previous;
1312   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1313     return false;
1314 
1315   if (!isC78Type(*Tok) &&
1316       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1317     return false;
1318   }
1319 
1320   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1321     return false;
1322 
1323   Tok = Tok->Previous;
1324   if (!Tok || Tok->isNot(tok::r_paren))
1325     return false;
1326 
1327   Tok = Tok->Previous;
1328   if (!Tok || Tok->isNot(tok::identifier))
1329     return false;
1330 
1331   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1332 }
1333 
parseModuleImport()1334 bool UnwrappedLineParser::parseModuleImport() {
1335   assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1336 
1337   if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1338       !Token->Tok.getIdentifierInfo() &&
1339       !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1340     return false;
1341   }
1342 
1343   nextToken();
1344   while (!eof()) {
1345     if (FormatTok->is(tok::colon)) {
1346       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1347     }
1348     // Handle import <foo/bar.h> as we would an include statement.
1349     else if (FormatTok->is(tok::less)) {
1350       nextToken();
1351       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1352         // Mark tokens up to the trailing line comments as implicit string
1353         // literals.
1354         if (FormatTok->isNot(tok::comment) &&
1355             !FormatTok->TokenText.starts_with("//")) {
1356           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1357         }
1358         nextToken();
1359       }
1360     }
1361     if (FormatTok->is(tok::semi)) {
1362       nextToken();
1363       break;
1364     }
1365     nextToken();
1366   }
1367 
1368   addUnwrappedLine();
1369   return true;
1370 }
1371 
1372 // readTokenWithJavaScriptASI reads the next token and terminates the current
1373 // line if JavaScript Automatic Semicolon Insertion must
1374 // happen between the current token and the next token.
1375 //
1376 // This method is conservative - it cannot cover all edge cases of JavaScript,
1377 // but only aims to correctly handle certain well known cases. It *must not*
1378 // return true in speculative cases.
readTokenWithJavaScriptASI()1379 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1380   FormatToken *Previous = FormatTok;
1381   readToken();
1382   FormatToken *Next = FormatTok;
1383 
1384   bool IsOnSameLine =
1385       CommentsBeforeNextToken.empty()
1386           ? Next->NewlinesBefore == 0
1387           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1388   if (IsOnSameLine)
1389     return;
1390 
1391   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1392   bool PreviousStartsTemplateExpr =
1393       Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1394   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1395     // If the line contains an '@' sign, the previous token might be an
1396     // annotation, which can precede another identifier/value.
1397     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1398       return LineNode.Tok->is(tok::at);
1399     });
1400     if (HasAt)
1401       return;
1402   }
1403   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1404     return addUnwrappedLine();
1405   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1406   bool NextEndsTemplateExpr =
1407       Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1408   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1409       (PreviousMustBeValue ||
1410        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1411                          tok::minusminus))) {
1412     return addUnwrappedLine();
1413   }
1414   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1415       isJSDeclOrStmt(Keywords, Next)) {
1416     return addUnwrappedLine();
1417   }
1418 }
1419 
parseStructuralElement(const FormatToken * OpeningBrace,IfStmtKind * IfKind,FormatToken ** IfLeftBrace,bool * HasDoWhile,bool * HasLabel)1420 void UnwrappedLineParser::parseStructuralElement(
1421     const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1422     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1423   if (Style.Language == FormatStyle::LK_TableGen &&
1424       FormatTok->is(tok::pp_include)) {
1425     nextToken();
1426     if (FormatTok->is(tok::string_literal))
1427       nextToken();
1428     addUnwrappedLine();
1429     return;
1430   }
1431 
1432   if (IsCpp) {
1433     while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1434     }
1435   } else if (Style.isVerilog()) {
1436     if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1437       parseForOrWhileLoop(/*HasParens=*/false);
1438       return;
1439     }
1440     if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1441       parseForOrWhileLoop();
1442       return;
1443     }
1444     if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1445                            Keywords.kw_assume, Keywords.kw_cover)) {
1446       parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1447       return;
1448     }
1449 
1450     // Skip things that can exist before keywords like 'if' and 'case'.
1451     while (true) {
1452       if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1453                              Keywords.kw_unique0)) {
1454         nextToken();
1455       } else if (FormatTok->is(tok::l_paren) &&
1456                  Tokens->peekNextToken()->is(tok::star)) {
1457         parseParens();
1458       } else {
1459         break;
1460       }
1461     }
1462   }
1463 
1464   // Tokens that only make sense at the beginning of a line.
1465   if (FormatTok->isAccessSpecifierKeyword()) {
1466     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1467         Style.isCSharp()) {
1468       nextToken();
1469     } else {
1470       parseAccessSpecifier();
1471     }
1472     return;
1473   }
1474   switch (FormatTok->Tok.getKind()) {
1475   case tok::kw_asm:
1476     nextToken();
1477     if (FormatTok->is(tok::l_brace)) {
1478       FormatTok->setFinalizedType(TT_InlineASMBrace);
1479       nextToken();
1480       while (FormatTok && !eof()) {
1481         if (FormatTok->is(tok::r_brace)) {
1482           FormatTok->setFinalizedType(TT_InlineASMBrace);
1483           nextToken();
1484           addUnwrappedLine();
1485           break;
1486         }
1487         FormatTok->Finalized = true;
1488         nextToken();
1489       }
1490     }
1491     break;
1492   case tok::kw_namespace:
1493     parseNamespace();
1494     return;
1495   case tok::kw_if: {
1496     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497       // field/method declaration.
1498       break;
1499     }
1500     FormatToken *Tok = parseIfThenElse(IfKind);
1501     if (IfLeftBrace)
1502       *IfLeftBrace = Tok;
1503     return;
1504   }
1505   case tok::kw_for:
1506   case tok::kw_while:
1507     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1508       // field/method declaration.
1509       break;
1510     }
1511     parseForOrWhileLoop();
1512     return;
1513   case tok::kw_do:
1514     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1515       // field/method declaration.
1516       break;
1517     }
1518     parseDoWhile();
1519     if (HasDoWhile)
1520       *HasDoWhile = true;
1521     return;
1522   case tok::kw_switch:
1523     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1524       // 'switch: string' field declaration.
1525       break;
1526     }
1527     parseSwitch(/*IsExpr=*/false);
1528     return;
1529   case tok::kw_default: {
1530     // In Verilog default along with other labels are handled in the next loop.
1531     if (Style.isVerilog())
1532       break;
1533     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1534       // 'default: string' field declaration.
1535       break;
1536     }
1537     auto *Default = FormatTok;
1538     nextToken();
1539     if (FormatTok->is(tok::colon)) {
1540       FormatTok->setFinalizedType(TT_CaseLabelColon);
1541       parseLabel();
1542       return;
1543     }
1544     if (FormatTok->is(tok::arrow)) {
1545       FormatTok->setFinalizedType(TT_CaseLabelArrow);
1546       Default->setFinalizedType(TT_SwitchExpressionLabel);
1547       parseLabel();
1548       return;
1549     }
1550     // e.g. "default void f() {}" in a Java interface.
1551     break;
1552   }
1553   case tok::kw_case:
1554     // Proto: there are no switch/case statements.
1555     if (Style.Language == FormatStyle::LK_Proto) {
1556       nextToken();
1557       return;
1558     }
1559     if (Style.isVerilog()) {
1560       parseBlock();
1561       addUnwrappedLine();
1562       return;
1563     }
1564     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1565       // 'case: string' field declaration.
1566       nextToken();
1567       break;
1568     }
1569     parseCaseLabel();
1570     return;
1571   case tok::kw_try:
1572   case tok::kw___try:
1573     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1574       // field/method declaration.
1575       break;
1576     }
1577     parseTryCatch();
1578     return;
1579   case tok::kw_extern:
1580     nextToken();
1581     if (Style.isVerilog()) {
1582       // In Verilog and extern module declaration looks like a start of module.
1583       // But there is no body and endmodule. So we handle it separately.
1584       if (Keywords.isVerilogHierarchy(*FormatTok)) {
1585         parseVerilogHierarchyHeader();
1586         return;
1587       }
1588     } else if (FormatTok->is(tok::string_literal)) {
1589       nextToken();
1590       if (FormatTok->is(tok::l_brace)) {
1591         if (Style.BraceWrapping.AfterExternBlock)
1592           addUnwrappedLine();
1593         // Either we indent or for backwards compatibility we follow the
1594         // AfterExternBlock style.
1595         unsigned AddLevels =
1596             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1597                     (Style.BraceWrapping.AfterExternBlock &&
1598                      Style.IndentExternBlock ==
1599                          FormatStyle::IEBS_AfterExternBlock)
1600                 ? 1u
1601                 : 0u;
1602         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1603         addUnwrappedLine();
1604         return;
1605       }
1606     }
1607     break;
1608   case tok::kw_export:
1609     if (Style.isJavaScript()) {
1610       parseJavaScriptEs6ImportExport();
1611       return;
1612     }
1613     if (IsCpp) {
1614       nextToken();
1615       if (FormatTok->is(tok::kw_namespace)) {
1616         parseNamespace();
1617         return;
1618       }
1619       if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1620         return;
1621     }
1622     break;
1623   case tok::kw_inline:
1624     nextToken();
1625     if (FormatTok->is(tok::kw_namespace)) {
1626       parseNamespace();
1627       return;
1628     }
1629     break;
1630   case tok::identifier:
1631     if (FormatTok->is(TT_ForEachMacro)) {
1632       parseForOrWhileLoop();
1633       return;
1634     }
1635     if (FormatTok->is(TT_MacroBlockBegin)) {
1636       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1637                  /*MunchSemi=*/false);
1638       return;
1639     }
1640     if (FormatTok->is(Keywords.kw_import)) {
1641       if (Style.isJavaScript()) {
1642         parseJavaScriptEs6ImportExport();
1643         return;
1644       }
1645       if (Style.Language == FormatStyle::LK_Proto) {
1646         nextToken();
1647         if (FormatTok->is(tok::kw_public))
1648           nextToken();
1649         if (FormatTok->isNot(tok::string_literal))
1650           return;
1651         nextToken();
1652         if (FormatTok->is(tok::semi))
1653           nextToken();
1654         addUnwrappedLine();
1655         return;
1656       }
1657       if (IsCpp && parseModuleImport())
1658         return;
1659     }
1660     if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1661                                     Keywords.kw_slots, Keywords.kw_qslots)) {
1662       nextToken();
1663       if (FormatTok->is(tok::colon)) {
1664         nextToken();
1665         addUnwrappedLine();
1666         return;
1667       }
1668     }
1669     if (IsCpp && FormatTok->is(TT_StatementMacro)) {
1670       parseStatementMacro();
1671       return;
1672     }
1673     if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
1674       parseNamespace();
1675       return;
1676     }
1677     // In Verilog labels can be any expression, so we don't do them here.
1678     // JS doesn't have macros, and within classes colons indicate fields, not
1679     // labels.
1680     // TableGen doesn't have labels.
1681     if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1682         Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1683       nextToken();
1684       if (!Line->InMacroBody || CurrentLines->size() > 1)
1685         Line->Tokens.begin()->Tok->MustBreakBefore = true;
1686       FormatTok->setFinalizedType(TT_GotoLabelColon);
1687       parseLabel(!Style.IndentGotoLabels);
1688       if (HasLabel)
1689         *HasLabel = true;
1690       return;
1691     }
1692     // In all other cases, parse the declaration.
1693     break;
1694   default:
1695     break;
1696   }
1697 
1698   for (const bool InRequiresExpression =
1699            OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1700        !eof();) {
1701     if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
1702       if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
1703           Next && Next->isBinaryOperator()) {
1704         FormatTok->Tok.setKind(tok::identifier);
1705       }
1706     }
1707     const FormatToken *Previous = FormatTok->Previous;
1708     switch (FormatTok->Tok.getKind()) {
1709     case tok::at:
1710       nextToken();
1711       if (FormatTok->is(tok::l_brace)) {
1712         nextToken();
1713         parseBracedList();
1714         break;
1715       } else if (Style.Language == FormatStyle::LK_Java &&
1716                  FormatTok->is(Keywords.kw_interface)) {
1717         nextToken();
1718         break;
1719       }
1720       switch (FormatTok->Tok.getObjCKeywordID()) {
1721       case tok::objc_public:
1722       case tok::objc_protected:
1723       case tok::objc_package:
1724       case tok::objc_private:
1725         return parseAccessSpecifier();
1726       case tok::objc_interface:
1727       case tok::objc_implementation:
1728         return parseObjCInterfaceOrImplementation();
1729       case tok::objc_protocol:
1730         if (parseObjCProtocol())
1731           return;
1732         break;
1733       case tok::objc_end:
1734         return; // Handled by the caller.
1735       case tok::objc_optional:
1736       case tok::objc_required:
1737         nextToken();
1738         addUnwrappedLine();
1739         return;
1740       case tok::objc_autoreleasepool:
1741         nextToken();
1742         if (FormatTok->is(tok::l_brace)) {
1743           if (Style.BraceWrapping.AfterControlStatement ==
1744               FormatStyle::BWACS_Always) {
1745             addUnwrappedLine();
1746           }
1747           parseBlock();
1748         }
1749         addUnwrappedLine();
1750         return;
1751       case tok::objc_synchronized:
1752         nextToken();
1753         if (FormatTok->is(tok::l_paren)) {
1754           // Skip synchronization object
1755           parseParens();
1756         }
1757         if (FormatTok->is(tok::l_brace)) {
1758           if (Style.BraceWrapping.AfterControlStatement ==
1759               FormatStyle::BWACS_Always) {
1760             addUnwrappedLine();
1761           }
1762           parseBlock();
1763         }
1764         addUnwrappedLine();
1765         return;
1766       case tok::objc_try:
1767         // This branch isn't strictly necessary (the kw_try case below would
1768         // do this too after the tok::at is parsed above).  But be explicit.
1769         parseTryCatch();
1770         return;
1771       default:
1772         break;
1773       }
1774       break;
1775     case tok::kw_requires: {
1776       if (IsCpp) {
1777         bool ParsedClause = parseRequires();
1778         if (ParsedClause)
1779           return;
1780       } else {
1781         nextToken();
1782       }
1783       break;
1784     }
1785     case tok::kw_enum:
1786       // Ignore if this is part of "template <enum ..." or "... -> enum" or
1787       // "template <..., enum ...>".
1788       if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
1789         nextToken();
1790         break;
1791       }
1792 
1793       // parseEnum falls through and does not yet add an unwrapped line as an
1794       // enum definition can start a structural element.
1795       if (!parseEnum())
1796         break;
1797       // This only applies to C++ and Verilog.
1798       if (!IsCpp && !Style.isVerilog()) {
1799         addUnwrappedLine();
1800         return;
1801       }
1802       break;
1803     case tok::kw_typedef:
1804       nextToken();
1805       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1806                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1807                              Keywords.kw_CF_CLOSED_ENUM,
1808                              Keywords.kw_NS_CLOSED_ENUM)) {
1809         parseEnum();
1810       }
1811       break;
1812     case tok::kw_class:
1813       if (Style.isVerilog()) {
1814         parseBlock();
1815         addUnwrappedLine();
1816         return;
1817       }
1818       if (Style.isTableGen()) {
1819         // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1820         // This is same as def and so on.
1821         nextToken();
1822         break;
1823       }
1824       [[fallthrough]];
1825     case tok::kw_struct:
1826     case tok::kw_union:
1827       if (parseStructLike())
1828         return;
1829       break;
1830     case tok::kw_decltype:
1831       nextToken();
1832       if (FormatTok->is(tok::l_paren)) {
1833         parseParens();
1834         assert(FormatTok->Previous);
1835         if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1836                                               tok::l_paren)) {
1837           Line->SeenDecltypeAuto = true;
1838         }
1839       }
1840       break;
1841     case tok::period:
1842       nextToken();
1843       // In Java, classes have an implicit static member "class".
1844       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1845           FormatTok->is(tok::kw_class)) {
1846         nextToken();
1847       }
1848       if (Style.isJavaScript() && FormatTok &&
1849           FormatTok->Tok.getIdentifierInfo()) {
1850         // JavaScript only has pseudo keywords, all keywords are allowed to
1851         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1852         nextToken();
1853       }
1854       break;
1855     case tok::semi:
1856       nextToken();
1857       addUnwrappedLine();
1858       return;
1859     case tok::r_brace:
1860       addUnwrappedLine();
1861       return;
1862     case tok::l_paren: {
1863       parseParens();
1864       // Break the unwrapped line if a K&R C function definition has a parameter
1865       // declaration.
1866       if (OpeningBrace || !IsCpp || !Previous || eof())
1867         break;
1868       if (isC78ParameterDecl(FormatTok,
1869                              Tokens->peekNextToken(/*SkipComment=*/true),
1870                              Previous)) {
1871         addUnwrappedLine();
1872         return;
1873       }
1874       break;
1875     }
1876     case tok::kw_operator:
1877       nextToken();
1878       if (FormatTok->isBinaryOperator())
1879         nextToken();
1880       break;
1881     case tok::caret:
1882       nextToken();
1883       // Block return type.
1884       if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1885         nextToken();
1886         // Return types: pointers are ok too.
1887         while (FormatTok->is(tok::star))
1888           nextToken();
1889       }
1890       // Block argument list.
1891       if (FormatTok->is(tok::l_paren))
1892         parseParens();
1893       // Block body.
1894       if (FormatTok->is(tok::l_brace))
1895         parseChildBlock();
1896       break;
1897     case tok::l_brace:
1898       if (InRequiresExpression)
1899         FormatTok->setFinalizedType(TT_BracedListLBrace);
1900       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1901         IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1902         // A block outside of parentheses must be the last part of a
1903         // structural element.
1904         // FIXME: Figure out cases where this is not true, and add projections
1905         // for them (the one we know is missing are lambdas).
1906         if (Style.Language == FormatStyle::LK_Java &&
1907             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1908           // If necessary, we could set the type to something different than
1909           // TT_FunctionLBrace.
1910           if (Style.BraceWrapping.AfterControlStatement ==
1911               FormatStyle::BWACS_Always) {
1912             addUnwrappedLine();
1913           }
1914         } else if (Style.BraceWrapping.AfterFunction) {
1915           addUnwrappedLine();
1916         }
1917         if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
1918           FormatTok->setFinalizedType(TT_FunctionLBrace);
1919         parseBlock();
1920         IsDecltypeAutoFunction = false;
1921         addUnwrappedLine();
1922         return;
1923       }
1924       // Otherwise this was a braced init list, and the structural
1925       // element continues.
1926       break;
1927     case tok::kw_try:
1928       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1929         // field/method declaration.
1930         nextToken();
1931         break;
1932       }
1933       // We arrive here when parsing function-try blocks.
1934       if (Style.BraceWrapping.AfterFunction)
1935         addUnwrappedLine();
1936       parseTryCatch();
1937       return;
1938     case tok::identifier: {
1939       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1940           Line->MustBeDeclaration) {
1941         addUnwrappedLine();
1942         parseCSharpGenericTypeConstraint();
1943         break;
1944       }
1945       if (FormatTok->is(TT_MacroBlockEnd)) {
1946         addUnwrappedLine();
1947         return;
1948       }
1949 
1950       // Function declarations (as opposed to function expressions) are parsed
1951       // on their own unwrapped line by continuing this loop. Function
1952       // expressions (functions that are not on their own line) must not create
1953       // a new unwrapped line, so they are special cased below.
1954       size_t TokenCount = Line->Tokens.size();
1955       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1956           (TokenCount > 1 ||
1957            (TokenCount == 1 &&
1958             Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1959         tryToParseJSFunction();
1960         break;
1961       }
1962       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1963           FormatTok->is(Keywords.kw_interface)) {
1964         if (Style.isJavaScript()) {
1965           // In JavaScript/TypeScript, "interface" can be used as a standalone
1966           // identifier, e.g. in `var interface = 1;`. If "interface" is
1967           // followed by another identifier, it is very likely to be an actual
1968           // interface declaration.
1969           unsigned StoredPosition = Tokens->getPosition();
1970           FormatToken *Next = Tokens->getNextToken();
1971           FormatTok = Tokens->setPosition(StoredPosition);
1972           if (!mustBeJSIdent(Keywords, Next)) {
1973             nextToken();
1974             break;
1975           }
1976         }
1977         parseRecord();
1978         addUnwrappedLine();
1979         return;
1980       }
1981 
1982       if (Style.isVerilog()) {
1983         if (FormatTok->is(Keywords.kw_table)) {
1984           parseVerilogTable();
1985           return;
1986         }
1987         if (Keywords.isVerilogBegin(*FormatTok) ||
1988             Keywords.isVerilogHierarchy(*FormatTok)) {
1989           parseBlock();
1990           addUnwrappedLine();
1991           return;
1992         }
1993       }
1994 
1995       if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
1996         if (parseStructLike())
1997           return;
1998         break;
1999       }
2000 
2001       if (IsCpp && FormatTok->is(TT_StatementMacro)) {
2002         parseStatementMacro();
2003         return;
2004       }
2005 
2006       // See if the following token should start a new unwrapped line.
2007       StringRef Text = FormatTok->TokenText;
2008 
2009       FormatToken *PreviousToken = FormatTok;
2010       nextToken();
2011 
2012       // JS doesn't have macros, and within classes colons indicate fields, not
2013       // labels.
2014       if (Style.isJavaScript())
2015         break;
2016 
2017       auto OneTokenSoFar = [&]() {
2018         auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2019         while (I != E && I->Tok->is(tok::comment))
2020           ++I;
2021         if (Style.isVerilog())
2022           while (I != E && I->Tok->is(tok::hash))
2023             ++I;
2024         return I != E && (++I == E);
2025       };
2026       if (OneTokenSoFar()) {
2027         // Recognize function-like macro usages without trailing semicolon as
2028         // well as free-standing macros like Q_OBJECT.
2029         bool FunctionLike = FormatTok->is(tok::l_paren);
2030         if (FunctionLike)
2031           parseParens();
2032 
2033         bool FollowedByNewline =
2034             CommentsBeforeNextToken.empty()
2035                 ? FormatTok->NewlinesBefore > 0
2036                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2037 
2038         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2039             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2040           if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2041             PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2042           addUnwrappedLine();
2043           return;
2044         }
2045       }
2046       break;
2047     }
2048     case tok::equal:
2049       if ((Style.isJavaScript() || Style.isCSharp()) &&
2050           FormatTok->is(TT_FatArrow)) {
2051         tryToParseChildBlock();
2052         break;
2053       }
2054 
2055       nextToken();
2056       if (FormatTok->is(tok::l_brace)) {
2057         // Block kind should probably be set to BK_BracedInit for any language.
2058         // C# needs this change to ensure that array initialisers and object
2059         // initialisers are indented the same way.
2060         if (Style.isCSharp())
2061           FormatTok->setBlockKind(BK_BracedInit);
2062         // TableGen's defset statement has syntax of the form,
2063         // `defset <type> <name> = { <statement>... }`
2064         if (Style.isTableGen() &&
2065             Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2066           FormatTok->setFinalizedType(TT_FunctionLBrace);
2067           parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2068                      /*MunchSemi=*/false);
2069           addUnwrappedLine();
2070           break;
2071         }
2072         nextToken();
2073         parseBracedList();
2074       } else if (Style.Language == FormatStyle::LK_Proto &&
2075                  FormatTok->is(tok::less)) {
2076         nextToken();
2077         parseBracedList(/*IsAngleBracket=*/true);
2078       }
2079       break;
2080     case tok::l_square:
2081       parseSquare();
2082       break;
2083     case tok::kw_new:
2084       parseNew();
2085       break;
2086     case tok::kw_switch:
2087       if (Style.Language == FormatStyle::LK_Java)
2088         parseSwitch(/*IsExpr=*/true);
2089       else
2090         nextToken();
2091       break;
2092     case tok::kw_case:
2093       // Proto: there are no switch/case statements.
2094       if (Style.Language == FormatStyle::LK_Proto) {
2095         nextToken();
2096         return;
2097       }
2098       // In Verilog switch is called case.
2099       if (Style.isVerilog()) {
2100         parseBlock();
2101         addUnwrappedLine();
2102         return;
2103       }
2104       if (Style.isJavaScript() && Line->MustBeDeclaration) {
2105         // 'case: string' field declaration.
2106         nextToken();
2107         break;
2108       }
2109       parseCaseLabel();
2110       break;
2111     case tok::kw_default:
2112       nextToken();
2113       if (Style.isVerilog()) {
2114         if (FormatTok->is(tok::colon)) {
2115           // The label will be handled in the next iteration.
2116           break;
2117         }
2118         if (FormatTok->is(Keywords.kw_clocking)) {
2119           // A default clocking block.
2120           parseBlock();
2121           addUnwrappedLine();
2122           return;
2123         }
2124         parseVerilogCaseLabel();
2125         return;
2126       }
2127       break;
2128     case tok::colon:
2129       nextToken();
2130       if (Style.isVerilog()) {
2131         parseVerilogCaseLabel();
2132         return;
2133       }
2134       break;
2135     case tok::greater:
2136       nextToken();
2137       if (FormatTok->is(tok::l_brace))
2138         FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2139       break;
2140     default:
2141       nextToken();
2142       break;
2143     }
2144   }
2145 }
2146 
tryToParsePropertyAccessor()2147 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2148   assert(FormatTok->is(tok::l_brace));
2149   if (!Style.isCSharp())
2150     return false;
2151   // See if it's a property accessor.
2152   if (FormatTok->Previous->isNot(tok::identifier))
2153     return false;
2154 
2155   // See if we are inside a property accessor.
2156   //
2157   // Record the current tokenPosition so that we can advance and
2158   // reset the current token. `Next` is not set yet so we need
2159   // another way to advance along the token stream.
2160   unsigned int StoredPosition = Tokens->getPosition();
2161   FormatToken *Tok = Tokens->getNextToken();
2162 
2163   // A trivial property accessor is of the form:
2164   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2165   // Track these as they do not require line breaks to be introduced.
2166   bool HasSpecialAccessor = false;
2167   bool IsTrivialPropertyAccessor = true;
2168   while (!eof()) {
2169     if (Tok->isAccessSpecifierKeyword() ||
2170         Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get,
2171                      Keywords.kw_init, Keywords.kw_set)) {
2172       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2173         HasSpecialAccessor = true;
2174       Tok = Tokens->getNextToken();
2175       continue;
2176     }
2177     if (Tok->isNot(tok::r_brace))
2178       IsTrivialPropertyAccessor = false;
2179     break;
2180   }
2181 
2182   if (!HasSpecialAccessor) {
2183     Tokens->setPosition(StoredPosition);
2184     return false;
2185   }
2186 
2187   // Try to parse the property accessor:
2188   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2189   Tokens->setPosition(StoredPosition);
2190   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2191     addUnwrappedLine();
2192   nextToken();
2193   do {
2194     switch (FormatTok->Tok.getKind()) {
2195     case tok::r_brace:
2196       nextToken();
2197       if (FormatTok->is(tok::equal)) {
2198         while (!eof() && FormatTok->isNot(tok::semi))
2199           nextToken();
2200         nextToken();
2201       }
2202       addUnwrappedLine();
2203       return true;
2204     case tok::l_brace:
2205       ++Line->Level;
2206       parseBlock(/*MustBeDeclaration=*/true);
2207       addUnwrappedLine();
2208       --Line->Level;
2209       break;
2210     case tok::equal:
2211       if (FormatTok->is(TT_FatArrow)) {
2212         ++Line->Level;
2213         do {
2214           nextToken();
2215         } while (!eof() && FormatTok->isNot(tok::semi));
2216         nextToken();
2217         addUnwrappedLine();
2218         --Line->Level;
2219         break;
2220       }
2221       nextToken();
2222       break;
2223     default:
2224       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2225                              Keywords.kw_set) &&
2226           !IsTrivialPropertyAccessor) {
2227         // Non-trivial get/set needs to be on its own line.
2228         addUnwrappedLine();
2229       }
2230       nextToken();
2231     }
2232   } while (!eof());
2233 
2234   // Unreachable for well-formed code (paired '{' and '}').
2235   return true;
2236 }
2237 
tryToParseLambda()2238 bool UnwrappedLineParser::tryToParseLambda() {
2239   assert(FormatTok->is(tok::l_square));
2240   if (!IsCpp) {
2241     nextToken();
2242     return false;
2243   }
2244   FormatToken &LSquare = *FormatTok;
2245   if (!tryToParseLambdaIntroducer())
2246     return false;
2247 
2248   bool SeenArrow = false;
2249   bool InTemplateParameterList = false;
2250 
2251   while (FormatTok->isNot(tok::l_brace)) {
2252     if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2253       nextToken();
2254       continue;
2255     }
2256     switch (FormatTok->Tok.getKind()) {
2257     case tok::l_brace:
2258       break;
2259     case tok::l_paren:
2260       parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2261       break;
2262     case tok::l_square:
2263       parseSquare();
2264       break;
2265     case tok::less:
2266       assert(FormatTok->Previous);
2267       if (FormatTok->Previous->is(tok::r_square))
2268         InTemplateParameterList = true;
2269       nextToken();
2270       break;
2271     case tok::kw_auto:
2272     case tok::kw_class:
2273     case tok::kw_struct:
2274     case tok::kw_union:
2275     case tok::kw_template:
2276     case tok::kw_typename:
2277     case tok::amp:
2278     case tok::star:
2279     case tok::kw_const:
2280     case tok::kw_constexpr:
2281     case tok::kw_consteval:
2282     case tok::comma:
2283     case tok::greater:
2284     case tok::identifier:
2285     case tok::numeric_constant:
2286     case tok::coloncolon:
2287     case tok::kw_mutable:
2288     case tok::kw_noexcept:
2289     case tok::kw_static:
2290       nextToken();
2291       break;
2292     // Specialization of a template with an integer parameter can contain
2293     // arithmetic, logical, comparison and ternary operators.
2294     //
2295     // FIXME: This also accepts sequences of operators that are not in the scope
2296     // of a template argument list.
2297     //
2298     // In a C++ lambda a template type can only occur after an arrow. We use
2299     // this as an heuristic to distinguish between Objective-C expressions
2300     // followed by an `a->b` expression, such as:
2301     // ([obj func:arg] + a->b)
2302     // Otherwise the code below would parse as a lambda.
2303     case tok::plus:
2304     case tok::minus:
2305     case tok::exclaim:
2306     case tok::tilde:
2307     case tok::slash:
2308     case tok::percent:
2309     case tok::lessless:
2310     case tok::pipe:
2311     case tok::pipepipe:
2312     case tok::ampamp:
2313     case tok::caret:
2314     case tok::equalequal:
2315     case tok::exclaimequal:
2316     case tok::greaterequal:
2317     case tok::lessequal:
2318     case tok::question:
2319     case tok::colon:
2320     case tok::ellipsis:
2321     case tok::kw_true:
2322     case tok::kw_false:
2323       if (SeenArrow || InTemplateParameterList) {
2324         nextToken();
2325         break;
2326       }
2327       return true;
2328     case tok::arrow:
2329       // This might or might not actually be a lambda arrow (this could be an
2330       // ObjC method invocation followed by a dereferencing arrow). We might
2331       // reset this back to TT_Unknown in TokenAnnotator.
2332       FormatTok->setFinalizedType(TT_LambdaArrow);
2333       SeenArrow = true;
2334       nextToken();
2335       break;
2336     case tok::kw_requires: {
2337       auto *RequiresToken = FormatTok;
2338       nextToken();
2339       parseRequiresClause(RequiresToken);
2340       break;
2341     }
2342     case tok::equal:
2343       if (!InTemplateParameterList)
2344         return true;
2345       nextToken();
2346       break;
2347     default:
2348       return true;
2349     }
2350   }
2351 
2352   FormatTok->setFinalizedType(TT_LambdaLBrace);
2353   LSquare.setFinalizedType(TT_LambdaLSquare);
2354 
2355   NestedLambdas.push_back(Line->SeenDecltypeAuto);
2356   parseChildBlock();
2357   assert(!NestedLambdas.empty());
2358   NestedLambdas.pop_back();
2359 
2360   return true;
2361 }
2362 
tryToParseLambdaIntroducer()2363 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2364   const FormatToken *Previous = FormatTok->Previous;
2365   const FormatToken *LeftSquare = FormatTok;
2366   nextToken();
2367   if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2368                      !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2369                                         tok::kw_co_yield, tok::kw_co_return)) ||
2370                     Previous->closesScope())) ||
2371       LeftSquare->isCppStructuredBinding(IsCpp)) {
2372     return false;
2373   }
2374   if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2375     return false;
2376   if (FormatTok->is(tok::r_square)) {
2377     const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2378     if (Next->is(tok::greater))
2379       return false;
2380   }
2381   parseSquare(/*LambdaIntroducer=*/true);
2382   return true;
2383 }
2384 
tryToParseJSFunction()2385 void UnwrappedLineParser::tryToParseJSFunction() {
2386   assert(FormatTok->is(Keywords.kw_function));
2387   if (FormatTok->is(Keywords.kw_async))
2388     nextToken();
2389   // Consume "function".
2390   nextToken();
2391 
2392   // Consume * (generator function). Treat it like C++'s overloaded operators.
2393   if (FormatTok->is(tok::star)) {
2394     FormatTok->setFinalizedType(TT_OverloadedOperator);
2395     nextToken();
2396   }
2397 
2398   // Consume function name.
2399   if (FormatTok->is(tok::identifier))
2400     nextToken();
2401 
2402   if (FormatTok->isNot(tok::l_paren))
2403     return;
2404 
2405   // Parse formal parameter list.
2406   parseParens();
2407 
2408   if (FormatTok->is(tok::colon)) {
2409     // Parse a type definition.
2410     nextToken();
2411 
2412     // Eat the type declaration. For braced inline object types, balance braces,
2413     // otherwise just parse until finding an l_brace for the function body.
2414     if (FormatTok->is(tok::l_brace))
2415       tryToParseBracedList();
2416     else
2417       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2418         nextToken();
2419   }
2420 
2421   if (FormatTok->is(tok::semi))
2422     return;
2423 
2424   parseChildBlock();
2425 }
2426 
tryToParseBracedList()2427 bool UnwrappedLineParser::tryToParseBracedList() {
2428   if (FormatTok->is(BK_Unknown))
2429     calculateBraceTypes();
2430   assert(FormatTok->isNot(BK_Unknown));
2431   if (FormatTok->is(BK_Block))
2432     return false;
2433   nextToken();
2434   parseBracedList();
2435   return true;
2436 }
2437 
tryToParseChildBlock()2438 bool UnwrappedLineParser::tryToParseChildBlock() {
2439   assert(Style.isJavaScript() || Style.isCSharp());
2440   assert(FormatTok->is(TT_FatArrow));
2441   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2442   // They always start an expression or a child block if followed by a curly
2443   // brace.
2444   nextToken();
2445   if (FormatTok->isNot(tok::l_brace))
2446     return false;
2447   parseChildBlock();
2448   return true;
2449 }
2450 
parseBracedList(bool IsAngleBracket,bool IsEnum)2451 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2452   assert(!IsAngleBracket || !IsEnum);
2453   bool HasError = false;
2454 
2455   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2456   // replace this by using parseAssignmentExpression() inside.
2457   do {
2458     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2459         tryToParseChildBlock()) {
2460       continue;
2461     }
2462     if (Style.isJavaScript()) {
2463       if (FormatTok->is(Keywords.kw_function)) {
2464         tryToParseJSFunction();
2465         continue;
2466       }
2467       if (FormatTok->is(tok::l_brace)) {
2468         // Could be a method inside of a braced list `{a() { return 1; }}`.
2469         if (tryToParseBracedList())
2470           continue;
2471         parseChildBlock();
2472       }
2473     }
2474     if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2475       if (IsEnum) {
2476         FormatTok->setBlockKind(BK_Block);
2477         if (!Style.AllowShortEnumsOnASingleLine)
2478           addUnwrappedLine();
2479       }
2480       nextToken();
2481       return !HasError;
2482     }
2483     switch (FormatTok->Tok.getKind()) {
2484     case tok::l_square:
2485       if (Style.isCSharp())
2486         parseSquare();
2487       else
2488         tryToParseLambda();
2489       break;
2490     case tok::l_paren:
2491       parseParens();
2492       // JavaScript can just have free standing methods and getters/setters in
2493       // object literals. Detect them by a "{" following ")".
2494       if (Style.isJavaScript()) {
2495         if (FormatTok->is(tok::l_brace))
2496           parseChildBlock();
2497         break;
2498       }
2499       break;
2500     case tok::l_brace:
2501       // Assume there are no blocks inside a braced init list apart
2502       // from the ones we explicitly parse out (like lambdas).
2503       FormatTok->setBlockKind(BK_BracedInit);
2504       if (!IsAngleBracket) {
2505         auto *Prev = FormatTok->Previous;
2506         if (Prev && Prev->is(tok::greater))
2507           Prev->setFinalizedType(TT_TemplateCloser);
2508       }
2509       nextToken();
2510       parseBracedList();
2511       break;
2512     case tok::less:
2513       nextToken();
2514       if (IsAngleBracket)
2515         parseBracedList(/*IsAngleBracket=*/true);
2516       break;
2517     case tok::semi:
2518       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2519       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2520       // used for error recovery if we have otherwise determined that this is
2521       // a braced list.
2522       if (Style.isJavaScript()) {
2523         nextToken();
2524         break;
2525       }
2526       HasError = true;
2527       if (!IsEnum)
2528         return false;
2529       nextToken();
2530       break;
2531     case tok::comma:
2532       nextToken();
2533       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2534         addUnwrappedLine();
2535       break;
2536     default:
2537       nextToken();
2538       break;
2539     }
2540   } while (!eof());
2541   return false;
2542 }
2543 
2544 /// \brief Parses a pair of parentheses (and everything between them).
2545 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2546 /// double ampersands. This applies for all nested scopes as well.
2547 ///
2548 /// Returns whether there is a `=` token between the parentheses.
parseParens(TokenType AmpAmpTokenType)2549 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2550   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2551   auto *LeftParen = FormatTok;
2552   bool SeenEqual = false;
2553   bool MightBeFoldExpr = false;
2554   const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2555   nextToken();
2556   do {
2557     switch (FormatTok->Tok.getKind()) {
2558     case tok::l_paren:
2559       if (parseParens(AmpAmpTokenType))
2560         SeenEqual = true;
2561       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2562         parseChildBlock();
2563       break;
2564     case tok::r_paren: {
2565       auto *Prev = LeftParen->Previous;
2566       if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
2567           Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2568         const auto *Next = Tokens->peekNextToken();
2569         const bool DoubleParens =
2570             Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2571         const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2572         const bool Blacklisted =
2573             PrevPrev &&
2574             (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2575              (SeenEqual &&
2576               (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2577                PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2578         const bool ReturnParens =
2579             Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2580             ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2581              (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2582             Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2583             Next->is(tok::semi);
2584         if ((DoubleParens && !Blacklisted) || ReturnParens) {
2585           LeftParen->Optional = true;
2586           FormatTok->Optional = true;
2587         }
2588       }
2589       if (Prev) {
2590         if (Prev->is(TT_TypenameMacro)) {
2591           LeftParen->setFinalizedType(TT_TypeDeclarationParen);
2592           FormatTok->setFinalizedType(TT_TypeDeclarationParen);
2593         } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) {
2594           Prev->setFinalizedType(TT_TemplateCloser);
2595         }
2596       }
2597       nextToken();
2598       return SeenEqual;
2599     }
2600     case tok::r_brace:
2601       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2602       return SeenEqual;
2603     case tok::l_square:
2604       tryToParseLambda();
2605       break;
2606     case tok::l_brace:
2607       if (!tryToParseBracedList())
2608         parseChildBlock();
2609       break;
2610     case tok::at:
2611       nextToken();
2612       if (FormatTok->is(tok::l_brace)) {
2613         nextToken();
2614         parseBracedList();
2615       }
2616       break;
2617     case tok::ellipsis:
2618       MightBeFoldExpr = true;
2619       nextToken();
2620       break;
2621     case tok::equal:
2622       SeenEqual = true;
2623       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2624         tryToParseChildBlock();
2625       else
2626         nextToken();
2627       break;
2628     case tok::kw_class:
2629       if (Style.isJavaScript())
2630         parseRecord(/*ParseAsExpr=*/true);
2631       else
2632         nextToken();
2633       break;
2634     case tok::identifier:
2635       if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2636         tryToParseJSFunction();
2637       else
2638         nextToken();
2639       break;
2640     case tok::kw_switch:
2641       if (Style.Language == FormatStyle::LK_Java)
2642         parseSwitch(/*IsExpr=*/true);
2643       else
2644         nextToken();
2645       break;
2646     case tok::kw_requires: {
2647       auto RequiresToken = FormatTok;
2648       nextToken();
2649       parseRequiresExpression(RequiresToken);
2650       break;
2651     }
2652     case tok::ampamp:
2653       if (AmpAmpTokenType != TT_Unknown)
2654         FormatTok->setFinalizedType(AmpAmpTokenType);
2655       [[fallthrough]];
2656     default:
2657       nextToken();
2658       break;
2659     }
2660   } while (!eof());
2661   return SeenEqual;
2662 }
2663 
parseSquare(bool LambdaIntroducer)2664 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2665   if (!LambdaIntroducer) {
2666     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2667     if (tryToParseLambda())
2668       return;
2669   }
2670   do {
2671     switch (FormatTok->Tok.getKind()) {
2672     case tok::l_paren:
2673       parseParens();
2674       break;
2675     case tok::r_square:
2676       nextToken();
2677       return;
2678     case tok::r_brace:
2679       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2680       return;
2681     case tok::l_square:
2682       parseSquare();
2683       break;
2684     case tok::l_brace: {
2685       if (!tryToParseBracedList())
2686         parseChildBlock();
2687       break;
2688     }
2689     case tok::at:
2690     case tok::colon:
2691       nextToken();
2692       if (FormatTok->is(tok::l_brace)) {
2693         nextToken();
2694         parseBracedList();
2695       }
2696       break;
2697     default:
2698       nextToken();
2699       break;
2700     }
2701   } while (!eof());
2702 }
2703 
keepAncestorBraces()2704 void UnwrappedLineParser::keepAncestorBraces() {
2705   if (!Style.RemoveBracesLLVM)
2706     return;
2707 
2708   const int MaxNestingLevels = 2;
2709   const int Size = NestedTooDeep.size();
2710   if (Size >= MaxNestingLevels)
2711     NestedTooDeep[Size - MaxNestingLevels] = true;
2712   NestedTooDeep.push_back(false);
2713 }
2714 
getLastNonComment(const UnwrappedLine & Line)2715 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2716   for (const auto &Token : llvm::reverse(Line.Tokens))
2717     if (Token.Tok->isNot(tok::comment))
2718       return Token.Tok;
2719 
2720   return nullptr;
2721 }
2722 
parseUnbracedBody(bool CheckEOF)2723 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2724   FormatToken *Tok = nullptr;
2725 
2726   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2727       PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2728     Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2729               ? getLastNonComment(*Line)
2730               : Line->Tokens.back().Tok;
2731     assert(Tok);
2732     if (Tok->BraceCount < 0) {
2733       assert(Tok->BraceCount == -1);
2734       Tok = nullptr;
2735     } else {
2736       Tok->BraceCount = -1;
2737     }
2738   }
2739 
2740   addUnwrappedLine();
2741   ++Line->Level;
2742   ++Line->UnbracedBodyLevel;
2743   parseStructuralElement();
2744   --Line->UnbracedBodyLevel;
2745 
2746   if (Tok) {
2747     assert(!Line->InPPDirective);
2748     Tok = nullptr;
2749     for (const auto &L : llvm::reverse(*CurrentLines)) {
2750       if (!L.InPPDirective && getLastNonComment(L)) {
2751         Tok = L.Tokens.back().Tok;
2752         break;
2753       }
2754     }
2755     assert(Tok);
2756     ++Tok->BraceCount;
2757   }
2758 
2759   if (CheckEOF && eof())
2760     addUnwrappedLine();
2761 
2762   --Line->Level;
2763 }
2764 
markOptionalBraces(FormatToken * LeftBrace)2765 static void markOptionalBraces(FormatToken *LeftBrace) {
2766   if (!LeftBrace)
2767     return;
2768 
2769   assert(LeftBrace->is(tok::l_brace));
2770 
2771   FormatToken *RightBrace = LeftBrace->MatchingParen;
2772   if (!RightBrace) {
2773     assert(!LeftBrace->Optional);
2774     return;
2775   }
2776 
2777   assert(RightBrace->is(tok::r_brace));
2778   assert(RightBrace->MatchingParen == LeftBrace);
2779   assert(LeftBrace->Optional == RightBrace->Optional);
2780 
2781   LeftBrace->Optional = true;
2782   RightBrace->Optional = true;
2783 }
2784 
handleAttributes()2785 void UnwrappedLineParser::handleAttributes() {
2786   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2787   if (FormatTok->isAttribute())
2788     nextToken();
2789   else if (FormatTok->is(tok::l_square))
2790     handleCppAttributes();
2791 }
2792 
handleCppAttributes()2793 bool UnwrappedLineParser::handleCppAttributes() {
2794   // Handle [[likely]] / [[unlikely]] attributes.
2795   assert(FormatTok->is(tok::l_square));
2796   if (!tryToParseSimpleAttribute())
2797     return false;
2798   parseSquare();
2799   return true;
2800 }
2801 
2802 /// Returns whether \c Tok begins a block.
isBlockBegin(const FormatToken & Tok) const2803 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2804   // FIXME: rename the function or make
2805   // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2806   return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2807                            : Tok.is(tok::l_brace);
2808 }
2809 
parseIfThenElse(IfStmtKind * IfKind,bool KeepBraces,bool IsVerilogAssert)2810 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2811                                                   bool KeepBraces,
2812                                                   bool IsVerilogAssert) {
2813   assert((FormatTok->is(tok::kw_if) ||
2814           (Style.isVerilog() &&
2815            FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2816                               Keywords.kw_assume, Keywords.kw_cover))) &&
2817          "'if' expected");
2818   nextToken();
2819 
2820   if (IsVerilogAssert) {
2821     // Handle `assert #0` and `assert final`.
2822     if (FormatTok->is(Keywords.kw_verilogHash)) {
2823       nextToken();
2824       if (FormatTok->is(tok::numeric_constant))
2825         nextToken();
2826     } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2827                                   Keywords.kw_sequence)) {
2828       nextToken();
2829     }
2830   }
2831 
2832   // TableGen's if statement has the form of `if <cond> then { ... }`.
2833   if (Style.isTableGen()) {
2834     while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
2835       // Simply skip until then. This range only contains a value.
2836       nextToken();
2837     }
2838   }
2839 
2840   // Handle `if !consteval`.
2841   if (FormatTok->is(tok::exclaim))
2842     nextToken();
2843 
2844   bool KeepIfBraces = true;
2845   if (FormatTok->is(tok::kw_consteval)) {
2846     nextToken();
2847   } else {
2848     KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2849     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2850       nextToken();
2851     if (FormatTok->is(tok::l_paren)) {
2852       FormatTok->setFinalizedType(TT_ConditionLParen);
2853       parseParens();
2854     }
2855   }
2856   handleAttributes();
2857   // The then action is optional in Verilog assert statements.
2858   if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2859     nextToken();
2860     addUnwrappedLine();
2861     return nullptr;
2862   }
2863 
2864   bool NeedsUnwrappedLine = false;
2865   keepAncestorBraces();
2866 
2867   FormatToken *IfLeftBrace = nullptr;
2868   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2869 
2870   if (isBlockBegin(*FormatTok)) {
2871     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2872     IfLeftBrace = FormatTok;
2873     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2874     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2875                /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2876     setPreviousRBraceType(TT_ControlStatementRBrace);
2877     if (Style.BraceWrapping.BeforeElse)
2878       addUnwrappedLine();
2879     else
2880       NeedsUnwrappedLine = true;
2881   } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2882     addUnwrappedLine();
2883   } else {
2884     parseUnbracedBody();
2885   }
2886 
2887   if (Style.RemoveBracesLLVM) {
2888     assert(!NestedTooDeep.empty());
2889     KeepIfBraces = KeepIfBraces ||
2890                    (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2891                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2892                    IfBlockKind == IfStmtKind::IfElseIf;
2893   }
2894 
2895   bool KeepElseBraces = KeepIfBraces;
2896   FormatToken *ElseLeftBrace = nullptr;
2897   IfStmtKind Kind = IfStmtKind::IfOnly;
2898 
2899   if (FormatTok->is(tok::kw_else)) {
2900     if (Style.RemoveBracesLLVM) {
2901       NestedTooDeep.back() = false;
2902       Kind = IfStmtKind::IfElse;
2903     }
2904     nextToken();
2905     handleAttributes();
2906     if (isBlockBegin(*FormatTok)) {
2907       const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2908       FormatTok->setFinalizedType(TT_ElseLBrace);
2909       ElseLeftBrace = FormatTok;
2910       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2911       IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2912       FormatToken *IfLBrace =
2913           parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2914                      /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2915       setPreviousRBraceType(TT_ElseRBrace);
2916       if (FormatTok->is(tok::kw_else)) {
2917         KeepElseBraces = KeepElseBraces ||
2918                          ElseBlockKind == IfStmtKind::IfOnly ||
2919                          ElseBlockKind == IfStmtKind::IfElseIf;
2920       } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2921         KeepElseBraces = true;
2922         assert(ElseLeftBrace->MatchingParen);
2923         markOptionalBraces(ElseLeftBrace);
2924       }
2925       addUnwrappedLine();
2926     } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2927       const FormatToken *Previous = Tokens->getPreviousToken();
2928       assert(Previous);
2929       const bool IsPrecededByComment = Previous->is(tok::comment);
2930       if (IsPrecededByComment) {
2931         addUnwrappedLine();
2932         ++Line->Level;
2933       }
2934       bool TooDeep = true;
2935       if (Style.RemoveBracesLLVM) {
2936         Kind = IfStmtKind::IfElseIf;
2937         TooDeep = NestedTooDeep.pop_back_val();
2938       }
2939       ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2940       if (Style.RemoveBracesLLVM)
2941         NestedTooDeep.push_back(TooDeep);
2942       if (IsPrecededByComment)
2943         --Line->Level;
2944     } else {
2945       parseUnbracedBody(/*CheckEOF=*/true);
2946     }
2947   } else {
2948     KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2949     if (NeedsUnwrappedLine)
2950       addUnwrappedLine();
2951   }
2952 
2953   if (!Style.RemoveBracesLLVM)
2954     return nullptr;
2955 
2956   assert(!NestedTooDeep.empty());
2957   KeepElseBraces = KeepElseBraces ||
2958                    (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2959                    NestedTooDeep.back();
2960 
2961   NestedTooDeep.pop_back();
2962 
2963   if (!KeepIfBraces && !KeepElseBraces) {
2964     markOptionalBraces(IfLeftBrace);
2965     markOptionalBraces(ElseLeftBrace);
2966   } else if (IfLeftBrace) {
2967     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2968     if (IfRightBrace) {
2969       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2970       assert(!IfLeftBrace->Optional);
2971       assert(!IfRightBrace->Optional);
2972       IfLeftBrace->MatchingParen = nullptr;
2973       IfRightBrace->MatchingParen = nullptr;
2974     }
2975   }
2976 
2977   if (IfKind)
2978     *IfKind = Kind;
2979 
2980   return IfLeftBrace;
2981 }
2982 
parseTryCatch()2983 void UnwrappedLineParser::parseTryCatch() {
2984   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2985   nextToken();
2986   bool NeedsUnwrappedLine = false;
2987   bool HasCtorInitializer = false;
2988   if (FormatTok->is(tok::colon)) {
2989     auto *Colon = FormatTok;
2990     // We are in a function try block, what comes is an initializer list.
2991     nextToken();
2992     if (FormatTok->is(tok::identifier)) {
2993       HasCtorInitializer = true;
2994       Colon->setFinalizedType(TT_CtorInitializerColon);
2995     }
2996 
2997     // In case identifiers were removed by clang-tidy, what might follow is
2998     // multiple commas in sequence - before the first identifier.
2999     while (FormatTok->is(tok::comma))
3000       nextToken();
3001 
3002     while (FormatTok->is(tok::identifier)) {
3003       nextToken();
3004       if (FormatTok->is(tok::l_paren)) {
3005         parseParens();
3006       } else if (FormatTok->is(tok::l_brace)) {
3007         nextToken();
3008         parseBracedList();
3009       }
3010 
3011       // In case identifiers were removed by clang-tidy, what might follow is
3012       // multiple commas in sequence - after the first identifier.
3013       while (FormatTok->is(tok::comma))
3014         nextToken();
3015     }
3016   }
3017   // Parse try with resource.
3018   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
3019     parseParens();
3020 
3021   keepAncestorBraces();
3022 
3023   if (FormatTok->is(tok::l_brace)) {
3024     if (HasCtorInitializer)
3025       FormatTok->setFinalizedType(TT_FunctionLBrace);
3026     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3027     parseBlock();
3028     if (Style.BraceWrapping.BeforeCatch)
3029       addUnwrappedLine();
3030     else
3031       NeedsUnwrappedLine = true;
3032   } else if (FormatTok->isNot(tok::kw_catch)) {
3033     // The C++ standard requires a compound-statement after a try.
3034     // If there's none, we try to assume there's a structuralElement
3035     // and try to continue.
3036     addUnwrappedLine();
3037     ++Line->Level;
3038     parseStructuralElement();
3039     --Line->Level;
3040   }
3041   while (true) {
3042     if (FormatTok->is(tok::at))
3043       nextToken();
3044     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
3045                              tok::kw___finally) ||
3046           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3047            FormatTok->is(Keywords.kw_finally)) ||
3048           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
3049            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
3050       break;
3051     }
3052     nextToken();
3053     while (FormatTok->isNot(tok::l_brace)) {
3054       if (FormatTok->is(tok::l_paren)) {
3055         parseParens();
3056         continue;
3057       }
3058       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
3059         if (Style.RemoveBracesLLVM)
3060           NestedTooDeep.pop_back();
3061         return;
3062       }
3063       nextToken();
3064     }
3065     NeedsUnwrappedLine = false;
3066     Line->MustBeDeclaration = false;
3067     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3068     parseBlock();
3069     if (Style.BraceWrapping.BeforeCatch)
3070       addUnwrappedLine();
3071     else
3072       NeedsUnwrappedLine = true;
3073   }
3074 
3075   if (Style.RemoveBracesLLVM)
3076     NestedTooDeep.pop_back();
3077 
3078   if (NeedsUnwrappedLine)
3079     addUnwrappedLine();
3080 }
3081 
parseNamespace()3082 void UnwrappedLineParser::parseNamespace() {
3083   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3084          "'namespace' expected");
3085 
3086   const FormatToken &InitialToken = *FormatTok;
3087   nextToken();
3088   if (InitialToken.is(TT_NamespaceMacro)) {
3089     parseParens();
3090   } else {
3091     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3092                               tok::l_square, tok::period, tok::l_paren) ||
3093            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3094       if (FormatTok->is(tok::l_square))
3095         parseSquare();
3096       else if (FormatTok->is(tok::l_paren))
3097         parseParens();
3098       else
3099         nextToken();
3100     }
3101   }
3102   if (FormatTok->is(tok::l_brace)) {
3103     FormatTok->setFinalizedType(TT_NamespaceLBrace);
3104 
3105     if (ShouldBreakBeforeBrace(Style, InitialToken))
3106       addUnwrappedLine();
3107 
3108     unsigned AddLevels =
3109         Style.NamespaceIndentation == FormatStyle::NI_All ||
3110                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3111                  DeclarationScopeStack.size() > 1)
3112             ? 1u
3113             : 0u;
3114     bool ManageWhitesmithsBraces =
3115         AddLevels == 0u &&
3116         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3117 
3118     // If we're in Whitesmiths mode, indent the brace if we're not indenting
3119     // the whole block.
3120     if (ManageWhitesmithsBraces)
3121       ++Line->Level;
3122 
3123     // Munch the semicolon after a namespace. This is more common than one would
3124     // think. Putting the semicolon into its own line is very ugly.
3125     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3126                /*KeepBraces=*/true, /*IfKind=*/nullptr,
3127                ManageWhitesmithsBraces);
3128 
3129     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3130 
3131     if (ManageWhitesmithsBraces)
3132       --Line->Level;
3133   }
3134   // FIXME: Add error handling.
3135 }
3136 
parseNew()3137 void UnwrappedLineParser::parseNew() {
3138   assert(FormatTok->is(tok::kw_new) && "'new' expected");
3139   nextToken();
3140 
3141   if (Style.isCSharp()) {
3142     do {
3143       // Handle constructor invocation, e.g. `new(field: value)`.
3144       if (FormatTok->is(tok::l_paren))
3145         parseParens();
3146 
3147       // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3148       if (FormatTok->is(tok::l_brace))
3149         parseBracedList();
3150 
3151       if (FormatTok->isOneOf(tok::semi, tok::comma))
3152         return;
3153 
3154       nextToken();
3155     } while (!eof());
3156   }
3157 
3158   if (Style.Language != FormatStyle::LK_Java)
3159     return;
3160 
3161   // In Java, we can parse everything up to the parens, which aren't optional.
3162   do {
3163     // There should not be a ;, { or } before the new's open paren.
3164     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3165       return;
3166 
3167     // Consume the parens.
3168     if (FormatTok->is(tok::l_paren)) {
3169       parseParens();
3170 
3171       // If there is a class body of an anonymous class, consume that as child.
3172       if (FormatTok->is(tok::l_brace))
3173         parseChildBlock();
3174       return;
3175     }
3176     nextToken();
3177   } while (!eof());
3178 }
3179 
parseLoopBody(bool KeepBraces,bool WrapRightBrace)3180 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3181   keepAncestorBraces();
3182 
3183   if (isBlockBegin(*FormatTok)) {
3184     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3185     FormatToken *LeftBrace = FormatTok;
3186     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3187     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3188                /*MunchSemi=*/true, KeepBraces);
3189     setPreviousRBraceType(TT_ControlStatementRBrace);
3190     if (!KeepBraces) {
3191       assert(!NestedTooDeep.empty());
3192       if (!NestedTooDeep.back())
3193         markOptionalBraces(LeftBrace);
3194     }
3195     if (WrapRightBrace)
3196       addUnwrappedLine();
3197   } else {
3198     parseUnbracedBody();
3199   }
3200 
3201   if (!KeepBraces)
3202     NestedTooDeep.pop_back();
3203 }
3204 
parseForOrWhileLoop(bool HasParens)3205 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3206   assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3207           (Style.isVerilog() &&
3208            FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3209                               Keywords.kw_always_ff, Keywords.kw_always_latch,
3210                               Keywords.kw_final, Keywords.kw_initial,
3211                               Keywords.kw_foreach, Keywords.kw_forever,
3212                               Keywords.kw_repeat))) &&
3213          "'for', 'while' or foreach macro expected");
3214   const bool KeepBraces = !Style.RemoveBracesLLVM ||
3215                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3216 
3217   nextToken();
3218   // JS' for await ( ...
3219   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3220     nextToken();
3221   if (IsCpp && FormatTok->is(tok::kw_co_await))
3222     nextToken();
3223   if (HasParens && FormatTok->is(tok::l_paren)) {
3224     // The type is only set for Verilog basically because we were afraid to
3225     // change the existing behavior for loops. See the discussion on D121756 for
3226     // details.
3227     if (Style.isVerilog())
3228       FormatTok->setFinalizedType(TT_ConditionLParen);
3229     parseParens();
3230   }
3231 
3232   if (Style.isVerilog()) {
3233     // Event control.
3234     parseVerilogSensitivityList();
3235   } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3236              Tokens->getPreviousToken()->is(tok::r_paren)) {
3237     nextToken();
3238     addUnwrappedLine();
3239     return;
3240   }
3241 
3242   handleAttributes();
3243   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3244 }
3245 
parseDoWhile()3246 void UnwrappedLineParser::parseDoWhile() {
3247   assert(FormatTok->is(tok::kw_do) && "'do' expected");
3248   nextToken();
3249 
3250   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3251 
3252   // FIXME: Add error handling.
3253   if (FormatTok->isNot(tok::kw_while)) {
3254     addUnwrappedLine();
3255     return;
3256   }
3257 
3258   FormatTok->setFinalizedType(TT_DoWhile);
3259 
3260   // If in Whitesmiths mode, the line with the while() needs to be indented
3261   // to the same level as the block.
3262   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3263     ++Line->Level;
3264 
3265   nextToken();
3266   parseStructuralElement();
3267 }
3268 
parseLabel(bool LeftAlignLabel)3269 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3270   nextToken();
3271   unsigned OldLineLevel = Line->Level;
3272 
3273   if (LeftAlignLabel)
3274     Line->Level = 0;
3275   else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3276     --Line->Level;
3277 
3278   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3279       FormatTok->is(tok::l_brace)) {
3280 
3281     CompoundStatementIndenter Indenter(this, Line->Level,
3282                                        Style.BraceWrapping.AfterCaseLabel,
3283                                        Style.BraceWrapping.IndentBraces);
3284     parseBlock();
3285     if (FormatTok->is(tok::kw_break)) {
3286       if (Style.BraceWrapping.AfterControlStatement ==
3287           FormatStyle::BWACS_Always) {
3288         addUnwrappedLine();
3289         if (!Style.IndentCaseBlocks &&
3290             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3291           ++Line->Level;
3292         }
3293       }
3294       parseStructuralElement();
3295     }
3296     addUnwrappedLine();
3297   } else {
3298     if (FormatTok->is(tok::semi))
3299       nextToken();
3300     addUnwrappedLine();
3301   }
3302   Line->Level = OldLineLevel;
3303   if (FormatTok->isNot(tok::l_brace)) {
3304     parseStructuralElement();
3305     addUnwrappedLine();
3306   }
3307 }
3308 
parseCaseLabel()3309 void UnwrappedLineParser::parseCaseLabel() {
3310   assert(FormatTok->is(tok::kw_case) && "'case' expected");
3311   auto *Case = FormatTok;
3312 
3313   // FIXME: fix handling of complex expressions here.
3314   do {
3315     nextToken();
3316     if (FormatTok->is(tok::colon)) {
3317       FormatTok->setFinalizedType(TT_CaseLabelColon);
3318       break;
3319     }
3320     if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) {
3321       FormatTok->setFinalizedType(TT_CaseLabelArrow);
3322       Case->setFinalizedType(TT_SwitchExpressionLabel);
3323       break;
3324     }
3325   } while (!eof());
3326   parseLabel();
3327 }
3328 
parseSwitch(bool IsExpr)3329 void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3330   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3331   nextToken();
3332   if (FormatTok->is(tok::l_paren))
3333     parseParens();
3334 
3335   keepAncestorBraces();
3336 
3337   if (FormatTok->is(tok::l_brace)) {
3338     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3339     FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3340                                        : TT_ControlStatementLBrace);
3341     if (IsExpr)
3342       parseChildBlock();
3343     else
3344       parseBlock();
3345     setPreviousRBraceType(TT_ControlStatementRBrace);
3346     if (!IsExpr)
3347       addUnwrappedLine();
3348   } else {
3349     addUnwrappedLine();
3350     ++Line->Level;
3351     parseStructuralElement();
3352     --Line->Level;
3353   }
3354 
3355   if (Style.RemoveBracesLLVM)
3356     NestedTooDeep.pop_back();
3357 }
3358 
3359 // Operators that can follow a C variable.
isCOperatorFollowingVar(tok::TokenKind Kind)3360 static bool isCOperatorFollowingVar(tok::TokenKind Kind) {
3361   switch (Kind) {
3362   case tok::ampamp:
3363   case tok::ampequal:
3364   case tok::arrow:
3365   case tok::caret:
3366   case tok::caretequal:
3367   case tok::comma:
3368   case tok::ellipsis:
3369   case tok::equal:
3370   case tok::equalequal:
3371   case tok::exclaim:
3372   case tok::exclaimequal:
3373   case tok::greater:
3374   case tok::greaterequal:
3375   case tok::greatergreater:
3376   case tok::greatergreaterequal:
3377   case tok::l_paren:
3378   case tok::l_square:
3379   case tok::less:
3380   case tok::lessequal:
3381   case tok::lessless:
3382   case tok::lesslessequal:
3383   case tok::minus:
3384   case tok::minusequal:
3385   case tok::minusminus:
3386   case tok::percent:
3387   case tok::percentequal:
3388   case tok::period:
3389   case tok::pipe:
3390   case tok::pipeequal:
3391   case tok::pipepipe:
3392   case tok::plus:
3393   case tok::plusequal:
3394   case tok::plusplus:
3395   case tok::question:
3396   case tok::r_brace:
3397   case tok::r_paren:
3398   case tok::r_square:
3399   case tok::semi:
3400   case tok::slash:
3401   case tok::slashequal:
3402   case tok::star:
3403   case tok::starequal:
3404     return true;
3405   default:
3406     return false;
3407   }
3408 }
3409 
parseAccessSpecifier()3410 void UnwrappedLineParser::parseAccessSpecifier() {
3411   FormatToken *AccessSpecifierCandidate = FormatTok;
3412   nextToken();
3413   // Understand Qt's slots.
3414   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3415     nextToken();
3416   // Otherwise, we don't know what it is, and we'd better keep the next token.
3417   if (FormatTok->is(tok::colon)) {
3418     nextToken();
3419     addUnwrappedLine();
3420   } else if (FormatTok->isNot(tok::coloncolon) &&
3421              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3422     // Not a variable name nor namespace name.
3423     addUnwrappedLine();
3424   } else if (AccessSpecifierCandidate) {
3425     // Consider the access specifier to be a C identifier.
3426     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3427   }
3428 }
3429 
3430 /// \brief Parses a requires, decides if it is a clause or an expression.
3431 /// \pre The current token has to be the requires keyword.
3432 /// \returns true if it parsed a clause.
parseRequires()3433 bool UnwrappedLineParser::parseRequires() {
3434   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3435   auto RequiresToken = FormatTok;
3436 
3437   // We try to guess if it is a requires clause, or a requires expression. For
3438   // that we first consume the keyword and check the next token.
3439   nextToken();
3440 
3441   switch (FormatTok->Tok.getKind()) {
3442   case tok::l_brace:
3443     // This can only be an expression, never a clause.
3444     parseRequiresExpression(RequiresToken);
3445     return false;
3446   case tok::l_paren:
3447     // Clauses and expression can start with a paren, it's unclear what we have.
3448     break;
3449   default:
3450     // All other tokens can only be a clause.
3451     parseRequiresClause(RequiresToken);
3452     return true;
3453   }
3454 
3455   // Looking forward we would have to decide if there are function declaration
3456   // like arguments to the requires expression:
3457   // requires (T t) {
3458   // Or there is a constraint expression for the requires clause:
3459   // requires (C<T> && ...
3460 
3461   // But first let's look behind.
3462   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3463 
3464   if (!PreviousNonComment ||
3465       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3466     // If there is no token, or an expression left brace, we are a requires
3467     // clause within a requires expression.
3468     parseRequiresClause(RequiresToken);
3469     return true;
3470   }
3471 
3472   switch (PreviousNonComment->Tok.getKind()) {
3473   case tok::greater:
3474   case tok::r_paren:
3475   case tok::kw_noexcept:
3476   case tok::kw_const:
3477     // This is a requires clause.
3478     parseRequiresClause(RequiresToken);
3479     return true;
3480   case tok::amp:
3481   case tok::ampamp: {
3482     // This can be either:
3483     // if (... && requires (T t) ...)
3484     // Or
3485     // void member(...) && requires (C<T> ...
3486     // We check the one token before that for a const:
3487     // void member(...) const && requires (C<T> ...
3488     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3489     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3490       parseRequiresClause(RequiresToken);
3491       return true;
3492     }
3493     break;
3494   }
3495   default:
3496     if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3497       // This is a requires clause.
3498       parseRequiresClause(RequiresToken);
3499       return true;
3500     }
3501     // It's an expression.
3502     parseRequiresExpression(RequiresToken);
3503     return false;
3504   }
3505 
3506   // Now we look forward and try to check if the paren content is a parameter
3507   // list. The parameters can be cv-qualified and contain references or
3508   // pointers.
3509   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3510   // of stuff: typename, const, *, &, &&, ::, identifiers.
3511 
3512   unsigned StoredPosition = Tokens->getPosition();
3513   FormatToken *NextToken = Tokens->getNextToken();
3514   int Lookahead = 0;
3515   auto PeekNext = [&Lookahead, &NextToken, this] {
3516     ++Lookahead;
3517     NextToken = Tokens->getNextToken();
3518   };
3519 
3520   bool FoundType = false;
3521   bool LastWasColonColon = false;
3522   int OpenAngles = 0;
3523 
3524   for (; Lookahead < 50; PeekNext()) {
3525     switch (NextToken->Tok.getKind()) {
3526     case tok::kw_volatile:
3527     case tok::kw_const:
3528     case tok::comma:
3529       if (OpenAngles == 0) {
3530         FormatTok = Tokens->setPosition(StoredPosition);
3531         parseRequiresExpression(RequiresToken);
3532         return false;
3533       }
3534       break;
3535     case tok::eof:
3536       // Break out of the loop.
3537       Lookahead = 50;
3538       break;
3539     case tok::coloncolon:
3540       LastWasColonColon = true;
3541       break;
3542     case tok::kw_decltype:
3543     case tok::identifier:
3544       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3545         FormatTok = Tokens->setPosition(StoredPosition);
3546         parseRequiresExpression(RequiresToken);
3547         return false;
3548       }
3549       FoundType = true;
3550       LastWasColonColon = false;
3551       break;
3552     case tok::less:
3553       ++OpenAngles;
3554       break;
3555     case tok::greater:
3556       --OpenAngles;
3557       break;
3558     default:
3559       if (NextToken->isTypeName(LangOpts)) {
3560         FormatTok = Tokens->setPosition(StoredPosition);
3561         parseRequiresExpression(RequiresToken);
3562         return false;
3563       }
3564       break;
3565     }
3566   }
3567   // This seems to be a complicated expression, just assume it's a clause.
3568   FormatTok = Tokens->setPosition(StoredPosition);
3569   parseRequiresClause(RequiresToken);
3570   return true;
3571 }
3572 
3573 /// \brief Parses a requires clause.
3574 /// \param RequiresToken The requires keyword token, which starts this clause.
3575 /// \pre We need to be on the next token after the requires keyword.
3576 /// \sa parseRequiresExpression
3577 ///
3578 /// Returns if it either has finished parsing the clause, or it detects, that
3579 /// the clause is incorrect.
parseRequiresClause(FormatToken * RequiresToken)3580 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3581   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3582   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3583 
3584   // If there is no previous token, we are within a requires expression,
3585   // otherwise we will always have the template or function declaration in front
3586   // of it.
3587   bool InRequiresExpression =
3588       !RequiresToken->Previous ||
3589       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3590 
3591   RequiresToken->setFinalizedType(InRequiresExpression
3592                                       ? TT_RequiresClauseInARequiresExpression
3593                                       : TT_RequiresClause);
3594 
3595   // NOTE: parseConstraintExpression is only ever called from this function.
3596   // It could be inlined into here.
3597   parseConstraintExpression();
3598 
3599   if (!InRequiresExpression)
3600     FormatTok->Previous->ClosesRequiresClause = true;
3601 }
3602 
3603 /// \brief Parses a requires expression.
3604 /// \param RequiresToken The requires keyword token, which starts this clause.
3605 /// \pre We need to be on the next token after the requires keyword.
3606 /// \sa parseRequiresClause
3607 ///
3608 /// Returns if it either has finished parsing the expression, or it detects,
3609 /// that the expression is incorrect.
parseRequiresExpression(FormatToken * RequiresToken)3610 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3611   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3612   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3613 
3614   RequiresToken->setFinalizedType(TT_RequiresExpression);
3615 
3616   if (FormatTok->is(tok::l_paren)) {
3617     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3618     parseParens();
3619   }
3620 
3621   if (FormatTok->is(tok::l_brace)) {
3622     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3623     parseChildBlock();
3624   }
3625 }
3626 
3627 /// \brief Parses a constraint expression.
3628 ///
3629 /// This is the body of a requires clause. It returns, when the parsing is
3630 /// complete, or the expression is incorrect.
parseConstraintExpression()3631 void UnwrappedLineParser::parseConstraintExpression() {
3632   // The special handling for lambdas is needed since tryToParseLambda() eats a
3633   // token and if a requires expression is the last part of a requires clause
3634   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3635   // not set on the correct token. Thus we need to be aware if we even expect a
3636   // lambda to be possible.
3637   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3638   bool LambdaNextTimeAllowed = true;
3639 
3640   // Within lambda declarations, it is permitted to put a requires clause after
3641   // its template parameter list, which would place the requires clause right
3642   // before the parentheses of the parameters of the lambda declaration. Thus,
3643   // we track if we expect to see grouping parentheses at all.
3644   // Without this check, `requires foo<T> (T t)` in the below example would be
3645   // seen as the whole requires clause, accidentally eating the parameters of
3646   // the lambda.
3647   // [&]<typename T> requires foo<T> (T t) { ... };
3648   bool TopLevelParensAllowed = true;
3649 
3650   do {
3651     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3652 
3653     switch (FormatTok->Tok.getKind()) {
3654     case tok::kw_requires: {
3655       auto RequiresToken = FormatTok;
3656       nextToken();
3657       parseRequiresExpression(RequiresToken);
3658       break;
3659     }
3660 
3661     case tok::l_paren:
3662       if (!TopLevelParensAllowed)
3663         return;
3664       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3665       TopLevelParensAllowed = false;
3666       break;
3667 
3668     case tok::l_square:
3669       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3670         return;
3671       break;
3672 
3673     case tok::kw_const:
3674     case tok::semi:
3675     case tok::kw_class:
3676     case tok::kw_struct:
3677     case tok::kw_union:
3678       return;
3679 
3680     case tok::l_brace:
3681       // Potential function body.
3682       return;
3683 
3684     case tok::ampamp:
3685     case tok::pipepipe:
3686       FormatTok->setFinalizedType(TT_BinaryOperator);
3687       nextToken();
3688       LambdaNextTimeAllowed = true;
3689       TopLevelParensAllowed = true;
3690       break;
3691 
3692     case tok::comma:
3693     case tok::comment:
3694       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3695       nextToken();
3696       break;
3697 
3698     case tok::kw_sizeof:
3699     case tok::greater:
3700     case tok::greaterequal:
3701     case tok::greatergreater:
3702     case tok::less:
3703     case tok::lessequal:
3704     case tok::lessless:
3705     case tok::equalequal:
3706     case tok::exclaim:
3707     case tok::exclaimequal:
3708     case tok::plus:
3709     case tok::minus:
3710     case tok::star:
3711     case tok::slash:
3712       LambdaNextTimeAllowed = true;
3713       TopLevelParensAllowed = true;
3714       // Just eat them.
3715       nextToken();
3716       break;
3717 
3718     case tok::numeric_constant:
3719     case tok::coloncolon:
3720     case tok::kw_true:
3721     case tok::kw_false:
3722       TopLevelParensAllowed = false;
3723       // Just eat them.
3724       nextToken();
3725       break;
3726 
3727     case tok::kw_static_cast:
3728     case tok::kw_const_cast:
3729     case tok::kw_reinterpret_cast:
3730     case tok::kw_dynamic_cast:
3731       nextToken();
3732       if (FormatTok->isNot(tok::less))
3733         return;
3734 
3735       nextToken();
3736       parseBracedList(/*IsAngleBracket=*/true);
3737       break;
3738 
3739     default:
3740       if (!FormatTok->Tok.getIdentifierInfo()) {
3741         // Identifiers are part of the default case, we check for more then
3742         // tok::identifier to handle builtin type traits.
3743         return;
3744       }
3745 
3746       // We need to differentiate identifiers for a template deduction guide,
3747       // variables, or function return types (the constraint expression has
3748       // ended before that), and basically all other cases. But it's easier to
3749       // check the other way around.
3750       assert(FormatTok->Previous);
3751       switch (FormatTok->Previous->Tok.getKind()) {
3752       case tok::coloncolon:  // Nested identifier.
3753       case tok::ampamp:      // Start of a function or variable for the
3754       case tok::pipepipe:    // constraint expression. (binary)
3755       case tok::exclaim:     // The same as above, but unary.
3756       case tok::kw_requires: // Initial identifier of a requires clause.
3757       case tok::equal:       // Initial identifier of a concept declaration.
3758         break;
3759       default:
3760         return;
3761       }
3762 
3763       // Read identifier with optional template declaration.
3764       nextToken();
3765       if (FormatTok->is(tok::less)) {
3766         nextToken();
3767         parseBracedList(/*IsAngleBracket=*/true);
3768       }
3769       TopLevelParensAllowed = false;
3770       break;
3771     }
3772   } while (!eof());
3773 }
3774 
parseEnum()3775 bool UnwrappedLineParser::parseEnum() {
3776   const FormatToken &InitialToken = *FormatTok;
3777 
3778   // Won't be 'enum' for NS_ENUMs.
3779   if (FormatTok->is(tok::kw_enum))
3780     nextToken();
3781 
3782   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3783   // declarations. An "enum" keyword followed by a colon would be a syntax
3784   // error and thus assume it is just an identifier.
3785   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3786     return false;
3787 
3788   // In protobuf, "enum" can be used as a field name.
3789   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3790     return false;
3791 
3792   if (IsCpp) {
3793     // Eat up enum class ...
3794     if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3795       nextToken();
3796     while (FormatTok->is(tok::l_square))
3797       if (!handleCppAttributes())
3798         return false;
3799   }
3800 
3801   while (FormatTok->Tok.getIdentifierInfo() ||
3802          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3803                             tok::greater, tok::comma, tok::question,
3804                             tok::l_square)) {
3805     if (Style.isVerilog()) {
3806       FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3807       nextToken();
3808       // In Verilog the base type can have dimensions.
3809       while (FormatTok->is(tok::l_square))
3810         parseSquare();
3811     } else {
3812       nextToken();
3813     }
3814     // We can have macros or attributes in between 'enum' and the enum name.
3815     if (FormatTok->is(tok::l_paren))
3816       parseParens();
3817     if (FormatTok->is(tok::identifier)) {
3818       nextToken();
3819       // If there are two identifiers in a row, this is likely an elaborate
3820       // return type. In Java, this can be "implements", etc.
3821       if (IsCpp && FormatTok->is(tok::identifier))
3822         return false;
3823     }
3824   }
3825 
3826   // Just a declaration or something is wrong.
3827   if (FormatTok->isNot(tok::l_brace))
3828     return true;
3829   FormatTok->setFinalizedType(TT_EnumLBrace);
3830   FormatTok->setBlockKind(BK_Block);
3831 
3832   if (Style.Language == FormatStyle::LK_Java) {
3833     // Java enums are different.
3834     parseJavaEnumBody();
3835     return true;
3836   }
3837   if (Style.Language == FormatStyle::LK_Proto) {
3838     parseBlock(/*MustBeDeclaration=*/true);
3839     return true;
3840   }
3841 
3842   if (!Style.AllowShortEnumsOnASingleLine &&
3843       ShouldBreakBeforeBrace(Style, InitialToken)) {
3844     addUnwrappedLine();
3845   }
3846   // Parse enum body.
3847   nextToken();
3848   if (!Style.AllowShortEnumsOnASingleLine) {
3849     addUnwrappedLine();
3850     Line->Level += 1;
3851   }
3852   bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3853   if (!Style.AllowShortEnumsOnASingleLine)
3854     Line->Level -= 1;
3855   if (HasError) {
3856     if (FormatTok->is(tok::semi))
3857       nextToken();
3858     addUnwrappedLine();
3859   }
3860   setPreviousRBraceType(TT_EnumRBrace);
3861   return true;
3862 
3863   // There is no addUnwrappedLine() here so that we fall through to parsing a
3864   // structural element afterwards. Thus, in "enum A {} n, m;",
3865   // "} n, m;" will end up in one unwrapped line.
3866 }
3867 
parseStructLike()3868 bool UnwrappedLineParser::parseStructLike() {
3869   // parseRecord falls through and does not yet add an unwrapped line as a
3870   // record declaration or definition can start a structural element.
3871   parseRecord();
3872   // This does not apply to Java, JavaScript and C#.
3873   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3874       Style.isCSharp()) {
3875     if (FormatTok->is(tok::semi))
3876       nextToken();
3877     addUnwrappedLine();
3878     return true;
3879   }
3880   return false;
3881 }
3882 
3883 namespace {
3884 // A class used to set and restore the Token position when peeking
3885 // ahead in the token source.
3886 class ScopedTokenPosition {
3887   unsigned StoredPosition;
3888   FormatTokenSource *Tokens;
3889 
3890 public:
ScopedTokenPosition(FormatTokenSource * Tokens)3891   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3892     assert(Tokens && "Tokens expected to not be null");
3893     StoredPosition = Tokens->getPosition();
3894   }
3895 
~ScopedTokenPosition()3896   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3897 };
3898 } // namespace
3899 
3900 // Look to see if we have [[ by looking ahead, if
3901 // its not then rewind to the original position.
tryToParseSimpleAttribute()3902 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3903   ScopedTokenPosition AutoPosition(Tokens);
3904   FormatToken *Tok = Tokens->getNextToken();
3905   // We already read the first [ check for the second.
3906   if (Tok->isNot(tok::l_square))
3907     return false;
3908   // Double check that the attribute is just something
3909   // fairly simple.
3910   while (Tok->isNot(tok::eof)) {
3911     if (Tok->is(tok::r_square))
3912       break;
3913     Tok = Tokens->getNextToken();
3914   }
3915   if (Tok->is(tok::eof))
3916     return false;
3917   Tok = Tokens->getNextToken();
3918   if (Tok->isNot(tok::r_square))
3919     return false;
3920   Tok = Tokens->getNextToken();
3921   if (Tok->is(tok::semi))
3922     return false;
3923   return true;
3924 }
3925 
parseJavaEnumBody()3926 void UnwrappedLineParser::parseJavaEnumBody() {
3927   assert(FormatTok->is(tok::l_brace));
3928   const FormatToken *OpeningBrace = FormatTok;
3929 
3930   // Determine whether the enum is simple, i.e. does not have a semicolon or
3931   // constants with class bodies. Simple enums can be formatted like braced
3932   // lists, contracted to a single line, etc.
3933   unsigned StoredPosition = Tokens->getPosition();
3934   bool IsSimple = true;
3935   FormatToken *Tok = Tokens->getNextToken();
3936   while (Tok->isNot(tok::eof)) {
3937     if (Tok->is(tok::r_brace))
3938       break;
3939     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3940       IsSimple = false;
3941       break;
3942     }
3943     // FIXME: This will also mark enums with braces in the arguments to enum
3944     // constants as "not simple". This is probably fine in practice, though.
3945     Tok = Tokens->getNextToken();
3946   }
3947   FormatTok = Tokens->setPosition(StoredPosition);
3948 
3949   if (IsSimple) {
3950     nextToken();
3951     parseBracedList();
3952     addUnwrappedLine();
3953     return;
3954   }
3955 
3956   // Parse the body of a more complex enum.
3957   // First add a line for everything up to the "{".
3958   nextToken();
3959   addUnwrappedLine();
3960   ++Line->Level;
3961 
3962   // Parse the enum constants.
3963   while (!eof()) {
3964     if (FormatTok->is(tok::l_brace)) {
3965       // Parse the constant's class body.
3966       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3967                  /*MunchSemi=*/false);
3968     } else if (FormatTok->is(tok::l_paren)) {
3969       parseParens();
3970     } else if (FormatTok->is(tok::comma)) {
3971       nextToken();
3972       addUnwrappedLine();
3973     } else if (FormatTok->is(tok::semi)) {
3974       nextToken();
3975       addUnwrappedLine();
3976       break;
3977     } else if (FormatTok->is(tok::r_brace)) {
3978       addUnwrappedLine();
3979       break;
3980     } else {
3981       nextToken();
3982     }
3983   }
3984 
3985   // Parse the class body after the enum's ";" if any.
3986   parseLevel(OpeningBrace);
3987   nextToken();
3988   --Line->Level;
3989   addUnwrappedLine();
3990 }
3991 
parseRecord(bool ParseAsExpr)3992 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3993   const FormatToken &InitialToken = *FormatTok;
3994   nextToken();
3995 
3996   const FormatToken *ClassName = nullptr;
3997   bool IsDerived = false;
3998   auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
3999     return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4000   };
4001   // JavaScript/TypeScript supports anonymous classes like:
4002   // a = class extends foo { }
4003   bool JSPastExtendsOrImplements = false;
4004   // The actual identifier can be a nested name specifier, and in macros
4005   // it is often token-pasted.
4006   // An [[attribute]] can be before the identifier.
4007   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
4008                             tok::kw_alignas, tok::l_square) ||
4009          FormatTok->isAttribute() ||
4010          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
4011           FormatTok->isOneOf(tok::period, tok::comma))) {
4012     if (Style.isJavaScript() &&
4013         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
4014       JSPastExtendsOrImplements = true;
4015       // JavaScript/TypeScript supports inline object types in
4016       // extends/implements positions:
4017       //     class Foo implements {bar: number} { }
4018       nextToken();
4019       if (FormatTok->is(tok::l_brace)) {
4020         tryToParseBracedList();
4021         continue;
4022       }
4023     }
4024     if (FormatTok->is(tok::l_square) && handleCppAttributes())
4025       continue;
4026     const auto *Previous = FormatTok;
4027     nextToken();
4028     switch (FormatTok->Tok.getKind()) {
4029     case tok::l_paren:
4030       // We can have macros in between 'class' and the class name.
4031       if (!IsNonMacroIdentifier(Previous) ||
4032           // e.g. `struct macro(a) S { int i; };`
4033           Previous->Previous == &InitialToken) {
4034         parseParens();
4035       }
4036       break;
4037     case tok::coloncolon:
4038     case tok::hashhash:
4039       break;
4040     default:
4041       if (!JSPastExtendsOrImplements && !ClassName &&
4042           Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) {
4043         ClassName = Previous;
4044       }
4045     }
4046   }
4047 
4048   auto IsListInitialization = [&] {
4049     if (!ClassName || IsDerived)
4050       return false;
4051     assert(FormatTok->is(tok::l_brace));
4052     const auto *Prev = FormatTok->getPreviousNonComment();
4053     assert(Prev);
4054     return Prev != ClassName && Prev->is(tok::identifier) &&
4055            Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
4056   };
4057 
4058   if (FormatTok->isOneOf(tok::colon, tok::less)) {
4059     int AngleNestingLevel = 0;
4060     do {
4061       if (FormatTok->is(tok::less))
4062         ++AngleNestingLevel;
4063       else if (FormatTok->is(tok::greater))
4064         --AngleNestingLevel;
4065 
4066       if (AngleNestingLevel == 0) {
4067         if (FormatTok->is(tok::colon)) {
4068           IsDerived = true;
4069         } else if (FormatTok->is(tok::identifier) &&
4070                    FormatTok->Previous->is(tok::coloncolon)) {
4071           ClassName = FormatTok;
4072         } else if (FormatTok->is(tok::l_paren) &&
4073                    IsNonMacroIdentifier(FormatTok->Previous)) {
4074           break;
4075         }
4076       }
4077       if (FormatTok->is(tok::l_brace)) {
4078         if (AngleNestingLevel == 0 && IsListInitialization())
4079           return;
4080         calculateBraceTypes(/*ExpectClassBody=*/true);
4081         if (!tryToParseBracedList())
4082           break;
4083       }
4084       if (FormatTok->is(tok::l_square)) {
4085         FormatToken *Previous = FormatTok->Previous;
4086         if (!Previous || (Previous->isNot(tok::r_paren) &&
4087                           !Previous->isTypeOrIdentifier(LangOpts))) {
4088           // Don't try parsing a lambda if we had a closing parenthesis before,
4089           // it was probably a pointer to an array: int (*)[].
4090           if (!tryToParseLambda())
4091             continue;
4092         } else {
4093           parseSquare();
4094           continue;
4095         }
4096       }
4097       if (FormatTok->is(tok::semi))
4098         return;
4099       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
4100         addUnwrappedLine();
4101         nextToken();
4102         parseCSharpGenericTypeConstraint();
4103         break;
4104       }
4105       nextToken();
4106     } while (!eof());
4107   }
4108 
4109   auto GetBraceTypes =
4110       [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4111     switch (RecordTok.Tok.getKind()) {
4112     case tok::kw_class:
4113       return {TT_ClassLBrace, TT_ClassRBrace};
4114     case tok::kw_struct:
4115       return {TT_StructLBrace, TT_StructRBrace};
4116     case tok::kw_union:
4117       return {TT_UnionLBrace, TT_UnionRBrace};
4118     default:
4119       // Useful for e.g. interface.
4120       return {TT_RecordLBrace, TT_RecordRBrace};
4121     }
4122   };
4123   if (FormatTok->is(tok::l_brace)) {
4124     if (IsListInitialization())
4125       return;
4126     auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4127     FormatTok->setFinalizedType(OpenBraceType);
4128     if (ParseAsExpr) {
4129       parseChildBlock();
4130     } else {
4131       if (ShouldBreakBeforeBrace(Style, InitialToken))
4132         addUnwrappedLine();
4133 
4134       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4135       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4136     }
4137     setPreviousRBraceType(ClosingBraceType);
4138   }
4139   // There is no addUnwrappedLine() here so that we fall through to parsing a
4140   // structural element afterwards. Thus, in "class A {} n, m;",
4141   // "} n, m;" will end up in one unwrapped line.
4142 }
4143 
parseObjCMethod()4144 void UnwrappedLineParser::parseObjCMethod() {
4145   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4146          "'(' or identifier expected.");
4147   do {
4148     if (FormatTok->is(tok::semi)) {
4149       nextToken();
4150       addUnwrappedLine();
4151       return;
4152     } else if (FormatTok->is(tok::l_brace)) {
4153       if (Style.BraceWrapping.AfterFunction)
4154         addUnwrappedLine();
4155       parseBlock();
4156       addUnwrappedLine();
4157       return;
4158     } else {
4159       nextToken();
4160     }
4161   } while (!eof());
4162 }
4163 
parseObjCProtocolList()4164 void UnwrappedLineParser::parseObjCProtocolList() {
4165   assert(FormatTok->is(tok::less) && "'<' expected.");
4166   do {
4167     nextToken();
4168     // Early exit in case someone forgot a close angle.
4169     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4170         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4171       return;
4172     }
4173   } while (!eof() && FormatTok->isNot(tok::greater));
4174   nextToken(); // Skip '>'.
4175 }
4176 
parseObjCUntilAtEnd()4177 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4178   do {
4179     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4180       nextToken();
4181       addUnwrappedLine();
4182       break;
4183     }
4184     if (FormatTok->is(tok::l_brace)) {
4185       parseBlock();
4186       // In ObjC interfaces, nothing should be following the "}".
4187       addUnwrappedLine();
4188     } else if (FormatTok->is(tok::r_brace)) {
4189       // Ignore stray "}". parseStructuralElement doesn't consume them.
4190       nextToken();
4191       addUnwrappedLine();
4192     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4193       nextToken();
4194       parseObjCMethod();
4195     } else {
4196       parseStructuralElement();
4197     }
4198   } while (!eof());
4199 }
4200 
parseObjCInterfaceOrImplementation()4201 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4202   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4203          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4204   nextToken();
4205   nextToken(); // interface name
4206 
4207   // @interface can be followed by a lightweight generic
4208   // specialization list, then either a base class or a category.
4209   if (FormatTok->is(tok::less))
4210     parseObjCLightweightGenerics();
4211   if (FormatTok->is(tok::colon)) {
4212     nextToken();
4213     nextToken(); // base class name
4214     // The base class can also have lightweight generics applied to it.
4215     if (FormatTok->is(tok::less))
4216       parseObjCLightweightGenerics();
4217   } else if (FormatTok->is(tok::l_paren)) {
4218     // Skip category, if present.
4219     parseParens();
4220   }
4221 
4222   if (FormatTok->is(tok::less))
4223     parseObjCProtocolList();
4224 
4225   if (FormatTok->is(tok::l_brace)) {
4226     if (Style.BraceWrapping.AfterObjCDeclaration)
4227       addUnwrappedLine();
4228     parseBlock(/*MustBeDeclaration=*/true);
4229   }
4230 
4231   // With instance variables, this puts '}' on its own line.  Without instance
4232   // variables, this ends the @interface line.
4233   addUnwrappedLine();
4234 
4235   parseObjCUntilAtEnd();
4236 }
4237 
parseObjCLightweightGenerics()4238 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4239   assert(FormatTok->is(tok::less));
4240   // Unlike protocol lists, generic parameterizations support
4241   // nested angles:
4242   //
4243   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4244   //     NSObject <NSCopying, NSSecureCoding>
4245   //
4246   // so we need to count how many open angles we have left.
4247   unsigned NumOpenAngles = 1;
4248   do {
4249     nextToken();
4250     // Early exit in case someone forgot a close angle.
4251     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4252         FormatTok->isObjCAtKeyword(tok::objc_end)) {
4253       break;
4254     }
4255     if (FormatTok->is(tok::less)) {
4256       ++NumOpenAngles;
4257     } else if (FormatTok->is(tok::greater)) {
4258       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4259       --NumOpenAngles;
4260     }
4261   } while (!eof() && NumOpenAngles != 0);
4262   nextToken(); // Skip '>'.
4263 }
4264 
4265 // Returns true for the declaration/definition form of @protocol,
4266 // false for the expression form.
parseObjCProtocol()4267 bool UnwrappedLineParser::parseObjCProtocol() {
4268   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4269   nextToken();
4270 
4271   if (FormatTok->is(tok::l_paren)) {
4272     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4273     return false;
4274   }
4275 
4276   // The definition/declaration form,
4277   // @protocol Foo
4278   // - (int)someMethod;
4279   // @end
4280 
4281   nextToken(); // protocol name
4282 
4283   if (FormatTok->is(tok::less))
4284     parseObjCProtocolList();
4285 
4286   // Check for protocol declaration.
4287   if (FormatTok->is(tok::semi)) {
4288     nextToken();
4289     addUnwrappedLine();
4290     return true;
4291   }
4292 
4293   addUnwrappedLine();
4294   parseObjCUntilAtEnd();
4295   return true;
4296 }
4297 
parseJavaScriptEs6ImportExport()4298 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4299   bool IsImport = FormatTok->is(Keywords.kw_import);
4300   assert(IsImport || FormatTok->is(tok::kw_export));
4301   nextToken();
4302 
4303   // Consume the "default" in "export default class/function".
4304   if (FormatTok->is(tok::kw_default))
4305     nextToken();
4306 
4307   // Consume "async function", "function" and "default function", so that these
4308   // get parsed as free-standing JS functions, i.e. do not require a trailing
4309   // semicolon.
4310   if (FormatTok->is(Keywords.kw_async))
4311     nextToken();
4312   if (FormatTok->is(Keywords.kw_function)) {
4313     nextToken();
4314     return;
4315   }
4316 
4317   // For imports, `export *`, `export {...}`, consume the rest of the line up
4318   // to the terminating `;`. For everything else, just return and continue
4319   // parsing the structural element, i.e. the declaration or expression for
4320   // `export default`.
4321   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4322       !FormatTok->isStringLiteral() &&
4323       !(FormatTok->is(Keywords.kw_type) &&
4324         Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4325     return;
4326   }
4327 
4328   while (!eof()) {
4329     if (FormatTok->is(tok::semi))
4330       return;
4331     if (Line->Tokens.empty()) {
4332       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4333       // import statement should terminate.
4334       return;
4335     }
4336     if (FormatTok->is(tok::l_brace)) {
4337       FormatTok->setBlockKind(BK_Block);
4338       nextToken();
4339       parseBracedList();
4340     } else {
4341       nextToken();
4342     }
4343   }
4344 }
4345 
parseStatementMacro()4346 void UnwrappedLineParser::parseStatementMacro() {
4347   nextToken();
4348   if (FormatTok->is(tok::l_paren))
4349     parseParens();
4350   if (FormatTok->is(tok::semi))
4351     nextToken();
4352   addUnwrappedLine();
4353 }
4354 
parseVerilogHierarchyIdentifier()4355 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4356   // consume things like a::`b.c[d:e] or a::*
4357   while (true) {
4358     if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4359                            tok::coloncolon, tok::hash) ||
4360         Keywords.isVerilogIdentifier(*FormatTok)) {
4361       nextToken();
4362     } else if (FormatTok->is(tok::l_square)) {
4363       parseSquare();
4364     } else {
4365       break;
4366     }
4367   }
4368 }
4369 
parseVerilogSensitivityList()4370 void UnwrappedLineParser::parseVerilogSensitivityList() {
4371   if (FormatTok->isNot(tok::at))
4372     return;
4373   nextToken();
4374   // A block event expression has 2 at signs.
4375   if (FormatTok->is(tok::at))
4376     nextToken();
4377   switch (FormatTok->Tok.getKind()) {
4378   case tok::star:
4379     nextToken();
4380     break;
4381   case tok::l_paren:
4382     parseParens();
4383     break;
4384   default:
4385     parseVerilogHierarchyIdentifier();
4386     break;
4387   }
4388 }
4389 
parseVerilogHierarchyHeader()4390 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4391   unsigned AddLevels = 0;
4392 
4393   if (FormatTok->is(Keywords.kw_clocking)) {
4394     nextToken();
4395     if (Keywords.isVerilogIdentifier(*FormatTok))
4396       nextToken();
4397     parseVerilogSensitivityList();
4398     if (FormatTok->is(tok::semi))
4399       nextToken();
4400   } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4401                                 Keywords.kw_casez, Keywords.kw_randcase,
4402                                 Keywords.kw_randsequence)) {
4403     if (Style.IndentCaseLabels)
4404       AddLevels++;
4405     nextToken();
4406     if (FormatTok->is(tok::l_paren)) {
4407       FormatTok->setFinalizedType(TT_ConditionLParen);
4408       parseParens();
4409     }
4410     if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4411       nextToken();
4412     // The case header has no semicolon.
4413   } else {
4414     // "module" etc.
4415     nextToken();
4416     // all the words like the name of the module and specifiers like
4417     // "automatic" and the width of function return type
4418     while (true) {
4419       if (FormatTok->is(tok::l_square)) {
4420         auto Prev = FormatTok->getPreviousNonComment();
4421         if (Prev && Keywords.isVerilogIdentifier(*Prev))
4422           Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4423         parseSquare();
4424       } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4425                  FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4426         nextToken();
4427       } else {
4428         break;
4429       }
4430     }
4431 
4432     auto NewLine = [this]() {
4433       addUnwrappedLine();
4434       Line->IsContinuation = true;
4435     };
4436 
4437     // package imports
4438     while (FormatTok->is(Keywords.kw_import)) {
4439       NewLine();
4440       nextToken();
4441       parseVerilogHierarchyIdentifier();
4442       if (FormatTok->is(tok::semi))
4443         nextToken();
4444     }
4445 
4446     // parameters and ports
4447     if (FormatTok->is(Keywords.kw_verilogHash)) {
4448       NewLine();
4449       nextToken();
4450       if (FormatTok->is(tok::l_paren)) {
4451         FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4452         parseParens();
4453       }
4454     }
4455     if (FormatTok->is(tok::l_paren)) {
4456       NewLine();
4457       FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4458       parseParens();
4459     }
4460 
4461     // extends and implements
4462     if (FormatTok->is(Keywords.kw_extends)) {
4463       NewLine();
4464       nextToken();
4465       parseVerilogHierarchyIdentifier();
4466       if (FormatTok->is(tok::l_paren))
4467         parseParens();
4468     }
4469     if (FormatTok->is(Keywords.kw_implements)) {
4470       NewLine();
4471       do {
4472         nextToken();
4473         parseVerilogHierarchyIdentifier();
4474       } while (FormatTok->is(tok::comma));
4475     }
4476 
4477     // Coverage event for cover groups.
4478     if (FormatTok->is(tok::at)) {
4479       NewLine();
4480       parseVerilogSensitivityList();
4481     }
4482 
4483     if (FormatTok->is(tok::semi))
4484       nextToken(/*LevelDifference=*/1);
4485     addUnwrappedLine();
4486   }
4487 
4488   return AddLevels;
4489 }
4490 
parseVerilogTable()4491 void UnwrappedLineParser::parseVerilogTable() {
4492   assert(FormatTok->is(Keywords.kw_table));
4493   nextToken(/*LevelDifference=*/1);
4494   addUnwrappedLine();
4495 
4496   auto InitialLevel = Line->Level++;
4497   while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4498     FormatToken *Tok = FormatTok;
4499     nextToken();
4500     if (Tok->is(tok::semi))
4501       addUnwrappedLine();
4502     else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4503       Tok->setFinalizedType(TT_VerilogTableItem);
4504   }
4505   Line->Level = InitialLevel;
4506   nextToken(/*LevelDifference=*/-1);
4507   addUnwrappedLine();
4508 }
4509 
parseVerilogCaseLabel()4510 void UnwrappedLineParser::parseVerilogCaseLabel() {
4511   // The label will get unindented in AnnotatingParser. If there are no leading
4512   // spaces, indent the rest here so that things inside the block will be
4513   // indented relative to things outside. We don't use parseLabel because we
4514   // don't know whether this colon is a label or a ternary expression at this
4515   // point.
4516   auto OrigLevel = Line->Level;
4517   auto FirstLine = CurrentLines->size();
4518   if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4519     ++Line->Level;
4520   else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4521     --Line->Level;
4522   parseStructuralElement();
4523   // Restore the indentation in both the new line and the line that has the
4524   // label.
4525   if (CurrentLines->size() > FirstLine)
4526     (*CurrentLines)[FirstLine].Level = OrigLevel;
4527   Line->Level = OrigLevel;
4528 }
4529 
containsExpansion(const UnwrappedLine & Line) const4530 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4531   for (const auto &N : Line.Tokens) {
4532     if (N.Tok->MacroCtx)
4533       return true;
4534     for (const UnwrappedLine &Child : N.Children)
4535       if (containsExpansion(Child))
4536         return true;
4537   }
4538   return false;
4539 }
4540 
addUnwrappedLine(LineLevel AdjustLevel)4541 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4542   if (Line->Tokens.empty())
4543     return;
4544   LLVM_DEBUG({
4545     if (!parsingPPDirective()) {
4546       llvm::dbgs() << "Adding unwrapped line:\n";
4547       printDebugInfo(*Line);
4548     }
4549   });
4550 
4551   // If this line closes a block when in Whitesmiths mode, remember that
4552   // information so that the level can be decreased after the line is added.
4553   // This has to happen after the addition of the line since the line itself
4554   // needs to be indented.
4555   bool ClosesWhitesmithsBlock =
4556       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4557       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4558 
4559   // If the current line was expanded from a macro call, we use it to
4560   // reconstruct an unwrapped line from the structure of the expanded unwrapped
4561   // line and the unexpanded token stream.
4562   if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4563     if (!Reconstruct)
4564       Reconstruct.emplace(Line->Level, Unexpanded);
4565     Reconstruct->addLine(*Line);
4566 
4567     // While the reconstructed unexpanded lines are stored in the normal
4568     // flow of lines, the expanded lines are stored on the side to be analyzed
4569     // in an extra step.
4570     CurrentExpandedLines.push_back(std::move(*Line));
4571 
4572     if (Reconstruct->finished()) {
4573       UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4574       assert(!Reconstructed.Tokens.empty() &&
4575              "Reconstructed must at least contain the macro identifier.");
4576       assert(!parsingPPDirective());
4577       LLVM_DEBUG({
4578         llvm::dbgs() << "Adding unexpanded line:\n";
4579         printDebugInfo(Reconstructed);
4580       });
4581       ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4582       Lines.push_back(std::move(Reconstructed));
4583       CurrentExpandedLines.clear();
4584       Reconstruct.reset();
4585     }
4586   } else {
4587     // At the top level we only get here when no unexpansion is going on, or
4588     // when conditional formatting led to unfinished macro reconstructions.
4589     assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4590     CurrentLines->push_back(std::move(*Line));
4591   }
4592   Line->Tokens.clear();
4593   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4594   Line->FirstStartColumn = 0;
4595   Line->IsContinuation = false;
4596   Line->SeenDecltypeAuto = false;
4597 
4598   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4599     --Line->Level;
4600   if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4601     CurrentLines->append(
4602         std::make_move_iterator(PreprocessorDirectives.begin()),
4603         std::make_move_iterator(PreprocessorDirectives.end()));
4604     PreprocessorDirectives.clear();
4605   }
4606   // Disconnect the current token from the last token on the previous line.
4607   FormatTok->Previous = nullptr;
4608 }
4609 
eof() const4610 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4611 
isOnNewLine(const FormatToken & FormatTok)4612 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4613   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4614          FormatTok.NewlinesBefore > 0;
4615 }
4616 
4617 // Checks if \p FormatTok is a line comment that continues the line comment
4618 // section on \p Line.
4619 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)4620 continuesLineCommentSection(const FormatToken &FormatTok,
4621                             const UnwrappedLine &Line,
4622                             const llvm::Regex &CommentPragmasRegex) {
4623   if (Line.Tokens.empty())
4624     return false;
4625 
4626   StringRef IndentContent = FormatTok.TokenText;
4627   if (FormatTok.TokenText.starts_with("//") ||
4628       FormatTok.TokenText.starts_with("/*")) {
4629     IndentContent = FormatTok.TokenText.substr(2);
4630   }
4631   if (CommentPragmasRegex.match(IndentContent))
4632     return false;
4633 
4634   // If Line starts with a line comment, then FormatTok continues the comment
4635   // section if its original column is greater or equal to the original start
4636   // column of the line.
4637   //
4638   // Define the min column token of a line as follows: if a line ends in '{' or
4639   // contains a '{' followed by a line comment, then the min column token is
4640   // that '{'. Otherwise, the min column token of the line is the first token of
4641   // the line.
4642   //
4643   // If Line starts with a token other than a line comment, then FormatTok
4644   // continues the comment section if its original column is greater than the
4645   // original start column of the min column token of the line.
4646   //
4647   // For example, the second line comment continues the first in these cases:
4648   //
4649   // // first line
4650   // // second line
4651   //
4652   // and:
4653   //
4654   // // first line
4655   //  // second line
4656   //
4657   // and:
4658   //
4659   // int i; // first line
4660   //  // second line
4661   //
4662   // and:
4663   //
4664   // do { // first line
4665   //      // second line
4666   //   int i;
4667   // } while (true);
4668   //
4669   // and:
4670   //
4671   // enum {
4672   //   a, // first line
4673   //    // second line
4674   //   b
4675   // };
4676   //
4677   // The second line comment doesn't continue the first in these cases:
4678   //
4679   //   // first line
4680   //  // second line
4681   //
4682   // and:
4683   //
4684   // int i; // first line
4685   // // second line
4686   //
4687   // and:
4688   //
4689   // do { // first line
4690   //   // second line
4691   //   int i;
4692   // } while (true);
4693   //
4694   // and:
4695   //
4696   // enum {
4697   //   a, // first line
4698   //   // second line
4699   // };
4700   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4701 
4702   // Scan for '{//'. If found, use the column of '{' as a min column for line
4703   // comment section continuation.
4704   const FormatToken *PreviousToken = nullptr;
4705   for (const UnwrappedLineNode &Node : Line.Tokens) {
4706     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4707         isLineComment(*Node.Tok)) {
4708       MinColumnToken = PreviousToken;
4709       break;
4710     }
4711     PreviousToken = Node.Tok;
4712 
4713     // Grab the last newline preceding a token in this unwrapped line.
4714     if (Node.Tok->NewlinesBefore > 0)
4715       MinColumnToken = Node.Tok;
4716   }
4717   if (PreviousToken && PreviousToken->is(tok::l_brace))
4718     MinColumnToken = PreviousToken;
4719 
4720   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4721                               MinColumnToken);
4722 }
4723 
flushComments(bool NewlineBeforeNext)4724 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4725   bool JustComments = Line->Tokens.empty();
4726   for (FormatToken *Tok : CommentsBeforeNextToken) {
4727     // Line comments that belong to the same line comment section are put on the
4728     // same line since later we might want to reflow content between them.
4729     // Additional fine-grained breaking of line comment sections is controlled
4730     // by the class BreakableLineCommentSection in case it is desirable to keep
4731     // several line comment sections in the same unwrapped line.
4732     //
4733     // FIXME: Consider putting separate line comment sections as children to the
4734     // unwrapped line instead.
4735     Tok->ContinuesLineCommentSection =
4736         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4737     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4738       addUnwrappedLine();
4739     pushToken(Tok);
4740   }
4741   if (NewlineBeforeNext && JustComments)
4742     addUnwrappedLine();
4743   CommentsBeforeNextToken.clear();
4744 }
4745 
nextToken(int LevelDifference)4746 void UnwrappedLineParser::nextToken(int LevelDifference) {
4747   if (eof())
4748     return;
4749   flushComments(isOnNewLine(*FormatTok));
4750   pushToken(FormatTok);
4751   FormatToken *Previous = FormatTok;
4752   if (!Style.isJavaScript())
4753     readToken(LevelDifference);
4754   else
4755     readTokenWithJavaScriptASI();
4756   FormatTok->Previous = Previous;
4757   if (Style.isVerilog()) {
4758     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4759     // keywords like `begin`, we can't treat them the same as left braces
4760     // because some contexts require one of them.  For example structs use
4761     // braces and if blocks use keywords, and a left brace can occur in an if
4762     // statement, but it is not a block.  For keywords like `end`, we simply
4763     // treat them the same as right braces.
4764     if (Keywords.isVerilogEnd(*FormatTok))
4765       FormatTok->Tok.setKind(tok::r_brace);
4766   }
4767 }
4768 
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)4769 void UnwrappedLineParser::distributeComments(
4770     const SmallVectorImpl<FormatToken *> &Comments,
4771     const FormatToken *NextTok) {
4772   // Whether or not a line comment token continues a line is controlled by
4773   // the method continuesLineCommentSection, with the following caveat:
4774   //
4775   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4776   // that each comment line from the trail is aligned with the next token, if
4777   // the next token exists. If a trail exists, the beginning of the maximal
4778   // trail is marked as a start of a new comment section.
4779   //
4780   // For example in this code:
4781   //
4782   // int a; // line about a
4783   //   // line 1 about b
4784   //   // line 2 about b
4785   //   int b;
4786   //
4787   // the two lines about b form a maximal trail, so there are two sections, the
4788   // first one consisting of the single comment "// line about a" and the
4789   // second one consisting of the next two comments.
4790   if (Comments.empty())
4791     return;
4792   bool ShouldPushCommentsInCurrentLine = true;
4793   bool HasTrailAlignedWithNextToken = false;
4794   unsigned StartOfTrailAlignedWithNextToken = 0;
4795   if (NextTok) {
4796     // We are skipping the first element intentionally.
4797     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4798       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4799         HasTrailAlignedWithNextToken = true;
4800         StartOfTrailAlignedWithNextToken = i;
4801       }
4802     }
4803   }
4804   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4805     FormatToken *FormatTok = Comments[i];
4806     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4807       FormatTok->ContinuesLineCommentSection = false;
4808     } else {
4809       FormatTok->ContinuesLineCommentSection =
4810           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4811     }
4812     if (!FormatTok->ContinuesLineCommentSection &&
4813         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4814       ShouldPushCommentsInCurrentLine = false;
4815     }
4816     if (ShouldPushCommentsInCurrentLine)
4817       pushToken(FormatTok);
4818     else
4819       CommentsBeforeNextToken.push_back(FormatTok);
4820   }
4821 }
4822 
readToken(int LevelDifference)4823 void UnwrappedLineParser::readToken(int LevelDifference) {
4824   SmallVector<FormatToken *, 1> Comments;
4825   bool PreviousWasComment = false;
4826   bool FirstNonCommentOnLine = false;
4827   do {
4828     FormatTok = Tokens->getNextToken();
4829     assert(FormatTok);
4830     while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd,
4831                               TT_ConflictAlternative)) {
4832       if (FormatTok->is(TT_ConflictStart))
4833         conditionalCompilationStart(/*Unreachable=*/false);
4834       else if (FormatTok->is(TT_ConflictAlternative))
4835         conditionalCompilationAlternative();
4836       else if (FormatTok->is(TT_ConflictEnd))
4837         conditionalCompilationEnd();
4838       FormatTok = Tokens->getNextToken();
4839       FormatTok->MustBreakBefore = true;
4840       FormatTok->MustBreakBeforeFinalized = true;
4841     }
4842 
4843     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4844                                       const FormatToken &Tok,
4845                                       bool PreviousWasComment) {
4846       auto IsFirstOnLine = [](const FormatToken &Tok) {
4847         return Tok.HasUnescapedNewline || Tok.IsFirst;
4848       };
4849 
4850       // Consider preprocessor directives preceded by block comments as first
4851       // on line.
4852       if (PreviousWasComment)
4853         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4854       return IsFirstOnLine(Tok);
4855     };
4856 
4857     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4858         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4859     PreviousWasComment = FormatTok->is(tok::comment);
4860 
4861     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4862            (!Style.isVerilog() ||
4863             Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4864            FirstNonCommentOnLine) {
4865       distributeComments(Comments, FormatTok);
4866       Comments.clear();
4867       // If there is an unfinished unwrapped line, we flush the preprocessor
4868       // directives only after that unwrapped line was finished later.
4869       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4870       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4871       assert((LevelDifference >= 0 ||
4872               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4873              "LevelDifference makes Line->Level negative");
4874       Line->Level += LevelDifference;
4875       // Comments stored before the preprocessor directive need to be output
4876       // before the preprocessor directive, at the same level as the
4877       // preprocessor directive, as we consider them to apply to the directive.
4878       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4879           PPBranchLevel > 0) {
4880         Line->Level += PPBranchLevel;
4881       }
4882       assert(Line->Level >= Line->UnbracedBodyLevel);
4883       Line->Level -= Line->UnbracedBodyLevel;
4884       flushComments(isOnNewLine(*FormatTok));
4885       parsePPDirective();
4886       PreviousWasComment = FormatTok->is(tok::comment);
4887       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4888           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4889     }
4890 
4891     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4892         !Line->InPPDirective) {
4893       continue;
4894     }
4895 
4896     if (FormatTok->is(tok::identifier) &&
4897         Macros.defined(FormatTok->TokenText) &&
4898         // FIXME: Allow expanding macros in preprocessor directives.
4899         !Line->InPPDirective) {
4900       FormatToken *ID = FormatTok;
4901       unsigned Position = Tokens->getPosition();
4902 
4903       // To correctly parse the code, we need to replace the tokens of the macro
4904       // call with its expansion.
4905       auto PreCall = std::move(Line);
4906       Line.reset(new UnwrappedLine);
4907       bool OldInExpansion = InExpansion;
4908       InExpansion = true;
4909       // We parse the macro call into a new line.
4910       auto Args = parseMacroCall();
4911       InExpansion = OldInExpansion;
4912       assert(Line->Tokens.front().Tok == ID);
4913       // And remember the unexpanded macro call tokens.
4914       auto UnexpandedLine = std::move(Line);
4915       // Reset to the old line.
4916       Line = std::move(PreCall);
4917 
4918       LLVM_DEBUG({
4919         llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4920         if (Args) {
4921           llvm::dbgs() << "(";
4922           for (const auto &Arg : Args.value())
4923             for (const auto &T : Arg)
4924               llvm::dbgs() << T->TokenText << " ";
4925           llvm::dbgs() << ")";
4926         }
4927         llvm::dbgs() << "\n";
4928       });
4929       if (Macros.objectLike(ID->TokenText) && Args &&
4930           !Macros.hasArity(ID->TokenText, Args->size())) {
4931         // The macro is either
4932         // - object-like, but we got argumnets, or
4933         // - overloaded to be both object-like and function-like, but none of
4934         //   the function-like arities match the number of arguments.
4935         // Thus, expand as object-like macro.
4936         LLVM_DEBUG(llvm::dbgs()
4937                    << "Macro \"" << ID->TokenText
4938                    << "\" not overloaded for arity " << Args->size()
4939                    << "or not function-like, using object-like overload.");
4940         Args.reset();
4941         UnexpandedLine->Tokens.resize(1);
4942         Tokens->setPosition(Position);
4943         nextToken();
4944         assert(!Args && Macros.objectLike(ID->TokenText));
4945       }
4946       if ((!Args && Macros.objectLike(ID->TokenText)) ||
4947           (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4948         // Next, we insert the expanded tokens in the token stream at the
4949         // current position, and continue parsing.
4950         Unexpanded[ID] = std::move(UnexpandedLine);
4951         SmallVector<FormatToken *, 8> Expansion =
4952             Macros.expand(ID, std::move(Args));
4953         if (!Expansion.empty())
4954           FormatTok = Tokens->insertTokens(Expansion);
4955 
4956         LLVM_DEBUG({
4957           llvm::dbgs() << "Expanded: ";
4958           for (const auto &T : Expansion)
4959             llvm::dbgs() << T->TokenText << " ";
4960           llvm::dbgs() << "\n";
4961         });
4962       } else {
4963         LLVM_DEBUG({
4964           llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4965                        << "\", because it was used ";
4966           if (Args)
4967             llvm::dbgs() << "with " << Args->size();
4968           else
4969             llvm::dbgs() << "without";
4970           llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4971         });
4972         Tokens->setPosition(Position);
4973         FormatTok = ID;
4974       }
4975     }
4976 
4977     if (FormatTok->isNot(tok::comment)) {
4978       distributeComments(Comments, FormatTok);
4979       Comments.clear();
4980       return;
4981     }
4982 
4983     Comments.push_back(FormatTok);
4984   } while (!eof());
4985 
4986   distributeComments(Comments, nullptr);
4987   Comments.clear();
4988 }
4989 
4990 namespace {
4991 template <typename Iterator>
pushTokens(Iterator Begin,Iterator End,llvm::SmallVectorImpl<FormatToken * > & Into)4992 void pushTokens(Iterator Begin, Iterator End,
4993                 llvm::SmallVectorImpl<FormatToken *> &Into) {
4994   for (auto I = Begin; I != End; ++I) {
4995     Into.push_back(I->Tok);
4996     for (const auto &Child : I->Children)
4997       pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4998   }
4999 }
5000 } // namespace
5001 
5002 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
parseMacroCall()5003 UnwrappedLineParser::parseMacroCall() {
5004   std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5005   assert(Line->Tokens.empty());
5006   nextToken();
5007   if (FormatTok->isNot(tok::l_paren))
5008     return Args;
5009   unsigned Position = Tokens->getPosition();
5010   FormatToken *Tok = FormatTok;
5011   nextToken();
5012   Args.emplace();
5013   auto ArgStart = std::prev(Line->Tokens.end());
5014 
5015   int Parens = 0;
5016   do {
5017     switch (FormatTok->Tok.getKind()) {
5018     case tok::l_paren:
5019       ++Parens;
5020       nextToken();
5021       break;
5022     case tok::r_paren: {
5023       if (Parens > 0) {
5024         --Parens;
5025         nextToken();
5026         break;
5027       }
5028       Args->push_back({});
5029       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5030       nextToken();
5031       return Args;
5032     }
5033     case tok::comma: {
5034       if (Parens > 0) {
5035         nextToken();
5036         break;
5037       }
5038       Args->push_back({});
5039       pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5040       nextToken();
5041       ArgStart = std::prev(Line->Tokens.end());
5042       break;
5043     }
5044     default:
5045       nextToken();
5046       break;
5047     }
5048   } while (!eof());
5049   Line->Tokens.resize(1);
5050   Tokens->setPosition(Position);
5051   FormatTok = Tok;
5052   return {};
5053 }
5054 
pushToken(FormatToken * Tok)5055 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5056   Line->Tokens.push_back(UnwrappedLineNode(Tok));
5057   if (MustBreakBeforeNextToken) {
5058     Line->Tokens.back().Tok->MustBreakBefore = true;
5059     Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
5060     MustBreakBeforeNextToken = false;
5061   }
5062 }
5063 
5064 } // end namespace format
5065 } // end namespace clang
5066