xref: /freebsd/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp (revision c66ec88fed842fbaad62c30d510644ceb7bd2d71)
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31   virtual FormatToken *getNextToken() = 0;
32 
33   virtual unsigned getPosition() = 0;
34   virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42                          bool MustBeDeclaration)
43       : Line(Line), Stack(Stack) {
44     Line.MustBeDeclaration = MustBeDeclaration;
45     Stack.push_back(MustBeDeclaration);
46   }
47   ~ScopedDeclarationState() {
48     Stack.pop_back();
49     if (!Stack.empty())
50       Line.MustBeDeclaration = Stack.back();
51     else
52       Line.MustBeDeclaration = true;
53   }
54 
55 private:
56   UnwrappedLine &Line;
57   std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68                                  const FormatToken *Previous,
69                                  const FormatToken *MinColumnToken) {
70   if (!Previous || !MinColumnToken)
71     return false;
72   unsigned MinContinueColumn =
73       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75          isLineComment(*Previous) &&
76          FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82                    FormatToken *&ResetToken)
83       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85         Token(nullptr), PreviousToken(nullptr) {
86     FakeEOF.Tok.startToken();
87     FakeEOF.Tok.setKind(tok::eof);
88     TokenSource = this;
89     Line.Level = 0;
90     Line.InPPDirective = true;
91   }
92 
93   ~ScopedMacroState() override {
94     TokenSource = PreviousTokenSource;
95     ResetToken = Token;
96     Line.InPPDirective = false;
97     Line.Level = PreviousLineLevel;
98   }
99 
100   FormatToken *getNextToken() override {
101     // The \c UnwrappedLineParser guards against this by never calling
102     // \c getNextToken() after it has encountered the first eof token.
103     assert(!eof());
104     PreviousToken = Token;
105     Token = PreviousTokenSource->getNextToken();
106     if (eof())
107       return &FakeEOF;
108     return Token;
109   }
110 
111   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
113   FormatToken *setPosition(unsigned Position) override {
114     PreviousToken = nullptr;
115     Token = PreviousTokenSource->setPosition(Position);
116     return Token;
117   }
118 
119 private:
120   bool eof() {
121     return Token && Token->HasUnescapedNewline &&
122            !continuesLineComment(*Token, PreviousToken,
123                                  /*MinColumnToken=*/PreviousToken);
124   }
125 
126   FormatToken FakeEOF;
127   UnwrappedLine &Line;
128   FormatTokenSource *&TokenSource;
129   FormatToken *&ResetToken;
130   unsigned PreviousLineLevel;
131   FormatTokenSource *PreviousTokenSource;
132 
133   FormatToken *Token;
134   FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
139 class ScopedLineState {
140 public:
141   ScopedLineState(UnwrappedLineParser &Parser,
142                   bool SwitchToPreprocessorLines = false)
143       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144     if (SwitchToPreprocessorLines)
145       Parser.CurrentLines = &Parser.PreprocessorDirectives;
146     else if (!Parser.Line->Tokens.empty())
147       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148     PreBlockLine = std::move(Parser.Line);
149     Parser.Line = std::make_unique<UnwrappedLine>();
150     Parser.Line->Level = PreBlockLine->Level;
151     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152   }
153 
154   ~ScopedLineState() {
155     if (!Parser.Line->Tokens.empty()) {
156       Parser.addUnwrappedLine();
157     }
158     assert(Parser.Line->Tokens.empty());
159     Parser.Line = std::move(PreBlockLine);
160     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161       Parser.MustBreakBeforeNextToken = true;
162     Parser.CurrentLines = OriginalLines;
163   }
164 
165 private:
166   UnwrappedLineParser &Parser;
167 
168   std::unique_ptr<UnwrappedLine> PreBlockLine;
169   SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
172 class CompoundStatementIndenter {
173 public:
174   CompoundStatementIndenter(UnwrappedLineParser *Parser,
175                             const FormatStyle &Style, unsigned &LineLevel)
176       : CompoundStatementIndenter(Parser, LineLevel,
177                                   Style.BraceWrapping.AfterControlStatement,
178                                   Style.BraceWrapping.IndentBraces) {}
179   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180                             bool WrapBrace, bool IndentBrace)
181       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182     if (WrapBrace)
183       Parser->addUnwrappedLine();
184     if (IndentBrace)
185       ++LineLevel;
186   }
187   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188 
189 private:
190   unsigned &LineLevel;
191   unsigned OldLineLevel;
192 };
193 
194 namespace {
195 
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
198   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199       : Tokens(Tokens), Position(-1) {}
200 
201   FormatToken *getNextToken() override {
202     ++Position;
203     return Tokens[Position];
204   }
205 
206   unsigned getPosition() override {
207     assert(Position >= 0);
208     return Position;
209   }
210 
211   FormatToken *setPosition(unsigned P) override {
212     Position = P;
213     return Tokens[Position];
214   }
215 
216   void reset() { Position = -1; }
217 
218 private:
219   ArrayRef<FormatToken *> Tokens;
220   int Position;
221 };
222 
223 } // end anonymous namespace
224 
225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226                                          const AdditionalKeywords &Keywords,
227                                          unsigned FirstStartColumn,
228                                          ArrayRef<FormatToken *> Tokens,
229                                          UnwrappedLineConsumer &Callback)
230     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235                        ? IG_Rejected
236                        : IG_Inited),
237       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238 
239 void UnwrappedLineParser::reset() {
240   PPBranchLevel = -1;
241   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242                      ? IG_Rejected
243                      : IG_Inited;
244   IncludeGuardToken = nullptr;
245   Line.reset(new UnwrappedLine);
246   CommentsBeforeNextToken.clear();
247   FormatTok = nullptr;
248   MustBreakBeforeNextToken = false;
249   PreprocessorDirectives.clear();
250   CurrentLines = &Lines;
251   DeclarationScopeStack.clear();
252   PPStack.clear();
253   Line->FirstStartColumn = FirstStartColumn;
254 }
255 
256 void UnwrappedLineParser::parse() {
257   IndexedTokenSource TokenSource(AllTokens);
258   Line->FirstStartColumn = FirstStartColumn;
259   do {
260     LLVM_DEBUG(llvm::dbgs() << "----\n");
261     reset();
262     Tokens = &TokenSource;
263     TokenSource.reset();
264 
265     readToken();
266     parseFile();
267 
268     // If we found an include guard then all preprocessor directives (other than
269     // the guard) are over-indented by one.
270     if (IncludeGuard == IG_Found)
271       for (auto &Line : Lines)
272         if (Line.InPPDirective && Line.Level > 0)
273           --Line.Level;
274 
275     // Create line with eof token.
276     pushToken(FormatTok);
277     addUnwrappedLine();
278 
279     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280                                                   E = Lines.end();
281          I != E; ++I) {
282       Callback.consumeUnwrappedLine(*I);
283     }
284     Callback.finishRun();
285     Lines.clear();
286     while (!PPLevelBranchIndex.empty() &&
287            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290     }
291     if (!PPLevelBranchIndex.empty()) {
292       ++PPLevelBranchIndex.back();
293       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295     }
296   } while (!PPLevelBranchIndex.empty());
297 }
298 
299 void UnwrappedLineParser::parseFile() {
300   // The top-level context in a file always has declarations, except for pre-
301   // processor directives and JavaScript files.
302   bool MustBeDeclaration =
303       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305                                           MustBeDeclaration);
306   if (Style.Language == FormatStyle::LK_TextProto)
307     parseBracedList();
308   else
309     parseLevel(/*HasOpeningBrace=*/false);
310   // Make sure to format the remaining tokens.
311   //
312   // LK_TextProto is special since its top-level is parsed as the body of a
313   // braced list, which does not necessarily have natural line separators such
314   // as a semicolon. Comments after the last entry that have been determined to
315   // not belong to that line, as in:
316   //   key: value
317   //   // endfile comment
318   // do not have a chance to be put on a line of their own until this point.
319   // Here we add this newline before end-of-file comments.
320   if (Style.Language == FormatStyle::LK_TextProto &&
321       !CommentsBeforeNextToken.empty())
322     addUnwrappedLine();
323   flushComments(true);
324   addUnwrappedLine();
325 }
326 
327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328   do {
329     switch (FormatTok->Tok.getKind()) {
330     case tok::l_brace:
331       return;
332     default:
333       if (FormatTok->is(Keywords.kw_where)) {
334         addUnwrappedLine();
335         nextToken();
336         parseCSharpGenericTypeConstraint();
337         break;
338       }
339       nextToken();
340       break;
341     }
342   } while (!eof());
343 }
344 
345 void UnwrappedLineParser::parseCSharpAttribute() {
346   int UnpairedSquareBrackets = 1;
347   do {
348     switch (FormatTok->Tok.getKind()) {
349     case tok::r_square:
350       nextToken();
351       --UnpairedSquareBrackets;
352       if (UnpairedSquareBrackets == 0) {
353         addUnwrappedLine();
354         return;
355       }
356       break;
357     case tok::l_square:
358       ++UnpairedSquareBrackets;
359       nextToken();
360       break;
361     default:
362       nextToken();
363       break;
364     }
365   } while (!eof());
366 }
367 
368 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
369   bool SwitchLabelEncountered = false;
370   do {
371     tok::TokenKind kind = FormatTok->Tok.getKind();
372     if (FormatTok->getType() == TT_MacroBlockBegin) {
373       kind = tok::l_brace;
374     } else if (FormatTok->getType() == TT_MacroBlockEnd) {
375       kind = tok::r_brace;
376     }
377 
378     switch (kind) {
379     case tok::comment:
380       nextToken();
381       addUnwrappedLine();
382       break;
383     case tok::l_brace:
384       // FIXME: Add parameter whether this can happen - if this happens, we must
385       // be in a non-declaration context.
386       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
387         continue;
388       parseBlock(/*MustBeDeclaration=*/false);
389       addUnwrappedLine();
390       break;
391     case tok::r_brace:
392       if (HasOpeningBrace)
393         return;
394       nextToken();
395       addUnwrappedLine();
396       break;
397     case tok::kw_default: {
398       unsigned StoredPosition = Tokens->getPosition();
399       FormatToken *Next;
400       do {
401         Next = Tokens->getNextToken();
402       } while (Next && Next->is(tok::comment));
403       FormatTok = Tokens->setPosition(StoredPosition);
404       if (Next && Next->isNot(tok::colon)) {
405         // default not followed by ':' is not a case label; treat it like
406         // an identifier.
407         parseStructuralElement();
408         break;
409       }
410       // Else, if it is 'default:', fall through to the case handling.
411       LLVM_FALLTHROUGH;
412     }
413     case tok::kw_case:
414       if (Style.Language == FormatStyle::LK_JavaScript &&
415           Line->MustBeDeclaration) {
416         // A 'case: string' style field declaration.
417         parseStructuralElement();
418         break;
419       }
420       if (!SwitchLabelEncountered &&
421           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
422         ++Line->Level;
423       SwitchLabelEncountered = true;
424       parseStructuralElement();
425       break;
426     case tok::l_square:
427       if (Style.isCSharp()) {
428         nextToken();
429         parseCSharpAttribute();
430         break;
431       }
432       LLVM_FALLTHROUGH;
433     default:
434       parseStructuralElement();
435       break;
436     }
437   } while (!eof());
438 }
439 
440 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
441   // We'll parse forward through the tokens until we hit
442   // a closing brace or eof - note that getNextToken() will
443   // parse macros, so this will magically work inside macro
444   // definitions, too.
445   unsigned StoredPosition = Tokens->getPosition();
446   FormatToken *Tok = FormatTok;
447   const FormatToken *PrevTok = Tok->Previous;
448   // Keep a stack of positions of lbrace tokens. We will
449   // update information about whether an lbrace starts a
450   // braced init list or a different block during the loop.
451   SmallVector<FormatToken *, 8> LBraceStack;
452   assert(Tok->Tok.is(tok::l_brace));
453   do {
454     // Get next non-comment token.
455     FormatToken *NextTok;
456     unsigned ReadTokens = 0;
457     do {
458       NextTok = Tokens->getNextToken();
459       ++ReadTokens;
460     } while (NextTok->is(tok::comment));
461 
462     switch (Tok->Tok.getKind()) {
463     case tok::l_brace:
464       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
465         if (PrevTok->isOneOf(tok::colon, tok::less))
466           // A ':' indicates this code is in a type, or a braced list
467           // following a label in an object literal ({a: {b: 1}}).
468           // A '<' could be an object used in a comparison, but that is nonsense
469           // code (can never return true), so more likely it is a generic type
470           // argument (`X<{a: string; b: number}>`).
471           // The code below could be confused by semicolons between the
472           // individual members in a type member list, which would normally
473           // trigger BK_Block. In both cases, this must be parsed as an inline
474           // braced init.
475           Tok->BlockKind = BK_BracedInit;
476         else if (PrevTok->is(tok::r_paren))
477           // `) { }` can only occur in function or method declarations in JS.
478           Tok->BlockKind = BK_Block;
479       } else {
480         Tok->BlockKind = BK_Unknown;
481       }
482       LBraceStack.push_back(Tok);
483       break;
484     case tok::r_brace:
485       if (LBraceStack.empty())
486         break;
487       if (LBraceStack.back()->BlockKind == BK_Unknown) {
488         bool ProbablyBracedList = false;
489         if (Style.Language == FormatStyle::LK_Proto) {
490           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
491         } else {
492           // Using OriginalColumn to distinguish between ObjC methods and
493           // binary operators is a bit hacky.
494           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
495                                   NextTok->OriginalColumn == 0;
496 
497           // If there is a comma, semicolon or right paren after the closing
498           // brace, we assume this is a braced initializer list.  Note that
499           // regardless how we mark inner braces here, we will overwrite the
500           // BlockKind later if we parse a braced list (where all blocks
501           // inside are by default braced lists), or when we explicitly detect
502           // blocks (for example while parsing lambdas).
503           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
504           // braced list in JS.
505           ProbablyBracedList =
506               (Style.Language == FormatStyle::LK_JavaScript &&
507                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
508                                 Keywords.kw_as)) ||
509               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
510               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
511                                tok::r_paren, tok::r_square, tok::l_brace,
512                                tok::ellipsis) ||
513               (NextTok->is(tok::identifier) &&
514                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
515               (NextTok->is(tok::semi) &&
516                (!ExpectClassBody || LBraceStack.size() != 1)) ||
517               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
518           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
519             // We can have an array subscript after a braced init
520             // list, but C++11 attributes are expected after blocks.
521             NextTok = Tokens->getNextToken();
522             ++ReadTokens;
523             ProbablyBracedList = NextTok->isNot(tok::l_square);
524           }
525         }
526         if (ProbablyBracedList) {
527           Tok->BlockKind = BK_BracedInit;
528           LBraceStack.back()->BlockKind = BK_BracedInit;
529         } else {
530           Tok->BlockKind = BK_Block;
531           LBraceStack.back()->BlockKind = BK_Block;
532         }
533       }
534       LBraceStack.pop_back();
535       break;
536     case tok::identifier:
537       if (!Tok->is(TT_StatementMacro))
538         break;
539       LLVM_FALLTHROUGH;
540     case tok::at:
541     case tok::semi:
542     case tok::kw_if:
543     case tok::kw_while:
544     case tok::kw_for:
545     case tok::kw_switch:
546     case tok::kw_try:
547     case tok::kw___try:
548       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
549         LBraceStack.back()->BlockKind = BK_Block;
550       break;
551     default:
552       break;
553     }
554     PrevTok = Tok;
555     Tok = NextTok;
556   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
557 
558   // Assume other blocks for all unclosed opening braces.
559   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
560     if (LBraceStack[i]->BlockKind == BK_Unknown)
561       LBraceStack[i]->BlockKind = BK_Block;
562   }
563 
564   FormatTok = Tokens->setPosition(StoredPosition);
565 }
566 
/// Mixes the \c std::hash of \p v into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>()(v);
  const std::size_t Mixed =
      ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
572 
573 size_t UnwrappedLineParser::computePPHash() const {
574   size_t h = 0;
575   for (const auto &i : PPStack) {
576     hash_combine(h, size_t(i.Kind));
577     hash_combine(h, i.Line);
578   }
579   return h;
580 }
581 
582 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
583                                      bool MunchSemi) {
584   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
585          "'{' or macro block token expected");
586   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
587   FormatTok->BlockKind = BK_Block;
588 
589   size_t PPStartHash = computePPHash();
590 
591   unsigned InitialLevel = Line->Level;
592   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
593 
594   if (MacroBlock && FormatTok->is(tok::l_paren))
595     parseParens();
596 
597   size_t NbPreprocessorDirectives =
598       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
599   addUnwrappedLine();
600   size_t OpeningLineIndex =
601       CurrentLines->empty()
602           ? (UnwrappedLine::kInvalidIndex)
603           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
604 
605   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
606                                           MustBeDeclaration);
607   if (AddLevel)
608     ++Line->Level;
609   parseLevel(/*HasOpeningBrace=*/true);
610 
611   if (eof())
612     return;
613 
614   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
615                  : !FormatTok->is(tok::r_brace)) {
616     Line->Level = InitialLevel;
617     FormatTok->BlockKind = BK_Block;
618     return;
619   }
620 
621   size_t PPEndHash = computePPHash();
622 
623   // Munch the closing brace.
624   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
625 
626   if (MacroBlock && FormatTok->is(tok::l_paren))
627     parseParens();
628 
629   if (MunchSemi && FormatTok->Tok.is(tok::semi))
630     nextToken();
631   Line->Level = InitialLevel;
632 
633   if (PPStartHash == PPEndHash) {
634     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
635     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
636       // Update the opening line to add the forward reference as well
637       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
638           CurrentLines->size() - 1;
639     }
640   }
641 }
642 
643 static bool isGoogScope(const UnwrappedLine &Line) {
644   // FIXME: Closure-library specific stuff should not be hard-coded but be
645   // configurable.
646   if (Line.Tokens.size() < 4)
647     return false;
648   auto I = Line.Tokens.begin();
649   if (I->Tok->TokenText != "goog")
650     return false;
651   ++I;
652   if (I->Tok->isNot(tok::period))
653     return false;
654   ++I;
655   if (I->Tok->TokenText != "scope")
656     return false;
657   ++I;
658   return I->Tok->is(tok::l_paren);
659 }
660 
661 static bool isIIFE(const UnwrappedLine &Line,
662                    const AdditionalKeywords &Keywords) {
663   // Look for the start of an immediately invoked anonymous function.
664   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
665   // This is commonly done in JavaScript to create a new, anonymous scope.
666   // Example: (function() { ... })()
667   if (Line.Tokens.size() < 3)
668     return false;
669   auto I = Line.Tokens.begin();
670   if (I->Tok->isNot(tok::l_paren))
671     return false;
672   ++I;
673   if (I->Tok->isNot(Keywords.kw_function))
674     return false;
675   ++I;
676   return I->Tok->is(tok::l_paren);
677 }
678 
679 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
680                                    const FormatToken &InitialToken) {
681   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
682     return Style.BraceWrapping.AfterNamespace;
683   if (InitialToken.is(tok::kw_class))
684     return Style.BraceWrapping.AfterClass;
685   if (InitialToken.is(tok::kw_union))
686     return Style.BraceWrapping.AfterUnion;
687   if (InitialToken.is(tok::kw_struct))
688     return Style.BraceWrapping.AfterStruct;
689   return false;
690 }
691 
692 void UnwrappedLineParser::parseChildBlock() {
693   FormatTok->BlockKind = BK_Block;
694   nextToken();
695   {
696     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
697                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
698     ScopedLineState LineState(*this);
699     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
700                                             /*MustBeDeclaration=*/false);
701     Line->Level += SkipIndent ? 0 : 1;
702     parseLevel(/*HasOpeningBrace=*/true);
703     flushComments(isOnNewLine(*FormatTok));
704     Line->Level -= SkipIndent ? 0 : 1;
705   }
706   nextToken();
707 }
708 
709 void UnwrappedLineParser::parsePPDirective() {
710   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
711   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
712 
713   nextToken();
714 
715   if (!FormatTok->Tok.getIdentifierInfo()) {
716     parsePPUnknown();
717     return;
718   }
719 
720   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
721   case tok::pp_define:
722     parsePPDefine();
723     return;
724   case tok::pp_if:
725     parsePPIf(/*IfDef=*/false);
726     break;
727   case tok::pp_ifdef:
728   case tok::pp_ifndef:
729     parsePPIf(/*IfDef=*/true);
730     break;
731   case tok::pp_else:
732     parsePPElse();
733     break;
734   case tok::pp_elif:
735     parsePPElIf();
736     break;
737   case tok::pp_endif:
738     parsePPEndIf();
739     break;
740   default:
741     parsePPUnknown();
742     break;
743   }
744 }
745 
746 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
747   size_t Line = CurrentLines->size();
748   if (CurrentLines == &PreprocessorDirectives)
749     Line += Lines.size();
750 
751   if (Unreachable ||
752       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
753     PPStack.push_back({PP_Unreachable, Line});
754   else
755     PPStack.push_back({PP_Conditional, Line});
756 }
757 
758 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
759   ++PPBranchLevel;
760   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
761   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
762     PPLevelBranchIndex.push_back(0);
763     PPLevelBranchCount.push_back(0);
764   }
765   PPChainBranchIndex.push(0);
766   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
767   conditionalCompilationCondition(Unreachable || Skip);
768 }
769 
770 void UnwrappedLineParser::conditionalCompilationAlternative() {
771   if (!PPStack.empty())
772     PPStack.pop_back();
773   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
774   if (!PPChainBranchIndex.empty())
775     ++PPChainBranchIndex.top();
776   conditionalCompilationCondition(
777       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
778       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
779 }
780 
781 void UnwrappedLineParser::conditionalCompilationEnd() {
782   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
783   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
784     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
785       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
786     }
787   }
788   // Guard against #endif's without #if.
789   if (PPBranchLevel > -1)
790     --PPBranchLevel;
791   if (!PPChainBranchIndex.empty())
792     PPChainBranchIndex.pop();
793   if (!PPStack.empty())
794     PPStack.pop_back();
795 }
796 
797 void UnwrappedLineParser::parsePPIf(bool IfDef) {
798   bool IfNDef = FormatTok->is(tok::pp_ifndef);
799   nextToken();
800   bool Unreachable = false;
801   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
802     Unreachable = true;
803   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
804     Unreachable = true;
805   conditionalCompilationStart(Unreachable);
806   FormatToken *IfCondition = FormatTok;
807   // If there's a #ifndef on the first line, and the only lines before it are
808   // comments, it could be an include guard.
809   bool MaybeIncludeGuard = IfNDef;
810   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
811     for (auto &Line : Lines) {
812       if (!Line.Tokens.front().Tok->is(tok::comment)) {
813         MaybeIncludeGuard = false;
814         IncludeGuard = IG_Rejected;
815         break;
816       }
817     }
818   --PPBranchLevel;
819   parsePPUnknown();
820   ++PPBranchLevel;
821   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
822     IncludeGuard = IG_IfNdefed;
823     IncludeGuardToken = IfCondition;
824   }
825 }
826 
827 void UnwrappedLineParser::parsePPElse() {
828   // If a potential include guard has an #else, it's not an include guard.
829   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
830     IncludeGuard = IG_Rejected;
831   conditionalCompilationAlternative();
832   if (PPBranchLevel > -1)
833     --PPBranchLevel;
834   parsePPUnknown();
835   ++PPBranchLevel;
836 }
837 
838 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
839 
840 void UnwrappedLineParser::parsePPEndIf() {
841   conditionalCompilationEnd();
842   parsePPUnknown();
843   // If the #endif of a potential include guard is the last thing in the file,
844   // then we found an include guard.
845   unsigned TokenPosition = Tokens->getPosition();
846   FormatToken *PeekNext = AllTokens[TokenPosition];
847   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
848       PeekNext->is(tok::eof) &&
849       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
850     IncludeGuard = IG_Found;
851 }
852 
853 void UnwrappedLineParser::parsePPDefine() {
854   nextToken();
855 
856   if (!FormatTok->Tok.getIdentifierInfo()) {
857     IncludeGuard = IG_Rejected;
858     IncludeGuardToken = nullptr;
859     parsePPUnknown();
860     return;
861   }
862 
863   if (IncludeGuard == IG_IfNdefed &&
864       IncludeGuardToken->TokenText == FormatTok->TokenText) {
865     IncludeGuard = IG_Defined;
866     IncludeGuardToken = nullptr;
867     for (auto &Line : Lines) {
868       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
869         IncludeGuard = IG_Rejected;
870         break;
871       }
872     }
873   }
874 
875   nextToken();
876   if (FormatTok->Tok.getKind() == tok::l_paren &&
877       FormatTok->WhitespaceRange.getBegin() ==
878           FormatTok->WhitespaceRange.getEnd()) {
879     parseParens();
880   }
881   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
882     Line->Level += PPBranchLevel + 1;
883   addUnwrappedLine();
884   ++Line->Level;
885 
886   // Errors during a preprocessor directive can only affect the layout of the
887   // preprocessor directive, and thus we ignore them. An alternative approach
888   // would be to use the same approach we use on the file level (no
889   // re-indentation if there was a structural error) within the macro
890   // definition.
891   parseFile();
892 }
893 
894 void UnwrappedLineParser::parsePPUnknown() {
895   do {
896     nextToken();
897   } while (!eof());
898   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
899     Line->Level += PPBranchLevel + 1;
900   addUnwrappedLine();
901 }
902 
903 // Here we exclude certain tokens that are not usually the first token in an
904 // unwrapped line. This is used in attempt to distinguish macro calls without
905 // trailing semicolons from other constructs split to several lines.
906 static bool tokenCanStartNewLine(const FormatToken &Tok) {
907   // Semicolon can be a null-statement, l_square can be a start of a macro or
908   // a C++11 attribute, but this doesn't seem to be common.
909   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
910          Tok.isNot(TT_AttributeSquare) &&
911          // Tokens that can only be used as binary operators and a part of
912          // overloaded operator names.
913          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
914          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
915          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
916          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
917          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
918          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
919          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
920          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
921          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
922          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
923          Tok.isNot(tok::lesslessequal) &&
924          // Colon is used in labels, base class lists, initializer lists,
925          // range-based for loops, ternary operator, but should never be the
926          // first token in an unwrapped line.
927          Tok.isNot(tok::colon) &&
928          // 'noexcept' is a trailing annotation.
929          Tok.isNot(tok::kw_noexcept);
930 }
931 
932 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
933                           const FormatToken *FormatTok) {
934   // FIXME: This returns true for C/C++ keywords like 'struct'.
935   return FormatTok->is(tok::identifier) &&
936          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
937           !FormatTok->isOneOf(
938               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
939               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
940               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
941               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
942               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
943               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
944               Keywords.kw_from));
945 }
946 
947 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
948                                  const FormatToken *FormatTok) {
949   return FormatTok->Tok.isLiteral() ||
950          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
951          mustBeJSIdent(Keywords, FormatTok);
952 }
953 
954 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
955 // when encountered after a value (see mustBeJSIdentOrValue).
956 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
957                            const FormatToken *FormatTok) {
958   return FormatTok->isOneOf(
959       tok::kw_return, Keywords.kw_yield,
960       // conditionals
961       tok::kw_if, tok::kw_else,
962       // loops
963       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
964       // switch/case
965       tok::kw_switch, tok::kw_case,
966       // exceptions
967       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
968       // declaration
969       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
970       Keywords.kw_async, Keywords.kw_function,
971       // import/export
972       Keywords.kw_import, tok::kw_export);
973 }
974 
975 // readTokenWithJavaScriptASI reads the next token and terminates the current
976 // line if JavaScript Automatic Semicolon Insertion must
977 // happen between the current token and the next token.
978 //
979 // This method is conservative - it cannot cover all edge cases of JavaScript,
980 // but only aims to correctly handle certain well known cases. It *must not*
981 // return true in speculative cases.
982 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
983   FormatToken *Previous = FormatTok;
984   readToken();
985   FormatToken *Next = FormatTok;
986 
987   bool IsOnSameLine =
988       CommentsBeforeNextToken.empty()
989           ? Next->NewlinesBefore == 0
990           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
991   if (IsOnSameLine)
992     return;
993 
994   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
995   bool PreviousStartsTemplateExpr =
996       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
997   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
998     // If the line contains an '@' sign, the previous token might be an
999     // annotation, which can precede another identifier/value.
1000     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1001                               [](UnwrappedLineNode &LineNode) {
1002                                 return LineNode.Tok->is(tok::at);
1003                               }) != Line->Tokens.end();
1004     if (HasAt)
1005       return;
1006   }
1007   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1008     return addUnwrappedLine();
1009   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1010   bool NextEndsTemplateExpr =
1011       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1012   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1013       (PreviousMustBeValue ||
1014        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1015                          tok::minusminus)))
1016     return addUnwrappedLine();
1017   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1018       isJSDeclOrStmt(Keywords, Next))
1019     return addUnwrappedLine();
1020 }
1021 
/// Parses one structural element starting at the current token and emits the
/// unwrapped line(s) for it.
///
/// The first switch dispatches on the leading token: constructs that are
/// recognizable from their first token (namespaces, control-flow statements,
/// labels, extern blocks, ...) are handed to dedicated parsers, which then
/// return. Everything that `break`s out of that switch falls into the token
/// loop below, which keeps consuming tokens until one of its cases finishes
/// the element (usually via addUnwrappedLine() followed by a return) or eof()
/// is reached.
///
/// The current token must not be an l_brace on entry (asserted).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen `include "file"` is emitted as a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Walk to the matching r_brace, flagging every token in between as
      // Finalized (presumably so that later stages leave the assembly text
      // untouched -- confirm against the users of FormatToken::Finalized).
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are plain modifiers; elsewhere they
    // are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "..." {` is handled here; any other use of `extern`
    // continues in the token loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (!Style.IndentExternBlock) {
          if (Style.BraceWrapping.AfterExternBlock) {
            addUnwrappedLine();
          }
          parseBlock(/*MustBeDeclaration=*/true,
                     /*AddLevel=*/Style.BraceWrapping.AfterExternBlock);
        } else {
          parseBlock(/*MustBeDeclaration=*/true,
                     /*AddLevel=*/Style.IndentExternBlock ==
                         FormatStyle::IEBS_Indent);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: `import [public] "file";` forms a line of its own.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // `signals:` / `slots:` style sections (and their Q_ variants) end the
    // line after the colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic token loop: each iteration inspects the current token and either
  // consumes it (possibly together with a nested construct) or finishes the
  // structural element.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by an identifier or simple type, a
      // parenthesized list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Only an identifier that is the first token of the line (optionally
      // preceded by one comment) can be a label or a free-standing macro.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier has no lower-case letters, is either at
        // least five characters long or function-like, and the next token
        // sits on a new line and could plausibly start one.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          // C# may break after => if the next character is a newline.
          if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
            // calling `addUnwrappedLine()` here causes odd parsing errors.
            FormatTok->MustBreakBefore = true;
          }
          parseChildBlock();
        }
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->BlockKind = BK_BracedInit;
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1501 
1502 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1503   assert(FormatTok->is(tok::l_brace));
1504   if (!Style.isCSharp())
1505     return false;
1506   // See if it's a property accessor.
1507   if (FormatTok->Previous->isNot(tok::identifier))
1508     return false;
1509 
1510   // See if we are inside a property accessor.
1511   //
1512   // Record the current tokenPosition so that we can advance and
1513   // reset the current token. `Next` is not set yet so we need
1514   // another way to advance along the token stream.
1515   unsigned int StoredPosition = Tokens->getPosition();
1516   FormatToken *Tok = Tokens->getNextToken();
1517 
1518   // A trivial property accessor is of the form:
1519   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1520   // Track these as they do not require line breaks to be introduced.
1521   bool HasGetOrSet = false;
1522   bool IsTrivialPropertyAccessor = true;
1523   while (!eof()) {
1524     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1525                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1526                      Keywords.kw_set)) {
1527       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1528         HasGetOrSet = true;
1529       Tok = Tokens->getNextToken();
1530       continue;
1531     }
1532     if (Tok->isNot(tok::r_brace))
1533       IsTrivialPropertyAccessor = false;
1534     break;
1535   }
1536 
1537   if (!HasGetOrSet) {
1538     Tokens->setPosition(StoredPosition);
1539     return false;
1540   }
1541 
1542   // Try to parse the property accessor:
1543   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1544   Tokens->setPosition(StoredPosition);
1545   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1546     addUnwrappedLine();
1547   nextToken();
1548   do {
1549     switch (FormatTok->Tok.getKind()) {
1550     case tok::r_brace:
1551       nextToken();
1552       if (FormatTok->is(tok::equal)) {
1553         while (!eof() && FormatTok->isNot(tok::semi))
1554           nextToken();
1555         nextToken();
1556       }
1557       addUnwrappedLine();
1558       return true;
1559     case tok::l_brace:
1560       ++Line->Level;
1561       parseBlock(/*MustBeDeclaration=*/true);
1562       addUnwrappedLine();
1563       --Line->Level;
1564       break;
1565     case tok::equal:
1566       if (FormatTok->is(TT_JsFatArrow)) {
1567         ++Line->Level;
1568         do {
1569           nextToken();
1570         } while (!eof() && FormatTok->isNot(tok::semi));
1571         nextToken();
1572         addUnwrappedLine();
1573         --Line->Level;
1574         break;
1575       }
1576       nextToken();
1577       break;
1578     default:
1579       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1580           !IsTrivialPropertyAccessor) {
1581         // Non-trivial get/set needs to be on its own line.
1582         addUnwrappedLine();
1583       }
1584       nextToken();
1585     }
1586   } while (!eof());
1587 
1588   // Unreachable for well-formed code (paired '{' and '}').
1589   return true;
1590 }
1591 
1592 bool UnwrappedLineParser::tryToParseLambda() {
1593   if (!Style.isCpp()) {
1594     nextToken();
1595     return false;
1596   }
1597   assert(FormatTok->is(tok::l_square));
1598   FormatToken &LSquare = *FormatTok;
1599   if (!tryToParseLambdaIntroducer())
1600     return false;
1601 
1602   bool SeenArrow = false;
1603 
1604   while (FormatTok->isNot(tok::l_brace)) {
1605     if (FormatTok->isSimpleTypeSpecifier()) {
1606       nextToken();
1607       continue;
1608     }
1609     switch (FormatTok->Tok.getKind()) {
1610     case tok::l_brace:
1611       break;
1612     case tok::l_paren:
1613       parseParens();
1614       break;
1615     case tok::amp:
1616     case tok::star:
1617     case tok::kw_const:
1618     case tok::comma:
1619     case tok::less:
1620     case tok::greater:
1621     case tok::identifier:
1622     case tok::numeric_constant:
1623     case tok::coloncolon:
1624     case tok::kw_class:
1625     case tok::kw_mutable:
1626     case tok::kw_noexcept:
1627     case tok::kw_template:
1628     case tok::kw_typename:
1629       nextToken();
1630       break;
1631     // Specialization of a template with an integer parameter can contain
1632     // arithmetic, logical, comparison and ternary operators.
1633     //
1634     // FIXME: This also accepts sequences of operators that are not in the scope
1635     // of a template argument list.
1636     //
1637     // In a C++ lambda a template type can only occur after an arrow. We use
1638     // this as an heuristic to distinguish between Objective-C expressions
1639     // followed by an `a->b` expression, such as:
1640     // ([obj func:arg] + a->b)
1641     // Otherwise the code below would parse as a lambda.
1642     //
1643     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1644     // explicit template lists: []<bool b = true && false>(U &&u){}
1645     case tok::plus:
1646     case tok::minus:
1647     case tok::exclaim:
1648     case tok::tilde:
1649     case tok::slash:
1650     case tok::percent:
1651     case tok::lessless:
1652     case tok::pipe:
1653     case tok::pipepipe:
1654     case tok::ampamp:
1655     case tok::caret:
1656     case tok::equalequal:
1657     case tok::exclaimequal:
1658     case tok::greaterequal:
1659     case tok::lessequal:
1660     case tok::question:
1661     case tok::colon:
1662     case tok::ellipsis:
1663     case tok::kw_true:
1664     case tok::kw_false:
1665       if (SeenArrow) {
1666         nextToken();
1667         break;
1668       }
1669       return true;
1670     case tok::arrow:
1671       // This might or might not actually be a lambda arrow (this could be an
1672       // ObjC method invocation followed by a dereferencing arrow). We might
1673       // reset this back to TT_Unknown in TokenAnnotator.
1674       FormatTok->setType(TT_LambdaArrow);
1675       SeenArrow = true;
1676       nextToken();
1677       break;
1678     default:
1679       return true;
1680     }
1681   }
1682   FormatTok->setType(TT_LambdaLBrace);
1683   LSquare.setType(TT_LambdaLSquare);
1684   parseChildBlock();
1685   return true;
1686 }
1687 
1688 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1689   const FormatToken *Previous = FormatTok->Previous;
1690   if (Previous &&
1691       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1692                          tok::kw_delete, tok::l_square) ||
1693        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1694        Previous->isSimpleTypeSpecifier())) {
1695     nextToken();
1696     return false;
1697   }
1698   nextToken();
1699   if (FormatTok->is(tok::l_square)) {
1700     return false;
1701   }
1702   parseSquare(/*LambdaIntroducer=*/true);
1703   return true;
1704 }
1705 
1706 void UnwrappedLineParser::tryToParseJSFunction() {
1707   assert(FormatTok->is(Keywords.kw_function) ||
1708          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1709   if (FormatTok->is(Keywords.kw_async))
1710     nextToken();
1711   // Consume "function".
1712   nextToken();
1713 
1714   // Consume * (generator function). Treat it like C++'s overloaded operators.
1715   if (FormatTok->is(tok::star)) {
1716     FormatTok->setType(TT_OverloadedOperator);
1717     nextToken();
1718   }
1719 
1720   // Consume function name.
1721   if (FormatTok->is(tok::identifier))
1722     nextToken();
1723 
1724   if (FormatTok->isNot(tok::l_paren))
1725     return;
1726 
1727   // Parse formal parameter list.
1728   parseParens();
1729 
1730   if (FormatTok->is(tok::colon)) {
1731     // Parse a type definition.
1732     nextToken();
1733 
1734     // Eat the type declaration. For braced inline object types, balance braces,
1735     // otherwise just parse until finding an l_brace for the function body.
1736     if (FormatTok->is(tok::l_brace))
1737       tryToParseBracedList();
1738     else
1739       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1740         nextToken();
1741   }
1742 
1743   if (FormatTok->is(tok::semi))
1744     return;
1745 
1746   parseChildBlock();
1747 }
1748 
1749 bool UnwrappedLineParser::tryToParseBracedList() {
1750   if (FormatTok->BlockKind == BK_Unknown)
1751     calculateBraceTypes();
1752   assert(FormatTok->BlockKind != BK_Unknown);
1753   if (FormatTok->BlockKind == BK_Block)
1754     return false;
1755   nextToken();
1756   parseBracedList();
1757   return true;
1758 }
1759 
1760 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1761                                           bool IsEnum,
1762                                           tok::TokenKind ClosingBraceKind) {
1763   bool HasError = false;
1764 
1765   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1766   // replace this by using parseAssigmentExpression() inside.
1767   do {
1768     if (Style.isCSharp()) {
1769       if (FormatTok->is(TT_JsFatArrow)) {
1770         nextToken();
1771         // Fat arrows can be followed by simple expressions or by child blocks
1772         // in curly braces.
1773         if (FormatTok->is(tok::l_brace)) {
1774           parseChildBlock();
1775           continue;
1776         }
1777       }
1778     }
1779     if (Style.Language == FormatStyle::LK_JavaScript) {
1780       if (FormatTok->is(Keywords.kw_function) ||
1781           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1782         tryToParseJSFunction();
1783         continue;
1784       }
1785       if (FormatTok->is(TT_JsFatArrow)) {
1786         nextToken();
1787         // Fat arrows can be followed by simple expressions or by child blocks
1788         // in curly braces.
1789         if (FormatTok->is(tok::l_brace)) {
1790           parseChildBlock();
1791           continue;
1792         }
1793       }
1794       if (FormatTok->is(tok::l_brace)) {
1795         // Could be a method inside of a braced list `{a() { return 1; }}`.
1796         if (tryToParseBracedList())
1797           continue;
1798         parseChildBlock();
1799       }
1800     }
1801     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1802       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1803         addUnwrappedLine();
1804       nextToken();
1805       return !HasError;
1806     }
1807     switch (FormatTok->Tok.getKind()) {
1808     case tok::caret:
1809       nextToken();
1810       if (FormatTok->is(tok::l_brace)) {
1811         parseChildBlock();
1812       }
1813       break;
1814     case tok::l_square:
1815       if (Style.isCSharp())
1816         parseSquare();
1817       else
1818         tryToParseLambda();
1819       break;
1820     case tok::l_paren:
1821       parseParens();
1822       // JavaScript can just have free standing methods and getters/setters in
1823       // object literals. Detect them by a "{" following ")".
1824       if (Style.Language == FormatStyle::LK_JavaScript) {
1825         if (FormatTok->is(tok::l_brace))
1826           parseChildBlock();
1827         break;
1828       }
1829       break;
1830     case tok::l_brace:
1831       // Assume there are no blocks inside a braced init list apart
1832       // from the ones we explicitly parse out (like lambdas).
1833       FormatTok->BlockKind = BK_BracedInit;
1834       nextToken();
1835       parseBracedList();
1836       break;
1837     case tok::less:
1838       if (Style.Language == FormatStyle::LK_Proto) {
1839         nextToken();
1840         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1841                         /*ClosingBraceKind=*/tok::greater);
1842       } else {
1843         nextToken();
1844       }
1845       break;
1846     case tok::semi:
1847       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1848       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1849       // used for error recovery if we have otherwise determined that this is
1850       // a braced list.
1851       if (Style.Language == FormatStyle::LK_JavaScript) {
1852         nextToken();
1853         break;
1854       }
1855       HasError = true;
1856       if (!ContinueOnSemicolons)
1857         return !HasError;
1858       nextToken();
1859       break;
1860     case tok::comma:
1861       nextToken();
1862       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1863         addUnwrappedLine();
1864       break;
1865     default:
1866       nextToken();
1867       break;
1868     }
1869   } while (!eof());
1870   return false;
1871 }
1872 
1873 void UnwrappedLineParser::parseParens() {
1874   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1875   nextToken();
1876   do {
1877     switch (FormatTok->Tok.getKind()) {
1878     case tok::l_paren:
1879       parseParens();
1880       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1881         parseChildBlock();
1882       break;
1883     case tok::r_paren:
1884       nextToken();
1885       return;
1886     case tok::r_brace:
1887       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1888       return;
1889     case tok::l_square:
1890       tryToParseLambda();
1891       break;
1892     case tok::l_brace:
1893       if (!tryToParseBracedList())
1894         parseChildBlock();
1895       break;
1896     case tok::at:
1897       nextToken();
1898       if (FormatTok->Tok.is(tok::l_brace)) {
1899         nextToken();
1900         parseBracedList();
1901       }
1902       break;
1903     case tok::kw_class:
1904       if (Style.Language == FormatStyle::LK_JavaScript)
1905         parseRecord(/*ParseAsExpr=*/true);
1906       else
1907         nextToken();
1908       break;
1909     case tok::identifier:
1910       if (Style.Language == FormatStyle::LK_JavaScript &&
1911           (FormatTok->is(Keywords.kw_function) ||
1912            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1913         tryToParseJSFunction();
1914       else
1915         nextToken();
1916       break;
1917     default:
1918       nextToken();
1919       break;
1920     }
1921   } while (!eof());
1922 }
1923 
1924 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1925   if (!LambdaIntroducer) {
1926     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1927     if (tryToParseLambda())
1928       return;
1929   }
1930   do {
1931     switch (FormatTok->Tok.getKind()) {
1932     case tok::l_paren:
1933       parseParens();
1934       break;
1935     case tok::r_square:
1936       nextToken();
1937       return;
1938     case tok::r_brace:
1939       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1940       return;
1941     case tok::l_square:
1942       parseSquare();
1943       break;
1944     case tok::l_brace: {
1945       if (!tryToParseBracedList())
1946         parseChildBlock();
1947       break;
1948     }
1949     case tok::at:
1950       nextToken();
1951       if (FormatTok->Tok.is(tok::l_brace)) {
1952         nextToken();
1953         parseBracedList();
1954       }
1955       break;
1956     default:
1957       nextToken();
1958       break;
1959     }
1960   } while (!eof());
1961 }
1962 
1963 void UnwrappedLineParser::parseIfThenElse() {
1964   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1965   nextToken();
1966   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1967     nextToken();
1968   if (FormatTok->Tok.is(tok::l_paren))
1969     parseParens();
1970   // handle [[likely]] / [[unlikely]]
1971   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
1972     parseSquare();
1973   bool NeedsUnwrappedLine = false;
1974   if (FormatTok->Tok.is(tok::l_brace)) {
1975     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1976     parseBlock(/*MustBeDeclaration=*/false);
1977     if (Style.BraceWrapping.BeforeElse)
1978       addUnwrappedLine();
1979     else
1980       NeedsUnwrappedLine = true;
1981   } else {
1982     addUnwrappedLine();
1983     ++Line->Level;
1984     parseStructuralElement();
1985     --Line->Level;
1986   }
1987   if (FormatTok->Tok.is(tok::kw_else)) {
1988     nextToken();
1989     // handle [[likely]] / [[unlikely]]
1990     if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
1991       parseSquare();
1992     if (FormatTok->Tok.is(tok::l_brace)) {
1993       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1994       parseBlock(/*MustBeDeclaration=*/false);
1995       addUnwrappedLine();
1996     } else if (FormatTok->Tok.is(tok::kw_if)) {
1997       parseIfThenElse();
1998     } else {
1999       addUnwrappedLine();
2000       ++Line->Level;
2001       parseStructuralElement();
2002       if (FormatTok->is(tok::eof))
2003         addUnwrappedLine();
2004       --Line->Level;
2005     }
2006   } else if (NeedsUnwrappedLine) {
2007     addUnwrappedLine();
2008   }
2009 }
2010 
2011 void UnwrappedLineParser::parseTryCatch() {
2012   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2013   nextToken();
2014   bool NeedsUnwrappedLine = false;
2015   if (FormatTok->is(tok::colon)) {
2016     // We are in a function try block, what comes is an initializer list.
2017     nextToken();
2018 
2019     // In case identifiers were removed by clang-tidy, what might follow is
2020     // multiple commas in sequence - before the first identifier.
2021     while (FormatTok->is(tok::comma))
2022       nextToken();
2023 
2024     while (FormatTok->is(tok::identifier)) {
2025       nextToken();
2026       if (FormatTok->is(tok::l_paren))
2027         parseParens();
2028 
2029       // In case identifiers were removed by clang-tidy, what might follow is
2030       // multiple commas in sequence - after the first identifier.
2031       while (FormatTok->is(tok::comma))
2032         nextToken();
2033     }
2034   }
2035   // Parse try with resource.
2036   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2037     parseParens();
2038   }
2039   if (FormatTok->is(tok::l_brace)) {
2040     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2041     parseBlock(/*MustBeDeclaration=*/false);
2042     if (Style.BraceWrapping.BeforeCatch) {
2043       addUnwrappedLine();
2044     } else {
2045       NeedsUnwrappedLine = true;
2046     }
2047   } else if (!FormatTok->is(tok::kw_catch)) {
2048     // The C++ standard requires a compound-statement after a try.
2049     // If there's none, we try to assume there's a structuralElement
2050     // and try to continue.
2051     addUnwrappedLine();
2052     ++Line->Level;
2053     parseStructuralElement();
2054     --Line->Level;
2055   }
2056   while (1) {
2057     if (FormatTok->is(tok::at))
2058       nextToken();
2059     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2060                              tok::kw___finally) ||
2061           ((Style.Language == FormatStyle::LK_Java ||
2062             Style.Language == FormatStyle::LK_JavaScript) &&
2063            FormatTok->is(Keywords.kw_finally)) ||
2064           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2065            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2066       break;
2067     nextToken();
2068     while (FormatTok->isNot(tok::l_brace)) {
2069       if (FormatTok->is(tok::l_paren)) {
2070         parseParens();
2071         continue;
2072       }
2073       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2074         return;
2075       nextToken();
2076     }
2077     NeedsUnwrappedLine = false;
2078     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2079     parseBlock(/*MustBeDeclaration=*/false);
2080     if (Style.BraceWrapping.BeforeCatch)
2081       addUnwrappedLine();
2082     else
2083       NeedsUnwrappedLine = true;
2084   }
2085   if (NeedsUnwrappedLine)
2086     addUnwrappedLine();
2087 }
2088 
2089 void UnwrappedLineParser::parseNamespace() {
2090   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2091          "'namespace' expected");
2092 
2093   const FormatToken &InitialToken = *FormatTok;
2094   nextToken();
2095   if (InitialToken.is(TT_NamespaceMacro)) {
2096     parseParens();
2097   } else {
2098     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2099                               tok::l_square)) {
2100       if (FormatTok->is(tok::l_square))
2101         parseSquare();
2102       else
2103         nextToken();
2104     }
2105   }
2106   if (FormatTok->Tok.is(tok::l_brace)) {
2107     if (ShouldBreakBeforeBrace(Style, InitialToken))
2108       addUnwrappedLine();
2109 
2110     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
2111                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2112                      DeclarationScopeStack.size() > 1);
2113     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
2114     // Munch the semicolon after a namespace. This is more common than one would
2115     // think. Putting the semicolon into its own line is very ugly.
2116     if (FormatTok->Tok.is(tok::semi))
2117       nextToken();
2118     addUnwrappedLine();
2119   }
2120   // FIXME: Add error handling.
2121 }
2122 
2123 void UnwrappedLineParser::parseNew() {
2124   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2125   nextToken();
2126 
2127   if (Style.isCSharp()) {
2128     do {
2129       if (FormatTok->is(tok::l_brace))
2130         parseBracedList();
2131 
2132       if (FormatTok->isOneOf(tok::semi, tok::comma))
2133         return;
2134 
2135       nextToken();
2136     } while (!eof());
2137   }
2138 
2139   if (Style.Language != FormatStyle::LK_Java)
2140     return;
2141 
2142   // In Java, we can parse everything up to the parens, which aren't optional.
2143   do {
2144     // There should not be a ;, { or } before the new's open paren.
2145     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2146       return;
2147 
2148     // Consume the parens.
2149     if (FormatTok->is(tok::l_paren)) {
2150       parseParens();
2151 
2152       // If there is a class body of an anonymous class, consume that as child.
2153       if (FormatTok->is(tok::l_brace))
2154         parseChildBlock();
2155       return;
2156     }
2157     nextToken();
2158   } while (!eof());
2159 }
2160 
2161 void UnwrappedLineParser::parseForOrWhileLoop() {
2162   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2163          "'for', 'while' or foreach macro expected");
2164   nextToken();
2165   // JS' for await ( ...
2166   if (Style.Language == FormatStyle::LK_JavaScript &&
2167       FormatTok->is(Keywords.kw_await))
2168     nextToken();
2169   if (FormatTok->Tok.is(tok::l_paren))
2170     parseParens();
2171   if (FormatTok->Tok.is(tok::l_brace)) {
2172     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2173     parseBlock(/*MustBeDeclaration=*/false);
2174     addUnwrappedLine();
2175   } else {
2176     addUnwrappedLine();
2177     ++Line->Level;
2178     parseStructuralElement();
2179     --Line->Level;
2180   }
2181 }
2182 
2183 void UnwrappedLineParser::parseDoWhile() {
2184   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2185   nextToken();
2186   if (FormatTok->Tok.is(tok::l_brace)) {
2187     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2188     parseBlock(/*MustBeDeclaration=*/false);
2189     if (Style.BraceWrapping.BeforeWhile)
2190       addUnwrappedLine();
2191   } else {
2192     addUnwrappedLine();
2193     ++Line->Level;
2194     parseStructuralElement();
2195     --Line->Level;
2196   }
2197 
2198   // FIXME: Add error handling.
2199   if (!FormatTok->Tok.is(tok::kw_while)) {
2200     addUnwrappedLine();
2201     return;
2202   }
2203 
2204   nextToken();
2205   parseStructuralElement();
2206 }
2207 
2208 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2209   nextToken();
2210   unsigned OldLineLevel = Line->Level;
2211   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2212     --Line->Level;
2213   if (LeftAlignLabel)
2214     Line->Level = 0;
2215   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2216       FormatTok->Tok.is(tok::l_brace)) {
2217     CompoundStatementIndenter Indenter(this, Line->Level,
2218                                        Style.BraceWrapping.AfterCaseLabel,
2219                                        Style.BraceWrapping.IndentBraces);
2220     parseBlock(/*MustBeDeclaration=*/false);
2221     if (FormatTok->Tok.is(tok::kw_break)) {
2222       if (Style.BraceWrapping.AfterControlStatement ==
2223           FormatStyle::BWACS_Always) {
2224         addUnwrappedLine();
2225         if (!Style.IndentCaseBlocks &&
2226             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2227           Line->Level++;
2228         }
2229       }
2230       parseStructuralElement();
2231     }
2232     addUnwrappedLine();
2233   } else {
2234     if (FormatTok->is(tok::semi))
2235       nextToken();
2236     addUnwrappedLine();
2237   }
2238   Line->Level = OldLineLevel;
2239   if (FormatTok->isNot(tok::l_brace)) {
2240     parseStructuralElement();
2241     addUnwrappedLine();
2242   }
2243 }
2244 
2245 void UnwrappedLineParser::parseCaseLabel() {
2246   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2247   // FIXME: fix handling of complex expressions here.
2248   do {
2249     nextToken();
2250   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2251   parseLabel();
2252 }
2253 
2254 void UnwrappedLineParser::parseSwitch() {
2255   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2256   nextToken();
2257   if (FormatTok->Tok.is(tok::l_paren))
2258     parseParens();
2259   if (FormatTok->Tok.is(tok::l_brace)) {
2260     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2261     parseBlock(/*MustBeDeclaration=*/false);
2262     addUnwrappedLine();
2263   } else {
2264     addUnwrappedLine();
2265     ++Line->Level;
2266     parseStructuralElement();
2267     --Line->Level;
2268   }
2269 }
2270 
2271 void UnwrappedLineParser::parseAccessSpecifier() {
2272   nextToken();
2273   // Understand Qt's slots.
2274   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2275     nextToken();
2276   // Otherwise, we don't know what it is, and we'd better keep the next token.
2277   if (FormatTok->Tok.is(tok::colon))
2278     nextToken();
2279   addUnwrappedLine();
2280 }
2281 
2282 bool UnwrappedLineParser::parseEnum() {
2283   // Won't be 'enum' for NS_ENUMs.
2284   if (FormatTok->Tok.is(tok::kw_enum))
2285     nextToken();
2286 
2287   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2288   // declarations. An "enum" keyword followed by a colon would be a syntax
2289   // error and thus assume it is just an identifier.
2290   if (Style.Language == FormatStyle::LK_JavaScript &&
2291       FormatTok->isOneOf(tok::colon, tok::question))
2292     return false;
2293 
2294   // In protobuf, "enum" can be used as a field name.
2295   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2296     return false;
2297 
2298   // Eat up enum class ...
2299   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2300     nextToken();
2301 
2302   while (FormatTok->Tok.getIdentifierInfo() ||
2303          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2304                             tok::greater, tok::comma, tok::question)) {
2305     nextToken();
2306     // We can have macros or attributes in between 'enum' and the enum name.
2307     if (FormatTok->is(tok::l_paren))
2308       parseParens();
2309     if (FormatTok->is(tok::identifier)) {
2310       nextToken();
2311       // If there are two identifiers in a row, this is likely an elaborate
2312       // return type. In Java, this can be "implements", etc.
2313       if (Style.isCpp() && FormatTok->is(tok::identifier))
2314         return false;
2315     }
2316   }
2317 
2318   // Just a declaration or something is wrong.
2319   if (FormatTok->isNot(tok::l_brace))
2320     return true;
2321   FormatTok->BlockKind = BK_Block;
2322 
2323   if (Style.Language == FormatStyle::LK_Java) {
2324     // Java enums are different.
2325     parseJavaEnumBody();
2326     return true;
2327   }
2328   if (Style.Language == FormatStyle::LK_Proto) {
2329     parseBlock(/*MustBeDeclaration=*/true);
2330     return true;
2331   }
2332 
2333   if (!Style.AllowShortEnumsOnASingleLine)
2334     addUnwrappedLine();
2335   // Parse enum body.
2336   nextToken();
2337   if (!Style.AllowShortEnumsOnASingleLine) {
2338     addUnwrappedLine();
2339     Line->Level += 1;
2340   }
2341   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2342                                    /*IsEnum=*/true);
2343   if (!Style.AllowShortEnumsOnASingleLine)
2344     Line->Level -= 1;
2345   if (HasError) {
2346     if (FormatTok->is(tok::semi))
2347       nextToken();
2348     addUnwrappedLine();
2349   }
2350   return true;
2351 
2352   // There is no addUnwrappedLine() here so that we fall through to parsing a
2353   // structural element afterwards. Thus, in "enum A {} n, m;",
2354   // "} n, m;" will end up in one unwrapped line.
2355 }
2356 
2357 namespace {
2358 // A class used to set and restore the Token position when peeking
2359 // ahead in the token source.
2360 class ScopedTokenPosition {
2361   unsigned StoredPosition;
2362   FormatTokenSource *Tokens;
2363 
2364 public:
2365   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2366     assert(Tokens && "Tokens expected to not be null");
2367     StoredPosition = Tokens->getPosition();
2368   }
2369 
2370   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2371 };
2372 } // namespace
2373 
2374 // Look to see if we have [[ by looking ahead, if
2375 // its not then rewind to the original position.
2376 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2377   ScopedTokenPosition AutoPosition(Tokens);
2378   FormatToken *Tok = Tokens->getNextToken();
2379   // We already read the first [ check for the second.
2380   if (Tok && !Tok->is(tok::l_square)) {
2381     return false;
2382   }
2383   // Double check that the attribute is just something
2384   // fairly simple.
2385   while (Tok) {
2386     if (Tok->is(tok::r_square)) {
2387       break;
2388     }
2389     Tok = Tokens->getNextToken();
2390   }
2391   Tok = Tokens->getNextToken();
2392   if (Tok && !Tok->is(tok::r_square)) {
2393     return false;
2394   }
2395   Tok = Tokens->getNextToken();
2396   if (Tok && Tok->is(tok::semi)) {
2397     return false;
2398   }
2399   return true;
2400 }
2401 
2402 void UnwrappedLineParser::parseJavaEnumBody() {
2403   // Determine whether the enum is simple, i.e. does not have a semicolon or
2404   // constants with class bodies. Simple enums can be formatted like braced
2405   // lists, contracted to a single line, etc.
2406   unsigned StoredPosition = Tokens->getPosition();
2407   bool IsSimple = true;
2408   FormatToken *Tok = Tokens->getNextToken();
2409   while (Tok) {
2410     if (Tok->is(tok::r_brace))
2411       break;
2412     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2413       IsSimple = false;
2414       break;
2415     }
2416     // FIXME: This will also mark enums with braces in the arguments to enum
2417     // constants as "not simple". This is probably fine in practice, though.
2418     Tok = Tokens->getNextToken();
2419   }
2420   FormatTok = Tokens->setPosition(StoredPosition);
2421 
2422   if (IsSimple) {
2423     nextToken();
2424     parseBracedList();
2425     addUnwrappedLine();
2426     return;
2427   }
2428 
2429   // Parse the body of a more complex enum.
2430   // First add a line for everything up to the "{".
2431   nextToken();
2432   addUnwrappedLine();
2433   ++Line->Level;
2434 
2435   // Parse the enum constants.
2436   while (FormatTok) {
2437     if (FormatTok->is(tok::l_brace)) {
2438       // Parse the constant's class body.
2439       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2440                  /*MunchSemi=*/false);
2441     } else if (FormatTok->is(tok::l_paren)) {
2442       parseParens();
2443     } else if (FormatTok->is(tok::comma)) {
2444       nextToken();
2445       addUnwrappedLine();
2446     } else if (FormatTok->is(tok::semi)) {
2447       nextToken();
2448       addUnwrappedLine();
2449       break;
2450     } else if (FormatTok->is(tok::r_brace)) {
2451       addUnwrappedLine();
2452       break;
2453     } else {
2454       nextToken();
2455     }
2456   }
2457 
2458   // Parse the class body after the enum's ";" if any.
2459   parseLevel(/*HasOpeningBrace=*/true);
2460   nextToken();
2461   --Line->Level;
2462   addUnwrappedLine();
2463 }
2464 
2465 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2466   const FormatToken &InitialToken = *FormatTok;
2467   nextToken();
2468 
2469   // The actual identifier can be a nested name specifier, and in macros
2470   // it is often token-pasted.
2471   // An [[attribute]] can be before the identifier.
2472   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2473                             tok::kw___attribute, tok::kw___declspec,
2474                             tok::kw_alignas, tok::l_square, tok::r_square) ||
2475          ((Style.Language == FormatStyle::LK_Java ||
2476            Style.Language == FormatStyle::LK_JavaScript) &&
2477           FormatTok->isOneOf(tok::period, tok::comma))) {
2478     if (Style.Language == FormatStyle::LK_JavaScript &&
2479         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2480       // JavaScript/TypeScript supports inline object types in
2481       // extends/implements positions:
2482       //     class Foo implements {bar: number} { }
2483       nextToken();
2484       if (FormatTok->is(tok::l_brace)) {
2485         tryToParseBracedList();
2486         continue;
2487       }
2488     }
2489     bool IsNonMacroIdentifier =
2490         FormatTok->is(tok::identifier) &&
2491         FormatTok->TokenText != FormatTok->TokenText.upper();
2492     nextToken();
2493     // We can have macros or attributes in between 'class' and the class name.
2494     if (!IsNonMacroIdentifier) {
2495       if (FormatTok->Tok.is(tok::l_paren)) {
2496         parseParens();
2497       } else if (FormatTok->is(TT_AttributeSquare)) {
2498         parseSquare();
2499         // Consume the closing TT_AttributeSquare.
2500         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2501           nextToken();
2502       }
2503     }
2504   }
2505 
2506   // Note that parsing away template declarations here leads to incorrectly
2507   // accepting function declarations as record declarations.
2508   // In general, we cannot solve this problem. Consider:
2509   // class A<int> B() {}
2510   // which can be a function definition or a class definition when B() is a
2511   // macro. If we find enough real-world cases where this is a problem, we
2512   // can parse for the 'template' keyword in the beginning of the statement,
2513   // and thus rule out the record production in case there is no template
2514   // (this would still leave us with an ambiguity between template function
2515   // and class declarations).
2516   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2517     while (!eof()) {
2518       if (FormatTok->is(tok::l_brace)) {
2519         calculateBraceTypes(/*ExpectClassBody=*/true);
2520         if (!tryToParseBracedList())
2521           break;
2522       }
2523       if (FormatTok->Tok.is(tok::semi))
2524         return;
2525       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2526         addUnwrappedLine();
2527         nextToken();
2528         parseCSharpGenericTypeConstraint();
2529         break;
2530       }
2531       nextToken();
2532     }
2533   }
2534   if (FormatTok->Tok.is(tok::l_brace)) {
2535     if (ParseAsExpr) {
2536       parseChildBlock();
2537     } else {
2538       if (ShouldBreakBeforeBrace(Style, InitialToken))
2539         addUnwrappedLine();
2540 
2541       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2542                  /*MunchSemi=*/false);
2543     }
2544   }
2545   // There is no addUnwrappedLine() here so that we fall through to parsing a
2546   // structural element afterwards. Thus, in "class A {} n, m;",
2547   // "} n, m;" will end up in one unwrapped line.
2548 }
2549 
2550 void UnwrappedLineParser::parseObjCMethod() {
2551   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2552          "'(' or identifier expected.");
2553   do {
2554     if (FormatTok->Tok.is(tok::semi)) {
2555       nextToken();
2556       addUnwrappedLine();
2557       return;
2558     } else if (FormatTok->Tok.is(tok::l_brace)) {
2559       if (Style.BraceWrapping.AfterFunction)
2560         addUnwrappedLine();
2561       parseBlock(/*MustBeDeclaration=*/false);
2562       addUnwrappedLine();
2563       return;
2564     } else {
2565       nextToken();
2566     }
2567   } while (!eof());
2568 }
2569 
2570 void UnwrappedLineParser::parseObjCProtocolList() {
2571   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2572   do {
2573     nextToken();
2574     // Early exit in case someone forgot a close angle.
2575     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2576         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2577       return;
2578   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2579   nextToken(); // Skip '>'.
2580 }
2581 
2582 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2583   do {
2584     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2585       nextToken();
2586       addUnwrappedLine();
2587       break;
2588     }
2589     if (FormatTok->is(tok::l_brace)) {
2590       parseBlock(/*MustBeDeclaration=*/false);
2591       // In ObjC interfaces, nothing should be following the "}".
2592       addUnwrappedLine();
2593     } else if (FormatTok->is(tok::r_brace)) {
2594       // Ignore stray "}". parseStructuralElement doesn't consume them.
2595       nextToken();
2596       addUnwrappedLine();
2597     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2598       nextToken();
2599       parseObjCMethod();
2600     } else {
2601       parseStructuralElement();
2602     }
2603   } while (!eof());
2604 }
2605 
2606 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2607   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2608          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2609   nextToken();
2610   nextToken(); // interface name
2611 
2612   // @interface can be followed by a lightweight generic
2613   // specialization list, then either a base class or a category.
2614   if (FormatTok->Tok.is(tok::less)) {
2615     // Unlike protocol lists, generic parameterizations support
2616     // nested angles:
2617     //
2618     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2619     //     NSObject <NSCopying, NSSecureCoding>
2620     //
2621     // so we need to count how many open angles we have left.
2622     unsigned NumOpenAngles = 1;
2623     do {
2624       nextToken();
2625       // Early exit in case someone forgot a close angle.
2626       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2627           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2628         break;
2629       if (FormatTok->Tok.is(tok::less))
2630         ++NumOpenAngles;
2631       else if (FormatTok->Tok.is(tok::greater)) {
2632         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2633         --NumOpenAngles;
2634       }
2635     } while (!eof() && NumOpenAngles != 0);
2636     nextToken(); // Skip '>'.
2637   }
2638   if (FormatTok->Tok.is(tok::colon)) {
2639     nextToken();
2640     nextToken(); // base class name
2641   } else if (FormatTok->Tok.is(tok::l_paren))
2642     // Skip category, if present.
2643     parseParens();
2644 
2645   if (FormatTok->Tok.is(tok::less))
2646     parseObjCProtocolList();
2647 
2648   if (FormatTok->Tok.is(tok::l_brace)) {
2649     if (Style.BraceWrapping.AfterObjCDeclaration)
2650       addUnwrappedLine();
2651     parseBlock(/*MustBeDeclaration=*/true);
2652   }
2653 
2654   // With instance variables, this puts '}' on its own line.  Without instance
2655   // variables, this ends the @interface line.
2656   addUnwrappedLine();
2657 
2658   parseObjCUntilAtEnd();
2659 }
2660 
2661 // Returns true for the declaration/definition form of @protocol,
2662 // false for the expression form.
2663 bool UnwrappedLineParser::parseObjCProtocol() {
2664   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2665   nextToken();
2666 
2667   if (FormatTok->is(tok::l_paren))
2668     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2669     return false;
2670 
2671   // The definition/declaration form,
2672   // @protocol Foo
2673   // - (int)someMethod;
2674   // @end
2675 
2676   nextToken(); // protocol name
2677 
2678   if (FormatTok->Tok.is(tok::less))
2679     parseObjCProtocolList();
2680 
2681   // Check for protocol declaration.
2682   if (FormatTok->Tok.is(tok::semi)) {
2683     nextToken();
2684     addUnwrappedLine();
2685     return true;
2686   }
2687 
2688   addUnwrappedLine();
2689   parseObjCUntilAtEnd();
2690   return true;
2691 }
2692 
2693 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2694   bool IsImport = FormatTok->is(Keywords.kw_import);
2695   assert(IsImport || FormatTok->is(tok::kw_export));
2696   nextToken();
2697 
2698   // Consume the "default" in "export default class/function".
2699   if (FormatTok->is(tok::kw_default))
2700     nextToken();
2701 
2702   // Consume "async function", "function" and "default function", so that these
2703   // get parsed as free-standing JS functions, i.e. do not require a trailing
2704   // semicolon.
2705   if (FormatTok->is(Keywords.kw_async))
2706     nextToken();
2707   if (FormatTok->is(Keywords.kw_function)) {
2708     nextToken();
2709     return;
2710   }
2711 
2712   // For imports, `export *`, `export {...}`, consume the rest of the line up
2713   // to the terminating `;`. For everything else, just return and continue
2714   // parsing the structural element, i.e. the declaration or expression for
2715   // `export default`.
2716   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2717       !FormatTok->isStringLiteral())
2718     return;
2719 
2720   while (!eof()) {
2721     if (FormatTok->is(tok::semi))
2722       return;
2723     if (Line->Tokens.empty()) {
2724       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2725       // import statement should terminate.
2726       return;
2727     }
2728     if (FormatTok->is(tok::l_brace)) {
2729       FormatTok->BlockKind = BK_Block;
2730       nextToken();
2731       parseBracedList();
2732     } else {
2733       nextToken();
2734     }
2735   }
2736 }
2737 
2738 void UnwrappedLineParser::parseStatementMacro() {
2739   nextToken();
2740   if (FormatTok->is(tok::l_paren))
2741     parseParens();
2742   if (FormatTok->is(tok::semi))
2743     nextToken();
2744   addUnwrappedLine();
2745 }
2746 
2747 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2748                                                  StringRef Prefix = "") {
2749   llvm::dbgs() << Prefix << "Line(" << Line.Level
2750                << ", FSC=" << Line.FirstStartColumn << ")"
2751                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2752   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2753                                                     E = Line.Tokens.end();
2754        I != E; ++I) {
2755     llvm::dbgs() << I->Tok->Tok.getName() << "["
2756                  << "T=" << I->Tok->getType()
2757                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2758   }
2759   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2760                                                     E = Line.Tokens.end();
2761        I != E; ++I) {
2762     const UnwrappedLineNode &Node = *I;
2763     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2764              I = Node.Children.begin(),
2765              E = Node.Children.end();
2766          I != E; ++I) {
2767       printDebugInfo(*I, "\nChild: ");
2768     }
2769   }
2770   llvm::dbgs() << "\n";
2771 }
2772 
2773 void UnwrappedLineParser::addUnwrappedLine() {
2774   if (Line->Tokens.empty())
2775     return;
2776   LLVM_DEBUG({
2777     if (CurrentLines == &Lines)
2778       printDebugInfo(*Line);
2779   });
2780   CurrentLines->push_back(std::move(*Line));
2781   Line->Tokens.clear();
2782   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2783   Line->FirstStartColumn = 0;
2784   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2785     CurrentLines->append(
2786         std::make_move_iterator(PreprocessorDirectives.begin()),
2787         std::make_move_iterator(PreprocessorDirectives.end()));
2788     PreprocessorDirectives.clear();
2789   }
2790   // Disconnect the current token from the last token on the previous line.
2791   FormatTok->Previous = nullptr;
2792 }
2793 
2794 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2795 
2796 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2797   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2798          FormatTok.NewlinesBefore > 0;
2799 }
2800 
2801 // Checks if \p FormatTok is a line comment that continues the line comment
2802 // section on \p Line.
2803 static bool
2804 continuesLineCommentSection(const FormatToken &FormatTok,
2805                             const UnwrappedLine &Line,
2806                             const llvm::Regex &CommentPragmasRegex) {
2807   if (Line.Tokens.empty())
2808     return false;
2809 
2810   StringRef IndentContent = FormatTok.TokenText;
2811   if (FormatTok.TokenText.startswith("//") ||
2812       FormatTok.TokenText.startswith("/*"))
2813     IndentContent = FormatTok.TokenText.substr(2);
2814   if (CommentPragmasRegex.match(IndentContent))
2815     return false;
2816 
2817   // If Line starts with a line comment, then FormatTok continues the comment
2818   // section if its original column is greater or equal to the original start
2819   // column of the line.
2820   //
2821   // Define the min column token of a line as follows: if a line ends in '{' or
2822   // contains a '{' followed by a line comment, then the min column token is
2823   // that '{'. Otherwise, the min column token of the line is the first token of
2824   // the line.
2825   //
2826   // If Line starts with a token other than a line comment, then FormatTok
2827   // continues the comment section if its original column is greater than the
2828   // original start column of the min column token of the line.
2829   //
2830   // For example, the second line comment continues the first in these cases:
2831   //
2832   // // first line
2833   // // second line
2834   //
2835   // and:
2836   //
2837   // // first line
2838   //  // second line
2839   //
2840   // and:
2841   //
2842   // int i; // first line
2843   //  // second line
2844   //
2845   // and:
2846   //
2847   // do { // first line
2848   //      // second line
2849   //   int i;
2850   // } while (true);
2851   //
2852   // and:
2853   //
2854   // enum {
2855   //   a, // first line
2856   //    // second line
2857   //   b
2858   // };
2859   //
2860   // The second line comment doesn't continue the first in these cases:
2861   //
2862   //   // first line
2863   //  // second line
2864   //
2865   // and:
2866   //
2867   // int i; // first line
2868   // // second line
2869   //
2870   // and:
2871   //
2872   // do { // first line
2873   //   // second line
2874   //   int i;
2875   // } while (true);
2876   //
2877   // and:
2878   //
2879   // enum {
2880   //   a, // first line
2881   //   // second line
2882   // };
2883   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2884 
2885   // Scan for '{//'. If found, use the column of '{' as a min column for line
2886   // comment section continuation.
2887   const FormatToken *PreviousToken = nullptr;
2888   for (const UnwrappedLineNode &Node : Line.Tokens) {
2889     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2890         isLineComment(*Node.Tok)) {
2891       MinColumnToken = PreviousToken;
2892       break;
2893     }
2894     PreviousToken = Node.Tok;
2895 
2896     // Grab the last newline preceding a token in this unwrapped line.
2897     if (Node.Tok->NewlinesBefore > 0) {
2898       MinColumnToken = Node.Tok;
2899     }
2900   }
2901   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2902     MinColumnToken = PreviousToken;
2903   }
2904 
2905   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2906                               MinColumnToken);
2907 }
2908 
2909 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2910   bool JustComments = Line->Tokens.empty();
2911   for (SmallVectorImpl<FormatToken *>::const_iterator
2912            I = CommentsBeforeNextToken.begin(),
2913            E = CommentsBeforeNextToken.end();
2914        I != E; ++I) {
2915     // Line comments that belong to the same line comment section are put on the
2916     // same line since later we might want to reflow content between them.
2917     // Additional fine-grained breaking of line comment sections is controlled
2918     // by the class BreakableLineCommentSection in case it is desirable to keep
2919     // several line comment sections in the same unwrapped line.
2920     //
2921     // FIXME: Consider putting separate line comment sections as children to the
2922     // unwrapped line instead.
2923     (*I)->ContinuesLineCommentSection =
2924         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2925     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2926       addUnwrappedLine();
2927     pushToken(*I);
2928   }
2929   if (NewlineBeforeNext && JustComments)
2930     addUnwrappedLine();
2931   CommentsBeforeNextToken.clear();
2932 }
2933 
2934 void UnwrappedLineParser::nextToken(int LevelDifference) {
2935   if (eof())
2936     return;
2937   flushComments(isOnNewLine(*FormatTok));
2938   pushToken(FormatTok);
2939   FormatToken *Previous = FormatTok;
2940   if (Style.Language != FormatStyle::LK_JavaScript)
2941     readToken(LevelDifference);
2942   else
2943     readTokenWithJavaScriptASI();
2944   FormatTok->Previous = Previous;
2945 }
2946 
2947 void UnwrappedLineParser::distributeComments(
2948     const SmallVectorImpl<FormatToken *> &Comments,
2949     const FormatToken *NextTok) {
2950   // Whether or not a line comment token continues a line is controlled by
2951   // the method continuesLineCommentSection, with the following caveat:
2952   //
2953   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2954   // that each comment line from the trail is aligned with the next token, if
2955   // the next token exists. If a trail exists, the beginning of the maximal
2956   // trail is marked as a start of a new comment section.
2957   //
2958   // For example in this code:
2959   //
2960   // int a; // line about a
2961   //   // line 1 about b
2962   //   // line 2 about b
2963   //   int b;
2964   //
2965   // the two lines about b form a maximal trail, so there are two sections, the
2966   // first one consisting of the single comment "// line about a" and the
2967   // second one consisting of the next two comments.
2968   if (Comments.empty())
2969     return;
2970   bool ShouldPushCommentsInCurrentLine = true;
2971   bool HasTrailAlignedWithNextToken = false;
2972   unsigned StartOfTrailAlignedWithNextToken = 0;
2973   if (NextTok) {
2974     // We are skipping the first element intentionally.
2975     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2976       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2977         HasTrailAlignedWithNextToken = true;
2978         StartOfTrailAlignedWithNextToken = i;
2979       }
2980     }
2981   }
2982   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2983     FormatToken *FormatTok = Comments[i];
2984     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2985       FormatTok->ContinuesLineCommentSection = false;
2986     } else {
2987       FormatTok->ContinuesLineCommentSection =
2988           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2989     }
2990     if (!FormatTok->ContinuesLineCommentSection &&
2991         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2992       ShouldPushCommentsInCurrentLine = false;
2993     }
2994     if (ShouldPushCommentsInCurrentLine) {
2995       pushToken(FormatTok);
2996     } else {
2997       CommentsBeforeNextToken.push_back(FormatTok);
2998     }
2999   }
3000 }
3001 
3002 void UnwrappedLineParser::readToken(int LevelDifference) {
3003   SmallVector<FormatToken *, 1> Comments;
3004   do {
3005     FormatTok = Tokens->getNextToken();
3006     assert(FormatTok);
3007     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3008            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3009       distributeComments(Comments, FormatTok);
3010       Comments.clear();
3011       // If there is an unfinished unwrapped line, we flush the preprocessor
3012       // directives only after that unwrapped line was finished later.
3013       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3014       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3015       assert((LevelDifference >= 0 ||
3016               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3017              "LevelDifference makes Line->Level negative");
3018       Line->Level += LevelDifference;
3019       // Comments stored before the preprocessor directive need to be output
3020       // before the preprocessor directive, at the same level as the
3021       // preprocessor directive, as we consider them to apply to the directive.
3022       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3023           PPBranchLevel > 0)
3024         Line->Level += PPBranchLevel;
3025       flushComments(isOnNewLine(*FormatTok));
3026       parsePPDirective();
3027     }
3028     while (FormatTok->getType() == TT_ConflictStart ||
3029            FormatTok->getType() == TT_ConflictEnd ||
3030            FormatTok->getType() == TT_ConflictAlternative) {
3031       if (FormatTok->getType() == TT_ConflictStart) {
3032         conditionalCompilationStart(/*Unreachable=*/false);
3033       } else if (FormatTok->getType() == TT_ConflictAlternative) {
3034         conditionalCompilationAlternative();
3035       } else if (FormatTok->getType() == TT_ConflictEnd) {
3036         conditionalCompilationEnd();
3037       }
3038       FormatTok = Tokens->getNextToken();
3039       FormatTok->MustBreakBefore = true;
3040     }
3041 
3042     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3043         !Line->InPPDirective) {
3044       continue;
3045     }
3046 
3047     if (!FormatTok->Tok.is(tok::comment)) {
3048       distributeComments(Comments, FormatTok);
3049       Comments.clear();
3050       return;
3051     }
3052 
3053     Comments.push_back(FormatTok);
3054   } while (!eof());
3055 
3056   distributeComments(Comments, nullptr);
3057   Comments.clear();
3058 }
3059 
3060 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3061   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3062   if (MustBreakBeforeNextToken) {
3063     Line->Tokens.back().Tok->MustBreakBefore = true;
3064     MustBreakBeforeNextToken = false;
3065   }
3066 }
3067 
3068 } // end namespace format
3069 } // end namespace clang
3070