xref: /freebsd/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp (revision 5deeebd8c6ca991269e72902a7a62cada57947f6)
1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "TokenAnnotator.h"
16 #include "FormatToken.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Support/Debug.h"
21 
22 #define DEBUG_TYPE "format-token-annotator"
23 
24 namespace clang {
25 namespace format {
26 
mustBreakAfterAttributes(const FormatToken & Tok,const FormatStyle & Style)27 static bool mustBreakAfterAttributes(const FormatToken &Tok,
28                                      const FormatStyle &Style) {
29   switch (Style.BreakAfterAttributes) {
30   case FormatStyle::ABS_Always:
31     return true;
32   case FormatStyle::ABS_Leave:
33     return Tok.NewlinesBefore > 0;
34   default:
35     return false;
36   }
37 }
38 
39 namespace {
40 
41 /// Returns \c true if the line starts with a token that can start a statement
42 /// with an initializer.
startsWithInitStatement(const AnnotatedLine & Line)43 static bool startsWithInitStatement(const AnnotatedLine &Line) {
44   return Line.startsWith(tok::kw_for) || Line.startsWith(tok::kw_if) ||
45          Line.startsWith(tok::kw_switch);
46 }
47 
48 /// Returns \c true if the token can be used as an identifier in
49 /// an Objective-C \c \@selector, \c false otherwise.
50 ///
51 /// Because getFormattingLangOpts() always lexes source code as
52 /// Objective-C++, C++ keywords like \c new and \c delete are
53 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
54 ///
55 /// For Objective-C and Objective-C++, both identifiers and keywords
56 /// are valid inside @selector(...) (or a macro which
57 /// invokes @selector(...)). So, we allow treat any identifier or
58 /// keyword as a potential Objective-C selector component.
canBeObjCSelectorComponent(const FormatToken & Tok)59 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
60   return Tok.Tok.getIdentifierInfo();
61 }
62 
63 /// With `Left` being '(', check if we're at either `[...](` or
64 /// `[...]<...>(`, where the [ opens a lambda capture list.
isLambdaParameterList(const FormatToken * Left)65 static bool isLambdaParameterList(const FormatToken *Left) {
66   // Skip <...> if present.
67   if (Left->Previous && Left->Previous->is(tok::greater) &&
68       Left->Previous->MatchingParen &&
69       Left->Previous->MatchingParen->is(TT_TemplateOpener)) {
70     Left = Left->Previous->MatchingParen;
71   }
72 
73   // Check for `[...]`.
74   return Left->Previous && Left->Previous->is(tok::r_square) &&
75          Left->Previous->MatchingParen &&
76          Left->Previous->MatchingParen->is(TT_LambdaLSquare);
77 }
78 
79 /// Returns \c true if the token is followed by a boolean condition, \c false
80 /// otherwise.
isKeywordWithCondition(const FormatToken & Tok)81 static bool isKeywordWithCondition(const FormatToken &Tok) {
82   return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
83                      tok::kw_constexpr, tok::kw_catch);
84 }
85 
86 /// Returns \c true if the token starts a C++ attribute, \c false otherwise.
isCppAttribute(bool IsCpp,const FormatToken & Tok)87 static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) {
88   if (!IsCpp || !Tok.startsSequence(tok::l_square, tok::l_square))
89     return false;
90   // The first square bracket is part of an ObjC array literal
91   if (Tok.Previous && Tok.Previous->is(tok::at))
92     return false;
93   const FormatToken *AttrTok = Tok.Next->Next;
94   if (!AttrTok)
95     return false;
96   // C++17 '[[using ns: foo, bar(baz, blech)]]'
97   // We assume nobody will name an ObjC variable 'using'.
98   if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
99     return true;
100   if (AttrTok->isNot(tok::identifier))
101     return false;
102   while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
103     // ObjC message send. We assume nobody will use : in a C++11 attribute
104     // specifier parameter, although this is technically valid:
105     // [[foo(:)]].
106     if (AttrTok->is(tok::colon) ||
107         AttrTok->startsSequence(tok::identifier, tok::identifier) ||
108         AttrTok->startsSequence(tok::r_paren, tok::identifier)) {
109       return false;
110     }
111     if (AttrTok->is(tok::ellipsis))
112       return true;
113     AttrTok = AttrTok->Next;
114   }
115   return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
116 }
117 
118 /// A parser that gathers additional information about tokens.
119 ///
120 /// The \c TokenAnnotator tries to match parentheses and square brackets and
121 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
122 /// into template parameter lists.
123 class AnnotatingParser {
124 public:
/// Sets up a parser for \p Line.
///
/// CurrentToken starts at \c Line.First, \c IsCpp and \c LangOpts are derived
/// from \p Style, and \p Keywords and \p Scopes are stored for later use. The
/// context stack is seeded with a single non-expression context of kind
/// \c tok::unknown before resetTokenMetadata() is called.
AnnotatingParser(const FormatStyle & Style,AnnotatedLine & Line,const AdditionalKeywords & Keywords,SmallVector<ScopeType> & Scopes)125   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
126                    const AdditionalKeywords &Keywords,
127                    SmallVector<ScopeType> &Scopes)
128       : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
129         IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)),
130         Keywords(Keywords), Scopes(Scopes), TemplateDeclarationDepth(0) {
        // The style's notion of "C++" must agree with the derived lang opts.
131     assert(IsCpp == LangOpts.CXXOperatorNames);
        // Outermost context; later code indexes Contexts[size - 2] after
        // pushing a nested context, so the stack must never be empty.
132     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
133     resetTokenMetadata();
134   }
135 
136 private:
getScopeType(const FormatToken & Token) const137   ScopeType getScopeType(const FormatToken &Token) const {
138     switch (Token.getType()) {
139     case TT_FunctionLBrace:
140     case TT_LambdaLBrace:
141       return ST_Function;
142     case TT_ClassLBrace:
143     case TT_StructLBrace:
144     case TT_UnionLBrace:
145       return ST_Class;
146     default:
147       return ST_Other;
148     }
149   }
150 
/// Tries to parse the tokens following a '<' as an angle-bracketed list and to
/// pair that '<' with its closing '>'.
///
/// On entry CurrentToken is the token right after the '<' (so
/// CurrentToken->Previous is the '<'). On success both brackets get their
/// MatchingParen set, the '>' is typed as \c TT_TemplateCloser (or
/// \c TT_DictLiteral for text protos and proto message literals), the '>' is
/// consumed and \c true is returned. \c false is returned when the heuristics
/// decide that the '<' is not an opening angle bracket, when a nested parse
/// fails, or when the line ends before a '>' is found.
parseAngle()151   bool parseAngle() {
152     if (!CurrentToken || !CurrentToken->Previous)
153       return false;
        // A '<' that is recorded in NonTemplateLess is never an opener.
154     if (NonTemplateLess.count(CurrentToken->Previous) > 0)
155       return false;
156 
        // Reject based on the token in front of the '<': a literal, a '}', a
        // ')' inside a nested context (unless it closes an overloaded-operator
        // parameter list), or `operator<` immediately followed by '('.
157     if (const auto &Previous = *CurrentToken->Previous; // The '<'.
158         Previous.Previous) {
159       if (Previous.Previous->Tok.isLiteral())
160         return false;
161       if (Previous.Previous->is(tok::r_brace))
162         return false;
163       if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
164           (!Previous.Previous->MatchingParen ||
165            Previous.Previous->MatchingParen->isNot(
166                TT_OverloadedOperatorLParen))) {
167         return false;
168       }
169       if (Previous.Previous->is(tok::kw_operator) &&
170           CurrentToken->is(tok::l_paren)) {
171         return false;
172       }
173     }
174 
        // Tentatively treat the '<' as an opener: remember the kind of the
        // enclosing bracket and parse the contents in a new, non-expression
        // context.
175     FormatToken *Left = CurrentToken->Previous;
176     Left->ParentBracket = Contexts.back().ContextKind;
177     ScopedContextCreator ContextCreator(*this, tok::less, 12);
178     Contexts.back().IsExpression = false;
179 
180     const auto *BeforeLess = Left->Previous;
181 
182     // If there's a template keyword before the opening angle bracket, this is a
183     // template parameter, not an argument.
184     if (BeforeLess && BeforeLess->isNot(tok::kw_template))
185       Contexts.back().ContextType = Context::TemplateArgument;
186 
        // Java: skip a '?' that directly follows the '<'.
187     if (Style.Language == FormatStyle::LK_Java &&
188         CurrentToken->is(tok::question)) {
189       next();
190     }
191 
192     for (bool SeenTernaryOperator = false, MaybeAngles = true; CurrentToken;) {
          // IsExpression of the context that encloses the angle context.
193       const bool InExpr = Contexts[Contexts.size() - 2].IsExpression;
194       if (CurrentToken->is(tok::greater)) {
195         const auto *Next = CurrentToken->Next;
196         if (CurrentToken->isNot(TT_TemplateCloser)) {
197           // Try to do a better job at looking for ">>" within the condition of
198           // a statement. Conservatively insert spaces between consecutive ">"
199           // tokens to prevent splitting right shift operators and potentially
200           // altering program semantics. This check is overly conservative and
201           // will prevent spaces from being inserted in select nested template
202           // parameter cases, but should not alter program semantics.
203           if (Next && Next->is(tok::greater) &&
204               Left->ParentBracket != tok::less &&
205               CurrentToken->getStartOfNonWhitespace() ==
206                   Next->getStartOfNonWhitespace().getLocWithOffset(-1)) {
207             return false;
208           }
              // After a '?' or ':' inside an expression, only accept the '>' if
              // it is followed by '(' or '{'.
209           if (InExpr && SeenTernaryOperator &&
210               (!Next || !Next->isOneOf(tok::l_paren, tok::l_brace))) {
211             return false;
212           }
              // A low-precedence binary operator was seen earlier (see below).
213           if (!MaybeAngles)
214             return false;
215         }
            // Pair the brackets with each other.
216         Left->MatchingParen = CurrentToken;
217         CurrentToken->MatchingParen = Left;
218         // In TT_Proto, we must distinguish between:
219         //   map<key, value>
220         //   msg < item: data >
221         //   msg: < item: data >
222         // In TT_TextProto, map<key, value> does not occur.
223         if (Style.Language == FormatStyle::LK_TextProto ||
224             (Style.Language == FormatStyle::LK_Proto && BeforeLess &&
225              BeforeLess->isOneOf(TT_SelectorName, TT_DictLiteral))) {
226           CurrentToken->setType(TT_DictLiteral);
227         } else {
228           CurrentToken->setType(TT_TemplateCloser);
              // NOTE(review): presumably so that a '>' that was lexed as part
              // of a longer token is measured as one character — confirm.
229           CurrentToken->Tok.setLength(1);
230         }
            // Give up if a literal directly follows the '>'. Note that the type
            // and MatchingParen assigned above are left in place in that case.
231         if (Next && Next->Tok.isLiteral())
232           return false;
233         next();
234         return true;
235       }
          // Java: skip '?' tokens inside the angle brackets.
236       if (CurrentToken->is(tok::question) &&
237           Style.Language == FormatStyle::LK_Java) {
238         next();
239         continue;
240       }
          // A closing bracket of another kind means the '<' has no partner here.
241       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace))
242         return false;
243       const auto &Prev = *CurrentToken->Previous;
244       // If a && or || is found and interpreted as a binary operator, this set
245       // of angles is likely part of something like "a < b && c > d". If the
246       // angles are inside an expression, the ||/&& might also be a binary
247       // operator that was misinterpreted because we are parsing template
248       // parameters.
249       // FIXME: This is getting out of hand, write a decent parser.
250       if (MaybeAngles && InExpr && !Line.startsWith(tok::kw_template) &&
251           Prev.is(TT_BinaryOperator)) {
252         const auto Precedence = Prev.getPrecedence();
253         if (Precedence > prec::Conditional && Precedence < prec::Relational)
254           MaybeAngles = false;
255       }
          // Remember that a '?' or ':' was passed (ignored for proto languages).
256       if (Prev.isOneOf(tok::question, tok::colon) && !Style.isProto())
257         SeenTernaryOperator = true;
258       updateParameterCount(Left, CurrentToken);
          // Proto: the token before a ':' (or before a '{' / '<' that does not
          // itself follow a ':') is marked as a selector name.
259       if (Style.Language == FormatStyle::LK_Proto) {
260         if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
261           if (CurrentToken->is(tok::colon) ||
262               (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
263                Previous->isNot(tok::colon))) {
264             Previous->setType(TT_SelectorName);
265           }
266         }
267       }
268       if (Style.isTableGen()) {
269         if (CurrentToken->isOneOf(tok::comma, tok::equal)) {
270           // They appear as separators. Unless they are not in class definition.
271           next();
272           continue;
273         }
274         // In angle, there must be Value like tokens. Types are also able to be
275         // parsed in the same way with Values.
276         if (!parseTableGenValue())
277           return false;
278         continue;
279       }
280       if (!consumeToken())
281         return false;
282     }
        // The line ended before a closing '>' was found.
283     return false;
284   }
285 
parseUntouchableParens()286   bool parseUntouchableParens() {
287     while (CurrentToken) {
288       CurrentToken->Finalized = true;
289       switch (CurrentToken->Tok.getKind()) {
290       case tok::l_paren:
291         next();
292         if (!parseUntouchableParens())
293           return false;
294         continue;
295       case tok::r_paren:
296         next();
297         return true;
298       default:
299         // no-op
300         break;
301       }
302       next();
303     }
304     return false;
305   }
306 
/// Parses a parenthesized token sequence and pairs the '(' with its ')'.
///
/// On entry CurrentToken is the token right after the '(' (asserted below).
/// The function opens a new context for the parens, decides from the
/// surrounding tokens whether the contents are an expression, infers a token
/// type for the '(' where possible, and then consumes tokens until the
/// matching ')' is reached. At that point both parens get their MatchingParen,
/// the ')' may receive a type mirroring the '(' and the packing kind of the
/// '(' is recorded.
///
/// \param LookForDecls set when "if (" has been seen; enables the
/// `identifier * identifier` binary-operator fix-up inside the loop.
/// \returns \c true if the matching ')' was found and consumed; \c false if
/// there is no current token, a stray ']' or '}' is met, a nested parse fails,
/// or the line ends first.
parseParens(bool LookForDecls=false)307   bool parseParens(bool LookForDecls = false) {
308     if (!CurrentToken)
309       return false;
310     assert(CurrentToken->Previous && "Unknown previous token");
311     FormatToken &OpeningParen = *CurrentToken->Previous;
312     assert(OpeningParen.is(tok::l_paren));
313     FormatToken *PrevNonComment = OpeningParen.getPreviousNonComment();
        // Record the enclosing bracket kind before the new context is pushed.
314     OpeningParen.ParentBracket = Contexts.back().ContextKind;
315     ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
316 
317     // FIXME: This is a bit of a hack. Do better.
318     Contexts.back().ColonIsForRangeExpr =
319         Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
320 
        // Arguments of an untouchable macro are only marked as finalized.
321     if (OpeningParen.Previous &&
322         OpeningParen.Previous->is(TT_UntouchableMacroFunc)) {
323       OpeningParen.Finalized = true;
324       return parseUntouchableParens();
325     }
326 
327     bool StartsObjCMethodExpr = false;
328     if (!Style.isVerilog()) {
329       if (FormatToken *MaybeSel = OpeningParen.Previous) {
330         // @selector( starts a selector.
331         if (MaybeSel->isObjCAtKeyword(tok::objc_selector) &&
332             MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) {
333           StartsObjCMethodExpr = true;
334         }
335       }
336     }
337 
        // Decide whether the paren contents are an expression. The branches are
        // tried in order and the first match wins; if none matches, the flag
        // keeps the value the new context was created with.
338     if (OpeningParen.is(TT_OverloadedOperatorLParen)) {
339       // Find the previous kw_operator token.
340       FormatToken *Prev = &OpeningParen;
341       while (Prev->isNot(tok::kw_operator)) {
342         Prev = Prev->Previous;
343         assert(Prev && "Expect a kw_operator prior to the OperatorLParen!");
344       }
345 
346       // If faced with "a.operator*(argument)" or "a->operator*(argument)",
347       // i.e. the operator is called as a member function,
348       // then the argument must be an expression.
349       bool OperatorCalledAsMemberFunction =
350           Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow);
351       Contexts.back().IsExpression = OperatorCalledAsMemberFunction;
352     } else if (OpeningParen.is(TT_VerilogInstancePortLParen)) {
353       Contexts.back().IsExpression = true;
354       Contexts.back().ContextType = Context::VerilogInstancePortList;
355     } else if (Style.isJavaScript() &&
356                (Line.startsWith(Keywords.kw_type, tok::identifier) ||
357                 Line.startsWith(tok::kw_export, Keywords.kw_type,
358                                 tok::identifier))) {
359       // type X = (...);
360       // export type X = (...);
361       Contexts.back().IsExpression = false;
362     } else if (OpeningParen.Previous &&
363                (OpeningParen.Previous->isOneOf(
364                     tok::kw_static_assert, tok::kw_noexcept, tok::kw_explicit,
365                     tok::kw_while, tok::l_paren, tok::comma,
366                     TT_BinaryOperator) ||
367                 OpeningParen.Previous->isIf())) {
368       // static_assert, if and while usually contain expressions.
369       Contexts.back().IsExpression = true;
370     } else if (Style.isJavaScript() && OpeningParen.Previous &&
371                (OpeningParen.Previous->is(Keywords.kw_function) ||
372                 (OpeningParen.Previous->endsSequence(tok::identifier,
373                                                      Keywords.kw_function)))) {
374       // function(...) or function f(...)
375       Contexts.back().IsExpression = false;
376     } else if (Style.isJavaScript() && OpeningParen.Previous &&
377                OpeningParen.Previous->is(TT_JsTypeColon)) {
378       // let x: (SomeType);
379       Contexts.back().IsExpression = false;
380     } else if (isLambdaParameterList(&OpeningParen)) {
381       // This is a parameter list of a lambda expression.
382       Contexts.back().IsExpression = false;
383     } else if (OpeningParen.is(TT_RequiresExpressionLParen)) {
384       Contexts.back().IsExpression = false;
385     } else if (OpeningParen.Previous &&
386                OpeningParen.Previous->is(tok::kw__Generic)) {
387       Contexts.back().ContextType = Context::C11GenericSelection;
388       Contexts.back().IsExpression = true;
389     } else if (Line.InPPDirective &&
390                (!OpeningParen.Previous ||
391                 OpeningParen.Previous->isNot(tok::identifier))) {
392       Contexts.back().IsExpression = true;
393     } else if (Contexts[Contexts.size() - 2].CaretFound) {
394       // This is the parameter list of an ObjC block.
395       Contexts.back().IsExpression = false;
396     } else if (OpeningParen.Previous &&
397                OpeningParen.Previous->is(TT_ForEachMacro)) {
398       // The first argument to a foreach macro is a declaration.
399       Contexts.back().ContextType = Context::ForEachMacro;
400       Contexts.back().IsExpression = false;
401     } else if (OpeningParen.Previous && OpeningParen.Previous->MatchingParen &&
402                OpeningParen.Previous->MatchingParen->isOneOf(
403                    TT_ObjCBlockLParen, TT_FunctionTypeLParen)) {
404       Contexts.back().IsExpression = false;
405     } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
          // Outside declarations and PP directives everything except the heads
          // of `for` and `catch` is treated as an expression.
406       bool IsForOrCatch =
407           OpeningParen.Previous &&
408           OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch);
409       Contexts.back().IsExpression = !IsForOrCatch;
410     }
411 
412     if (Style.isTableGen()) {
413       if (FormatToken *Prev = OpeningParen.Previous) {
414         if (Prev->is(TT_TableGenCondOperator)) {
415           Contexts.back().IsTableGenCondOpe = true;
416           Contexts.back().IsExpression = true;
417         } else if (Contexts.size() > 1 &&
418                    Contexts[Contexts.size() - 2].IsTableGenBangOpe) {
419           // Hack to handle bang operators. The parent context's flag
420           // was set by parseTableGenSimpleValue().
421           // We have to specify the context outside because the prev of "(" may
422           // be ">", not the bang operator in this case.
423           Contexts.back().IsTableGenBangOpe = true;
424           Contexts.back().IsExpression = true;
425         } else {
426           // Otherwise, this paren seems DAGArg.
427           if (!parseTableGenDAGArg())
428             return false;
429           return parseTableGenDAGArgAndList(&OpeningParen);
430         }
431       }
432     }
433 
434     // Infer the role of the l_paren based on the previous token if we haven't
435     // detected one yet.
436     if (PrevNonComment && OpeningParen.is(TT_Unknown)) {
437       if (PrevNonComment->isAttribute()) {
438         OpeningParen.setType(TT_AttributeLParen);
439       } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
440                                          tok::kw_typeof,
441 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) tok::kw___##Trait,
442 #include "clang/Basic/TransformTypeTraits.def"
443                                          tok::kw__Atomic)) {
444         OpeningParen.setType(TT_TypeDeclarationParen);
445         // decltype() and typeof() usually contain expressions.
446         if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
447           Contexts.back().IsExpression = true;
448       }
449     }
450 
451     if (StartsObjCMethodExpr) {
452       Contexts.back().ColonIsObjCMethodExpr = true;
453       OpeningParen.setType(TT_ObjCMethodExpr);
454     }
455 
456     // MightBeFunctionType and ProbablyFunctionType are used for
457     // function pointer and reference types as well as Objective-C
458     // block types:
459     //
460     // void (*FunctionPointer)(void);
461     // void (&FunctionReference)(void);
462     // void (&&FunctionReference)(void);
463     // void (^ObjCBlock)(void);
464     bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
465     bool ProbablyFunctionType =
466         CurrentToken->isPointerOrReference() || CurrentToken->is(tok::caret);
467     bool HasMultipleLines = false;
468     bool HasMultipleParametersOnALine = false;
469     bool MightBeObjCForRangeLoop =
470         OpeningParen.Previous && OpeningParen.Previous->is(tok::kw_for);
471     FormatToken *PossibleObjCForInToken = nullptr;
472     while (CurrentToken) {
473       // LookForDecls is set when "if (" has been seen. Check for
474       // 'identifier' '*' 'identifier' followed by not '=' -- this
475       // '*' has to be a binary operator but determineStarAmpUsage() will
476       // categorize it as an unary operator, so set the right type here.
477       if (LookForDecls && CurrentToken->Next) {
478         FormatToken *Prev = CurrentToken->getPreviousNonComment();
479         if (Prev) {
480           FormatToken *PrevPrev = Prev->getPreviousNonComment();
481           FormatToken *Next = CurrentToken->Next;
482           if (PrevPrev && PrevPrev->is(tok::identifier) &&
483               PrevPrev->isNot(TT_TypeName) && Prev->isPointerOrReference() &&
484               CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
485             Prev->setType(TT_BinaryOperator);
                // Only the first such match is rewritten.
486             LookForDecls = false;
487           }
488         }
489       }
490 
          // A pointer/reference token directly after '(' or '::' suggests a
          // function type.
491       if (CurrentToken->Previous->is(TT_PointerOrReference) &&
492           CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
493                                                     tok::coloncolon)) {
494         ProbablyFunctionType = true;
495       }
496       if (CurrentToken->is(tok::comma))
497         MightBeFunctionType = false;
498       if (CurrentToken->Previous->is(TT_BinaryOperator))
499         Contexts.back().IsExpression = true;
500       if (CurrentToken->is(tok::r_paren)) {
            // `(*name)(` / `(^name)(`, or `(*name)[` in a declaration: type the
            // '(' as a function-type or ObjC-block paren.
501         if (OpeningParen.isNot(TT_CppCastLParen) && MightBeFunctionType &&
502             ProbablyFunctionType && CurrentToken->Next &&
503             (CurrentToken->Next->is(tok::l_paren) ||
504              (CurrentToken->Next->is(tok::l_square) &&
505               Line.MustBeDeclaration))) {
506           OpeningParen.setType(OpeningParen.Next->is(tok::caret)
507                                    ? TT_ObjCBlockLParen
508                                    : TT_FunctionTypeLParen);
509         }
            // Pair the parens with each other.
510         OpeningParen.MatchingParen = CurrentToken;
511         CurrentToken->MatchingParen = &OpeningParen;
512 
513         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
514             OpeningParen.Previous && OpeningParen.Previous->is(tok::l_paren)) {
515           // Detect the case where macros are used to generate lambdas or
516           // function bodies, e.g.:
517           //   auto my_lambda = MACRO((Type *type, int i) { .. body .. });
518           for (FormatToken *Tok = &OpeningParen; Tok != CurrentToken;
519                Tok = Tok->Next) {
520             if (Tok->is(TT_BinaryOperator) && Tok->isPointerOrReference())
521               Tok->setType(TT_PointerOrReference);
522           }
523         }
524 
525         if (StartsObjCMethodExpr) {
526           CurrentToken->setType(TT_ObjCMethodExpr);
527           if (Contexts.back().FirstObjCSelectorName) {
528             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
529                 Contexts.back().LongestObjCSelectorName;
530           }
531         }
532 
            // Give the ')' a type that mirrors what opened or preceded the '('.
533         if (OpeningParen.is(TT_AttributeLParen))
534           CurrentToken->setType(TT_AttributeRParen);
535         if (OpeningParen.is(TT_TypeDeclarationParen))
536           CurrentToken->setType(TT_TypeDeclarationParen);
537         if (OpeningParen.Previous &&
538             OpeningParen.Previous->is(TT_JavaAnnotation)) {
539           CurrentToken->setType(TT_JavaAnnotation);
540         }
541         if (OpeningParen.Previous &&
542             OpeningParen.Previous->is(TT_LeadingJavaAnnotation)) {
543           CurrentToken->setType(TT_LeadingJavaAnnotation);
544         }
545         if (OpeningParen.Previous &&
546             OpeningParen.Previous->is(TT_AttributeSquare)) {
547           CurrentToken->setType(TT_AttributeSquare);
548         }
549 
            // Record how the contents were laid out in the input: all on one
            // line, several parameters sharing a line, or one per line.
550         if (!HasMultipleLines)
551           OpeningParen.setPackingKind(PPK_Inconclusive);
552         else if (HasMultipleParametersOnALine)
553           OpeningParen.setPackingKind(PPK_BinPacked);
554         else
555           OpeningParen.setPackingKind(PPK_OnePerLine);
556 
557         next();
558         return true;
559       }
          // A closing bracket of another kind means the parens are unbalanced.
560       if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
561         return false;
562 
563       if (CurrentToken->is(tok::l_brace) && OpeningParen.is(TT_ObjCBlockLParen))
564         OpeningParen.setType(TT_Unknown);
          // A comma whose next token stays on the same line (and is not a
          // trailing comment) means two parameters share a line.
565       if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
566           !CurrentToken->Next->HasUnescapedNewline &&
567           !CurrentToken->Next->isTrailingComment()) {
568         HasMultipleParametersOnALine = true;
569       }
570       bool ProbablyFunctionTypeLParen =
571           (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
572            CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
          // A token following `const`, `auto` or a type name indicates a
          // declaration, unless that token is '{' or a '(' that does not look
          // like it opens a function type.
573       if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
574            CurrentToken->Previous->isTypeName(LangOpts)) &&
575           !(CurrentToken->is(tok::l_brace) ||
576             (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) {
577         Contexts.back().IsExpression = false;
578       }
          // A ';' or ':' rules out an ObjC for-in loop; undo a tentative
          // TT_ObjCForIn annotation.
579       if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
580         MightBeObjCForRangeLoop = false;
581         if (PossibleObjCForInToken) {
582           PossibleObjCForInToken->setType(TT_Unknown);
583           PossibleObjCForInToken = nullptr;
584         }
585       }
586       if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
587         PossibleObjCForInToken = CurrentToken;
588         PossibleObjCForInToken->setType(TT_ObjCForIn);
589       }
590       // When we discover a 'new', we set CanBeExpression to 'false' in order to
591       // parse the type correctly. Reset that after a comma.
592       if (CurrentToken->is(tok::comma))
593         Contexts.back().CanBeExpression = true;
594 
595       if (Style.isTableGen()) {
            // Separators are consumed here (and typed when inside a cond
            // operator); the value that follows is parsed below.
596         if (CurrentToken->is(tok::comma)) {
597           if (Contexts.back().IsTableGenCondOpe)
598             CurrentToken->setType(TT_TableGenCondOperatorComma);
599           next();
600         } else if (CurrentToken->is(tok::colon)) {
601           if (Contexts.back().IsTableGenCondOpe)
602             CurrentToken->setType(TT_TableGenCondOperatorColon);
603           next();
604         }
605         // In TableGen there must be Values in parens.
606         if (!parseTableGenValue())
607           return false;
608         continue;
609       }
610 
          // Keep the token being consumed so it can be passed to
          // updateParameterCount() after CurrentToken has advanced.
611       FormatToken *Tok = CurrentToken;
612       if (!consumeToken())
613         return false;
614       updateParameterCount(&OpeningParen, Tok);
615       if (CurrentToken && CurrentToken->HasUnescapedNewline)
616         HasMultipleLines = true;
617     }
        // The line ended before the closing ')' was found.
618     return false;
619   }
620 
isCSharpAttributeSpecifier(const FormatToken & Tok)621   bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
622     if (!Style.isCSharp())
623       return false;
624 
625     // `identifier[i]` is not an attribute.
626     if (Tok.Previous && Tok.Previous->is(tok::identifier))
627       return false;
628 
629     // Chains of [] in `identifier[i][j][k]` are not attributes.
630     if (Tok.Previous && Tok.Previous->is(tok::r_square)) {
631       auto *MatchingParen = Tok.Previous->MatchingParen;
632       if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare))
633         return false;
634     }
635 
636     const FormatToken *AttrTok = Tok.Next;
637     if (!AttrTok)
638       return false;
639 
640     // Just an empty declaration e.g. string [].
641     if (AttrTok->is(tok::r_square))
642       return false;
643 
644     // Move along the tokens inbetween the '[' and ']' e.g. [STAThread].
645     while (AttrTok && AttrTok->isNot(tok::r_square))
646       AttrTok = AttrTok->Next;
647 
648     if (!AttrTok)
649       return false;
650 
651     // Allow an attribute to be the only content of a file.
652     AttrTok = AttrTok->Next;
653     if (!AttrTok)
654       return true;
655 
656     // Limit this to being an access modifier that follows.
657     if (AttrTok->isAccessSpecifierKeyword() ||
658         AttrTok->isOneOf(tok::comment, tok::kw_class, tok::kw_static,
659                          tok::l_square, Keywords.kw_internal)) {
660       return true;
661     }
662 
663     // incase its a [XXX] retval func(....
664     if (AttrTok->Next &&
665         AttrTok->Next->startsSequence(tok::identifier, tok::l_paren)) {
666       return true;
667     }
668 
669     return false;
670   }
671 
parseSquare()672   bool parseSquare() {
673     if (!CurrentToken)
674       return false;
675 
676     // A '[' could be an index subscript (after an identifier or after
677     // ')' or ']'), it could be the start of an Objective-C method
678     // expression, it could the start of an Objective-C array literal,
679     // or it could be a C++ attribute specifier [[foo::bar]].
680     FormatToken *Left = CurrentToken->Previous;
681     Left->ParentBracket = Contexts.back().ContextKind;
682     FormatToken *Parent = Left->getPreviousNonComment();
683 
684     // Cases where '>' is followed by '['.
685     // In C++, this can happen either in array of templates (foo<int>[10])
686     // or when array is a nested template type (unique_ptr<type1<type2>[]>).
687     bool CppArrayTemplates =
688         IsCpp && Parent && Parent->is(TT_TemplateCloser) &&
689         (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
690          Contexts.back().ContextType == Context::TemplateArgument);
691 
692     const bool IsInnerSquare = Contexts.back().InCpp11AttributeSpecifier;
693     const bool IsCpp11AttributeSpecifier =
694         isCppAttribute(IsCpp, *Left) || IsInnerSquare;
695 
696     // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
697     bool IsCSharpAttributeSpecifier =
698         isCSharpAttributeSpecifier(*Left) ||
699         Contexts.back().InCSharpAttributeSpecifier;
700 
701     bool InsideInlineASM = Line.startsWith(tok::kw_asm);
702     bool IsCppStructuredBinding = Left->isCppStructuredBinding(IsCpp);
703     bool StartsObjCMethodExpr =
704         !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
705         IsCpp && !IsCpp11AttributeSpecifier && !IsCSharpAttributeSpecifier &&
706         Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
707         !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
708         (!Parent ||
709          Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
710                          tok::kw_return, tok::kw_throw) ||
711          Parent->isUnaryOperator() ||
712          // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
713          Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
714          (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
715           prec::Unknown));
716     bool ColonFound = false;
717 
718     unsigned BindingIncrease = 1;
719     if (IsCppStructuredBinding) {
720       Left->setType(TT_StructuredBindingLSquare);
721     } else if (Left->is(TT_Unknown)) {
722       if (StartsObjCMethodExpr) {
723         Left->setType(TT_ObjCMethodExpr);
724       } else if (InsideInlineASM) {
725         Left->setType(TT_InlineASMSymbolicNameLSquare);
726       } else if (IsCpp11AttributeSpecifier) {
727         Left->setType(TT_AttributeSquare);
728         if (!IsInnerSquare && Left->Previous)
729           Left->Previous->EndsCppAttributeGroup = false;
730       } else if (Style.isJavaScript() && Parent &&
731                  Contexts.back().ContextKind == tok::l_brace &&
732                  Parent->isOneOf(tok::l_brace, tok::comma)) {
733         Left->setType(TT_JsComputedPropertyName);
734       } else if (IsCpp && Contexts.back().ContextKind == tok::l_brace &&
735                  Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
736         Left->setType(TT_DesignatedInitializerLSquare);
737       } else if (IsCSharpAttributeSpecifier) {
738         Left->setType(TT_AttributeSquare);
739       } else if (CurrentToken->is(tok::r_square) && Parent &&
740                  Parent->is(TT_TemplateCloser)) {
741         Left->setType(TT_ArraySubscriptLSquare);
742       } else if (Style.isProto()) {
743         // Square braces in LK_Proto can either be message field attributes:
744         //
745         // optional Aaa aaa = 1 [
746         //   (aaa) = aaa
747         // ];
748         //
749         // extensions 123 [
750         //   (aaa) = aaa
751         // ];
752         //
753         // or text proto extensions (in options):
754         //
755         // option (Aaa.options) = {
756         //   [type.type/type] {
757         //     key: value
758         //   }
759         // }
760         //
761         // or repeated fields (in options):
762         //
763         // option (Aaa.options) = {
764         //   keys: [ 1, 2, 3 ]
765         // }
766         //
767         // In the first and the third case we want to spread the contents inside
768         // the square braces; in the second we want to keep them inline.
769         Left->setType(TT_ArrayInitializerLSquare);
770         if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
771                                 tok::equal) &&
772             !Left->endsSequence(tok::l_square, tok::numeric_constant,
773                                 tok::identifier) &&
774             !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
775           Left->setType(TT_ProtoExtensionLSquare);
776           BindingIncrease = 10;
777         }
778       } else if (!CppArrayTemplates && Parent &&
779                  Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
780                                  tok::comma, tok::l_paren, tok::l_square,
781                                  tok::question, tok::colon, tok::kw_return,
782                                  // Should only be relevant to JavaScript:
783                                  tok::kw_default)) {
784         Left->setType(TT_ArrayInitializerLSquare);
785       } else {
786         BindingIncrease = 10;
787         Left->setType(TT_ArraySubscriptLSquare);
788       }
789     }
790 
791     ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
792     Contexts.back().IsExpression = true;
793     if (Style.isJavaScript() && Parent && Parent->is(TT_JsTypeColon))
794       Contexts.back().IsExpression = false;
795 
796     Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
797     Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
798     Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier;
799 
800     while (CurrentToken) {
801       if (CurrentToken->is(tok::r_square)) {
802         if (IsCpp11AttributeSpecifier) {
803           CurrentToken->setType(TT_AttributeSquare);
804           if (!IsInnerSquare)
805             CurrentToken->EndsCppAttributeGroup = true;
806         }
807         if (IsCSharpAttributeSpecifier) {
808           CurrentToken->setType(TT_AttributeSquare);
809         } else if (((CurrentToken->Next &&
810                      CurrentToken->Next->is(tok::l_paren)) ||
811                     (CurrentToken->Previous &&
812                      CurrentToken->Previous->Previous == Left)) &&
813                    Left->is(TT_ObjCMethodExpr)) {
814           // An ObjC method call is rarely followed by an open parenthesis. It
815           // also can't be composed of just one token, unless it's a macro that
816           // will be expanded to more tokens.
817           // FIXME: Do we incorrectly label ":" with this?
818           StartsObjCMethodExpr = false;
819           Left->setType(TT_Unknown);
820         }
821         if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
822           CurrentToken->setType(TT_ObjCMethodExpr);
823           // If we haven't seen a colon yet, make sure the last identifier
824           // before the r_square is tagged as a selector name component.
825           if (!ColonFound && CurrentToken->Previous &&
826               CurrentToken->Previous->is(TT_Unknown) &&
827               canBeObjCSelectorComponent(*CurrentToken->Previous)) {
828             CurrentToken->Previous->setType(TT_SelectorName);
829           }
830           // determineStarAmpUsage() thinks that '*' '[' is allocating an
831           // array of pointers, but if '[' starts a selector then '*' is a
832           // binary operator.
833           if (Parent && Parent->is(TT_PointerOrReference))
834             Parent->overwriteFixedType(TT_BinaryOperator);
835         }
836         // An arrow after an ObjC method expression is not a lambda arrow.
837         if (CurrentToken->is(TT_ObjCMethodExpr) && CurrentToken->Next &&
838             CurrentToken->Next->is(TT_LambdaArrow)) {
839           CurrentToken->Next->overwriteFixedType(TT_Unknown);
840         }
841         Left->MatchingParen = CurrentToken;
842         CurrentToken->MatchingParen = Left;
843         // FirstObjCSelectorName is set when a colon is found. This does
844         // not work, however, when the method has no parameters.
845         // Here, we set FirstObjCSelectorName when the end of the method call is
846         // reached, in case it was not set already.
847         if (!Contexts.back().FirstObjCSelectorName) {
848           FormatToken *Previous = CurrentToken->getPreviousNonComment();
849           if (Previous && Previous->is(TT_SelectorName)) {
850             Previous->ObjCSelectorNameParts = 1;
851             Contexts.back().FirstObjCSelectorName = Previous;
852           }
853         } else {
854           Left->ParameterCount =
855               Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
856         }
857         if (Contexts.back().FirstObjCSelectorName) {
858           Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
859               Contexts.back().LongestObjCSelectorName;
860           if (Left->BlockParameterCount > 1)
861             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
862         }
863         if (Style.isTableGen() && Left->is(TT_TableGenListOpener))
864           CurrentToken->setType(TT_TableGenListCloser);
865         next();
866         return true;
867       }
868       if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
869         return false;
870       if (CurrentToken->is(tok::colon)) {
871         if (IsCpp11AttributeSpecifier &&
872             CurrentToken->endsSequence(tok::colon, tok::identifier,
873                                        tok::kw_using)) {
874           // Remember that this is a [[using ns: foo]] C++ attribute, so we
875           // don't add a space before the colon (unlike other colons).
876           CurrentToken->setType(TT_AttributeColon);
877         } else if (!Style.isVerilog() && !Line.InPragmaDirective &&
878                    Left->isOneOf(TT_ArraySubscriptLSquare,
879                                  TT_DesignatedInitializerLSquare)) {
880           Left->setType(TT_ObjCMethodExpr);
881           StartsObjCMethodExpr = true;
882           Contexts.back().ColonIsObjCMethodExpr = true;
883           if (Parent && Parent->is(tok::r_paren)) {
884             // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
885             Parent->setType(TT_CastRParen);
886           }
887         }
888         ColonFound = true;
889       }
890       if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
891           !ColonFound) {
892         Left->setType(TT_ArrayInitializerLSquare);
893       }
894       FormatToken *Tok = CurrentToken;
895       if (Style.isTableGen()) {
896         if (CurrentToken->isOneOf(tok::comma, tok::minus, tok::ellipsis)) {
897           // '-' and '...' appears as a separator in slice.
898           next();
899         } else {
900           // In TableGen there must be a list of Values in square brackets.
901           // It must be ValueList or SliceElements.
902           if (!parseTableGenValue())
903             return false;
904         }
905         updateParameterCount(Left, Tok);
906         continue;
907       }
908       if (!consumeToken())
909         return false;
910       updateParameterCount(Left, Tok);
911     }
912     return false;
913   }
914 
skipToNextNonComment()915   void skipToNextNonComment() {
916     next();
917     while (CurrentToken && CurrentToken->is(tok::comment))
918       next();
919   }
920 
921   // Simplified parser for TableGen Value. Returns true on success.
922   // It consists of SimpleValues, SimpleValues with Suffixes, and Value followed
923   // by '#', paste operator.
924   // There also exists the case the Value is parsed as NameValue.
925   // In this case, the Value ends if '{' is found.
parseTableGenValue(bool ParseNameMode=false)926   bool parseTableGenValue(bool ParseNameMode = false) {
927     if (!CurrentToken)
928       return false;
929     while (CurrentToken->is(tok::comment))
930       next();
931     if (!parseTableGenSimpleValue())
932       return false;
933     if (!CurrentToken)
934       return true;
935     // Value "#" [Value]
936     if (CurrentToken->is(tok::hash)) {
937       if (CurrentToken->Next &&
938           CurrentToken->Next->isOneOf(tok::colon, tok::semi, tok::l_brace)) {
939         // Trailing paste operator.
940         // These are only the allowed cases in TGParser::ParseValue().
941         CurrentToken->setType(TT_TableGenTrailingPasteOperator);
942         next();
943         return true;
944       }
945       FormatToken *HashTok = CurrentToken;
946       skipToNextNonComment();
947       HashTok->setType(TT_Unknown);
948       if (!parseTableGenValue(ParseNameMode))
949         return false;
950     }
951     // In name mode, '{' is regarded as the end of the value.
952     // See TGParser::ParseValue in TGParser.cpp
953     if (ParseNameMode && CurrentToken->is(tok::l_brace))
954       return true;
955     // These tokens indicates this is a value with suffixes.
956     if (CurrentToken->isOneOf(tok::l_brace, tok::l_square, tok::period)) {
957       CurrentToken->setType(TT_TableGenValueSuffix);
958       FormatToken *Suffix = CurrentToken;
959       skipToNextNonComment();
960       if (Suffix->is(tok::l_square))
961         return parseSquare();
962       if (Suffix->is(tok::l_brace)) {
963         Scopes.push_back(getScopeType(*Suffix));
964         return parseBrace();
965       }
966     }
967     return true;
968   }
969 
970   // TokVarName    ::=  "$" ualpha (ualpha |  "0"..."9")*
971   // Appears as a part of DagArg.
972   // This does not change the current token on fail.
tryToParseTableGenTokVar()973   bool tryToParseTableGenTokVar() {
974     if (!CurrentToken)
975       return false;
976     if (CurrentToken->is(tok::identifier) &&
977         CurrentToken->TokenText.front() == '$') {
978       skipToNextNonComment();
979       return true;
980     }
981     return false;
982   }
983 
984   // DagArg       ::=  Value [":" TokVarName] | TokVarName
985   // Appears as a part of SimpleValue6.
parseTableGenDAGArg(bool AlignColon=false)986   bool parseTableGenDAGArg(bool AlignColon = false) {
987     if (tryToParseTableGenTokVar())
988       return true;
989     if (parseTableGenValue()) {
990       if (CurrentToken && CurrentToken->is(tok::colon)) {
991         if (AlignColon)
992           CurrentToken->setType(TT_TableGenDAGArgListColonToAlign);
993         else
994           CurrentToken->setType(TT_TableGenDAGArgListColon);
995         skipToNextNonComment();
996         return tryToParseTableGenTokVar();
997       }
998       return true;
999     }
1000     return false;
1001   }
1002 
1003   // Judge if the token is a operator ID to insert line break in DAGArg.
1004   // That is, TableGenBreakingDAGArgOperators is empty (by the definition of the
1005   // option) or the token is in the list.
isTableGenDAGArgBreakingOperator(const FormatToken & Tok)1006   bool isTableGenDAGArgBreakingOperator(const FormatToken &Tok) {
1007     auto &Opes = Style.TableGenBreakingDAGArgOperators;
1008     // If the list is empty, all operators are breaking operators.
1009     if (Opes.empty())
1010       return true;
1011     // Otherwise, the operator is limited to normal identifiers.
1012     if (Tok.isNot(tok::identifier) ||
1013         Tok.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator)) {
1014       return false;
1015     }
1016     // The case next is colon, it is not a operator of identifier.
1017     if (!Tok.Next || Tok.Next->is(tok::colon))
1018       return false;
1019     return std::find(Opes.begin(), Opes.end(), Tok.TokenText.str()) !=
1020            Opes.end();
1021   }
1022 
1023   // SimpleValue6 ::=  "(" DagArg [DagArgList] ")"
1024   // This parses SimpleValue 6's inside part of "(" ")"
parseTableGenDAGArgAndList(FormatToken * Opener)1025   bool parseTableGenDAGArgAndList(FormatToken *Opener) {
1026     FormatToken *FirstTok = CurrentToken;
1027     if (!parseTableGenDAGArg())
1028       return false;
1029     bool BreakInside = false;
1030     if (Style.TableGenBreakInsideDAGArg != FormatStyle::DAS_DontBreak) {
1031       // Specialized detection for DAGArgOperator, that determines the way of
1032       // line break for this DAGArg elements.
1033       if (isTableGenDAGArgBreakingOperator(*FirstTok)) {
1034         // Special case for identifier DAGArg operator.
1035         BreakInside = true;
1036         Opener->setType(TT_TableGenDAGArgOpenerToBreak);
1037         if (FirstTok->isOneOf(TT_TableGenBangOperator,
1038                               TT_TableGenCondOperator)) {
1039           // Special case for bang/cond operators. Set the whole operator as
1040           // the DAGArg operator. Always break after it.
1041           CurrentToken->Previous->setType(TT_TableGenDAGArgOperatorToBreak);
1042         } else if (FirstTok->is(tok::identifier)) {
1043           if (Style.TableGenBreakInsideDAGArg == FormatStyle::DAS_BreakAll)
1044             FirstTok->setType(TT_TableGenDAGArgOperatorToBreak);
1045           else
1046             FirstTok->setType(TT_TableGenDAGArgOperatorID);
1047         }
1048       }
1049     }
1050     // Parse the [DagArgList] part
1051     bool FirstDAGArgListElm = true;
1052     while (CurrentToken) {
1053       if (!FirstDAGArgListElm && CurrentToken->is(tok::comma)) {
1054         CurrentToken->setType(BreakInside ? TT_TableGenDAGArgListCommaToBreak
1055                                           : TT_TableGenDAGArgListComma);
1056         skipToNextNonComment();
1057       }
1058       if (CurrentToken && CurrentToken->is(tok::r_paren)) {
1059         CurrentToken->setType(TT_TableGenDAGArgCloser);
1060         Opener->MatchingParen = CurrentToken;
1061         CurrentToken->MatchingParen = Opener;
1062         skipToNextNonComment();
1063         return true;
1064       }
1065       if (!parseTableGenDAGArg(
1066               BreakInside &&
1067               Style.AlignConsecutiveTableGenBreakingDAGArgColons.Enabled)) {
1068         return false;
1069       }
1070       FirstDAGArgListElm = false;
1071     }
1072     return false;
1073   }
1074 
parseTableGenSimpleValue()1075   bool parseTableGenSimpleValue() {
1076     assert(Style.isTableGen());
1077     if (!CurrentToken)
1078       return false;
1079     FormatToken *Tok = CurrentToken;
1080     skipToNextNonComment();
1081     // SimpleValue 1, 2, 3: Literals
1082     if (Tok->isOneOf(tok::numeric_constant, tok::string_literal,
1083                      TT_TableGenMultiLineString, tok::kw_true, tok::kw_false,
1084                      tok::question, tok::kw_int)) {
1085       return true;
1086     }
1087     // SimpleValue 4: ValueList, Type
1088     if (Tok->is(tok::l_brace)) {
1089       Scopes.push_back(getScopeType(*Tok));
1090       return parseBrace();
1091     }
1092     // SimpleValue 5: List initializer
1093     if (Tok->is(tok::l_square)) {
1094       Tok->setType(TT_TableGenListOpener);
1095       if (!parseSquare())
1096         return false;
1097       if (Tok->is(tok::less)) {
1098         CurrentToken->setType(TT_TemplateOpener);
1099         return parseAngle();
1100       }
1101       return true;
1102     }
1103     // SimpleValue 6: DAGArg [DAGArgList]
1104     // SimpleValue6 ::=  "(" DagArg [DagArgList] ")"
1105     if (Tok->is(tok::l_paren)) {
1106       Tok->setType(TT_TableGenDAGArgOpener);
1107       return parseTableGenDAGArgAndList(Tok);
1108     }
1109     // SimpleValue 9: Bang operator
1110     if (Tok->is(TT_TableGenBangOperator)) {
1111       if (CurrentToken && CurrentToken->is(tok::less)) {
1112         CurrentToken->setType(TT_TemplateOpener);
1113         skipToNextNonComment();
1114         if (!parseAngle())
1115           return false;
1116       }
1117       if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1118         return false;
1119       skipToNextNonComment();
1120       // FIXME: Hack using inheritance to child context
1121       Contexts.back().IsTableGenBangOpe = true;
1122       bool Result = parseParens();
1123       Contexts.back().IsTableGenBangOpe = false;
1124       return Result;
1125     }
1126     // SimpleValue 9: Cond operator
1127     if (Tok->is(TT_TableGenCondOperator)) {
1128       Tok = CurrentToken;
1129       skipToNextNonComment();
1130       if (!Tok || Tok->isNot(tok::l_paren))
1131         return false;
1132       bool Result = parseParens();
1133       return Result;
1134     }
1135     // We have to check identifier at the last because the kind of bang/cond
1136     // operators are also identifier.
1137     // SimpleValue 7: Identifiers
1138     if (Tok->is(tok::identifier)) {
1139       // SimpleValue 8: Anonymous record
1140       if (CurrentToken && CurrentToken->is(tok::less)) {
1141         CurrentToken->setType(TT_TemplateOpener);
1142         skipToNextNonComment();
1143         return parseAngle();
1144       }
1145       return true;
1146     }
1147 
1148     return false;
1149   }
1150 
couldBeInStructArrayInitializer() const1151   bool couldBeInStructArrayInitializer() const {
1152     if (Contexts.size() < 2)
1153       return false;
1154     // We want to back up no more then 2 context levels i.e.
1155     // . { { <-
1156     const auto End = std::next(Contexts.rbegin(), 2);
1157     auto Last = Contexts.rbegin();
1158     unsigned Depth = 0;
1159     for (; Last != End; ++Last)
1160       if (Last->ContextKind == tok::l_brace)
1161         ++Depth;
1162     return Depth == 2 && Last->ContextKind != tok::l_brace;
1163   }
1164 
parseBrace()1165   bool parseBrace() {
1166     if (!CurrentToken)
1167       return true;
1168 
1169     assert(CurrentToken->Previous);
1170     FormatToken &OpeningBrace = *CurrentToken->Previous;
1171     assert(OpeningBrace.is(tok::l_brace));
1172     OpeningBrace.ParentBracket = Contexts.back().ContextKind;
1173 
1174     if (Contexts.back().CaretFound)
1175       OpeningBrace.overwriteFixedType(TT_ObjCBlockLBrace);
1176     Contexts.back().CaretFound = false;
1177 
1178     ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
1179     Contexts.back().ColonIsDictLiteral = true;
1180     if (OpeningBrace.is(BK_BracedInit))
1181       Contexts.back().IsExpression = true;
1182     if (Style.isJavaScript() && OpeningBrace.Previous &&
1183         OpeningBrace.Previous->is(TT_JsTypeColon)) {
1184       Contexts.back().IsExpression = false;
1185     }
1186     if (Style.isVerilog() &&
1187         (!OpeningBrace.getPreviousNonComment() ||
1188          OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
1189       Contexts.back().VerilogMayBeConcatenation = true;
1190     }
1191     if (Style.isTableGen())
1192       Contexts.back().ColonIsDictLiteral = false;
1193 
1194     unsigned CommaCount = 0;
1195     while (CurrentToken) {
1196       if (CurrentToken->is(tok::r_brace)) {
1197         assert(!Scopes.empty());
1198         assert(Scopes.back() == getScopeType(OpeningBrace));
1199         Scopes.pop_back();
1200         assert(OpeningBrace.Optional == CurrentToken->Optional);
1201         OpeningBrace.MatchingParen = CurrentToken;
1202         CurrentToken->MatchingParen = &OpeningBrace;
1203         if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
1204           if (OpeningBrace.ParentBracket == tok::l_brace &&
1205               couldBeInStructArrayInitializer() && CommaCount > 0) {
1206             Contexts.back().ContextType = Context::StructArrayInitializer;
1207           }
1208         }
1209         next();
1210         return true;
1211       }
1212       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
1213         return false;
1214       updateParameterCount(&OpeningBrace, CurrentToken);
1215       if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
1216         FormatToken *Previous = CurrentToken->getPreviousNonComment();
1217         if (Previous->is(TT_JsTypeOptionalQuestion))
1218           Previous = Previous->getPreviousNonComment();
1219         if ((CurrentToken->is(tok::colon) && !Style.isTableGen() &&
1220              (!Contexts.back().ColonIsDictLiteral || !IsCpp)) ||
1221             Style.isProto()) {
1222           OpeningBrace.setType(TT_DictLiteral);
1223           if (Previous->Tok.getIdentifierInfo() ||
1224               Previous->is(tok::string_literal)) {
1225             Previous->setType(TT_SelectorName);
1226           }
1227         }
1228         if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown) &&
1229             !Style.isTableGen()) {
1230           OpeningBrace.setType(TT_DictLiteral);
1231         } else if (Style.isJavaScript()) {
1232           OpeningBrace.overwriteFixedType(TT_DictLiteral);
1233         }
1234       }
1235       if (CurrentToken->is(tok::comma)) {
1236         if (Style.isJavaScript())
1237           OpeningBrace.overwriteFixedType(TT_DictLiteral);
1238         ++CommaCount;
1239       }
1240       if (!consumeToken())
1241         return false;
1242     }
1243     return true;
1244   }
1245 
updateParameterCount(FormatToken * Left,FormatToken * Current)1246   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
1247     // For ObjC methods, the number of parameters is calculated differently as
1248     // method declarations have a different structure (the parameters are not
1249     // inside a bracket scope).
1250     if (Current->is(tok::l_brace) && Current->is(BK_Block))
1251       ++Left->BlockParameterCount;
1252     if (Current->is(tok::comma)) {
1253       ++Left->ParameterCount;
1254       if (!Left->Role)
1255         Left->Role.reset(new CommaSeparatedList(Style));
1256       Left->Role->CommaFound(Current);
1257     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
1258       Left->ParameterCount = 1;
1259     }
1260   }
1261 
parseConditional()1262   bool parseConditional() {
1263     while (CurrentToken) {
1264       if (CurrentToken->is(tok::colon)) {
1265         CurrentToken->setType(TT_ConditionalExpr);
1266         next();
1267         return true;
1268       }
1269       if (!consumeToken())
1270         return false;
1271     }
1272     return false;
1273   }
1274 
parseTemplateDeclaration()1275   bool parseTemplateDeclaration() {
1276     if (!CurrentToken || CurrentToken->isNot(tok::less))
1277       return false;
1278 
1279     CurrentToken->setType(TT_TemplateOpener);
1280     next();
1281 
1282     TemplateDeclarationDepth++;
1283     const bool WellFormed = parseAngle();
1284     TemplateDeclarationDepth--;
1285     if (!WellFormed)
1286       return false;
1287 
1288     if (CurrentToken && TemplateDeclarationDepth == 0)
1289       CurrentToken->Previous->ClosesTemplateDeclaration = true;
1290 
1291     return true;
1292   }
1293 
consumeToken()1294   bool consumeToken() {
1295     if (IsCpp) {
1296       const auto *Prev = CurrentToken->getPreviousNonComment();
1297       if (Prev && Prev->is(tok::r_square) && Prev->is(TT_AttributeSquare) &&
1298           CurrentToken->isOneOf(tok::kw_if, tok::kw_switch, tok::kw_case,
1299                                 tok::kw_default, tok::kw_for, tok::kw_while) &&
1300           mustBreakAfterAttributes(*CurrentToken, Style)) {
1301         CurrentToken->MustBreakBefore = true;
1302       }
1303     }
1304     FormatToken *Tok = CurrentToken;
1305     next();
1306     // In Verilog primitives' state tables, `:`, `?`, and `-` aren't normal
1307     // operators.
1308     if (Tok->is(TT_VerilogTableItem))
1309       return true;
1310     // Multi-line string itself is a single annotated token.
1311     if (Tok->is(TT_TableGenMultiLineString))
1312       return true;
1313     switch (Tok->Tok.getKind()) {
1314     case tok::plus:
1315     case tok::minus:
1316       if (!Tok->Previous && Line.MustBeDeclaration)
1317         Tok->setType(TT_ObjCMethodSpecifier);
1318       break;
1319     case tok::colon:
1320       if (!Tok->Previous)
1321         return false;
1322       // Goto labels and case labels are already identified in
1323       // UnwrappedLineParser.
1324       if (Tok->isTypeFinalized())
1325         break;
1326       // Colons from ?: are handled in parseConditional().
1327       if (Style.isJavaScript()) {
1328         if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
1329             (Contexts.size() == 1 &&               // switch/case labels
1330              !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
1331             Contexts.back().ContextKind == tok::l_paren ||  // function params
1332             Contexts.back().ContextKind == tok::l_square || // array type
1333             (!Contexts.back().IsExpression &&
1334              Contexts.back().ContextKind == tok::l_brace) || // object type
1335             (Contexts.size() == 1 &&
1336              Line.MustBeDeclaration)) { // method/property declaration
1337           Contexts.back().IsExpression = false;
1338           Tok->setType(TT_JsTypeColon);
1339           break;
1340         }
1341       } else if (Style.isCSharp()) {
1342         if (Contexts.back().InCSharpAttributeSpecifier) {
1343           Tok->setType(TT_AttributeColon);
1344           break;
1345         }
1346         if (Contexts.back().ContextKind == tok::l_paren) {
1347           Tok->setType(TT_CSharpNamedArgumentColon);
1348           break;
1349         }
1350       } else if (Style.isVerilog() && Tok->isNot(TT_BinaryOperator)) {
1351         // The distribution weight operators are labeled
1352         // TT_BinaryOperator by the lexer.
1353         if (Keywords.isVerilogEnd(*Tok->Previous) ||
1354             Keywords.isVerilogBegin(*Tok->Previous)) {
1355           Tok->setType(TT_VerilogBlockLabelColon);
1356         } else if (Contexts.back().ContextKind == tok::l_square) {
1357           Tok->setType(TT_BitFieldColon);
1358         } else if (Contexts.back().ColonIsDictLiteral) {
1359           Tok->setType(TT_DictLiteral);
1360         } else if (Contexts.size() == 1) {
1361           // In Verilog a case label doesn't have the case keyword. We
1362           // assume a colon following an expression is a case label.
1363           // Colons from ?: are annotated in parseConditional().
1364           Tok->setType(TT_CaseLabelColon);
1365           if (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))
1366             --Line.Level;
1367         }
1368         break;
1369       }
1370       if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) ||
1371           Line.First->startsSequence(tok::kw_export, Keywords.kw_module) ||
1372           Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) {
1373         Tok->setType(TT_ModulePartitionColon);
1374       } else if (Line.First->is(tok::kw_asm)) {
1375         Tok->setType(TT_InlineASMColon);
1376       } else if (Contexts.back().ColonIsDictLiteral || Style.isProto()) {
1377         Tok->setType(TT_DictLiteral);
1378         if (Style.Language == FormatStyle::LK_TextProto) {
1379           if (FormatToken *Previous = Tok->getPreviousNonComment())
1380             Previous->setType(TT_SelectorName);
1381         }
1382       } else if (Contexts.back().ColonIsObjCMethodExpr ||
1383                  Line.startsWith(TT_ObjCMethodSpecifier)) {
1384         Tok->setType(TT_ObjCMethodExpr);
1385         const FormatToken *BeforePrevious = Tok->Previous->Previous;
1386         // Ensure we tag all identifiers in method declarations as
1387         // TT_SelectorName.
1388         bool UnknownIdentifierInMethodDeclaration =
1389             Line.startsWith(TT_ObjCMethodSpecifier) &&
1390             Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
1391         if (!BeforePrevious ||
1392             // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
1393             !(BeforePrevious->is(TT_CastRParen) ||
1394               (BeforePrevious->is(TT_ObjCMethodExpr) &&
1395                BeforePrevious->is(tok::colon))) ||
1396             BeforePrevious->is(tok::r_square) ||
1397             Contexts.back().LongestObjCSelectorName == 0 ||
1398             UnknownIdentifierInMethodDeclaration) {
1399           Tok->Previous->setType(TT_SelectorName);
1400           if (!Contexts.back().FirstObjCSelectorName) {
1401             Contexts.back().FirstObjCSelectorName = Tok->Previous;
1402           } else if (Tok->Previous->ColumnWidth >
1403                      Contexts.back().LongestObjCSelectorName) {
1404             Contexts.back().LongestObjCSelectorName =
1405                 Tok->Previous->ColumnWidth;
1406           }
1407           Tok->Previous->ParameterIndex =
1408               Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1409           ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1410         }
1411       } else if (Contexts.back().ColonIsForRangeExpr) {
1412         Tok->setType(TT_RangeBasedForLoopColon);
1413       } else if (Contexts.back().ContextType == Context::C11GenericSelection) {
1414         Tok->setType(TT_GenericSelectionColon);
1415       } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
1416         Tok->setType(TT_BitFieldColon);
1417       } else if (Contexts.size() == 1 &&
1418                  !Line.First->isOneOf(tok::kw_enum, tok::kw_case,
1419                                       tok::kw_default)) {
1420         FormatToken *Prev = Tok->getPreviousNonComment();
1421         if (!Prev)
1422           break;
1423         if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) ||
1424             Prev->ClosesRequiresClause) {
1425           Tok->setType(TT_CtorInitializerColon);
1426         } else if (Prev->is(tok::kw_try)) {
1427           // Member initializer list within function try block.
1428           FormatToken *PrevPrev = Prev->getPreviousNonComment();
1429           if (!PrevPrev)
1430             break;
1431           if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
1432             Tok->setType(TT_CtorInitializerColon);
1433         } else {
1434           Tok->setType(TT_InheritanceColon);
1435           if (Prev->isAccessSpecifierKeyword())
1436             Line.Type = LT_AccessModifier;
1437         }
1438       } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
1439                  (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
1440                   (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
1441                    Tok->Next->Next->is(tok::colon)))) {
1442         // This handles a special macro in ObjC code where selectors including
1443         // the colon are passed as macro arguments.
1444         Tok->setType(TT_ObjCMethodExpr);
1445       }
1446       break;
1447     case tok::pipe:
1448     case tok::amp:
1449       // | and & in declarations/type expressions represent union and
1450       // intersection types, respectively.
1451       if (Style.isJavaScript() && !Contexts.back().IsExpression)
1452         Tok->setType(TT_JsTypeOperator);
1453       break;
1454     case tok::kw_if:
1455       if (Style.isTableGen()) {
1456         // In TableGen it has the form 'if' <value> 'then'.
1457         if (!parseTableGenValue())
1458           return false;
1459         if (CurrentToken && CurrentToken->is(Keywords.kw_then))
1460           next(); // skip then
1461         break;
1462       }
1463       if (CurrentToken &&
1464           CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) {
1465         next();
1466       }
1467       [[fallthrough]];
1468     case tok::kw_while:
1469       if (CurrentToken && CurrentToken->is(tok::l_paren)) {
1470         next();
1471         if (!parseParens(/*LookForDecls=*/true))
1472           return false;
1473       }
1474       break;
1475     case tok::kw_for:
1476       if (Style.isJavaScript()) {
1477         // x.for and {for: ...}
1478         if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
1479             (Tok->Next && Tok->Next->is(tok::colon))) {
1480           break;
1481         }
1482         // JS' for await ( ...
1483         if (CurrentToken && CurrentToken->is(Keywords.kw_await))
1484           next();
1485       }
1486       if (IsCpp && CurrentToken && CurrentToken->is(tok::kw_co_await))
1487         next();
1488       Contexts.back().ColonIsForRangeExpr = true;
1489       if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1490         return false;
1491       next();
1492       if (!parseParens())
1493         return false;
1494       break;
1495     case tok::l_paren:
1496       // When faced with 'operator()()', the kw_operator handler incorrectly
1497       // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
1498       // the first two parens OverloadedOperators and the second l_paren an
1499       // OverloadedOperatorLParen.
1500       if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
1501           Tok->Previous->MatchingParen &&
1502           Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
1503         Tok->Previous->setType(TT_OverloadedOperator);
1504         Tok->Previous->MatchingParen->setType(TT_OverloadedOperator);
1505         Tok->setType(TT_OverloadedOperatorLParen);
1506       }
1507 
1508       if (Style.isVerilog()) {
1509         // Identify the parameter list and port list in a module instantiation.
1510         // This is still needed when we already have
1511         // UnwrappedLineParser::parseVerilogHierarchyHeader because that
1512         // function is only responsible for the definition, not the
1513         // instantiation.
1514         auto IsInstancePort = [&]() {
1515           const FormatToken *Prev = Tok->getPreviousNonComment();
1516           const FormatToken *PrevPrev;
1517           // In the following example all 4 left parentheses will be treated as
1518           // 'TT_VerilogInstancePortLParen'.
1519           //
1520           //   module_x instance_1(port_1); // Case A.
1521           //   module_x #(parameter_1)      // Case B.
1522           //       instance_2(port_1),      // Case C.
1523           //       instance_3(port_1);      // Case D.
1524           if (!Prev || !(PrevPrev = Prev->getPreviousNonComment()))
1525             return false;
1526           // Case A.
1527           if (Keywords.isVerilogIdentifier(*Prev) &&
1528               Keywords.isVerilogIdentifier(*PrevPrev)) {
1529             return true;
1530           }
1531           // Case B.
1532           if (Prev->is(Keywords.kw_verilogHash) &&
1533               Keywords.isVerilogIdentifier(*PrevPrev)) {
1534             return true;
1535           }
1536           // Case C.
1537           if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::r_paren))
1538             return true;
1539           // Case D.
1540           if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::comma)) {
1541             const FormatToken *PrevParen = PrevPrev->getPreviousNonComment();
1542             if (PrevParen->is(tok::r_paren) && PrevParen->MatchingParen &&
1543                 PrevParen->MatchingParen->is(TT_VerilogInstancePortLParen)) {
1544               return true;
1545             }
1546           }
1547           return false;
1548         };
1549 
1550         if (IsInstancePort())
1551           Tok->setFinalizedType(TT_VerilogInstancePortLParen);
1552       }
1553 
1554       if (!parseParens())
1555         return false;
1556       if (Line.MustBeDeclaration && Contexts.size() == 1 &&
1557           !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
1558           !Line.startsWith(tok::l_paren) &&
1559           !Tok->isOneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen)) {
1560         if (const auto *Previous = Tok->Previous;
1561             !Previous ||
1562             (!Previous->isAttribute() &&
1563              !Previous->isOneOf(TT_RequiresClause, TT_LeadingJavaAnnotation))) {
1564           Line.MightBeFunctionDecl = true;
1565           Tok->MightBeFunctionDeclParen = true;
1566         }
1567       }
1568       break;
1569     case tok::l_square:
1570       if (Style.isTableGen())
1571         Tok->setType(TT_TableGenListOpener);
1572       if (!parseSquare())
1573         return false;
1574       break;
1575     case tok::l_brace:
1576       if (Style.Language == FormatStyle::LK_TextProto) {
1577         FormatToken *Previous = Tok->getPreviousNonComment();
1578         if (Previous && Previous->isNot(TT_DictLiteral))
1579           Previous->setType(TT_SelectorName);
1580       }
1581       Scopes.push_back(getScopeType(*Tok));
1582       if (!parseBrace())
1583         return false;
1584       break;
1585     case tok::less:
1586       if (parseAngle()) {
1587         Tok->setType(TT_TemplateOpener);
1588         // In TT_Proto, we must distinguish between:
1589         //   map<key, value>
1590         //   msg < item: data >
1591         //   msg: < item: data >
1592         // In TT_TextProto, map<key, value> does not occur.
1593         if (Style.Language == FormatStyle::LK_TextProto ||
1594             (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
1595              Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
1596           Tok->setType(TT_DictLiteral);
1597           FormatToken *Previous = Tok->getPreviousNonComment();
1598           if (Previous && Previous->isNot(TT_DictLiteral))
1599             Previous->setType(TT_SelectorName);
1600         }
1601         if (Style.isTableGen())
1602           Tok->setType(TT_TemplateOpener);
1603       } else {
1604         Tok->setType(TT_BinaryOperator);
1605         NonTemplateLess.insert(Tok);
1606         CurrentToken = Tok;
1607         next();
1608       }
1609       break;
1610     case tok::r_paren:
1611     case tok::r_square:
1612       return false;
1613     case tok::r_brace:
1614       // Don't pop scope when encountering unbalanced r_brace.
1615       if (!Scopes.empty())
1616         Scopes.pop_back();
1617       // Lines can start with '}'.
1618       if (Tok->Previous)
1619         return false;
1620       break;
1621     case tok::greater:
1622       if (Style.Language != FormatStyle::LK_TextProto && Tok->is(TT_Unknown))
1623         Tok->setType(TT_BinaryOperator);
1624       if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
1625         Tok->SpacesRequiredBefore = 1;
1626       break;
1627     case tok::kw_operator:
1628       if (Style.isProto())
1629         break;
1630       while (CurrentToken &&
1631              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
1632         if (CurrentToken->isOneOf(tok::star, tok::amp))
1633           CurrentToken->setType(TT_PointerOrReference);
1634         auto Next = CurrentToken->getNextNonComment();
1635         if (!Next)
1636           break;
1637         if (Next->is(tok::less))
1638           next();
1639         else
1640           consumeToken();
1641         if (!CurrentToken)
1642           break;
1643         auto Previous = CurrentToken->getPreviousNonComment();
1644         assert(Previous);
1645         if (CurrentToken->is(tok::comma) && Previous->isNot(tok::kw_operator))
1646           break;
1647         if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, tok::comma,
1648                               tok::star, tok::arrow, tok::amp, tok::ampamp) ||
1649             // User defined literal.
1650             Previous->TokenText.starts_with("\"\"")) {
1651           Previous->setType(TT_OverloadedOperator);
1652           if (CurrentToken->isOneOf(tok::less, tok::greater))
1653             break;
1654         }
1655       }
1656       if (CurrentToken && CurrentToken->is(tok::l_paren))
1657         CurrentToken->setType(TT_OverloadedOperatorLParen);
1658       if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
1659         CurrentToken->Previous->setType(TT_OverloadedOperator);
1660       break;
1661     case tok::question:
1662       if (Style.isJavaScript() && Tok->Next &&
1663           Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
1664                              tok::r_brace, tok::r_square)) {
1665         // Question marks before semicolons, colons, etc. indicate optional
1666         // types (fields, parameters), e.g.
1667         //   function(x?: string, y?) {...}
1668         //   class X { y?; }
1669         Tok->setType(TT_JsTypeOptionalQuestion);
1670         break;
1671       }
1672       // Declarations cannot be conditional expressions, this can only be part
1673       // of a type declaration.
1674       if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
1675           Style.isJavaScript()) {
1676         break;
1677       }
1678       if (Style.isCSharp()) {
1679         // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be
1680         // nullable types.
1681 
1682         // `Type?)`, `Type?>`, `Type? name;`
1683         if (Tok->Next &&
1684             (Tok->Next->startsSequence(tok::question, tok::r_paren) ||
1685              Tok->Next->startsSequence(tok::question, tok::greater) ||
1686              Tok->Next->startsSequence(tok::question, tok::identifier,
1687                                        tok::semi))) {
1688           Tok->setType(TT_CSharpNullable);
1689           break;
1690         }
1691 
1692         // `Type? name =`
1693         if (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next &&
1694             Tok->Next->Next->is(tok::equal)) {
1695           Tok->setType(TT_CSharpNullable);
1696           break;
1697         }
1698 
1699         // Line.MustBeDeclaration will be true for `Type? name;`.
1700         // But not
1701         // cond ? "A" : "B";
1702         // cond ? id : "B";
1703         // cond ? cond2 ? "A" : "B" : "C";
1704         if (!Contexts.back().IsExpression && Line.MustBeDeclaration &&
1705             (!Tok->Next ||
1706              !Tok->Next->isOneOf(tok::identifier, tok::string_literal) ||
1707              !Tok->Next->Next ||
1708              !Tok->Next->Next->isOneOf(tok::colon, tok::question))) {
1709           Tok->setType(TT_CSharpNullable);
1710           break;
1711         }
1712       }
1713       parseConditional();
1714       break;
1715     case tok::kw_template:
1716       parseTemplateDeclaration();
1717       break;
1718     case tok::comma:
1719       switch (Contexts.back().ContextType) {
1720       case Context::CtorInitializer:
1721         Tok->setType(TT_CtorInitializerComma);
1722         break;
1723       case Context::InheritanceList:
1724         Tok->setType(TT_InheritanceComma);
1725         break;
1726       case Context::VerilogInstancePortList:
1727         Tok->setFinalizedType(TT_VerilogInstancePortComma);
1728         break;
1729       default:
1730         if (Style.isVerilog() && Contexts.size() == 1 &&
1731             Line.startsWith(Keywords.kw_assign)) {
1732           Tok->setFinalizedType(TT_VerilogAssignComma);
1733         } else if (Contexts.back().FirstStartOfName &&
1734                    (Contexts.size() == 1 || startsWithInitStatement(Line))) {
1735           Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
1736           Line.IsMultiVariableDeclStmt = true;
1737         }
1738         break;
1739       }
1740       if (Contexts.back().ContextType == Context::ForEachMacro)
1741         Contexts.back().IsExpression = true;
1742       break;
1743     case tok::kw_default:
1744       // Unindent case labels.
1745       if (Style.isVerilog() && Keywords.isVerilogEndOfLabel(*Tok) &&
1746           (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))) {
1747         --Line.Level;
1748       }
1749       break;
1750     case tok::identifier:
1751       if (Tok->isOneOf(Keywords.kw___has_include,
1752                        Keywords.kw___has_include_next)) {
1753         parseHasInclude();
1754       }
1755       if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
1756           Tok->Next->isNot(tok::l_paren)) {
1757         Tok->setType(TT_CSharpGenericTypeConstraint);
1758         parseCSharpGenericTypeConstraint();
1759         if (!Tok->getPreviousNonComment())
1760           Line.IsContinuation = true;
1761       }
1762       if (Style.isTableGen()) {
1763         if (Tok->is(Keywords.kw_assert)) {
1764           if (!parseTableGenValue())
1765             return false;
1766         } else if (Tok->isOneOf(Keywords.kw_def, Keywords.kw_defm) &&
1767                    (!Tok->Next ||
1768                     !Tok->Next->isOneOf(tok::colon, tok::l_brace))) {
1769           // The case NameValue appears.
1770           if (!parseTableGenValue(true))
1771             return false;
1772         }
1773       }
1774       break;
1775     case tok::arrow:
1776       if (Tok->isNot(TT_LambdaArrow) && Tok->Previous &&
1777           Tok->Previous->is(tok::kw_noexcept)) {
1778         Tok->setType(TT_TrailingReturnArrow);
1779       }
1780       break;
1781     case tok::equal:
1782       // In TableGen, there must be a value after "=";
1783       if (Style.isTableGen() && !parseTableGenValue())
1784         return false;
1785       break;
1786     default:
1787       break;
1788     }
1789     return true;
1790   }
1791 
parseCSharpGenericTypeConstraint()1792   void parseCSharpGenericTypeConstraint() {
1793     int OpenAngleBracketsCount = 0;
1794     while (CurrentToken) {
1795       if (CurrentToken->is(tok::less)) {
1796         // parseAngle is too greedy and will consume the whole line.
1797         CurrentToken->setType(TT_TemplateOpener);
1798         ++OpenAngleBracketsCount;
1799         next();
1800       } else if (CurrentToken->is(tok::greater)) {
1801         CurrentToken->setType(TT_TemplateCloser);
1802         --OpenAngleBracketsCount;
1803         next();
1804       } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) {
1805         // We allow line breaks after GenericTypeConstraintComma's
1806         // so do not flag commas in Generics as GenericTypeConstraintComma's.
1807         CurrentToken->setType(TT_CSharpGenericTypeConstraintComma);
1808         next();
1809       } else if (CurrentToken->is(Keywords.kw_where)) {
1810         CurrentToken->setType(TT_CSharpGenericTypeConstraint);
1811         next();
1812       } else if (CurrentToken->is(tok::colon)) {
1813         CurrentToken->setType(TT_CSharpGenericTypeConstraintColon);
1814         next();
1815       } else {
1816         next();
1817       }
1818     }
1819   }
1820 
parseIncludeDirective()1821   void parseIncludeDirective() {
1822     if (CurrentToken && CurrentToken->is(tok::less)) {
1823       next();
1824       while (CurrentToken) {
1825         // Mark tokens up to the trailing line comments as implicit string
1826         // literals.
1827         if (CurrentToken->isNot(tok::comment) &&
1828             !CurrentToken->TokenText.starts_with("//")) {
1829           CurrentToken->setType(TT_ImplicitStringLiteral);
1830         }
1831         next();
1832       }
1833     }
1834   }
1835 
parseWarningOrError()1836   void parseWarningOrError() {
1837     next();
1838     // We still want to format the whitespace left of the first token of the
1839     // warning or error.
1840     next();
1841     while (CurrentToken) {
1842       CurrentToken->setType(TT_ImplicitStringLiteral);
1843       next();
1844     }
1845   }
1846 
parsePragma()1847   void parsePragma() {
1848     next(); // Consume "pragma".
1849     if (CurrentToken &&
1850         CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option,
1851                               Keywords.kw_region)) {
1852       bool IsMarkOrRegion =
1853           CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_region);
1854       next();
1855       next(); // Consume first token (so we fix leading whitespace).
1856       while (CurrentToken) {
1857         if (IsMarkOrRegion || CurrentToken->Previous->is(TT_BinaryOperator))
1858           CurrentToken->setType(TT_ImplicitStringLiteral);
1859         next();
1860       }
1861     }
1862   }
1863 
parseHasInclude()1864   void parseHasInclude() {
1865     if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1866       return;
1867     next(); // '('
1868     parseIncludeDirective();
1869     next(); // ')'
1870   }
1871 
parsePreprocessorDirective()1872   LineType parsePreprocessorDirective() {
1873     bool IsFirstToken = CurrentToken->IsFirst;
1874     LineType Type = LT_PreprocessorDirective;
1875     next();
1876     if (!CurrentToken)
1877       return Type;
1878 
1879     if (Style.isJavaScript() && IsFirstToken) {
1880       // JavaScript files can contain shebang lines of the form:
1881       // #!/usr/bin/env node
1882       // Treat these like C++ #include directives.
1883       while (CurrentToken) {
1884         // Tokens cannot be comments here.
1885         CurrentToken->setType(TT_ImplicitStringLiteral);
1886         next();
1887       }
1888       return LT_ImportStatement;
1889     }
1890 
1891     if (CurrentToken->is(tok::numeric_constant)) {
1892       CurrentToken->SpacesRequiredBefore = 1;
1893       return Type;
1894     }
1895     // Hashes in the middle of a line can lead to any strange token
1896     // sequence.
1897     if (!CurrentToken->Tok.getIdentifierInfo())
1898       return Type;
1899     // In Verilog macro expansions start with a backtick just like preprocessor
1900     // directives. Thus we stop if the word is not a preprocessor directive.
1901     if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken))
1902       return LT_Invalid;
1903     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1904     case tok::pp_include:
1905     case tok::pp_include_next:
1906     case tok::pp_import:
1907       next();
1908       parseIncludeDirective();
1909       Type = LT_ImportStatement;
1910       break;
1911     case tok::pp_error:
1912     case tok::pp_warning:
1913       parseWarningOrError();
1914       break;
1915     case tok::pp_pragma:
1916       parsePragma();
1917       break;
1918     case tok::pp_if:
1919     case tok::pp_elif:
1920       Contexts.back().IsExpression = true;
1921       next();
1922       if (CurrentToken)
1923         CurrentToken->SpacesRequiredBefore = true;
1924       parseLine();
1925       break;
1926     default:
1927       break;
1928     }
1929     while (CurrentToken) {
1930       FormatToken *Tok = CurrentToken;
1931       next();
1932       if (Tok->is(tok::l_paren)) {
1933         parseParens();
1934       } else if (Tok->isOneOf(Keywords.kw___has_include,
1935                               Keywords.kw___has_include_next)) {
1936         parseHasInclude();
1937       }
1938     }
1939     return Type;
1940   }
1941 
1942 public:
parseLine()1943   LineType parseLine() {
1944     if (!CurrentToken)
1945       return LT_Invalid;
1946     NonTemplateLess.clear();
1947     if (!Line.InMacroBody && CurrentToken->is(tok::hash)) {
1948       // We were not yet allowed to use C++17 optional when this was being
1949       // written. So we used LT_Invalid to mark that the line is not a
1950       // preprocessor directive.
1951       auto Type = parsePreprocessorDirective();
1952       if (Type != LT_Invalid)
1953         return Type;
1954     }
1955 
1956     // Directly allow to 'import <string-literal>' to support protocol buffer
1957     // definitions (github.com/google/protobuf) or missing "#" (either way we
1958     // should not break the line).
1959     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1960     if ((Style.Language == FormatStyle::LK_Java &&
1961          CurrentToken->is(Keywords.kw_package)) ||
1962         (!Style.isVerilog() && Info &&
1963          Info->getPPKeywordID() == tok::pp_import && CurrentToken->Next &&
1964          CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1965                                      tok::kw_static))) {
1966       next();
1967       parseIncludeDirective();
1968       return LT_ImportStatement;
1969     }
1970 
1971     // If this line starts and ends in '<' and '>', respectively, it is likely
1972     // part of "#define <a/b.h>".
1973     if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1974       parseIncludeDirective();
1975       return LT_ImportStatement;
1976     }
1977 
1978     // In .proto files, top-level options and package statements are very
1979     // similar to import statements and should not be line-wrapped.
1980     if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1981         CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
1982       next();
1983       if (CurrentToken && CurrentToken->is(tok::identifier)) {
1984         while (CurrentToken)
1985           next();
1986         return LT_ImportStatement;
1987       }
1988     }
1989 
1990     bool KeywordVirtualFound = false;
1991     bool ImportStatement = false;
1992 
1993     // import {...} from '...';
1994     if (Style.isJavaScript() && CurrentToken->is(Keywords.kw_import))
1995       ImportStatement = true;
1996 
1997     while (CurrentToken) {
1998       if (CurrentToken->is(tok::kw_virtual))
1999         KeywordVirtualFound = true;
2000       if (Style.isJavaScript()) {
2001         // export {...} from '...';
2002         // An export followed by "from 'some string';" is a re-export from
2003         // another module identified by a URI and is treated as a
2004         // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
2005         // Just "export {...};" or "export class ..." should not be treated as
2006         // an import in this sense.
2007         if (Line.First->is(tok::kw_export) &&
2008             CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
2009             CurrentToken->Next->isStringLiteral()) {
2010           ImportStatement = true;
2011         }
2012         if (isClosureImportStatement(*CurrentToken))
2013           ImportStatement = true;
2014       }
2015       if (!consumeToken())
2016         return LT_Invalid;
2017     }
2018     if (Line.Type == LT_AccessModifier)
2019       return LT_AccessModifier;
2020     if (KeywordVirtualFound)
2021       return LT_VirtualFunctionDecl;
2022     if (ImportStatement)
2023       return LT_ImportStatement;
2024 
2025     if (Line.startsWith(TT_ObjCMethodSpecifier)) {
2026       if (Contexts.back().FirstObjCSelectorName) {
2027         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
2028             Contexts.back().LongestObjCSelectorName;
2029       }
2030       return LT_ObjCMethodDecl;
2031     }
2032 
2033     for (const auto &ctx : Contexts)
2034       if (ctx.ContextType == Context::StructArrayInitializer)
2035         return LT_ArrayOfStructInitializer;
2036 
2037     return LT_Other;
2038   }
2039 
2040 private:
isClosureImportStatement(const FormatToken & Tok)2041   bool isClosureImportStatement(const FormatToken &Tok) {
2042     // FIXME: Closure-library specific stuff should not be hard-coded but be
2043     // configurable.
2044     return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
2045            Tok.Next->Next &&
2046            (Tok.Next->Next->TokenText == "module" ||
2047             Tok.Next->Next->TokenText == "provide" ||
2048             Tok.Next->Next->TokenText == "require" ||
2049             Tok.Next->Next->TokenText == "requireType" ||
2050             Tok.Next->Next->TokenText == "forwardDeclare") &&
2051            Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
2052   }
2053 
resetTokenMetadata()2054   void resetTokenMetadata() {
2055     if (!CurrentToken)
2056       return;
2057 
2058     // Reset token type in case we have already looked at it and then
2059     // recovered from an error (e.g. failure to find the matching >).
2060     if (!CurrentToken->isTypeFinalized() &&
2061         !CurrentToken->isOneOf(
2062             TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
2063             TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
2064             TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
2065             TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator,
2066             TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral,
2067             TT_UntouchableMacroFunc, TT_StatementAttributeLikeMacro,
2068             TT_FunctionLikeOrFreestandingMacro, TT_ClassLBrace, TT_EnumLBrace,
2069             TT_RecordLBrace, TT_StructLBrace, TT_UnionLBrace, TT_RequiresClause,
2070             TT_RequiresClauseInARequiresExpression, TT_RequiresExpression,
2071             TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace,
2072             TT_BracedListLBrace)) {
2073       CurrentToken->setType(TT_Unknown);
2074     }
2075     CurrentToken->Role.reset();
2076     CurrentToken->MatchingParen = nullptr;
2077     CurrentToken->FakeLParens.clear();
2078     CurrentToken->FakeRParens = 0;
2079   }
2080 
next()2081   void next() {
2082     if (!CurrentToken)
2083       return;
2084 
2085     CurrentToken->NestingLevel = Contexts.size() - 1;
2086     CurrentToken->BindingStrength = Contexts.back().BindingStrength;
2087     modifyContext(*CurrentToken);
2088     determineTokenType(*CurrentToken);
2089     CurrentToken = CurrentToken->Next;
2090 
2091     resetTokenMetadata();
2092   }
2093 
2094   /// A struct to hold information valid in a specific context, e.g.
2095   /// a pair of parenthesis.
2096   struct Context {
Contextclang::format::__anonadd98d890111::AnnotatingParser::Context2097     Context(tok::TokenKind ContextKind, unsigned BindingStrength,
2098             bool IsExpression)
2099         : ContextKind(ContextKind), BindingStrength(BindingStrength),
2100           IsExpression(IsExpression) {}
2101 
2102     tok::TokenKind ContextKind;
2103     unsigned BindingStrength;
2104     bool IsExpression;
2105     unsigned LongestObjCSelectorName = 0;
2106     bool ColonIsForRangeExpr = false;
2107     bool ColonIsDictLiteral = false;
2108     bool ColonIsObjCMethodExpr = false;
2109     FormatToken *FirstObjCSelectorName = nullptr;
2110     FormatToken *FirstStartOfName = nullptr;
2111     bool CanBeExpression = true;
2112     bool CaretFound = false;
2113     bool InCpp11AttributeSpecifier = false;
2114     bool InCSharpAttributeSpecifier = false;
2115     bool VerilogAssignmentFound = false;
2116     // Whether the braces may mean concatenation instead of structure or array
2117     // literal.
2118     bool VerilogMayBeConcatenation = false;
2119     bool IsTableGenDAGArg = false;
2120     bool IsTableGenBangOpe = false;
2121     bool IsTableGenCondOpe = false;
2122     enum {
2123       Unknown,
2124       // Like the part after `:` in a constructor.
2125       //   Context(...) : IsExpression(IsExpression)
2126       CtorInitializer,
2127       // Like in the parentheses in a foreach.
2128       ForEachMacro,
2129       // Like the inheritance list in a class declaration.
2130       //   class Input : public IO
2131       InheritanceList,
2132       // Like in the braced list.
2133       //   int x[] = {};
2134       StructArrayInitializer,
2135       // Like in `static_cast<int>`.
2136       TemplateArgument,
2137       // C11 _Generic selection.
2138       C11GenericSelection,
2139       // Like in the outer parentheses in `ffnand ff1(.q());`.
2140       VerilogInstancePortList,
2141     } ContextType = Unknown;
2142   };
2143 
2144   /// Puts a new \c Context onto the stack \c Contexts for the lifetime
2145   /// of each instance.
2146   struct ScopedContextCreator {
2147     AnnotatingParser &P;
2148 
ScopedContextCreatorclang::format::__anonadd98d890111::AnnotatingParser::ScopedContextCreator2149     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
2150                          unsigned Increase)
2151         : P(P) {
2152       P.Contexts.push_back(Context(ContextKind,
2153                                    P.Contexts.back().BindingStrength + Increase,
2154                                    P.Contexts.back().IsExpression));
2155     }
2156 
~ScopedContextCreatorclang::format::__anonadd98d890111::AnnotatingParser::ScopedContextCreator2157     ~ScopedContextCreator() {
2158       if (P.Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
2159         if (P.Contexts.back().ContextType == Context::StructArrayInitializer) {
2160           P.Contexts.pop_back();
2161           P.Contexts.back().ContextType = Context::StructArrayInitializer;
2162           return;
2163         }
2164       }
2165       P.Contexts.pop_back();
2166     }
2167   };
2168 
modifyContext(const FormatToken & Current)2169   void modifyContext(const FormatToken &Current) {
2170     auto AssignmentStartsExpression = [&]() {
2171       if (Current.getPrecedence() != prec::Assignment)
2172         return false;
2173 
2174       if (Line.First->isOneOf(tok::kw_using, tok::kw_return))
2175         return false;
2176       if (Line.First->is(tok::kw_template)) {
2177         assert(Current.Previous);
2178         if (Current.Previous->is(tok::kw_operator)) {
2179           // `template ... operator=` cannot be an expression.
2180           return false;
2181         }
2182 
2183         // `template` keyword can start a variable template.
2184         const FormatToken *Tok = Line.First->getNextNonComment();
2185         assert(Tok); // Current token is on the same line.
2186         if (Tok->isNot(TT_TemplateOpener)) {
2187           // Explicit template instantiations do not have `<>`.
2188           return false;
2189         }
2190 
2191         // This is the default value of a template parameter, determine if it's
2192         // type or non-type.
2193         if (Contexts.back().ContextKind == tok::less) {
2194           assert(Current.Previous->Previous);
2195           return !Current.Previous->Previous->isOneOf(tok::kw_typename,
2196                                                       tok::kw_class);
2197         }
2198 
2199         Tok = Tok->MatchingParen;
2200         if (!Tok)
2201           return false;
2202         Tok = Tok->getNextNonComment();
2203         if (!Tok)
2204           return false;
2205 
2206         if (Tok->isOneOf(tok::kw_class, tok::kw_enum, tok::kw_struct,
2207                          tok::kw_using)) {
2208           return false;
2209         }
2210 
2211         return true;
2212       }
2213 
2214       // Type aliases use `type X = ...;` in TypeScript and can be exported
2215       // using `export type ...`.
2216       if (Style.isJavaScript() &&
2217           (Line.startsWith(Keywords.kw_type, tok::identifier) ||
2218            Line.startsWith(tok::kw_export, Keywords.kw_type,
2219                            tok::identifier))) {
2220         return false;
2221       }
2222 
2223       return !Current.Previous || Current.Previous->isNot(tok::kw_operator);
2224     };
2225 
2226     if (AssignmentStartsExpression()) {
2227       Contexts.back().IsExpression = true;
2228       if (!Line.startsWith(TT_UnaryOperator)) {
2229         for (FormatToken *Previous = Current.Previous;
2230              Previous && Previous->Previous &&
2231              !Previous->Previous->isOneOf(tok::comma, tok::semi);
2232              Previous = Previous->Previous) {
2233           if (Previous->isOneOf(tok::r_square, tok::r_paren, tok::greater)) {
2234             Previous = Previous->MatchingParen;
2235             if (!Previous)
2236               break;
2237           }
2238           if (Previous->opensScope())
2239             break;
2240           if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
2241               Previous->isPointerOrReference() && Previous->Previous &&
2242               Previous->Previous->isNot(tok::equal)) {
2243             Previous->setType(TT_PointerOrReference);
2244           }
2245         }
2246       }
2247     } else if (Current.is(tok::lessless) &&
2248                (!Current.Previous ||
2249                 Current.Previous->isNot(tok::kw_operator))) {
2250       Contexts.back().IsExpression = true;
2251     } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
2252       Contexts.back().IsExpression = true;
2253     } else if (Current.is(TT_TrailingReturnArrow)) {
2254       Contexts.back().IsExpression = false;
2255     } else if (Current.isOneOf(TT_LambdaArrow, Keywords.kw_assert)) {
2256       Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
2257     } else if (Current.Previous &&
2258                Current.Previous->is(TT_CtorInitializerColon)) {
2259       Contexts.back().IsExpression = true;
2260       Contexts.back().ContextType = Context::CtorInitializer;
2261     } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
2262       Contexts.back().ContextType = Context::InheritanceList;
2263     } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
2264       for (FormatToken *Previous = Current.Previous;
2265            Previous && Previous->isOneOf(tok::star, tok::amp);
2266            Previous = Previous->Previous) {
2267         Previous->setType(TT_PointerOrReference);
2268       }
2269       if (Line.MustBeDeclaration &&
2270           Contexts.front().ContextType != Context::CtorInitializer) {
2271         Contexts.back().IsExpression = false;
2272       }
2273     } else if (Current.is(tok::kw_new)) {
2274       Contexts.back().CanBeExpression = false;
2275     } else if (Current.is(tok::semi) ||
2276                (Current.is(tok::exclaim) && Current.Previous &&
2277                 Current.Previous->isNot(tok::kw_operator))) {
2278       // This should be the condition or increment in a for-loop.
2279       // But not operator !() (can't use TT_OverloadedOperator here as its not
2280       // been annotated yet).
2281       Contexts.back().IsExpression = true;
2282     }
2283   }
2284 
untilMatchingParen(FormatToken * Current)2285   static FormatToken *untilMatchingParen(FormatToken *Current) {
2286     // Used when `MatchingParen` is not yet established.
2287     int ParenLevel = 0;
2288     while (Current) {
2289       if (Current->is(tok::l_paren))
2290         ++ParenLevel;
2291       if (Current->is(tok::r_paren))
2292         --ParenLevel;
2293       if (ParenLevel < 1)
2294         break;
2295       Current = Current->Next;
2296     }
2297     return Current;
2298   }
2299 
isDeductionGuide(FormatToken & Current)2300   static bool isDeductionGuide(FormatToken &Current) {
2301     // Look for a deduction guide template<T> A(...) -> A<...>;
2302     if (Current.Previous && Current.Previous->is(tok::r_paren) &&
2303         Current.startsSequence(tok::arrow, tok::identifier, tok::less)) {
2304       // Find the TemplateCloser.
2305       FormatToken *TemplateCloser = Current.Next->Next;
2306       int NestingLevel = 0;
2307       while (TemplateCloser) {
2308         // Skip over an expressions in parens  A<(3 < 2)>;
2309         if (TemplateCloser->is(tok::l_paren)) {
2310           // No Matching Paren yet so skip to matching paren
2311           TemplateCloser = untilMatchingParen(TemplateCloser);
2312           if (!TemplateCloser)
2313             break;
2314         }
2315         if (TemplateCloser->is(tok::less))
2316           ++NestingLevel;
2317         if (TemplateCloser->is(tok::greater))
2318           --NestingLevel;
2319         if (NestingLevel < 1)
2320           break;
2321         TemplateCloser = TemplateCloser->Next;
2322       }
2323       // Assuming we have found the end of the template ensure its followed
2324       // with a semi-colon.
2325       if (TemplateCloser && TemplateCloser->Next &&
2326           TemplateCloser->Next->is(tok::semi) &&
2327           Current.Previous->MatchingParen) {
2328         // Determine if the identifier `A` prior to the A<..>; is the same as
2329         // prior to the A(..)
2330         FormatToken *LeadingIdentifier =
2331             Current.Previous->MatchingParen->Previous;
2332 
2333         return LeadingIdentifier &&
2334                LeadingIdentifier->TokenText == Current.Next->TokenText;
2335       }
2336     }
2337     return false;
2338   }
2339 
determineTokenType(FormatToken & Current)2340   void determineTokenType(FormatToken &Current) {
2341     if (Current.isNot(TT_Unknown)) {
2342       // The token type is already known.
2343       return;
2344     }
2345 
2346     if ((Style.isJavaScript() || Style.isCSharp()) &&
2347         Current.is(tok::exclaim)) {
2348       if (Current.Previous) {
2349         bool IsIdentifier =
2350             Style.isJavaScript()
2351                 ? Keywords.isJavaScriptIdentifier(
2352                       *Current.Previous, /* AcceptIdentifierName= */ true)
2353                 : Current.Previous->is(tok::identifier);
2354         if (IsIdentifier ||
2355             Current.Previous->isOneOf(
2356                 tok::kw_default, tok::kw_namespace, tok::r_paren, tok::r_square,
2357                 tok::r_brace, tok::kw_false, tok::kw_true, Keywords.kw_type,
2358                 Keywords.kw_get, Keywords.kw_init, Keywords.kw_set) ||
2359             Current.Previous->Tok.isLiteral()) {
2360           Current.setType(TT_NonNullAssertion);
2361           return;
2362         }
2363       }
2364       if (Current.Next &&
2365           Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
2366         Current.setType(TT_NonNullAssertion);
2367         return;
2368       }
2369     }
2370 
2371     // Line.MightBeFunctionDecl can only be true after the parentheses of a
2372     // function declaration have been found. In this case, 'Current' is a
2373     // trailing token of this declaration and thus cannot be a name.
2374     if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
2375         Current.is(Keywords.kw_instanceof)) {
2376       Current.setType(TT_BinaryOperator);
2377     } else if (isStartOfName(Current) &&
2378                (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
2379       Contexts.back().FirstStartOfName = &Current;
2380       Current.setType(TT_StartOfName);
2381     } else if (Current.is(tok::semi)) {
2382       // Reset FirstStartOfName after finding a semicolon so that a for loop
2383       // with multiple increment statements is not confused with a for loop
2384       // having multiple variable declarations.
2385       Contexts.back().FirstStartOfName = nullptr;
2386     } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
2387       AutoFound = true;
2388     } else if (Current.is(tok::arrow) &&
2389                Style.Language == FormatStyle::LK_Java) {
2390       Current.setType(TT_LambdaArrow);
2391     } else if (Current.is(tok::arrow) && Style.isVerilog()) {
2392       // The implication operator.
2393       Current.setType(TT_BinaryOperator);
2394     } else if (Current.is(tok::arrow) && AutoFound &&
2395                Line.MightBeFunctionDecl && Current.NestingLevel == 0 &&
2396                !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) {
2397       // not auto operator->() -> xxx;
2398       Current.setType(TT_TrailingReturnArrow);
2399     } else if (Current.is(tok::arrow) && Current.Previous &&
2400                Current.Previous->is(tok::r_brace)) {
2401       // Concept implicit conversion constraint needs to be treated like
2402       // a trailing return type  ... } -> <type>.
2403       Current.setType(TT_TrailingReturnArrow);
2404     } else if (isDeductionGuide(Current)) {
2405       // Deduction guides trailing arrow " A(...) -> A<T>;".
2406       Current.setType(TT_TrailingReturnArrow);
2407     } else if (Current.isPointerOrReference()) {
2408       Current.setType(determineStarAmpUsage(
2409           Current,
2410           Contexts.back().CanBeExpression && Contexts.back().IsExpression,
2411           Contexts.back().ContextType == Context::TemplateArgument));
2412     } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret) ||
2413                (Style.isVerilog() && Current.is(tok::pipe))) {
2414       Current.setType(determinePlusMinusCaretUsage(Current));
2415       if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
2416         Contexts.back().CaretFound = true;
2417     } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
2418       Current.setType(determineIncrementUsage(Current));
2419     } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
2420       Current.setType(TT_UnaryOperator);
2421     } else if (Current.is(tok::question)) {
2422       if (Style.isJavaScript() && Line.MustBeDeclaration &&
2423           !Contexts.back().IsExpression) {
2424         // In JavaScript, `interface X { foo?(): bar; }` is an optional method
2425         // on the interface, not a ternary expression.
2426         Current.setType(TT_JsTypeOptionalQuestion);
2427       } else if (Style.isTableGen()) {
2428         // In TableGen, '?' is just an identifier like token.
2429         Current.setType(TT_Unknown);
2430       } else {
2431         Current.setType(TT_ConditionalExpr);
2432       }
2433     } else if (Current.isBinaryOperator() &&
2434                (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
2435                (Current.isNot(tok::greater) &&
2436                 Style.Language != FormatStyle::LK_TextProto)) {
2437       if (Style.isVerilog()) {
2438         if (Current.is(tok::lessequal) && Contexts.size() == 1 &&
2439             !Contexts.back().VerilogAssignmentFound) {
2440           // In Verilog `<=` is assignment if in its own statement. It is a
2441           // statement instead of an expression, that is it can not be chained.
2442           Current.ForcedPrecedence = prec::Assignment;
2443           Current.setFinalizedType(TT_BinaryOperator);
2444         }
2445         if (Current.getPrecedence() == prec::Assignment)
2446           Contexts.back().VerilogAssignmentFound = true;
2447       }
2448       Current.setType(TT_BinaryOperator);
2449     } else if (Current.is(tok::comment)) {
2450       if (Current.TokenText.starts_with("/*")) {
2451         if (Current.TokenText.ends_with("*/")) {
2452           Current.setType(TT_BlockComment);
2453         } else {
2454           // The lexer has for some reason determined a comment here. But we
2455           // cannot really handle it, if it isn't properly terminated.
2456           Current.Tok.setKind(tok::unknown);
2457         }
2458       } else {
2459         Current.setType(TT_LineComment);
2460       }
2461     } else if (Current.is(tok::string_literal)) {
2462       if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation &&
2463           Current.getPreviousNonComment() &&
2464           Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) &&
2465           Current.getNextNonComment() &&
2466           Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) {
2467         Current.setType(TT_StringInConcatenation);
2468       }
2469     } else if (Current.is(tok::l_paren)) {
2470       if (lParenStartsCppCast(Current))
2471         Current.setType(TT_CppCastLParen);
2472     } else if (Current.is(tok::r_paren)) {
2473       if (rParenEndsCast(Current))
2474         Current.setType(TT_CastRParen);
2475       if (Current.MatchingParen && Current.Next &&
2476           !Current.Next->isBinaryOperator() &&
2477           !Current.Next->isOneOf(
2478               tok::semi, tok::colon, tok::l_brace, tok::l_paren, tok::comma,
2479               tok::period, tok::arrow, tok::coloncolon, tok::kw_noexcept)) {
2480         if (FormatToken *AfterParen = Current.MatchingParen->Next;
2481             AfterParen && AfterParen->isNot(tok::caret)) {
2482           // Make sure this isn't the return type of an Obj-C block declaration.
2483           if (FormatToken *BeforeParen = Current.MatchingParen->Previous;
2484               BeforeParen && BeforeParen->is(tok::identifier) &&
2485               BeforeParen->isNot(TT_TypenameMacro) &&
2486               BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
2487               (!BeforeParen->Previous ||
2488                BeforeParen->Previous->ClosesTemplateDeclaration ||
2489                BeforeParen->Previous->ClosesRequiresClause)) {
2490             Current.setType(TT_FunctionAnnotationRParen);
2491           }
2492         }
2493       }
2494     } else if (Current.is(tok::at) && Current.Next && !Style.isJavaScript() &&
2495                Style.Language != FormatStyle::LK_Java) {
2496       // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
2497       // marks declarations and properties that need special formatting.
2498       switch (Current.Next->Tok.getObjCKeywordID()) {
2499       case tok::objc_interface:
2500       case tok::objc_implementation:
2501       case tok::objc_protocol:
2502         Current.setType(TT_ObjCDecl);
2503         break;
2504       case tok::objc_property:
2505         Current.setType(TT_ObjCProperty);
2506         break;
2507       default:
2508         break;
2509       }
2510     } else if (Current.is(tok::period)) {
2511       FormatToken *PreviousNoComment = Current.getPreviousNonComment();
2512       if (PreviousNoComment &&
2513           PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) {
2514         Current.setType(TT_DesignatedInitializerPeriod);
2515       } else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
2516                  Current.Previous->isOneOf(TT_JavaAnnotation,
2517                                            TT_LeadingJavaAnnotation)) {
2518         Current.setType(Current.Previous->getType());
2519       }
2520     } else if (canBeObjCSelectorComponent(Current) &&
2521                // FIXME(bug 36976): ObjC return types shouldn't use
2522                // TT_CastRParen.
2523                Current.Previous && Current.Previous->is(TT_CastRParen) &&
2524                Current.Previous->MatchingParen &&
2525                Current.Previous->MatchingParen->Previous &&
2526                Current.Previous->MatchingParen->Previous->is(
2527                    TT_ObjCMethodSpecifier)) {
2528       // This is the first part of an Objective-C selector name. (If there's no
2529       // colon after this, this is the only place which annotates the identifier
2530       // as a selector.)
2531       Current.setType(TT_SelectorName);
2532     } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept,
2533                                tok::kw_requires) &&
2534                Current.Previous &&
2535                !Current.Previous->isOneOf(tok::equal, tok::at,
2536                                           TT_CtorInitializerComma,
2537                                           TT_CtorInitializerColon) &&
2538                Line.MightBeFunctionDecl && Contexts.size() == 1) {
2539       // Line.MightBeFunctionDecl can only be true after the parentheses of a
2540       // function declaration have been found.
2541       Current.setType(TT_TrailingAnnotation);
2542     } else if ((Style.Language == FormatStyle::LK_Java ||
2543                 Style.isJavaScript()) &&
2544                Current.Previous) {
2545       if (Current.Previous->is(tok::at) &&
2546           Current.isNot(Keywords.kw_interface)) {
2547         const FormatToken &AtToken = *Current.Previous;
2548         const FormatToken *Previous = AtToken.getPreviousNonComment();
2549         if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
2550           Current.setType(TT_LeadingJavaAnnotation);
2551         else
2552           Current.setType(TT_JavaAnnotation);
2553       } else if (Current.Previous->is(tok::period) &&
2554                  Current.Previous->isOneOf(TT_JavaAnnotation,
2555                                            TT_LeadingJavaAnnotation)) {
2556         Current.setType(Current.Previous->getType());
2557       }
2558     }
2559   }
2560 
2561   /// Take a guess at whether \p Tok starts a name of a function or
2562   /// variable declaration.
2563   ///
2564   /// This is a heuristic based on whether \p Tok is an identifier following
2565   /// something that is likely a type.
isStartOfName(const FormatToken & Tok)2566   bool isStartOfName(const FormatToken &Tok) {
2567     // Handled in ExpressionParser for Verilog.
2568     if (Style.isVerilog())
2569       return false;
2570 
2571     if (Tok.isNot(tok::identifier) || !Tok.Previous)
2572       return false;
2573 
2574     if (const auto *NextNonComment = Tok.getNextNonComment();
2575         (!NextNonComment && !Line.InMacroBody) ||
2576         (NextNonComment &&
2577          (NextNonComment->isPointerOrReference() ||
2578           NextNonComment->is(tok::string_literal) ||
2579           (Line.InPragmaDirective && NextNonComment->is(tok::identifier))))) {
2580       return false;
2581     }
2582 
2583     if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
2584                               Keywords.kw_as)) {
2585       return false;
2586     }
2587     if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in))
2588       return false;
2589 
2590     // Skip "const" as it does not have an influence on whether this is a name.
2591     FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
2592 
2593     // For javascript const can be like "let" or "var"
2594     if (!Style.isJavaScript())
2595       while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
2596         PreviousNotConst = PreviousNotConst->getPreviousNonComment();
2597 
2598     if (!PreviousNotConst)
2599       return false;
2600 
2601     if (PreviousNotConst->ClosesRequiresClause)
2602       return false;
2603 
2604     if (Style.isTableGen()) {
2605       // keywords such as let and def* defines names.
2606       if (Keywords.isTableGenDefinition(*PreviousNotConst))
2607         return true;
2608       // Otherwise C++ style declarations is available only inside the brace.
2609       if (Contexts.back().ContextKind != tok::l_brace)
2610         return false;
2611     }
2612 
2613     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
2614                        PreviousNotConst->Previous &&
2615                        PreviousNotConst->Previous->is(tok::hash);
2616 
2617     if (PreviousNotConst->is(TT_TemplateCloser)) {
2618       return PreviousNotConst && PreviousNotConst->MatchingParen &&
2619              PreviousNotConst->MatchingParen->Previous &&
2620              PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
2621              PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
2622     }
2623 
2624     if ((PreviousNotConst->is(tok::r_paren) &&
2625          PreviousNotConst->is(TT_TypeDeclarationParen)) ||
2626         PreviousNotConst->is(TT_AttributeRParen)) {
2627       return true;
2628     }
2629 
2630     // If is a preprocess keyword like #define.
2631     if (IsPPKeyword)
2632       return false;
2633 
2634     // int a or auto a.
2635     if (PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto) &&
2636         PreviousNotConst->isNot(TT_StatementAttributeLikeMacro)) {
2637       return true;
2638     }
2639 
2640     // *a or &a or &&a.
2641     if (PreviousNotConst->is(TT_PointerOrReference))
2642       return true;
2643 
2644     // MyClass a;
2645     if (PreviousNotConst->isTypeName(LangOpts))
2646       return true;
2647 
2648     // type[] a in Java
2649     if (Style.Language == FormatStyle::LK_Java &&
2650         PreviousNotConst->is(tok::r_square)) {
2651       return true;
2652     }
2653 
2654     // const a = in JavaScript.
2655     return Style.isJavaScript() && PreviousNotConst->is(tok::kw_const);
2656   }
2657 
2658   /// Determine whether '(' is starting a C++ cast.
lParenStartsCppCast(const FormatToken & Tok)2659   bool lParenStartsCppCast(const FormatToken &Tok) {
2660     // C-style casts are only used in C++.
2661     if (!IsCpp)
2662       return false;
2663 
2664     FormatToken *LeftOfParens = Tok.getPreviousNonComment();
2665     if (LeftOfParens && LeftOfParens->is(TT_TemplateCloser) &&
2666         LeftOfParens->MatchingParen) {
2667       auto *Prev = LeftOfParens->MatchingParen->getPreviousNonComment();
2668       if (Prev &&
2669           Prev->isOneOf(tok::kw_const_cast, tok::kw_dynamic_cast,
2670                         tok::kw_reinterpret_cast, tok::kw_static_cast)) {
2671         // FIXME: Maybe we should handle identifiers ending with "_cast",
2672         // e.g. any_cast?
2673         return true;
2674       }
2675     }
2676     return false;
2677   }
2678 
2679   /// Determine whether ')' is ending a cast.
rParenEndsCast(const FormatToken & Tok)2680   bool rParenEndsCast(const FormatToken &Tok) {
2681     assert(Tok.is(tok::r_paren));
2682 
2683     if (!Tok.MatchingParen || !Tok.Previous)
2684       return false;
2685 
2686     // C-style casts are only used in C++, C# and Java.
2687     if (!IsCpp && !Style.isCSharp() && Style.Language != FormatStyle::LK_Java)
2688       return false;
2689 
2690     const auto *LParen = Tok.MatchingParen;
2691     const auto *BeforeRParen = Tok.Previous;
2692     const auto *AfterRParen = Tok.Next;
2693 
2694     // Empty parens aren't casts and there are no casts at the end of the line.
2695     if (BeforeRParen == LParen || !AfterRParen)
2696       return false;
2697 
2698     if (LParen->is(TT_OverloadedOperatorLParen))
2699       return false;
2700 
2701     auto *LeftOfParens = LParen->getPreviousNonComment();
2702     if (LeftOfParens) {
2703       // If there is a closing parenthesis left of the current
2704       // parentheses, look past it as these might be chained casts.
2705       if (LeftOfParens->is(tok::r_paren) &&
2706           LeftOfParens->isNot(TT_CastRParen)) {
2707         if (!LeftOfParens->MatchingParen ||
2708             !LeftOfParens->MatchingParen->Previous) {
2709           return false;
2710         }
2711         LeftOfParens = LeftOfParens->MatchingParen->Previous;
2712       }
2713 
2714       if (LeftOfParens->is(tok::r_square)) {
2715         //   delete[] (void *)ptr;
2716         auto MayBeArrayDelete = [](FormatToken *Tok) -> FormatToken * {
2717           if (Tok->isNot(tok::r_square))
2718             return nullptr;
2719 
2720           Tok = Tok->getPreviousNonComment();
2721           if (!Tok || Tok->isNot(tok::l_square))
2722             return nullptr;
2723 
2724           Tok = Tok->getPreviousNonComment();
2725           if (!Tok || Tok->isNot(tok::kw_delete))
2726             return nullptr;
2727           return Tok;
2728         };
2729         if (FormatToken *MaybeDelete = MayBeArrayDelete(LeftOfParens))
2730           LeftOfParens = MaybeDelete;
2731       }
2732 
2733       // The Condition directly below this one will see the operator arguments
2734       // as a (void *foo) cast.
2735       //   void operator delete(void *foo) ATTRIB;
2736       if (LeftOfParens->Tok.getIdentifierInfo() && LeftOfParens->Previous &&
2737           LeftOfParens->Previous->is(tok::kw_operator)) {
2738         return false;
2739       }
2740 
2741       // If there is an identifier (or with a few exceptions a keyword) right
2742       // before the parentheses, this is unlikely to be a cast.
2743       if (LeftOfParens->Tok.getIdentifierInfo() &&
2744           !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
2745                                  tok::kw_delete, tok::kw_throw)) {
2746         return false;
2747       }
2748 
2749       // Certain other tokens right before the parentheses are also signals that
2750       // this cannot be a cast.
2751       if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
2752                                 TT_TemplateCloser, tok::ellipsis)) {
2753         return false;
2754       }
2755     }
2756 
2757     if (AfterRParen->is(tok::question) ||
2758         (AfterRParen->is(tok::ampamp) && !BeforeRParen->isTypeName(LangOpts))) {
2759       return false;
2760     }
2761 
2762     // `foreach((A a, B b) in someList)` should not be seen as a cast.
2763     if (AfterRParen->is(Keywords.kw_in) && Style.isCSharp())
2764       return false;
2765 
2766     // Functions which end with decorations like volatile, noexcept are unlikely
2767     // to be casts.
2768     if (AfterRParen->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
2769                              tok::kw_requires, tok::kw_throw, tok::arrow,
2770                              Keywords.kw_override, Keywords.kw_final) ||
2771         isCppAttribute(IsCpp, *AfterRParen)) {
2772       return false;
2773     }
2774 
2775     // As Java has no function types, a "(" after the ")" likely means that this
2776     // is a cast.
2777     if (Style.Language == FormatStyle::LK_Java && AfterRParen->is(tok::l_paren))
2778       return true;
2779 
2780     // If a (non-string) literal follows, this is likely a cast.
2781     if (AfterRParen->isOneOf(tok::kw_sizeof, tok::kw_alignof) ||
2782         (AfterRParen->Tok.isLiteral() &&
2783          AfterRParen->isNot(tok::string_literal))) {
2784       return true;
2785     }
2786 
2787     // Heuristically try to determine whether the parentheses contain a type.
2788     auto IsQualifiedPointerOrReference = [](const FormatToken *T,
2789                                             const LangOptions &LangOpts) {
2790       // This is used to handle cases such as x = (foo *const)&y;
2791       assert(!T->isTypeName(LangOpts) && "Should have already been checked");
2792       // Strip trailing qualifiers such as const or volatile when checking
2793       // whether the parens could be a cast to a pointer/reference type.
2794       while (T) {
2795         if (T->is(TT_AttributeRParen)) {
2796           // Handle `x = (foo *__attribute__((foo)))&v;`:
2797           assert(T->is(tok::r_paren));
2798           assert(T->MatchingParen);
2799           assert(T->MatchingParen->is(tok::l_paren));
2800           assert(T->MatchingParen->is(TT_AttributeLParen));
2801           if (const auto *Tok = T->MatchingParen->Previous;
2802               Tok && Tok->isAttribute()) {
2803             T = Tok->Previous;
2804             continue;
2805           }
2806         } else if (T->is(TT_AttributeSquare)) {
2807           // Handle `x = (foo *[[clang::foo]])&v;`:
2808           if (T->MatchingParen && T->MatchingParen->Previous) {
2809             T = T->MatchingParen->Previous;
2810             continue;
2811           }
2812         } else if (T->canBePointerOrReferenceQualifier()) {
2813           T = T->Previous;
2814           continue;
2815         }
2816         break;
2817       }
2818       return T && T->is(TT_PointerOrReference);
2819     };
2820     bool ParensAreType =
2821         BeforeRParen->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
2822         BeforeRParen->isTypeName(LangOpts) ||
2823         IsQualifiedPointerOrReference(BeforeRParen, LangOpts);
2824     bool ParensCouldEndDecl =
2825         AfterRParen->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
2826     if (ParensAreType && !ParensCouldEndDecl)
2827       return true;
2828 
2829     // At this point, we heuristically assume that there are no casts at the
2830     // start of the line. We assume that we have found most cases where there
2831     // are by the logic above, e.g. "(void)x;".
2832     if (!LeftOfParens)
2833       return false;
2834 
2835     // Certain token types inside the parentheses mean that this can't be a
2836     // cast.
2837     for (const auto *Token = LParen->Next; Token != &Tok; Token = Token->Next)
2838       if (Token->is(TT_BinaryOperator))
2839         return false;
2840 
2841     // If the following token is an identifier or 'this', this is a cast. All
2842     // cases where this can be something else are handled above.
2843     if (AfterRParen->isOneOf(tok::identifier, tok::kw_this))
2844       return true;
2845 
2846     // Look for a cast `( x ) (`.
2847     if (AfterRParen->is(tok::l_paren) && BeforeRParen->Previous) {
2848       if (BeforeRParen->is(tok::identifier) &&
2849           BeforeRParen->Previous->is(tok::l_paren)) {
2850         return true;
2851       }
2852     }
2853 
2854     if (!AfterRParen->Next)
2855       return false;
2856 
2857     if (AfterRParen->is(tok::l_brace) &&
2858         AfterRParen->getBlockKind() == BK_BracedInit) {
2859       return true;
2860     }
2861 
2862     // If the next token after the parenthesis is a unary operator, assume
2863     // that this is cast, unless there are unexpected tokens inside the
2864     // parenthesis.
2865     const bool NextIsAmpOrStar = AfterRParen->isOneOf(tok::amp, tok::star);
2866     if (!(AfterRParen->isUnaryOperator() || NextIsAmpOrStar) ||
2867         AfterRParen->is(tok::plus) ||
2868         !AfterRParen->Next->isOneOf(tok::identifier, tok::numeric_constant)) {
2869       return false;
2870     }
2871 
2872     if (NextIsAmpOrStar &&
2873         (AfterRParen->Next->is(tok::numeric_constant) || Line.InPPDirective)) {
2874       return false;
2875     }
2876 
2877     if (Line.InPPDirective && AfterRParen->is(tok::minus))
2878       return false;
2879 
2880     // Search for unexpected tokens.
2881     for (auto *Prev = BeforeRParen; Prev != LParen; Prev = Prev->Previous) {
2882       if (Prev->is(tok::r_paren)) {
2883         if (Prev->is(TT_CastRParen))
2884           return false;
2885         Prev = Prev->MatchingParen;
2886         if (!Prev)
2887           return false;
2888         if (Prev->is(TT_FunctionTypeLParen))
2889           break;
2890         continue;
2891       }
2892       if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
2893         return false;
2894     }
2895 
2896     return true;
2897   }
2898 
2899   /// Returns true if the token is used as a unary operator.
determineUnaryOperatorByUsage(const FormatToken & Tok)2900   bool determineUnaryOperatorByUsage(const FormatToken &Tok) {
2901     const FormatToken *PrevToken = Tok.getPreviousNonComment();
2902     if (!PrevToken)
2903       return true;
2904 
2905     // These keywords are deliberately not included here because they may
2906     // precede only one of unary star/amp and plus/minus but not both.  They are
2907     // either included in determineStarAmpUsage or determinePlusMinusCaretUsage.
2908     //
2909     // @ - It may be followed by a unary `-` in Objective-C literals. We don't
2910     //   know how they can be followed by a star or amp.
2911     if (PrevToken->isOneOf(
2912             TT_ConditionalExpr, tok::l_paren, tok::comma, tok::colon, tok::semi,
2913             tok::equal, tok::question, tok::l_square, tok::l_brace,
2914             tok::kw_case, tok::kw_co_await, tok::kw_co_return, tok::kw_co_yield,
2915             tok::kw_delete, tok::kw_return, tok::kw_throw)) {
2916       return true;
2917     }
2918 
2919     // We put sizeof here instead of only in determineStarAmpUsage. In the cases
2920     // where the unary `+` operator is overloaded, it is reasonable to write
2921     // things like `sizeof +x`. Like commit 446d6ec996c6c3.
2922     if (PrevToken->is(tok::kw_sizeof))
2923       return true;
2924 
2925     // A sequence of leading unary operators.
2926     if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
2927       return true;
2928 
2929     // There can't be two consecutive binary operators.
2930     if (PrevToken->is(TT_BinaryOperator))
2931       return true;
2932 
2933     return false;
2934   }
2935 
2936   /// Return the type of the given token assuming it is * or &.
determineStarAmpUsage(const FormatToken & Tok,bool IsExpression,bool InTemplateArgument)2937   TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
2938                                   bool InTemplateArgument) {
2939     if (Style.isJavaScript())
2940       return TT_BinaryOperator;
2941 
2942     // && in C# must be a binary operator.
2943     if (Style.isCSharp() && Tok.is(tok::ampamp))
2944       return TT_BinaryOperator;
2945 
2946     if (Style.isVerilog()) {
2947       // In Verilog, `*` can only be a binary operator.  `&` can be either unary
2948       // or binary.  `*` also includes `*>` in module path declarations in
2949       // specify blocks because merged tokens take the type of the first one by
2950       // default.
2951       if (Tok.is(tok::star))
2952         return TT_BinaryOperator;
2953       return determineUnaryOperatorByUsage(Tok) ? TT_UnaryOperator
2954                                                 : TT_BinaryOperator;
2955     }
2956 
2957     const FormatToken *PrevToken = Tok.getPreviousNonComment();
2958     if (!PrevToken)
2959       return TT_UnaryOperator;
2960     if (PrevToken->is(TT_TypeName))
2961       return TT_PointerOrReference;
2962     if (PrevToken->isOneOf(tok::kw_new, tok::kw_delete) && Tok.is(tok::ampamp))
2963       return TT_BinaryOperator;
2964 
2965     const FormatToken *NextToken = Tok.getNextNonComment();
2966 
2967     if (InTemplateArgument && NextToken && NextToken->is(tok::kw_noexcept))
2968       return TT_BinaryOperator;
2969 
2970     if (!NextToken ||
2971         NextToken->isOneOf(tok::arrow, tok::equal, tok::comma, tok::r_paren,
2972                            TT_RequiresClause) ||
2973         (NextToken->is(tok::kw_noexcept) && !IsExpression) ||
2974         NextToken->canBePointerOrReferenceQualifier() ||
2975         (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) {
2976       return TT_PointerOrReference;
2977     }
2978 
2979     if (PrevToken->is(tok::coloncolon))
2980       return TT_PointerOrReference;
2981 
2982     if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen))
2983       return TT_PointerOrReference;
2984 
2985     if (determineUnaryOperatorByUsage(Tok))
2986       return TT_UnaryOperator;
2987 
2988     if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
2989       return TT_PointerOrReference;
2990     if (NextToken->is(tok::kw_operator) && !IsExpression)
2991       return TT_PointerOrReference;
2992     if (NextToken->isOneOf(tok::comma, tok::semi))
2993       return TT_PointerOrReference;
2994 
2995     // After right braces, star tokens are likely to be pointers to struct,
2996     // union, or class.
2997     //   struct {} *ptr;
2998     // This by itself is not sufficient to distinguish from multiplication
2999     // following a brace-initialized expression, as in:
3000     // int i = int{42} * 2;
3001     // In the struct case, the part of the struct declaration until the `{` and
3002     // the `}` are put on separate unwrapped lines; in the brace-initialized
3003     // case, the matching `{` is on the same unwrapped line, so check for the
3004     // presence of the matching brace to distinguish between those.
3005     if (PrevToken->is(tok::r_brace) && Tok.is(tok::star) &&
3006         !PrevToken->MatchingParen) {
3007       return TT_PointerOrReference;
3008     }
3009 
3010     if (PrevToken->endsSequence(tok::r_square, tok::l_square, tok::kw_delete))
3011       return TT_UnaryOperator;
3012 
3013     if (PrevToken->Tok.isLiteral() ||
3014         PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
3015                            tok::kw_false, tok::r_brace)) {
3016       return TT_BinaryOperator;
3017     }
3018 
3019     const FormatToken *NextNonParen = NextToken;
3020     while (NextNonParen && NextNonParen->is(tok::l_paren))
3021       NextNonParen = NextNonParen->getNextNonComment();
3022     if (NextNonParen && (NextNonParen->Tok.isLiteral() ||
3023                          NextNonParen->isOneOf(tok::kw_true, tok::kw_false) ||
3024                          NextNonParen->isUnaryOperator())) {
3025       return TT_BinaryOperator;
3026     }
3027 
3028     // If we know we're in a template argument, there are no named declarations.
3029     // Thus, having an identifier on the right-hand side indicates a binary
3030     // operator.
3031     if (InTemplateArgument && NextToken->Tok.isAnyIdentifier())
3032       return TT_BinaryOperator;
3033 
3034     // "&&" followed by "(", "*", or "&" is quite unlikely to be two successive
3035     // unary "&".
3036     if (Tok.is(tok::ampamp) &&
3037         NextToken->isOneOf(tok::l_paren, tok::star, tok::amp)) {
3038       return TT_BinaryOperator;
3039     }
3040 
3041     // This catches some cases where evaluation order is used as control flow:
3042     //   aaa && aaa->f();
3043     if (NextToken->Tok.isAnyIdentifier()) {
3044       const FormatToken *NextNextToken = NextToken->getNextNonComment();
3045       if (NextNextToken && NextNextToken->is(tok::arrow))
3046         return TT_BinaryOperator;
3047     }
3048 
3049     // It is very unlikely that we are going to find a pointer or reference type
3050     // definition on the RHS of an assignment.
3051     if (IsExpression && !Contexts.back().CaretFound)
3052       return TT_BinaryOperator;
3053 
3054     // Opeartors at class scope are likely pointer or reference members.
3055     if (!Scopes.empty() && Scopes.back() == ST_Class)
3056       return TT_PointerOrReference;
3057 
3058     // Tokens that indicate member access or chained operator& use.
3059     auto IsChainedOperatorAmpOrMember = [](const FormatToken *token) {
3060       return !token || token->isOneOf(tok::amp, tok::period, tok::arrow,
3061                                       tok::arrowstar, tok::periodstar);
3062     };
3063 
3064     // It's more likely that & represents operator& than an uninitialized
3065     // reference.
3066     if (Tok.is(tok::amp) && PrevToken && PrevToken->Tok.isAnyIdentifier() &&
3067         IsChainedOperatorAmpOrMember(PrevToken->getPreviousNonComment()) &&
3068         NextToken && NextToken->Tok.isAnyIdentifier()) {
3069       if (auto NextNext = NextToken->getNextNonComment();
3070           NextNext &&
3071           (IsChainedOperatorAmpOrMember(NextNext) || NextNext->is(tok::semi))) {
3072         return TT_BinaryOperator;
3073       }
3074     }
3075 
3076     return TT_PointerOrReference;
3077   }
3078 
determinePlusMinusCaretUsage(const FormatToken & Tok)3079   TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
3080     if (determineUnaryOperatorByUsage(Tok))
3081       return TT_UnaryOperator;
3082 
3083     const FormatToken *PrevToken = Tok.getPreviousNonComment();
3084     if (!PrevToken)
3085       return TT_UnaryOperator;
3086 
3087     if (PrevToken->is(tok::at))
3088       return TT_UnaryOperator;
3089 
3090     // Fall back to marking the token as binary operator.
3091     return TT_BinaryOperator;
3092   }
3093 
3094   /// Determine whether ++/-- are pre- or post-increments/-decrements.
determineIncrementUsage(const FormatToken & Tok)3095   TokenType determineIncrementUsage(const FormatToken &Tok) {
3096     const FormatToken *PrevToken = Tok.getPreviousNonComment();
3097     if (!PrevToken || PrevToken->is(TT_CastRParen))
3098       return TT_UnaryOperator;
3099     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
3100       return TT_TrailingUnaryOperator;
3101 
3102     return TT_UnaryOperator;
3103   }
3104 
3105   SmallVector<Context, 8> Contexts;
3106 
3107   const FormatStyle &Style;
3108   AnnotatedLine &Line;
3109   FormatToken *CurrentToken;
3110   bool AutoFound;
3111   bool IsCpp;
3112   LangOptions LangOpts;
3113   const AdditionalKeywords &Keywords;
3114 
3115   SmallVector<ScopeType> &Scopes;
3116 
  // Set of "<" tokens that do not open a template parameter list. If parseAngle
  // determines that a specific token can't be a template opener, it will make
  // the same decision irrespective of the decisions for tokens leading up to
  // it. Store this information to prevent this from causing exponential
  // runtime.
3121   llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
3122 
3123   int TemplateDeclarationDepth;
3124 };
3125 
3126 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
3127 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
3128 
3129 /// Parses binary expressions by inserting fake parenthesis based on
3130 /// operator precedence.
3131 class ExpressionParser {
3132 public:
ExpressionParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,AnnotatedLine & Line)3133   ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
3134                    AnnotatedLine &Line)
3135       : Style(Style), Keywords(Keywords), Line(Line), Current(Line.First) {}
3136 
3137   /// Parse expressions with the given operator precedence.
parse(int Precedence=0)3138   void parse(int Precedence = 0) {
3139     // Skip 'return' and ObjC selector colons as they are not part of a binary
3140     // expression.
3141     while (Current && (Current->is(tok::kw_return) ||
3142                        (Current->is(tok::colon) &&
3143                         Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) {
3144       next();
3145     }
3146 
3147     if (!Current || Precedence > PrecedenceArrowAndPeriod)
3148       return;
3149 
3150     // Conditional expressions need to be parsed separately for proper nesting.
3151     if (Precedence == prec::Conditional) {
3152       parseConditionalExpr();
3153       return;
3154     }
3155 
3156     // Parse unary operators, which all have a higher precedence than binary
3157     // operators.
3158     if (Precedence == PrecedenceUnaryOperator) {
3159       parseUnaryOperator();
3160       return;
3161     }
3162 
3163     FormatToken *Start = Current;
3164     FormatToken *LatestOperator = nullptr;
3165     unsigned OperatorIndex = 0;
3166     // The first name of the current type in a port list.
3167     FormatToken *VerilogFirstOfType = nullptr;
3168 
3169     while (Current) {
3170       // In Verilog ports in a module header that don't have a type take the
3171       // type of the previous one.  For example,
3172       //   module a(output b,
3173       //                   c,
3174       //            output d);
3175       // In this case there need to be fake parentheses around b and c.
3176       if (Style.isVerilog() && Precedence == prec::Comma) {
3177         VerilogFirstOfType =
3178             verilogGroupDecl(VerilogFirstOfType, LatestOperator);
3179       }
3180 
3181       // Consume operators with higher precedence.
3182       parse(Precedence + 1);
3183 
3184       int CurrentPrecedence = getCurrentPrecedence();
3185 
3186       if (Precedence == CurrentPrecedence && Current &&
3187           Current->is(TT_SelectorName)) {
3188         if (LatestOperator)
3189           addFakeParenthesis(Start, prec::Level(Precedence));
3190         Start = Current;
3191       }
3192 
3193       if ((Style.isCSharp() || Style.isJavaScript() ||
3194            Style.Language == FormatStyle::LK_Java) &&
3195           Precedence == prec::Additive && Current) {
3196         // A string can be broken without parentheses around it when it is
3197         // already in a sequence of strings joined by `+` signs.
3198         FormatToken *Prev = Current->getPreviousNonComment();
3199         if (Prev && Prev->is(tok::string_literal) &&
3200             (Prev == Start || Prev->endsSequence(tok::string_literal, tok::plus,
3201                                                  TT_StringInConcatenation))) {
3202           Prev->setType(TT_StringInConcatenation);
3203         }
3204       }
3205 
3206       // At the end of the line or when an operator with lower precedence is
3207       // found, insert fake parenthesis and return.
3208       if (!Current ||
3209           (Current->closesScope() &&
3210            (Current->MatchingParen || Current->is(TT_TemplateString))) ||
3211           (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
3212           (CurrentPrecedence == prec::Conditional &&
3213            Precedence == prec::Assignment && Current->is(tok::colon))) {
3214         break;
3215       }
3216 
3217       // Consume scopes: (), [], <> and {}
3218       // In addition to that we handle require clauses as scope, so that the
3219       // constraints in that are correctly indented.
3220       if (Current->opensScope() ||
3221           Current->isOneOf(TT_RequiresClause,
3222                            TT_RequiresClauseInARequiresExpression)) {
3223         // In fragment of a JavaScript template string can look like '}..${' and
3224         // thus close a scope and open a new one at the same time.
3225         while (Current && (!Current->closesScope() || Current->opensScope())) {
3226           next();
3227           parse();
3228         }
3229         next();
3230       } else {
3231         // Operator found.
3232         if (CurrentPrecedence == Precedence) {
3233           if (LatestOperator)
3234             LatestOperator->NextOperator = Current;
3235           LatestOperator = Current;
3236           Current->OperatorIndex = OperatorIndex;
3237           ++OperatorIndex;
3238         }
3239         next(/*SkipPastLeadingComments=*/Precedence > 0);
3240       }
3241     }
3242 
3243     // Group variables of the same type.
3244     if (Style.isVerilog() && Precedence == prec::Comma && VerilogFirstOfType)
3245       addFakeParenthesis(VerilogFirstOfType, prec::Comma);
3246 
3247     if (LatestOperator && (Current || Precedence > 0)) {
3248       // The requires clauses do not neccessarily end in a semicolon or a brace,
3249       // but just go over to struct/class or a function declaration, we need to
3250       // intervene so that the fake right paren is inserted correctly.
3251       auto End =
3252           (Start->Previous &&
3253            Start->Previous->isOneOf(TT_RequiresClause,
3254                                     TT_RequiresClauseInARequiresExpression))
3255               ? [this]() {
3256                   auto Ret = Current ? Current : Line.Last;
3257                   while (!Ret->ClosesRequiresClause && Ret->Previous)
3258                     Ret = Ret->Previous;
3259                   return Ret;
3260                 }()
3261               : nullptr;
3262 
3263       if (Precedence == PrecedenceArrowAndPeriod) {
3264         // Call expressions don't have a binary operator precedence.
3265         addFakeParenthesis(Start, prec::Unknown, End);
3266       } else {
3267         addFakeParenthesis(Start, prec::Level(Precedence), End);
3268       }
3269     }
3270   }
3271 
3272 private:
3273   /// Gets the precedence (+1) of the given token for binary operators
3274   /// and other tokens that we treat like binary operators.
getCurrentPrecedence()3275   int getCurrentPrecedence() {
3276     if (Current) {
3277       const FormatToken *NextNonComment = Current->getNextNonComment();
3278       if (Current->is(TT_ConditionalExpr))
3279         return prec::Conditional;
3280       if (NextNonComment && Current->is(TT_SelectorName) &&
3281           (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
3282            (Style.isProto() && NextNonComment->is(tok::less)))) {
3283         return prec::Assignment;
3284       }
3285       if (Current->is(TT_JsComputedPropertyName))
3286         return prec::Assignment;
3287       if (Current->is(TT_LambdaArrow))
3288         return prec::Comma;
3289       if (Current->is(TT_FatArrow))
3290         return prec::Assignment;
3291       if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
3292           (Current->is(tok::comment) && NextNonComment &&
3293            NextNonComment->is(TT_SelectorName))) {
3294         return 0;
3295       }
3296       if (Current->is(TT_RangeBasedForLoopColon))
3297         return prec::Comma;
3298       if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3299           Current->is(Keywords.kw_instanceof)) {
3300         return prec::Relational;
3301       }
3302       if (Style.isJavaScript() &&
3303           Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) {
3304         return prec::Relational;
3305       }
3306       if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
3307         return Current->getPrecedence();
3308       if (Current->isOneOf(tok::period, tok::arrow) &&
3309           Current->isNot(TT_TrailingReturnArrow)) {
3310         return PrecedenceArrowAndPeriod;
3311       }
3312       if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3313           Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
3314                            Keywords.kw_throws)) {
3315         return 0;
3316       }
3317       // In Verilog case labels are not on separate lines straight out of
3318       // UnwrappedLineParser. The colon is not part of an expression.
3319       if (Style.isVerilog() && Current->is(tok::colon))
3320         return 0;
3321     }
3322     return -1;
3323   }
3324 
addFakeParenthesis(FormatToken * Start,prec::Level Precedence,FormatToken * End=nullptr)3325   void addFakeParenthesis(FormatToken *Start, prec::Level Precedence,
3326                           FormatToken *End = nullptr) {
3327     // Do not assign fake parenthesis to tokens that are part of an
3328     // unexpanded macro call. The line within the macro call contains
3329     // the parenthesis and commas, and we will not find operators within
3330     // that structure.
3331     if (Start->MacroParent)
3332       return;
3333 
3334     Start->FakeLParens.push_back(Precedence);
3335     if (Precedence > prec::Unknown)
3336       Start->StartsBinaryExpression = true;
3337     if (!End && Current)
3338       End = Current->getPreviousNonComment();
3339     if (End) {
3340       ++End->FakeRParens;
3341       if (Precedence > prec::Unknown)
3342         End->EndsBinaryExpression = true;
3343     }
3344   }
3345 
3346   /// Parse unary operator expressions and surround them with fake
3347   /// parentheses if appropriate.
parseUnaryOperator()3348   void parseUnaryOperator() {
3349     llvm::SmallVector<FormatToken *, 2> Tokens;
3350     while (Current && Current->is(TT_UnaryOperator)) {
3351       Tokens.push_back(Current);
3352       next();
3353     }
3354     parse(PrecedenceArrowAndPeriod);
3355     for (FormatToken *Token : llvm::reverse(Tokens)) {
3356       // The actual precedence doesn't matter.
3357       addFakeParenthesis(Token, prec::Unknown);
3358     }
3359   }
3360 
parseConditionalExpr()3361   void parseConditionalExpr() {
3362     while (Current && Current->isTrailingComment())
3363       next();
3364     FormatToken *Start = Current;
3365     parse(prec::LogicalOr);
3366     if (!Current || Current->isNot(tok::question))
3367       return;
3368     next();
3369     parse(prec::Assignment);
3370     if (!Current || Current->isNot(TT_ConditionalExpr))
3371       return;
3372     next();
3373     parse(prec::Assignment);
3374     addFakeParenthesis(Start, prec::Conditional);
3375   }
3376 
next(bool SkipPastLeadingComments=true)3377   void next(bool SkipPastLeadingComments = true) {
3378     if (Current)
3379       Current = Current->Next;
3380     while (Current &&
3381            (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
3382            Current->isTrailingComment()) {
3383       Current = Current->Next;
3384     }
3385   }
3386 
3387   // Add fake parenthesis around declarations of the same type for example in a
3388   // module prototype. Return the first port / variable of the current type.
verilogGroupDecl(FormatToken * FirstOfType,FormatToken * PreviousComma)3389   FormatToken *verilogGroupDecl(FormatToken *FirstOfType,
3390                                 FormatToken *PreviousComma) {
3391     if (!Current)
3392       return nullptr;
3393 
3394     FormatToken *Start = Current;
3395 
3396     // Skip attributes.
3397     while (Start->startsSequence(tok::l_paren, tok::star)) {
3398       if (!(Start = Start->MatchingParen) ||
3399           !(Start = Start->getNextNonComment())) {
3400         return nullptr;
3401       }
3402     }
3403 
3404     FormatToken *Tok = Start;
3405 
3406     if (Tok->is(Keywords.kw_assign))
3407       Tok = Tok->getNextNonComment();
3408 
3409     // Skip any type qualifiers to find the first identifier. It may be either a
3410     // new type name or a variable name. There can be several type qualifiers
3411     // preceding a variable name, and we can not tell them apart by looking at
3412     // the word alone since a macro can be defined as either a type qualifier or
3413     // a variable name. Thus we use the last word before the dimensions instead
3414     // of the first word as the candidate for the variable or type name.
3415     FormatToken *First = nullptr;
3416     while (Tok) {
3417       FormatToken *Next = Tok->getNextNonComment();
3418 
3419       if (Tok->is(tok::hash)) {
3420         // Start of a macro expansion.
3421         First = Tok;
3422         Tok = Next;
3423         if (Tok)
3424           Tok = Tok->getNextNonComment();
3425       } else if (Tok->is(tok::hashhash)) {
3426         // Concatenation. Skip.
3427         Tok = Next;
3428         if (Tok)
3429           Tok = Tok->getNextNonComment();
3430       } else if (Keywords.isVerilogQualifier(*Tok) ||
3431                  Keywords.isVerilogIdentifier(*Tok)) {
3432         First = Tok;
3433         Tok = Next;
3434         // The name may have dots like `interface_foo.modport_foo`.
3435         while (Tok && Tok->isOneOf(tok::period, tok::coloncolon) &&
3436                (Tok = Tok->getNextNonComment())) {
3437           if (Keywords.isVerilogIdentifier(*Tok))
3438             Tok = Tok->getNextNonComment();
3439         }
3440       } else if (!Next) {
3441         Tok = nullptr;
3442       } else if (Tok->is(tok::l_paren)) {
3443         // Make sure the parenthesized list is a drive strength. Otherwise the
3444         // statement may be a module instantiation in which case we have already
3445         // found the instance name.
3446         if (Next->isOneOf(
3447                 Keywords.kw_highz0, Keywords.kw_highz1, Keywords.kw_large,
3448                 Keywords.kw_medium, Keywords.kw_pull0, Keywords.kw_pull1,
3449                 Keywords.kw_small, Keywords.kw_strong0, Keywords.kw_strong1,
3450                 Keywords.kw_supply0, Keywords.kw_supply1, Keywords.kw_weak0,
3451                 Keywords.kw_weak1)) {
3452           Tok->setType(TT_VerilogStrength);
3453           Tok = Tok->MatchingParen;
3454           if (Tok) {
3455             Tok->setType(TT_VerilogStrength);
3456             Tok = Tok->getNextNonComment();
3457           }
3458         } else {
3459           break;
3460         }
3461       } else if (Tok->is(Keywords.kw_verilogHash)) {
3462         // Delay control.
3463         if (Next->is(tok::l_paren))
3464           Next = Next->MatchingParen;
3465         if (Next)
3466           Tok = Next->getNextNonComment();
3467       } else {
3468         break;
3469       }
3470     }
3471 
3472     // Find the second identifier. If it exists it will be the name.
3473     FormatToken *Second = nullptr;
3474     // Dimensions.
3475     while (Tok && Tok->is(tok::l_square) && (Tok = Tok->MatchingParen))
3476       Tok = Tok->getNextNonComment();
3477     if (Tok && (Tok->is(tok::hash) || Keywords.isVerilogIdentifier(*Tok)))
3478       Second = Tok;
3479 
3480     // If the second identifier doesn't exist and there are qualifiers, the type
3481     // is implied.
3482     FormatToken *TypedName = nullptr;
3483     if (Second) {
3484       TypedName = Second;
3485       if (First && First->is(TT_Unknown))
3486         First->setType(TT_VerilogDimensionedTypeName);
3487     } else if (First != Start) {
3488       // If 'First' is null, then this isn't a declaration, 'TypedName' gets set
3489       // to null as intended.
3490       TypedName = First;
3491     }
3492 
3493     if (TypedName) {
3494       // This is a declaration with a new type.
3495       if (TypedName->is(TT_Unknown))
3496         TypedName->setType(TT_StartOfName);
3497       // Group variables of the previous type.
3498       if (FirstOfType && PreviousComma) {
3499         PreviousComma->setType(TT_VerilogTypeComma);
3500         addFakeParenthesis(FirstOfType, prec::Comma, PreviousComma->Previous);
3501       }
3502 
3503       FirstOfType = TypedName;
3504 
3505       // Don't let higher precedence handle the qualifiers. For example if we
3506       // have:
3507       //    parameter x = 0
3508       // We skip `parameter` here. This way the fake parentheses for the
3509       // assignment will be around `x = 0`.
3510       while (Current && Current != FirstOfType) {
3511         if (Current->opensScope()) {
3512           next();
3513           parse();
3514         }
3515         next();
3516       }
3517     }
3518 
3519     return FirstOfType;
3520   }
3521 
3522   const FormatStyle &Style;
3523   const AdditionalKeywords &Keywords;
3524   const AnnotatedLine &Line;
3525   FormatToken *Current;
3526 };
3527 
3528 } // end anonymous namespace
3529 
setCommentLineLevels(SmallVectorImpl<AnnotatedLine * > & Lines) const3530 void TokenAnnotator::setCommentLineLevels(
3531     SmallVectorImpl<AnnotatedLine *> &Lines) const {
3532   const AnnotatedLine *NextNonCommentLine = nullptr;
3533   for (AnnotatedLine *Line : llvm::reverse(Lines)) {
3534     assert(Line->First);
3535 
3536     // If the comment is currently aligned with the line immediately following
3537     // it, that's probably intentional and we should keep it.
3538     if (NextNonCommentLine && NextNonCommentLine->First->NewlinesBefore < 2 &&
3539         Line->isComment() && !isClangFormatOff(Line->First->TokenText) &&
3540         NextNonCommentLine->First->OriginalColumn ==
3541             Line->First->OriginalColumn) {
3542       const bool PPDirectiveOrImportStmt =
3543           NextNonCommentLine->Type == LT_PreprocessorDirective ||
3544           NextNonCommentLine->Type == LT_ImportStatement;
3545       if (PPDirectiveOrImportStmt)
3546         Line->Type = LT_CommentAbovePPDirective;
3547       // Align comments for preprocessor lines with the # in column 0 if
3548       // preprocessor lines are not indented. Otherwise, align with the next
3549       // line.
3550       Line->Level = Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
3551                             PPDirectiveOrImportStmt
3552                         ? 0
3553                         : NextNonCommentLine->Level;
3554     } else {
3555       NextNonCommentLine = Line->First->isNot(tok::r_brace) ? Line : nullptr;
3556     }
3557 
3558     setCommentLineLevels(Line->Children);
3559   }
3560 }
3561 
maxNestingDepth(const AnnotatedLine & Line)3562 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
3563   unsigned Result = 0;
3564   for (const auto *Tok = Line.First; Tok; Tok = Tok->Next)
3565     Result = std::max(Result, Tok->NestingLevel);
3566   return Result;
3567 }
3568 
3569 // Returns the name of a function with no return type, e.g. a constructor or
3570 // destructor.
getFunctionName(const AnnotatedLine & Line,FormatToken * & OpeningParen)3571 static FormatToken *getFunctionName(const AnnotatedLine &Line,
3572                                     FormatToken *&OpeningParen) {
3573   for (FormatToken *Tok = Line.getFirstNonComment(), *Name = nullptr; Tok;
3574        Tok = Tok->getNextNonComment()) {
3575     // Skip C++11 attributes both before and after the function name.
3576     if (Tok->is(tok::l_square) && Tok->is(TT_AttributeSquare)) {
3577       Tok = Tok->MatchingParen;
3578       if (!Tok)
3579         break;
3580       continue;
3581     }
3582 
3583     // Make sure the name is followed by a pair of parentheses.
3584     if (Name) {
3585       if (Tok->is(tok::l_paren) && Tok->isNot(TT_FunctionTypeLParen) &&
3586           Tok->MatchingParen) {
3587         OpeningParen = Tok;
3588         return Name;
3589       }
3590       return nullptr;
3591     }
3592 
3593     // Skip keywords that may precede the constructor/destructor name.
3594     if (Tok->isOneOf(tok::kw_friend, tok::kw_inline, tok::kw_virtual,
3595                      tok::kw_constexpr, tok::kw_consteval, tok::kw_explicit)) {
3596       continue;
3597     }
3598 
3599     // A qualified name may start from the global namespace.
3600     if (Tok->is(tok::coloncolon)) {
3601       Tok = Tok->Next;
3602       if (!Tok)
3603         break;
3604     }
3605 
3606     // Skip to the unqualified part of the name.
3607     while (Tok->startsSequence(tok::identifier, tok::coloncolon)) {
3608       assert(Tok->Next);
3609       Tok = Tok->Next->Next;
3610       if (!Tok)
3611         return nullptr;
3612     }
3613 
3614     // Skip the `~` if a destructor name.
3615     if (Tok->is(tok::tilde)) {
3616       Tok = Tok->Next;
3617       if (!Tok)
3618         break;
3619     }
3620 
3621     // Make sure the name is not already annotated, e.g. as NamespaceMacro.
3622     if (Tok->isNot(tok::identifier) || Tok->isNot(TT_Unknown))
3623       break;
3624 
3625     Name = Tok;
3626   }
3627 
3628   return nullptr;
3629 }
3630 
3631 // Checks if Tok is a constructor/destructor name qualified by its class name.
isCtorOrDtorName(const FormatToken * Tok)3632 static bool isCtorOrDtorName(const FormatToken *Tok) {
3633   assert(Tok && Tok->is(tok::identifier));
3634   const auto *Prev = Tok->Previous;
3635 
3636   if (Prev && Prev->is(tok::tilde))
3637     Prev = Prev->Previous;
3638 
3639   if (!Prev || !Prev->endsSequence(tok::coloncolon, tok::identifier))
3640     return false;
3641 
3642   assert(Prev->Previous);
3643   return Prev->Previous->TokenText == Tok->TokenText;
3644 }
3645 
annotate(AnnotatedLine & Line)3646 void TokenAnnotator::annotate(AnnotatedLine &Line) {
3647   AnnotatingParser Parser(Style, Line, Keywords, Scopes);
3648   Line.Type = Parser.parseLine();
3649 
3650   for (auto &Child : Line.Children)
3651     annotate(*Child);
3652 
3653   // With very deep nesting, ExpressionParser uses lots of stack and the
3654   // formatting algorithm is very slow. We're not going to do a good job here
3655   // anyway - it's probably generated code being formatted by mistake.
3656   // Just skip the whole line.
3657   if (maxNestingDepth(Line) > 50)
3658     Line.Type = LT_Invalid;
3659 
3660   if (Line.Type == LT_Invalid)
3661     return;
3662 
3663   ExpressionParser ExprParser(Style, Keywords, Line);
3664   ExprParser.parse();
3665 
3666   if (IsCpp) {
3667     FormatToken *OpeningParen = nullptr;
3668     auto *Tok = getFunctionName(Line, OpeningParen);
3669     if (Tok && ((!Scopes.empty() && Scopes.back() == ST_Class) ||
3670                 Line.endsWith(TT_FunctionLBrace) || isCtorOrDtorName(Tok))) {
3671       Tok->setFinalizedType(TT_CtorDtorDeclName);
3672       assert(OpeningParen);
3673       OpeningParen->setFinalizedType(TT_FunctionDeclarationLParen);
3674     }
3675   }
3676 
3677   if (Line.startsWith(TT_ObjCMethodSpecifier))
3678     Line.Type = LT_ObjCMethodDecl;
3679   else if (Line.startsWith(TT_ObjCDecl))
3680     Line.Type = LT_ObjCDecl;
3681   else if (Line.startsWith(TT_ObjCProperty))
3682     Line.Type = LT_ObjCProperty;
3683 
3684   auto *First = Line.First;
3685   First->SpacesRequiredBefore = 1;
3686   First->CanBreakBefore = First->MustBreakBefore;
3687 }
3688 
// This function heuristically determines whether 'Current' starts the name of a
// function declaration.
//
// \param LangOpts      Language options; used to classify type names and to
//                      decide (via CXXOperatorNames) whether C++-only
//                      heuristics apply.
// \param Current       Candidate name token. Must have a previous token.
// \param Line          The line containing Current.
// \param ClosingParen  Output. Set to the `)` matching the parameter list's
//                      `(` once such a list has been located. Note that it is
//                      assigned before the final heuristics run, so it may be
//                      set even when the function returns false, and it is
//                      left untouched on the early returns.
// \returns true if Current looks like the name of a function declaration.
static bool isFunctionDeclarationName(const LangOptions &LangOpts,
                                      const FormatToken &Current,
                                      const AnnotatedLine &Line,
                                      FormatToken *&ClosingParen) {
  assert(Current.Previous);

  // Already annotated as a function declaration name.
  if (Current.is(TT_FunctionDeclarationName))
    return true;

  // Only tokens carrying identifier info are considered as names.
  if (!Current.Tok.getIdentifierInfo())
    return false;

  const auto &Previous = *Current.Previous;

  // Reject the candidate if the token two positions back is an ObjC
  // declaration token.
  if (const auto *PrevPrev = Previous.Previous;
      PrevPrev && PrevPrev->is(TT_ObjCDecl)) {
    return false;
  }

  // Walks forward over the tokens that may form the name of an overloaded
  // operator. Returns the TT_OverloadedOperatorLParen token if one is reached,
  // or nullptr if the tokens run out or a token that is not recognized as part
  // of an operator name is found first.
  auto skipOperatorName =
      [&LangOpts](const FormatToken *Next) -> const FormatToken * {
    for (; Next; Next = Next->Next) {
      if (Next->is(TT_OverloadedOperatorLParen))
        return Next;
      if (Next->is(TT_OverloadedOperator))
        continue;
      if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
        // For 'new[]' and 'delete[]'.
        if (Next->Next &&
            Next->Next->startsSequence(tok::l_square, tok::r_square)) {
          Next = Next->Next->Next;
        }
        continue;
      }
      if (Next->startsSequence(tok::l_square, tok::r_square)) {
        // For operator[]().
        Next = Next->Next;
        continue;
      }
      if ((Next->isTypeName(LangOpts) || Next->is(tok::identifier)) &&
          Next->Next && Next->Next->isPointerOrReference()) {
        // For operator void*(), operator char*(), operator Foo*().
        Next = Next->Next;
        continue;
      }
      if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
        // Jump over a template argument list.
        Next = Next->MatchingParen;
        continue;
      }

      break;
    }
    return nullptr;
  };

  const auto *Next = Current.Next;
  const bool IsCpp = LangOpts.CXXOperatorNames;

  // Find parentheses of parameter list.
  if (Current.is(tok::kw_operator)) {
    // `operator` directly after a token with identifier info (other than
    // return/co_return) is accepted immediately.
    if (Previous.Tok.getIdentifierInfo() &&
        !Previous.isOneOf(tok::kw_return, tok::kw_co_return)) {
      return true;
    }
    // `operator` directly after the closing paren of a type declaration
    // paren pair is accepted as well.
    if (Previous.is(tok::r_paren) && Previous.is(TT_TypeDeclarationParen)) {
      assert(Previous.MatchingParen);
      assert(Previous.MatchingParen->is(tok::l_paren));
      assert(Previous.MatchingParen->is(TT_TypeDeclarationParen));
      return true;
    }
    // Otherwise `operator` must follow a pointer/reference token or a
    // template closer.
    if (!Previous.isPointerOrReference() && Previous.isNot(TT_TemplateCloser))
      return false;
    Next = skipOperatorName(Next);
  } else {
    // A regular name must be a top-level TT_StartOfName token.
    if (Current.isNot(TT_StartOfName) || Current.NestingLevel != 0)
      return false;
    // Skip template argument lists, `::`-qualified name parts and C++
    // attributes until the opening paren is found; anything else means this
    // is not a function declaration name.
    for (; Next; Next = Next->Next) {
      if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
        Next = Next->MatchingParen;
      } else if (Next->is(tok::coloncolon)) {
        Next = Next->Next;
        if (!Next)
          return false;
        if (Next->is(tok::kw_operator)) {
          Next = skipOperatorName(Next->Next);
          break;
        }
        if (Next->isNot(tok::identifier))
          return false;
      } else if (isCppAttribute(IsCpp, *Next)) {
        Next = Next->MatchingParen;
        if (!Next)
          return false;
      } else if (Next->is(tok::l_paren)) {
        break;
      } else {
        return false;
      }
    }
  }

  // Check whether parameter list can belong to a function declaration.
  if (!Next || Next->isNot(tok::l_paren) || !Next->MatchingParen)
    return false;
  ClosingParen = Next->MatchingParen;
  assert(ClosingParen->is(tok::r_paren));
  // If the line ends with "{", this is likely a function definition.
  if (Line.Last->is(tok::l_brace))
    return true;
  if (Next->Next == ClosingParen)
    return true; // Empty parentheses.
  // If there is an &/&& after the r_paren, this is likely a function.
  if (ClosingParen->Next && ClosingParen->Next->is(TT_PointerOrReference))
    return true;

  // Check for K&R C function definitions (and C++ function definitions with
  // unnamed parameters), e.g.:
  //   int f(i)
  //   {
  //     return i + 1;
  //   }
  //   bool g(size_t = 0, bool b = false)
  //   {
  //     return !b;
  //   }
  if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
      !Line.endsWith(tok::semi)) {
    return true;
  }

  // Scan the tokens between the parentheses: type-like tokens indicate a
  // declaration, while braces, ObjC method expressions and literals indicate
  // that this is not one. Nested parens and template argument lists are
  // skipped as a whole.
  for (const FormatToken *Tok = Next->Next; Tok && Tok != ClosingParen;
       Tok = Tok->Next) {
    if (Tok->is(TT_TypeDeclarationParen))
      return true;
    if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
      Tok = Tok->MatchingParen;
      continue;
    }
    if (Tok->is(tok::kw_const) || Tok->isTypeName(LangOpts) ||
        Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) {
      return true;
    }
    if (Tok->isOneOf(tok::l_brace, TT_ObjCMethodExpr) || Tok->Tok.isLiteral())
      return false;
  }
  // Nothing in the parameter list pointed to a declaration.
  return false;
}
3838 
mustBreakForReturnType(const AnnotatedLine & Line) const3839 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
3840   assert(Line.MightBeFunctionDecl);
3841 
3842   if ((Style.BreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
3843        Style.BreakAfterReturnType == FormatStyle::RTBS_TopLevelDefinitions) &&
3844       Line.Level > 0) {
3845     return false;
3846   }
3847 
3848   switch (Style.BreakAfterReturnType) {
3849   case FormatStyle::RTBS_None:
3850   case FormatStyle::RTBS_Automatic:
3851   case FormatStyle::RTBS_ExceptShortType:
3852     return false;
3853   case FormatStyle::RTBS_All:
3854   case FormatStyle::RTBS_TopLevel:
3855     return true;
3856   case FormatStyle::RTBS_AllDefinitions:
3857   case FormatStyle::RTBS_TopLevelDefinitions:
3858     return Line.mightBeFunctionDefinition();
3859   }
3860 
3861   return false;
3862 }
3863 
// Computes per-token formatting information for Line and, recursively and
// first, for all of its child lines. For the tokens of Line this sets
// TotalLength, SpacesRequiredBefore, MustBreakBefore, CanBreakBefore,
// SplitPenalty, UnbreakableTailLength (via calculateUnbreakableTailLengths)
// and IndentLevel. It also detects whether the line is a function declaration
// and annotates the related tokens (declaration name, opening paren, function
// body brace, trailing return arrow), and re-annotates */&/&& inside
// `operator` function calls as binary operators.
void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
  for (AnnotatedLine *ChildLine : Line.Children)
    calculateFormattingInformation(*ChildLine);

  auto *First = Line.First;
  // A multiline first token is counted as a full column limit.
  First->TotalLength = First->IsMultiline
                           ? Style.ColumnLimit
                           : Line.FirstStartColumn + First->ColumnWidth;
  FormatToken *Current = First->Next;
  bool InFunctionDecl = Line.MightBeFunctionDecl;
  bool AlignArrayOfStructures =
      (Style.AlignArrayOfStructures != FormatStyle::AIAS_None &&
       Line.Type == LT_ArrayOfStructInitializer);
  if (AlignArrayOfStructures)
    calculateArrayInitializerColumnList(Line);

  bool SeenName = false;
  bool LineIsFunctionDeclaration = false;
  FormatToken *ClosingParen = nullptr;
  FormatToken *AfterLastAttribute = nullptr;

  // Look for the first token (after the first one) that is a function
  // declaration name or an already annotated ctor/dtor name. While scanning,
  // remember whether a name start was seen and the last token that directly
  // follows a C++ attribute group.
  for (auto *Tok = Current; Tok; Tok = Tok->Next) {
    if (Tok->is(TT_StartOfName))
      SeenName = true;
    if (Tok->Previous->EndsCppAttributeGroup)
      AfterLastAttribute = Tok;
    if (const bool IsCtorOrDtor = Tok->is(TT_CtorDtorDeclName);
        IsCtorOrDtor ||
        isFunctionDeclarationName(LangOpts, *Tok, Line, ClosingParen)) {
      if (!IsCtorOrDtor)
        Tok->setFinalizedType(TT_FunctionDeclarationName);
      LineIsFunctionDeclaration = true;
      SeenName = true;
      if (ClosingParen) {
        auto *OpeningParen = ClosingParen->MatchingParen;
        assert(OpeningParen);
        // Only annotate the paren if it has no type yet.
        if (OpeningParen->is(TT_Unknown))
          OpeningParen->setType(TT_FunctionDeclarationLParen);
      }
      break;
    }
  }

  // A C++ function declaration line ending in "};": mark the braces found by
  // walking back from the end of the line as a block and the opening one as
  // the function's left brace.
  if (IsCpp && (LineIsFunctionDeclaration || First->is(TT_CtorDtorDeclName)) &&
      Line.endsWith(tok::semi, tok::r_brace)) {
    auto *Tok = Line.Last->Previous;
    // Walk back to the closing brace itself.
    while (Tok->isNot(tok::r_brace))
      Tok = Tok->Previous;
    if (auto *LBrace = Tok->MatchingParen; LBrace) {
      assert(LBrace->is(tok::l_brace));
      Tok->setBlockKind(BK_Block);
      LBrace->setBlockKind(BK_Block);
      LBrace->setFinalizedType(TT_FunctionLBrace);
    }
  }

  // Apply the BreakAfterAttributes style to the token following the last
  // attribute group, once a name has been seen on the line.
  if (IsCpp && SeenName && AfterLastAttribute &&
      mustBreakAfterAttributes(*AfterLastAttribute, Style)) {
    AfterLastAttribute->MustBreakBefore = true;
    if (LineIsFunctionDeclaration)
      Line.ReturnTypeWrapped = true;
  }

  if (IsCpp) {
    if (!LineIsFunctionDeclaration) {
      // Annotate */&/&& in `operator` function calls as binary operators.
      for (const auto *Tok = First; Tok; Tok = Tok->Next) {
        if (Tok->isNot(tok::kw_operator))
          continue;
        // Advance to the operator's opening paren.
        do {
          Tok = Tok->Next;
        } while (Tok && Tok->isNot(TT_OverloadedOperatorLParen));
        if (!Tok || !Tok->MatchingParen)
          break;
        const auto *LeftParen = Tok;
        // Within the parentheses, `identifier */&/&& identifier` is treated as
        // a binary expression.
        for (Tok = Tok->Next; Tok && Tok != LeftParen->MatchingParen;
             Tok = Tok->Next) {
          if (Tok->isNot(tok::identifier))
            continue;
          auto *Next = Tok->Next;
          const bool NextIsBinaryOperator =
              Next && Next->isPointerOrReference() && Next->Next &&
              Next->Next->is(tok::identifier);
          if (!NextIsBinaryOperator)
            continue;
          Next->setType(TT_BinaryOperator);
          Tok = Next;
        }
      }
    } else if (ClosingParen) {
      // After the parameter list of a function declaration, annotate the first
      // `->` as a trailing return arrow. Stop at a constructor initializer
      // colon and skip the parenthesized arguments of trailing annotations.
      for (auto *Tok = ClosingParen->Next; Tok; Tok = Tok->Next) {
        if (Tok->is(TT_CtorInitializerColon))
          break;
        if (Tok->is(tok::arrow)) {
          Tok->setType(TT_TrailingReturnArrow);
          break;
        }
        if (Tok->isNot(TT_TrailingAnnotation))
          continue;
        const auto *Next = Tok->Next;
        if (!Next || Next->isNot(tok::l_paren))
          continue;
        Tok = Next->MatchingParen;
        if (!Tok)
          break;
      }
    }
  }

  // Main pass: compute spacing, break constraints, accumulated length and
  // split penalty for every token after the first one.
  while (Current) {
    const FormatToken *Prev = Current->Previous;
    if (Current->is(TT_LineComment)) {
      if (Prev->is(BK_BracedInit) && Prev->opensScope()) {
        Current->SpacesRequiredBefore =
            (Style.Cpp11BracedListStyle && !Style.SpacesInParensOptions.Other)
                ? 0
                : 1;
      } else if (Prev->is(TT_VerilogMultiLineListLParen)) {
        Current->SpacesRequiredBefore = 0;
      } else {
        Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
      }

      // If we find a trailing comment, iterate backwards to determine whether
      // it seems to relate to a specific parameter. If so, break before that
      // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
      // to the previous line in:
      //   SomeFunction(a,
      //                b, // comment
      //                c);
      if (!Current->HasUnescapedNewline) {
        for (FormatToken *Parameter = Current->Previous; Parameter;
             Parameter = Parameter->Previous) {
          if (Parameter->isOneOf(tok::comment, tok::r_brace))
            break;
          if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
            if (Parameter->Previous->isNot(TT_CtorInitializerComma) &&
                Parameter->HasUnescapedNewline) {
              Parameter->MustBreakBefore = true;
            }
            break;
          }
        }
      }
    } else if (!Current->Finalized && Current->SpacesRequiredBefore == 0 &&
               spaceRequiredBefore(Line, *Current)) {
      Current->SpacesRequiredBefore = 1;
    }

    // A break is mandatory after a child block whose last line ends in a line
    // comment; otherwise defer to mustBreakBefore() and, for function
    // declaration names, to the return type breaking style.
    const auto &Children = Prev->Children;
    if (!Children.empty() && Children.back()->Last->is(TT_LineComment)) {
      Current->MustBreakBefore = true;
    } else {
      Current->MustBreakBefore =
          Current->MustBreakBefore || mustBreakBefore(Line, *Current);
      if (!Current->MustBreakBefore && InFunctionDecl &&
          Current->is(TT_FunctionDeclarationName)) {
        Current->MustBreakBefore = mustBreakForReturnType(Line);
      }
    }

    Current->CanBreakBefore =
        Current->MustBreakBefore || canBreakBefore(Line, *Current);
    // Size contributed by a single child line of the previous token; a child
    // ending in a trailing comment counts as a full column limit.
    unsigned ChildSize = 0;
    if (Prev->Children.size() == 1) {
      FormatToken &LastOfChild = *Prev->Children[0]->Last;
      ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
                                                  : LastOfChild.TotalLength + 1;
    }
    // Tokens that force the line to span multiple lines add a full column
    // limit to the accumulated length instead of their own width.
    if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
        (Prev->Children.size() == 1 &&
         Prev->Children[0]->First->MustBreakBefore) ||
        Current->IsMultiline) {
      Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
    } else {
      Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
                             ChildSize + Current->SpacesRequiredBefore;
    }

    // Tokens from the constructor initializer colon onwards are no longer
    // treated as part of the function declaration.
    if (Current->is(TT_CtorInitializerColon))
      InFunctionDecl = false;

    // FIXME: Only calculate this if CanBreakBefore is true once static
    // initializers etc. are sorted out.
    // FIXME: Move magic numbers to a better place.

    // Reduce penalty for aligning ObjC method arguments using the colon
    // alignment as this is the canonical way (still prefer fitting everything
    // into one line if possible). Trying to fit a whole expression into one
    // line should not force other line breaks (e.g. when ObjC method
    // expression is a part of other expression).
    Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
    if (Style.Language == FormatStyle::LK_ObjC &&
        Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
      if (Current->ParameterIndex == 1)
        Current->SplitPenalty += 5 * Current->BindingStrength;
    } else {
      Current->SplitPenalty += 20 * Current->BindingStrength;
    }

    Current = Current->Next;
  }

  calculateUnbreakableTailLengths(Line);
  // Assign indent levels: a token closing a block (or block-type list) is one
  // level shallower than the block's contents, and everything after a token
  // opening one is one level deeper.
  unsigned IndentLevel = Line.Level;
  for (Current = First; Current; Current = Current->Next) {
    if (Current->Role)
      Current->Role->precomputeFormattingInfos(Current);
    if (Current->MatchingParen &&
        Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
        IndentLevel > 0) {
      --IndentLevel;
    }
    Current->IndentLevel = IndentLevel;
    if (Current->opensBlockOrBlockTypeList(Style))
      ++IndentLevel;
  }

  LLVM_DEBUG({ printDebugInfo(Line); });
}
4084 
calculateUnbreakableTailLengths(AnnotatedLine & Line) const4085 void TokenAnnotator::calculateUnbreakableTailLengths(
4086     AnnotatedLine &Line) const {
4087   unsigned UnbreakableTailLength = 0;
4088   FormatToken *Current = Line.Last;
4089   while (Current) {
4090     Current->UnbreakableTailLength = UnbreakableTailLength;
4091     if (Current->CanBreakBefore ||
4092         Current->isOneOf(tok::comment, tok::string_literal)) {
4093       UnbreakableTailLength = 0;
4094     } else {
4095       UnbreakableTailLength +=
4096           Current->ColumnWidth + Current->SpacesRequiredBefore;
4097     }
4098     Current = Current->Previous;
4099   }
4100 }
4101 
calculateArrayInitializerColumnList(AnnotatedLine & Line) const4102 void TokenAnnotator::calculateArrayInitializerColumnList(
4103     AnnotatedLine &Line) const {
4104   if (Line.First == Line.Last)
4105     return;
4106   auto *CurrentToken = Line.First;
4107   CurrentToken->ArrayInitializerLineStart = true;
4108   unsigned Depth = 0;
4109   while (CurrentToken && CurrentToken != Line.Last) {
4110     if (CurrentToken->is(tok::l_brace)) {
4111       CurrentToken->IsArrayInitializer = true;
4112       if (CurrentToken->Next)
4113         CurrentToken->Next->MustBreakBefore = true;
4114       CurrentToken =
4115           calculateInitializerColumnList(Line, CurrentToken->Next, Depth + 1);
4116     } else {
4117       CurrentToken = CurrentToken->Next;
4118     }
4119   }
4120 }
4121 
calculateInitializerColumnList(AnnotatedLine & Line,FormatToken * CurrentToken,unsigned Depth) const4122 FormatToken *TokenAnnotator::calculateInitializerColumnList(
4123     AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth) const {
4124   while (CurrentToken && CurrentToken != Line.Last) {
4125     if (CurrentToken->is(tok::l_brace))
4126       ++Depth;
4127     else if (CurrentToken->is(tok::r_brace))
4128       --Depth;
4129     if (Depth == 2 && CurrentToken->isOneOf(tok::l_brace, tok::comma)) {
4130       CurrentToken = CurrentToken->Next;
4131       if (!CurrentToken)
4132         break;
4133       CurrentToken->StartsColumn = true;
4134       CurrentToken = CurrentToken->Previous;
4135     }
4136     CurrentToken = CurrentToken->Next;
4137   }
4138   return CurrentToken;
4139 }
4140 
splitPenalty(const AnnotatedLine & Line,const FormatToken & Tok,bool InFunctionDecl) const4141 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
4142                                       const FormatToken &Tok,
4143                                       bool InFunctionDecl) const {
4144   const FormatToken &Left = *Tok.Previous;
4145   const FormatToken &Right = Tok;
4146 
4147   if (Left.is(tok::semi))
4148     return 0;
4149 
4150   // Language specific handling.
4151   if (Style.Language == FormatStyle::LK_Java) {
4152     if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
4153       return 1;
4154     if (Right.is(Keywords.kw_implements))
4155       return 2;
4156     if (Left.is(tok::comma) && Left.NestingLevel == 0)
4157       return 3;
4158   } else if (Style.isJavaScript()) {
4159     if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
4160       return 100;
4161     if (Left.is(TT_JsTypeColon))
4162       return 35;
4163     if ((Left.is(TT_TemplateString) && Left.TokenText.ends_with("${")) ||
4164         (Right.is(TT_TemplateString) && Right.TokenText.starts_with("}"))) {
4165       return 100;
4166     }
4167     // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
4168     if (Left.opensScope() && Right.closesScope())
4169       return 200;
4170   } else if (Style.Language == FormatStyle::LK_Proto) {
4171     if (Right.is(tok::l_square))
4172       return 1;
4173     if (Right.is(tok::period))
4174       return 500;
4175   }
4176 
4177   if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
4178     return 1;
4179   if (Right.is(tok::l_square)) {
4180     if (Left.is(tok::r_square))
4181       return 200;
4182     // Slightly prefer formatting local lambda definitions like functions.
4183     if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
4184       return 35;
4185     if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
4186                        TT_ArrayInitializerLSquare,
4187                        TT_DesignatedInitializerLSquare, TT_AttributeSquare)) {
4188       return 500;
4189     }
4190   }
4191 
4192   if (Left.is(tok::coloncolon))
4193     return Style.PenaltyBreakScopeResolution;
4194   if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
4195       Right.is(tok::kw_operator)) {
4196     if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
4197       return 3;
4198     if (Left.is(TT_StartOfName))
4199       return 110;
4200     if (InFunctionDecl && Right.NestingLevel == 0)
4201       return Style.PenaltyReturnTypeOnItsOwnLine;
4202     return 200;
4203   }
4204   if (Right.is(TT_PointerOrReference))
4205     return 190;
4206   if (Right.is(TT_LambdaArrow))
4207     return 110;
4208   if (Left.is(tok::equal) && Right.is(tok::l_brace))
4209     return 160;
4210   if (Left.is(TT_CastRParen))
4211     return 100;
4212   if (Left.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union))
4213     return 5000;
4214   if (Left.is(tok::comment))
4215     return 1000;
4216 
4217   if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
4218                    TT_CtorInitializerColon)) {
4219     return 2;
4220   }
4221 
4222   if (Right.isMemberAccess()) {
4223     // Breaking before the "./->" of a chained call/member access is reasonably
4224     // cheap, as formatting those with one call per line is generally
4225     // desirable. In particular, it should be cheaper to break before the call
4226     // than it is to break inside a call's parameters, which could lead to weird
4227     // "hanging" indents. The exception is the very last "./->" to support this
4228     // frequent pattern:
4229     //
4230     //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
4231     //       dddddddd);
4232     //
4233     // which might otherwise be blown up onto many lines. Here, clang-format
4234     // won't produce "hanging" indents anyway as there is no other trailing
4235     // call.
4236     //
4237     // Also apply higher penalty is not a call as that might lead to a wrapping
4238     // like:
4239     //
4240     //   aaaaaaa
4241     //       .aaaaaaaaa.bbbbbbbb(cccccccc);
4242     return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
4243                ? 150
4244                : 35;
4245   }
4246 
4247   if (Right.is(TT_TrailingAnnotation) &&
4248       (!Right.Next || Right.Next->isNot(tok::l_paren))) {
4249     // Moving trailing annotations to the next line is fine for ObjC method
4250     // declarations.
4251     if (Line.startsWith(TT_ObjCMethodSpecifier))
4252       return 10;
4253     // Generally, breaking before a trailing annotation is bad unless it is
4254     // function-like. It seems to be especially preferable to keep standard
4255     // annotations (i.e. "const", "final" and "override") on the same line.
4256     // Use a slightly higher penalty after ")" so that annotations like
4257     // "const override" are kept together.
4258     bool is_short_annotation = Right.TokenText.size() < 10;
4259     return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
4260   }
4261 
4262   // In for-loops, prefer breaking at ',' and ';'.
4263   if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
4264     return 4;
4265 
4266   // In Objective-C method expressions, prefer breaking before "param:" over
4267   // breaking after it.
4268   if (Right.is(TT_SelectorName))
4269     return 0;
4270   if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
4271     return Line.MightBeFunctionDecl ? 50 : 500;
4272 
4273   // In Objective-C type declarations, avoid breaking after the category's
4274   // open paren (we'll prefer breaking after the protocol list's opening
4275   // angle bracket, if present).
4276   if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
4277       Left.Previous->isOneOf(tok::identifier, tok::greater)) {
4278     return 500;
4279   }
4280 
4281   if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)
4282     return Style.PenaltyBreakOpenParenthesis;
4283   if (Left.is(tok::l_paren) && InFunctionDecl &&
4284       Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
4285     return 100;
4286   }
4287   if (Left.is(tok::l_paren) && Left.Previous &&
4288       (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) ||
4289        Left.Previous->isIf())) {
4290     return 1000;
4291   }
4292   if (Left.is(tok::equal) && InFunctionDecl)
4293     return 110;
4294   if (Right.is(tok::r_brace))
4295     return 1;
4296   if (Left.is(TT_TemplateOpener))
4297     return 100;
4298   if (Left.opensScope()) {
4299     // If we aren't aligning after opening parens/braces we can always break
4300     // here unless the style does not want us to place all arguments on the
4301     // next line.
4302     if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
4303         (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) {
4304       return 0;
4305     }
4306     if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
4307       return 19;
4308     return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
4309                                    : 19;
4310   }
4311   if (Left.is(TT_JavaAnnotation))
4312     return 50;
4313 
4314   if (Left.is(TT_UnaryOperator))
4315     return 60;
4316   if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
4317       Left.Previous->isLabelString() &&
4318       (Left.NextOperator || Left.OperatorIndex != 0)) {
4319     return 50;
4320   }
4321   if (Right.is(tok::plus) && Left.isLabelString() &&
4322       (Right.NextOperator || Right.OperatorIndex != 0)) {
4323     return 25;
4324   }
4325   if (Left.is(tok::comma))
4326     return 1;
4327   if (Right.is(tok::lessless) && Left.isLabelString() &&
4328       (Right.NextOperator || Right.OperatorIndex != 1)) {
4329     return 25;
4330   }
4331   if (Right.is(tok::lessless)) {
4332     // Breaking at a << is really cheap.
4333     if (Left.isNot(tok::r_paren) || Right.OperatorIndex > 0) {
4334       // Slightly prefer to break before the first one in log-like statements.
4335       return 2;
4336     }
4337     return 1;
4338   }
4339   if (Left.ClosesTemplateDeclaration)
4340     return Style.PenaltyBreakTemplateDeclaration;
4341   if (Left.ClosesRequiresClause)
4342     return 0;
4343   if (Left.is(TT_ConditionalExpr))
4344     return prec::Conditional;
4345   prec::Level Level = Left.getPrecedence();
4346   if (Level == prec::Unknown)
4347     Level = Right.getPrecedence();
4348   if (Level == prec::Assignment)
4349     return Style.PenaltyBreakAssignment;
4350   if (Level != prec::Unknown)
4351     return Level;
4352 
4353   return 3;
4354 }
4355 
spaceRequiredBeforeParens(const FormatToken & Right) const4356 bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
4357   if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always)
4358     return true;
4359   if (Right.is(TT_OverloadedOperatorLParen) &&
4360       Style.SpaceBeforeParensOptions.AfterOverloadedOperator) {
4361     return true;
4362   }
4363   if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses &&
4364       Right.ParameterCount > 0) {
4365     return true;
4366   }
4367   return false;
4368 }
4369 
spaceRequiredBetween(const AnnotatedLine & Line,const FormatToken & Left,const FormatToken & Right) const4370 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
4371                                           const FormatToken &Left,
4372                                           const FormatToken &Right) const {
4373   if (Left.is(tok::kw_return) &&
4374       !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) {
4375     return true;
4376   }
4377   if (Left.is(tok::kw_throw) && Right.is(tok::l_paren) && Right.MatchingParen &&
4378       Right.MatchingParen->is(TT_CastRParen)) {
4379     return true;
4380   }
4381   if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
4382     return true;
4383   if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
4384       Left.Tok.getObjCKeywordID() == tok::objc_property) {
4385     return true;
4386   }
4387   if (Right.is(tok::hashhash))
4388     return Left.is(tok::hash);
4389   if (Left.isOneOf(tok::hashhash, tok::hash))
4390     return Right.is(tok::hash);
4391   if (Left.is(BK_Block) && Right.is(tok::r_brace) &&
4392       Right.MatchingParen == &Left && Line.Children.empty()) {
4393     return Style.SpaceInEmptyBlock;
4394   }
4395   if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
4396       (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
4397        Right.is(tok::r_brace) && Right.isNot(BK_Block))) {
4398     return Style.SpacesInParensOptions.InEmptyParentheses;
4399   }
4400   if (Style.SpacesInParens == FormatStyle::SIPO_Custom &&
4401       Style.SpacesInParensOptions.ExceptDoubleParentheses &&
4402       Left.is(tok::r_paren) && Right.is(tok::r_paren)) {
4403     auto *InnerLParen = Left.MatchingParen;
4404     if (InnerLParen && InnerLParen->Previous == Right.MatchingParen) {
4405       InnerLParen->SpacesRequiredBefore = 0;
4406       return false;
4407     }
4408   }
4409   if (Style.SpacesInParensOptions.InConditionalStatements) {
4410     const FormatToken *LeftParen = nullptr;
4411     if (Left.is(tok::l_paren))
4412       LeftParen = &Left;
4413     else if (Right.is(tok::r_paren) && Right.MatchingParen)
4414       LeftParen = Right.MatchingParen;
4415     if (LeftParen) {
4416       if (LeftParen->is(TT_ConditionLParen))
4417         return true;
4418       if (LeftParen->Previous && isKeywordWithCondition(*LeftParen->Previous))
4419         return true;
4420     }
4421   }
4422 
4423   // trailing return type 'auto': []() -> auto {}, auto foo() -> auto {}
4424   if (Left.is(tok::kw_auto) && Right.isOneOf(TT_LambdaLBrace, TT_FunctionLBrace,
4425                                              // function return type 'auto'
4426                                              TT_FunctionTypeLParen)) {
4427     return true;
4428   }
4429 
4430   // auto{x} auto(x)
4431   if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace))
4432     return false;
4433 
4434   const auto *BeforeLeft = Left.Previous;
4435 
4436   // operator co_await(x)
4437   if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && BeforeLeft &&
4438       BeforeLeft->is(tok::kw_operator)) {
4439     return false;
4440   }
4441   // co_await (x), co_yield (x), co_return (x)
4442   if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) &&
4443       !Right.isOneOf(tok::semi, tok::r_paren)) {
4444     return true;
4445   }
4446 
4447   if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) {
4448     return (Right.is(TT_CastRParen) ||
4449             (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
4450                ? Style.SpacesInParensOptions.InCStyleCasts
4451                : Style.SpacesInParensOptions.Other;
4452   }
4453   if (Right.isOneOf(tok::semi, tok::comma))
4454     return false;
4455   if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
4456     bool IsLightweightGeneric = Right.MatchingParen &&
4457                                 Right.MatchingParen->Next &&
4458                                 Right.MatchingParen->Next->is(tok::colon);
4459     return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
4460   }
4461   if (Right.is(tok::less) && Left.is(tok::kw_template))
4462     return Style.SpaceAfterTemplateKeyword;
4463   if (Left.isOneOf(tok::exclaim, tok::tilde))
4464     return false;
4465   if (Left.is(tok::at) &&
4466       Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
4467                     tok::numeric_constant, tok::l_paren, tok::l_brace,
4468                     tok::kw_true, tok::kw_false)) {
4469     return false;
4470   }
4471   if (Left.is(tok::colon))
4472     return Left.isNot(TT_ObjCMethodExpr);
4473   if (Left.is(tok::coloncolon))
4474     return false;
4475   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
4476     if (Style.Language == FormatStyle::LK_TextProto ||
4477         (Style.Language == FormatStyle::LK_Proto &&
4478          (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
4479       // Format empty list as `<>`.
4480       if (Left.is(tok::less) && Right.is(tok::greater))
4481         return false;
4482       return !Style.Cpp11BracedListStyle;
4483     }
4484     // Don't attempt to format operator<(), as it is handled later.
4485     if (Right.isNot(TT_OverloadedOperatorLParen))
4486       return false;
4487   }
4488   if (Right.is(tok::ellipsis)) {
4489     return Left.Tok.isLiteral() || (Left.is(tok::identifier) && BeforeLeft &&
4490                                     BeforeLeft->is(tok::kw_case));
4491   }
4492   if (Left.is(tok::l_square) && Right.is(tok::amp))
4493     return Style.SpacesInSquareBrackets;
4494   if (Right.is(TT_PointerOrReference)) {
4495     if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
4496       if (!Left.MatchingParen)
4497         return true;
4498       FormatToken *TokenBeforeMatchingParen =
4499           Left.MatchingParen->getPreviousNonComment();
4500       if (!TokenBeforeMatchingParen || Left.isNot(TT_TypeDeclarationParen))
4501         return true;
4502     }
4503     // Add a space if the previous token is a pointer qualifier or the closing
4504     // parenthesis of __attribute__(()) expression and the style requires spaces
4505     // after pointer qualifiers.
4506     if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After ||
4507          Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
4508         (Left.is(TT_AttributeRParen) ||
4509          Left.canBePointerOrReferenceQualifier())) {
4510       return true;
4511     }
4512     if (Left.Tok.isLiteral())
4513       return true;
4514     // for (auto a = 0, b = 0; const auto & c : {1, 2, 3})
4515     if (Left.isTypeOrIdentifier(LangOpts) && Right.Next && Right.Next->Next &&
4516         Right.Next->Next->is(TT_RangeBasedForLoopColon)) {
4517       return getTokenPointerOrReferenceAlignment(Right) !=
4518              FormatStyle::PAS_Left;
4519     }
4520     return !Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
4521            (getTokenPointerOrReferenceAlignment(Right) !=
4522                 FormatStyle::PAS_Left ||
4523             (Line.IsMultiVariableDeclStmt &&
4524              (Left.NestingLevel == 0 ||
4525               (Left.NestingLevel == 1 && startsWithInitStatement(Line)))));
4526   }
4527   if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
4528       (Left.isNot(TT_PointerOrReference) ||
4529        (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right &&
4530         !Line.IsMultiVariableDeclStmt))) {
4531     return true;
4532   }
4533   if (Left.is(TT_PointerOrReference)) {
4534     // Add a space if the next token is a pointer qualifier and the style
4535     // requires spaces before pointer qualifiers.
4536     if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before ||
4537          Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
4538         Right.canBePointerOrReferenceQualifier()) {
4539       return true;
4540     }
4541     // & 1
4542     if (Right.Tok.isLiteral())
4543       return true;
4544     // & /* comment
4545     if (Right.is(TT_BlockComment))
4546       return true;
4547     // foo() -> const Bar * override/final
4548     // S::foo() & noexcept/requires
4549     if (Right.isOneOf(Keywords.kw_override, Keywords.kw_final, tok::kw_noexcept,
4550                       TT_RequiresClause) &&
4551         Right.isNot(TT_StartOfName)) {
4552       return true;
4553     }
4554     // & {
4555     if (Right.is(tok::l_brace) && Right.is(BK_Block))
4556       return true;
4557     // for (auto a = 0, b = 0; const auto& c : {1, 2, 3})
4558     if (BeforeLeft && BeforeLeft->isTypeOrIdentifier(LangOpts) && Right.Next &&
4559         Right.Next->is(TT_RangeBasedForLoopColon)) {
4560       return getTokenPointerOrReferenceAlignment(Left) !=
4561              FormatStyle::PAS_Right;
4562     }
4563     if (Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
4564                       tok::l_paren)) {
4565       return false;
4566     }
4567     if (getTokenPointerOrReferenceAlignment(Left) == FormatStyle::PAS_Right)
4568       return false;
4569     // FIXME: Setting IsMultiVariableDeclStmt for the whole line is error-prone,
4570     // because it does not take into account nested scopes like lambdas.
4571     // In multi-variable declaration statements, attach */& to the variable
4572     // independently of the style. However, avoid doing it if we are in a nested
4573     // scope, e.g. lambda. We still need to special-case statements with
4574     // initializers.
4575     if (Line.IsMultiVariableDeclStmt &&
4576         (Left.NestingLevel == Line.First->NestingLevel ||
4577          ((Left.NestingLevel == Line.First->NestingLevel + 1) &&
4578           startsWithInitStatement(Line)))) {
4579       return false;
4580     }
4581     if (!BeforeLeft)
4582       return false;
4583     if (BeforeLeft->is(tok::coloncolon)) {
4584       if (Left.isNot(tok::star))
4585         return false;
4586       assert(Style.PointerAlignment != FormatStyle::PAS_Right);
4587       if (!Right.startsSequence(tok::identifier, tok::r_paren))
4588         return true;
4589       assert(Right.Next);
4590       const auto *LParen = Right.Next->MatchingParen;
4591       return !LParen || LParen->isNot(TT_FunctionTypeLParen);
4592     }
4593     return !BeforeLeft->isOneOf(tok::l_paren, tok::l_square);
4594   }
4595   // Ensure right pointer alignment with ellipsis e.g. int *...P
4596   if (Left.is(tok::ellipsis) && BeforeLeft &&
4597       BeforeLeft->isPointerOrReference()) {
4598     return Style.PointerAlignment != FormatStyle::PAS_Right;
4599   }
4600 
4601   if (Right.is(tok::star) && Left.is(tok::l_paren))
4602     return false;
4603   if (Left.is(tok::star) && Right.isPointerOrReference())
4604     return false;
4605   if (Right.isPointerOrReference()) {
4606     const FormatToken *Previous = &Left;
4607     while (Previous && Previous->isNot(tok::kw_operator)) {
4608       if (Previous->is(tok::identifier) || Previous->isTypeName(LangOpts)) {
4609         Previous = Previous->getPreviousNonComment();
4610         continue;
4611       }
4612       if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) {
4613         Previous = Previous->MatchingParen->getPreviousNonComment();
4614         continue;
4615       }
4616       if (Previous->is(tok::coloncolon)) {
4617         Previous = Previous->getPreviousNonComment();
4618         continue;
4619       }
4620       break;
4621     }
4622     // Space between the type and the * in:
4623     //   operator void*()
4624     //   operator char*()
4625     //   operator void const*()
4626     //   operator void volatile*()
4627     //   operator /*comment*/ const char*()
4628     //   operator volatile /*comment*/ char*()
4629     //   operator Foo*()
4630     //   operator C<T>*()
4631     //   operator std::Foo*()
4632     //   operator C<T>::D<U>*()
4633     // dependent on PointerAlignment style.
4634     if (Previous) {
4635       if (Previous->endsSequence(tok::kw_operator))
4636         return Style.PointerAlignment != FormatStyle::PAS_Left;
4637       if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) {
4638         return (Style.PointerAlignment != FormatStyle::PAS_Left) ||
4639                (Style.SpaceAroundPointerQualifiers ==
4640                 FormatStyle::SAPQ_After) ||
4641                (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both);
4642       }
4643     }
4644   }
4645   if (Style.isCSharp() && Left.is(Keywords.kw_is) && Right.is(tok::l_square))
4646     return true;
4647   const auto SpaceRequiredForArrayInitializerLSquare =
4648       [](const FormatToken &LSquareTok, const FormatStyle &Style) {
4649         return Style.SpacesInContainerLiterals ||
4650                (Style.isProto() && !Style.Cpp11BracedListStyle &&
4651                 LSquareTok.endsSequence(tok::l_square, tok::colon,
4652                                         TT_SelectorName));
4653       };
4654   if (Left.is(tok::l_square)) {
4655     return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
4656             SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
4657            (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare,
4658                          TT_LambdaLSquare) &&
4659             Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
4660   }
4661   if (Right.is(tok::r_square)) {
4662     return Right.MatchingParen &&
4663            ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
4664              SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
4665                                                      Style)) ||
4666             (Style.SpacesInSquareBrackets &&
4667              Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
4668                                           TT_StructuredBindingLSquare,
4669                                           TT_LambdaLSquare)));
4670   }
4671   if (Right.is(tok::l_square) &&
4672       !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
4673                      TT_DesignatedInitializerLSquare,
4674                      TT_StructuredBindingLSquare, TT_AttributeSquare) &&
4675       !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
4676       !(Left.isNot(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
4677         Right.is(TT_ArraySubscriptLSquare))) {
4678     return false;
4679   }
4680   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
4681     return !Left.Children.empty(); // No spaces in "{}".
4682   if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) ||
4683       (Right.is(tok::r_brace) && Right.MatchingParen &&
4684        Right.MatchingParen->isNot(BK_Block))) {
4685     return !Style.Cpp11BracedListStyle || Style.SpacesInParensOptions.Other;
4686   }
4687   if (Left.is(TT_BlockComment)) {
4688     // No whitespace in x(/*foo=*/1), except for JavaScript.
4689     return Style.isJavaScript() || !Left.TokenText.ends_with("=*/");
4690   }
4691 
4692   // Space between template and attribute.
4693   // e.g. template <typename T> [[nodiscard]] ...
4694   if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
4695     return true;
4696   // Space before parentheses common for all languages
4697   if (Right.is(tok::l_paren)) {
4698     if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen))
4699       return spaceRequiredBeforeParens(Right);
4700     if (Left.isOneOf(TT_RequiresClause,
4701                      TT_RequiresClauseInARequiresExpression)) {
4702       return Style.SpaceBeforeParensOptions.AfterRequiresInClause ||
4703              spaceRequiredBeforeParens(Right);
4704     }
4705     if (Left.is(TT_RequiresExpression)) {
4706       return Style.SpaceBeforeParensOptions.AfterRequiresInExpression ||
4707              spaceRequiredBeforeParens(Right);
4708     }
4709     if (Left.is(TT_AttributeRParen) ||
4710         (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) {
4711       return true;
4712     }
4713     if (Left.is(TT_ForEachMacro)) {
4714       return Style.SpaceBeforeParensOptions.AfterForeachMacros ||
4715              spaceRequiredBeforeParens(Right);
4716     }
4717     if (Left.is(TT_IfMacro)) {
4718       return Style.SpaceBeforeParensOptions.AfterIfMacros ||
4719              spaceRequiredBeforeParens(Right);
4720     }
4721     if (Style.SpaceBeforeParens == FormatStyle::SBPO_Custom &&
4722         Left.isOneOf(tok::kw_new, tok::kw_delete) &&
4723         Right.isNot(TT_OverloadedOperatorLParen) &&
4724         !(Line.MightBeFunctionDecl && Left.is(TT_FunctionDeclarationName))) {
4725       return Style.SpaceBeforeParensOptions.AfterPlacementOperator;
4726     }
4727     if (Line.Type == LT_ObjCDecl)
4728       return true;
4729     if (Left.is(tok::semi))
4730       return true;
4731     if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch,
4732                      tok::kw_case, TT_ForEachMacro, TT_ObjCForIn) ||
4733         Left.isIf(Line.Type != LT_PreprocessorDirective) ||
4734         Right.is(TT_ConditionLParen)) {
4735       return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4736              spaceRequiredBeforeParens(Right);
4737     }
4738 
4739     // TODO add Operator overloading specific Options to
4740     // SpaceBeforeParensOptions
4741     if (Right.is(TT_OverloadedOperatorLParen))
4742       return spaceRequiredBeforeParens(Right);
4743     // Function declaration or definition
4744     if (Line.MightBeFunctionDecl && Right.is(TT_FunctionDeclarationLParen)) {
4745       if (spaceRequiredBeforeParens(Right))
4746         return true;
4747       const auto &Options = Style.SpaceBeforeParensOptions;
4748       return Line.mightBeFunctionDefinition()
4749                  ? Options.AfterFunctionDefinitionName
4750                  : Options.AfterFunctionDeclarationName;
4751     }
4752     // Lambda
4753     if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) &&
4754         Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare)) {
4755       return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
4756              spaceRequiredBeforeParens(Right);
4757     }
4758     if (!BeforeLeft || !BeforeLeft->isOneOf(tok::period, tok::arrow)) {
4759       if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) {
4760         return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4761                spaceRequiredBeforeParens(Right);
4762       }
4763       if (Left.isOneOf(tok::kw_new, tok::kw_delete)) {
4764         return ((!Line.MightBeFunctionDecl || !BeforeLeft) &&
4765                 Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4766                spaceRequiredBeforeParens(Right);
4767       }
4768 
4769       if (Left.is(tok::r_square) && Left.MatchingParen &&
4770           Left.MatchingParen->Previous &&
4771           Left.MatchingParen->Previous->is(tok::kw_delete)) {
4772         return (Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4773                spaceRequiredBeforeParens(Right);
4774       }
4775     }
4776     // Handle builtins like identifiers.
4777     if (Line.Type != LT_PreprocessorDirective &&
4778         (Left.Tok.getIdentifierInfo() || Left.is(tok::r_paren))) {
4779       return spaceRequiredBeforeParens(Right);
4780     }
4781     return false;
4782   }
4783   if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
4784     return false;
4785   if (Right.is(TT_UnaryOperator)) {
4786     return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
4787            (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
4788   }
4789   // No space between the variable name and the initializer list.
4790   // A a1{1};
4791   // Verilog doesn't have such syntax, but it has word operators that are C++
4792   // identifiers like `a inside {b, c}`. So the rule is not applicable.
4793   if (!Style.isVerilog() &&
4794       (Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
4795                     tok::r_paren) ||
4796        Left.isTypeName(LangOpts)) &&
4797       Right.is(tok::l_brace) && Right.getNextNonComment() &&
4798       Right.isNot(BK_Block)) {
4799     return false;
4800   }
4801   if (Left.is(tok::period) || Right.is(tok::period))
4802     return false;
4803   // u#str, U#str, L#str, u8#str
4804   // uR#str, UR#str, LR#str, u8R#str
4805   if (Right.is(tok::hash) && Left.is(tok::identifier) &&
4806       (Left.TokenText == "L" || Left.TokenText == "u" ||
4807        Left.TokenText == "U" || Left.TokenText == "u8" ||
4808        Left.TokenText == "LR" || Left.TokenText == "uR" ||
4809        Left.TokenText == "UR" || Left.TokenText == "u8R")) {
4810     return false;
4811   }
4812   if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
4813       Left.MatchingParen->Previous &&
4814       (Left.MatchingParen->Previous->is(tok::period) ||
4815        Left.MatchingParen->Previous->is(tok::coloncolon))) {
4816     // Java call to generic function with explicit type:
4817     // A.<B<C<...>>>DoSomething();
4818     // A::<B<C<...>>>DoSomething();  // With a Java 8 method reference.
4819     return false;
4820   }
4821   if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
4822     return false;
4823   if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) {
4824     // Objective-C dictionary literal -> no space after opening brace.
4825     return false;
4826   }
4827   if (Right.is(tok::r_brace) && Right.MatchingParen &&
4828       Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) {
4829     // Objective-C dictionary literal -> no space before closing brace.
4830     return false;
4831   }
4832   if (Right.is(TT_TrailingAnnotation) && Right.isOneOf(tok::amp, tok::ampamp) &&
4833       Left.isOneOf(tok::kw_const, tok::kw_volatile) &&
4834       (!Right.Next || Right.Next->is(tok::semi))) {
4835     // Match const and volatile ref-qualifiers without any additional
4836     // qualifiers such as
4837     // void Fn() const &;
4838     return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
4839   }
4840 
4841   return true;
4842 }
4843 
spaceRequiredBefore(const AnnotatedLine & Line,const FormatToken & Right) const4844 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
4845                                          const FormatToken &Right) const {
4846   const FormatToken &Left = *Right.Previous;
4847 
4848   // If the token is finalized don't touch it (as it could be in a
4849   // clang-format-off section).
4850   if (Left.Finalized)
4851     return Right.hasWhitespaceBefore();
4852 
4853   const bool IsVerilog = Style.isVerilog();
4854   assert(!IsVerilog || !IsCpp);
4855 
4856   // Never ever merge two words.
4857   if (Keywords.isWordLike(Right, IsVerilog) &&
4858       Keywords.isWordLike(Left, IsVerilog)) {
4859     return true;
4860   }
4861 
4862   // Leave a space between * and /* to avoid C4138 `comment end` found outside
4863   // of comment.
4864   if (Left.is(tok::star) && Right.is(tok::comment))
4865     return true;
4866 
4867   if (IsCpp) {
4868     if (Left.is(TT_OverloadedOperator) &&
4869         Right.isOneOf(TT_TemplateOpener, TT_TemplateCloser)) {
4870       return true;
4871     }
4872     // Space between UDL and dot: auto b = 4s .count();
4873     if (Right.is(tok::period) && Left.is(tok::numeric_constant))
4874       return true;
4875     // Space between import <iostream>.
4876     // or import .....;
4877     if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis))
4878       return true;
4879     // Space between `module :` and `import :`.
4880     if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) &&
4881         Right.is(TT_ModulePartitionColon)) {
4882       return true;
4883     }
4884     // No space between import foo:bar but keep a space between import :bar;
4885     if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon))
4886       return false;
4887     // No space between :bar;
4888     if (Left.is(TT_ModulePartitionColon) &&
4889         Right.isOneOf(tok::identifier, tok::kw_private)) {
4890       return false;
4891     }
4892     if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
4893         Line.First->is(Keywords.kw_import)) {
4894       return false;
4895     }
4896     // Space in __attribute__((attr)) ::type.
4897     if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) &&
4898         Right.is(tok::coloncolon)) {
4899       return true;
4900     }
4901 
4902     if (Left.is(tok::kw_operator))
4903       return Right.is(tok::coloncolon);
4904     if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
4905         !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) {
4906       return true;
4907     }
4908     if (Left.is(tok::less) && Left.is(TT_OverloadedOperator) &&
4909         Right.is(TT_TemplateOpener)) {
4910       return true;
4911     }
4912     // C++ Core Guidelines suppression tag, e.g. `[[suppress(type.5)]]`.
4913     if (Left.is(tok::identifier) && Right.is(tok::numeric_constant))
4914       return Right.TokenText[0] != '.';
4915     // `Left` is a keyword (including C++ alternative operator) or identifier.
4916     if (Left.Tok.getIdentifierInfo() && Right.Tok.isLiteral())
4917       return true;
4918   } else if (Style.isProto()) {
4919     if (Right.is(tok::period) &&
4920         Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
4921                      Keywords.kw_repeated, Keywords.kw_extend)) {
4922       return true;
4923     }
4924     if (Right.is(tok::l_paren) &&
4925         Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) {
4926       return true;
4927     }
4928     if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
4929       return true;
4930     // Slashes occur in text protocol extension syntax: [type/type] { ... }.
4931     if (Left.is(tok::slash) || Right.is(tok::slash))
4932       return false;
4933     if (Left.MatchingParen &&
4934         Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
4935         Right.isOneOf(tok::l_brace, tok::less)) {
4936       return !Style.Cpp11BracedListStyle;
4937     }
4938     // A percent is probably part of a formatting specification, such as %lld.
4939     if (Left.is(tok::percent))
4940       return false;
4941     // Preserve the existence of a space before a percent for cases like 0x%04x
4942     // and "%d %d"
4943     if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
4944       return Right.hasWhitespaceBefore();
4945   } else if (Style.isJson()) {
4946     if (Right.is(tok::colon) && Left.is(tok::string_literal))
4947       return Style.SpaceBeforeJsonColon;
4948   } else if (Style.isCSharp()) {
4949     // Require spaces around '{' and  before '}' unless they appear in
4950     // interpolated strings. Interpolated strings are merged into a single token
4951     // so cannot have spaces inserted by this function.
4952 
4953     // No space between 'this' and '['
4954     if (Left.is(tok::kw_this) && Right.is(tok::l_square))
4955       return false;
4956 
4957     // No space between 'new' and '('
4958     if (Left.is(tok::kw_new) && Right.is(tok::l_paren))
4959       return false;
4960 
4961     // Space before { (including space within '{ {').
4962     if (Right.is(tok::l_brace))
4963       return true;
4964 
4965     // Spaces inside braces.
4966     if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace))
4967       return true;
4968 
4969     if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace))
4970       return true;
4971 
4972     // Spaces around '=>'.
4973     if (Left.is(TT_FatArrow) || Right.is(TT_FatArrow))
4974       return true;
4975 
4976     // No spaces around attribute target colons
4977     if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon))
4978       return false;
4979 
4980     // space between type and variable e.g. Dictionary<string,string> foo;
4981     if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName))
4982       return true;
4983 
4984     // spaces inside square brackets.
4985     if (Left.is(tok::l_square) || Right.is(tok::r_square))
4986       return Style.SpacesInSquareBrackets;
4987 
4988     // No space before ? in nullable types.
4989     if (Right.is(TT_CSharpNullable))
4990       return false;
4991 
4992     // No space before null forgiving '!'.
4993     if (Right.is(TT_NonNullAssertion))
4994       return false;
4995 
4996     // No space between consecutive commas '[,,]'.
4997     if (Left.is(tok::comma) && Right.is(tok::comma))
4998       return false;
4999 
5000     // space after var in `var (key, value)`
5001     if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren))
5002       return true;
5003 
5004     // space between keywords and paren e.g. "using ("
5005     if (Right.is(tok::l_paren)) {
5006       if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
5007                        Keywords.kw_lock)) {
5008         return Style.SpaceBeforeParensOptions.AfterControlStatements ||
5009                spaceRequiredBeforeParens(Right);
5010       }
5011     }
5012 
5013     // space between method modifier and opening parenthesis of a tuple return
5014     // type
5015     if ((Left.isAccessSpecifierKeyword() ||
5016          Left.isOneOf(tok::kw_virtual, tok::kw_extern, tok::kw_static,
5017                       Keywords.kw_internal, Keywords.kw_abstract,
5018                       Keywords.kw_sealed, Keywords.kw_override,
5019                       Keywords.kw_async, Keywords.kw_unsafe)) &&
5020         Right.is(tok::l_paren)) {
5021       return true;
5022     }
5023   } else if (Style.isJavaScript()) {
5024     if (Left.is(TT_FatArrow))
5025       return true;
5026     // for await ( ...
5027     if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
5028         Left.Previous->is(tok::kw_for)) {
5029       return true;
5030     }
5031     if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
5032         Right.MatchingParen) {
5033       const FormatToken *Next = Right.MatchingParen->getNextNonComment();
5034       // An async arrow function, for example: `x = async () => foo();`,
5035       // as opposed to calling a function called async: `x = async();`
5036       if (Next && Next->is(TT_FatArrow))
5037         return true;
5038     }
5039     if ((Left.is(TT_TemplateString) && Left.TokenText.ends_with("${")) ||
5040         (Right.is(TT_TemplateString) && Right.TokenText.starts_with("}"))) {
5041       return false;
5042     }
5043     // In tagged template literals ("html`bar baz`"), there is no space between
5044     // the tag identifier and the template string.
5045     if (Keywords.isJavaScriptIdentifier(Left,
5046                                         /* AcceptIdentifierName= */ false) &&
5047         Right.is(TT_TemplateString)) {
5048       return false;
5049     }
5050     if (Right.is(tok::star) &&
5051         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) {
5052       return false;
5053     }
5054     if (Right.isOneOf(tok::l_brace, tok::l_square) &&
5055         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
5056                      Keywords.kw_extends, Keywords.kw_implements)) {
5057       return true;
5058     }
5059     if (Right.is(tok::l_paren)) {
5060       // JS methods can use some keywords as names (e.g. `delete()`).
5061       if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
5062         return false;
5063       // Valid JS method names can include keywords, e.g. `foo.delete()` or
5064       // `bar.instanceof()`. Recognize call positions by preceding period.
5065       if (Left.Previous && Left.Previous->is(tok::period) &&
5066           Left.Tok.getIdentifierInfo()) {
5067         return false;
5068       }
5069       // Additional unary JavaScript operators that need a space after.
5070       if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
5071                        tok::kw_void)) {
5072         return true;
5073       }
5074     }
5075     // `foo as const;` casts into a const type.
5076     if (Left.endsSequence(tok::kw_const, Keywords.kw_as))
5077       return false;
5078     if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
5079                       tok::kw_const) ||
5080          // "of" is only a keyword if it appears after another identifier
5081          // (e.g. as "const x of y" in a for loop), or after a destructuring
5082          // operation (const [x, y] of z, const {a, b} of c).
5083          (Left.is(Keywords.kw_of) && Left.Previous &&
5084           (Left.Previous->is(tok::identifier) ||
5085            Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
5086         (!Left.Previous || Left.Previous->isNot(tok::period))) {
5087       return true;
5088     }
5089     if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
5090         Left.Previous->is(tok::period) && Right.is(tok::l_paren)) {
5091       return false;
5092     }
5093     if (Left.is(Keywords.kw_as) &&
5094         Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) {
5095       return true;
5096     }
5097     if (Left.is(tok::kw_default) && Left.Previous &&
5098         Left.Previous->is(tok::kw_export)) {
5099       return true;
5100     }
5101     if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
5102       return true;
5103     if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
5104       return false;
5105     if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
5106       return false;
5107     if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
5108         Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) {
5109       return false;
5110     }
5111     if (Left.is(tok::ellipsis))
5112       return false;
5113     if (Left.is(TT_TemplateCloser) &&
5114         !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
5115                        Keywords.kw_implements, Keywords.kw_extends)) {
5116       // Type assertions ('<type>expr') are not followed by whitespace. Other
5117       // locations that should have whitespace following are identified by the
5118       // above set of follower tokens.
5119       return false;
5120     }
5121     if (Right.is(TT_NonNullAssertion))
5122       return false;
5123     if (Left.is(TT_NonNullAssertion) &&
5124         Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) {
5125       return true; // "x! as string", "x! in y"
5126     }
5127   } else if (Style.Language == FormatStyle::LK_Java) {
5128     if (Left.is(TT_CaseLabelArrow) || Right.is(TT_CaseLabelArrow))
5129       return true;
5130     if (Left.is(tok::r_square) && Right.is(tok::l_brace))
5131       return true;
5132     // spaces inside square brackets.
5133     if (Left.is(tok::l_square) || Right.is(tok::r_square))
5134       return Style.SpacesInSquareBrackets;
5135 
5136     if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) {
5137       return Style.SpaceBeforeParensOptions.AfterControlStatements ||
5138              spaceRequiredBeforeParens(Right);
5139     }
5140     if ((Left.isAccessSpecifierKeyword() ||
5141          Left.isOneOf(tok::kw_static, Keywords.kw_final, Keywords.kw_abstract,
5142                       Keywords.kw_native)) &&
5143         Right.is(TT_TemplateOpener)) {
5144       return true;
5145     }
5146   } else if (IsVerilog) {
5147     // An escaped identifier ends with whitespace.
5148     if (Left.is(tok::identifier) && Left.TokenText[0] == '\\')
5149       return true;
5150     // Add space between things in a primitive's state table unless in a
5151     // transition like `(0?)`.
5152     if ((Left.is(TT_VerilogTableItem) &&
5153          !Right.isOneOf(tok::r_paren, tok::semi)) ||
5154         (Right.is(TT_VerilogTableItem) && Left.isNot(tok::l_paren))) {
5155       const FormatToken *Next = Right.getNextNonComment();
5156       return !(Next && Next->is(tok::r_paren));
5157     }
5158     // Don't add space within a delay like `#0`.
5159     if (Left.isNot(TT_BinaryOperator) &&
5160         Left.isOneOf(Keywords.kw_verilogHash, Keywords.kw_verilogHashHash)) {
5161       return false;
5162     }
5163     // Add space after a delay.
5164     if (Right.isNot(tok::semi) &&
5165         (Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHash) ||
5166          Left.endsSequence(tok::numeric_constant,
5167                            Keywords.kw_verilogHashHash) ||
5168          (Left.is(tok::r_paren) && Left.MatchingParen &&
5169           Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) {
5170       return true;
5171     }
5172     // Don't add embedded spaces in a number literal like `16'h1?ax` or an array
5173     // literal like `'{}`.
5174     if (Left.is(Keywords.kw_apostrophe) ||
5175         (Left.is(TT_VerilogNumberBase) && Right.is(tok::numeric_constant))) {
5176       return false;
5177     }
5178     // Add spaces around the implication operator `->`.
5179     if (Left.is(tok::arrow) || Right.is(tok::arrow))
5180       return true;
5181     // Don't add spaces between two at signs. Like in a coverage event.
5182     // Don't add spaces between at and a sensitivity list like
5183     // `@(posedge clk)`.
5184     if (Left.is(tok::at) && Right.isOneOf(tok::l_paren, tok::star, tok::at))
5185       return false;
5186     // Add space between the type name and dimension like `logic [1:0]`.
5187     if (Right.is(tok::l_square) &&
5188         Left.isOneOf(TT_VerilogDimensionedTypeName, Keywords.kw_function)) {
5189       return true;
5190     }
5191     // In a tagged union expression, there should be a space after the tag.
5192     if (Right.isOneOf(tok::period, Keywords.kw_apostrophe) &&
5193         Keywords.isVerilogIdentifier(Left) && Left.getPreviousNonComment() &&
5194         Left.getPreviousNonComment()->is(Keywords.kw_tagged)) {
5195       return true;
5196     }
5197     // Don't add spaces between a casting type and the quote or repetition count
5198     // and the brace. The case of tagged union expressions is handled by the
5199     // previous rule.
5200     if ((Right.is(Keywords.kw_apostrophe) ||
5201          (Right.is(BK_BracedInit) && Right.is(tok::l_brace))) &&
5202         !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) ||
5203           Keywords.isVerilogWordOperator(Left)) &&
5204         (Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace,
5205                       tok::numeric_constant) ||
5206          Keywords.isWordLike(Left))) {
5207       return false;
5208     }
5209     // Don't add spaces in imports like `import foo::*;`.
5210     if ((Right.is(tok::star) && Left.is(tok::coloncolon)) ||
5211         (Left.is(tok::star) && Right.is(tok::semi))) {
5212       return false;
5213     }
5214     // Add space in attribute like `(* ASYNC_REG = "TRUE" *)`.
5215     if (Left.endsSequence(tok::star, tok::l_paren) && Right.is(tok::identifier))
5216       return true;
5217     // Add space before drive strength like in `wire (strong1, pull0)`.
5218     if (Right.is(tok::l_paren) && Right.is(TT_VerilogStrength))
5219       return true;
5220     // Don't add space in a streaming concatenation like `{>>{j}}`.
5221     if ((Left.is(tok::l_brace) &&
5222          Right.isOneOf(tok::lessless, tok::greatergreater)) ||
5223         (Left.endsSequence(tok::lessless, tok::l_brace) ||
5224          Left.endsSequence(tok::greatergreater, tok::l_brace))) {
5225       return false;
5226     }
5227   } else if (Style.isTableGen()) {
5228     // Avoid joining [ and {, since [{ is the start token of a multiline string.
5229     if (Left.is(tok::l_square) && Right.is(tok::l_brace))
5230       return true;
5231     if (Left.is(tok::r_brace) && Right.is(tok::r_square))
5232       return true;
5233     // Do not insert around colon in DAGArg and cond operator.
5234     if (Right.isOneOf(TT_TableGenDAGArgListColon,
5235                       TT_TableGenDAGArgListColonToAlign) ||
5236         Left.isOneOf(TT_TableGenDAGArgListColon,
5237                      TT_TableGenDAGArgListColonToAlign)) {
5238       return false;
5239     }
5240     if (Right.is(TT_TableGenCondOperatorColon))
5241       return false;
5242     if (Left.isOneOf(TT_TableGenDAGArgOperatorID,
5243                      TT_TableGenDAGArgOperatorToBreak) &&
5244         Right.isNot(TT_TableGenDAGArgCloser)) {
5245       return true;
5246     }
5247     // Do not insert a space between bang operators and the openers that follow.
5248     if (Right.isOneOf(tok::l_paren, tok::less) &&
5249         Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator)) {
5250       return false;
5251     }
5252     // Trailing paste requires space before '{' or ':', the case in name values.
5253     // Not before ';', the case in normal values.
5254     if (Left.is(TT_TableGenTrailingPasteOperator) &&
5255         Right.isOneOf(tok::l_brace, tok::colon)) {
5256       return true;
5257     }
5258     // Otherwise paste operator does not prefer space around.
5259     if (Left.is(tok::hash) || Right.is(tok::hash))
5260       return false;
5261     // Make sure to keep a space after definition keywords.
5262     if (Keywords.isTableGenDefinition(Left))
5263       return true;
5264   }
5265 
5266   if (Left.is(TT_ImplicitStringLiteral))
5267     return Right.hasWhitespaceBefore();
5268   if (Line.Type == LT_ObjCMethodDecl) {
5269     if (Left.is(TT_ObjCMethodSpecifier))
5270       return true;
5271     if (Left.is(tok::r_paren) && Left.isNot(TT_AttributeRParen) &&
5272         canBeObjCSelectorComponent(Right)) {
5273       // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
5274       // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
5275       // method declaration.
5276       return false;
5277     }
5278   }
5279   if (Line.Type == LT_ObjCProperty &&
5280       (Right.is(tok::equal) || Left.is(tok::equal))) {
5281     return false;
5282   }
5283 
5284   if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
5285       Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) {
5286     return true;
5287   }
5288   if (Left.is(tok::comma) && Right.isNot(TT_OverloadedOperatorLParen) &&
5289       // In an unexpanded macro call we only find the parentheses and commas
5290       // in a line; the commas and closing parenthesis do not require a space.
5291       (Left.Children.empty() || !Left.MacroParent)) {
5292     return true;
5293   }
5294   if (Right.is(tok::comma))
5295     return false;
5296   if (Right.is(TT_ObjCBlockLParen))
5297     return true;
5298   if (Right.is(TT_CtorInitializerColon))
5299     return Style.SpaceBeforeCtorInitializerColon;
5300   if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
5301     return false;
5302   if (Right.is(TT_RangeBasedForLoopColon) &&
5303       !Style.SpaceBeforeRangeBasedForLoopColon) {
5304     return false;
5305   }
5306   if (Left.is(TT_BitFieldColon)) {
5307     return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
5308            Style.BitFieldColonSpacing == FormatStyle::BFCS_After;
5309   }
5310   if (Right.is(tok::colon)) {
5311     if (Right.is(TT_CaseLabelColon))
5312       return Style.SpaceBeforeCaseColon;
5313     if (Right.is(TT_GotoLabelColon))
5314       return false;
5315     // `private:` and `public:`.
5316     if (!Right.getNextNonComment())
5317       return false;
5318     if (Right.is(TT_ObjCMethodExpr))
5319       return false;
5320     if (Left.is(tok::question))
5321       return false;
5322     if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
5323       return false;
5324     if (Right.is(TT_DictLiteral))
5325       return Style.SpacesInContainerLiterals;
5326     if (Right.is(TT_AttributeColon))
5327       return false;
5328     if (Right.is(TT_CSharpNamedArgumentColon))
5329       return false;
5330     if (Right.is(TT_GenericSelectionColon))
5331       return false;
5332     if (Right.is(TT_BitFieldColon)) {
5333       return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
5334              Style.BitFieldColonSpacing == FormatStyle::BFCS_Before;
5335     }
5336     return true;
5337   }
5338   // Do not merge "- -" into "--".
5339   if ((Left.isOneOf(tok::minus, tok::minusminus) &&
5340        Right.isOneOf(tok::minus, tok::minusminus)) ||
5341       (Left.isOneOf(tok::plus, tok::plusplus) &&
5342        Right.isOneOf(tok::plus, tok::plusplus))) {
5343     return true;
5344   }
5345   if (Left.is(TT_UnaryOperator)) {
5346     // Lambda captures allow for a lone &, so "&]" needs to be properly
5347     // handled.
5348     if (Left.is(tok::amp) && Right.is(tok::r_square))
5349       return Style.SpacesInSquareBrackets;
5350     return Style.SpaceAfterLogicalNot && Left.is(tok::exclaim);
5351   }
5352 
5353   // If the next token is a binary operator or a selector name, we have
5354   // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
5355   if (Left.is(TT_CastRParen)) {
5356     return Style.SpaceAfterCStyleCast ||
5357            Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
5358   }
5359 
5360   auto ShouldAddSpacesInAngles = [this, &Right]() {
5361     if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
5362       return true;
5363     if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
5364       return Right.hasWhitespaceBefore();
5365     return false;
5366   };
5367 
5368   if (Left.is(tok::greater) && Right.is(tok::greater)) {
5369     if (Style.Language == FormatStyle::LK_TextProto ||
5370         (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) {
5371       return !Style.Cpp11BracedListStyle;
5372     }
5373     return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
5374            ((Style.Standard < FormatStyle::LS_Cpp11) ||
5375             ShouldAddSpacesInAngles());
5376   }
5377   if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
5378       Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
5379       (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) {
5380     return false;
5381   }
5382   if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
5383       Right.getPrecedence() == prec::Assignment) {
5384     return false;
5385   }
5386   if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
5387       (Left.is(tok::identifier) || Left.is(tok::kw_this))) {
5388     return false;
5389   }
5390   if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) {
5391     // Generally don't remove existing spaces between an identifier and "::".
5392     // The identifier might actually be a macro name such as ALWAYS_INLINE. If
5393     // this turns out to be too lenient, add analysis of the identifier itself.
5394     return Right.hasWhitespaceBefore();
5395   }
5396   if (Right.is(tok::coloncolon) &&
5397       !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) {
5398     // Put a space between < and :: in vector< ::std::string >
5399     return (Left.is(TT_TemplateOpener) &&
5400             ((Style.Standard < FormatStyle::LS_Cpp11) ||
5401              ShouldAddSpacesInAngles())) ||
5402            !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
5403                           tok::kw___super, TT_TemplateOpener,
5404                           TT_TemplateCloser)) ||
5405            (Left.is(tok::l_paren) && Style.SpacesInParensOptions.Other);
5406   }
5407   if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
5408     return ShouldAddSpacesInAngles();
5409   // Space before TT_StructuredBindingLSquare.
5410   if (Right.is(TT_StructuredBindingLSquare)) {
5411     return !Left.isOneOf(tok::amp, tok::ampamp) ||
5412            getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right;
5413   }
5414   // Space before & or && following a TT_StructuredBindingLSquare.
5415   if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
5416       Right.isOneOf(tok::amp, tok::ampamp)) {
5417     return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
5418   }
5419   if ((Right.is(TT_BinaryOperator) && Left.isNot(tok::l_paren)) ||
5420       (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
5421        Right.isNot(tok::r_paren))) {
5422     return true;
5423   }
5424   if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
5425       Left.MatchingParen &&
5426       Left.MatchingParen->is(TT_OverloadedOperatorLParen)) {
5427     return false;
5428   }
5429   if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
5430       Line.Type == LT_ImportStatement) {
5431     return true;
5432   }
5433   if (Right.is(TT_TrailingUnaryOperator))
5434     return false;
5435   if (Left.is(TT_RegexLiteral))
5436     return false;
5437   return spaceRequiredBetween(Line, Left, Right);
5438 }
5439 
5440 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
isAllmanBrace(const FormatToken & Tok)5441 static bool isAllmanBrace(const FormatToken &Tok) {
5442   return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
5443          !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
5444 }
5445 
5446 // Returns 'true' if 'Tok' is a function argument.
IsFunctionArgument(const FormatToken & Tok)5447 static bool IsFunctionArgument(const FormatToken &Tok) {
5448   return Tok.MatchingParen && Tok.MatchingParen->Next &&
5449          Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
5450 }
5451 
5452 static bool
isItAnEmptyLambdaAllowed(const FormatToken & Tok,FormatStyle::ShortLambdaStyle ShortLambdaOption)5453 isItAnEmptyLambdaAllowed(const FormatToken &Tok,
5454                          FormatStyle::ShortLambdaStyle ShortLambdaOption) {
5455   return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None;
5456 }
5457 
isAllmanLambdaBrace(const FormatToken & Tok)5458 static bool isAllmanLambdaBrace(const FormatToken &Tok) {
5459   return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
5460          !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
5461 }
5462 
mustBreakBefore(const AnnotatedLine & Line,const FormatToken & Right) const5463 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
5464                                      const FormatToken &Right) const {
5465   const FormatToken &Left = *Right.Previous;
5466   if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
5467     return true;
5468 
5469   if (Style.BreakFunctionDefinitionParameters && Line.MightBeFunctionDecl &&
5470       Line.mightBeFunctionDefinition() && Left.MightBeFunctionDeclParen &&
5471       Left.ParameterCount > 0) {
5472     return true;
5473   }
5474 
5475   const auto *BeforeLeft = Left.Previous;
5476   const auto *AfterRight = Right.Next;
5477 
5478   if (Style.isCSharp()) {
5479     if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) &&
5480         Style.BraceWrapping.AfterFunction) {
5481       return true;
5482     }
5483     if (Right.is(TT_CSharpNamedArgumentColon) ||
5484         Left.is(TT_CSharpNamedArgumentColon)) {
5485       return false;
5486     }
5487     if (Right.is(TT_CSharpGenericTypeConstraint))
5488       return true;
5489     if (AfterRight && AfterRight->is(TT_FatArrow) &&
5490         (Right.is(tok::numeric_constant) ||
5491          (Right.is(tok::identifier) && Right.TokenText == "_"))) {
5492       return true;
5493     }
5494 
5495     // Break after C# [...] and before public/protected/private/internal.
5496     if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) &&
5497         (Right.isAccessSpecifier(/*ColonRequired=*/false) ||
5498          Right.is(Keywords.kw_internal))) {
5499       return true;
5500     }
5501     // Break between ] and [ but only when there are really 2 attributes.
5502     if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) &&
5503         Left.is(tok::r_square) && Right.is(tok::l_square)) {
5504       return true;
5505     }
5506   } else if (Style.isJavaScript()) {
5507     // FIXME: This might apply to other languages and token kinds.
5508     if (Right.is(tok::string_literal) && Left.is(tok::plus) && BeforeLeft &&
5509         BeforeLeft->is(tok::string_literal)) {
5510       return true;
5511     }
5512     if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
5513         BeforeLeft && BeforeLeft->is(tok::equal) &&
5514         Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
5515                             tok::kw_const) &&
5516         // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
5517         // above.
5518         !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) {
5519       // Object literals on the top level of a file are treated as "enum-style".
5520       // Each key/value pair is put on a separate line, instead of bin-packing.
5521       return true;
5522     }
5523     if (Left.is(tok::l_brace) && Line.Level == 0 &&
5524         (Line.startsWith(tok::kw_enum) ||
5525          Line.startsWith(tok::kw_const, tok::kw_enum) ||
5526          Line.startsWith(tok::kw_export, tok::kw_enum) ||
5527          Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) {
5528       // JavaScript top-level enum key/value pairs are put on separate lines
5529       // instead of bin-packing.
5530       return true;
5531     }
5532     if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && BeforeLeft &&
5533         BeforeLeft->is(TT_FatArrow)) {
5534       // JS arrow function (=> {...}).
5535       switch (Style.AllowShortLambdasOnASingleLine) {
5536       case FormatStyle::SLS_All:
5537         return false;
5538       case FormatStyle::SLS_None:
5539         return true;
5540       case FormatStyle::SLS_Empty:
5541         return !Left.Children.empty();
5542       case FormatStyle::SLS_Inline:
5543         // allow one-lining inline (e.g. in function call args) and empty arrow
5544         // functions.
5545         return (Left.NestingLevel == 0 && Line.Level == 0) &&
5546                !Left.Children.empty();
5547       }
5548       llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum");
5549     }
5550 
5551     if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
5552         !Left.Children.empty()) {
5553       // Support AllowShortFunctionsOnASingleLine for JavaScript.
5554       return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
5555              Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
5556              (Left.NestingLevel == 0 && Line.Level == 0 &&
5557               Style.AllowShortFunctionsOnASingleLine &
5558                   FormatStyle::SFS_InlineOnly);
5559     }
5560   } else if (Style.Language == FormatStyle::LK_Java) {
5561     if (Right.is(tok::plus) && Left.is(tok::string_literal) && AfterRight &&
5562         AfterRight->is(tok::string_literal)) {
5563       return true;
5564     }
5565   } else if (Style.isVerilog()) {
5566     // Break between assignments.
5567     if (Left.is(TT_VerilogAssignComma))
5568       return true;
5569     // Break between ports of different types.
5570     if (Left.is(TT_VerilogTypeComma))
5571       return true;
5572     // Break between ports in a module instantiation and after the parameter
5573     // list.
5574     if (Style.VerilogBreakBetweenInstancePorts &&
5575         (Left.is(TT_VerilogInstancePortComma) ||
5576          (Left.is(tok::r_paren) && Keywords.isVerilogIdentifier(Right) &&
5577           Left.MatchingParen &&
5578           Left.MatchingParen->is(TT_VerilogInstancePortLParen)))) {
5579       return true;
5580     }
5581     // Break after labels. In Verilog labels don't have the 'case' keyword, so
5582     // it is hard to identify them in UnwrappedLineParser.
5583     if (!Keywords.isVerilogBegin(Right) && Keywords.isVerilogEndOfLabel(Left))
5584       return true;
5585   } else if (Style.BreakAdjacentStringLiterals &&
5586              (IsCpp || Style.isProto() ||
5587               Style.Language == FormatStyle::LK_TableGen)) {
5588     if (Left.isStringLiteral() && Right.isStringLiteral())
5589       return true;
5590   }
5591 
5592   // Basic JSON newline processing.
5593   if (Style.isJson()) {
5594     // Always break after a JSON record opener.
5595     // {
5596     // }
5597     if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace))
5598       return true;
5599     // Always break after a JSON array opener based on BreakArrays.
5600     if ((Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
5601          Right.isNot(tok::r_square)) ||
5602         Left.is(tok::comma)) {
5603       if (Right.is(tok::l_brace))
5604         return true;
5605       // Scan to the right; if we see an object or an array inside,
5606       // then break.
5607       for (const auto *Tok = &Right; Tok; Tok = Tok->Next) {
5608         if (Tok->isOneOf(tok::l_brace, tok::l_square))
5609           return true;
5610         if (Tok->isOneOf(tok::r_brace, tok::r_square))
5611           break;
5612       }
5613       return Style.BreakArrays;
5614     }
5615   } else if (Style.isTableGen()) {
5616     // Break after the comma inside cond operators.
5617     // !cond(case1:1,
5618     //       case2:0);
5619     if (Left.is(TT_TableGenCondOperatorComma))
5620       return true;
5621     if (Left.is(TT_TableGenDAGArgOperatorToBreak) &&
5622         Right.isNot(TT_TableGenDAGArgCloser)) {
5623       return true;
5624     }
5625     if (Left.is(TT_TableGenDAGArgListCommaToBreak))
5626       return true;
5627     if (Right.is(TT_TableGenDAGArgCloser) && Right.MatchingParen &&
5628         Right.MatchingParen->is(TT_TableGenDAGArgOpenerToBreak) &&
5629         &Left != Right.MatchingParen->Next) {
5630       // Check to avoid empty DAGArg such as (ins).
5631       return Style.TableGenBreakInsideDAGArg == FormatStyle::DAS_BreakAll;
5632     }
5633   }
5634 
5635   if (Line.startsWith(tok::kw_asm) && Right.is(TT_InlineASMColon) &&
5636       Style.BreakBeforeInlineASMColon == FormatStyle::BBIAS_Always) {
5637     return true;
5638   }
5639 
5640   // If the last token before a '}', ']', or ')' is a comma or a trailing
5641   // comment, the intention is to insert a line break after it in order to make
5642   // shuffling around entries easier. Import statements, especially in
5643   // JavaScript, can be an exception to this rule.
5644   if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
5645     const FormatToken *BeforeClosingBrace = nullptr;
5646     if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
5647          (Style.isJavaScript() && Left.is(tok::l_paren))) &&
5648         Left.isNot(BK_Block) && Left.MatchingParen) {
5649       BeforeClosingBrace = Left.MatchingParen->Previous;
5650     } else if (Right.MatchingParen &&
5651                (Right.MatchingParen->isOneOf(tok::l_brace,
5652                                              TT_ArrayInitializerLSquare) ||
5653                 (Style.isJavaScript() &&
5654                  Right.MatchingParen->is(tok::l_paren)))) {
5655       BeforeClosingBrace = &Left;
5656     }
5657     if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
5658                                BeforeClosingBrace->isTrailingComment())) {
5659       return true;
5660     }
5661   }
5662 
5663   if (Right.is(tok::comment)) {
5664     return Left.isNot(BK_BracedInit) && Left.isNot(TT_CtorInitializerColon) &&
5665            (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
5666   }
5667   if (Left.isTrailingComment())
5668     return true;
5669   if (Left.IsUnterminatedLiteral)
5670     return true;
5671 
5672   if (BeforeLeft && BeforeLeft->is(tok::lessless) &&
5673       Left.is(tok::string_literal) && Right.is(tok::lessless) && AfterRight &&
5674       AfterRight->is(tok::string_literal)) {
5675     return Right.NewlinesBefore > 0;
5676   }
5677 
5678   if (Right.is(TT_RequiresClause)) {
5679     switch (Style.RequiresClausePosition) {
5680     case FormatStyle::RCPS_OwnLine:
5681     case FormatStyle::RCPS_WithFollowing:
5682       return true;
5683     default:
5684       break;
5685     }
5686   }
5687   // Can break after template<> declaration
5688   if (Left.ClosesTemplateDeclaration && Left.MatchingParen &&
5689       Left.MatchingParen->NestingLevel == 0) {
5690     // Put concepts on the next line e.g.
5691     // template<typename T>
5692     // concept ...
5693     if (Right.is(tok::kw_concept))
5694       return Style.BreakBeforeConceptDeclarations == FormatStyle::BBCDS_Always;
5695     return Style.BreakTemplateDeclarations == FormatStyle::BTDS_Yes ||
5696            (Style.BreakTemplateDeclarations == FormatStyle::BTDS_Leave &&
5697             Right.NewlinesBefore > 0);
5698   }
5699   if (Left.ClosesRequiresClause && Right.isNot(tok::semi)) {
5700     switch (Style.RequiresClausePosition) {
5701     case FormatStyle::RCPS_OwnLine:
5702     case FormatStyle::RCPS_WithPreceding:
5703       return true;
5704     default:
5705       break;
5706     }
5707   }
5708   if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) {
5709     if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon &&
5710         (Left.is(TT_CtorInitializerComma) ||
5711          Right.is(TT_CtorInitializerColon))) {
5712       return true;
5713     }
5714 
5715     if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5716         Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma)) {
5717       return true;
5718     }
5719   }
5720   if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine &&
5721       Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
5722       Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) {
5723     return true;
5724   }
5725   if (Style.PackConstructorInitializers == FormatStyle::PCIS_NextLineOnly) {
5726     if ((Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon ||
5727          Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) &&
5728         Right.is(TT_CtorInitializerColon)) {
5729       return true;
5730     }
5731 
5732     if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5733         Left.is(TT_CtorInitializerColon)) {
5734       return true;
5735     }
5736   }
5737   // Break only if we have multiple inheritance.
5738   if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
5739       Right.is(TT_InheritanceComma)) {
5740     return true;
5741   }
5742   if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma &&
5743       Left.is(TT_InheritanceComma)) {
5744     return true;
5745   }
5746   if (Right.is(tok::string_literal) && Right.TokenText.starts_with("R\"")) {
5747     // Multiline raw string literals are special wrt. line breaks. The author
5748     // has made a deliberate choice and might have aligned the contents of the
5749     // string literal accordingly. Thus, we try to keep existing line breaks.
5750     return Right.IsMultiline && Right.NewlinesBefore > 0;
5751   }
5752   if ((Left.is(tok::l_brace) ||
5753        (Left.is(tok::less) && BeforeLeft && BeforeLeft->is(tok::equal))) &&
5754       Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
5755     // Don't put enums or option definitions onto single lines in protocol
5756     // buffers.
5757     return true;
5758   }
5759   if (Right.is(TT_InlineASMBrace))
5760     return Right.HasUnescapedNewline;
5761 
5762   if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
5763     auto *FirstNonComment = Line.getFirstNonComment();
5764     bool AccessSpecifier =
5765         FirstNonComment && (FirstNonComment->is(Keywords.kw_internal) ||
5766                             FirstNonComment->isAccessSpecifierKeyword());
5767 
5768     if (Style.BraceWrapping.AfterEnum) {
5769       if (Line.startsWith(tok::kw_enum) ||
5770           Line.startsWith(tok::kw_typedef, tok::kw_enum)) {
5771         return true;
5772       }
5773       // Ensure BraceWrapping for `public enum A {`.
5774       if (AccessSpecifier && FirstNonComment->Next &&
5775           FirstNonComment->Next->is(tok::kw_enum)) {
5776         return true;
5777       }
5778     }
5779 
5780     // Ensure BraceWrapping for `public interface A {`.
5781     if (Style.BraceWrapping.AfterClass &&
5782         ((AccessSpecifier && FirstNonComment->Next &&
5783           FirstNonComment->Next->is(Keywords.kw_interface)) ||
5784          Line.startsWith(Keywords.kw_interface))) {
5785       return true;
5786     }
5787 
5788     // Don't attempt to interpret struct return types as structs.
5789     if (Right.isNot(TT_FunctionLBrace)) {
5790       return (Line.startsWith(tok::kw_class) &&
5791               Style.BraceWrapping.AfterClass) ||
5792              (Line.startsWith(tok::kw_struct) &&
5793               Style.BraceWrapping.AfterStruct);
5794     }
5795   }
5796 
5797   if (Left.is(TT_ObjCBlockLBrace) &&
5798       Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) {
5799     return true;
5800   }
5801 
5802   // Ensure wrapping after __attribute__((XX)) and @interface etc.
5803   if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) &&
5804       Right.is(TT_ObjCDecl)) {
5805     return true;
5806   }
5807 
5808   if (Left.is(TT_LambdaLBrace)) {
5809     if (IsFunctionArgument(Left) &&
5810         Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) {
5811       return false;
5812     }
5813 
5814     if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
5815         Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
5816         (!Left.Children.empty() &&
5817          Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty)) {
5818       return true;
5819     }
5820   }
5821 
5822   if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) &&
5823       (Left.isPointerOrReference() || Left.is(TT_TemplateCloser))) {
5824     return true;
5825   }
5826 
5827   // Put multiple Java annotation on a new line.
5828   if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
5829       Left.is(TT_LeadingJavaAnnotation) &&
5830       Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
5831       (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) {
5832     return true;
5833   }
5834 
5835   if (Right.is(TT_ProtoExtensionLSquare))
5836     return true;
5837 
5838   // In text proto instances if a submessage contains at least 2 entries and at
5839   // least one of them is a submessage, like A { ... B { ... } ... },
5840   // put all of the entries of A on separate lines by forcing the selector of
5841   // the submessage B to be put on a newline.
5842   //
5843   // Example: these can stay on one line:
5844   // a { scalar_1: 1 scalar_2: 2 }
5845   // a { b { key: value } }
5846   //
5847   // and these entries need to be on a new line even if putting them all in one
5848   // line is under the column limit:
5849   // a {
5850   //   scalar: 1
5851   //   b { key: value }
5852   // }
5853   //
5854   // We enforce this by breaking before a submessage field that has previous
5855   // siblings, *and* breaking before a field that follows a submessage field.
5856   //
5857   // Be careful to exclude the case  [proto.ext] { ... } since the `]` is
5858   // the TT_SelectorName there, but we don't want to break inside the brackets.
5859   //
5860   // Another edge case is @submessage { key: value }, which is a common
5861   // substitution placeholder. In this case we want to keep `@` and `submessage`
5862   // together.
5863   //
5864   // We ensure elsewhere that extensions are always on their own line.
5865   if (Style.isProto() && Right.is(TT_SelectorName) &&
5866       Right.isNot(tok::r_square) && AfterRight) {
5867     // Keep `@submessage` together in:
5868     // @submessage { key: value }
5869     if (Left.is(tok::at))
5870       return false;
5871     // Look for the scope opener after selector in cases like:
5872     // selector { ...
5873     // selector: { ...
5874     // selector: @base { ...
5875     const auto *LBrace = AfterRight;
5876     if (LBrace && LBrace->is(tok::colon)) {
5877       LBrace = LBrace->Next;
5878       if (LBrace && LBrace->is(tok::at)) {
5879         LBrace = LBrace->Next;
5880         if (LBrace)
5881           LBrace = LBrace->Next;
5882       }
5883     }
5884     if (LBrace &&
5885         // The scope opener is one of {, [, <:
5886         // selector { ... }
5887         // selector [ ... ]
5888         // selector < ... >
5889         //
5890         // In case of selector { ... }, the l_brace is TT_DictLiteral.
5891         // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
5892         // so we check for immediately following r_brace.
5893         ((LBrace->is(tok::l_brace) &&
5894           (LBrace->is(TT_DictLiteral) ||
5895            (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
5896          LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
5897       // If Left.ParameterCount is 0, then this submessage entry is not the
5898       // first in its parent submessage, and we want to break before this entry.
5899       // If Left.ParameterCount is greater than 0, then its parent submessage
5900       // might contain 1 or more entries and we want to break before this entry
5901       // if it contains at least 2 entries. We deal with this case later by
5902       // detecting and breaking before the next entry in the parent submessage.
5903       if (Left.ParameterCount == 0)
5904         return true;
5905       // However, if this submessage is the first entry in its parent
5906       // submessage, Left.ParameterCount might be 1 in some cases.
5907       // We deal with this case later by detecting an entry
5908       // following a closing paren of this submessage.
5909     }
5910 
5911     // If this is an entry immediately following a submessage, it will be
5912     // preceded by a closing paren of that submessage, like in:
5913     //     left---.  .---right
5914     //            v  v
5915     // sub: { ... } key: value
5916     // If there was a comment between `}` and `key` above, then `key` would be
5917     // put on a new line anyways.
5918     if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
5919       return true;
5920   }
5921 
5922   return false;
5923 }
5924 
/// Decides whether a line break is permitted immediately before \p Right,
/// i.e. between \c Right.Previous (called \c Left below) and \p Right.
///
/// The function is an ordered cascade of rules: the language-specific rules
/// (C#, Java, JavaScript, TableGen) are tried first, followed by the general
/// ones, and the first rule that matches determines the result. Because of
/// that, the relative order of the checks is significant.
///
/// \param Line  The annotated line that contains \p Right.
/// \param Right The token before which a break is considered. Its
///              \c Previous pointer is dereferenced unconditionally, so it
///              must have a preceding token.
/// \returns \c true if a break may be placed before \p Right.
canBreakBefore(const AnnotatedLine & Line,const FormatToken & Right) const5925 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
5926                                     const FormatToken &Right) const {
5927   const FormatToken &Left = *Right.Previous;
5928   // Language-specific stuff.
5929   if (Style.isCSharp()) {
       // Never break on either side of a named-argument or attribute colon.
5930     if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) ||
5931         Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) {
5932       return false;
5933     }
5934     // Only break after commas for generic type constraints.
5935     if (Line.First->is(TT_CSharpGenericTypeConstraint))
5936       return Left.is(TT_CSharpGenericTypeConstraintComma);
5937     // Keep nullable operators attached to their identifiers.
5938     if (Right.is(TT_CSharpNullable))
5939       return false;
5940   } else if (Style.Language == FormatStyle::LK_Java) {
       // No break directly after `throws`/`extends`/`implements`, but a break
       // directly before one of them is allowed.
5941     if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
5942                      Keywords.kw_implements)) {
5943       return false;
5944     }
5945     if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
5946                       Keywords.kw_implements)) {
5947       return true;
5948     }
5949   } else if (Style.isJavaScript()) {
5950     const FormatToken *NonComment = Right.getPreviousNonComment();
5951     if (NonComment &&
5952         (NonComment->isAccessSpecifierKeyword() ||
5953          NonComment->isOneOf(
5954              tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
5955              tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
5956              tok::kw_static, Keywords.kw_readonly, Keywords.kw_override,
5957              Keywords.kw_abstract, Keywords.kw_get, Keywords.kw_set,
5958              Keywords.kw_async, Keywords.kw_await))) {
5959       return false; // Otherwise automatic semicolon insertion would trigger.
5960     }
5961     if (Right.NestingLevel == 0 &&
5962         (Left.Tok.getIdentifierInfo() ||
5963          Left.isOneOf(tok::r_square, tok::r_paren)) &&
5964         Right.isOneOf(tok::l_square, tok::l_paren)) {
5965       return false; // Otherwise automatic semicolon insertion would trigger.
5966     }
       // Do not break after an identifier spelled `asserts`.
5967     if (NonComment && NonComment->is(tok::identifier) &&
5968         NonComment->TokenText == "asserts") {
5969       return false;
5970     }
5971     if (Left.is(TT_FatArrow) && Right.is(tok::l_brace))
5972       return false;
5973     if (Left.is(TT_JsTypeColon))
5974       return true;
5975     // Don't wrap between "!" and ":" of a strict prop init ("field!: type;").
5976     if (Left.is(tok::exclaim) && Right.is(tok::colon))
5977       return false;
5978     // Look for is type annotations like:
5979     // function f(): a is B { ... }
5980     // Do not break before is in these cases.
5981     if (Right.is(Keywords.kw_is)) {
5982       const FormatToken *Next = Right.getNextNonComment();
5983       // If `is` is followed by a colon, it's likely that it's a dict key, so
5984       // ignore it for this check.
5985       // For example this is common in Polymer:
5986       // Polymer({
5987       //   is: 'name',
5988       //   ...
5989       // });
5990       if (!Next || Next->isNot(tok::colon))
5991         return false;
5992     }
       // `in` follows BreakBeforeBinaryOperators: with BOS_None a break is
       // allowed after it, otherwise a break is allowed before it.
5993     if (Left.is(Keywords.kw_in))
5994       return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
5995     if (Right.is(Keywords.kw_in))
5996       return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
5997     if (Right.is(Keywords.kw_as))
5998       return false; // must not break before as in 'x as type' casts
5999     if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
6000       // extends and infer can appear as keywords in conditional types:
6001       //   https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
6002       // do not break before them, as the expressions are subject to ASI.
6003       return false;
6004     }
6005     if (Left.is(Keywords.kw_as))
6006       return true;
6007     if (Left.is(TT_NonNullAssertion))
6008       return true;
6009     if (Left.is(Keywords.kw_declare) &&
6010         Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
6011                       Keywords.kw_function, tok::kw_class, tok::kw_enum,
6012                       Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
6013                       Keywords.kw_let, tok::kw_const)) {
6014       // See grammar for 'declare' statements at:
6015       // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10
6016       return false;
6017     }
6018     if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
6019         Right.isOneOf(tok::identifier, tok::string_literal)) {
6020       return false; // must not break in "module foo { ...}"
6021     }
6022     if (Right.is(TT_TemplateString) && Right.closesScope())
6023       return false;
6024     // Don't split tagged template literal so there is a break between the tag
6025     // identifier and template string.
6026     if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
6027       return false;
6028     if (Left.is(TT_TemplateString) && Left.opensScope())
6029       return true;
6030   } else if (Style.isTableGen()) {
6031     // Avoid breaking after "def", "class", "let" and so on.
6032     if (Keywords.isTableGenDefinition(Left))
6033       return false;
6034     // Don't break before a '(' that follows a bang operator, a cond operator
         // or a template closer; before any other '(' a break is allowed.
6035     if (Right.is(tok::l_paren)) {
6036       return !Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator,
6037                            TT_TemplateCloser);
6038     }
6039     // Avoid breaking between the value and its suffix part.
6040     if (Left.is(TT_TableGenValueSuffix))
6041       return false;
6042     // Avoid breaking around the paste operator.
6043     if (Left.is(tok::hash) || Right.is(tok::hash))
6044       return false;
6045     if (Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator))
6046       return false;
6047   }
6048 
     // General rules, applied to every language when no language-specific rule
     // above has returned.
     // Never break directly after '@'.
6049   if (Left.is(tok::at))
6050     return false;
6051   if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
6052     return false;
6053   if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
6054     return Right.isNot(tok::l_paren);
6055   if (Right.is(TT_PointerOrReference)) {
6056     return Line.IsMultiVariableDeclStmt ||
6057            (getTokenPointerOrReferenceAlignment(Right) ==
6058                 FormatStyle::PAS_Right &&
6059             (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
6060   }
6061   if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
6062       Right.is(tok::kw_operator)) {
6063     return true;
6064   }
6065   if (Left.is(TT_PointerOrReference))
6066     return false;
6067   if (Right.isTrailingComment()) {
6068     // We rely on MustBreakBefore being set correctly here as we should not
6069     // change the "binding" behavior of a comment.
6070     // The first comment in a braced lists is always interpreted as belonging to
6071     // the first list element. Otherwise, it should be placed outside of the
6072     // list.
6073     return Left.is(BK_BracedInit) ||
6074            (Left.is(TT_CtorInitializerColon) && Right.NewlinesBefore > 0 &&
6075             Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
6076   }
     // Ternary operator: never split "?:"; otherwise break before or after
     // the operator tokens according to BreakBeforeTernaryOperators.
6077   if (Left.is(tok::question) && Right.is(tok::colon))
6078     return false;
6079   if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
6080     return Style.BreakBeforeTernaryOperators;
6081   if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
6082     return !Style.BreakBeforeTernaryOperators;
6083   if (Left.is(TT_InheritanceColon))
6084     return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
6085   if (Right.is(TT_InheritanceColon))
6086     return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
6087   if (Right.is(TT_ObjCMethodExpr) && Right.isNot(tok::r_square) &&
6088       Left.isNot(TT_SelectorName)) {
6089     return true;
6090   }
6091 
     // No break before any remaining colon, except constructor-initializer and
     // inline-asm colons, which are handled further down.
6092   if (Right.is(tok::colon) &&
6093       !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) {
6094     return false;
6095   }
6096   if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
6097     if (Style.isProto()) {
6098       if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
6099         return false;
6100       // Prevent cases like:
6101       //
6102       // submessage:
6103       //     { key: valueeeeeeeeeeee }
6104       //
6105       // when the snippet does not fit into one line.
6106       // Prefer:
6107       //
6108       // submessage: {
6109       //   key: valueeeeeeeeeeee
6110       // }
6111       //
6112       // instead, even if it is longer by one line.
6113       //
6114       // Note that this allows the "{" to go over the column limit
6115       // when the column limit is just between ":" and "{", but that does
6116       // not happen too often and alternative formattings in this case are
6117       // not much better.
6118       //
6119       // The code covers the cases:
6120       //
6121       // submessage: { ... }
6122       // submessage: < ... >
6123       // repeated: [ ... ]
6124       if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
6125            Right.is(TT_DictLiteral)) ||
6126           Right.is(TT_ArrayInitializerLSquare)) {
6127         return false;
6128       }
6129     }
6130     return true;
6131   }
     // Don't break before the ']' that closes a proto extension '['.
6132   if (Right.is(tok::r_square) && Right.MatchingParen &&
6133       Right.MatchingParen->is(TT_ProtoExtensionLSquare)) {
6134     return false;
6135   }
6136   if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
6137                                     Right.Next->is(TT_ObjCMethodExpr))) {
6138     return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
6139   }
6140   if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
6141     return true;
6142   if (Right.is(tok::kw_concept))
6143     return Style.BreakBeforeConceptDeclarations != FormatStyle::BBCDS_Never;
6144   if (Right.is(TT_RequiresClause))
6145     return true;
     // With BTDS_Leave, only keep a break that is already present in the input.
6146   if (Left.ClosesTemplateDeclaration) {
6147     return Style.BreakTemplateDeclarations != FormatStyle::BTDS_Leave ||
6148            Right.NewlinesBefore > 0;
6149   }
6150   if (Left.is(TT_FunctionAnnotationRParen))
6151     return true;
6152   if (Left.ClosesRequiresClause)
6153     return true;
6154   if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
6155                     TT_OverloadedOperator)) {
6156     return false;
6157   }
6158   if (Left.is(TT_RangeBasedForLoopColon))
6159     return true;
     // NOTE(review): this check cannot fire; a Right token of type
     // TT_RangeBasedForLoopColon already returned false in the isOneOf check
     // a few lines above.
6160   if (Right.is(TT_RangeBasedForLoopColon))
6161     return false;
6162   if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
6163     return true;
     // Don't break between two adjacent '>' or two adjacent '<' tokens.
6164   if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
6165       (Left.is(tok::less) && Right.is(tok::less))) {
6166     return false;
6167   }
     // Break before a binary operator only if the style asks for it; unless
     // the style is BOS_All, assignment operators are excluded.
6168   if (Right.is(TT_BinaryOperator) &&
6169       Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
6170       (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
6171        Right.getPrecedence() != prec::Assignment)) {
6172     return true;
6173   }
6174   if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
6175       Left.is(tok::kw_operator)) {
6176     return false;
6177   }
6178   if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
6179       Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) {
6180     return false;
6181   }
6182   if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
6183       !Style.Cpp11BracedListStyle) {
6184     return false;
6185   }
6186   if (Left.is(TT_AttributeLParen) ||
6187       (Left.is(tok::l_paren) && Left.is(TT_TypeDeclarationParen))) {
6188     return false;
6189   }
6190   if (Left.is(tok::l_paren) && Left.Previous &&
6191       (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) {
6192     return false;
6193   }
6194   if (Right.is(TT_ImplicitStringLiteral))
6195     return false;
6196 
6197   if (Right.is(TT_TemplateCloser))
6198     return false;
6199   if (Right.is(tok::r_square) && Right.MatchingParen &&
6200       Right.MatchingParen->is(TT_LambdaLSquare)) {
6201     return false;
6202   }
6203 
6204   // We only break before r_brace if there was a corresponding break before
6205   // the l_brace, which is tracked by BreakBeforeClosingBrace.
6206   if (Right.is(tok::r_brace)) {
6207     return Right.MatchingParen && (Right.MatchingParen->is(BK_Block) ||
6208                                    (Right.isBlockIndentedInitRBrace(Style)));
6209   }
6210 
6211   // We only break before r_paren if we're in a block indented context.
6212   if (Right.is(tok::r_paren)) {
6213     if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent ||
6214         !Right.MatchingParen) {
6215       return false;
6216     }
         // Skip one directly following ')' and refuse the break if a '(' comes
         // next.
6217     auto Next = Right.Next;
6218     if (Next && Next->is(tok::r_paren))
6219       Next = Next->Next;
6220     if (Next && Next->is(tok::l_paren))
6221       return false;
         // No break before the ')' whose '(' directly follows `for` or an `if`.
6222     const FormatToken *Previous = Right.MatchingParen->Previous;
6223     return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf()));
6224   }
6225 
6226   // Allow breaking after a trailing annotation, e.g. after a method
6227   // declaration.
6228   if (Left.is(TT_TrailingAnnotation)) {
6229     return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
6230                           tok::less, tok::coloncolon);
6231   }
6232 
6233   if (Right.isAttribute())
6234     return true;
6235 
6236   if (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))
6237     return Left.isNot(TT_AttributeSquare);
6238 
6239   if (Left.is(tok::identifier) && Right.is(tok::string_literal))
6240     return true;
6241 
6242   if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
6243     return true;
6244 
     // NOTE(review): trailing comments on the right already returned in the
     // Right.isTrailingComment() rule above, so !Right.isTrailingComment() is
     // always true when this rule is reached.
6245   if (Left.is(TT_CtorInitializerColon)) {
6246     return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
6247            (!Right.isTrailingComment() || Right.NewlinesBefore > 0);
6248   }
6249   if (Right.is(TT_CtorInitializerColon))
6250     return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
     // With the *_BeforeComma styles the break goes before the comma, never
     // after it.
6251   if (Left.is(TT_CtorInitializerComma) &&
6252       Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
6253     return false;
6254   }
6255   if (Right.is(TT_CtorInitializerComma) &&
6256       Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
6257     return true;
6258   }
6259   if (Left.is(TT_InheritanceComma) &&
6260       Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
6261     return false;
6262   }
6263   if (Right.is(TT_InheritanceComma) &&
6264       Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
6265     return true;
6266   }
6267   if (Left.is(TT_ArrayInitializerLSquare))
6268     return true;
6269   if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
6270     return true;
     // Counterpart of the "break before binary operator" rule above: allow a
     // break after a binary operator (other than ->* and <<) when the style is
     // BOS_None, or when it is not BOS_All and the operator is an assignment.
6271   if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
6272       !Left.isOneOf(tok::arrowstar, tok::lessless) &&
6273       Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
6274       (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
6275        Left.getPrecedence() == prec::Assignment)) {
6276     return true;
6277   }
6278   if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
6279       (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) {
6280     return false;
6281   }
6282 
6283   auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
6284   if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) {
6285     if (isAllmanLambdaBrace(Left))
6286       return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption);
6287     if (isAllmanLambdaBrace(Right))
6288       return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption);
6289   }
6290 
     // Every case of the switch returns; control only continues past it if the
     // option holds a value that is not one of the listed enumerators.
6291   if (Right.is(tok::kw_noexcept) && Right.is(TT_TrailingAnnotation)) {
6292     switch (Style.AllowBreakBeforeNoexceptSpecifier) {
6293     case FormatStyle::BBNSS_Never:
6294       return false;
6295     case FormatStyle::BBNSS_Always:
6296       return true;
6297     case FormatStyle::BBNSS_OnlyWithParen:
6298       return Right.Next && Right.Next->is(tok::l_paren);
6299     }
6300   }
6301 
     // Fallback when no rule above matched: a break is allowed only for the
     // token combinations listed here.
6302   return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
6303                       tok::kw_class, tok::kw_struct, tok::comment) ||
6304          Right.isMemberAccess() ||
6305          Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
6306                        tok::colon, tok::l_square, tok::at) ||
6307          (Left.is(tok::r_paren) &&
6308           Right.isOneOf(tok::identifier, tok::kw_const)) ||
6309          (Left.is(tok::l_paren) && Right.isNot(tok::r_paren)) ||
6310          (Left.is(TT_TemplateOpener) && Right.isNot(TT_TemplateCloser));
6311 }
6312 
printDebugInfo(const AnnotatedLine & Line) const6313 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) const {
6314   llvm::errs() << "AnnotatedTokens(L=" << Line.Level << ", P=" << Line.PPLevel
6315                << ", T=" << Line.Type << ", C=" << Line.IsContinuation
6316                << "):\n";
6317   const FormatToken *Tok = Line.First;
6318   while (Tok) {
6319     llvm::errs() << " M=" << Tok->MustBreakBefore
6320                  << " C=" << Tok->CanBreakBefore
6321                  << " T=" << getTokenTypeName(Tok->getType())
6322                  << " S=" << Tok->SpacesRequiredBefore
6323                  << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
6324                  << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty
6325                  << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
6326                  << " PPK=" << Tok->getPackingKind() << " FakeLParens=";
6327     for (prec::Level LParen : Tok->FakeLParens)
6328       llvm::errs() << LParen << "/";
6329     llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
6330     llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
6331     llvm::errs() << " Text='" << Tok->TokenText << "'\n";
6332     if (!Tok->Next)
6333       assert(Tok == Line.Last);
6334     Tok = Tok->Next;
6335   }
6336   llvm::errs() << "----\n";
6337 }
6338 
6339 FormatStyle::PointerAlignmentStyle
getTokenReferenceAlignment(const FormatToken & Reference) const6340 TokenAnnotator::getTokenReferenceAlignment(const FormatToken &Reference) const {
6341   assert(Reference.isOneOf(tok::amp, tok::ampamp));
6342   switch (Style.ReferenceAlignment) {
6343   case FormatStyle::RAS_Pointer:
6344     return Style.PointerAlignment;
6345   case FormatStyle::RAS_Left:
6346     return FormatStyle::PAS_Left;
6347   case FormatStyle::RAS_Right:
6348     return FormatStyle::PAS_Right;
6349   case FormatStyle::RAS_Middle:
6350     return FormatStyle::PAS_Middle;
6351   }
6352   assert(0); //"Unhandled value of ReferenceAlignment"
6353   return Style.PointerAlignment;
6354 }
6355 
6356 FormatStyle::PointerAlignmentStyle
getTokenPointerOrReferenceAlignment(const FormatToken & PointerOrReference) const6357 TokenAnnotator::getTokenPointerOrReferenceAlignment(
6358     const FormatToken &PointerOrReference) const {
6359   if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) {
6360     switch (Style.ReferenceAlignment) {
6361     case FormatStyle::RAS_Pointer:
6362       return Style.PointerAlignment;
6363     case FormatStyle::RAS_Left:
6364       return FormatStyle::PAS_Left;
6365     case FormatStyle::RAS_Right:
6366       return FormatStyle::PAS_Right;
6367     case FormatStyle::RAS_Middle:
6368       return FormatStyle::PAS_Middle;
6369     }
6370   }
6371   assert(PointerOrReference.is(tok::star));
6372   return Style.PointerAlignment;
6373 }
6374 
6375 } // namespace format
6376 } // namespace clang
6377