1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "TokenAnnotator.h"
16 #include "FormatToken.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Support/Debug.h"
21
22 #define DEBUG_TYPE "format-token-annotator"
23
24 namespace clang {
25 namespace format {
26
mustBreakAfterAttributes(const FormatToken & Tok,const FormatStyle & Style)27 static bool mustBreakAfterAttributes(const FormatToken &Tok,
28 const FormatStyle &Style) {
29 switch (Style.BreakAfterAttributes) {
30 case FormatStyle::ABS_Always:
31 return true;
32 case FormatStyle::ABS_Leave:
33 return Tok.NewlinesBefore > 0;
34 default:
35 return false;
36 }
37 }
38
39 namespace {
40
41 /// Returns \c true if the line starts with a token that can start a statement
42 /// with an initializer.
startsWithInitStatement(const AnnotatedLine & Line)43 static bool startsWithInitStatement(const AnnotatedLine &Line) {
44 return Line.startsWith(tok::kw_for) || Line.startsWith(tok::kw_if) ||
45 Line.startsWith(tok::kw_switch);
46 }
47
48 /// Returns \c true if the token can be used as an identifier in
49 /// an Objective-C \c \@selector, \c false otherwise.
50 ///
51 /// Because getFormattingLangOpts() always lexes source code as
52 /// Objective-C++, C++ keywords like \c new and \c delete are
53 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
54 ///
55 /// For Objective-C and Objective-C++, both identifiers and keywords
56 /// are valid inside @selector(...) (or a macro which
57 /// invokes @selector(...)). So, we allow treat any identifier or
58 /// keyword as a potential Objective-C selector component.
canBeObjCSelectorComponent(const FormatToken & Tok)59 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
60 return Tok.Tok.getIdentifierInfo();
61 }
62
63 /// With `Left` being '(', check if we're at either `[...](` or
64 /// `[...]<...>(`, where the [ opens a lambda capture list.
isLambdaParameterList(const FormatToken * Left)65 static bool isLambdaParameterList(const FormatToken *Left) {
66 // Skip <...> if present.
67 if (Left->Previous && Left->Previous->is(tok::greater) &&
68 Left->Previous->MatchingParen &&
69 Left->Previous->MatchingParen->is(TT_TemplateOpener)) {
70 Left = Left->Previous->MatchingParen;
71 }
72
73 // Check for `[...]`.
74 return Left->Previous && Left->Previous->is(tok::r_square) &&
75 Left->Previous->MatchingParen &&
76 Left->Previous->MatchingParen->is(TT_LambdaLSquare);
77 }
78
79 /// Returns \c true if the token is followed by a boolean condition, \c false
80 /// otherwise.
isKeywordWithCondition(const FormatToken & Tok)81 static bool isKeywordWithCondition(const FormatToken &Tok) {
82 return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
83 tok::kw_constexpr, tok::kw_catch);
84 }
85
86 /// Returns \c true if the token starts a C++ attribute, \c false otherwise.
isCppAttribute(bool IsCpp,const FormatToken & Tok)87 static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) {
88 if (!IsCpp || !Tok.startsSequence(tok::l_square, tok::l_square))
89 return false;
90 // The first square bracket is part of an ObjC array literal
91 if (Tok.Previous && Tok.Previous->is(tok::at))
92 return false;
93 const FormatToken *AttrTok = Tok.Next->Next;
94 if (!AttrTok)
95 return false;
96 // C++17 '[[using ns: foo, bar(baz, blech)]]'
97 // We assume nobody will name an ObjC variable 'using'.
98 if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
99 return true;
100 if (AttrTok->isNot(tok::identifier))
101 return false;
102 while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
103 // ObjC message send. We assume nobody will use : in a C++11 attribute
104 // specifier parameter, although this is technically valid:
105 // [[foo(:)]].
106 if (AttrTok->is(tok::colon) ||
107 AttrTok->startsSequence(tok::identifier, tok::identifier) ||
108 AttrTok->startsSequence(tok::r_paren, tok::identifier)) {
109 return false;
110 }
111 if (AttrTok->is(tok::ellipsis))
112 return true;
113 AttrTok = AttrTok->Next;
114 }
115 return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
116 }
117
/// A parser that gathers additional information about tokens.
///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// to store the parenthesis levels. It also tries to resolve matching "<" and
/// ">" into template parameter lists.
123 class AnnotatingParser {
124 public:
AnnotatingParser(const FormatStyle & Style,AnnotatedLine & Line,const AdditionalKeywords & Keywords,SmallVector<ScopeType> & Scopes)125 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
126 const AdditionalKeywords &Keywords,
127 SmallVector<ScopeType> &Scopes)
128 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
129 IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)),
130 Keywords(Keywords), Scopes(Scopes), TemplateDeclarationDepth(0) {
131 assert(IsCpp == LangOpts.CXXOperatorNames);
132 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
133 resetTokenMetadata();
134 }
135
136 private:
getScopeType(const FormatToken & Token) const137 ScopeType getScopeType(const FormatToken &Token) const {
138 switch (Token.getType()) {
139 case TT_FunctionLBrace:
140 case TT_LambdaLBrace:
141 return ST_Function;
142 case TT_ClassLBrace:
143 case TT_StructLBrace:
144 case TT_UnionLBrace:
145 return ST_Class;
146 default:
147 return ST_Other;
148 }
149 }
150
parseAngle()151 bool parseAngle() {
152 if (!CurrentToken || !CurrentToken->Previous)
153 return false;
154 if (NonTemplateLess.count(CurrentToken->Previous) > 0)
155 return false;
156
157 if (const auto &Previous = *CurrentToken->Previous; // The '<'.
158 Previous.Previous) {
159 if (Previous.Previous->Tok.isLiteral())
160 return false;
161 if (Previous.Previous->is(tok::r_brace))
162 return false;
163 if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
164 (!Previous.Previous->MatchingParen ||
165 Previous.Previous->MatchingParen->isNot(
166 TT_OverloadedOperatorLParen))) {
167 return false;
168 }
169 if (Previous.Previous->is(tok::kw_operator) &&
170 CurrentToken->is(tok::l_paren)) {
171 return false;
172 }
173 }
174
175 FormatToken *Left = CurrentToken->Previous;
176 Left->ParentBracket = Contexts.back().ContextKind;
177 ScopedContextCreator ContextCreator(*this, tok::less, 12);
178 Contexts.back().IsExpression = false;
179
180 const auto *BeforeLess = Left->Previous;
181
182 // If there's a template keyword before the opening angle bracket, this is a
183 // template parameter, not an argument.
184 if (BeforeLess && BeforeLess->isNot(tok::kw_template))
185 Contexts.back().ContextType = Context::TemplateArgument;
186
187 if (Style.Language == FormatStyle::LK_Java &&
188 CurrentToken->is(tok::question)) {
189 next();
190 }
191
192 for (bool SeenTernaryOperator = false, MaybeAngles = true; CurrentToken;) {
193 const bool InExpr = Contexts[Contexts.size() - 2].IsExpression;
194 if (CurrentToken->is(tok::greater)) {
195 const auto *Next = CurrentToken->Next;
196 if (CurrentToken->isNot(TT_TemplateCloser)) {
197 // Try to do a better job at looking for ">>" within the condition of
198 // a statement. Conservatively insert spaces between consecutive ">"
199 // tokens to prevent splitting right shift operators and potentially
200 // altering program semantics. This check is overly conservative and
201 // will prevent spaces from being inserted in select nested template
202 // parameter cases, but should not alter program semantics.
203 if (Next && Next->is(tok::greater) &&
204 Left->ParentBracket != tok::less &&
205 CurrentToken->getStartOfNonWhitespace() ==
206 Next->getStartOfNonWhitespace().getLocWithOffset(-1)) {
207 return false;
208 }
209 if (InExpr && SeenTernaryOperator &&
210 (!Next || !Next->isOneOf(tok::l_paren, tok::l_brace))) {
211 return false;
212 }
213 if (!MaybeAngles)
214 return false;
215 }
216 Left->MatchingParen = CurrentToken;
217 CurrentToken->MatchingParen = Left;
218 // In TT_Proto, we must distignuish between:
219 // map<key, value>
220 // msg < item: data >
221 // msg: < item: data >
222 // In TT_TextProto, map<key, value> does not occur.
223 if (Style.Language == FormatStyle::LK_TextProto ||
224 (Style.Language == FormatStyle::LK_Proto && BeforeLess &&
225 BeforeLess->isOneOf(TT_SelectorName, TT_DictLiteral))) {
226 CurrentToken->setType(TT_DictLiteral);
227 } else {
228 CurrentToken->setType(TT_TemplateCloser);
229 CurrentToken->Tok.setLength(1);
230 }
231 if (Next && Next->Tok.isLiteral())
232 return false;
233 next();
234 return true;
235 }
236 if (CurrentToken->is(tok::question) &&
237 Style.Language == FormatStyle::LK_Java) {
238 next();
239 continue;
240 }
241 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace))
242 return false;
243 const auto &Prev = *CurrentToken->Previous;
244 // If a && or || is found and interpreted as a binary operator, this set
245 // of angles is likely part of something like "a < b && c > d". If the
246 // angles are inside an expression, the ||/&& might also be a binary
247 // operator that was misinterpreted because we are parsing template
248 // parameters.
249 // FIXME: This is getting out of hand, write a decent parser.
250 if (MaybeAngles && InExpr && !Line.startsWith(tok::kw_template) &&
251 Prev.is(TT_BinaryOperator)) {
252 const auto Precedence = Prev.getPrecedence();
253 if (Precedence > prec::Conditional && Precedence < prec::Relational)
254 MaybeAngles = false;
255 }
256 if (Prev.isOneOf(tok::question, tok::colon) && !Style.isProto())
257 SeenTernaryOperator = true;
258 updateParameterCount(Left, CurrentToken);
259 if (Style.Language == FormatStyle::LK_Proto) {
260 if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
261 if (CurrentToken->is(tok::colon) ||
262 (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
263 Previous->isNot(tok::colon))) {
264 Previous->setType(TT_SelectorName);
265 }
266 }
267 }
268 if (Style.isTableGen()) {
269 if (CurrentToken->isOneOf(tok::comma, tok::equal)) {
270 // They appear as separators. Unless they are not in class definition.
271 next();
272 continue;
273 }
274 // In angle, there must be Value like tokens. Types are also able to be
275 // parsed in the same way with Values.
276 if (!parseTableGenValue())
277 return false;
278 continue;
279 }
280 if (!consumeToken())
281 return false;
282 }
283 return false;
284 }
285
parseUntouchableParens()286 bool parseUntouchableParens() {
287 while (CurrentToken) {
288 CurrentToken->Finalized = true;
289 switch (CurrentToken->Tok.getKind()) {
290 case tok::l_paren:
291 next();
292 if (!parseUntouchableParens())
293 return false;
294 continue;
295 case tok::r_paren:
296 next();
297 return true;
298 default:
299 // no-op
300 break;
301 }
302 next();
303 }
304 return false;
305 }
306
parseParens(bool LookForDecls=false)307 bool parseParens(bool LookForDecls = false) {
308 if (!CurrentToken)
309 return false;
310 assert(CurrentToken->Previous && "Unknown previous token");
311 FormatToken &OpeningParen = *CurrentToken->Previous;
312 assert(OpeningParen.is(tok::l_paren));
313 FormatToken *PrevNonComment = OpeningParen.getPreviousNonComment();
314 OpeningParen.ParentBracket = Contexts.back().ContextKind;
315 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
316
317 // FIXME: This is a bit of a hack. Do better.
318 Contexts.back().ColonIsForRangeExpr =
319 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
320
321 if (OpeningParen.Previous &&
322 OpeningParen.Previous->is(TT_UntouchableMacroFunc)) {
323 OpeningParen.Finalized = true;
324 return parseUntouchableParens();
325 }
326
327 bool StartsObjCMethodExpr = false;
328 if (!Style.isVerilog()) {
329 if (FormatToken *MaybeSel = OpeningParen.Previous) {
330 // @selector( starts a selector.
331 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) &&
332 MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) {
333 StartsObjCMethodExpr = true;
334 }
335 }
336 }
337
338 if (OpeningParen.is(TT_OverloadedOperatorLParen)) {
339 // Find the previous kw_operator token.
340 FormatToken *Prev = &OpeningParen;
341 while (Prev->isNot(tok::kw_operator)) {
342 Prev = Prev->Previous;
343 assert(Prev && "Expect a kw_operator prior to the OperatorLParen!");
344 }
345
346 // If faced with "a.operator*(argument)" or "a->operator*(argument)",
347 // i.e. the operator is called as a member function,
348 // then the argument must be an expression.
349 bool OperatorCalledAsMemberFunction =
350 Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow);
351 Contexts.back().IsExpression = OperatorCalledAsMemberFunction;
352 } else if (OpeningParen.is(TT_VerilogInstancePortLParen)) {
353 Contexts.back().IsExpression = true;
354 Contexts.back().ContextType = Context::VerilogInstancePortList;
355 } else if (Style.isJavaScript() &&
356 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
357 Line.startsWith(tok::kw_export, Keywords.kw_type,
358 tok::identifier))) {
359 // type X = (...);
360 // export type X = (...);
361 Contexts.back().IsExpression = false;
362 } else if (OpeningParen.Previous &&
363 (OpeningParen.Previous->isOneOf(
364 tok::kw_static_assert, tok::kw_noexcept, tok::kw_explicit,
365 tok::kw_while, tok::l_paren, tok::comma,
366 TT_BinaryOperator) ||
367 OpeningParen.Previous->isIf())) {
368 // static_assert, if and while usually contain expressions.
369 Contexts.back().IsExpression = true;
370 } else if (Style.isJavaScript() && OpeningParen.Previous &&
371 (OpeningParen.Previous->is(Keywords.kw_function) ||
372 (OpeningParen.Previous->endsSequence(tok::identifier,
373 Keywords.kw_function)))) {
374 // function(...) or function f(...)
375 Contexts.back().IsExpression = false;
376 } else if (Style.isJavaScript() && OpeningParen.Previous &&
377 OpeningParen.Previous->is(TT_JsTypeColon)) {
378 // let x: (SomeType);
379 Contexts.back().IsExpression = false;
380 } else if (isLambdaParameterList(&OpeningParen)) {
381 // This is a parameter list of a lambda expression.
382 Contexts.back().IsExpression = false;
383 } else if (OpeningParen.is(TT_RequiresExpressionLParen)) {
384 Contexts.back().IsExpression = false;
385 } else if (OpeningParen.Previous &&
386 OpeningParen.Previous->is(tok::kw__Generic)) {
387 Contexts.back().ContextType = Context::C11GenericSelection;
388 Contexts.back().IsExpression = true;
389 } else if (Line.InPPDirective &&
390 (!OpeningParen.Previous ||
391 OpeningParen.Previous->isNot(tok::identifier))) {
392 Contexts.back().IsExpression = true;
393 } else if (Contexts[Contexts.size() - 2].CaretFound) {
394 // This is the parameter list of an ObjC block.
395 Contexts.back().IsExpression = false;
396 } else if (OpeningParen.Previous &&
397 OpeningParen.Previous->is(TT_ForEachMacro)) {
398 // The first argument to a foreach macro is a declaration.
399 Contexts.back().ContextType = Context::ForEachMacro;
400 Contexts.back().IsExpression = false;
401 } else if (OpeningParen.Previous && OpeningParen.Previous->MatchingParen &&
402 OpeningParen.Previous->MatchingParen->isOneOf(
403 TT_ObjCBlockLParen, TT_FunctionTypeLParen)) {
404 Contexts.back().IsExpression = false;
405 } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
406 bool IsForOrCatch =
407 OpeningParen.Previous &&
408 OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch);
409 Contexts.back().IsExpression = !IsForOrCatch;
410 }
411
412 if (Style.isTableGen()) {
413 if (FormatToken *Prev = OpeningParen.Previous) {
414 if (Prev->is(TT_TableGenCondOperator)) {
415 Contexts.back().IsTableGenCondOpe = true;
416 Contexts.back().IsExpression = true;
417 } else if (Contexts.size() > 1 &&
418 Contexts[Contexts.size() - 2].IsTableGenBangOpe) {
419 // Hack to handle bang operators. The parent context's flag
420 // was set by parseTableGenSimpleValue().
421 // We have to specify the context outside because the prev of "(" may
422 // be ">", not the bang operator in this case.
423 Contexts.back().IsTableGenBangOpe = true;
424 Contexts.back().IsExpression = true;
425 } else {
426 // Otherwise, this paren seems DAGArg.
427 if (!parseTableGenDAGArg())
428 return false;
429 return parseTableGenDAGArgAndList(&OpeningParen);
430 }
431 }
432 }
433
434 // Infer the role of the l_paren based on the previous token if we haven't
435 // detected one yet.
436 if (PrevNonComment && OpeningParen.is(TT_Unknown)) {
437 if (PrevNonComment->isAttribute()) {
438 OpeningParen.setType(TT_AttributeLParen);
439 } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
440 tok::kw_typeof,
441 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) tok::kw___##Trait,
442 #include "clang/Basic/TransformTypeTraits.def"
443 tok::kw__Atomic)) {
444 OpeningParen.setType(TT_TypeDeclarationParen);
445 // decltype() and typeof() usually contain expressions.
446 if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
447 Contexts.back().IsExpression = true;
448 }
449 }
450
451 if (StartsObjCMethodExpr) {
452 Contexts.back().ColonIsObjCMethodExpr = true;
453 OpeningParen.setType(TT_ObjCMethodExpr);
454 }
455
456 // MightBeFunctionType and ProbablyFunctionType are used for
457 // function pointer and reference types as well as Objective-C
458 // block types:
459 //
460 // void (*FunctionPointer)(void);
461 // void (&FunctionReference)(void);
462 // void (&&FunctionReference)(void);
463 // void (^ObjCBlock)(void);
464 bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
465 bool ProbablyFunctionType =
466 CurrentToken->isPointerOrReference() || CurrentToken->is(tok::caret);
467 bool HasMultipleLines = false;
468 bool HasMultipleParametersOnALine = false;
469 bool MightBeObjCForRangeLoop =
470 OpeningParen.Previous && OpeningParen.Previous->is(tok::kw_for);
471 FormatToken *PossibleObjCForInToken = nullptr;
472 while (CurrentToken) {
473 // LookForDecls is set when "if (" has been seen. Check for
474 // 'identifier' '*' 'identifier' followed by not '=' -- this
475 // '*' has to be a binary operator but determineStarAmpUsage() will
476 // categorize it as an unary operator, so set the right type here.
477 if (LookForDecls && CurrentToken->Next) {
478 FormatToken *Prev = CurrentToken->getPreviousNonComment();
479 if (Prev) {
480 FormatToken *PrevPrev = Prev->getPreviousNonComment();
481 FormatToken *Next = CurrentToken->Next;
482 if (PrevPrev && PrevPrev->is(tok::identifier) &&
483 PrevPrev->isNot(TT_TypeName) && Prev->isPointerOrReference() &&
484 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
485 Prev->setType(TT_BinaryOperator);
486 LookForDecls = false;
487 }
488 }
489 }
490
491 if (CurrentToken->Previous->is(TT_PointerOrReference) &&
492 CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
493 tok::coloncolon)) {
494 ProbablyFunctionType = true;
495 }
496 if (CurrentToken->is(tok::comma))
497 MightBeFunctionType = false;
498 if (CurrentToken->Previous->is(TT_BinaryOperator))
499 Contexts.back().IsExpression = true;
500 if (CurrentToken->is(tok::r_paren)) {
501 if (OpeningParen.isNot(TT_CppCastLParen) && MightBeFunctionType &&
502 ProbablyFunctionType && CurrentToken->Next &&
503 (CurrentToken->Next->is(tok::l_paren) ||
504 (CurrentToken->Next->is(tok::l_square) &&
505 Line.MustBeDeclaration))) {
506 OpeningParen.setType(OpeningParen.Next->is(tok::caret)
507 ? TT_ObjCBlockLParen
508 : TT_FunctionTypeLParen);
509 }
510 OpeningParen.MatchingParen = CurrentToken;
511 CurrentToken->MatchingParen = &OpeningParen;
512
513 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
514 OpeningParen.Previous && OpeningParen.Previous->is(tok::l_paren)) {
515 // Detect the case where macros are used to generate lambdas or
516 // function bodies, e.g.:
517 // auto my_lambda = MACRO((Type *type, int i) { .. body .. });
518 for (FormatToken *Tok = &OpeningParen; Tok != CurrentToken;
519 Tok = Tok->Next) {
520 if (Tok->is(TT_BinaryOperator) && Tok->isPointerOrReference())
521 Tok->setType(TT_PointerOrReference);
522 }
523 }
524
525 if (StartsObjCMethodExpr) {
526 CurrentToken->setType(TT_ObjCMethodExpr);
527 if (Contexts.back().FirstObjCSelectorName) {
528 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
529 Contexts.back().LongestObjCSelectorName;
530 }
531 }
532
533 if (OpeningParen.is(TT_AttributeLParen))
534 CurrentToken->setType(TT_AttributeRParen);
535 if (OpeningParen.is(TT_TypeDeclarationParen))
536 CurrentToken->setType(TT_TypeDeclarationParen);
537 if (OpeningParen.Previous &&
538 OpeningParen.Previous->is(TT_JavaAnnotation)) {
539 CurrentToken->setType(TT_JavaAnnotation);
540 }
541 if (OpeningParen.Previous &&
542 OpeningParen.Previous->is(TT_LeadingJavaAnnotation)) {
543 CurrentToken->setType(TT_LeadingJavaAnnotation);
544 }
545 if (OpeningParen.Previous &&
546 OpeningParen.Previous->is(TT_AttributeSquare)) {
547 CurrentToken->setType(TT_AttributeSquare);
548 }
549
550 if (!HasMultipleLines)
551 OpeningParen.setPackingKind(PPK_Inconclusive);
552 else if (HasMultipleParametersOnALine)
553 OpeningParen.setPackingKind(PPK_BinPacked);
554 else
555 OpeningParen.setPackingKind(PPK_OnePerLine);
556
557 next();
558 return true;
559 }
560 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
561 return false;
562
563 if (CurrentToken->is(tok::l_brace) && OpeningParen.is(TT_ObjCBlockLParen))
564 OpeningParen.setType(TT_Unknown);
565 if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
566 !CurrentToken->Next->HasUnescapedNewline &&
567 !CurrentToken->Next->isTrailingComment()) {
568 HasMultipleParametersOnALine = true;
569 }
570 bool ProbablyFunctionTypeLParen =
571 (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
572 CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
573 if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
574 CurrentToken->Previous->isTypeName(LangOpts)) &&
575 !(CurrentToken->is(tok::l_brace) ||
576 (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) {
577 Contexts.back().IsExpression = false;
578 }
579 if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
580 MightBeObjCForRangeLoop = false;
581 if (PossibleObjCForInToken) {
582 PossibleObjCForInToken->setType(TT_Unknown);
583 PossibleObjCForInToken = nullptr;
584 }
585 }
586 if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
587 PossibleObjCForInToken = CurrentToken;
588 PossibleObjCForInToken->setType(TT_ObjCForIn);
589 }
590 // When we discover a 'new', we set CanBeExpression to 'false' in order to
591 // parse the type correctly. Reset that after a comma.
592 if (CurrentToken->is(tok::comma))
593 Contexts.back().CanBeExpression = true;
594
595 if (Style.isTableGen()) {
596 if (CurrentToken->is(tok::comma)) {
597 if (Contexts.back().IsTableGenCondOpe)
598 CurrentToken->setType(TT_TableGenCondOperatorComma);
599 next();
600 } else if (CurrentToken->is(tok::colon)) {
601 if (Contexts.back().IsTableGenCondOpe)
602 CurrentToken->setType(TT_TableGenCondOperatorColon);
603 next();
604 }
605 // In TableGen there must be Values in parens.
606 if (!parseTableGenValue())
607 return false;
608 continue;
609 }
610
611 FormatToken *Tok = CurrentToken;
612 if (!consumeToken())
613 return false;
614 updateParameterCount(&OpeningParen, Tok);
615 if (CurrentToken && CurrentToken->HasUnescapedNewline)
616 HasMultipleLines = true;
617 }
618 return false;
619 }
620
isCSharpAttributeSpecifier(const FormatToken & Tok)621 bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
622 if (!Style.isCSharp())
623 return false;
624
625 // `identifier[i]` is not an attribute.
626 if (Tok.Previous && Tok.Previous->is(tok::identifier))
627 return false;
628
629 // Chains of [] in `identifier[i][j][k]` are not attributes.
630 if (Tok.Previous && Tok.Previous->is(tok::r_square)) {
631 auto *MatchingParen = Tok.Previous->MatchingParen;
632 if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare))
633 return false;
634 }
635
636 const FormatToken *AttrTok = Tok.Next;
637 if (!AttrTok)
638 return false;
639
640 // Just an empty declaration e.g. string [].
641 if (AttrTok->is(tok::r_square))
642 return false;
643
644 // Move along the tokens inbetween the '[' and ']' e.g. [STAThread].
645 while (AttrTok && AttrTok->isNot(tok::r_square))
646 AttrTok = AttrTok->Next;
647
648 if (!AttrTok)
649 return false;
650
651 // Allow an attribute to be the only content of a file.
652 AttrTok = AttrTok->Next;
653 if (!AttrTok)
654 return true;
655
656 // Limit this to being an access modifier that follows.
657 if (AttrTok->isAccessSpecifierKeyword() ||
658 AttrTok->isOneOf(tok::comment, tok::kw_class, tok::kw_static,
659 tok::l_square, Keywords.kw_internal)) {
660 return true;
661 }
662
663 // incase its a [XXX] retval func(....
664 if (AttrTok->Next &&
665 AttrTok->Next->startsSequence(tok::identifier, tok::l_paren)) {
666 return true;
667 }
668
669 return false;
670 }
671
parseSquare()672 bool parseSquare() {
673 if (!CurrentToken)
674 return false;
675
676 // A '[' could be an index subscript (after an identifier or after
677 // ')' or ']'), it could be the start of an Objective-C method
678 // expression, it could the start of an Objective-C array literal,
679 // or it could be a C++ attribute specifier [[foo::bar]].
680 FormatToken *Left = CurrentToken->Previous;
681 Left->ParentBracket = Contexts.back().ContextKind;
682 FormatToken *Parent = Left->getPreviousNonComment();
683
684 // Cases where '>' is followed by '['.
685 // In C++, this can happen either in array of templates (foo<int>[10])
686 // or when array is a nested template type (unique_ptr<type1<type2>[]>).
687 bool CppArrayTemplates =
688 IsCpp && Parent && Parent->is(TT_TemplateCloser) &&
689 (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
690 Contexts.back().ContextType == Context::TemplateArgument);
691
692 const bool IsInnerSquare = Contexts.back().InCpp11AttributeSpecifier;
693 const bool IsCpp11AttributeSpecifier =
694 isCppAttribute(IsCpp, *Left) || IsInnerSquare;
695
696 // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
697 bool IsCSharpAttributeSpecifier =
698 isCSharpAttributeSpecifier(*Left) ||
699 Contexts.back().InCSharpAttributeSpecifier;
700
701 bool InsideInlineASM = Line.startsWith(tok::kw_asm);
702 bool IsCppStructuredBinding = Left->isCppStructuredBinding(IsCpp);
703 bool StartsObjCMethodExpr =
704 !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
705 IsCpp && !IsCpp11AttributeSpecifier && !IsCSharpAttributeSpecifier &&
706 Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
707 !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
708 (!Parent ||
709 Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
710 tok::kw_return, tok::kw_throw) ||
711 Parent->isUnaryOperator() ||
712 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
713 Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
714 (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
715 prec::Unknown));
716 bool ColonFound = false;
717
718 unsigned BindingIncrease = 1;
719 if (IsCppStructuredBinding) {
720 Left->setType(TT_StructuredBindingLSquare);
721 } else if (Left->is(TT_Unknown)) {
722 if (StartsObjCMethodExpr) {
723 Left->setType(TT_ObjCMethodExpr);
724 } else if (InsideInlineASM) {
725 Left->setType(TT_InlineASMSymbolicNameLSquare);
726 } else if (IsCpp11AttributeSpecifier) {
727 Left->setType(TT_AttributeSquare);
728 if (!IsInnerSquare && Left->Previous)
729 Left->Previous->EndsCppAttributeGroup = false;
730 } else if (Style.isJavaScript() && Parent &&
731 Contexts.back().ContextKind == tok::l_brace &&
732 Parent->isOneOf(tok::l_brace, tok::comma)) {
733 Left->setType(TT_JsComputedPropertyName);
734 } else if (IsCpp && Contexts.back().ContextKind == tok::l_brace &&
735 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
736 Left->setType(TT_DesignatedInitializerLSquare);
737 } else if (IsCSharpAttributeSpecifier) {
738 Left->setType(TT_AttributeSquare);
739 } else if (CurrentToken->is(tok::r_square) && Parent &&
740 Parent->is(TT_TemplateCloser)) {
741 Left->setType(TT_ArraySubscriptLSquare);
742 } else if (Style.isProto()) {
743 // Square braces in LK_Proto can either be message field attributes:
744 //
745 // optional Aaa aaa = 1 [
746 // (aaa) = aaa
747 // ];
748 //
749 // extensions 123 [
750 // (aaa) = aaa
751 // ];
752 //
753 // or text proto extensions (in options):
754 //
755 // option (Aaa.options) = {
756 // [type.type/type] {
757 // key: value
758 // }
759 // }
760 //
761 // or repeated fields (in options):
762 //
763 // option (Aaa.options) = {
764 // keys: [ 1, 2, 3 ]
765 // }
766 //
767 // In the first and the third case we want to spread the contents inside
768 // the square braces; in the second we want to keep them inline.
769 Left->setType(TT_ArrayInitializerLSquare);
770 if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
771 tok::equal) &&
772 !Left->endsSequence(tok::l_square, tok::numeric_constant,
773 tok::identifier) &&
774 !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
775 Left->setType(TT_ProtoExtensionLSquare);
776 BindingIncrease = 10;
777 }
778 } else if (!CppArrayTemplates && Parent &&
779 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
780 tok::comma, tok::l_paren, tok::l_square,
781 tok::question, tok::colon, tok::kw_return,
782 // Should only be relevant to JavaScript:
783 tok::kw_default)) {
784 Left->setType(TT_ArrayInitializerLSquare);
785 } else {
786 BindingIncrease = 10;
787 Left->setType(TT_ArraySubscriptLSquare);
788 }
789 }
790
791 ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
792 Contexts.back().IsExpression = true;
793 if (Style.isJavaScript() && Parent && Parent->is(TT_JsTypeColon))
794 Contexts.back().IsExpression = false;
795
796 Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
797 Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
798 Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier;
799
800 while (CurrentToken) {
801 if (CurrentToken->is(tok::r_square)) {
802 if (IsCpp11AttributeSpecifier) {
803 CurrentToken->setType(TT_AttributeSquare);
804 if (!IsInnerSquare)
805 CurrentToken->EndsCppAttributeGroup = true;
806 }
807 if (IsCSharpAttributeSpecifier) {
808 CurrentToken->setType(TT_AttributeSquare);
809 } else if (((CurrentToken->Next &&
810 CurrentToken->Next->is(tok::l_paren)) ||
811 (CurrentToken->Previous &&
812 CurrentToken->Previous->Previous == Left)) &&
813 Left->is(TT_ObjCMethodExpr)) {
814 // An ObjC method call is rarely followed by an open parenthesis. It
815 // also can't be composed of just one token, unless it's a macro that
816 // will be expanded to more tokens.
817 // FIXME: Do we incorrectly label ":" with this?
818 StartsObjCMethodExpr = false;
819 Left->setType(TT_Unknown);
820 }
821 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
822 CurrentToken->setType(TT_ObjCMethodExpr);
823 // If we haven't seen a colon yet, make sure the last identifier
824 // before the r_square is tagged as a selector name component.
825 if (!ColonFound && CurrentToken->Previous &&
826 CurrentToken->Previous->is(TT_Unknown) &&
827 canBeObjCSelectorComponent(*CurrentToken->Previous)) {
828 CurrentToken->Previous->setType(TT_SelectorName);
829 }
830 // determineStarAmpUsage() thinks that '*' '[' is allocating an
831 // array of pointers, but if '[' starts a selector then '*' is a
832 // binary operator.
833 if (Parent && Parent->is(TT_PointerOrReference))
834 Parent->overwriteFixedType(TT_BinaryOperator);
835 }
836 // An arrow after an ObjC method expression is not a lambda arrow.
837 if (CurrentToken->is(TT_ObjCMethodExpr) && CurrentToken->Next &&
838 CurrentToken->Next->is(TT_LambdaArrow)) {
839 CurrentToken->Next->overwriteFixedType(TT_Unknown);
840 }
841 Left->MatchingParen = CurrentToken;
842 CurrentToken->MatchingParen = Left;
843 // FirstObjCSelectorName is set when a colon is found. This does
844 // not work, however, when the method has no parameters.
845 // Here, we set FirstObjCSelectorName when the end of the method call is
846 // reached, in case it was not set already.
847 if (!Contexts.back().FirstObjCSelectorName) {
848 FormatToken *Previous = CurrentToken->getPreviousNonComment();
849 if (Previous && Previous->is(TT_SelectorName)) {
850 Previous->ObjCSelectorNameParts = 1;
851 Contexts.back().FirstObjCSelectorName = Previous;
852 }
853 } else {
854 Left->ParameterCount =
855 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
856 }
857 if (Contexts.back().FirstObjCSelectorName) {
858 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
859 Contexts.back().LongestObjCSelectorName;
860 if (Left->BlockParameterCount > 1)
861 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
862 }
863 if (Style.isTableGen() && Left->is(TT_TableGenListOpener))
864 CurrentToken->setType(TT_TableGenListCloser);
865 next();
866 return true;
867 }
868 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
869 return false;
870 if (CurrentToken->is(tok::colon)) {
871 if (IsCpp11AttributeSpecifier &&
872 CurrentToken->endsSequence(tok::colon, tok::identifier,
873 tok::kw_using)) {
874 // Remember that this is a [[using ns: foo]] C++ attribute, so we
875 // don't add a space before the colon (unlike other colons).
876 CurrentToken->setType(TT_AttributeColon);
877 } else if (!Style.isVerilog() && !Line.InPragmaDirective &&
878 Left->isOneOf(TT_ArraySubscriptLSquare,
879 TT_DesignatedInitializerLSquare)) {
880 Left->setType(TT_ObjCMethodExpr);
881 StartsObjCMethodExpr = true;
882 Contexts.back().ColonIsObjCMethodExpr = true;
883 if (Parent && Parent->is(tok::r_paren)) {
884 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
885 Parent->setType(TT_CastRParen);
886 }
887 }
888 ColonFound = true;
889 }
890 if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
891 !ColonFound) {
892 Left->setType(TT_ArrayInitializerLSquare);
893 }
894 FormatToken *Tok = CurrentToken;
895 if (Style.isTableGen()) {
896 if (CurrentToken->isOneOf(tok::comma, tok::minus, tok::ellipsis)) {
897 // '-' and '...' appears as a separator in slice.
898 next();
899 } else {
900 // In TableGen there must be a list of Values in square brackets.
901 // It must be ValueList or SliceElements.
902 if (!parseTableGenValue())
903 return false;
904 }
905 updateParameterCount(Left, Tok);
906 continue;
907 }
908 if (!consumeToken())
909 return false;
910 updateParameterCount(Left, Tok);
911 }
912 return false;
913 }
914
skipToNextNonComment()915 void skipToNextNonComment() {
916 next();
917 while (CurrentToken && CurrentToken->is(tok::comment))
918 next();
919 }
920
921 // Simplified parser for TableGen Value. Returns true on success.
922 // It consists of SimpleValues, SimpleValues with Suffixes, and Value followed
923 // by '#', paste operator.
924 // There also exists the case the Value is parsed as NameValue.
925 // In this case, the Value ends if '{' is found.
parseTableGenValue(bool ParseNameMode=false)926 bool parseTableGenValue(bool ParseNameMode = false) {
927 if (!CurrentToken)
928 return false;
929 while (CurrentToken->is(tok::comment))
930 next();
931 if (!parseTableGenSimpleValue())
932 return false;
933 if (!CurrentToken)
934 return true;
935 // Value "#" [Value]
936 if (CurrentToken->is(tok::hash)) {
937 if (CurrentToken->Next &&
938 CurrentToken->Next->isOneOf(tok::colon, tok::semi, tok::l_brace)) {
939 // Trailing paste operator.
940 // These are only the allowed cases in TGParser::ParseValue().
941 CurrentToken->setType(TT_TableGenTrailingPasteOperator);
942 next();
943 return true;
944 }
945 FormatToken *HashTok = CurrentToken;
946 skipToNextNonComment();
947 HashTok->setType(TT_Unknown);
948 if (!parseTableGenValue(ParseNameMode))
949 return false;
950 }
951 // In name mode, '{' is regarded as the end of the value.
952 // See TGParser::ParseValue in TGParser.cpp
953 if (ParseNameMode && CurrentToken->is(tok::l_brace))
954 return true;
955 // These tokens indicates this is a value with suffixes.
956 if (CurrentToken->isOneOf(tok::l_brace, tok::l_square, tok::period)) {
957 CurrentToken->setType(TT_TableGenValueSuffix);
958 FormatToken *Suffix = CurrentToken;
959 skipToNextNonComment();
960 if (Suffix->is(tok::l_square))
961 return parseSquare();
962 if (Suffix->is(tok::l_brace)) {
963 Scopes.push_back(getScopeType(*Suffix));
964 return parseBrace();
965 }
966 }
967 return true;
968 }
969
970 // TokVarName ::= "$" ualpha (ualpha | "0"..."9")*
971 // Appears as a part of DagArg.
972 // This does not change the current token on fail.
tryToParseTableGenTokVar()973 bool tryToParseTableGenTokVar() {
974 if (!CurrentToken)
975 return false;
976 if (CurrentToken->is(tok::identifier) &&
977 CurrentToken->TokenText.front() == '$') {
978 skipToNextNonComment();
979 return true;
980 }
981 return false;
982 }
983
984 // DagArg ::= Value [":" TokVarName] | TokVarName
985 // Appears as a part of SimpleValue6.
parseTableGenDAGArg(bool AlignColon=false)986 bool parseTableGenDAGArg(bool AlignColon = false) {
987 if (tryToParseTableGenTokVar())
988 return true;
989 if (parseTableGenValue()) {
990 if (CurrentToken && CurrentToken->is(tok::colon)) {
991 if (AlignColon)
992 CurrentToken->setType(TT_TableGenDAGArgListColonToAlign);
993 else
994 CurrentToken->setType(TT_TableGenDAGArgListColon);
995 skipToNextNonComment();
996 return tryToParseTableGenTokVar();
997 }
998 return true;
999 }
1000 return false;
1001 }
1002
1003 // Judge if the token is a operator ID to insert line break in DAGArg.
1004 // That is, TableGenBreakingDAGArgOperators is empty (by the definition of the
1005 // option) or the token is in the list.
isTableGenDAGArgBreakingOperator(const FormatToken & Tok)1006 bool isTableGenDAGArgBreakingOperator(const FormatToken &Tok) {
1007 auto &Opes = Style.TableGenBreakingDAGArgOperators;
1008 // If the list is empty, all operators are breaking operators.
1009 if (Opes.empty())
1010 return true;
1011 // Otherwise, the operator is limited to normal identifiers.
1012 if (Tok.isNot(tok::identifier) ||
1013 Tok.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator)) {
1014 return false;
1015 }
1016 // The case next is colon, it is not a operator of identifier.
1017 if (!Tok.Next || Tok.Next->is(tok::colon))
1018 return false;
1019 return std::find(Opes.begin(), Opes.end(), Tok.TokenText.str()) !=
1020 Opes.end();
1021 }
1022
1023 // SimpleValue6 ::= "(" DagArg [DagArgList] ")"
1024 // This parses SimpleValue 6's inside part of "(" ")"
parseTableGenDAGArgAndList(FormatToken * Opener)1025 bool parseTableGenDAGArgAndList(FormatToken *Opener) {
1026 FormatToken *FirstTok = CurrentToken;
1027 if (!parseTableGenDAGArg())
1028 return false;
1029 bool BreakInside = false;
1030 if (Style.TableGenBreakInsideDAGArg != FormatStyle::DAS_DontBreak) {
1031 // Specialized detection for DAGArgOperator, that determines the way of
1032 // line break for this DAGArg elements.
1033 if (isTableGenDAGArgBreakingOperator(*FirstTok)) {
1034 // Special case for identifier DAGArg operator.
1035 BreakInside = true;
1036 Opener->setType(TT_TableGenDAGArgOpenerToBreak);
1037 if (FirstTok->isOneOf(TT_TableGenBangOperator,
1038 TT_TableGenCondOperator)) {
1039 // Special case for bang/cond operators. Set the whole operator as
1040 // the DAGArg operator. Always break after it.
1041 CurrentToken->Previous->setType(TT_TableGenDAGArgOperatorToBreak);
1042 } else if (FirstTok->is(tok::identifier)) {
1043 if (Style.TableGenBreakInsideDAGArg == FormatStyle::DAS_BreakAll)
1044 FirstTok->setType(TT_TableGenDAGArgOperatorToBreak);
1045 else
1046 FirstTok->setType(TT_TableGenDAGArgOperatorID);
1047 }
1048 }
1049 }
1050 // Parse the [DagArgList] part
1051 bool FirstDAGArgListElm = true;
1052 while (CurrentToken) {
1053 if (!FirstDAGArgListElm && CurrentToken->is(tok::comma)) {
1054 CurrentToken->setType(BreakInside ? TT_TableGenDAGArgListCommaToBreak
1055 : TT_TableGenDAGArgListComma);
1056 skipToNextNonComment();
1057 }
1058 if (CurrentToken && CurrentToken->is(tok::r_paren)) {
1059 CurrentToken->setType(TT_TableGenDAGArgCloser);
1060 Opener->MatchingParen = CurrentToken;
1061 CurrentToken->MatchingParen = Opener;
1062 skipToNextNonComment();
1063 return true;
1064 }
1065 if (!parseTableGenDAGArg(
1066 BreakInside &&
1067 Style.AlignConsecutiveTableGenBreakingDAGArgColons.Enabled)) {
1068 return false;
1069 }
1070 FirstDAGArgListElm = false;
1071 }
1072 return false;
1073 }
1074
parseTableGenSimpleValue()1075 bool parseTableGenSimpleValue() {
1076 assert(Style.isTableGen());
1077 if (!CurrentToken)
1078 return false;
1079 FormatToken *Tok = CurrentToken;
1080 skipToNextNonComment();
1081 // SimpleValue 1, 2, 3: Literals
1082 if (Tok->isOneOf(tok::numeric_constant, tok::string_literal,
1083 TT_TableGenMultiLineString, tok::kw_true, tok::kw_false,
1084 tok::question, tok::kw_int)) {
1085 return true;
1086 }
1087 // SimpleValue 4: ValueList, Type
1088 if (Tok->is(tok::l_brace)) {
1089 Scopes.push_back(getScopeType(*Tok));
1090 return parseBrace();
1091 }
1092 // SimpleValue 5: List initializer
1093 if (Tok->is(tok::l_square)) {
1094 Tok->setType(TT_TableGenListOpener);
1095 if (!parseSquare())
1096 return false;
1097 if (Tok->is(tok::less)) {
1098 CurrentToken->setType(TT_TemplateOpener);
1099 return parseAngle();
1100 }
1101 return true;
1102 }
1103 // SimpleValue 6: DAGArg [DAGArgList]
1104 // SimpleValue6 ::= "(" DagArg [DagArgList] ")"
1105 if (Tok->is(tok::l_paren)) {
1106 Tok->setType(TT_TableGenDAGArgOpener);
1107 return parseTableGenDAGArgAndList(Tok);
1108 }
1109 // SimpleValue 9: Bang operator
1110 if (Tok->is(TT_TableGenBangOperator)) {
1111 if (CurrentToken && CurrentToken->is(tok::less)) {
1112 CurrentToken->setType(TT_TemplateOpener);
1113 skipToNextNonComment();
1114 if (!parseAngle())
1115 return false;
1116 }
1117 if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1118 return false;
1119 skipToNextNonComment();
1120 // FIXME: Hack using inheritance to child context
1121 Contexts.back().IsTableGenBangOpe = true;
1122 bool Result = parseParens();
1123 Contexts.back().IsTableGenBangOpe = false;
1124 return Result;
1125 }
1126 // SimpleValue 9: Cond operator
1127 if (Tok->is(TT_TableGenCondOperator)) {
1128 Tok = CurrentToken;
1129 skipToNextNonComment();
1130 if (!Tok || Tok->isNot(tok::l_paren))
1131 return false;
1132 bool Result = parseParens();
1133 return Result;
1134 }
1135 // We have to check identifier at the last because the kind of bang/cond
1136 // operators are also identifier.
1137 // SimpleValue 7: Identifiers
1138 if (Tok->is(tok::identifier)) {
1139 // SimpleValue 8: Anonymous record
1140 if (CurrentToken && CurrentToken->is(tok::less)) {
1141 CurrentToken->setType(TT_TemplateOpener);
1142 skipToNextNonComment();
1143 return parseAngle();
1144 }
1145 return true;
1146 }
1147
1148 return false;
1149 }
1150
couldBeInStructArrayInitializer() const1151 bool couldBeInStructArrayInitializer() const {
1152 if (Contexts.size() < 2)
1153 return false;
1154 // We want to back up no more then 2 context levels i.e.
1155 // . { { <-
1156 const auto End = std::next(Contexts.rbegin(), 2);
1157 auto Last = Contexts.rbegin();
1158 unsigned Depth = 0;
1159 for (; Last != End; ++Last)
1160 if (Last->ContextKind == tok::l_brace)
1161 ++Depth;
1162 return Depth == 2 && Last->ContextKind != tok::l_brace;
1163 }
1164
// Parses the tokens between a '{' and its matching '}'. The opening brace
// must already have been consumed, i.e. it is CurrentToken->Previous.
// The visible callers push the brace's scope onto Scopes before calling; it
// is popped here when the matching '}' is found.
// Returns true when the matching '}' was consumed, or when the tokens run out
// (including when there is no current token on entry). Returns false when a
// ')' or ']' is met first or when consumeToken() fails.
bool parseBrace() {
  if (!CurrentToken)
    return true;

  assert(CurrentToken->Previous);
  FormatToken &OpeningBrace = *CurrentToken->Previous;
  assert(OpeningBrace.is(tok::l_brace));
  // Record the kind of the enclosing context before a new one is created.
  OpeningBrace.ParentBracket = Contexts.back().ContextKind;

  // A caret seen in the enclosing context makes this brace an ObjC block
  // brace; the flag is reset afterwards either way.
  if (Contexts.back().CaretFound)
    OpeningBrace.overwriteFixedType(TT_ObjCBlockLBrace);
  Contexts.back().CaretFound = false;

  // From here on Contexts.back() is the context of this brace.
  ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
  Contexts.back().ColonIsDictLiteral = true;
  if (OpeningBrace.is(BK_BracedInit))
    Contexts.back().IsExpression = true;
  if (Style.isJavaScript() && OpeningBrace.Previous &&
      OpeningBrace.Previous->is(TT_JsTypeColon)) {
    Contexts.back().IsExpression = false;
  }
  if (Style.isVerilog() &&
      (!OpeningBrace.getPreviousNonComment() ||
       OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
    Contexts.back().VerilogMayBeConcatenation = true;
  }
  // TableGen overrides the default set above: colons inside braces are not
  // dict-literal colons there.
  if (Style.isTableGen())
    Contexts.back().ColonIsDictLiteral = false;

  // Number of commas seen directly in this loop; used for the struct array
  // initializer detection below.
  unsigned CommaCount = 0;
  while (CurrentToken) {
    if (CurrentToken->is(tok::r_brace)) {
      assert(!Scopes.empty());
      assert(Scopes.back() == getScopeType(OpeningBrace));
      Scopes.pop_back();
      assert(OpeningBrace.Optional == CurrentToken->Optional);
      // Link the brace pair in both directions.
      OpeningBrace.MatchingParen = CurrentToken;
      CurrentToken->MatchingParen = &OpeningBrace;
      if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
        if (OpeningBrace.ParentBracket == tok::l_brace &&
            couldBeInStructArrayInitializer() && CommaCount > 0) {
          Contexts.back().ContextType = Context::StructArrayInitializer;
        }
      }
      next();
      return true;
    }
    // A closing ')' or ']' here means the brackets are unbalanced.
    if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
      return false;
    updateParameterCount(&OpeningBrace, CurrentToken);
    if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
      FormatToken *Previous = CurrentToken->getPreviousNonComment();
      // Look through a JS optional-type '?' to the token before it.
      if (Previous->is(TT_JsTypeOptionalQuestion))
        Previous = Previous->getPreviousNonComment();
      if ((CurrentToken->is(tok::colon) && !Style.isTableGen() &&
           (!Contexts.back().ColonIsDictLiteral || !IsCpp)) ||
          Style.isProto()) {
        OpeningBrace.setType(TT_DictLiteral);
        // The key in front of the colon/brace/angle becomes a selector name
        // when it is an identifier-like token or a string literal.
        if (Previous->Tok.getIdentifierInfo() ||
            Previous->is(tok::string_literal)) {
          Previous->setType(TT_SelectorName);
        }
      }
      if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown) &&
          !Style.isTableGen()) {
        OpeningBrace.setType(TT_DictLiteral);
      } else if (Style.isJavaScript()) {
        OpeningBrace.overwriteFixedType(TT_DictLiteral);
      }
    }
    if (CurrentToken->is(tok::comma)) {
      if (Style.isJavaScript())
        OpeningBrace.overwriteFixedType(TT_DictLiteral);
      ++CommaCount;
    }
    if (!consumeToken())
      return false;
  }
  // Ran out of tokens without finding the matching '}'.
  return true;
}
1245
updateParameterCount(FormatToken * Left,FormatToken * Current)1246 void updateParameterCount(FormatToken *Left, FormatToken *Current) {
1247 // For ObjC methods, the number of parameters is calculated differently as
1248 // method declarations have a different structure (the parameters are not
1249 // inside a bracket scope).
1250 if (Current->is(tok::l_brace) && Current->is(BK_Block))
1251 ++Left->BlockParameterCount;
1252 if (Current->is(tok::comma)) {
1253 ++Left->ParameterCount;
1254 if (!Left->Role)
1255 Left->Role.reset(new CommaSeparatedList(Style));
1256 Left->Role->CommaFound(Current);
1257 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
1258 Left->ParameterCount = 1;
1259 }
1260 }
1261
parseConditional()1262 bool parseConditional() {
1263 while (CurrentToken) {
1264 if (CurrentToken->is(tok::colon)) {
1265 CurrentToken->setType(TT_ConditionalExpr);
1266 next();
1267 return true;
1268 }
1269 if (!consumeToken())
1270 return false;
1271 }
1272 return false;
1273 }
1274
parseTemplateDeclaration()1275 bool parseTemplateDeclaration() {
1276 if (!CurrentToken || CurrentToken->isNot(tok::less))
1277 return false;
1278
1279 CurrentToken->setType(TT_TemplateOpener);
1280 next();
1281
1282 TemplateDeclarationDepth++;
1283 const bool WellFormed = parseAngle();
1284 TemplateDeclarationDepth--;
1285 if (!WellFormed)
1286 return false;
1287
1288 if (CurrentToken && TemplateDeclarationDepth == 0)
1289 CurrentToken->Previous->ClosesTemplateDeclaration = true;
1290
1291 return true;
1292 }
1293
consumeToken()1294 bool consumeToken() {
1295 if (IsCpp) {
1296 const auto *Prev = CurrentToken->getPreviousNonComment();
1297 if (Prev && Prev->is(tok::r_square) && Prev->is(TT_AttributeSquare) &&
1298 CurrentToken->isOneOf(tok::kw_if, tok::kw_switch, tok::kw_case,
1299 tok::kw_default, tok::kw_for, tok::kw_while) &&
1300 mustBreakAfterAttributes(*CurrentToken, Style)) {
1301 CurrentToken->MustBreakBefore = true;
1302 }
1303 }
1304 FormatToken *Tok = CurrentToken;
1305 next();
1306 // In Verilog primitives' state tables, `:`, `?`, and `-` aren't normal
1307 // operators.
1308 if (Tok->is(TT_VerilogTableItem))
1309 return true;
1310 // Multi-line string itself is a single annotated token.
1311 if (Tok->is(TT_TableGenMultiLineString))
1312 return true;
1313 switch (Tok->Tok.getKind()) {
1314 case tok::plus:
1315 case tok::minus:
1316 if (!Tok->Previous && Line.MustBeDeclaration)
1317 Tok->setType(TT_ObjCMethodSpecifier);
1318 break;
1319 case tok::colon:
1320 if (!Tok->Previous)
1321 return false;
1322 // Goto labels and case labels are already identified in
1323 // UnwrappedLineParser.
1324 if (Tok->isTypeFinalized())
1325 break;
1326 // Colons from ?: are handled in parseConditional().
1327 if (Style.isJavaScript()) {
1328 if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
1329 (Contexts.size() == 1 && // switch/case labels
1330 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
1331 Contexts.back().ContextKind == tok::l_paren || // function params
1332 Contexts.back().ContextKind == tok::l_square || // array type
1333 (!Contexts.back().IsExpression &&
1334 Contexts.back().ContextKind == tok::l_brace) || // object type
1335 (Contexts.size() == 1 &&
1336 Line.MustBeDeclaration)) { // method/property declaration
1337 Contexts.back().IsExpression = false;
1338 Tok->setType(TT_JsTypeColon);
1339 break;
1340 }
1341 } else if (Style.isCSharp()) {
1342 if (Contexts.back().InCSharpAttributeSpecifier) {
1343 Tok->setType(TT_AttributeColon);
1344 break;
1345 }
1346 if (Contexts.back().ContextKind == tok::l_paren) {
1347 Tok->setType(TT_CSharpNamedArgumentColon);
1348 break;
1349 }
1350 } else if (Style.isVerilog() && Tok->isNot(TT_BinaryOperator)) {
1351 // The distribution weight operators are labeled
1352 // TT_BinaryOperator by the lexer.
1353 if (Keywords.isVerilogEnd(*Tok->Previous) ||
1354 Keywords.isVerilogBegin(*Tok->Previous)) {
1355 Tok->setType(TT_VerilogBlockLabelColon);
1356 } else if (Contexts.back().ContextKind == tok::l_square) {
1357 Tok->setType(TT_BitFieldColon);
1358 } else if (Contexts.back().ColonIsDictLiteral) {
1359 Tok->setType(TT_DictLiteral);
1360 } else if (Contexts.size() == 1) {
1361 // In Verilog a case label doesn't have the case keyword. We
1362 // assume a colon following an expression is a case label.
1363 // Colons from ?: are annotated in parseConditional().
1364 Tok->setType(TT_CaseLabelColon);
1365 if (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))
1366 --Line.Level;
1367 }
1368 break;
1369 }
1370 if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) ||
1371 Line.First->startsSequence(tok::kw_export, Keywords.kw_module) ||
1372 Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) {
1373 Tok->setType(TT_ModulePartitionColon);
1374 } else if (Line.First->is(tok::kw_asm)) {
1375 Tok->setType(TT_InlineASMColon);
1376 } else if (Contexts.back().ColonIsDictLiteral || Style.isProto()) {
1377 Tok->setType(TT_DictLiteral);
1378 if (Style.Language == FormatStyle::LK_TextProto) {
1379 if (FormatToken *Previous = Tok->getPreviousNonComment())
1380 Previous->setType(TT_SelectorName);
1381 }
1382 } else if (Contexts.back().ColonIsObjCMethodExpr ||
1383 Line.startsWith(TT_ObjCMethodSpecifier)) {
1384 Tok->setType(TT_ObjCMethodExpr);
1385 const FormatToken *BeforePrevious = Tok->Previous->Previous;
1386 // Ensure we tag all identifiers in method declarations as
1387 // TT_SelectorName.
1388 bool UnknownIdentifierInMethodDeclaration =
1389 Line.startsWith(TT_ObjCMethodSpecifier) &&
1390 Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
1391 if (!BeforePrevious ||
1392 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
1393 !(BeforePrevious->is(TT_CastRParen) ||
1394 (BeforePrevious->is(TT_ObjCMethodExpr) &&
1395 BeforePrevious->is(tok::colon))) ||
1396 BeforePrevious->is(tok::r_square) ||
1397 Contexts.back().LongestObjCSelectorName == 0 ||
1398 UnknownIdentifierInMethodDeclaration) {
1399 Tok->Previous->setType(TT_SelectorName);
1400 if (!Contexts.back().FirstObjCSelectorName) {
1401 Contexts.back().FirstObjCSelectorName = Tok->Previous;
1402 } else if (Tok->Previous->ColumnWidth >
1403 Contexts.back().LongestObjCSelectorName) {
1404 Contexts.back().LongestObjCSelectorName =
1405 Tok->Previous->ColumnWidth;
1406 }
1407 Tok->Previous->ParameterIndex =
1408 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1409 ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1410 }
1411 } else if (Contexts.back().ColonIsForRangeExpr) {
1412 Tok->setType(TT_RangeBasedForLoopColon);
1413 } else if (Contexts.back().ContextType == Context::C11GenericSelection) {
1414 Tok->setType(TT_GenericSelectionColon);
1415 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
1416 Tok->setType(TT_BitFieldColon);
1417 } else if (Contexts.size() == 1 &&
1418 !Line.First->isOneOf(tok::kw_enum, tok::kw_case,
1419 tok::kw_default)) {
1420 FormatToken *Prev = Tok->getPreviousNonComment();
1421 if (!Prev)
1422 break;
1423 if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) ||
1424 Prev->ClosesRequiresClause) {
1425 Tok->setType(TT_CtorInitializerColon);
1426 } else if (Prev->is(tok::kw_try)) {
1427 // Member initializer list within function try block.
1428 FormatToken *PrevPrev = Prev->getPreviousNonComment();
1429 if (!PrevPrev)
1430 break;
1431 if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
1432 Tok->setType(TT_CtorInitializerColon);
1433 } else {
1434 Tok->setType(TT_InheritanceColon);
1435 if (Prev->isAccessSpecifierKeyword())
1436 Line.Type = LT_AccessModifier;
1437 }
1438 } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
1439 (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
1440 (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
1441 Tok->Next->Next->is(tok::colon)))) {
1442 // This handles a special macro in ObjC code where selectors including
1443 // the colon are passed as macro arguments.
1444 Tok->setType(TT_ObjCMethodExpr);
1445 }
1446 break;
1447 case tok::pipe:
1448 case tok::amp:
1449 // | and & in declarations/type expressions represent union and
1450 // intersection types, respectively.
1451 if (Style.isJavaScript() && !Contexts.back().IsExpression)
1452 Tok->setType(TT_JsTypeOperator);
1453 break;
1454 case tok::kw_if:
1455 if (Style.isTableGen()) {
1456 // In TableGen it has the form 'if' <value> 'then'.
1457 if (!parseTableGenValue())
1458 return false;
1459 if (CurrentToken && CurrentToken->is(Keywords.kw_then))
1460 next(); // skip then
1461 break;
1462 }
1463 if (CurrentToken &&
1464 CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) {
1465 next();
1466 }
1467 [[fallthrough]];
1468 case tok::kw_while:
1469 if (CurrentToken && CurrentToken->is(tok::l_paren)) {
1470 next();
1471 if (!parseParens(/*LookForDecls=*/true))
1472 return false;
1473 }
1474 break;
1475 case tok::kw_for:
1476 if (Style.isJavaScript()) {
1477 // x.for and {for: ...}
1478 if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
1479 (Tok->Next && Tok->Next->is(tok::colon))) {
1480 break;
1481 }
1482 // JS' for await ( ...
1483 if (CurrentToken && CurrentToken->is(Keywords.kw_await))
1484 next();
1485 }
1486 if (IsCpp && CurrentToken && CurrentToken->is(tok::kw_co_await))
1487 next();
1488 Contexts.back().ColonIsForRangeExpr = true;
1489 if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1490 return false;
1491 next();
1492 if (!parseParens())
1493 return false;
1494 break;
1495 case tok::l_paren:
1496 // When faced with 'operator()()', the kw_operator handler incorrectly
1497 // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
1498 // the first two parens OverloadedOperators and the second l_paren an
1499 // OverloadedOperatorLParen.
1500 if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
1501 Tok->Previous->MatchingParen &&
1502 Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
1503 Tok->Previous->setType(TT_OverloadedOperator);
1504 Tok->Previous->MatchingParen->setType(TT_OverloadedOperator);
1505 Tok->setType(TT_OverloadedOperatorLParen);
1506 }
1507
1508 if (Style.isVerilog()) {
1509 // Identify the parameter list and port list in a module instantiation.
1510 // This is still needed when we already have
1511 // UnwrappedLineParser::parseVerilogHierarchyHeader because that
1512 // function is only responsible for the definition, not the
1513 // instantiation.
1514 auto IsInstancePort = [&]() {
1515 const FormatToken *Prev = Tok->getPreviousNonComment();
1516 const FormatToken *PrevPrev;
1517 // In the following example all 4 left parentheses will be treated as
1518 // 'TT_VerilogInstancePortLParen'.
1519 //
1520 // module_x instance_1(port_1); // Case A.
1521 // module_x #(parameter_1) // Case B.
1522 // instance_2(port_1), // Case C.
1523 // instance_3(port_1); // Case D.
1524 if (!Prev || !(PrevPrev = Prev->getPreviousNonComment()))
1525 return false;
1526 // Case A.
1527 if (Keywords.isVerilogIdentifier(*Prev) &&
1528 Keywords.isVerilogIdentifier(*PrevPrev)) {
1529 return true;
1530 }
1531 // Case B.
1532 if (Prev->is(Keywords.kw_verilogHash) &&
1533 Keywords.isVerilogIdentifier(*PrevPrev)) {
1534 return true;
1535 }
1536 // Case C.
1537 if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::r_paren))
1538 return true;
1539 // Case D.
1540 if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::comma)) {
1541 const FormatToken *PrevParen = PrevPrev->getPreviousNonComment();
1542 if (PrevParen->is(tok::r_paren) && PrevParen->MatchingParen &&
1543 PrevParen->MatchingParen->is(TT_VerilogInstancePortLParen)) {
1544 return true;
1545 }
1546 }
1547 return false;
1548 };
1549
1550 if (IsInstancePort())
1551 Tok->setFinalizedType(TT_VerilogInstancePortLParen);
1552 }
1553
1554 if (!parseParens())
1555 return false;
1556 if (Line.MustBeDeclaration && Contexts.size() == 1 &&
1557 !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
1558 !Line.startsWith(tok::l_paren) &&
1559 !Tok->isOneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen)) {
1560 if (const auto *Previous = Tok->Previous;
1561 !Previous ||
1562 (!Previous->isAttribute() &&
1563 !Previous->isOneOf(TT_RequiresClause, TT_LeadingJavaAnnotation))) {
1564 Line.MightBeFunctionDecl = true;
1565 Tok->MightBeFunctionDeclParen = true;
1566 }
1567 }
1568 break;
1569 case tok::l_square:
1570 if (Style.isTableGen())
1571 Tok->setType(TT_TableGenListOpener);
1572 if (!parseSquare())
1573 return false;
1574 break;
1575 case tok::l_brace:
1576 if (Style.Language == FormatStyle::LK_TextProto) {
1577 FormatToken *Previous = Tok->getPreviousNonComment();
1578 if (Previous && Previous->isNot(TT_DictLiteral))
1579 Previous->setType(TT_SelectorName);
1580 }
1581 Scopes.push_back(getScopeType(*Tok));
1582 if (!parseBrace())
1583 return false;
1584 break;
1585 case tok::less:
1586 if (parseAngle()) {
1587 Tok->setType(TT_TemplateOpener);
1588 // In TT_Proto, we must distignuish between:
1589 // map<key, value>
1590 // msg < item: data >
1591 // msg: < item: data >
1592 // In TT_TextProto, map<key, value> does not occur.
1593 if (Style.Language == FormatStyle::LK_TextProto ||
1594 (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
1595 Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
1596 Tok->setType(TT_DictLiteral);
1597 FormatToken *Previous = Tok->getPreviousNonComment();
1598 if (Previous && Previous->isNot(TT_DictLiteral))
1599 Previous->setType(TT_SelectorName);
1600 }
1601 if (Style.isTableGen())
1602 Tok->setType(TT_TemplateOpener);
1603 } else {
1604 Tok->setType(TT_BinaryOperator);
1605 NonTemplateLess.insert(Tok);
1606 CurrentToken = Tok;
1607 next();
1608 }
1609 break;
1610 case tok::r_paren:
1611 case tok::r_square:
1612 return false;
1613 case tok::r_brace:
1614 // Don't pop scope when encountering unbalanced r_brace.
1615 if (!Scopes.empty())
1616 Scopes.pop_back();
1617 // Lines can start with '}'.
1618 if (Tok->Previous)
1619 return false;
1620 break;
1621 case tok::greater:
1622 if (Style.Language != FormatStyle::LK_TextProto && Tok->is(TT_Unknown))
1623 Tok->setType(TT_BinaryOperator);
1624 if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
1625 Tok->SpacesRequiredBefore = 1;
1626 break;
1627 case tok::kw_operator:
1628 if (Style.isProto())
1629 break;
1630 while (CurrentToken &&
1631 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
1632 if (CurrentToken->isOneOf(tok::star, tok::amp))
1633 CurrentToken->setType(TT_PointerOrReference);
1634 auto Next = CurrentToken->getNextNonComment();
1635 if (!Next)
1636 break;
1637 if (Next->is(tok::less))
1638 next();
1639 else
1640 consumeToken();
1641 if (!CurrentToken)
1642 break;
1643 auto Previous = CurrentToken->getPreviousNonComment();
1644 assert(Previous);
1645 if (CurrentToken->is(tok::comma) && Previous->isNot(tok::kw_operator))
1646 break;
1647 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, tok::comma,
1648 tok::star, tok::arrow, tok::amp, tok::ampamp) ||
1649 // User defined literal.
1650 Previous->TokenText.starts_with("\"\"")) {
1651 Previous->setType(TT_OverloadedOperator);
1652 if (CurrentToken->isOneOf(tok::less, tok::greater))
1653 break;
1654 }
1655 }
1656 if (CurrentToken && CurrentToken->is(tok::l_paren))
1657 CurrentToken->setType(TT_OverloadedOperatorLParen);
1658 if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
1659 CurrentToken->Previous->setType(TT_OverloadedOperator);
1660 break;
1661 case tok::question:
1662 if (Style.isJavaScript() && Tok->Next &&
1663 Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
1664 tok::r_brace, tok::r_square)) {
1665 // Question marks before semicolons, colons, etc. indicate optional
1666 // types (fields, parameters), e.g.
1667 // function(x?: string, y?) {...}
1668 // class X { y?; }
1669 Tok->setType(TT_JsTypeOptionalQuestion);
1670 break;
1671 }
1672 // Declarations cannot be conditional expressions, this can only be part
1673 // of a type declaration.
1674 if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
1675 Style.isJavaScript()) {
1676 break;
1677 }
1678 if (Style.isCSharp()) {
1679 // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be
1680 // nullable types.
1681
1682 // `Type?)`, `Type?>`, `Type? name;`
1683 if (Tok->Next &&
1684 (Tok->Next->startsSequence(tok::question, tok::r_paren) ||
1685 Tok->Next->startsSequence(tok::question, tok::greater) ||
1686 Tok->Next->startsSequence(tok::question, tok::identifier,
1687 tok::semi))) {
1688 Tok->setType(TT_CSharpNullable);
1689 break;
1690 }
1691
1692 // `Type? name =`
1693 if (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next &&
1694 Tok->Next->Next->is(tok::equal)) {
1695 Tok->setType(TT_CSharpNullable);
1696 break;
1697 }
1698
1699 // Line.MustBeDeclaration will be true for `Type? name;`.
1700 // But not
1701 // cond ? "A" : "B";
1702 // cond ? id : "B";
1703 // cond ? cond2 ? "A" : "B" : "C";
1704 if (!Contexts.back().IsExpression && Line.MustBeDeclaration &&
1705 (!Tok->Next ||
1706 !Tok->Next->isOneOf(tok::identifier, tok::string_literal) ||
1707 !Tok->Next->Next ||
1708 !Tok->Next->Next->isOneOf(tok::colon, tok::question))) {
1709 Tok->setType(TT_CSharpNullable);
1710 break;
1711 }
1712 }
1713 parseConditional();
1714 break;
1715 case tok::kw_template:
1716 parseTemplateDeclaration();
1717 break;
1718 case tok::comma:
1719 switch (Contexts.back().ContextType) {
1720 case Context::CtorInitializer:
1721 Tok->setType(TT_CtorInitializerComma);
1722 break;
1723 case Context::InheritanceList:
1724 Tok->setType(TT_InheritanceComma);
1725 break;
1726 case Context::VerilogInstancePortList:
1727 Tok->setFinalizedType(TT_VerilogInstancePortComma);
1728 break;
1729 default:
1730 if (Style.isVerilog() && Contexts.size() == 1 &&
1731 Line.startsWith(Keywords.kw_assign)) {
1732 Tok->setFinalizedType(TT_VerilogAssignComma);
1733 } else if (Contexts.back().FirstStartOfName &&
1734 (Contexts.size() == 1 || startsWithInitStatement(Line))) {
1735 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
1736 Line.IsMultiVariableDeclStmt = true;
1737 }
1738 break;
1739 }
1740 if (Contexts.back().ContextType == Context::ForEachMacro)
1741 Contexts.back().IsExpression = true;
1742 break;
1743 case tok::kw_default:
1744 // Unindent case labels.
1745 if (Style.isVerilog() && Keywords.isVerilogEndOfLabel(*Tok) &&
1746 (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))) {
1747 --Line.Level;
1748 }
1749 break;
1750 case tok::identifier:
1751 if (Tok->isOneOf(Keywords.kw___has_include,
1752 Keywords.kw___has_include_next)) {
1753 parseHasInclude();
1754 }
1755 if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
1756 Tok->Next->isNot(tok::l_paren)) {
1757 Tok->setType(TT_CSharpGenericTypeConstraint);
1758 parseCSharpGenericTypeConstraint();
1759 if (!Tok->getPreviousNonComment())
1760 Line.IsContinuation = true;
1761 }
1762 if (Style.isTableGen()) {
1763 if (Tok->is(Keywords.kw_assert)) {
1764 if (!parseTableGenValue())
1765 return false;
1766 } else if (Tok->isOneOf(Keywords.kw_def, Keywords.kw_defm) &&
1767 (!Tok->Next ||
1768 !Tok->Next->isOneOf(tok::colon, tok::l_brace))) {
1769 // The case NameValue appears.
1770 if (!parseTableGenValue(true))
1771 return false;
1772 }
1773 }
1774 break;
1775 case tok::arrow:
1776 if (Tok->isNot(TT_LambdaArrow) && Tok->Previous &&
1777 Tok->Previous->is(tok::kw_noexcept)) {
1778 Tok->setType(TT_TrailingReturnArrow);
1779 }
1780 break;
1781 case tok::equal:
1782 // In TableGen, there must be a value after "=";
1783 if (Style.isTableGen() && !parseTableGenValue())
1784 return false;
1785 break;
1786 default:
1787 break;
1788 }
1789 return true;
1790 }
1791
parseCSharpGenericTypeConstraint()1792 void parseCSharpGenericTypeConstraint() {
1793 int OpenAngleBracketsCount = 0;
1794 while (CurrentToken) {
1795 if (CurrentToken->is(tok::less)) {
1796 // parseAngle is too greedy and will consume the whole line.
1797 CurrentToken->setType(TT_TemplateOpener);
1798 ++OpenAngleBracketsCount;
1799 next();
1800 } else if (CurrentToken->is(tok::greater)) {
1801 CurrentToken->setType(TT_TemplateCloser);
1802 --OpenAngleBracketsCount;
1803 next();
1804 } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) {
1805 // We allow line breaks after GenericTypeConstraintComma's
1806 // so do not flag commas in Generics as GenericTypeConstraintComma's.
1807 CurrentToken->setType(TT_CSharpGenericTypeConstraintComma);
1808 next();
1809 } else if (CurrentToken->is(Keywords.kw_where)) {
1810 CurrentToken->setType(TT_CSharpGenericTypeConstraint);
1811 next();
1812 } else if (CurrentToken->is(tok::colon)) {
1813 CurrentToken->setType(TT_CSharpGenericTypeConstraintColon);
1814 next();
1815 } else {
1816 next();
1817 }
1818 }
1819 }
1820
parseIncludeDirective()1821 void parseIncludeDirective() {
1822 if (CurrentToken && CurrentToken->is(tok::less)) {
1823 next();
1824 while (CurrentToken) {
1825 // Mark tokens up to the trailing line comments as implicit string
1826 // literals.
1827 if (CurrentToken->isNot(tok::comment) &&
1828 !CurrentToken->TokenText.starts_with("//")) {
1829 CurrentToken->setType(TT_ImplicitStringLiteral);
1830 }
1831 next();
1832 }
1833 }
1834 }
1835
parseWarningOrError()1836 void parseWarningOrError() {
1837 next();
1838 // We still want to format the whitespace left of the first token of the
1839 // warning or error.
1840 next();
1841 while (CurrentToken) {
1842 CurrentToken->setType(TT_ImplicitStringLiteral);
1843 next();
1844 }
1845 }
1846
parsePragma()1847 void parsePragma() {
1848 next(); // Consume "pragma".
1849 if (CurrentToken &&
1850 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option,
1851 Keywords.kw_region)) {
1852 bool IsMarkOrRegion =
1853 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_region);
1854 next();
1855 next(); // Consume first token (so we fix leading whitespace).
1856 while (CurrentToken) {
1857 if (IsMarkOrRegion || CurrentToken->Previous->is(TT_BinaryOperator))
1858 CurrentToken->setType(TT_ImplicitStringLiteral);
1859 next();
1860 }
1861 }
1862 }
1863
parseHasInclude()1864 void parseHasInclude() {
1865 if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1866 return;
1867 next(); // '('
1868 parseIncludeDirective();
1869 next(); // ')'
1870 }
1871
parsePreprocessorDirective()1872 LineType parsePreprocessorDirective() {
1873 bool IsFirstToken = CurrentToken->IsFirst;
1874 LineType Type = LT_PreprocessorDirective;
1875 next();
1876 if (!CurrentToken)
1877 return Type;
1878
1879 if (Style.isJavaScript() && IsFirstToken) {
1880 // JavaScript files can contain shebang lines of the form:
1881 // #!/usr/bin/env node
1882 // Treat these like C++ #include directives.
1883 while (CurrentToken) {
1884 // Tokens cannot be comments here.
1885 CurrentToken->setType(TT_ImplicitStringLiteral);
1886 next();
1887 }
1888 return LT_ImportStatement;
1889 }
1890
1891 if (CurrentToken->is(tok::numeric_constant)) {
1892 CurrentToken->SpacesRequiredBefore = 1;
1893 return Type;
1894 }
1895 // Hashes in the middle of a line can lead to any strange token
1896 // sequence.
1897 if (!CurrentToken->Tok.getIdentifierInfo())
1898 return Type;
1899 // In Verilog macro expansions start with a backtick just like preprocessor
1900 // directives. Thus we stop if the word is not a preprocessor directive.
1901 if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken))
1902 return LT_Invalid;
1903 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1904 case tok::pp_include:
1905 case tok::pp_include_next:
1906 case tok::pp_import:
1907 next();
1908 parseIncludeDirective();
1909 Type = LT_ImportStatement;
1910 break;
1911 case tok::pp_error:
1912 case tok::pp_warning:
1913 parseWarningOrError();
1914 break;
1915 case tok::pp_pragma:
1916 parsePragma();
1917 break;
1918 case tok::pp_if:
1919 case tok::pp_elif:
1920 Contexts.back().IsExpression = true;
1921 next();
1922 if (CurrentToken)
1923 CurrentToken->SpacesRequiredBefore = true;
1924 parseLine();
1925 break;
1926 default:
1927 break;
1928 }
1929 while (CurrentToken) {
1930 FormatToken *Tok = CurrentToken;
1931 next();
1932 if (Tok->is(tok::l_paren)) {
1933 parseParens();
1934 } else if (Tok->isOneOf(Keywords.kw___has_include,
1935 Keywords.kw___has_include_next)) {
1936 parseHasInclude();
1937 }
1938 }
1939 return Type;
1940 }
1941
1942 public:
parseLine()1943 LineType parseLine() {
1944 if (!CurrentToken)
1945 return LT_Invalid;
1946 NonTemplateLess.clear();
1947 if (!Line.InMacroBody && CurrentToken->is(tok::hash)) {
1948 // We were not yet allowed to use C++17 optional when this was being
1949 // written. So we used LT_Invalid to mark that the line is not a
1950 // preprocessor directive.
1951 auto Type = parsePreprocessorDirective();
1952 if (Type != LT_Invalid)
1953 return Type;
1954 }
1955
1956 // Directly allow to 'import <string-literal>' to support protocol buffer
1957 // definitions (github.com/google/protobuf) or missing "#" (either way we
1958 // should not break the line).
1959 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1960 if ((Style.Language == FormatStyle::LK_Java &&
1961 CurrentToken->is(Keywords.kw_package)) ||
1962 (!Style.isVerilog() && Info &&
1963 Info->getPPKeywordID() == tok::pp_import && CurrentToken->Next &&
1964 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1965 tok::kw_static))) {
1966 next();
1967 parseIncludeDirective();
1968 return LT_ImportStatement;
1969 }
1970
1971 // If this line starts and ends in '<' and '>', respectively, it is likely
1972 // part of "#define <a/b.h>".
1973 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1974 parseIncludeDirective();
1975 return LT_ImportStatement;
1976 }
1977
1978 // In .proto files, top-level options and package statements are very
1979 // similar to import statements and should not be line-wrapped.
1980 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1981 CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
1982 next();
1983 if (CurrentToken && CurrentToken->is(tok::identifier)) {
1984 while (CurrentToken)
1985 next();
1986 return LT_ImportStatement;
1987 }
1988 }
1989
1990 bool KeywordVirtualFound = false;
1991 bool ImportStatement = false;
1992
1993 // import {...} from '...';
1994 if (Style.isJavaScript() && CurrentToken->is(Keywords.kw_import))
1995 ImportStatement = true;
1996
1997 while (CurrentToken) {
1998 if (CurrentToken->is(tok::kw_virtual))
1999 KeywordVirtualFound = true;
2000 if (Style.isJavaScript()) {
2001 // export {...} from '...';
2002 // An export followed by "from 'some string';" is a re-export from
2003 // another module identified by a URI and is treated as a
2004 // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
2005 // Just "export {...};" or "export class ..." should not be treated as
2006 // an import in this sense.
2007 if (Line.First->is(tok::kw_export) &&
2008 CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
2009 CurrentToken->Next->isStringLiteral()) {
2010 ImportStatement = true;
2011 }
2012 if (isClosureImportStatement(*CurrentToken))
2013 ImportStatement = true;
2014 }
2015 if (!consumeToken())
2016 return LT_Invalid;
2017 }
2018 if (Line.Type == LT_AccessModifier)
2019 return LT_AccessModifier;
2020 if (KeywordVirtualFound)
2021 return LT_VirtualFunctionDecl;
2022 if (ImportStatement)
2023 return LT_ImportStatement;
2024
2025 if (Line.startsWith(TT_ObjCMethodSpecifier)) {
2026 if (Contexts.back().FirstObjCSelectorName) {
2027 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
2028 Contexts.back().LongestObjCSelectorName;
2029 }
2030 return LT_ObjCMethodDecl;
2031 }
2032
2033 for (const auto &ctx : Contexts)
2034 if (ctx.ContextType == Context::StructArrayInitializer)
2035 return LT_ArrayOfStructInitializer;
2036
2037 return LT_Other;
2038 }
2039
2040 private:
isClosureImportStatement(const FormatToken & Tok)2041 bool isClosureImportStatement(const FormatToken &Tok) {
2042 // FIXME: Closure-library specific stuff should not be hard-coded but be
2043 // configurable.
2044 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
2045 Tok.Next->Next &&
2046 (Tok.Next->Next->TokenText == "module" ||
2047 Tok.Next->Next->TokenText == "provide" ||
2048 Tok.Next->Next->TokenText == "require" ||
2049 Tok.Next->Next->TokenText == "requireType" ||
2050 Tok.Next->Next->TokenText == "forwardDeclare") &&
2051 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
2052 }
2053
resetTokenMetadata()2054 void resetTokenMetadata() {
2055 if (!CurrentToken)
2056 return;
2057
2058 // Reset token type in case we have already looked at it and then
2059 // recovered from an error (e.g. failure to find the matching >).
2060 if (!CurrentToken->isTypeFinalized() &&
2061 !CurrentToken->isOneOf(
2062 TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
2063 TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
2064 TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
2065 TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator,
2066 TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral,
2067 TT_UntouchableMacroFunc, TT_StatementAttributeLikeMacro,
2068 TT_FunctionLikeOrFreestandingMacro, TT_ClassLBrace, TT_EnumLBrace,
2069 TT_RecordLBrace, TT_StructLBrace, TT_UnionLBrace, TT_RequiresClause,
2070 TT_RequiresClauseInARequiresExpression, TT_RequiresExpression,
2071 TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace,
2072 TT_BracedListLBrace)) {
2073 CurrentToken->setType(TT_Unknown);
2074 }
2075 CurrentToken->Role.reset();
2076 CurrentToken->MatchingParen = nullptr;
2077 CurrentToken->FakeLParens.clear();
2078 CurrentToken->FakeRParens = 0;
2079 }
2080
next()2081 void next() {
2082 if (!CurrentToken)
2083 return;
2084
2085 CurrentToken->NestingLevel = Contexts.size() - 1;
2086 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
2087 modifyContext(*CurrentToken);
2088 determineTokenType(*CurrentToken);
2089 CurrentToken = CurrentToken->Next;
2090
2091 resetTokenMetadata();
2092 }
2093
2094 /// A struct to hold information valid in a specific context, e.g.
2095 /// a pair of parenthesis.
2096 struct Context {
Contextclang::format::__anonadd98d890111::AnnotatingParser::Context2097 Context(tok::TokenKind ContextKind, unsigned BindingStrength,
2098 bool IsExpression)
2099 : ContextKind(ContextKind), BindingStrength(BindingStrength),
2100 IsExpression(IsExpression) {}
2101
2102 tok::TokenKind ContextKind;
2103 unsigned BindingStrength;
2104 bool IsExpression;
2105 unsigned LongestObjCSelectorName = 0;
2106 bool ColonIsForRangeExpr = false;
2107 bool ColonIsDictLiteral = false;
2108 bool ColonIsObjCMethodExpr = false;
2109 FormatToken *FirstObjCSelectorName = nullptr;
2110 FormatToken *FirstStartOfName = nullptr;
2111 bool CanBeExpression = true;
2112 bool CaretFound = false;
2113 bool InCpp11AttributeSpecifier = false;
2114 bool InCSharpAttributeSpecifier = false;
2115 bool VerilogAssignmentFound = false;
2116 // Whether the braces may mean concatenation instead of structure or array
2117 // literal.
2118 bool VerilogMayBeConcatenation = false;
2119 bool IsTableGenDAGArg = false;
2120 bool IsTableGenBangOpe = false;
2121 bool IsTableGenCondOpe = false;
2122 enum {
2123 Unknown,
2124 // Like the part after `:` in a constructor.
2125 // Context(...) : IsExpression(IsExpression)
2126 CtorInitializer,
2127 // Like in the parentheses in a foreach.
2128 ForEachMacro,
2129 // Like the inheritance list in a class declaration.
2130 // class Input : public IO
2131 InheritanceList,
2132 // Like in the braced list.
2133 // int x[] = {};
2134 StructArrayInitializer,
2135 // Like in `static_cast<int>`.
2136 TemplateArgument,
2137 // C11 _Generic selection.
2138 C11GenericSelection,
2139 // Like in the outer parentheses in `ffnand ff1(.q());`.
2140 VerilogInstancePortList,
2141 } ContextType = Unknown;
2142 };
2143
2144 /// Puts a new \c Context onto the stack \c Contexts for the lifetime
2145 /// of each instance.
2146 struct ScopedContextCreator {
2147 AnnotatingParser &P;
2148
ScopedContextCreatorclang::format::__anonadd98d890111::AnnotatingParser::ScopedContextCreator2149 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
2150 unsigned Increase)
2151 : P(P) {
2152 P.Contexts.push_back(Context(ContextKind,
2153 P.Contexts.back().BindingStrength + Increase,
2154 P.Contexts.back().IsExpression));
2155 }
2156
~ScopedContextCreatorclang::format::__anonadd98d890111::AnnotatingParser::ScopedContextCreator2157 ~ScopedContextCreator() {
2158 if (P.Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
2159 if (P.Contexts.back().ContextType == Context::StructArrayInitializer) {
2160 P.Contexts.pop_back();
2161 P.Contexts.back().ContextType = Context::StructArrayInitializer;
2162 return;
2163 }
2164 }
2165 P.Contexts.pop_back();
2166 }
2167 };
2168
modifyContext(const FormatToken & Current)2169 void modifyContext(const FormatToken &Current) {
2170 auto AssignmentStartsExpression = [&]() {
2171 if (Current.getPrecedence() != prec::Assignment)
2172 return false;
2173
2174 if (Line.First->isOneOf(tok::kw_using, tok::kw_return))
2175 return false;
2176 if (Line.First->is(tok::kw_template)) {
2177 assert(Current.Previous);
2178 if (Current.Previous->is(tok::kw_operator)) {
2179 // `template ... operator=` cannot be an expression.
2180 return false;
2181 }
2182
2183 // `template` keyword can start a variable template.
2184 const FormatToken *Tok = Line.First->getNextNonComment();
2185 assert(Tok); // Current token is on the same line.
2186 if (Tok->isNot(TT_TemplateOpener)) {
2187 // Explicit template instantiations do not have `<>`.
2188 return false;
2189 }
2190
2191 // This is the default value of a template parameter, determine if it's
2192 // type or non-type.
2193 if (Contexts.back().ContextKind == tok::less) {
2194 assert(Current.Previous->Previous);
2195 return !Current.Previous->Previous->isOneOf(tok::kw_typename,
2196 tok::kw_class);
2197 }
2198
2199 Tok = Tok->MatchingParen;
2200 if (!Tok)
2201 return false;
2202 Tok = Tok->getNextNonComment();
2203 if (!Tok)
2204 return false;
2205
2206 if (Tok->isOneOf(tok::kw_class, tok::kw_enum, tok::kw_struct,
2207 tok::kw_using)) {
2208 return false;
2209 }
2210
2211 return true;
2212 }
2213
2214 // Type aliases use `type X = ...;` in TypeScript and can be exported
2215 // using `export type ...`.
2216 if (Style.isJavaScript() &&
2217 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
2218 Line.startsWith(tok::kw_export, Keywords.kw_type,
2219 tok::identifier))) {
2220 return false;
2221 }
2222
2223 return !Current.Previous || Current.Previous->isNot(tok::kw_operator);
2224 };
2225
2226 if (AssignmentStartsExpression()) {
2227 Contexts.back().IsExpression = true;
2228 if (!Line.startsWith(TT_UnaryOperator)) {
2229 for (FormatToken *Previous = Current.Previous;
2230 Previous && Previous->Previous &&
2231 !Previous->Previous->isOneOf(tok::comma, tok::semi);
2232 Previous = Previous->Previous) {
2233 if (Previous->isOneOf(tok::r_square, tok::r_paren, tok::greater)) {
2234 Previous = Previous->MatchingParen;
2235 if (!Previous)
2236 break;
2237 }
2238 if (Previous->opensScope())
2239 break;
2240 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
2241 Previous->isPointerOrReference() && Previous->Previous &&
2242 Previous->Previous->isNot(tok::equal)) {
2243 Previous->setType(TT_PointerOrReference);
2244 }
2245 }
2246 }
2247 } else if (Current.is(tok::lessless) &&
2248 (!Current.Previous ||
2249 Current.Previous->isNot(tok::kw_operator))) {
2250 Contexts.back().IsExpression = true;
2251 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
2252 Contexts.back().IsExpression = true;
2253 } else if (Current.is(TT_TrailingReturnArrow)) {
2254 Contexts.back().IsExpression = false;
2255 } else if (Current.isOneOf(TT_LambdaArrow, Keywords.kw_assert)) {
2256 Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
2257 } else if (Current.Previous &&
2258 Current.Previous->is(TT_CtorInitializerColon)) {
2259 Contexts.back().IsExpression = true;
2260 Contexts.back().ContextType = Context::CtorInitializer;
2261 } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
2262 Contexts.back().ContextType = Context::InheritanceList;
2263 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
2264 for (FormatToken *Previous = Current.Previous;
2265 Previous && Previous->isOneOf(tok::star, tok::amp);
2266 Previous = Previous->Previous) {
2267 Previous->setType(TT_PointerOrReference);
2268 }
2269 if (Line.MustBeDeclaration &&
2270 Contexts.front().ContextType != Context::CtorInitializer) {
2271 Contexts.back().IsExpression = false;
2272 }
2273 } else if (Current.is(tok::kw_new)) {
2274 Contexts.back().CanBeExpression = false;
2275 } else if (Current.is(tok::semi) ||
2276 (Current.is(tok::exclaim) && Current.Previous &&
2277 Current.Previous->isNot(tok::kw_operator))) {
2278 // This should be the condition or increment in a for-loop.
2279 // But not operator !() (can't use TT_OverloadedOperator here as its not
2280 // been annotated yet).
2281 Contexts.back().IsExpression = true;
2282 }
2283 }
2284
untilMatchingParen(FormatToken * Current)2285 static FormatToken *untilMatchingParen(FormatToken *Current) {
2286 // Used when `MatchingParen` is not yet established.
2287 int ParenLevel = 0;
2288 while (Current) {
2289 if (Current->is(tok::l_paren))
2290 ++ParenLevel;
2291 if (Current->is(tok::r_paren))
2292 --ParenLevel;
2293 if (ParenLevel < 1)
2294 break;
2295 Current = Current->Next;
2296 }
2297 return Current;
2298 }
2299
isDeductionGuide(FormatToken & Current)2300 static bool isDeductionGuide(FormatToken &Current) {
2301 // Look for a deduction guide template<T> A(...) -> A<...>;
2302 if (Current.Previous && Current.Previous->is(tok::r_paren) &&
2303 Current.startsSequence(tok::arrow, tok::identifier, tok::less)) {
2304 // Find the TemplateCloser.
2305 FormatToken *TemplateCloser = Current.Next->Next;
2306 int NestingLevel = 0;
2307 while (TemplateCloser) {
2308 // Skip over an expressions in parens A<(3 < 2)>;
2309 if (TemplateCloser->is(tok::l_paren)) {
2310 // No Matching Paren yet so skip to matching paren
2311 TemplateCloser = untilMatchingParen(TemplateCloser);
2312 if (!TemplateCloser)
2313 break;
2314 }
2315 if (TemplateCloser->is(tok::less))
2316 ++NestingLevel;
2317 if (TemplateCloser->is(tok::greater))
2318 --NestingLevel;
2319 if (NestingLevel < 1)
2320 break;
2321 TemplateCloser = TemplateCloser->Next;
2322 }
2323 // Assuming we have found the end of the template ensure its followed
2324 // with a semi-colon.
2325 if (TemplateCloser && TemplateCloser->Next &&
2326 TemplateCloser->Next->is(tok::semi) &&
2327 Current.Previous->MatchingParen) {
2328 // Determine if the identifier `A` prior to the A<..>; is the same as
2329 // prior to the A(..)
2330 FormatToken *LeadingIdentifier =
2331 Current.Previous->MatchingParen->Previous;
2332
2333 return LeadingIdentifier &&
2334 LeadingIdentifier->TokenText == Current.Next->TokenText;
2335 }
2336 }
2337 return false;
2338 }
2339
determineTokenType(FormatToken & Current)2340 void determineTokenType(FormatToken &Current) {
2341 if (Current.isNot(TT_Unknown)) {
2342 // The token type is already known.
2343 return;
2344 }
2345
2346 if ((Style.isJavaScript() || Style.isCSharp()) &&
2347 Current.is(tok::exclaim)) {
2348 if (Current.Previous) {
2349 bool IsIdentifier =
2350 Style.isJavaScript()
2351 ? Keywords.isJavaScriptIdentifier(
2352 *Current.Previous, /* AcceptIdentifierName= */ true)
2353 : Current.Previous->is(tok::identifier);
2354 if (IsIdentifier ||
2355 Current.Previous->isOneOf(
2356 tok::kw_default, tok::kw_namespace, tok::r_paren, tok::r_square,
2357 tok::r_brace, tok::kw_false, tok::kw_true, Keywords.kw_type,
2358 Keywords.kw_get, Keywords.kw_init, Keywords.kw_set) ||
2359 Current.Previous->Tok.isLiteral()) {
2360 Current.setType(TT_NonNullAssertion);
2361 return;
2362 }
2363 }
2364 if (Current.Next &&
2365 Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
2366 Current.setType(TT_NonNullAssertion);
2367 return;
2368 }
2369 }
2370
2371 // Line.MightBeFunctionDecl can only be true after the parentheses of a
2372 // function declaration have been found. In this case, 'Current' is a
2373 // trailing token of this declaration and thus cannot be a name.
2374 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
2375 Current.is(Keywords.kw_instanceof)) {
2376 Current.setType(TT_BinaryOperator);
2377 } else if (isStartOfName(Current) &&
2378 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
2379 Contexts.back().FirstStartOfName = &Current;
2380 Current.setType(TT_StartOfName);
2381 } else if (Current.is(tok::semi)) {
2382 // Reset FirstStartOfName after finding a semicolon so that a for loop
2383 // with multiple increment statements is not confused with a for loop
2384 // having multiple variable declarations.
2385 Contexts.back().FirstStartOfName = nullptr;
2386 } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
2387 AutoFound = true;
2388 } else if (Current.is(tok::arrow) &&
2389 Style.Language == FormatStyle::LK_Java) {
2390 Current.setType(TT_LambdaArrow);
2391 } else if (Current.is(tok::arrow) && Style.isVerilog()) {
2392 // The implication operator.
2393 Current.setType(TT_BinaryOperator);
2394 } else if (Current.is(tok::arrow) && AutoFound &&
2395 Line.MightBeFunctionDecl && Current.NestingLevel == 0 &&
2396 !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) {
2397 // not auto operator->() -> xxx;
2398 Current.setType(TT_TrailingReturnArrow);
2399 } else if (Current.is(tok::arrow) && Current.Previous &&
2400 Current.Previous->is(tok::r_brace)) {
2401 // Concept implicit conversion constraint needs to be treated like
2402 // a trailing return type ... } -> <type>.
2403 Current.setType(TT_TrailingReturnArrow);
2404 } else if (isDeductionGuide(Current)) {
2405 // Deduction guides trailing arrow " A(...) -> A<T>;".
2406 Current.setType(TT_TrailingReturnArrow);
2407 } else if (Current.isPointerOrReference()) {
2408 Current.setType(determineStarAmpUsage(
2409 Current,
2410 Contexts.back().CanBeExpression && Contexts.back().IsExpression,
2411 Contexts.back().ContextType == Context::TemplateArgument));
2412 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret) ||
2413 (Style.isVerilog() && Current.is(tok::pipe))) {
2414 Current.setType(determinePlusMinusCaretUsage(Current));
2415 if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
2416 Contexts.back().CaretFound = true;
2417 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
2418 Current.setType(determineIncrementUsage(Current));
2419 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
2420 Current.setType(TT_UnaryOperator);
2421 } else if (Current.is(tok::question)) {
2422 if (Style.isJavaScript() && Line.MustBeDeclaration &&
2423 !Contexts.back().IsExpression) {
2424 // In JavaScript, `interface X { foo?(): bar; }` is an optional method
2425 // on the interface, not a ternary expression.
2426 Current.setType(TT_JsTypeOptionalQuestion);
2427 } else if (Style.isTableGen()) {
2428 // In TableGen, '?' is just an identifier like token.
2429 Current.setType(TT_Unknown);
2430 } else {
2431 Current.setType(TT_ConditionalExpr);
2432 }
2433 } else if (Current.isBinaryOperator() &&
2434 (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
2435 (Current.isNot(tok::greater) &&
2436 Style.Language != FormatStyle::LK_TextProto)) {
2437 if (Style.isVerilog()) {
2438 if (Current.is(tok::lessequal) && Contexts.size() == 1 &&
2439 !Contexts.back().VerilogAssignmentFound) {
2440 // In Verilog `<=` is assignment if in its own statement. It is a
2441 // statement instead of an expression, that is it can not be chained.
2442 Current.ForcedPrecedence = prec::Assignment;
2443 Current.setFinalizedType(TT_BinaryOperator);
2444 }
2445 if (Current.getPrecedence() == prec::Assignment)
2446 Contexts.back().VerilogAssignmentFound = true;
2447 }
2448 Current.setType(TT_BinaryOperator);
2449 } else if (Current.is(tok::comment)) {
2450 if (Current.TokenText.starts_with("/*")) {
2451 if (Current.TokenText.ends_with("*/")) {
2452 Current.setType(TT_BlockComment);
2453 } else {
2454 // The lexer has for some reason determined a comment here. But we
2455 // cannot really handle it, if it isn't properly terminated.
2456 Current.Tok.setKind(tok::unknown);
2457 }
2458 } else {
2459 Current.setType(TT_LineComment);
2460 }
2461 } else if (Current.is(tok::string_literal)) {
2462 if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation &&
2463 Current.getPreviousNonComment() &&
2464 Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) &&
2465 Current.getNextNonComment() &&
2466 Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) {
2467 Current.setType(TT_StringInConcatenation);
2468 }
2469 } else if (Current.is(tok::l_paren)) {
2470 if (lParenStartsCppCast(Current))
2471 Current.setType(TT_CppCastLParen);
2472 } else if (Current.is(tok::r_paren)) {
2473 if (rParenEndsCast(Current))
2474 Current.setType(TT_CastRParen);
2475 if (Current.MatchingParen && Current.Next &&
2476 !Current.Next->isBinaryOperator() &&
2477 !Current.Next->isOneOf(
2478 tok::semi, tok::colon, tok::l_brace, tok::l_paren, tok::comma,
2479 tok::period, tok::arrow, tok::coloncolon, tok::kw_noexcept)) {
2480 if (FormatToken *AfterParen = Current.MatchingParen->Next;
2481 AfterParen && AfterParen->isNot(tok::caret)) {
2482 // Make sure this isn't the return type of an Obj-C block declaration.
2483 if (FormatToken *BeforeParen = Current.MatchingParen->Previous;
2484 BeforeParen && BeforeParen->is(tok::identifier) &&
2485 BeforeParen->isNot(TT_TypenameMacro) &&
2486 BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
2487 (!BeforeParen->Previous ||
2488 BeforeParen->Previous->ClosesTemplateDeclaration ||
2489 BeforeParen->Previous->ClosesRequiresClause)) {
2490 Current.setType(TT_FunctionAnnotationRParen);
2491 }
2492 }
2493 }
2494 } else if (Current.is(tok::at) && Current.Next && !Style.isJavaScript() &&
2495 Style.Language != FormatStyle::LK_Java) {
2496 // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
2497 // marks declarations and properties that need special formatting.
2498 switch (Current.Next->Tok.getObjCKeywordID()) {
2499 case tok::objc_interface:
2500 case tok::objc_implementation:
2501 case tok::objc_protocol:
2502 Current.setType(TT_ObjCDecl);
2503 break;
2504 case tok::objc_property:
2505 Current.setType(TT_ObjCProperty);
2506 break;
2507 default:
2508 break;
2509 }
2510 } else if (Current.is(tok::period)) {
2511 FormatToken *PreviousNoComment = Current.getPreviousNonComment();
2512 if (PreviousNoComment &&
2513 PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) {
2514 Current.setType(TT_DesignatedInitializerPeriod);
2515 } else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
2516 Current.Previous->isOneOf(TT_JavaAnnotation,
2517 TT_LeadingJavaAnnotation)) {
2518 Current.setType(Current.Previous->getType());
2519 }
2520 } else if (canBeObjCSelectorComponent(Current) &&
2521 // FIXME(bug 36976): ObjC return types shouldn't use
2522 // TT_CastRParen.
2523 Current.Previous && Current.Previous->is(TT_CastRParen) &&
2524 Current.Previous->MatchingParen &&
2525 Current.Previous->MatchingParen->Previous &&
2526 Current.Previous->MatchingParen->Previous->is(
2527 TT_ObjCMethodSpecifier)) {
2528 // This is the first part of an Objective-C selector name. (If there's no
2529 // colon after this, this is the only place which annotates the identifier
2530 // as a selector.)
2531 Current.setType(TT_SelectorName);
2532 } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept,
2533 tok::kw_requires) &&
2534 Current.Previous &&
2535 !Current.Previous->isOneOf(tok::equal, tok::at,
2536 TT_CtorInitializerComma,
2537 TT_CtorInitializerColon) &&
2538 Line.MightBeFunctionDecl && Contexts.size() == 1) {
2539 // Line.MightBeFunctionDecl can only be true after the parentheses of a
2540 // function declaration have been found.
2541 Current.setType(TT_TrailingAnnotation);
2542 } else if ((Style.Language == FormatStyle::LK_Java ||
2543 Style.isJavaScript()) &&
2544 Current.Previous) {
2545 if (Current.Previous->is(tok::at) &&
2546 Current.isNot(Keywords.kw_interface)) {
2547 const FormatToken &AtToken = *Current.Previous;
2548 const FormatToken *Previous = AtToken.getPreviousNonComment();
2549 if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
2550 Current.setType(TT_LeadingJavaAnnotation);
2551 else
2552 Current.setType(TT_JavaAnnotation);
2553 } else if (Current.Previous->is(tok::period) &&
2554 Current.Previous->isOneOf(TT_JavaAnnotation,
2555 TT_LeadingJavaAnnotation)) {
2556 Current.setType(Current.Previous->getType());
2557 }
2558 }
2559 }
2560
2561 /// Take a guess at whether \p Tok starts a name of a function or
2562 /// variable declaration.
2563 ///
2564 /// This is a heuristic based on whether \p Tok is an identifier following
2565 /// something that is likely a type.
isStartOfName(const FormatToken & Tok)2566 bool isStartOfName(const FormatToken &Tok) {
2567 // Handled in ExpressionParser for Verilog.
2568 if (Style.isVerilog())
2569 return false;
2570
2571 if (Tok.isNot(tok::identifier) || !Tok.Previous)
2572 return false;
2573
2574 if (const auto *NextNonComment = Tok.getNextNonComment();
2575 (!NextNonComment && !Line.InMacroBody) ||
2576 (NextNonComment &&
2577 (NextNonComment->isPointerOrReference() ||
2578 NextNonComment->is(tok::string_literal) ||
2579 (Line.InPragmaDirective && NextNonComment->is(tok::identifier))))) {
2580 return false;
2581 }
2582
2583 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
2584 Keywords.kw_as)) {
2585 return false;
2586 }
2587 if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in))
2588 return false;
2589
2590 // Skip "const" as it does not have an influence on whether this is a name.
2591 FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
2592
2593 // For javascript const can be like "let" or "var"
2594 if (!Style.isJavaScript())
2595 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
2596 PreviousNotConst = PreviousNotConst->getPreviousNonComment();
2597
2598 if (!PreviousNotConst)
2599 return false;
2600
2601 if (PreviousNotConst->ClosesRequiresClause)
2602 return false;
2603
2604 if (Style.isTableGen()) {
2605 // keywords such as let and def* defines names.
2606 if (Keywords.isTableGenDefinition(*PreviousNotConst))
2607 return true;
2608 // Otherwise C++ style declarations is available only inside the brace.
2609 if (Contexts.back().ContextKind != tok::l_brace)
2610 return false;
2611 }
2612
2613 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
2614 PreviousNotConst->Previous &&
2615 PreviousNotConst->Previous->is(tok::hash);
2616
2617 if (PreviousNotConst->is(TT_TemplateCloser)) {
2618 return PreviousNotConst && PreviousNotConst->MatchingParen &&
2619 PreviousNotConst->MatchingParen->Previous &&
2620 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
2621 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
2622 }
2623
2624 if ((PreviousNotConst->is(tok::r_paren) &&
2625 PreviousNotConst->is(TT_TypeDeclarationParen)) ||
2626 PreviousNotConst->is(TT_AttributeRParen)) {
2627 return true;
2628 }
2629
2630 // If is a preprocess keyword like #define.
2631 if (IsPPKeyword)
2632 return false;
2633
2634 // int a or auto a.
2635 if (PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto) &&
2636 PreviousNotConst->isNot(TT_StatementAttributeLikeMacro)) {
2637 return true;
2638 }
2639
2640 // *a or &a or &&a.
2641 if (PreviousNotConst->is(TT_PointerOrReference))
2642 return true;
2643
2644 // MyClass a;
2645 if (PreviousNotConst->isTypeName(LangOpts))
2646 return true;
2647
2648 // type[] a in Java
2649 if (Style.Language == FormatStyle::LK_Java &&
2650 PreviousNotConst->is(tok::r_square)) {
2651 return true;
2652 }
2653
2654 // const a = in JavaScript.
2655 return Style.isJavaScript() && PreviousNotConst->is(tok::kw_const);
2656 }
2657
2658 /// Determine whether '(' is starting a C++ cast.
lParenStartsCppCast(const FormatToken & Tok)2659 bool lParenStartsCppCast(const FormatToken &Tok) {
2660 // C-style casts are only used in C++.
2661 if (!IsCpp)
2662 return false;
2663
2664 FormatToken *LeftOfParens = Tok.getPreviousNonComment();
2665 if (LeftOfParens && LeftOfParens->is(TT_TemplateCloser) &&
2666 LeftOfParens->MatchingParen) {
2667 auto *Prev = LeftOfParens->MatchingParen->getPreviousNonComment();
2668 if (Prev &&
2669 Prev->isOneOf(tok::kw_const_cast, tok::kw_dynamic_cast,
2670 tok::kw_reinterpret_cast, tok::kw_static_cast)) {
2671 // FIXME: Maybe we should handle identifiers ending with "_cast",
2672 // e.g. any_cast?
2673 return true;
2674 }
2675 }
2676 return false;
2677 }
2678
2679 /// Determine whether ')' is ending a cast.
rParenEndsCast(const FormatToken & Tok)2680 bool rParenEndsCast(const FormatToken &Tok) {
2681 assert(Tok.is(tok::r_paren));
2682
2683 if (!Tok.MatchingParen || !Tok.Previous)
2684 return false;
2685
2686 // C-style casts are only used in C++, C# and Java.
2687 if (!IsCpp && !Style.isCSharp() && Style.Language != FormatStyle::LK_Java)
2688 return false;
2689
2690 const auto *LParen = Tok.MatchingParen;
2691 const auto *BeforeRParen = Tok.Previous;
2692 const auto *AfterRParen = Tok.Next;
2693
2694 // Empty parens aren't casts and there are no casts at the end of the line.
2695 if (BeforeRParen == LParen || !AfterRParen)
2696 return false;
2697
2698 if (LParen->is(TT_OverloadedOperatorLParen))
2699 return false;
2700
2701 auto *LeftOfParens = LParen->getPreviousNonComment();
2702 if (LeftOfParens) {
2703 // If there is a closing parenthesis left of the current
2704 // parentheses, look past it as these might be chained casts.
2705 if (LeftOfParens->is(tok::r_paren) &&
2706 LeftOfParens->isNot(TT_CastRParen)) {
2707 if (!LeftOfParens->MatchingParen ||
2708 !LeftOfParens->MatchingParen->Previous) {
2709 return false;
2710 }
2711 LeftOfParens = LeftOfParens->MatchingParen->Previous;
2712 }
2713
2714 if (LeftOfParens->is(tok::r_square)) {
2715 // delete[] (void *)ptr;
2716 auto MayBeArrayDelete = [](FormatToken *Tok) -> FormatToken * {
2717 if (Tok->isNot(tok::r_square))
2718 return nullptr;
2719
2720 Tok = Tok->getPreviousNonComment();
2721 if (!Tok || Tok->isNot(tok::l_square))
2722 return nullptr;
2723
2724 Tok = Tok->getPreviousNonComment();
2725 if (!Tok || Tok->isNot(tok::kw_delete))
2726 return nullptr;
2727 return Tok;
2728 };
2729 if (FormatToken *MaybeDelete = MayBeArrayDelete(LeftOfParens))
2730 LeftOfParens = MaybeDelete;
2731 }
2732
2733 // The Condition directly below this one will see the operator arguments
2734 // as a (void *foo) cast.
2735 // void operator delete(void *foo) ATTRIB;
2736 if (LeftOfParens->Tok.getIdentifierInfo() && LeftOfParens->Previous &&
2737 LeftOfParens->Previous->is(tok::kw_operator)) {
2738 return false;
2739 }
2740
2741 // If there is an identifier (or with a few exceptions a keyword) right
2742 // before the parentheses, this is unlikely to be a cast.
2743 if (LeftOfParens->Tok.getIdentifierInfo() &&
2744 !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
2745 tok::kw_delete, tok::kw_throw)) {
2746 return false;
2747 }
2748
2749 // Certain other tokens right before the parentheses are also signals that
2750 // this cannot be a cast.
2751 if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
2752 TT_TemplateCloser, tok::ellipsis)) {
2753 return false;
2754 }
2755 }
2756
2757 if (AfterRParen->is(tok::question) ||
2758 (AfterRParen->is(tok::ampamp) && !BeforeRParen->isTypeName(LangOpts))) {
2759 return false;
2760 }
2761
2762 // `foreach((A a, B b) in someList)` should not be seen as a cast.
2763 if (AfterRParen->is(Keywords.kw_in) && Style.isCSharp())
2764 return false;
2765
2766 // Functions which end with decorations like volatile, noexcept are unlikely
2767 // to be casts.
2768 if (AfterRParen->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
2769 tok::kw_requires, tok::kw_throw, tok::arrow,
2770 Keywords.kw_override, Keywords.kw_final) ||
2771 isCppAttribute(IsCpp, *AfterRParen)) {
2772 return false;
2773 }
2774
2775 // As Java has no function types, a "(" after the ")" likely means that this
2776 // is a cast.
2777 if (Style.Language == FormatStyle::LK_Java && AfterRParen->is(tok::l_paren))
2778 return true;
2779
2780 // If a (non-string) literal follows, this is likely a cast.
2781 if (AfterRParen->isOneOf(tok::kw_sizeof, tok::kw_alignof) ||
2782 (AfterRParen->Tok.isLiteral() &&
2783 AfterRParen->isNot(tok::string_literal))) {
2784 return true;
2785 }
2786
2787 // Heuristically try to determine whether the parentheses contain a type.
2788 auto IsQualifiedPointerOrReference = [](const FormatToken *T,
2789 const LangOptions &LangOpts) {
2790 // This is used to handle cases such as x = (foo *const)&y;
2791 assert(!T->isTypeName(LangOpts) && "Should have already been checked");
2792 // Strip trailing qualifiers such as const or volatile when checking
2793 // whether the parens could be a cast to a pointer/reference type.
2794 while (T) {
2795 if (T->is(TT_AttributeRParen)) {
2796 // Handle `x = (foo *__attribute__((foo)))&v;`:
2797 assert(T->is(tok::r_paren));
2798 assert(T->MatchingParen);
2799 assert(T->MatchingParen->is(tok::l_paren));
2800 assert(T->MatchingParen->is(TT_AttributeLParen));
2801 if (const auto *Tok = T->MatchingParen->Previous;
2802 Tok && Tok->isAttribute()) {
2803 T = Tok->Previous;
2804 continue;
2805 }
2806 } else if (T->is(TT_AttributeSquare)) {
2807 // Handle `x = (foo *[[clang::foo]])&v;`:
2808 if (T->MatchingParen && T->MatchingParen->Previous) {
2809 T = T->MatchingParen->Previous;
2810 continue;
2811 }
2812 } else if (T->canBePointerOrReferenceQualifier()) {
2813 T = T->Previous;
2814 continue;
2815 }
2816 break;
2817 }
2818 return T && T->is(TT_PointerOrReference);
2819 };
2820 bool ParensAreType =
2821 BeforeRParen->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
2822 BeforeRParen->isTypeName(LangOpts) ||
2823 IsQualifiedPointerOrReference(BeforeRParen, LangOpts);
2824 bool ParensCouldEndDecl =
2825 AfterRParen->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
2826 if (ParensAreType && !ParensCouldEndDecl)
2827 return true;
2828
2829 // At this point, we heuristically assume that there are no casts at the
2830 // start of the line. We assume that we have found most cases where there
2831 // are by the logic above, e.g. "(void)x;".
2832 if (!LeftOfParens)
2833 return false;
2834
2835 // Certain token types inside the parentheses mean that this can't be a
2836 // cast.
2837 for (const auto *Token = LParen->Next; Token != &Tok; Token = Token->Next)
2838 if (Token->is(TT_BinaryOperator))
2839 return false;
2840
2841 // If the following token is an identifier or 'this', this is a cast. All
2842 // cases where this can be something else are handled above.
2843 if (AfterRParen->isOneOf(tok::identifier, tok::kw_this))
2844 return true;
2845
2846 // Look for a cast `( x ) (`.
2847 if (AfterRParen->is(tok::l_paren) && BeforeRParen->Previous) {
2848 if (BeforeRParen->is(tok::identifier) &&
2849 BeforeRParen->Previous->is(tok::l_paren)) {
2850 return true;
2851 }
2852 }
2853
2854 if (!AfterRParen->Next)
2855 return false;
2856
2857 if (AfterRParen->is(tok::l_brace) &&
2858 AfterRParen->getBlockKind() == BK_BracedInit) {
2859 return true;
2860 }
2861
2862 // If the next token after the parenthesis is a unary operator, assume
2863 // that this is cast, unless there are unexpected tokens inside the
2864 // parenthesis.
2865 const bool NextIsAmpOrStar = AfterRParen->isOneOf(tok::amp, tok::star);
2866 if (!(AfterRParen->isUnaryOperator() || NextIsAmpOrStar) ||
2867 AfterRParen->is(tok::plus) ||
2868 !AfterRParen->Next->isOneOf(tok::identifier, tok::numeric_constant)) {
2869 return false;
2870 }
2871
2872 if (NextIsAmpOrStar &&
2873 (AfterRParen->Next->is(tok::numeric_constant) || Line.InPPDirective)) {
2874 return false;
2875 }
2876
2877 if (Line.InPPDirective && AfterRParen->is(tok::minus))
2878 return false;
2879
2880 // Search for unexpected tokens.
2881 for (auto *Prev = BeforeRParen; Prev != LParen; Prev = Prev->Previous) {
2882 if (Prev->is(tok::r_paren)) {
2883 if (Prev->is(TT_CastRParen))
2884 return false;
2885 Prev = Prev->MatchingParen;
2886 if (!Prev)
2887 return false;
2888 if (Prev->is(TT_FunctionTypeLParen))
2889 break;
2890 continue;
2891 }
2892 if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
2893 return false;
2894 }
2895
2896 return true;
2897 }
2898
2899 /// Returns true if the token is used as a unary operator.
determineUnaryOperatorByUsage(const FormatToken & Tok)2900 bool determineUnaryOperatorByUsage(const FormatToken &Tok) {
2901 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2902 if (!PrevToken)
2903 return true;
2904
2905 // These keywords are deliberately not included here because they may
2906 // precede only one of unary star/amp and plus/minus but not both. They are
2907 // either included in determineStarAmpUsage or determinePlusMinusCaretUsage.
2908 //
2909 // @ - It may be followed by a unary `-` in Objective-C literals. We don't
2910 // know how they can be followed by a star or amp.
2911 if (PrevToken->isOneOf(
2912 TT_ConditionalExpr, tok::l_paren, tok::comma, tok::colon, tok::semi,
2913 tok::equal, tok::question, tok::l_square, tok::l_brace,
2914 tok::kw_case, tok::kw_co_await, tok::kw_co_return, tok::kw_co_yield,
2915 tok::kw_delete, tok::kw_return, tok::kw_throw)) {
2916 return true;
2917 }
2918
2919 // We put sizeof here instead of only in determineStarAmpUsage. In the cases
2920 // where the unary `+` operator is overloaded, it is reasonable to write
2921 // things like `sizeof +x`. Like commit 446d6ec996c6c3.
2922 if (PrevToken->is(tok::kw_sizeof))
2923 return true;
2924
2925 // A sequence of leading unary operators.
2926 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
2927 return true;
2928
2929 // There can't be two consecutive binary operators.
2930 if (PrevToken->is(TT_BinaryOperator))
2931 return true;
2932
2933 return false;
2934 }
2935
2936 /// Return the type of the given token assuming it is * or &.
determineStarAmpUsage(const FormatToken & Tok,bool IsExpression,bool InTemplateArgument)2937 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
2938 bool InTemplateArgument) {
2939 if (Style.isJavaScript())
2940 return TT_BinaryOperator;
2941
2942 // && in C# must be a binary operator.
2943 if (Style.isCSharp() && Tok.is(tok::ampamp))
2944 return TT_BinaryOperator;
2945
2946 if (Style.isVerilog()) {
2947 // In Verilog, `*` can only be a binary operator. `&` can be either unary
2948 // or binary. `*` also includes `*>` in module path declarations in
2949 // specify blocks because merged tokens take the type of the first one by
2950 // default.
2951 if (Tok.is(tok::star))
2952 return TT_BinaryOperator;
2953 return determineUnaryOperatorByUsage(Tok) ? TT_UnaryOperator
2954 : TT_BinaryOperator;
2955 }
2956
2957 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2958 if (!PrevToken)
2959 return TT_UnaryOperator;
2960 if (PrevToken->is(TT_TypeName))
2961 return TT_PointerOrReference;
2962 if (PrevToken->isOneOf(tok::kw_new, tok::kw_delete) && Tok.is(tok::ampamp))
2963 return TT_BinaryOperator;
2964
2965 const FormatToken *NextToken = Tok.getNextNonComment();
2966
2967 if (InTemplateArgument && NextToken && NextToken->is(tok::kw_noexcept))
2968 return TT_BinaryOperator;
2969
2970 if (!NextToken ||
2971 NextToken->isOneOf(tok::arrow, tok::equal, tok::comma, tok::r_paren,
2972 TT_RequiresClause) ||
2973 (NextToken->is(tok::kw_noexcept) && !IsExpression) ||
2974 NextToken->canBePointerOrReferenceQualifier() ||
2975 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) {
2976 return TT_PointerOrReference;
2977 }
2978
2979 if (PrevToken->is(tok::coloncolon))
2980 return TT_PointerOrReference;
2981
2982 if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen))
2983 return TT_PointerOrReference;
2984
2985 if (determineUnaryOperatorByUsage(Tok))
2986 return TT_UnaryOperator;
2987
2988 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
2989 return TT_PointerOrReference;
2990 if (NextToken->is(tok::kw_operator) && !IsExpression)
2991 return TT_PointerOrReference;
2992 if (NextToken->isOneOf(tok::comma, tok::semi))
2993 return TT_PointerOrReference;
2994
2995 // After right braces, star tokens are likely to be pointers to struct,
2996 // union, or class.
2997 // struct {} *ptr;
2998 // This by itself is not sufficient to distinguish from multiplication
2999 // following a brace-initialized expression, as in:
3000 // int i = int{42} * 2;
3001 // In the struct case, the part of the struct declaration until the `{` and
3002 // the `}` are put on separate unwrapped lines; in the brace-initialized
3003 // case, the matching `{` is on the same unwrapped line, so check for the
3004 // presence of the matching brace to distinguish between those.
3005 if (PrevToken->is(tok::r_brace) && Tok.is(tok::star) &&
3006 !PrevToken->MatchingParen) {
3007 return TT_PointerOrReference;
3008 }
3009
3010 if (PrevToken->endsSequence(tok::r_square, tok::l_square, tok::kw_delete))
3011 return TT_UnaryOperator;
3012
3013 if (PrevToken->Tok.isLiteral() ||
3014 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
3015 tok::kw_false, tok::r_brace)) {
3016 return TT_BinaryOperator;
3017 }
3018
3019 const FormatToken *NextNonParen = NextToken;
3020 while (NextNonParen && NextNonParen->is(tok::l_paren))
3021 NextNonParen = NextNonParen->getNextNonComment();
3022 if (NextNonParen && (NextNonParen->Tok.isLiteral() ||
3023 NextNonParen->isOneOf(tok::kw_true, tok::kw_false) ||
3024 NextNonParen->isUnaryOperator())) {
3025 return TT_BinaryOperator;
3026 }
3027
3028 // If we know we're in a template argument, there are no named declarations.
3029 // Thus, having an identifier on the right-hand side indicates a binary
3030 // operator.
3031 if (InTemplateArgument && NextToken->Tok.isAnyIdentifier())
3032 return TT_BinaryOperator;
3033
3034 // "&&" followed by "(", "*", or "&" is quite unlikely to be two successive
3035 // unary "&".
3036 if (Tok.is(tok::ampamp) &&
3037 NextToken->isOneOf(tok::l_paren, tok::star, tok::amp)) {
3038 return TT_BinaryOperator;
3039 }
3040
3041 // This catches some cases where evaluation order is used as control flow:
3042 // aaa && aaa->f();
3043 if (NextToken->Tok.isAnyIdentifier()) {
3044 const FormatToken *NextNextToken = NextToken->getNextNonComment();
3045 if (NextNextToken && NextNextToken->is(tok::arrow))
3046 return TT_BinaryOperator;
3047 }
3048
3049 // It is very unlikely that we are going to find a pointer or reference type
3050 // definition on the RHS of an assignment.
3051 if (IsExpression && !Contexts.back().CaretFound)
3052 return TT_BinaryOperator;
3053
3054 // Opeartors at class scope are likely pointer or reference members.
3055 if (!Scopes.empty() && Scopes.back() == ST_Class)
3056 return TT_PointerOrReference;
3057
3058 // Tokens that indicate member access or chained operator& use.
3059 auto IsChainedOperatorAmpOrMember = [](const FormatToken *token) {
3060 return !token || token->isOneOf(tok::amp, tok::period, tok::arrow,
3061 tok::arrowstar, tok::periodstar);
3062 };
3063
3064 // It's more likely that & represents operator& than an uninitialized
3065 // reference.
3066 if (Tok.is(tok::amp) && PrevToken && PrevToken->Tok.isAnyIdentifier() &&
3067 IsChainedOperatorAmpOrMember(PrevToken->getPreviousNonComment()) &&
3068 NextToken && NextToken->Tok.isAnyIdentifier()) {
3069 if (auto NextNext = NextToken->getNextNonComment();
3070 NextNext &&
3071 (IsChainedOperatorAmpOrMember(NextNext) || NextNext->is(tok::semi))) {
3072 return TT_BinaryOperator;
3073 }
3074 }
3075
3076 return TT_PointerOrReference;
3077 }
3078
determinePlusMinusCaretUsage(const FormatToken & Tok)3079 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
3080 if (determineUnaryOperatorByUsage(Tok))
3081 return TT_UnaryOperator;
3082
3083 const FormatToken *PrevToken = Tok.getPreviousNonComment();
3084 if (!PrevToken)
3085 return TT_UnaryOperator;
3086
3087 if (PrevToken->is(tok::at))
3088 return TT_UnaryOperator;
3089
3090 // Fall back to marking the token as binary operator.
3091 return TT_BinaryOperator;
3092 }
3093
3094 /// Determine whether ++/-- are pre- or post-increments/-decrements.
determineIncrementUsage(const FormatToken & Tok)3095 TokenType determineIncrementUsage(const FormatToken &Tok) {
3096 const FormatToken *PrevToken = Tok.getPreviousNonComment();
3097 if (!PrevToken || PrevToken->is(TT_CastRParen))
3098 return TT_UnaryOperator;
3099 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
3100 return TT_TrailingUnaryOperator;
3101
3102 return TT_UnaryOperator;
3103 }
3104
3105 SmallVector<Context, 8> Contexts;
3106
3107 const FormatStyle &Style;
3108 AnnotatedLine &Line;
3109 FormatToken *CurrentToken;
3110 bool AutoFound;
3111 bool IsCpp;
3112 LangOptions LangOpts;
3113 const AdditionalKeywords &Keywords;
3114
3115 SmallVector<ScopeType> &Scopes;
3116
// Set of "<" tokens that do not open a template parameter list. If parseAngle
// determines that a specific token can't be a template opener, it will make
// the same decision irrespective of the decisions for tokens leading up to
// it. Store this information to prevent this from causing exponential
// runtime.
3121 llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
3122
3123 int TemplateDeclarationDepth;
3124 };
3125
3126 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
3127 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
3128
3129 /// Parses binary expressions by inserting fake parenthesis based on
3130 /// operator precedence.
3131 class ExpressionParser {
3132 public:
ExpressionParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,AnnotatedLine & Line)3133 ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
3134 AnnotatedLine &Line)
3135 : Style(Style), Keywords(Keywords), Line(Line), Current(Line.First) {}
3136
3137 /// Parse expressions with the given operator precedence.
parse(int Precedence=0)3138 void parse(int Precedence = 0) {
3139 // Skip 'return' and ObjC selector colons as they are not part of a binary
3140 // expression.
3141 while (Current && (Current->is(tok::kw_return) ||
3142 (Current->is(tok::colon) &&
3143 Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) {
3144 next();
3145 }
3146
3147 if (!Current || Precedence > PrecedenceArrowAndPeriod)
3148 return;
3149
3150 // Conditional expressions need to be parsed separately for proper nesting.
3151 if (Precedence == prec::Conditional) {
3152 parseConditionalExpr();
3153 return;
3154 }
3155
3156 // Parse unary operators, which all have a higher precedence than binary
3157 // operators.
3158 if (Precedence == PrecedenceUnaryOperator) {
3159 parseUnaryOperator();
3160 return;
3161 }
3162
3163 FormatToken *Start = Current;
3164 FormatToken *LatestOperator = nullptr;
3165 unsigned OperatorIndex = 0;
3166 // The first name of the current type in a port list.
3167 FormatToken *VerilogFirstOfType = nullptr;
3168
3169 while (Current) {
3170 // In Verilog ports in a module header that don't have a type take the
3171 // type of the previous one. For example,
3172 // module a(output b,
3173 // c,
3174 // output d);
3175 // In this case there need to be fake parentheses around b and c.
3176 if (Style.isVerilog() && Precedence == prec::Comma) {
3177 VerilogFirstOfType =
3178 verilogGroupDecl(VerilogFirstOfType, LatestOperator);
3179 }
3180
3181 // Consume operators with higher precedence.
3182 parse(Precedence + 1);
3183
3184 int CurrentPrecedence = getCurrentPrecedence();
3185
3186 if (Precedence == CurrentPrecedence && Current &&
3187 Current->is(TT_SelectorName)) {
3188 if (LatestOperator)
3189 addFakeParenthesis(Start, prec::Level(Precedence));
3190 Start = Current;
3191 }
3192
3193 if ((Style.isCSharp() || Style.isJavaScript() ||
3194 Style.Language == FormatStyle::LK_Java) &&
3195 Precedence == prec::Additive && Current) {
3196 // A string can be broken without parentheses around it when it is
3197 // already in a sequence of strings joined by `+` signs.
3198 FormatToken *Prev = Current->getPreviousNonComment();
3199 if (Prev && Prev->is(tok::string_literal) &&
3200 (Prev == Start || Prev->endsSequence(tok::string_literal, tok::plus,
3201 TT_StringInConcatenation))) {
3202 Prev->setType(TT_StringInConcatenation);
3203 }
3204 }
3205
3206 // At the end of the line or when an operator with lower precedence is
3207 // found, insert fake parenthesis and return.
3208 if (!Current ||
3209 (Current->closesScope() &&
3210 (Current->MatchingParen || Current->is(TT_TemplateString))) ||
3211 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
3212 (CurrentPrecedence == prec::Conditional &&
3213 Precedence == prec::Assignment && Current->is(tok::colon))) {
3214 break;
3215 }
3216
3217 // Consume scopes: (), [], <> and {}
3218 // In addition to that we handle require clauses as scope, so that the
3219 // constraints in that are correctly indented.
3220 if (Current->opensScope() ||
3221 Current->isOneOf(TT_RequiresClause,
3222 TT_RequiresClauseInARequiresExpression)) {
3223 // In fragment of a JavaScript template string can look like '}..${' and
3224 // thus close a scope and open a new one at the same time.
3225 while (Current && (!Current->closesScope() || Current->opensScope())) {
3226 next();
3227 parse();
3228 }
3229 next();
3230 } else {
3231 // Operator found.
3232 if (CurrentPrecedence == Precedence) {
3233 if (LatestOperator)
3234 LatestOperator->NextOperator = Current;
3235 LatestOperator = Current;
3236 Current->OperatorIndex = OperatorIndex;
3237 ++OperatorIndex;
3238 }
3239 next(/*SkipPastLeadingComments=*/Precedence > 0);
3240 }
3241 }
3242
3243 // Group variables of the same type.
3244 if (Style.isVerilog() && Precedence == prec::Comma && VerilogFirstOfType)
3245 addFakeParenthesis(VerilogFirstOfType, prec::Comma);
3246
3247 if (LatestOperator && (Current || Precedence > 0)) {
3248 // The requires clauses do not neccessarily end in a semicolon or a brace,
3249 // but just go over to struct/class or a function declaration, we need to
3250 // intervene so that the fake right paren is inserted correctly.
3251 auto End =
3252 (Start->Previous &&
3253 Start->Previous->isOneOf(TT_RequiresClause,
3254 TT_RequiresClauseInARequiresExpression))
3255 ? [this]() {
3256 auto Ret = Current ? Current : Line.Last;
3257 while (!Ret->ClosesRequiresClause && Ret->Previous)
3258 Ret = Ret->Previous;
3259 return Ret;
3260 }()
3261 : nullptr;
3262
3263 if (Precedence == PrecedenceArrowAndPeriod) {
3264 // Call expressions don't have a binary operator precedence.
3265 addFakeParenthesis(Start, prec::Unknown, End);
3266 } else {
3267 addFakeParenthesis(Start, prec::Level(Precedence), End);
3268 }
3269 }
3270 }
3271
3272 private:
3273 /// Gets the precedence (+1) of the given token for binary operators
3274 /// and other tokens that we treat like binary operators.
getCurrentPrecedence()3275 int getCurrentPrecedence() {
3276 if (Current) {
3277 const FormatToken *NextNonComment = Current->getNextNonComment();
3278 if (Current->is(TT_ConditionalExpr))
3279 return prec::Conditional;
3280 if (NextNonComment && Current->is(TT_SelectorName) &&
3281 (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
3282 (Style.isProto() && NextNonComment->is(tok::less)))) {
3283 return prec::Assignment;
3284 }
3285 if (Current->is(TT_JsComputedPropertyName))
3286 return prec::Assignment;
3287 if (Current->is(TT_LambdaArrow))
3288 return prec::Comma;
3289 if (Current->is(TT_FatArrow))
3290 return prec::Assignment;
3291 if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
3292 (Current->is(tok::comment) && NextNonComment &&
3293 NextNonComment->is(TT_SelectorName))) {
3294 return 0;
3295 }
3296 if (Current->is(TT_RangeBasedForLoopColon))
3297 return prec::Comma;
3298 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3299 Current->is(Keywords.kw_instanceof)) {
3300 return prec::Relational;
3301 }
3302 if (Style.isJavaScript() &&
3303 Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) {
3304 return prec::Relational;
3305 }
3306 if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
3307 return Current->getPrecedence();
3308 if (Current->isOneOf(tok::period, tok::arrow) &&
3309 Current->isNot(TT_TrailingReturnArrow)) {
3310 return PrecedenceArrowAndPeriod;
3311 }
3312 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3313 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
3314 Keywords.kw_throws)) {
3315 return 0;
3316 }
3317 // In Verilog case labels are not on separate lines straight out of
3318 // UnwrappedLineParser. The colon is not part of an expression.
3319 if (Style.isVerilog() && Current->is(tok::colon))
3320 return 0;
3321 }
3322 return -1;
3323 }
3324
addFakeParenthesis(FormatToken * Start,prec::Level Precedence,FormatToken * End=nullptr)3325 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence,
3326 FormatToken *End = nullptr) {
3327 // Do not assign fake parenthesis to tokens that are part of an
3328 // unexpanded macro call. The line within the macro call contains
3329 // the parenthesis and commas, and we will not find operators within
3330 // that structure.
3331 if (Start->MacroParent)
3332 return;
3333
3334 Start->FakeLParens.push_back(Precedence);
3335 if (Precedence > prec::Unknown)
3336 Start->StartsBinaryExpression = true;
3337 if (!End && Current)
3338 End = Current->getPreviousNonComment();
3339 if (End) {
3340 ++End->FakeRParens;
3341 if (Precedence > prec::Unknown)
3342 End->EndsBinaryExpression = true;
3343 }
3344 }
3345
3346 /// Parse unary operator expressions and surround them with fake
3347 /// parentheses if appropriate.
parseUnaryOperator()3348 void parseUnaryOperator() {
3349 llvm::SmallVector<FormatToken *, 2> Tokens;
3350 while (Current && Current->is(TT_UnaryOperator)) {
3351 Tokens.push_back(Current);
3352 next();
3353 }
3354 parse(PrecedenceArrowAndPeriod);
3355 for (FormatToken *Token : llvm::reverse(Tokens)) {
3356 // The actual precedence doesn't matter.
3357 addFakeParenthesis(Token, prec::Unknown);
3358 }
3359 }
3360
parseConditionalExpr()3361 void parseConditionalExpr() {
3362 while (Current && Current->isTrailingComment())
3363 next();
3364 FormatToken *Start = Current;
3365 parse(prec::LogicalOr);
3366 if (!Current || Current->isNot(tok::question))
3367 return;
3368 next();
3369 parse(prec::Assignment);
3370 if (!Current || Current->isNot(TT_ConditionalExpr))
3371 return;
3372 next();
3373 parse(prec::Assignment);
3374 addFakeParenthesis(Start, prec::Conditional);
3375 }
3376
next(bool SkipPastLeadingComments=true)3377 void next(bool SkipPastLeadingComments = true) {
3378 if (Current)
3379 Current = Current->Next;
3380 while (Current &&
3381 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
3382 Current->isTrailingComment()) {
3383 Current = Current->Next;
3384 }
3385 }
3386
3387 // Add fake parenthesis around declarations of the same type for example in a
3388 // module prototype. Return the first port / variable of the current type.
verilogGroupDecl(FormatToken * FirstOfType,FormatToken * PreviousComma)3389 FormatToken *verilogGroupDecl(FormatToken *FirstOfType,
3390 FormatToken *PreviousComma) {
3391 if (!Current)
3392 return nullptr;
3393
3394 FormatToken *Start = Current;
3395
3396 // Skip attributes.
3397 while (Start->startsSequence(tok::l_paren, tok::star)) {
3398 if (!(Start = Start->MatchingParen) ||
3399 !(Start = Start->getNextNonComment())) {
3400 return nullptr;
3401 }
3402 }
3403
3404 FormatToken *Tok = Start;
3405
3406 if (Tok->is(Keywords.kw_assign))
3407 Tok = Tok->getNextNonComment();
3408
3409 // Skip any type qualifiers to find the first identifier. It may be either a
3410 // new type name or a variable name. There can be several type qualifiers
3411 // preceding a variable name, and we can not tell them apart by looking at
3412 // the word alone since a macro can be defined as either a type qualifier or
3413 // a variable name. Thus we use the last word before the dimensions instead
3414 // of the first word as the candidate for the variable or type name.
3415 FormatToken *First = nullptr;
3416 while (Tok) {
3417 FormatToken *Next = Tok->getNextNonComment();
3418
3419 if (Tok->is(tok::hash)) {
3420 // Start of a macro expansion.
3421 First = Tok;
3422 Tok = Next;
3423 if (Tok)
3424 Tok = Tok->getNextNonComment();
3425 } else if (Tok->is(tok::hashhash)) {
3426 // Concatenation. Skip.
3427 Tok = Next;
3428 if (Tok)
3429 Tok = Tok->getNextNonComment();
3430 } else if (Keywords.isVerilogQualifier(*Tok) ||
3431 Keywords.isVerilogIdentifier(*Tok)) {
3432 First = Tok;
3433 Tok = Next;
3434 // The name may have dots like `interface_foo.modport_foo`.
3435 while (Tok && Tok->isOneOf(tok::period, tok::coloncolon) &&
3436 (Tok = Tok->getNextNonComment())) {
3437 if (Keywords.isVerilogIdentifier(*Tok))
3438 Tok = Tok->getNextNonComment();
3439 }
3440 } else if (!Next) {
3441 Tok = nullptr;
3442 } else if (Tok->is(tok::l_paren)) {
3443 // Make sure the parenthesized list is a drive strength. Otherwise the
3444 // statement may be a module instantiation in which case we have already
3445 // found the instance name.
3446 if (Next->isOneOf(
3447 Keywords.kw_highz0, Keywords.kw_highz1, Keywords.kw_large,
3448 Keywords.kw_medium, Keywords.kw_pull0, Keywords.kw_pull1,
3449 Keywords.kw_small, Keywords.kw_strong0, Keywords.kw_strong1,
3450 Keywords.kw_supply0, Keywords.kw_supply1, Keywords.kw_weak0,
3451 Keywords.kw_weak1)) {
3452 Tok->setType(TT_VerilogStrength);
3453 Tok = Tok->MatchingParen;
3454 if (Tok) {
3455 Tok->setType(TT_VerilogStrength);
3456 Tok = Tok->getNextNonComment();
3457 }
3458 } else {
3459 break;
3460 }
3461 } else if (Tok->is(Keywords.kw_verilogHash)) {
3462 // Delay control.
3463 if (Next->is(tok::l_paren))
3464 Next = Next->MatchingParen;
3465 if (Next)
3466 Tok = Next->getNextNonComment();
3467 } else {
3468 break;
3469 }
3470 }
3471
3472 // Find the second identifier. If it exists it will be the name.
3473 FormatToken *Second = nullptr;
3474 // Dimensions.
3475 while (Tok && Tok->is(tok::l_square) && (Tok = Tok->MatchingParen))
3476 Tok = Tok->getNextNonComment();
3477 if (Tok && (Tok->is(tok::hash) || Keywords.isVerilogIdentifier(*Tok)))
3478 Second = Tok;
3479
3480 // If the second identifier doesn't exist and there are qualifiers, the type
3481 // is implied.
3482 FormatToken *TypedName = nullptr;
3483 if (Second) {
3484 TypedName = Second;
3485 if (First && First->is(TT_Unknown))
3486 First->setType(TT_VerilogDimensionedTypeName);
3487 } else if (First != Start) {
3488 // If 'First' is null, then this isn't a declaration, 'TypedName' gets set
3489 // to null as intended.
3490 TypedName = First;
3491 }
3492
3493 if (TypedName) {
3494 // This is a declaration with a new type.
3495 if (TypedName->is(TT_Unknown))
3496 TypedName->setType(TT_StartOfName);
3497 // Group variables of the previous type.
3498 if (FirstOfType && PreviousComma) {
3499 PreviousComma->setType(TT_VerilogTypeComma);
3500 addFakeParenthesis(FirstOfType, prec::Comma, PreviousComma->Previous);
3501 }
3502
3503 FirstOfType = TypedName;
3504
3505 // Don't let higher precedence handle the qualifiers. For example if we
3506 // have:
3507 // parameter x = 0
3508 // We skip `parameter` here. This way the fake parentheses for the
3509 // assignment will be around `x = 0`.
3510 while (Current && Current != FirstOfType) {
3511 if (Current->opensScope()) {
3512 next();
3513 parse();
3514 }
3515 next();
3516 }
3517 }
3518
3519 return FirstOfType;
3520 }
3521
3522 const FormatStyle &Style;
3523 const AdditionalKeywords &Keywords;
3524 const AnnotatedLine &Line;
3525 FormatToken *Current;
3526 };
3527
3528 } // end anonymous namespace
3529
setCommentLineLevels(SmallVectorImpl<AnnotatedLine * > & Lines) const3530 void TokenAnnotator::setCommentLineLevels(
3531 SmallVectorImpl<AnnotatedLine *> &Lines) const {
3532 const AnnotatedLine *NextNonCommentLine = nullptr;
3533 for (AnnotatedLine *Line : llvm::reverse(Lines)) {
3534 assert(Line->First);
3535
3536 // If the comment is currently aligned with the line immediately following
3537 // it, that's probably intentional and we should keep it.
3538 if (NextNonCommentLine && NextNonCommentLine->First->NewlinesBefore < 2 &&
3539 Line->isComment() && !isClangFormatOff(Line->First->TokenText) &&
3540 NextNonCommentLine->First->OriginalColumn ==
3541 Line->First->OriginalColumn) {
3542 const bool PPDirectiveOrImportStmt =
3543 NextNonCommentLine->Type == LT_PreprocessorDirective ||
3544 NextNonCommentLine->Type == LT_ImportStatement;
3545 if (PPDirectiveOrImportStmt)
3546 Line->Type = LT_CommentAbovePPDirective;
3547 // Align comments for preprocessor lines with the # in column 0 if
3548 // preprocessor lines are not indented. Otherwise, align with the next
3549 // line.
3550 Line->Level = Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
3551 PPDirectiveOrImportStmt
3552 ? 0
3553 : NextNonCommentLine->Level;
3554 } else {
3555 NextNonCommentLine = Line->First->isNot(tok::r_brace) ? Line : nullptr;
3556 }
3557
3558 setCommentLineLevels(Line->Children);
3559 }
3560 }
3561
maxNestingDepth(const AnnotatedLine & Line)3562 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
3563 unsigned Result = 0;
3564 for (const auto *Tok = Line.First; Tok; Tok = Tok->Next)
3565 Result = std::max(Result, Tok->NestingLevel);
3566 return Result;
3567 }
3568
3569 // Returns the name of a function with no return type, e.g. a constructor or
3570 // destructor.
getFunctionName(const AnnotatedLine & Line,FormatToken * & OpeningParen)3571 static FormatToken *getFunctionName(const AnnotatedLine &Line,
3572 FormatToken *&OpeningParen) {
3573 for (FormatToken *Tok = Line.getFirstNonComment(), *Name = nullptr; Tok;
3574 Tok = Tok->getNextNonComment()) {
3575 // Skip C++11 attributes both before and after the function name.
3576 if (Tok->is(tok::l_square) && Tok->is(TT_AttributeSquare)) {
3577 Tok = Tok->MatchingParen;
3578 if (!Tok)
3579 break;
3580 continue;
3581 }
3582
3583 // Make sure the name is followed by a pair of parentheses.
3584 if (Name) {
3585 if (Tok->is(tok::l_paren) && Tok->isNot(TT_FunctionTypeLParen) &&
3586 Tok->MatchingParen) {
3587 OpeningParen = Tok;
3588 return Name;
3589 }
3590 return nullptr;
3591 }
3592
3593 // Skip keywords that may precede the constructor/destructor name.
3594 if (Tok->isOneOf(tok::kw_friend, tok::kw_inline, tok::kw_virtual,
3595 tok::kw_constexpr, tok::kw_consteval, tok::kw_explicit)) {
3596 continue;
3597 }
3598
3599 // A qualified name may start from the global namespace.
3600 if (Tok->is(tok::coloncolon)) {
3601 Tok = Tok->Next;
3602 if (!Tok)
3603 break;
3604 }
3605
3606 // Skip to the unqualified part of the name.
3607 while (Tok->startsSequence(tok::identifier, tok::coloncolon)) {
3608 assert(Tok->Next);
3609 Tok = Tok->Next->Next;
3610 if (!Tok)
3611 return nullptr;
3612 }
3613
3614 // Skip the `~` if a destructor name.
3615 if (Tok->is(tok::tilde)) {
3616 Tok = Tok->Next;
3617 if (!Tok)
3618 break;
3619 }
3620
3621 // Make sure the name is not already annotated, e.g. as NamespaceMacro.
3622 if (Tok->isNot(tok::identifier) || Tok->isNot(TT_Unknown))
3623 break;
3624
3625 Name = Tok;
3626 }
3627
3628 return nullptr;
3629 }
3630
3631 // Checks if Tok is a constructor/destructor name qualified by its class name.
isCtorOrDtorName(const FormatToken * Tok)3632 static bool isCtorOrDtorName(const FormatToken *Tok) {
3633 assert(Tok && Tok->is(tok::identifier));
3634 const auto *Prev = Tok->Previous;
3635
3636 if (Prev && Prev->is(tok::tilde))
3637 Prev = Prev->Previous;
3638
3639 if (!Prev || !Prev->endsSequence(tok::coloncolon, tok::identifier))
3640 return false;
3641
3642 assert(Prev->Previous);
3643 return Prev->Previous->TokenText == Tok->TokenText;
3644 }
3645
annotate(AnnotatedLine & Line)3646 void TokenAnnotator::annotate(AnnotatedLine &Line) {
3647 AnnotatingParser Parser(Style, Line, Keywords, Scopes);
3648 Line.Type = Parser.parseLine();
3649
3650 for (auto &Child : Line.Children)
3651 annotate(*Child);
3652
3653 // With very deep nesting, ExpressionParser uses lots of stack and the
3654 // formatting algorithm is very slow. We're not going to do a good job here
3655 // anyway - it's probably generated code being formatted by mistake.
3656 // Just skip the whole line.
3657 if (maxNestingDepth(Line) > 50)
3658 Line.Type = LT_Invalid;
3659
3660 if (Line.Type == LT_Invalid)
3661 return;
3662
3663 ExpressionParser ExprParser(Style, Keywords, Line);
3664 ExprParser.parse();
3665
3666 if (IsCpp) {
3667 FormatToken *OpeningParen = nullptr;
3668 auto *Tok = getFunctionName(Line, OpeningParen);
3669 if (Tok && ((!Scopes.empty() && Scopes.back() == ST_Class) ||
3670 Line.endsWith(TT_FunctionLBrace) || isCtorOrDtorName(Tok))) {
3671 Tok->setFinalizedType(TT_CtorDtorDeclName);
3672 assert(OpeningParen);
3673 OpeningParen->setFinalizedType(TT_FunctionDeclarationLParen);
3674 }
3675 }
3676
3677 if (Line.startsWith(TT_ObjCMethodSpecifier))
3678 Line.Type = LT_ObjCMethodDecl;
3679 else if (Line.startsWith(TT_ObjCDecl))
3680 Line.Type = LT_ObjCDecl;
3681 else if (Line.startsWith(TT_ObjCProperty))
3682 Line.Type = LT_ObjCProperty;
3683
3684 auto *First = Line.First;
3685 First->SpacesRequiredBefore = 1;
3686 First->CanBreakBefore = First->MustBreakBefore;
3687 }
3688
// This function heuristically determines whether 'Current' starts the name of a
// function declaration.
//
// LangOpts is used for type-name detection and to tell whether C++ operator
// names are enabled. Current must have a previous token. Line is the line that
// contains Current. ClosingParen is an out-parameter: once a parameter list
// with a matching closing parenthesis has been located it is set to that
// closing parenthesis, even if the function subsequently returns false. It is
// left untouched on the early exits that happen before that point.
static bool isFunctionDeclarationName(const LangOptions &LangOpts,
                                      const FormatToken &Current,
                                      const AnnotatedLine &Line,
                                      FormatToken *&ClosingParen) {
  assert(Current.Previous);

  // Already annotated as a function declaration name.
  if (Current.is(TT_FunctionDeclarationName))
    return true;

  // Only tokens carrying identifier info (identifiers and keywords such as
  // `operator`) are considered.
  if (!Current.Tok.getIdentifierInfo())
    return false;

  const auto &Previous = *Current.Previous;

  // Reject tokens whose second predecessor is an Objective-C declaration
  // token.
  if (const auto *PrevPrev = Previous.Previous;
      PrevPrev && PrevPrev->is(TT_ObjCDecl)) {
    return false;
  }

  // Walks forward over the tokens that can form an operator name and returns
  // the token annotated as TT_OverloadedOperatorLParen, or nullptr if the walk
  // ends without reaching one.
  auto skipOperatorName =
      [&LangOpts](const FormatToken *Next) -> const FormatToken * {
    for (; Next; Next = Next->Next) {
      if (Next->is(TT_OverloadedOperatorLParen))
        return Next;
      if (Next->is(TT_OverloadedOperator))
        continue;
      if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
        // For 'new[]' and 'delete[]'.
        if (Next->Next &&
            Next->Next->startsSequence(tok::l_square, tok::r_square)) {
          Next = Next->Next->Next;
        }
        continue;
      }
      if (Next->startsSequence(tok::l_square, tok::r_square)) {
        // For operator[]().
        Next = Next->Next;
        continue;
      }
      if ((Next->isTypeName(LangOpts) || Next->is(tok::identifier)) &&
          Next->Next && Next->Next->isPointerOrReference()) {
        // For operator void*(), operator char*(), operator Foo*().
        Next = Next->Next;
        continue;
      }
      if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
        // Jump over template arguments.
        Next = Next->MatchingParen;
        continue;
      }

      break;
    }
    return nullptr;
  };

  const auto *Next = Current.Next;
  const bool IsCpp = LangOpts.CXXOperatorNames;

  // Find parentheses of parameter list.
  if (Current.is(tok::kw_operator)) {
    // `operator` preceded by an identifier-like token other than
    // return/co_return is taken to be a declaration.
    if (Previous.Tok.getIdentifierInfo() &&
        !Previous.isOneOf(tok::kw_return, tok::kw_co_return)) {
      return true;
    }
    if (Previous.is(tok::r_paren) && Previous.is(TT_TypeDeclarationParen)) {
      assert(Previous.MatchingParen);
      assert(Previous.MatchingParen->is(tok::l_paren));
      assert(Previous.MatchingParen->is(TT_TypeDeclarationParen));
      return true;
    }
    if (!Previous.isPointerOrReference() && Previous.isNot(TT_TemplateCloser))
      return false;
    Next = skipOperatorName(Next);
  } else {
    // Non-operator names must be a top-level TT_StartOfName.
    if (Current.isNot(TT_StartOfName) || Current.NestingLevel != 0)
      return false;
    // Skip template arguments, `::` qualifiers and attributes until the
    // opening parenthesis is reached; anything else disqualifies the name.
    for (; Next; Next = Next->Next) {
      if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
        Next = Next->MatchingParen;
      } else if (Next->is(tok::coloncolon)) {
        Next = Next->Next;
        if (!Next)
          return false;
        if (Next->is(tok::kw_operator)) {
          Next = skipOperatorName(Next->Next);
          break;
        }
        if (Next->isNot(tok::identifier))
          return false;
      } else if (isCppAttribute(IsCpp, *Next)) {
        Next = Next->MatchingParen;
        if (!Next)
          return false;
      } else if (Next->is(tok::l_paren)) {
        break;
      } else {
        return false;
      }
    }
  }

  // Check whether parameter list can belong to a function declaration.
  if (!Next || Next->isNot(tok::l_paren) || !Next->MatchingParen)
    return false;
  ClosingParen = Next->MatchingParen;
  assert(ClosingParen->is(tok::r_paren));
  // If the line ends with "{", this is likely a function definition.
  if (Line.Last->is(tok::l_brace))
    return true;
  if (Next->Next == ClosingParen)
    return true; // Empty parentheses.
  // If there is an &/&& after the r_paren, this is likely a function.
  if (ClosingParen->Next && ClosingParen->Next->is(TT_PointerOrReference))
    return true;

  // Check for K&R C function definitions (and C++ function definitions with
  // unnamed parameters), e.g.:
  //   int f(i)
  //   {
  //     return i + 1;
  //   }
  //   bool g(size_t = 0, bool b = false)
  //   {
  //     return !b;
  //   }
  if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
      !Line.endsWith(tok::semi)) {
    return true;
  }

  // Scan the parameter list: type-like tokens indicate a declaration, while
  // braces, Objective-C method expressions and literals indicate a call.
  for (const FormatToken *Tok = Next->Next; Tok && Tok != ClosingParen;
       Tok = Tok->Next) {
    if (Tok->is(TT_TypeDeclarationParen))
      return true;
    if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
      // Do not look inside nested parentheses or template argument lists.
      Tok = Tok->MatchingParen;
      continue;
    }
    if (Tok->is(tok::kw_const) || Tok->isTypeName(LangOpts) ||
        Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) {
      return true;
    }
    if (Tok->isOneOf(tok::l_brace, TT_ObjCMethodExpr) || Tok->Tok.isLiteral())
      return false;
  }
  return false;
}
3838
mustBreakForReturnType(const AnnotatedLine & Line) const3839 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
3840 assert(Line.MightBeFunctionDecl);
3841
3842 if ((Style.BreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
3843 Style.BreakAfterReturnType == FormatStyle::RTBS_TopLevelDefinitions) &&
3844 Line.Level > 0) {
3845 return false;
3846 }
3847
3848 switch (Style.BreakAfterReturnType) {
3849 case FormatStyle::RTBS_None:
3850 case FormatStyle::RTBS_Automatic:
3851 case FormatStyle::RTBS_ExceptShortType:
3852 return false;
3853 case FormatStyle::RTBS_All:
3854 case FormatStyle::RTBS_TopLevel:
3855 return true;
3856 case FormatStyle::RTBS_AllDefinitions:
3857 case FormatStyle::RTBS_TopLevelDefinitions:
3858 return Line.mightBeFunctionDefinition();
3859 }
3860
3861 return false;
3862 }
3863
/// Fills in the per-token formatting data of \p Line after processing its
/// child lines recursively.
///
/// The function (1) looks for the function declaration name on the line and
/// finalizes related token types, (2) computes \c SpacesRequiredBefore,
/// \c MustBreakBefore, \c CanBreakBefore, \c TotalLength and \c SplitPenalty
/// for every token after the first, and (3) computes the unbreakable tail
/// lengths and the \c IndentLevel of every token.
void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
  for (AnnotatedLine *ChildLine : Line.Children)
    calculateFormattingInformation(*ChildLine);

  auto *First = Line.First;
  // A multiline first token is treated as occupying the whole column limit.
  First->TotalLength = First->IsMultiline
                           ? Style.ColumnLimit
                           : Line.FirstStartColumn + First->ColumnWidth;
  FormatToken *Current = First->Next;
  bool InFunctionDecl = Line.MightBeFunctionDecl;
  bool AlignArrayOfStructures =
      (Style.AlignArrayOfStructures != FormatStyle::AIAS_None &&
       Line.Type == LT_ArrayOfStructInitializer);
  if (AlignArrayOfStructures)
    calculateArrayInitializerColumnList(Line);

  bool SeenName = false;
  bool LineIsFunctionDeclaration = false;
  FormatToken *ClosingParen = nullptr;
  FormatToken *AfterLastAttribute = nullptr;

  // Scan for the first token that is a constructor/destructor name or a
  // function declaration name, remembering the token that follows the last
  // C++ attribute group seen before it.
  for (auto *Tok = Current; Tok; Tok = Tok->Next) {
    if (Tok->is(TT_StartOfName))
      SeenName = true;
    if (Tok->Previous->EndsCppAttributeGroup)
      AfterLastAttribute = Tok;
    if (const bool IsCtorOrDtor = Tok->is(TT_CtorDtorDeclName);
        IsCtorOrDtor ||
        isFunctionDeclarationName(LangOpts, *Tok, Line, ClosingParen)) {
      if (!IsCtorOrDtor)
        Tok->setFinalizedType(TT_FunctionDeclarationName);
      LineIsFunctionDeclaration = true;
      SeenName = true;
      if (ClosingParen) {
        auto *OpeningParen = ClosingParen->MatchingParen;
        assert(OpeningParen);
        if (OpeningParen->is(TT_Unknown))
          OpeningParen->setType(TT_FunctionDeclarationLParen);
      }
      break;
    }
  }

  // For a C++ function declaration line ending in `}` followed by `;`, mark
  // that closing brace and its matching opening brace as a block and finalize
  // the opening brace as a function brace.
  if (IsCpp && (LineIsFunctionDeclaration || First->is(TT_CtorDtorDeclName)) &&
      Line.endsWith(tok::semi, tok::r_brace)) {
    auto *Tok = Line.Last->Previous;
    while (Tok->isNot(tok::r_brace))
      Tok = Tok->Previous;
    if (auto *LBrace = Tok->MatchingParen; LBrace) {
      assert(LBrace->is(tok::l_brace));
      Tok->setBlockKind(BK_Block);
      LBrace->setBlockKind(BK_Block);
      LBrace->setFinalizedType(TT_FunctionLBrace);
    }
  }

  // Force a break after the last attribute group if the style asks for it.
  if (IsCpp && SeenName && AfterLastAttribute &&
      mustBreakAfterAttributes(*AfterLastAttribute, Style)) {
    AfterLastAttribute->MustBreakBefore = true;
    if (LineIsFunctionDeclaration)
      Line.ReturnTypeWrapped = true;
  }

  if (IsCpp) {
    if (!LineIsFunctionDeclaration) {
      // Annotate */&/&& in `operator` function calls as binary operators.
      for (const auto *Tok = First; Tok; Tok = Tok->Next) {
        if (Tok->isNot(tok::kw_operator))
          continue;
        // Advance to the opening parenthesis of the operator call.
        do {
          Tok = Tok->Next;
        } while (Tok && Tok->isNot(TT_OverloadedOperatorLParen));
        if (!Tok || !Tok->MatchingParen)
          break;
        const auto *LeftParen = Tok;
        for (Tok = Tok->Next; Tok && Tok != LeftParen->MatchingParen;
             Tok = Tok->Next) {
          if (Tok->isNot(tok::identifier))
            continue;
          auto *Next = Tok->Next;
          // `identifier <*|&|&&> identifier` inside the call arguments.
          const bool NextIsBinaryOperator =
              Next && Next->isPointerOrReference() && Next->Next &&
              Next->Next->is(tok::identifier);
          if (!NextIsBinaryOperator)
            continue;
          Next->setType(TT_BinaryOperator);
          Tok = Next;
        }
      }
    } else if (ClosingParen) {
      // After the parameter list, mark the first `->` as a trailing return
      // arrow, skipping the parenthesized arguments of trailing annotations
      // and stopping at a constructor initializer colon.
      for (auto *Tok = ClosingParen->Next; Tok; Tok = Tok->Next) {
        if (Tok->is(TT_CtorInitializerColon))
          break;
        if (Tok->is(tok::arrow)) {
          Tok->setType(TT_TrailingReturnArrow);
          break;
        }
        if (Tok->isNot(TT_TrailingAnnotation))
          continue;
        const auto *Next = Tok->Next;
        if (!Next || Next->isNot(tok::l_paren))
          continue;
        Tok = Next->MatchingParen;
        if (!Tok)
          break;
      }
    }
  }

  // Main pass: compute spacing, break and length information for each token
  // after the first one.
  while (Current) {
    const FormatToken *Prev = Current->Previous;
    if (Current->is(TT_LineComment)) {
      if (Prev->is(BK_BracedInit) && Prev->opensScope()) {
        Current->SpacesRequiredBefore =
            (Style.Cpp11BracedListStyle && !Style.SpacesInParensOptions.Other)
                ? 0
                : 1;
      } else if (Prev->is(TT_VerilogMultiLineListLParen)) {
        Current->SpacesRequiredBefore = 0;
      } else {
        Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
      }

      // If we find a trailing comment, iterate backwards to determine whether
      // it seems to relate to a specific parameter. If so, break before that
      // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
      // to the previous line in:
      //   SomeFunction(a,
      //                b, // comment
      //                c);
      if (!Current->HasUnescapedNewline) {
        for (FormatToken *Parameter = Current->Previous; Parameter;
             Parameter = Parameter->Previous) {
          if (Parameter->isOneOf(tok::comment, tok::r_brace))
            break;
          if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
            if (Parameter->Previous->isNot(TT_CtorInitializerComma) &&
                Parameter->HasUnescapedNewline) {
              Parameter->MustBreakBefore = true;
            }
            break;
          }
        }
      }
    } else if (!Current->Finalized && Current->SpacesRequiredBefore == 0 &&
               spaceRequiredBefore(Line, *Current)) {
      Current->SpacesRequiredBefore = 1;
    }

    // A child line ending in a line comment forces a break before the token
    // that follows the children.
    const auto &Children = Prev->Children;
    if (!Children.empty() && Children.back()->Last->is(TT_LineComment)) {
      Current->MustBreakBefore = true;
    } else {
      Current->MustBreakBefore =
          Current->MustBreakBefore || mustBreakBefore(Line, *Current);
      if (!Current->MustBreakBefore && InFunctionDecl &&
          Current->is(TT_FunctionDeclarationName)) {
        Current->MustBreakBefore = mustBreakForReturnType(Line);
      }
    }

    Current->CanBreakBefore =
        Current->MustBreakBefore || canBreakBefore(Line, *Current);
    // Account for the length of a single child line hanging off the previous
    // token; a child ending in a trailing comment counts as a full column
    // limit.
    unsigned ChildSize = 0;
    if (Prev->Children.size() == 1) {
      FormatToken &LastOfChild = *Prev->Children[0]->Last;
      ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
                                                  : LastOfChild.TotalLength + 1;
    }
    // When the token cannot stay on the previous token's line, add a whole
    // column limit to the running length.
    if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
        (Prev->Children.size() == 1 &&
         Prev->Children[0]->First->MustBreakBefore) ||
        Current->IsMultiline) {
      Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
    } else {
      Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
                             ChildSize + Current->SpacesRequiredBefore;
    }

    // Tokens after the constructor initializer colon are no longer treated as
    // part of the function declaration.
    if (Current->is(TT_CtorInitializerColon))
      InFunctionDecl = false;

    // FIXME: Only calculate this if CanBreakBefore is true once static
    // initializers etc. are sorted out.
    // FIXME: Move magic numbers to a better place.

    // Reduce penalty for aligning ObjC method arguments using the colon
    // alignment as this is the canonical way (still prefer fitting everything
    // into one line if possible). Trying to fit a whole expression into one
    // line should not force other line breaks (e.g. when ObjC method
    // expression is a part of other expression).
    Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
    if (Style.Language == FormatStyle::LK_ObjC &&
        Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
      if (Current->ParameterIndex == 1)
        Current->SplitPenalty += 5 * Current->BindingStrength;
    } else {
      Current->SplitPenalty += 20 * Current->BindingStrength;
    }

    Current = Current->Next;
  }

  calculateUnbreakableTailLengths(Line);
  // Assign indent levels: the level drops at a token whose matching token
  // opens a block or block-type list and rises after a token that opens one.
  unsigned IndentLevel = Line.Level;
  for (Current = First; Current; Current = Current->Next) {
    if (Current->Role)
      Current->Role->precomputeFormattingInfos(Current);
    if (Current->MatchingParen &&
        Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
        IndentLevel > 0) {
      --IndentLevel;
    }
    Current->IndentLevel = IndentLevel;
    if (Current->opensBlockOrBlockTypeList(Style))
      ++IndentLevel;
  }

  LLVM_DEBUG({ printDebugInfo(Line); });
}
4084
calculateUnbreakableTailLengths(AnnotatedLine & Line) const4085 void TokenAnnotator::calculateUnbreakableTailLengths(
4086 AnnotatedLine &Line) const {
4087 unsigned UnbreakableTailLength = 0;
4088 FormatToken *Current = Line.Last;
4089 while (Current) {
4090 Current->UnbreakableTailLength = UnbreakableTailLength;
4091 if (Current->CanBreakBefore ||
4092 Current->isOneOf(tok::comment, tok::string_literal)) {
4093 UnbreakableTailLength = 0;
4094 } else {
4095 UnbreakableTailLength +=
4096 Current->ColumnWidth + Current->SpacesRequiredBefore;
4097 }
4098 Current = Current->Previous;
4099 }
4100 }
4101
calculateArrayInitializerColumnList(AnnotatedLine & Line) const4102 void TokenAnnotator::calculateArrayInitializerColumnList(
4103 AnnotatedLine &Line) const {
4104 if (Line.First == Line.Last)
4105 return;
4106 auto *CurrentToken = Line.First;
4107 CurrentToken->ArrayInitializerLineStart = true;
4108 unsigned Depth = 0;
4109 while (CurrentToken && CurrentToken != Line.Last) {
4110 if (CurrentToken->is(tok::l_brace)) {
4111 CurrentToken->IsArrayInitializer = true;
4112 if (CurrentToken->Next)
4113 CurrentToken->Next->MustBreakBefore = true;
4114 CurrentToken =
4115 calculateInitializerColumnList(Line, CurrentToken->Next, Depth + 1);
4116 } else {
4117 CurrentToken = CurrentToken->Next;
4118 }
4119 }
4120 }
4121
calculateInitializerColumnList(AnnotatedLine & Line,FormatToken * CurrentToken,unsigned Depth) const4122 FormatToken *TokenAnnotator::calculateInitializerColumnList(
4123 AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth) const {
4124 while (CurrentToken && CurrentToken != Line.Last) {
4125 if (CurrentToken->is(tok::l_brace))
4126 ++Depth;
4127 else if (CurrentToken->is(tok::r_brace))
4128 --Depth;
4129 if (Depth == 2 && CurrentToken->isOneOf(tok::l_brace, tok::comma)) {
4130 CurrentToken = CurrentToken->Next;
4131 if (!CurrentToken)
4132 break;
4133 CurrentToken->StartsColumn = true;
4134 CurrentToken = CurrentToken->Previous;
4135 }
4136 CurrentToken = CurrentToken->Next;
4137 }
4138 return CurrentToken;
4139 }
4140
/// Returns the penalty for placing a line break between \p Tok and the token
/// before it.
///
/// The checks form a first-match-wins chain, so their order matters. \p Tok
/// must have a previous token. \p InFunctionDecl tells whether \p Tok is
/// still considered part of a function declaration on \p Line.
unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
                                      const FormatToken &Tok,
                                      bool InFunctionDecl) const {
  const FormatToken &Left = *Tok.Previous;
  const FormatToken &Right = Tok;

  if (Left.is(tok::semi))
    return 0;

  // Language specific handling.
  if (Style.Language == FormatStyle::LK_Java) {
    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
      return 1;
    if (Right.is(Keywords.kw_implements))
      return 2;
    if (Left.is(tok::comma) && Left.NestingLevel == 0)
      return 3;
  } else if (Style.isJavaScript()) {
    if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
      return 100;
    if (Left.is(TT_JsTypeColon))
      return 35;
    // Breaking next to the `${` / `}` of a template string placeholder.
    if ((Left.is(TT_TemplateString) && Left.TokenText.ends_with("${")) ||
        (Right.is(TT_TemplateString) && Right.TokenText.starts_with("}"))) {
      return 100;
    }
    // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
    if (Left.opensScope() && Right.closesScope())
      return 200;
  } else if (Style.Language == FormatStyle::LK_Proto) {
    if (Right.is(tok::l_square))
      return 1;
    if (Right.is(tok::period))
      return 500;
  }

  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
    return 1;
  if (Right.is(tok::l_square)) {
    if (Left.is(tok::r_square))
      return 200;
    // Slightly prefer formatting local lambda definitions like functions.
    if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
      return 35;
    if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
                       TT_ArrayInitializerLSquare,
                       TT_DesignatedInitializerLSquare, TT_AttributeSquare)) {
      return 500;
    }
  }

  if (Left.is(tok::coloncolon))
    return Style.PenaltyBreakScopeResolution;
  // Breaking before a declared name, function name or `operator`.
  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
      Right.is(tok::kw_operator)) {
    if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
      return 3;
    if (Left.is(TT_StartOfName))
      return 110;
    if (InFunctionDecl && Right.NestingLevel == 0)
      return Style.PenaltyReturnTypeOnItsOwnLine;
    return 200;
  }
  if (Right.is(TT_PointerOrReference))
    return 190;
  if (Right.is(TT_LambdaArrow))
    return 110;
  if (Left.is(tok::equal) && Right.is(tok::l_brace))
    return 160;
  if (Left.is(TT_CastRParen))
    return 100;
  if (Left.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union))
    return 5000;
  if (Left.is(tok::comment))
    return 1000;

  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
                   TT_CtorInitializerColon)) {
    return 2;
  }

  if (Right.isMemberAccess()) {
    // Breaking before the "./->" of a chained call/member access is reasonably
    // cheap, as formatting those with one call per line is generally
    // desirable. In particular, it should be cheaper to break before the call
    // than it is to break inside a call's parameters, which could lead to weird
    // "hanging" indents. The exception is the very last "./->" to support this
    // frequent pattern:
    //
    //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
    //       dddddddd);
    //
    // which might otherwise be blown up onto many lines. Here, clang-format
    // won't produce "hanging" indents anyway as there is no other trailing
    // call.
    //
    // Also apply a higher penalty if it is not a call, as that might lead to
    // a wrapping like:
    //
    //   aaaaaaa
    //       .aaaaaaaaa.bbbbbbbb(cccccccc);
    return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
               ? 150
               : 35;
  }

  if (Right.is(TT_TrailingAnnotation) &&
      (!Right.Next || Right.Next->isNot(tok::l_paren))) {
    // Moving trailing annotations to the next line is fine for ObjC method
    // declarations.
    if (Line.startsWith(TT_ObjCMethodSpecifier))
      return 10;
    // Generally, breaking before a trailing annotation is bad unless it is
    // function-like. It seems to be especially preferable to keep standard
    // annotations (i.e. "const", "final" and "override") on the same line.
    // Use a slightly higher penalty after ")" so that annotations like
    // "const override" are kept together.
    bool is_short_annotation = Right.TokenText.size() < 10;
    return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
  }

  // In for-loops, prefer breaking at ',' and ';'.
  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
    return 4;

  // In Objective-C method expressions, prefer breaking before "param:" over
  // breaking after it.
  if (Right.is(TT_SelectorName))
    return 0;
  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
    return Line.MightBeFunctionDecl ? 50 : 500;

  // In Objective-C type declarations, avoid breaking after the category's
  // open paren (we'll prefer breaking after the protocol list's opening
  // angle bracket, if present).
  if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
      Left.Previous->isOneOf(tok::identifier, tok::greater)) {
    return 500;
  }

  // A non-zero style penalty for breaking after "(" takes precedence over the
  // built-in values below.
  if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)
    return Style.PenaltyBreakOpenParenthesis;
  if (Left.is(tok::l_paren) && InFunctionDecl &&
      Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
    return 100;
  }
  // Breaking right after the "(" of for/_Generic/if.
  if (Left.is(tok::l_paren) && Left.Previous &&
      (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) ||
       Left.Previous->isIf())) {
    return 1000;
  }
  if (Left.is(tok::equal) && InFunctionDecl)
    return 110;
  if (Right.is(tok::r_brace))
    return 1;
  if (Left.is(TT_TemplateOpener))
    return 100;
  if (Left.opensScope()) {
    // If we aren't aligning after opening parens/braces we can always break
    // here unless the style does not want us to place all arguments on the
    // next line.
    if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
        (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) {
      return 0;
    }
    if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
      return 19;
    return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
                                   : 19;
  }
  if (Left.is(TT_JavaAnnotation))
    return 50;

  if (Left.is(TT_UnaryOperator))
    return 60;
  // Breaks around "+", "," and "<<" that are adjacent to a label string.
  if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
      Left.Previous->isLabelString() &&
      (Left.NextOperator || Left.OperatorIndex != 0)) {
    return 50;
  }
  if (Right.is(tok::plus) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 0)) {
    return 25;
  }
  if (Left.is(tok::comma))
    return 1;
  if (Right.is(tok::lessless) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 1)) {
    return 25;
  }
  if (Right.is(tok::lessless)) {
    // Breaking at a << is really cheap.
    if (Left.isNot(tok::r_paren) || Right.OperatorIndex > 0) {
      // Slightly prefer to break before the first one in log-like statements.
      return 2;
    }
    return 1;
  }
  if (Left.ClosesTemplateDeclaration)
    return Style.PenaltyBreakTemplateDeclaration;
  if (Left.ClosesRequiresClause)
    return 0;
  if (Left.is(TT_ConditionalExpr))
    return prec::Conditional;
  // Fall back to the operator precedence of Left, or of Right if Left has
  // none; assignments use the dedicated style penalty.
  prec::Level Level = Left.getPrecedence();
  if (Level == prec::Unknown)
    Level = Right.getPrecedence();
  if (Level == prec::Assignment)
    return Style.PenaltyBreakAssignment;
  if (Level != prec::Unknown)
    return Level;

  return 3;
}
4355
spaceRequiredBeforeParens(const FormatToken & Right) const4356 bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
4357 if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always)
4358 return true;
4359 if (Right.is(TT_OverloadedOperatorLParen) &&
4360 Style.SpaceBeforeParensOptions.AfterOverloadedOperator) {
4361 return true;
4362 }
4363 if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses &&
4364 Right.ParameterCount > 0) {
4365 return true;
4366 }
4367 return false;
4368 }
4369
/// Decides whether a space is required between the tokens \p Left and
/// \p Right, based on their kinds/annotations, on \p Line and on \c Style.
///
/// The body is one long chain of guarded early returns: the first rule whose
/// condition matches decides the result, so the order of the checks is
/// significant. If no rule matches, a space is required.
///
/// \param Line  The annotated line the tokens belong to; its type and
///              declaration-related flags are consulted by several rules.
/// \param Left  The token on the left-hand side of the potential space.
/// \param Right The token on the right-hand side of the potential space.
/// \returns \c true if \p Left and \p Right should be separated by a space.
///
/// \note Although the function is \c const, the ExceptDoubleParentheses rule
/// writes \c SpacesRequiredBefore on the inner opening parenthesis.
spaceRequiredBetween(const AnnotatedLine & Line,const FormatToken & Left,const FormatToken & Right) const4370 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
4371 const FormatToken &Left,
4372 const FormatToken &Right) const {
// 'return' is followed by a space unless the next token is ';', ')' or '##'.
4373 if (Left.is(tok::kw_return) &&
4374 !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) {
4375 return true;
4376 }
// 'throw' before a '(' whose matching ')' is annotated as a cast closer.
4377 if (Left.is(tok::kw_throw) && Right.is(tok::l_paren) && Right.MatchingParen &&
4378 Right.MatchingParen->is(TT_CastRParen)) {
4379 return true;
4380 }
// Java: 'assert' is always followed by a space.
4381 if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
4382 return true;
// Objective-C property line: space after the 'property' keyword if the style
// asks for it.
4383 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
4384 Left.Tok.getObjCKeywordID() == tok::objc_property) {
4385 return true;
4386 }
// Before '##' a space is kept only after '#'; after '#' or '##' a space is
// kept only before another '#'.
4387 if (Right.is(tok::hashhash))
4388 return Left.is(tok::hash);
4389 if (Left.isOneOf(tok::hashhash, tok::hash))
4390 return Right.is(tok::hash);
// Empty block: Left opens a block, Right is its matching '}' and the line has
// no children.
4391 if (Left.is(BK_Block) && Right.is(tok::r_brace) &&
4392 Right.MatchingParen == &Left && Line.Children.empty()) {
4393 return Style.SpaceInEmptyBlock;
4394 }
// '()' or an empty pair of non-block braces.
4395 if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
4396 (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
4397 Right.is(tok::r_brace) && Right.isNot(BK_Block))) {
4398 return Style.SpacesInParensOptions.InEmptyParentheses;
4399 }
// Custom SpacesInParens with ExceptDoubleParentheses: for '))' whose opening
// parentheses are directly adjacent ('(('), put no space between the closers
// and also clear the space required before the inner '('.
4400 if (Style.SpacesInParens == FormatStyle::SIPO_Custom &&
4401 Style.SpacesInParensOptions.ExceptDoubleParentheses &&
4402 Left.is(tok::r_paren) && Right.is(tok::r_paren)) {
4403 auto *InnerLParen = Left.MatchingParen;
4404 if (InnerLParen && InnerLParen->Previous == Right.MatchingParen) {
4405 InnerLParen->SpacesRequiredBefore = 0;
4406 return false;
4407 }
4408 }
// With InConditionalStatements, add a space just inside the parentheses of a
// condition: the '(' is either annotated as a condition paren or follows a
// keyword accepted by isKeywordWithCondition().
4409 if (Style.SpacesInParensOptions.InConditionalStatements) {
4410 const FormatToken *LeftParen = nullptr;
4411 if (Left.is(tok::l_paren))
4412 LeftParen = &Left;
4413 else if (Right.is(tok::r_paren) && Right.MatchingParen)
4414 LeftParen = Right.MatchingParen;
4415 if (LeftParen) {
4416 if (LeftParen->is(TT_ConditionLParen))
4417 return true;
4418 if (LeftParen->Previous && isKeywordWithCondition(*LeftParen->Previous))
4419 return true;
4420 }
4421 }
4422
4423 // trailing return type 'auto': []() -> auto {}, auto foo() -> auto {}
4424 if (Left.is(tok::kw_auto) && Right.isOneOf(TT_LambdaLBrace, TT_FunctionLBrace,
4425 // function return type 'auto'
4426 TT_FunctionTypeLParen)) {
4427 return true;
4428 }
4429
4430 // auto{x} auto(x)
4431 if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace))
4432 return false;
4433
// Token preceding Left (may be null); used by several rules below.
4434 const auto *BeforeLeft = Left.Previous;
4435
4436 // operator co_await(x)
4437 if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && BeforeLeft &&
4438 BeforeLeft->is(tok::kw_operator)) {
4439 return false;
4440 }
4441 // co_await (x), co_yield (x), co_return (x)
4442 if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) &&
4443 !Right.isOneOf(tok::semi, tok::r_paren)) {
4444 return true;
4445 }
4446
// Any remaining position directly after '(' or before ')': parentheses
// annotated as a cast follow InCStyleCasts, all others follow Other.
4447 if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) {
4448 return (Right.is(TT_CastRParen) ||
4449 (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
4450 ? Style.SpacesInParensOptions.InCStyleCasts
4451 : Style.SpacesInParensOptions.Other;
4452 }
// No space before ';' or ','.
4453 if (Right.isOneOf(tok::semi, tok::comma))
4454 return false;
// '<' on an Objective-C declaration line: no space if the matching '>' is
// followed by ':' (treated here as a lightweight generic); otherwise
// ObjCSpaceBeforeProtocolList decides.
4455 if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
4456 bool IsLightweightGeneric = Right.MatchingParen &&
4457 Right.MatchingParen->Next &&
4458 Right.MatchingParen->Next->is(tok::colon);
4459 return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
4460 }
// 'template <': controlled by SpaceAfterTemplateKeyword.
4461 if (Right.is(tok::less) && Left.is(tok::kw_template))
4462 return Style.SpaceAfterTemplateKeyword;
// No space after '!' or '~'.
4463 if (Left.isOneOf(tok::exclaim, tok::tilde))
4464 return false;
// No space between '@' and a following identifier, literal, '(', '{', 'true'
// or 'false'.
4465 if (Left.is(tok::at) &&
4466 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
4467 tok::numeric_constant, tok::l_paren, tok::l_brace,
4468 tok::kw_true, tok::kw_false)) {
4469 return false;
4470 }
// After ':' a space is required unless the colon is part of an ObjC method
// expression.
4471 if (Left.is(tok::colon))
4472 return Left.isNot(TT_ObjCMethodExpr);
// No space after '::'.
4473 if (Left.is(tok::coloncolon))
4474 return false;
// After '<', or before '<' / '>'.
4475 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
4476 if (Style.Language == FormatStyle::LK_TextProto ||
4477 (Style.Language == FormatStyle::LK_Proto &&
4478 (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
4479 // Format empty list as `<>`.
4480 if (Left.is(tok::less) && Right.is(tok::greater))
4481 return false;
4482 return !Style.Cpp11BracedListStyle;
4483 }
4484 // Don't attempt to format operator<(), as it is handled later.
4485 if (Right.isNot(TT_OverloadedOperatorLParen))
4486 return false;
4487 }
// Before '...': space only after a literal, or after an identifier that
// directly follows 'case'.
4488 if (Right.is(tok::ellipsis)) {
4489 return Left.Tok.isLiteral() || (Left.is(tok::identifier) && BeforeLeft &&
4490 BeforeLeft->is(tok::kw_case));
4491 }
// '[' directly followed by '&': governed by SpacesInSquareBrackets.
4492 if (Left.is(tok::l_square) && Right.is(tok::amp))
4493 return Style.SpacesInSquareBrackets;
// Right is annotated as a pointer/reference: decide on the space before it.
// This branch always returns.
4494 if (Right.is(TT_PointerOrReference)) {
// ')' before the pointer/reference on a line that might be a function
// declaration.
4495 if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
4496 if (!Left.MatchingParen)
4497 return true;
4498 FormatToken *TokenBeforeMatchingParen =
4499 Left.MatchingParen->getPreviousNonComment();
4500 if (!TokenBeforeMatchingParen || Left.isNot(TT_TypeDeclarationParen))
4501 return true;
4502 }
4503 // Add a space if the previous token is a pointer qualifier or the closing
4504 // parenthesis of __attribute__(()) expression and the style requires spaces
4505 // after pointer qualifiers.
4506 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After ||
4507 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
4508 (Left.is(TT_AttributeRParen) ||
4509 Left.canBePointerOrReferenceQualifier())) {
4510 return true;
4511 }
// Always separate a literal from a following pointer/reference token.
4512 if (Left.Tok.isLiteral())
4513 return true;
4514 // for (auto a = 0, b = 0; const auto & c : {1, 2, 3})
4515 if (Left.isTypeOrIdentifier(LangOpts) && Right.Next && Right.Next->Next &&
4516 Right.Next->Next->is(TT_RangeBasedForLoopColon)) {
4517 return getTokenPointerOrReferenceAlignment(Right) !=
4518 FormatStyle::PAS_Left;
4519 }
// Otherwise: never after another pointer/reference or '('. Else a space is
// needed unless the alignment is PAS_Left; even then multi-variable
// declarations get one at nesting level 0, or at level 1 when the line starts
// with for/if/switch.
4520 return !Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
4521 (getTokenPointerOrReferenceAlignment(Right) !=
4522 FormatStyle::PAS_Left ||
4523 (Line.IsMultiVariableDeclStmt &&
4524 (Left.NestingLevel == 0 ||
4525 (Left.NestingLevel == 1 && startsWithInitStatement(Line)))));
4526 }
// Space before the '(' of a function type, unless Left is '(' or a
// pointer/reference that is right-aligned or part of a multi-variable
// declaration.
4527 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
4528 (Left.isNot(TT_PointerOrReference) ||
4529 (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right &&
4530 !Line.IsMultiVariableDeclStmt))) {
4531 return true;
4532 }
// Left is annotated as a pointer/reference: decide on the space after it.
// This branch always returns.
4533 if (Left.is(TT_PointerOrReference)) {
4534 // Add a space if the next token is a pointer qualifier and the style
4535 // requires spaces before pointer qualifiers.
4536 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before ||
4537 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
4538 Right.canBePointerOrReferenceQualifier()) {
4539 return true;
4540 }
4541 // & 1
4542 if (Right.Tok.isLiteral())
4543 return true;
4544 // & /* comment
4545 if (Right.is(TT_BlockComment))
4546 return true;
4547 // foo() -> const Bar * override/final
4548 // S::foo() & noexcept/requires
4549 if (Right.isOneOf(Keywords.kw_override, Keywords.kw_final, tok::kw_noexcept,
4550 TT_RequiresClause) &&
4551 Right.isNot(TT_StartOfName)) {
4552 return true;
4553 }
4554 // & {
4555 if (Right.is(tok::l_brace) && Right.is(BK_Block))
4556 return true;
4557 // for (auto a = 0, b = 0; const auto& c : {1, 2, 3})
4558 if (BeforeLeft && BeforeLeft->isTypeOrIdentifier(LangOpts) && Right.Next &&
4559 Right.Next->is(TT_RangeBasedForLoopColon)) {
4560 return getTokenPointerOrReferenceAlignment(Left) !=
4561 FormatStyle::PAS_Right;
4562 }
// No space before another pointer/reference, an array subscript '[' or '('.
4563 if (Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
4564 tok::l_paren)) {
4565 return false;
4566 }
// Right-aligned pointers/references attach to the token that follows.
4567 if (getTokenPointerOrReferenceAlignment(Left) == FormatStyle::PAS_Right)
4568 return false;
4569 // FIXME: Setting IsMultiVariableDeclStmt for the whole line is error-prone,
4570 // because it does not take into account nested scopes like lambdas.
4571 // In multi-variable declaration statements, attach */& to the variable
4572 // independently of the style. However, avoid doing it if we are in a nested
4573 // scope, e.g. lambda. We still need to special-case statements with
4574 // initializers.
4575 if (Line.IsMultiVariableDeclStmt &&
4576 (Left.NestingLevel == Line.First->NestingLevel ||
4577 ((Left.NestingLevel == Line.First->NestingLevel + 1) &&
4578 startsWithInitStatement(Line)))) {
4579 return false;
4580 }
// Nothing precedes the pointer/reference token.
4581 if (!BeforeLeft)
4582 return false;
// Pointer/reference preceded by '::': only '*' can get a space, and not when
// Right starts "identifier )" whose ')' closes a function-type '('.
4583 if (BeforeLeft->is(tok::coloncolon)) {
4584 if (Left.isNot(tok::star))
4585 return false;
4586 assert(Style.PointerAlignment != FormatStyle::PAS_Right);
4587 if (!Right.startsSequence(tok::identifier, tok::r_paren))
4588 return true;
4589 assert(Right.Next);
4590 const auto *LParen = Right.Next->MatchingParen;
4591 return !LParen || LParen->isNot(TT_FunctionTypeLParen);
4592 }
// Otherwise a space follows unless the token before Left is '(' or '['.
4593 return !BeforeLeft->isOneOf(tok::l_paren, tok::l_square);
4594 }
4595 // Ensure right pointer alignment with ellipsis e.g. int *...P
4596 if (Left.is(tok::ellipsis) && BeforeLeft &&
4597 BeforeLeft->isPointerOrReference()) {
4598 return Style.PointerAlignment != FormatStyle::PAS_Right;
4599 }
4600
// No space in '(*'.
4601 if (Right.is(tok::star) && Left.is(tok::l_paren))
4602 return false;
// No space between '*' and a following pointer/reference token.
4603 if (Left.is(tok::star) && Right.isPointerOrReference())
4604 return false;
// Right is a pointer/reference token that is not annotated as
// TT_PointerOrReference (that case returned above).
4605 if (Right.isPointerOrReference()) {
// Skip backwards over identifiers, type names, template argument lists and
// '::' until 'operator' or some other token is reached.
4606 const FormatToken *Previous = &Left;
4607 while (Previous && Previous->isNot(tok::kw_operator)) {
4608 if (Previous->is(tok::identifier) || Previous->isTypeName(LangOpts)) {
4609 Previous = Previous->getPreviousNonComment();
4610 continue;
4611 }
4612 if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) {
4613 Previous = Previous->MatchingParen->getPreviousNonComment();
4614 continue;
4615 }
4616 if (Previous->is(tok::coloncolon)) {
4617 Previous = Previous->getPreviousNonComment();
4618 continue;
4619 }
4620 break;
4621 }
4622 // Space between the type and the * in:
4623 // operator void*()
4624 // operator char*()
4625 // operator void const*()
4626 // operator void volatile*()
4627 // operator /*comment*/ const char*()
4628 // operator volatile /*comment*/ char*()
4629 // operator Foo*()
4630 // operator C<T>*()
4631 // operator std::Foo*()
4632 // operator C<T>::D<U>*()
4633 // dependent on PointerAlignment style.
4634 if (Previous) {
4635 if (Previous->endsSequence(tok::kw_operator))
4636 return Style.PointerAlignment != FormatStyle::PAS_Left;
4637 if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) {
4638 return (Style.PointerAlignment != FormatStyle::PAS_Left) ||
4639 (Style.SpaceAroundPointerQualifiers ==
4640 FormatStyle::SAPQ_After) ||
4641 (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both);
4642 }
4643 }
4644 }
// C#: keep the space in 'is ['.
4645 if (Style.isCSharp() && Left.is(Keywords.kw_is) && Right.is(tok::l_square))
4646 return true;
// An array-initializer '[' gets inner spaces if SpacesInContainerLiterals is
// set, or, for proto languages without Cpp11BracedListStyle, when the '[' is
// preceded by a selector name and ':'.
4647 const auto SpaceRequiredForArrayInitializerLSquare =
4648 [](const FormatToken &LSquareTok, const FormatStyle &Style) {
4649 return Style.SpacesInContainerLiterals ||
4650 (Style.isProto() && !Style.Cpp11BracedListStyle &&
4651 LSquareTok.endsSequence(tok::l_square, tok::colon,
4652 TT_SelectorName));
4653 };
// Directly after '[' (never a space in an empty '[]').
4654 if (Left.is(tok::l_square)) {
4655 return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
4656 SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
4657 (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare,
4658 TT_LambdaLSquare) &&
4659 Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
4660 }
// Directly before ']': mirrors the rule for the matching '['.
4661 if (Right.is(tok::r_square)) {
4662 return Right.MatchingParen &&
4663 ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
4664 SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
4665 Style)) ||
4666 (Style.SpacesInSquareBrackets &&
4667 Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
4668 TT_StructuredBindingLSquare,
4669 TT_LambdaLSquare)));
4670 }
// No space before '[' unless it has one of the listed bracket annotations,
// follows a numeric constant or dict literal, or SpaceBeforeSquareBrackets
// applies to an array subscript that does not follow ']'.
4671 if (Right.is(tok::l_square) &&
4672 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
4673 TT_DesignatedInitializerLSquare,
4674 TT_StructuredBindingLSquare, TT_AttributeSquare) &&
4675 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
4676 !(Left.isNot(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
4677 Right.is(TT_ArraySubscriptLSquare))) {
4678 return false;
4679 }
4680 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
4681 return !Left.Children.empty(); // No spaces in "{}".
// Just inside the braces of a non-block braced list.
4682 if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) ||
4683 (Right.is(tok::r_brace) && Right.MatchingParen &&
4684 Right.MatchingParen->isNot(BK_Block))) {
4685 return !Style.Cpp11BracedListStyle || Style.SpacesInParensOptions.Other;
4686 }
4687 if (Left.is(TT_BlockComment)) {
4688 // No whitespace in x(/*foo=*/1), except for JavaScript.
4689 return Style.isJavaScript() || !Left.TokenText.ends_with("=*/");
4690 }
4691
4692 // Space between template and attribute.
4693 // e.g. template <typename T> [[nodiscard]] ...
4694 if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
4695 return true;
4696 // Space before parentheses common for all languages
// This branch always returns; most rules combine a specific
// SpaceBeforeParensOptions flag with spaceRequiredBeforeParens().
4697 if (Right.is(tok::l_paren)) {
4698 if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen))
4699 return spaceRequiredBeforeParens(Right);
4700 if (Left.isOneOf(TT_RequiresClause,
4701 TT_RequiresClauseInARequiresExpression)) {
4702 return Style.SpaceBeforeParensOptions.AfterRequiresInClause ||
4703 spaceRequiredBeforeParens(Right);
4704 }
4705 if (Left.is(TT_RequiresExpression)) {
4706 return Style.SpaceBeforeParensOptions.AfterRequiresInExpression ||
4707 spaceRequiredBeforeParens(Right);
4708 }
// Always a space after the closing token of an attribute.
4709 if (Left.is(TT_AttributeRParen) ||
4710 (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) {
4711 return true;
4712 }
4713 if (Left.is(TT_ForEachMacro)) {
4714 return Style.SpaceBeforeParensOptions.AfterForeachMacros ||
4715 spaceRequiredBeforeParens(Right);
4716 }
4717 if (Left.is(TT_IfMacro)) {
4718 return Style.SpaceBeforeParensOptions.AfterIfMacros ||
4719 spaceRequiredBeforeParens(Right);
4720 }
// Custom SpaceBeforeParens: 'new (' / 'delete (' follow AfterPlacementOperator
// unless the '(' belongs to an overloaded operator or Left is a function
// declaration name.
4721 if (Style.SpaceBeforeParens == FormatStyle::SBPO_Custom &&
4722 Left.isOneOf(tok::kw_new, tok::kw_delete) &&
4723 Right.isNot(TT_OverloadedOperatorLParen) &&
4724 !(Line.MightBeFunctionDecl && Left.is(TT_FunctionDeclarationName))) {
4725 return Style.SpaceBeforeParensOptions.AfterPlacementOperator;
4726 }
4727 if (Line.Type == LT_ObjCDecl)
4728 return true;
4729 if (Left.is(tok::semi))
4730 return true;
// Control-statement keywords (and condition parentheses).
4731 if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch,
4732 tok::kw_case, TT_ForEachMacro, TT_ObjCForIn) ||
4733 Left.isIf(Line.Type != LT_PreprocessorDirective) ||
4734 Right.is(TT_ConditionLParen)) {
4735 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4736 spaceRequiredBeforeParens(Right);
4737 }
4738
4739 // TODO add Operator overloading specific Options to
4740 // SpaceBeforeParensOptions
4741 if (Right.is(TT_OverloadedOperatorLParen))
4742 return spaceRequiredBeforeParens(Right);
4743 // Function declaration or definition
4744 if (Line.MightBeFunctionDecl && Right.is(TT_FunctionDeclarationLParen)) {
4745 if (spaceRequiredBeforeParens(Right))
4746 return true;
4747 const auto &Options = Style.SpaceBeforeParensOptions;
4748 return Line.mightBeFunctionDefinition()
4749 ? Options.AfterFunctionDefinitionName
4750 : Options.AfterFunctionDeclarationName;
4751 }
4752 // Lambda
4753 if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) &&
4754 Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare)) {
4755 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
4756 spaceRequiredBeforeParens(Right);
4757 }
// The keyword rules below are skipped when Left follows '.' or '->'.
4758 if (!BeforeLeft || !BeforeLeft->isOneOf(tok::period, tok::arrow)) {
4759 if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) {
4760 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4761 spaceRequiredBeforeParens(Right);
4762 }
4763 if (Left.isOneOf(tok::kw_new, tok::kw_delete)) {
4764 return ((!Line.MightBeFunctionDecl || !BeforeLeft) &&
4765 Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4766 spaceRequiredBeforeParens(Right);
4767 }
4768
// ']' whose matching '[' directly follows 'delete', i.e. 'delete[] ('.
4769 if (Left.is(tok::r_square) && Left.MatchingParen &&
4770 Left.MatchingParen->Previous &&
4771 Left.MatchingParen->Previous->is(tok::kw_delete)) {
4772 return (Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4773 spaceRequiredBeforeParens(Right);
4774 }
4775 }
4776 // Handle builtins like identifiers.
4777 if (Line.Type != LT_PreprocessorDirective &&
4778 (Left.Tok.getIdentifierInfo() || Left.is(tok::r_paren))) {
4779 return spaceRequiredBeforeParens(Right);
4780 }
4781 return false;
4782 }
// No space between '@' and an Objective-C keyword.
4783 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
4784 return false;
// Before a unary operator: no space after '(', '[', '@' or an ObjC method
// expression colon.
4785 if (Right.is(TT_UnaryOperator)) {
4786 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
4787 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
4788 }
4789 // No space between the variable name and the initializer list.
4790 // A a1{1};
4791 // Verilog doesn't have such syntax, but it has word operators that are C++
4792 // identifiers like `a inside {b, c}`. So the rule is not applicable.
4793 if (!Style.isVerilog() &&
4794 (Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
4795 tok::r_paren) ||
4796 Left.isTypeName(LangOpts)) &&
4797 Right.is(tok::l_brace) && Right.getNextNonComment() &&
4798 Right.isNot(BK_Block)) {
4799 return false;
4800 }
// No space around '.'.
4801 if (Left.is(tok::period) || Right.is(tok::period))
4802 return false;
4803 // u#str, U#str, L#str, u8#str
4804 // uR#str, UR#str, LR#str, u8R#str
4805 if (Right.is(tok::hash) && Left.is(tok::identifier) &&
4806 (Left.TokenText == "L" || Left.TokenText == "u" ||
4807 Left.TokenText == "U" || Left.TokenText == "u8" ||
4808 Left.TokenText == "LR" || Left.TokenText == "uR" ||
4809 Left.TokenText == "UR" || Left.TokenText == "u8R")) {
4810 return false;
4811 }
4812 if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
4813 Left.MatchingParen->Previous &&
4814 (Left.MatchingParen->Previous->is(tok::period) ||
4815 Left.MatchingParen->Previous->is(tok::coloncolon))) {
4816 // Java call to generic function with explicit type:
4817 // A.<B<C<...>>>DoSomething();
4818 // A::<B<C<...>>>DoSomething(); // With a Java 8 method reference.
4819 return false;
4820 }
// No space between a template closer and '['.
4821 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
4822 return false;
4823 if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) {
4824 // Objective-C dictionary literal -> no space after opening brace.
4825 return false;
4826 }
4827 if (Right.is(tok::r_brace) && Right.MatchingParen &&
4828 Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) {
4829 // Objective-C dictionary literal -> no space before closing brace.
4830 return false;
4831 }
4832 if (Right.is(TT_TrailingAnnotation) && Right.isOneOf(tok::amp, tok::ampamp) &&
4833 Left.isOneOf(tok::kw_const, tok::kw_volatile) &&
4834 (!Right.Next || Right.Next->is(tok::semi))) {
4835 // Match const and volatile ref-qualifiers without any additional
4836 // qualifiers such as
4837 // void Fn() const &;
4838 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
4839 }
4840
// Default: no rule matched, so separate the two tokens.
4841 return true;
4842 }
4843
spaceRequiredBefore(const AnnotatedLine & Line,const FormatToken & Right) const4844 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
4845 const FormatToken &Right) const {
4846 const FormatToken &Left = *Right.Previous;
4847
4848 // If the token is finalized don't touch it (as it could be in a
4849 // clang-format-off section).
4850 if (Left.Finalized)
4851 return Right.hasWhitespaceBefore();
4852
4853 const bool IsVerilog = Style.isVerilog();
4854 assert(!IsVerilog || !IsCpp);
4855
4856 // Never ever merge two words.
4857 if (Keywords.isWordLike(Right, IsVerilog) &&
4858 Keywords.isWordLike(Left, IsVerilog)) {
4859 return true;
4860 }
4861
4862 // Leave a space between * and /* to avoid C4138 `comment end` found outside
4863 // of comment.
4864 if (Left.is(tok::star) && Right.is(tok::comment))
4865 return true;
4866
4867 if (IsCpp) {
4868 if (Left.is(TT_OverloadedOperator) &&
4869 Right.isOneOf(TT_TemplateOpener, TT_TemplateCloser)) {
4870 return true;
4871 }
4872 // Space between UDL and dot: auto b = 4s .count();
4873 if (Right.is(tok::period) && Left.is(tok::numeric_constant))
4874 return true;
4875 // Space between import <iostream>.
4876 // or import .....;
4877 if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis))
4878 return true;
4879 // Space between `module :` and `import :`.
4880 if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) &&
4881 Right.is(TT_ModulePartitionColon)) {
4882 return true;
4883 }
4884 // No space between import foo:bar but keep a space between import :bar;
4885 if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon))
4886 return false;
4887 // No space between :bar;
4888 if (Left.is(TT_ModulePartitionColon) &&
4889 Right.isOneOf(tok::identifier, tok::kw_private)) {
4890 return false;
4891 }
4892 if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
4893 Line.First->is(Keywords.kw_import)) {
4894 return false;
4895 }
4896 // Space in __attribute__((attr)) ::type.
4897 if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) &&
4898 Right.is(tok::coloncolon)) {
4899 return true;
4900 }
4901
4902 if (Left.is(tok::kw_operator))
4903 return Right.is(tok::coloncolon);
4904 if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
4905 !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) {
4906 return true;
4907 }
4908 if (Left.is(tok::less) && Left.is(TT_OverloadedOperator) &&
4909 Right.is(TT_TemplateOpener)) {
4910 return true;
4911 }
4912 // C++ Core Guidelines suppression tag, e.g. `[[suppress(type.5)]]`.
4913 if (Left.is(tok::identifier) && Right.is(tok::numeric_constant))
4914 return Right.TokenText[0] != '.';
4915 // `Left` is a keyword (including C++ alternative operator) or identifier.
4916 if (Left.Tok.getIdentifierInfo() && Right.Tok.isLiteral())
4917 return true;
4918 } else if (Style.isProto()) {
4919 if (Right.is(tok::period) &&
4920 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
4921 Keywords.kw_repeated, Keywords.kw_extend)) {
4922 return true;
4923 }
4924 if (Right.is(tok::l_paren) &&
4925 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) {
4926 return true;
4927 }
4928 if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
4929 return true;
4930 // Slashes occur in text protocol extension syntax: [type/type] { ... }.
4931 if (Left.is(tok::slash) || Right.is(tok::slash))
4932 return false;
4933 if (Left.MatchingParen &&
4934 Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
4935 Right.isOneOf(tok::l_brace, tok::less)) {
4936 return !Style.Cpp11BracedListStyle;
4937 }
4938 // A percent is probably part of a formatting specification, such as %lld.
4939 if (Left.is(tok::percent))
4940 return false;
4941 // Preserve the existence of a space before a percent for cases like 0x%04x
4942 // and "%d %d"
4943 if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
4944 return Right.hasWhitespaceBefore();
4945 } else if (Style.isJson()) {
4946 if (Right.is(tok::colon) && Left.is(tok::string_literal))
4947 return Style.SpaceBeforeJsonColon;
4948 } else if (Style.isCSharp()) {
4949 // Require spaces around '{' and before '}' unless they appear in
4950 // interpolated strings. Interpolated strings are merged into a single token
4951 // so cannot have spaces inserted by this function.
4952
4953 // No space between 'this' and '['
4954 if (Left.is(tok::kw_this) && Right.is(tok::l_square))
4955 return false;
4956
4957 // No space between 'new' and '('
4958 if (Left.is(tok::kw_new) && Right.is(tok::l_paren))
4959 return false;
4960
4961 // Space before { (including space within '{ {').
4962 if (Right.is(tok::l_brace))
4963 return true;
4964
4965 // Spaces inside braces.
4966 if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace))
4967 return true;
4968
4969 if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace))
4970 return true;
4971
4972 // Spaces around '=>'.
4973 if (Left.is(TT_FatArrow) || Right.is(TT_FatArrow))
4974 return true;
4975
4976 // No spaces around attribute target colons
4977 if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon))
4978 return false;
4979
4980 // space between type and variable e.g. Dictionary<string,string> foo;
4981 if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName))
4982 return true;
4983
4984 // spaces inside square brackets.
4985 if (Left.is(tok::l_square) || Right.is(tok::r_square))
4986 return Style.SpacesInSquareBrackets;
4987
4988 // No space before ? in nullable types.
4989 if (Right.is(TT_CSharpNullable))
4990 return false;
4991
4992 // No space before null forgiving '!'.
4993 if (Right.is(TT_NonNullAssertion))
4994 return false;
4995
4996 // No space between consecutive commas '[,,]'.
4997 if (Left.is(tok::comma) && Right.is(tok::comma))
4998 return false;
4999
5000 // space after var in `var (key, value)`
5001 if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren))
5002 return true;
5003
5004 // space between keywords and paren e.g. "using ("
5005 if (Right.is(tok::l_paren)) {
5006 if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
5007 Keywords.kw_lock)) {
5008 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
5009 spaceRequiredBeforeParens(Right);
5010 }
5011 }
5012
5013 // space between method modifier and opening parenthesis of a tuple return
5014 // type
5015 if ((Left.isAccessSpecifierKeyword() ||
5016 Left.isOneOf(tok::kw_virtual, tok::kw_extern, tok::kw_static,
5017 Keywords.kw_internal, Keywords.kw_abstract,
5018 Keywords.kw_sealed, Keywords.kw_override,
5019 Keywords.kw_async, Keywords.kw_unsafe)) &&
5020 Right.is(tok::l_paren)) {
5021 return true;
5022 }
5023 } else if (Style.isJavaScript()) {
5024 if (Left.is(TT_FatArrow))
5025 return true;
5026 // for await ( ...
5027 if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
5028 Left.Previous->is(tok::kw_for)) {
5029 return true;
5030 }
5031 if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
5032 Right.MatchingParen) {
5033 const FormatToken *Next = Right.MatchingParen->getNextNonComment();
5034 // An async arrow function, for example: `x = async () => foo();`,
5035 // as opposed to calling a function called async: `x = async();`
5036 if (Next && Next->is(TT_FatArrow))
5037 return true;
5038 }
5039 if ((Left.is(TT_TemplateString) && Left.TokenText.ends_with("${")) ||
5040 (Right.is(TT_TemplateString) && Right.TokenText.starts_with("}"))) {
5041 return false;
5042 }
5043 // In tagged template literals ("html`bar baz`"), there is no space between
5044 // the tag identifier and the template string.
5045 if (Keywords.isJavaScriptIdentifier(Left,
5046 /* AcceptIdentifierName= */ false) &&
5047 Right.is(TT_TemplateString)) {
5048 return false;
5049 }
5050 if (Right.is(tok::star) &&
5051 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) {
5052 return false;
5053 }
5054 if (Right.isOneOf(tok::l_brace, tok::l_square) &&
5055 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
5056 Keywords.kw_extends, Keywords.kw_implements)) {
5057 return true;
5058 }
5059 if (Right.is(tok::l_paren)) {
5060 // JS methods can use some keywords as names (e.g. `delete()`).
5061 if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
5062 return false;
5063 // Valid JS method names can include keywords, e.g. `foo.delete()` or
5064 // `bar.instanceof()`. Recognize call positions by preceding period.
5065 if (Left.Previous && Left.Previous->is(tok::period) &&
5066 Left.Tok.getIdentifierInfo()) {
5067 return false;
5068 }
5069 // Additional unary JavaScript operators that need a space after.
5070 if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
5071 tok::kw_void)) {
5072 return true;
5073 }
5074 }
5075 // `foo as const;` casts into a const type.
5076 if (Left.endsSequence(tok::kw_const, Keywords.kw_as))
5077 return false;
5078 if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
5079 tok::kw_const) ||
5080 // "of" is only a keyword if it appears after another identifier
5081 // (e.g. as "const x of y" in a for loop), or after a destructuring
5082 // operation (const [x, y] of z, const {a, b} of c).
5083 (Left.is(Keywords.kw_of) && Left.Previous &&
5084 (Left.Previous->is(tok::identifier) ||
5085 Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
5086 (!Left.Previous || Left.Previous->isNot(tok::period))) {
5087 return true;
5088 }
5089 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
5090 Left.Previous->is(tok::period) && Right.is(tok::l_paren)) {
5091 return false;
5092 }
5093 if (Left.is(Keywords.kw_as) &&
5094 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) {
5095 return true;
5096 }
5097 if (Left.is(tok::kw_default) && Left.Previous &&
5098 Left.Previous->is(tok::kw_export)) {
5099 return true;
5100 }
5101 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
5102 return true;
5103 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
5104 return false;
5105 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
5106 return false;
5107 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
5108 Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) {
5109 return false;
5110 }
5111 if (Left.is(tok::ellipsis))
5112 return false;
5113 if (Left.is(TT_TemplateCloser) &&
5114 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
5115 Keywords.kw_implements, Keywords.kw_extends)) {
5116 // Type assertions ('<type>expr') are not followed by whitespace. Other
5117 // locations that should have whitespace following are identified by the
5118 // above set of follower tokens.
5119 return false;
5120 }
5121 if (Right.is(TT_NonNullAssertion))
5122 return false;
5123 if (Left.is(TT_NonNullAssertion) &&
5124 Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) {
5125 return true; // "x! as string", "x! in y"
5126 }
5127 } else if (Style.Language == FormatStyle::LK_Java) {
5128 if (Left.is(TT_CaseLabelArrow) || Right.is(TT_CaseLabelArrow))
5129 return true;
5130 if (Left.is(tok::r_square) && Right.is(tok::l_brace))
5131 return true;
5132 // spaces inside square brackets.
5133 if (Left.is(tok::l_square) || Right.is(tok::r_square))
5134 return Style.SpacesInSquareBrackets;
5135
5136 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) {
5137 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
5138 spaceRequiredBeforeParens(Right);
5139 }
5140 if ((Left.isAccessSpecifierKeyword() ||
5141 Left.isOneOf(tok::kw_static, Keywords.kw_final, Keywords.kw_abstract,
5142 Keywords.kw_native)) &&
5143 Right.is(TT_TemplateOpener)) {
5144 return true;
5145 }
5146 } else if (IsVerilog) {
5147 // An escaped identifier ends with whitespace.
5148 if (Left.is(tok::identifier) && Left.TokenText[0] == '\\')
5149 return true;
5150 // Add space between things in a primitive's state table unless in a
5151 // transition like `(0?)`.
5152 if ((Left.is(TT_VerilogTableItem) &&
5153 !Right.isOneOf(tok::r_paren, tok::semi)) ||
5154 (Right.is(TT_VerilogTableItem) && Left.isNot(tok::l_paren))) {
5155 const FormatToken *Next = Right.getNextNonComment();
5156 return !(Next && Next->is(tok::r_paren));
5157 }
5158 // Don't add space within a delay like `#0`.
5159 if (Left.isNot(TT_BinaryOperator) &&
5160 Left.isOneOf(Keywords.kw_verilogHash, Keywords.kw_verilogHashHash)) {
5161 return false;
5162 }
5163 // Add space after a delay.
5164 if (Right.isNot(tok::semi) &&
5165 (Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHash) ||
5166 Left.endsSequence(tok::numeric_constant,
5167 Keywords.kw_verilogHashHash) ||
5168 (Left.is(tok::r_paren) && Left.MatchingParen &&
5169 Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) {
5170 return true;
5171 }
5172 // Don't add embedded spaces in a number literal like `16'h1?ax` or an array
5173 // literal like `'{}`.
5174 if (Left.is(Keywords.kw_apostrophe) ||
5175 (Left.is(TT_VerilogNumberBase) && Right.is(tok::numeric_constant))) {
5176 return false;
5177 }
5178 // Add spaces around the implication operator `->`.
5179 if (Left.is(tok::arrow) || Right.is(tok::arrow))
5180 return true;
5181 // Don't add spaces between two at signs. Like in a coverage event.
5182 // Don't add spaces between at and a sensitivity list like
5183 // `@(posedge clk)`.
5184 if (Left.is(tok::at) && Right.isOneOf(tok::l_paren, tok::star, tok::at))
5185 return false;
5186 // Add space between the type name and dimension like `logic [1:0]`.
5187 if (Right.is(tok::l_square) &&
5188 Left.isOneOf(TT_VerilogDimensionedTypeName, Keywords.kw_function)) {
5189 return true;
5190 }
5191 // In a tagged union expression, there should be a space after the tag.
5192 if (Right.isOneOf(tok::period, Keywords.kw_apostrophe) &&
5193 Keywords.isVerilogIdentifier(Left) && Left.getPreviousNonComment() &&
5194 Left.getPreviousNonComment()->is(Keywords.kw_tagged)) {
5195 return true;
5196 }
5197 // Don't add spaces between a casting type and the quote or repetition count
5198 // and the brace. The case of tagged union expressions is handled by the
5199 // previous rule.
5200 if ((Right.is(Keywords.kw_apostrophe) ||
5201 (Right.is(BK_BracedInit) && Right.is(tok::l_brace))) &&
5202 !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) ||
5203 Keywords.isVerilogWordOperator(Left)) &&
5204 (Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace,
5205 tok::numeric_constant) ||
5206 Keywords.isWordLike(Left))) {
5207 return false;
5208 }
5209 // Don't add spaces in imports like `import foo::*;`.
5210 if ((Right.is(tok::star) && Left.is(tok::coloncolon)) ||
5211 (Left.is(tok::star) && Right.is(tok::semi))) {
5212 return false;
5213 }
5214 // Add space in attribute like `(* ASYNC_REG = "TRUE" *)`.
5215 if (Left.endsSequence(tok::star, tok::l_paren) && Right.is(tok::identifier))
5216 return true;
5217 // Add space before drive strength like in `wire (strong1, pull0)`.
5218 if (Right.is(tok::l_paren) && Right.is(TT_VerilogStrength))
5219 return true;
5220 // Don't add space in a streaming concatenation like `{>>{j}}`.
5221 if ((Left.is(tok::l_brace) &&
5222 Right.isOneOf(tok::lessless, tok::greatergreater)) ||
5223 (Left.endsSequence(tok::lessless, tok::l_brace) ||
5224 Left.endsSequence(tok::greatergreater, tok::l_brace))) {
5225 return false;
5226 }
5227 } else if (Style.isTableGen()) {
5228 // Avoid to connect [ and {. [{ is start token of multiline string.
5229 if (Left.is(tok::l_square) && Right.is(tok::l_brace))
5230 return true;
5231 if (Left.is(tok::r_brace) && Right.is(tok::r_square))
5232 return true;
5233 // Do not insert around colon in DAGArg and cond operator.
5234 if (Right.isOneOf(TT_TableGenDAGArgListColon,
5235 TT_TableGenDAGArgListColonToAlign) ||
5236 Left.isOneOf(TT_TableGenDAGArgListColon,
5237 TT_TableGenDAGArgListColonToAlign)) {
5238 return false;
5239 }
5240 if (Right.is(TT_TableGenCondOperatorColon))
5241 return false;
5242 if (Left.isOneOf(TT_TableGenDAGArgOperatorID,
5243 TT_TableGenDAGArgOperatorToBreak) &&
5244 Right.isNot(TT_TableGenDAGArgCloser)) {
5245 return true;
5246 }
5247 // Do not insert bang operators and consequent openers.
5248 if (Right.isOneOf(tok::l_paren, tok::less) &&
5249 Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator)) {
5250 return false;
5251 }
5252 // Trailing paste requires space before '{' or ':', the case in name values.
5253 // Not before ';', the case in normal values.
5254 if (Left.is(TT_TableGenTrailingPasteOperator) &&
5255 Right.isOneOf(tok::l_brace, tok::colon)) {
5256 return true;
5257 }
5258 // Otherwise paste operator does not prefer space around.
5259 if (Left.is(tok::hash) || Right.is(tok::hash))
5260 return false;
5261 // Sure not to connect after defining keywords.
5262 if (Keywords.isTableGenDefinition(Left))
5263 return true;
5264 }
5265
5266 if (Left.is(TT_ImplicitStringLiteral))
5267 return Right.hasWhitespaceBefore();
5268 if (Line.Type == LT_ObjCMethodDecl) {
5269 if (Left.is(TT_ObjCMethodSpecifier))
5270 return true;
5271 if (Left.is(tok::r_paren) && Left.isNot(TT_AttributeRParen) &&
5272 canBeObjCSelectorComponent(Right)) {
5273 // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
5274 // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
5275 // method declaration.
5276 return false;
5277 }
5278 }
5279 if (Line.Type == LT_ObjCProperty &&
5280 (Right.is(tok::equal) || Left.is(tok::equal))) {
5281 return false;
5282 }
5283
5284 if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
5285 Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) {
5286 return true;
5287 }
5288 if (Left.is(tok::comma) && Right.isNot(TT_OverloadedOperatorLParen) &&
5289 // In an unexpanded macro call we only find the parentheses and commas
5290 // in a line; the commas and closing parenthesis do not require a space.
5291 (Left.Children.empty() || !Left.MacroParent)) {
5292 return true;
5293 }
5294 if (Right.is(tok::comma))
5295 return false;
5296 if (Right.is(TT_ObjCBlockLParen))
5297 return true;
5298 if (Right.is(TT_CtorInitializerColon))
5299 return Style.SpaceBeforeCtorInitializerColon;
5300 if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
5301 return false;
5302 if (Right.is(TT_RangeBasedForLoopColon) &&
5303 !Style.SpaceBeforeRangeBasedForLoopColon) {
5304 return false;
5305 }
5306 if (Left.is(TT_BitFieldColon)) {
5307 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
5308 Style.BitFieldColonSpacing == FormatStyle::BFCS_After;
5309 }
5310 if (Right.is(tok::colon)) {
5311 if (Right.is(TT_CaseLabelColon))
5312 return Style.SpaceBeforeCaseColon;
5313 if (Right.is(TT_GotoLabelColon))
5314 return false;
5315 // `private:` and `public:`.
5316 if (!Right.getNextNonComment())
5317 return false;
5318 if (Right.is(TT_ObjCMethodExpr))
5319 return false;
5320 if (Left.is(tok::question))
5321 return false;
5322 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
5323 return false;
5324 if (Right.is(TT_DictLiteral))
5325 return Style.SpacesInContainerLiterals;
5326 if (Right.is(TT_AttributeColon))
5327 return false;
5328 if (Right.is(TT_CSharpNamedArgumentColon))
5329 return false;
5330 if (Right.is(TT_GenericSelectionColon))
5331 return false;
5332 if (Right.is(TT_BitFieldColon)) {
5333 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
5334 Style.BitFieldColonSpacing == FormatStyle::BFCS_Before;
5335 }
5336 return true;
5337 }
5338 // Do not merge "- -" into "--".
5339 if ((Left.isOneOf(tok::minus, tok::minusminus) &&
5340 Right.isOneOf(tok::minus, tok::minusminus)) ||
5341 (Left.isOneOf(tok::plus, tok::plusplus) &&
5342 Right.isOneOf(tok::plus, tok::plusplus))) {
5343 return true;
5344 }
5345 if (Left.is(TT_UnaryOperator)) {
5346 // Lambda captures allow for a lone &, so "&]" needs to be properly
5347 // handled.
5348 if (Left.is(tok::amp) && Right.is(tok::r_square))
5349 return Style.SpacesInSquareBrackets;
5350 return Style.SpaceAfterLogicalNot && Left.is(tok::exclaim);
5351 }
5352
5353 // If the next token is a binary operator or a selector name, we have
5354 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
5355 if (Left.is(TT_CastRParen)) {
5356 return Style.SpaceAfterCStyleCast ||
5357 Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
5358 }
5359
5360 auto ShouldAddSpacesInAngles = [this, &Right]() {
5361 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
5362 return true;
5363 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
5364 return Right.hasWhitespaceBefore();
5365 return false;
5366 };
5367
5368 if (Left.is(tok::greater) && Right.is(tok::greater)) {
5369 if (Style.Language == FormatStyle::LK_TextProto ||
5370 (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) {
5371 return !Style.Cpp11BracedListStyle;
5372 }
5373 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
5374 ((Style.Standard < FormatStyle::LS_Cpp11) ||
5375 ShouldAddSpacesInAngles());
5376 }
5377 if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
5378 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
5379 (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) {
5380 return false;
5381 }
5382 if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
5383 Right.getPrecedence() == prec::Assignment) {
5384 return false;
5385 }
5386 if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
5387 (Left.is(tok::identifier) || Left.is(tok::kw_this))) {
5388 return false;
5389 }
5390 if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) {
5391 // Generally don't remove existing spaces between an identifier and "::".
5392 // The identifier might actually be a macro name such as ALWAYS_INLINE. If
5393 // this turns out to be too lenient, add analysis of the identifier itself.
5394 return Right.hasWhitespaceBefore();
5395 }
5396 if (Right.is(tok::coloncolon) &&
5397 !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) {
5398 // Put a space between < and :: in vector< ::std::string >
5399 return (Left.is(TT_TemplateOpener) &&
5400 ((Style.Standard < FormatStyle::LS_Cpp11) ||
5401 ShouldAddSpacesInAngles())) ||
5402 !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
5403 tok::kw___super, TT_TemplateOpener,
5404 TT_TemplateCloser)) ||
5405 (Left.is(tok::l_paren) && Style.SpacesInParensOptions.Other);
5406 }
5407 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
5408 return ShouldAddSpacesInAngles();
5409 // Space before TT_StructuredBindingLSquare.
5410 if (Right.is(TT_StructuredBindingLSquare)) {
5411 return !Left.isOneOf(tok::amp, tok::ampamp) ||
5412 getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right;
5413 }
5414 // Space before & or && following a TT_StructuredBindingLSquare.
5415 if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
5416 Right.isOneOf(tok::amp, tok::ampamp)) {
5417 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
5418 }
5419 if ((Right.is(TT_BinaryOperator) && Left.isNot(tok::l_paren)) ||
5420 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
5421 Right.isNot(tok::r_paren))) {
5422 return true;
5423 }
5424 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
5425 Left.MatchingParen &&
5426 Left.MatchingParen->is(TT_OverloadedOperatorLParen)) {
5427 return false;
5428 }
5429 if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
5430 Line.Type == LT_ImportStatement) {
5431 return true;
5432 }
5433 if (Right.is(TT_TrailingUnaryOperator))
5434 return false;
5435 if (Left.is(TT_RegexLiteral))
5436 return false;
5437 return spaceRequiredBetween(Line, Left, Right);
5438 }
5439
5440 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
isAllmanBrace(const FormatToken & Tok)5441 static bool isAllmanBrace(const FormatToken &Tok) {
5442 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
5443 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
5444 }
5445
5446 // Returns 'true' if 'Tok' is a function argument.
IsFunctionArgument(const FormatToken & Tok)5447 static bool IsFunctionArgument(const FormatToken &Tok) {
5448 return Tok.MatchingParen && Tok.MatchingParen->Next &&
5449 Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
5450 }
5451
5452 static bool
isItAnEmptyLambdaAllowed(const FormatToken & Tok,FormatStyle::ShortLambdaStyle ShortLambdaOption)5453 isItAnEmptyLambdaAllowed(const FormatToken &Tok,
5454 FormatStyle::ShortLambdaStyle ShortLambdaOption) {
5455 return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None;
5456 }
5457
isAllmanLambdaBrace(const FormatToken & Tok)5458 static bool isAllmanLambdaBrace(const FormatToken &Tok) {
5459 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
5460 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
5461 }
5462
mustBreakBefore(const AnnotatedLine & Line,const FormatToken & Right) const5463 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
5464 const FormatToken &Right) const {
5465 const FormatToken &Left = *Right.Previous;
5466 if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
5467 return true;
5468
5469 if (Style.BreakFunctionDefinitionParameters && Line.MightBeFunctionDecl &&
5470 Line.mightBeFunctionDefinition() && Left.MightBeFunctionDeclParen &&
5471 Left.ParameterCount > 0) {
5472 return true;
5473 }
5474
5475 const auto *BeforeLeft = Left.Previous;
5476 const auto *AfterRight = Right.Next;
5477
5478 if (Style.isCSharp()) {
5479 if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) &&
5480 Style.BraceWrapping.AfterFunction) {
5481 return true;
5482 }
5483 if (Right.is(TT_CSharpNamedArgumentColon) ||
5484 Left.is(TT_CSharpNamedArgumentColon)) {
5485 return false;
5486 }
5487 if (Right.is(TT_CSharpGenericTypeConstraint))
5488 return true;
5489 if (AfterRight && AfterRight->is(TT_FatArrow) &&
5490 (Right.is(tok::numeric_constant) ||
5491 (Right.is(tok::identifier) && Right.TokenText == "_"))) {
5492 return true;
5493 }
5494
5495 // Break after C# [...] and before public/protected/private/internal.
5496 if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) &&
5497 (Right.isAccessSpecifier(/*ColonRequired=*/false) ||
5498 Right.is(Keywords.kw_internal))) {
5499 return true;
5500 }
5501 // Break between ] and [ but only when there are really 2 attributes.
5502 if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) &&
5503 Left.is(tok::r_square) && Right.is(tok::l_square)) {
5504 return true;
5505 }
5506 } else if (Style.isJavaScript()) {
5507 // FIXME: This might apply to other languages and token kinds.
5508 if (Right.is(tok::string_literal) && Left.is(tok::plus) && BeforeLeft &&
5509 BeforeLeft->is(tok::string_literal)) {
5510 return true;
5511 }
5512 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
5513 BeforeLeft && BeforeLeft->is(tok::equal) &&
5514 Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
5515 tok::kw_const) &&
5516 // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
5517 // above.
5518 !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) {
5519 // Object literals on the top level of a file are treated as "enum-style".
5520 // Each key/value pair is put on a separate line, instead of bin-packing.
5521 return true;
5522 }
5523 if (Left.is(tok::l_brace) && Line.Level == 0 &&
5524 (Line.startsWith(tok::kw_enum) ||
5525 Line.startsWith(tok::kw_const, tok::kw_enum) ||
5526 Line.startsWith(tok::kw_export, tok::kw_enum) ||
5527 Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) {
5528 // JavaScript top-level enum key/value pairs are put on separate lines
5529 // instead of bin-packing.
5530 return true;
5531 }
5532 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && BeforeLeft &&
5533 BeforeLeft->is(TT_FatArrow)) {
5534 // JS arrow function (=> {...}).
5535 switch (Style.AllowShortLambdasOnASingleLine) {
5536 case FormatStyle::SLS_All:
5537 return false;
5538 case FormatStyle::SLS_None:
5539 return true;
5540 case FormatStyle::SLS_Empty:
5541 return !Left.Children.empty();
5542 case FormatStyle::SLS_Inline:
5543 // allow one-lining inline (e.g. in function call args) and empty arrow
5544 // functions.
5545 return (Left.NestingLevel == 0 && Line.Level == 0) &&
5546 !Left.Children.empty();
5547 }
5548 llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum");
5549 }
5550
5551 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
5552 !Left.Children.empty()) {
5553 // Support AllowShortFunctionsOnASingleLine for JavaScript.
5554 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
5555 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
5556 (Left.NestingLevel == 0 && Line.Level == 0 &&
5557 Style.AllowShortFunctionsOnASingleLine &
5558 FormatStyle::SFS_InlineOnly);
5559 }
5560 } else if (Style.Language == FormatStyle::LK_Java) {
5561 if (Right.is(tok::plus) && Left.is(tok::string_literal) && AfterRight &&
5562 AfterRight->is(tok::string_literal)) {
5563 return true;
5564 }
5565 } else if (Style.isVerilog()) {
5566 // Break between assignments.
5567 if (Left.is(TT_VerilogAssignComma))
5568 return true;
5569 // Break between ports of different types.
5570 if (Left.is(TT_VerilogTypeComma))
5571 return true;
5572 // Break between ports in a module instantiation and after the parameter
5573 // list.
5574 if (Style.VerilogBreakBetweenInstancePorts &&
5575 (Left.is(TT_VerilogInstancePortComma) ||
5576 (Left.is(tok::r_paren) && Keywords.isVerilogIdentifier(Right) &&
5577 Left.MatchingParen &&
5578 Left.MatchingParen->is(TT_VerilogInstancePortLParen)))) {
5579 return true;
5580 }
5581 // Break after labels. In Verilog labels don't have the 'case' keyword, so
5582 // it is hard to identify them in UnwrappedLineParser.
5583 if (!Keywords.isVerilogBegin(Right) && Keywords.isVerilogEndOfLabel(Left))
5584 return true;
5585 } else if (Style.BreakAdjacentStringLiterals &&
5586 (IsCpp || Style.isProto() ||
5587 Style.Language == FormatStyle::LK_TableGen)) {
5588 if (Left.isStringLiteral() && Right.isStringLiteral())
5589 return true;
5590 }
5591
5592 // Basic JSON newline processing.
5593 if (Style.isJson()) {
5594 // Always break after a JSON record opener.
5595 // {
5596 // }
5597 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace))
5598 return true;
5599 // Always break after a JSON array opener based on BreakArrays.
5600 if ((Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
5601 Right.isNot(tok::r_square)) ||
5602 Left.is(tok::comma)) {
5603 if (Right.is(tok::l_brace))
5604 return true;
5605 // scan to the right if an we see an object or an array inside
5606 // then break.
5607 for (const auto *Tok = &Right; Tok; Tok = Tok->Next) {
5608 if (Tok->isOneOf(tok::l_brace, tok::l_square))
5609 return true;
5610 if (Tok->isOneOf(tok::r_brace, tok::r_square))
5611 break;
5612 }
5613 return Style.BreakArrays;
5614 }
5615 } else if (Style.isTableGen()) {
5616 // Break the comma in side cond operators.
5617 // !cond(case1:1,
5618 // case2:0);
5619 if (Left.is(TT_TableGenCondOperatorComma))
5620 return true;
5621 if (Left.is(TT_TableGenDAGArgOperatorToBreak) &&
5622 Right.isNot(TT_TableGenDAGArgCloser)) {
5623 return true;
5624 }
5625 if (Left.is(TT_TableGenDAGArgListCommaToBreak))
5626 return true;
5627 if (Right.is(TT_TableGenDAGArgCloser) && Right.MatchingParen &&
5628 Right.MatchingParen->is(TT_TableGenDAGArgOpenerToBreak) &&
5629 &Left != Right.MatchingParen->Next) {
5630 // Check to avoid empty DAGArg such as (ins).
5631 return Style.TableGenBreakInsideDAGArg == FormatStyle::DAS_BreakAll;
5632 }
5633 }
5634
5635 if (Line.startsWith(tok::kw_asm) && Right.is(TT_InlineASMColon) &&
5636 Style.BreakBeforeInlineASMColon == FormatStyle::BBIAS_Always) {
5637 return true;
5638 }
5639
5640 // If the last token before a '}', ']', or ')' is a comma or a trailing
5641 // comment, the intention is to insert a line break after it in order to make
5642 // shuffling around entries easier. Import statements, especially in
5643 // JavaScript, can be an exception to this rule.
5644 if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
5645 const FormatToken *BeforeClosingBrace = nullptr;
5646 if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
5647 (Style.isJavaScript() && Left.is(tok::l_paren))) &&
5648 Left.isNot(BK_Block) && Left.MatchingParen) {
5649 BeforeClosingBrace = Left.MatchingParen->Previous;
5650 } else if (Right.MatchingParen &&
5651 (Right.MatchingParen->isOneOf(tok::l_brace,
5652 TT_ArrayInitializerLSquare) ||
5653 (Style.isJavaScript() &&
5654 Right.MatchingParen->is(tok::l_paren)))) {
5655 BeforeClosingBrace = &Left;
5656 }
5657 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
5658 BeforeClosingBrace->isTrailingComment())) {
5659 return true;
5660 }
5661 }
5662
5663 if (Right.is(tok::comment)) {
5664 return Left.isNot(BK_BracedInit) && Left.isNot(TT_CtorInitializerColon) &&
5665 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
5666 }
5667 if (Left.isTrailingComment())
5668 return true;
5669 if (Left.IsUnterminatedLiteral)
5670 return true;
5671
5672 if (BeforeLeft && BeforeLeft->is(tok::lessless) &&
5673 Left.is(tok::string_literal) && Right.is(tok::lessless) && AfterRight &&
5674 AfterRight->is(tok::string_literal)) {
5675 return Right.NewlinesBefore > 0;
5676 }
5677
5678 if (Right.is(TT_RequiresClause)) {
5679 switch (Style.RequiresClausePosition) {
5680 case FormatStyle::RCPS_OwnLine:
5681 case FormatStyle::RCPS_WithFollowing:
5682 return true;
5683 default:
5684 break;
5685 }
5686 }
5687 // Can break after template<> declaration
5688 if (Left.ClosesTemplateDeclaration && Left.MatchingParen &&
5689 Left.MatchingParen->NestingLevel == 0) {
5690 // Put concepts on the next line e.g.
5691 // template<typename T>
5692 // concept ...
5693 if (Right.is(tok::kw_concept))
5694 return Style.BreakBeforeConceptDeclarations == FormatStyle::BBCDS_Always;
5695 return Style.BreakTemplateDeclarations == FormatStyle::BTDS_Yes ||
5696 (Style.BreakTemplateDeclarations == FormatStyle::BTDS_Leave &&
5697 Right.NewlinesBefore > 0);
5698 }
5699 if (Left.ClosesRequiresClause && Right.isNot(tok::semi)) {
5700 switch (Style.RequiresClausePosition) {
5701 case FormatStyle::RCPS_OwnLine:
5702 case FormatStyle::RCPS_WithPreceding:
5703 return true;
5704 default:
5705 break;
5706 }
5707 }
5708 if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) {
5709 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon &&
5710 (Left.is(TT_CtorInitializerComma) ||
5711 Right.is(TT_CtorInitializerColon))) {
5712 return true;
5713 }
5714
5715 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5716 Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma)) {
5717 return true;
5718 }
5719 }
5720 if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine &&
5721 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
5722 Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) {
5723 return true;
5724 }
5725 if (Style.PackConstructorInitializers == FormatStyle::PCIS_NextLineOnly) {
5726 if ((Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon ||
5727 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) &&
5728 Right.is(TT_CtorInitializerColon)) {
5729 return true;
5730 }
5731
5732 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5733 Left.is(TT_CtorInitializerColon)) {
5734 return true;
5735 }
5736 }
5737 // Break only if we have multiple inheritance.
5738 if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
5739 Right.is(TT_InheritanceComma)) {
5740 return true;
5741 }
5742 if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma &&
5743 Left.is(TT_InheritanceComma)) {
5744 return true;
5745 }
5746 if (Right.is(tok::string_literal) && Right.TokenText.starts_with("R\"")) {
5747 // Multiline raw string literals are special wrt. line breaks. The author
5748 // has made a deliberate choice and might have aligned the contents of the
5749 // string literal accordingly. Thus, we try keep existing line breaks.
5750 return Right.IsMultiline && Right.NewlinesBefore > 0;
5751 }
5752 if ((Left.is(tok::l_brace) ||
5753 (Left.is(tok::less) && BeforeLeft && BeforeLeft->is(tok::equal))) &&
5754 Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
5755 // Don't put enums or option definitions onto single lines in protocol
5756 // buffers.
5757 return true;
5758 }
5759 if (Right.is(TT_InlineASMBrace))
5760 return Right.HasUnescapedNewline;
5761
5762 if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
5763 auto *FirstNonComment = Line.getFirstNonComment();
5764 bool AccessSpecifier =
5765 FirstNonComment && (FirstNonComment->is(Keywords.kw_internal) ||
5766 FirstNonComment->isAccessSpecifierKeyword());
5767
5768 if (Style.BraceWrapping.AfterEnum) {
5769 if (Line.startsWith(tok::kw_enum) ||
5770 Line.startsWith(tok::kw_typedef, tok::kw_enum)) {
5771 return true;
5772 }
5773 // Ensure BraceWrapping for `public enum A {`.
5774 if (AccessSpecifier && FirstNonComment->Next &&
5775 FirstNonComment->Next->is(tok::kw_enum)) {
5776 return true;
5777 }
5778 }
5779
5780 // Ensure BraceWrapping for `public interface A {`.
5781 if (Style.BraceWrapping.AfterClass &&
5782 ((AccessSpecifier && FirstNonComment->Next &&
5783 FirstNonComment->Next->is(Keywords.kw_interface)) ||
5784 Line.startsWith(Keywords.kw_interface))) {
5785 return true;
5786 }
5787
5788 // Don't attempt to interpret struct return types as structs.
5789 if (Right.isNot(TT_FunctionLBrace)) {
5790 return (Line.startsWith(tok::kw_class) &&
5791 Style.BraceWrapping.AfterClass) ||
5792 (Line.startsWith(tok::kw_struct) &&
5793 Style.BraceWrapping.AfterStruct);
5794 }
5795 }
5796
5797 if (Left.is(TT_ObjCBlockLBrace) &&
5798 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) {
5799 return true;
5800 }
5801
5802 // Ensure wrapping after __attribute__((XX)) and @interface etc.
5803 if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) &&
5804 Right.is(TT_ObjCDecl)) {
5805 return true;
5806 }
5807
5808 if (Left.is(TT_LambdaLBrace)) {
5809 if (IsFunctionArgument(Left) &&
5810 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) {
5811 return false;
5812 }
5813
5814 if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
5815 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
5816 (!Left.Children.empty() &&
5817 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty)) {
5818 return true;
5819 }
5820 }
5821
5822 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) &&
5823 (Left.isPointerOrReference() || Left.is(TT_TemplateCloser))) {
5824 return true;
5825 }
5826
5827 // Put multiple Java annotation on a new line.
5828 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
5829 Left.is(TT_LeadingJavaAnnotation) &&
5830 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
5831 (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) {
5832 return true;
5833 }
5834
5835 if (Right.is(TT_ProtoExtensionLSquare))
5836 return true;
5837
5838 // In text proto instances if a submessage contains at least 2 entries and at
5839 // least one of them is a submessage, like A { ... B { ... } ... },
5840 // put all of the entries of A on separate lines by forcing the selector of
5841 // the submessage B to be put on a newline.
5842 //
5843 // Example: these can stay on one line:
5844 // a { scalar_1: 1 scalar_2: 2 }
5845 // a { b { key: value } }
5846 //
5847 // and these entries need to be on a new line even if putting them all in one
5848 // line is under the column limit:
5849 // a {
5850 // scalar: 1
5851 // b { key: value }
5852 // }
5853 //
5854 // We enforce this by breaking before a submessage field that has previous
5855 // siblings, *and* breaking before a field that follows a submessage field.
5856 //
5857 // Be careful to exclude the case [proto.ext] { ... } since the `]` is
5858 // the TT_SelectorName there, but we don't want to break inside the brackets.
5859 //
5860 // Another edge case is @submessage { key: value }, which is a common
5861 // substitution placeholder. In this case we want to keep `@` and `submessage`
5862 // together.
5863 //
5864 // We ensure elsewhere that extensions are always on their own line.
5865 if (Style.isProto() && Right.is(TT_SelectorName) &&
5866 Right.isNot(tok::r_square) && AfterRight) {
5867 // Keep `@submessage` together in:
5868 // @submessage { key: value }
5869 if (Left.is(tok::at))
5870 return false;
5871 // Look for the scope opener after selector in cases like:
5872 // selector { ...
5873 // selector: { ...
5874 // selector: @base { ...
5875 const auto *LBrace = AfterRight;
5876 if (LBrace && LBrace->is(tok::colon)) {
5877 LBrace = LBrace->Next;
5878 if (LBrace && LBrace->is(tok::at)) {
5879 LBrace = LBrace->Next;
5880 if (LBrace)
5881 LBrace = LBrace->Next;
5882 }
5883 }
5884 if (LBrace &&
5885 // The scope opener is one of {, [, <:
5886 // selector { ... }
5887 // selector [ ... ]
5888 // selector < ... >
5889 //
5890 // In case of selector { ... }, the l_brace is TT_DictLiteral.
5891 // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
5892 // so we check for immediately following r_brace.
5893 ((LBrace->is(tok::l_brace) &&
5894 (LBrace->is(TT_DictLiteral) ||
5895 (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
5896 LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
5897 // If Left.ParameterCount is 0, then this submessage entry is not the
5898 // first in its parent submessage, and we want to break before this entry.
5899 // If Left.ParameterCount is greater than 0, then its parent submessage
5900 // might contain 1 or more entries and we want to break before this entry
5901 // if it contains at least 2 entries. We deal with this case later by
5902 // detecting and breaking before the next entry in the parent submessage.
5903 if (Left.ParameterCount == 0)
5904 return true;
5905 // However, if this submessage is the first entry in its parent
5906 // submessage, Left.ParameterCount might be 1 in some cases.
5907 // We deal with this case later by detecting an entry
5908 // following a closing paren of this submessage.
5909 }
5910
5911 // If this is an entry immediately following a submessage, it will be
5912 // preceded by a closing paren of that submessage, like in:
5913 // left---. .---right
5914 // v v
5915 // sub: { ... } key: value
5916 // If there was a comment between `}` an `key` above, then `key` would be
5917 // put on a new line anyways.
5918 if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
5919 return true;
5920 }
5921
5922 return false;
5923 }
5924
/// Decides whether a line break is permitted between \p Right and the token
/// preceding it (\c Right.Previous, called \c Left below).
///
/// The checks form an ordered cascade: rules specific to C#, Java, JavaScript
/// and TableGen run first, followed by the general rules. The first rule that
/// matches returns its verdict, so the order of the checks is significant. A
/// language-specific branch that does not return falls through to the general
/// rules. If no rule matches, the final expression lists the remaining token
/// pairs between which a break is allowed.
///
/// \param Line  The annotated line that contains \p Right.
/// \param Right The token in front of which a break is being considered.
///              \c Right.Previous is dereferenced unconditionally, so it must
///              not be null.
/// \returns \c true if a break before \p Right is allowed, \c false otherwise.
bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
                                    const FormatToken &Right) const {
  const FormatToken &Left = *Right.Previous;
  // Language-specific stuff.
  if (Style.isCSharp()) {
    // Never break on either side of a named-argument or attribute colon.
    if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) ||
        Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) {
      return false;
    }
    // Only break after commas for generic type constraints.
    if (Line.First->is(TT_CSharpGenericTypeConstraint))
      return Left.is(TT_CSharpGenericTypeConstraintComma);
    // Keep nullable operators attached to their identifiers.
    if (Right.is(TT_CSharpNullable))
      return false;
  } else if (Style.Language == FormatStyle::LK_Java) {
    // Do not break after `throws`, `extends` or `implements`...
    if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
                     Keywords.kw_implements)) {
      return false;
    }
    // ...but allow a break in front of them.
    if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
                      Keywords.kw_implements)) {
      return true;
    }
  } else if (Style.isJavaScript()) {
    // The closest preceding token that is not a comment (may be null).
    const FormatToken *NonComment = Right.getPreviousNonComment();
    if (NonComment &&
        (NonComment->isAccessSpecifierKeyword() ||
         NonComment->isOneOf(
             tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
             tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
             tok::kw_static, Keywords.kw_readonly, Keywords.kw_override,
             Keywords.kw_abstract, Keywords.kw_get, Keywords.kw_set,
             Keywords.kw_async, Keywords.kw_await))) {
      return false; // Otherwise automatic semicolon insertion would trigger.
    }
    // At nesting level 0, do not break between an identifier-like token,
    // `]` or `)` and a following `[` or `(`.
    if (Right.NestingLevel == 0 &&
        (Left.Tok.getIdentifierInfo() ||
         Left.isOneOf(tok::r_square, tok::r_paren)) &&
        Right.isOneOf(tok::l_square, tok::l_paren)) {
      return false; // Otherwise automatic semicolon insertion would trigger.
    }
    // Do not break directly after an `asserts` identifier.
    if (NonComment && NonComment->is(tok::identifier) &&
        NonComment->TokenText == "asserts") {
      return false;
    }
    // Keep the `{` that follows a fat arrow on the same line as the arrow.
    if (Left.is(TT_FatArrow) && Right.is(tok::l_brace))
      return false;
    // A break is allowed after a token annotated as a JS type colon.
    if (Left.is(TT_JsTypeColon))
      return true;
    // Don't wrap between "!" and ":" of a strict prop init ("field!: type;").
    if (Left.is(tok::exclaim) && Right.is(tok::colon))
      return false;
    // Look for is type annotations like:
    //   function f(): a is B { ... }
    // Do not break before is in these cases.
    if (Right.is(Keywords.kw_is)) {
      const FormatToken *Next = Right.getNextNonComment();
      // If `is` is followed by a colon, it's likely that it's a dict key, so
      // ignore it for this check.
      // For example this is common in Polymer:
      //   Polymer({
      //     is: 'name',
      //     ...
      //   });
      if (!Next || Next->isNot(tok::colon))
        return false;
    }
    // `in` follows the binary-operator break setting: break after it only for
    // BOS_None, and before it for every other setting.
    if (Left.is(Keywords.kw_in))
      return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
    if (Right.is(Keywords.kw_in))
      return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
    if (Right.is(Keywords.kw_as))
      return false; // must not break before as in 'x as type' casts
    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
      // extends and infer can appear as keywords in conditional types:
      //   https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
      // do not break before them, as the expressions are subject to ASI.
      return false;
    }
    // Breaking after `as` and after a non-null assertion is allowed.
    if (Left.is(Keywords.kw_as))
      return true;
    if (Left.is(TT_NonNullAssertion))
      return true;
    if (Left.is(Keywords.kw_declare) &&
        Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
                      Keywords.kw_function, tok::kw_class, tok::kw_enum,
                      Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
                      Keywords.kw_let, tok::kw_const)) {
      // See grammar for 'declare' statements at:
      // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10
      return false;
    }
    if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
        Right.isOneOf(tok::identifier, tok::string_literal)) {
      return false; // must not break in "module foo { ...}"
    }
    // Do not break before a template string token that closes a scope.
    if (Right.is(TT_TemplateString) && Right.closesScope())
      return false;
    // Don't split tagged template literal so there is a break between the tag
    // identifier and template string.
    if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
      return false;
    // Breaking after a template string token that opens a scope is allowed.
    if (Left.is(TT_TemplateString) && Left.opensScope())
      return true;
  } else if (Style.isTableGen()) {
    // Do not break after "def", "class", "let" and so on.
    if (Keywords.isTableGenDefinition(Left))
      return false;
    // Do not break before '(' when it follows a bang operator, a cond
    // operator or a template closer; before any other '(' a break is allowed.
    if (Right.is(tok::l_paren)) {
      return !Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator,
                           TT_TemplateCloser);
    }
    // Do not break between the value and its suffix part.
    if (Left.is(TT_TableGenValueSuffix))
      return false;
    // Do not break around the paste operator.
    if (Left.is(tok::hash) || Right.is(tok::hash))
      return false;
    // Do not break after a bang or cond operator.
    if (Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator))
      return false;
  }

  // General rules, applied to every language when no rule above returned.

  // Never separate '@' from the token that follows it.
  if (Left.is(tok::at))
    return false;
  // Do not break after the Objective-C `interface` keyword.
  if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
    return false;
  // After a Java annotation a break is allowed, except before a '('.
  if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
    return Right.isNot(tok::l_paren);
  // Break before a pointer/reference token only in multi-variable declaration
  // statements, or when it is right-aligned and not immediately followed by a
  // function declaration name.
  if (Right.is(TT_PointerOrReference)) {
    return Line.IsMultiVariableDeclStmt ||
           (getTokenPointerOrReferenceAlignment(Right) ==
                FormatStyle::PAS_Right &&
            (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
  }
  // A break is allowed before the start of a name, a function declaration name
  // and the `operator` keyword.
  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
      Right.is(tok::kw_operator)) {
    return true;
  }
  // Otherwise keep whatever follows a pointer/reference token attached to it.
  if (Left.is(TT_PointerOrReference))
    return false;
  if (Right.isTrailingComment()) {
    // We rely on MustBreakBefore being set correctly here as we should not
    // change the "binding" behavior of a comment.
    // The first comment in a braced lists is always interpreted as belonging to
    // the first list element. Otherwise, it should be placed outside of the
    // list.
    return Left.is(BK_BracedInit) ||
           (Left.is(TT_CtorInitializerColon) && Right.NewlinesBefore > 0 &&
            Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
  }
  // Ternary operators: never split "?:"; otherwise break before or after the
  // operator tokens according to BreakBeforeTernaryOperators.
  if (Left.is(tok::question) && Right.is(tok::colon))
    return false;
  if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
    return Style.BreakBeforeTernaryOperators;
  if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
    return !Style.BreakBeforeTernaryOperators;
  // Inheritance colon: break after it for BILS_AfterColon, before it for every
  // other BreakInheritanceList setting.
  if (Left.is(TT_InheritanceColon))
    return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
  if (Right.is(TT_InheritanceColon))
    return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
  // A break is allowed before an ObjC method expression token, unless that
  // token is the closing ']' or it follows a selector name.
  if (Right.is(TT_ObjCMethodExpr) && Right.isNot(tok::r_square) &&
      Left.isNot(TT_SelectorName)) {
    return true;
  }

  // Of the colons that reach this point, only constructor-initializer and
  // inline-asm colons may have a break in front of them.
  if (Right.is(tok::colon) &&
      !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) {
    return false;
  }
  // A break is allowed after a dict-literal or ObjC method expression colon,
  // with the proto exceptions below.
  if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
    if (Style.isProto()) {
      if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
        return false;
      // Prevent cases like:
      //
      // submessage:
      //     { key: valueeeeeeeeeeee }
      //
      // when the snippet does not fit into one line.
      // Prefer:
      //
      // submessage: {
      //   key: valueeeeeeeeeeee
      // }
      //
      // instead, even if it is longer by one line.
      //
      // Note that this allows the "{" to go over the column limit
      // when the column limit is just between ":" and "{", but that does
      // not happen too often and alternative formattings in this case are
      // not much better.
      //
      // The code covers the cases:
      //
      // submessage: { ... }
      // submessage: < ... >
      // repeated: [ ... ]
      if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
           Right.is(TT_DictLiteral)) ||
          Right.is(TT_ArrayInitializerLSquare)) {
        return false;
      }
    }
    return true;
  }
  // Do not break before the ']' that closes a proto extension '['.
  if (Right.is(tok::r_square) && Right.MatchingParen &&
      Right.MatchingParen->is(TT_ProtoExtensionLSquare)) {
    return false;
  }
  // Before a selector name (or an identifier directly followed by an ObjC
  // method expression token) a break is allowed unless it follows a period.
  if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
                                    Right.Next->is(TT_ObjCMethodExpr))) {
    return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
  }
  if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
    return true;
  // Breaking before `concept` is allowed unless the style says BBCDS_Never.
  if (Right.is(tok::kw_concept))
    return Style.BreakBeforeConceptDeclarations != FormatStyle::BBCDS_Never;
  if (Right.is(TT_RequiresClause))
    return true;
  // After a template declaration: with BTDS_Leave a break is allowed only
  // where the input already had a newline; otherwise it is always allowed.
  if (Left.ClosesTemplateDeclaration) {
    return Style.BreakTemplateDeclarations != FormatStyle::BTDS_Leave ||
           Right.NewlinesBefore > 0;
  }
  if (Left.is(TT_FunctionAnnotationRParen))
    return true;
  if (Left.ClosesRequiresClause)
    return true;
  // Never break before a range-based-for colon, an overloaded operator or the
  // '(' of an overloaded operator.
  if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
                    TT_OverloadedOperator)) {
    return false;
  }
  if (Left.is(TT_RangeBasedForLoopColon))
    return true;
  // NOTE(review): the isOneOf check above already returns false for a
  // TT_RangeBasedForLoopColon on the right, so this check appears redundant.
  if (Right.is(TT_RangeBasedForLoopColon))
    return false;
  if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
    return true;
  // Never split ">>" or "<<" made of two separate tokens.
  if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
      (Left.is(tok::less) && Right.is(tok::less))) {
    return false;
  }
  // Break before a binary operator when the style asks for it: for BOS_All
  // before any operator, for the other non-BOS_None settings only before
  // operators that do not have assignment precedence.
  if (Right.is(TT_BinaryOperator) &&
      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
      (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
       Right.getPrecedence() != prec::Assignment)) {
    return true;
  }
  // Do not break after a template closer, a unary operator or `operator`.
  if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
      Left.is(tok::kw_operator)) {
    return false;
  }
  // In a virtual function declaration line, do not break after a top-level '='
  // unless it is followed by `default` or `delete`.
  if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
      Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) {
    return false;
  }
  // Without Cpp11BracedListStyle, keep "= {" together.
  if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
      !Style.Cpp11BracedListStyle) {
    return false;
  }
  // Do not break after the '(' of an attribute or of a type declaration.
  if (Left.is(TT_AttributeLParen) ||
      (Left.is(tok::l_paren) && Left.is(TT_TypeDeclarationParen))) {
    return false;
  }
  // Do not break after a '(' that directly follows a binary operator or the
  // ')' of a cast.
  if (Left.is(tok::l_paren) && Left.Previous &&
      (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) {
    return false;
  }
  if (Right.is(TT_ImplicitStringLiteral))
    return false;

  // Closing tokens: never break before a template closer or before the ']'
  // that closes a lambda introducer.
  if (Right.is(TT_TemplateCloser))
    return false;
  if (Right.is(tok::r_square) && Right.MatchingParen &&
      Right.MatchingParen->is(TT_LambdaLSquare)) {
    return false;
  }

  // We only break before r_brace if there was a corresponding break before
  // the l_brace, which is tracked by BreakBeforeClosingBrace.
  if (Right.is(tok::r_brace)) {
    return Right.MatchingParen && (Right.MatchingParen->is(BK_Block) ||
                                   (Right.isBlockIndentedInitRBrace(Style)));
  }

  // We only break before r_paren if we're in a block indented context.
  if (Right.is(tok::r_paren)) {
    if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent ||
        !Right.MatchingParen) {
      return false;
    }
    // Skip one directly following ')' and refuse the break if a '(' comes
    // next.
    auto Next = Right.Next;
    if (Next && Next->is(tok::r_paren))
      Next = Next->Next;
    if (Next && Next->is(tok::l_paren))
      return false;
    // No break before the ')' when the token in front of the matching '(' is
    // `for` or satisfies isIf().
    const FormatToken *Previous = Right.MatchingParen->Previous;
    return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf()));
  }

  // Allow breaking after a trailing annotation, e.g. after a method
  // declaration.
  if (Left.is(TT_TrailingAnnotation)) {
    return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
                          tok::less, tok::coloncolon);
  }

  if (Right.isAttribute())
    return true;

  // Before an attribute '[': allowed unless the previous token is itself an
  // attribute square bracket.
  if (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))
    return Left.isNot(TT_AttributeSquare);

  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
    return true;

  // A break is allowed before an identifier that is directly followed by a
  // dict-literal token.
  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
    return true;

  // Constructor initializer colon: break after it only for BCIS_AfterColon,
  // before it for every other setting.
  // NOTE(review): trailing comments on the right already returned earlier in
  // this function, so the isTrailingComment() test below appears to be always
  // satisfied here.
  if (Left.is(TT_CtorInitializerColon)) {
    return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
           (!Right.isTrailingComment() || Right.NewlinesBefore > 0);
  }
  if (Right.is(TT_CtorInitializerColon))
    return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
  // With the BeforeComma settings, initializer and inheritance lists break in
  // front of the comma and never after it.
  if (Left.is(TT_CtorInitializerComma) &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
    return false;
  }
  if (Right.is(TT_CtorInitializerComma) &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
    return true;
  }
  if (Left.is(TT_InheritanceComma) &&
      Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
    return false;
  }
  if (Right.is(TT_InheritanceComma) &&
      Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
    return true;
  }
  if (Left.is(TT_ArrayInitializerLSquare))
    return true;
  // Break before `typename` unless it follows `const`.
  if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
    return true;
  // Break after a binary operator (other than "->*" and "<<") unless the style
  // is BOS_All: for BOS_None after any such operator, for the remaining
  // settings only after operators with assignment precedence.
  if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
      !Left.isOneOf(tok::arrowstar, tok::lessless) &&
      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
      (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
       Left.getPrecedence() == prec::Assignment)) {
    return true;
  }
  // Keep the two square brackets of an attribute together.
  if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
      (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) {
    return false;
  }

  // With BraceWrapping.BeforeLambdaBody, the verdict for a lambda's '{' is
  // delegated to isAllmanLambdaBrace() and isItAnEmptyLambdaAllowed(), which
  // are defined outside this function. If neither call to
  // isAllmanLambdaBrace() succeeds, the remaining rules apply.
  auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
  if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) {
    if (isAllmanLambdaBrace(Left))
      return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption);
    if (isAllmanLambdaBrace(Right))
      return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption);
  }

  // A trailing `noexcept` follows AllowBreakBeforeNoexceptSpecifier. Every
  // case listed returns; the switch has no default, so any other value falls
  // through to the generic rule below.
  if (Right.is(tok::kw_noexcept) && Right.is(TT_TrailingAnnotation)) {
    switch (Style.AllowBreakBeforeNoexceptSpecifier) {
    case FormatStyle::BBNSS_Never:
      return false;
    case FormatStyle::BBNSS_Always:
      return true;
    case FormatStyle::BBNSS_OnlyWithParen:
      return Right.Next && Right.Next->is(tok::l_paren);
    }
  }

  // Generic fallback: the remaining token pairs where a break is allowed.
  // NOTE(review): a TT_TemplateCloser on the right has already returned false
  // above, so the isNot(TT_TemplateCloser) test in the last line appears to be
  // always true at this point.
  return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
                      tok::kw_class, tok::kw_struct, tok::comment) ||
         Right.isMemberAccess() ||
         Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
                       tok::colon, tok::l_square, tok::at) ||
         (Left.is(tok::r_paren) &&
          Right.isOneOf(tok::identifier, tok::kw_const)) ||
         (Left.is(tok::l_paren) && Right.isNot(tok::r_paren)) ||
         (Left.is(TT_TemplateOpener) && Right.isNot(TT_TemplateCloser));
}
6312
printDebugInfo(const AnnotatedLine & Line) const6313 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) const {
6314 llvm::errs() << "AnnotatedTokens(L=" << Line.Level << ", P=" << Line.PPLevel
6315 << ", T=" << Line.Type << ", C=" << Line.IsContinuation
6316 << "):\n";
6317 const FormatToken *Tok = Line.First;
6318 while (Tok) {
6319 llvm::errs() << " M=" << Tok->MustBreakBefore
6320 << " C=" << Tok->CanBreakBefore
6321 << " T=" << getTokenTypeName(Tok->getType())
6322 << " S=" << Tok->SpacesRequiredBefore
6323 << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
6324 << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty
6325 << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
6326 << " PPK=" << Tok->getPackingKind() << " FakeLParens=";
6327 for (prec::Level LParen : Tok->FakeLParens)
6328 llvm::errs() << LParen << "/";
6329 llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
6330 llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
6331 llvm::errs() << " Text='" << Tok->TokenText << "'\n";
6332 if (!Tok->Next)
6333 assert(Tok == Line.Last);
6334 Tok = Tok->Next;
6335 }
6336 llvm::errs() << "----\n";
6337 }
6338
6339 FormatStyle::PointerAlignmentStyle
getTokenReferenceAlignment(const FormatToken & Reference) const6340 TokenAnnotator::getTokenReferenceAlignment(const FormatToken &Reference) const {
6341 assert(Reference.isOneOf(tok::amp, tok::ampamp));
6342 switch (Style.ReferenceAlignment) {
6343 case FormatStyle::RAS_Pointer:
6344 return Style.PointerAlignment;
6345 case FormatStyle::RAS_Left:
6346 return FormatStyle::PAS_Left;
6347 case FormatStyle::RAS_Right:
6348 return FormatStyle::PAS_Right;
6349 case FormatStyle::RAS_Middle:
6350 return FormatStyle::PAS_Middle;
6351 }
6352 assert(0); //"Unhandled value of ReferenceAlignment"
6353 return Style.PointerAlignment;
6354 }
6355
6356 FormatStyle::PointerAlignmentStyle
getTokenPointerOrReferenceAlignment(const FormatToken & PointerOrReference) const6357 TokenAnnotator::getTokenPointerOrReferenceAlignment(
6358 const FormatToken &PointerOrReference) const {
6359 if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) {
6360 switch (Style.ReferenceAlignment) {
6361 case FormatStyle::RAS_Pointer:
6362 return Style.PointerAlignment;
6363 case FormatStyle::RAS_Left:
6364 return FormatStyle::PAS_Left;
6365 case FormatStyle::RAS_Right:
6366 return FormatStyle::PAS_Right;
6367 case FormatStyle::RAS_Middle:
6368 return FormatStyle::PAS_Middle;
6369 }
6370 }
6371 assert(PointerOrReference.is(tok::star));
6372 return Style.PointerAlignment;
6373 }
6374
6375 } // namespace format
6376 } // namespace clang
6377