1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that 11 /// fixes namespace end comments. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "NamespaceEndCommentsFixer.h" 16 #include "clang/Basic/TokenKinds.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/Regex.h" 19 20 #define DEBUG_TYPE "namespace-end-comments-fixer" 21 22 namespace clang { 23 namespace format { 24 25 namespace { 26 // Iterates all tokens starting from StartTok to EndTok and apply Fn to all 27 // tokens between them including StartTok and EndTok. Returns the token after 28 // EndTok. 29 const FormatToken * 30 processTokens(const FormatToken *Tok, tok::TokenKind StartTok, 31 tok::TokenKind EndTok, 32 llvm::function_ref<void(const FormatToken *)> Fn) { 33 if (!Tok || Tok->isNot(StartTok)) 34 return Tok; 35 int NestLevel = 0; 36 do { 37 if (Tok->is(StartTok)) 38 ++NestLevel; 39 else if (Tok->is(EndTok)) 40 --NestLevel; 41 if (Fn) 42 Fn(Tok); 43 Tok = Tok->getNextNonComment(); 44 } while (Tok && NestLevel > 0); 45 return Tok; 46 } 47 48 const FormatToken *skipAttribute(const FormatToken *Tok) { 49 if (!Tok) 50 return nullptr; 51 if (Tok->isAttribute()) { 52 Tok = Tok->getNextNonComment(); 53 Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr); 54 } else if (Tok->is(tok::l_square)) { 55 Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr); 56 } 57 return Tok; 58 } 59 60 // Computes the name of a namespace given the namespace token. 61 // Returns "" for anonymous namespace. 62 std::string computeName(const FormatToken *NamespaceTok) { 63 assert(NamespaceTok && 64 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 65 "expecting a namespace token"); 66 std::string name; 67 const FormatToken *Tok = NamespaceTok->getNextNonComment(); 68 if (NamespaceTok->is(TT_NamespaceMacro)) { 69 // Collects all the non-comment tokens between opening parenthesis 70 // and closing parenthesis or comma. 71 assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis"); 72 Tok = Tok->getNextNonComment(); 73 while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) { 74 name += Tok->TokenText; 75 Tok = Tok->getNextNonComment(); 76 } 77 return name; 78 } 79 Tok = skipAttribute(Tok); 80 81 std::string FirstNSName; 82 // For `namespace [[foo]] A::B::inline C {` or 83 // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C". 84 // Peek for the first '::' (or '{' or '(')) and then return all tokens from 85 // one token before that up until the '{'. A '(' might be a macro with 86 // arguments. 87 const FormatToken *FirstNSTok = nullptr; 88 while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) { 89 if (FirstNSTok) 90 FirstNSName += FirstNSTok->TokenText; 91 FirstNSTok = Tok; 92 Tok = Tok->getNextNonComment(); 93 } 94 95 if (FirstNSTok) 96 Tok = FirstNSTok; 97 Tok = skipAttribute(Tok); 98 99 FirstNSTok = nullptr; 100 // Add everything from '(' to ')'. 101 auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; }; 102 bool IsPrevColoncolon = false; 103 bool HasColoncolon = false; 104 bool IsPrevInline = false; 105 bool NameFinished = false; 106 // If we found '::' in name, then it's the name. Otherwise, we can't tell 107 // which one is name. For example, `namespace A B {`. 108 while (Tok && Tok->isNot(tok::l_brace)) { 109 if (FirstNSTok) { 110 if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) { 111 if (FirstNSTok->is(tok::l_paren)) { 112 FirstNSTok = Tok = 113 processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken); 114 continue; 115 } 116 if (FirstNSTok->isNot(tok::coloncolon)) { 117 NameFinished = true; 118 break; 119 } 120 } 121 name += FirstNSTok->TokenText; 122 IsPrevColoncolon = FirstNSTok->is(tok::coloncolon); 123 HasColoncolon = HasColoncolon || IsPrevColoncolon; 124 if (FirstNSTok->is(tok::kw_inline)) { 125 name += " "; 126 IsPrevInline = true; 127 } 128 } 129 FirstNSTok = Tok; 130 Tok = Tok->getNextNonComment(); 131 const FormatToken *TokAfterAttr = skipAttribute(Tok); 132 if (TokAfterAttr != Tok) 133 FirstNSTok = Tok = TokAfterAttr; 134 } 135 if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace)) 136 name += FirstNSTok->TokenText; 137 if (FirstNSName.empty() || HasColoncolon) 138 return name; 139 return name.empty() ? FirstNSName : FirstNSName + " " + name; 140 } 141 142 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline, 143 const FormatToken *NamespaceTok, 144 unsigned SpacesToAdd) { 145 std::string text = "//"; 146 text.append(SpacesToAdd, ' '); 147 text += NamespaceTok->TokenText; 148 if (NamespaceTok->is(TT_NamespaceMacro)) 149 text += "("; 150 else if (!NamespaceName.empty()) 151 text += ' '; 152 text += NamespaceName; 153 if (NamespaceTok->is(TT_NamespaceMacro)) 154 text += ")"; 155 if (AddNewline) 156 text += '\n'; 157 return text; 158 } 159 160 bool hasEndComment(const FormatToken *RBraceTok) { 161 return RBraceTok->Next && RBraceTok->Next->is(tok::comment); 162 } 163 164 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName, 165 const FormatToken *NamespaceTok) { 166 assert(hasEndComment(RBraceTok)); 167 const FormatToken *Comment = RBraceTok->Next; 168 169 // Matches a valid namespace end comment. 170 // Valid namespace end comments don't need to be edited. 171 static const llvm::Regex NamespaceCommentPattern = 172 llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" 173 "namespace( +([a-zA-Z0-9:_ ]+))?\\.? *(\\*/)?$", 174 llvm::Regex::IgnoreCase); 175 static const llvm::Regex NamespaceMacroCommentPattern = 176 llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" 177 "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*|\".+\")\\)\\.? *(\\*/)?$", 178 llvm::Regex::IgnoreCase); 179 180 SmallVector<StringRef, 8> Groups; 181 if (NamespaceTok->is(TT_NamespaceMacro) && 182 NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) { 183 StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : ""; 184 // The name of the macro must be used. 185 if (NamespaceTokenText != NamespaceTok->TokenText) 186 return false; 187 } else if (NamespaceTok->isNot(tok::kw_namespace) || 188 !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) { 189 // Comment does not match regex. 190 return false; 191 } 192 StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5].rtrim() : ""; 193 // Anonymous namespace comments must not mention a namespace name. 194 if (NamespaceName.empty() && !NamespaceNameInComment.empty()) 195 return false; 196 StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : ""; 197 // Named namespace comments must not mention anonymous namespace. 198 if (!NamespaceName.empty() && !AnonymousInComment.empty()) 199 return false; 200 if (NamespaceNameInComment == NamespaceName) 201 return true; 202 203 // Has namespace comment flowed onto the next line. 204 // } // namespace 205 // // verylongnamespacenamethatdidnotfitonthepreviouscommentline 206 if (!(Comment->Next && Comment->Next->is(TT_LineComment))) 207 return false; 208 209 static const llvm::Regex CommentPattern = llvm::Regex( 210 "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase); 211 212 // Pull out just the comment text. 213 if (!CommentPattern.match(Comment->Next->TokenText, &Groups)) 214 return false; 215 NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : ""; 216 217 return NamespaceNameInComment == NamespaceName; 218 } 219 220 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, 221 const SourceManager &SourceMgr, 222 tooling::Replacements *Fixes) { 223 auto EndLoc = RBraceTok->Tok.getEndLoc(); 224 auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc); 225 auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); 226 if (Err) { 227 llvm::errs() << "Error while adding namespace end comment: " 228 << llvm::toString(std::move(Err)) << "\n"; 229 } 230 } 231 232 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, 233 const SourceManager &SourceMgr, 234 tooling::Replacements *Fixes) { 235 assert(hasEndComment(RBraceTok)); 236 const FormatToken *Comment = RBraceTok->Next; 237 auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(), 238 Comment->Tok.getEndLoc()); 239 auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); 240 if (Err) { 241 llvm::errs() << "Error while updating namespace end comment: " 242 << llvm::toString(std::move(Err)) << "\n"; 243 } 244 } 245 } // namespace 246 247 const FormatToken * 248 getNamespaceToken(const AnnotatedLine *Line, 249 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 250 if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace)) 251 return nullptr; 252 size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex; 253 if (StartLineIndex == UnwrappedLine::kInvalidIndex) 254 return nullptr; 255 assert(StartLineIndex < AnnotatedLines.size()); 256 const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First; 257 if (NamespaceTok->is(tok::l_brace)) { 258 // "namespace" keyword can be on the line preceding '{', e.g. in styles 259 // where BraceWrapping.AfterNamespace is true. 260 if (StartLineIndex > 0) { 261 NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First; 262 if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi)) 263 return nullptr; 264 } 265 } 266 267 return NamespaceTok->getNamespaceToken(); 268 } 269 270 StringRef 271 getNamespaceTokenText(const AnnotatedLine *Line, 272 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 273 const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines); 274 return NamespaceTok ? NamespaceTok->TokenText : StringRef(); 275 } 276 277 NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, 278 const FormatStyle &Style) 279 : TokenAnalyzer(Env, Style) {} 280 281 std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze( 282 TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 283 FormatTokenLexer &Tokens) { 284 const SourceManager &SourceMgr = Env.getSourceManager(); 285 AffectedRangeMgr.computeAffectedLines(AnnotatedLines); 286 tooling::Replacements Fixes; 287 288 // Spin through the lines and ensure we have balanced braces. 289 int Braces = 0; 290 for (AnnotatedLine *Line : AnnotatedLines) { 291 FormatToken *Tok = Line->First; 292 while (Tok) { 293 Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0; 294 Tok = Tok->Next; 295 } 296 } 297 // Don't attempt to comment unbalanced braces or this can 298 // lead to comments being placed on the closing brace which isn't 299 // the matching brace of the namespace. (occurs during incomplete editing). 300 if (Braces != 0) 301 return {Fixes, 0}; 302 303 std::string AllNamespaceNames; 304 size_t StartLineIndex = SIZE_MAX; 305 StringRef NamespaceTokenText; 306 unsigned int CompactedNamespacesCount = 0; 307 for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { 308 const AnnotatedLine *EndLine = AnnotatedLines[I]; 309 const FormatToken *NamespaceTok = 310 getNamespaceToken(EndLine, AnnotatedLines); 311 if (!NamespaceTok) 312 continue; 313 FormatToken *RBraceTok = EndLine->First; 314 if (RBraceTok->Finalized) 315 continue; 316 RBraceTok->Finalized = true; 317 const FormatToken *EndCommentPrevTok = RBraceTok; 318 // Namespaces often end with '};'. In that case, attach namespace end 319 // comments to the semicolon tokens. 320 if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) 321 EndCommentPrevTok = RBraceTok->Next; 322 if (StartLineIndex == SIZE_MAX) 323 StartLineIndex = EndLine->MatchingOpeningBlockLineIndex; 324 std::string NamespaceName = computeName(NamespaceTok); 325 if (Style.CompactNamespaces) { 326 if (CompactedNamespacesCount == 0) 327 NamespaceTokenText = NamespaceTok->TokenText; 328 if ((I + 1 < E) && 329 NamespaceTokenText == 330 getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) && 331 StartLineIndex - CompactedNamespacesCount - 1 == 332 AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex && 333 !AnnotatedLines[I + 1]->First->Finalized) { 334 if (hasEndComment(EndCommentPrevTok)) { 335 // remove end comment, it will be merged in next one 336 updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes); 337 } 338 ++CompactedNamespacesCount; 339 if (!NamespaceName.empty()) 340 AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames; 341 continue; 342 } 343 NamespaceName += AllNamespaceNames; 344 CompactedNamespacesCount = 0; 345 AllNamespaceNames = std::string(); 346 } 347 // The next token in the token stream after the place where the end comment 348 // token must be. This is either the next token on the current line or the 349 // first token on the next line. 350 const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next; 351 if (EndCommentNextTok && EndCommentNextTok->is(tok::comment)) 352 EndCommentNextTok = EndCommentNextTok->Next; 353 if (!EndCommentNextTok && I + 1 < E) 354 EndCommentNextTok = AnnotatedLines[I + 1]->First; 355 bool AddNewline = EndCommentNextTok && 356 EndCommentNextTok->NewlinesBefore == 0 && 357 EndCommentNextTok->isNot(tok::eof); 358 const std::string EndCommentText = 359 computeEndCommentText(NamespaceName, AddNewline, NamespaceTok, 360 Style.SpacesInLineCommentPrefix.Minimum); 361 if (!hasEndComment(EndCommentPrevTok)) { 362 unsigned LineCount = 0; 363 for (auto J = StartLineIndex + 1; J < I; ++J) 364 LineCount += AnnotatedLines[J]->size(); 365 if (LineCount > Style.ShortNamespaceLines) { 366 addEndComment(EndCommentPrevTok, 367 std::string(Style.SpacesBeforeTrailingComments, ' ') + 368 EndCommentText, 369 SourceMgr, &Fixes); 370 } 371 } else if (!validEndComment(EndCommentPrevTok, NamespaceName, 372 NamespaceTok)) { 373 updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); 374 } 375 StartLineIndex = SIZE_MAX; 376 } 377 return {Fixes, 0}; 378 } 379 380 } // namespace format 381 } // namespace clang 382