10b57cec5SDimitry Andric //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "clang/AST/RawCommentList.h" 100b57cec5SDimitry Andric #include "clang/AST/ASTContext.h" 110b57cec5SDimitry Andric #include "clang/AST/Comment.h" 120b57cec5SDimitry Andric #include "clang/AST/CommentBriefParser.h" 130b57cec5SDimitry Andric #include "clang/AST/CommentCommandTraits.h" 140b57cec5SDimitry Andric #include "clang/AST/CommentLexer.h" 150b57cec5SDimitry Andric #include "clang/AST/CommentParser.h" 160b57cec5SDimitry Andric #include "clang/AST/CommentSema.h" 170b57cec5SDimitry Andric #include "clang/Basic/CharInfo.h" 180b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 1981ad6265SDimitry Andric #include "llvm/ADT/StringExtras.h" 205ffd83dbSDimitry Andric #include "llvm/Support/Allocator.h" 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric using namespace clang; 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric namespace { 250b57cec5SDimitry Andric /// Get comment kind and bool describing if it is a trailing comment. 260b57cec5SDimitry Andric std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment, 270b57cec5SDimitry Andric bool ParseAllComments) { 280b57cec5SDimitry Andric const size_t MinCommentLength = ParseAllComments ? 2 : 3; 290b57cec5SDimitry Andric if ((Comment.size() < MinCommentLength) || Comment[0] != '/') 300b57cec5SDimitry Andric return std::make_pair(RawComment::RCK_Invalid, false); 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric RawComment::CommentKind K; 330b57cec5SDimitry Andric if (Comment[1] == '/') { 340b57cec5SDimitry Andric if (Comment.size() < 3) 350b57cec5SDimitry Andric return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric if (Comment[2] == '/') 380b57cec5SDimitry Andric K = RawComment::RCK_BCPLSlash; 390b57cec5SDimitry Andric else if (Comment[2] == '!') 400b57cec5SDimitry Andric K = RawComment::RCK_BCPLExcl; 410b57cec5SDimitry Andric else 420b57cec5SDimitry Andric return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); 430b57cec5SDimitry Andric } else { 440b57cec5SDimitry Andric assert(Comment.size() >= 4); 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric // Comment lexer does not understand escapes in comment markers, so pretend 470b57cec5SDimitry Andric // that this is not a comment. 480b57cec5SDimitry Andric if (Comment[1] != '*' || 490b57cec5SDimitry Andric Comment[Comment.size() - 2] != '*' || 500b57cec5SDimitry Andric Comment[Comment.size() - 1] != '/') 510b57cec5SDimitry Andric return std::make_pair(RawComment::RCK_Invalid, false); 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric if (Comment[2] == '*') 540b57cec5SDimitry Andric K = RawComment::RCK_JavaDoc; 550b57cec5SDimitry Andric else if (Comment[2] == '!') 560b57cec5SDimitry Andric K = RawComment::RCK_Qt; 570b57cec5SDimitry Andric else 580b57cec5SDimitry Andric return std::make_pair(RawComment::RCK_OrdinaryC, false); 590b57cec5SDimitry Andric } 600b57cec5SDimitry Andric const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<'); 610b57cec5SDimitry Andric return std::make_pair(K, TrailingComment); 620b57cec5SDimitry Andric } 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric bool mergedCommentIsTrailingComment(StringRef Comment) { 650b57cec5SDimitry Andric return (Comment.size() > 3) && (Comment[3] == '<'); 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric /// Returns true if R1 and R2 both have valid locations that start on the same 690b57cec5SDimitry Andric /// column. 700b57cec5SDimitry Andric bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1, 710b57cec5SDimitry Andric const RawComment &R2) { 720b57cec5SDimitry Andric SourceLocation L1 = R1.getBeginLoc(); 730b57cec5SDimitry Andric SourceLocation L2 = R2.getBeginLoc(); 740b57cec5SDimitry Andric bool Invalid = false; 750b57cec5SDimitry Andric unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid); 760b57cec5SDimitry Andric if (!Invalid) { 770b57cec5SDimitry Andric unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid); 780b57cec5SDimitry Andric return !Invalid && (C1 == C2); 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric return false; 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric } // unnamed namespace 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric /// Determines whether there is only whitespace in `Buffer` between `P` 850b57cec5SDimitry Andric /// and the previous line. 860b57cec5SDimitry Andric /// \param Buffer The buffer to search in. 870b57cec5SDimitry Andric /// \param P The offset from the beginning of `Buffer` to start from. 880b57cec5SDimitry Andric /// \return true if all of the characters in `Buffer` ranging from the closest 890b57cec5SDimitry Andric /// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1` 900b57cec5SDimitry Andric /// are whitespace. 910b57cec5SDimitry Andric static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) { 920b57cec5SDimitry Andric // Search backwards until we see linefeed or carriage return. 930b57cec5SDimitry Andric for (unsigned I = P; I != 0; --I) { 940b57cec5SDimitry Andric char C = Buffer[I - 1]; 950b57cec5SDimitry Andric if (isVerticalWhitespace(C)) 960b57cec5SDimitry Andric return true; 970b57cec5SDimitry Andric if (!isHorizontalWhitespace(C)) 980b57cec5SDimitry Andric return false; 990b57cec5SDimitry Andric } 1000b57cec5SDimitry Andric // We hit the beginning of the buffer. 1010b57cec5SDimitry Andric return true; 1020b57cec5SDimitry Andric } 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric /// Returns whether `K` is an ordinary comment kind. 1050b57cec5SDimitry Andric static bool isOrdinaryKind(RawComment::CommentKind K) { 1060b57cec5SDimitry Andric return (K == RawComment::RCK_OrdinaryBCPL) || 1070b57cec5SDimitry Andric (K == RawComment::RCK_OrdinaryC); 1080b57cec5SDimitry Andric } 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, 1110b57cec5SDimitry Andric const CommentOptions &CommentOpts, bool Merged) : 1120b57cec5SDimitry Andric Range(SR), RawTextValid(false), BriefTextValid(false), 1130b57cec5SDimitry Andric IsAttached(false), IsTrailingComment(false), 1140b57cec5SDimitry Andric IsAlmostTrailingComment(false) { 1150b57cec5SDimitry Andric // Extract raw comment text, if possible. 1160b57cec5SDimitry Andric if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) { 1170b57cec5SDimitry Andric Kind = RCK_Invalid; 1180b57cec5SDimitry Andric return; 1190b57cec5SDimitry Andric } 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric // Guess comment kind. 1220b57cec5SDimitry Andric std::pair<CommentKind, bool> K = 1230b57cec5SDimitry Andric getCommentKind(RawText, CommentOpts.ParseAllComments); 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric // Guess whether an ordinary comment is trailing. 1260b57cec5SDimitry Andric if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) { 1270b57cec5SDimitry Andric FileID BeginFileID; 1280b57cec5SDimitry Andric unsigned BeginOffset; 1290b57cec5SDimitry Andric std::tie(BeginFileID, BeginOffset) = 1300b57cec5SDimitry Andric SourceMgr.getDecomposedLoc(Range.getBegin()); 1310b57cec5SDimitry Andric if (BeginOffset != 0) { 1320b57cec5SDimitry Andric bool Invalid = false; 1330b57cec5SDimitry Andric const char *Buffer = 1340b57cec5SDimitry Andric SourceMgr.getBufferData(BeginFileID, &Invalid).data(); 1350b57cec5SDimitry Andric IsTrailingComment |= 1360b57cec5SDimitry Andric (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset)); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric } 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric if (!Merged) { 1410b57cec5SDimitry Andric Kind = K.first; 1420b57cec5SDimitry Andric IsTrailingComment |= K.second; 1430b57cec5SDimitry Andric 144*5f757f3fSDimitry Andric IsAlmostTrailingComment = 145*5f757f3fSDimitry Andric RawText.starts_with("//<") || RawText.starts_with("/*<"); 1460b57cec5SDimitry Andric } else { 1470b57cec5SDimitry Andric Kind = RCK_Merged; 1480b57cec5SDimitry Andric IsTrailingComment = 1490b57cec5SDimitry Andric IsTrailingComment || mergedCommentIsTrailingComment(RawText); 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const { 1540b57cec5SDimitry Andric FileID BeginFileID; 1550b57cec5SDimitry Andric FileID EndFileID; 1560b57cec5SDimitry Andric unsigned BeginOffset; 1570b57cec5SDimitry Andric unsigned EndOffset; 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric std::tie(BeginFileID, BeginOffset) = 1600b57cec5SDimitry Andric SourceMgr.getDecomposedLoc(Range.getBegin()); 1610b57cec5SDimitry Andric std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd()); 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric const unsigned Length = EndOffset - BeginOffset; 1640b57cec5SDimitry Andric if (Length < 2) 1650b57cec5SDimitry Andric return StringRef(); 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric // The comment can't begin in one file and end in another. 1680b57cec5SDimitry Andric assert(BeginFileID == EndFileID); 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric bool Invalid = false; 1710b57cec5SDimitry Andric const char *BufferStart = SourceMgr.getBufferData(BeginFileID, 1720b57cec5SDimitry Andric &Invalid).data(); 1730b57cec5SDimitry Andric if (Invalid) 1740b57cec5SDimitry Andric return StringRef(); 1750b57cec5SDimitry Andric 1760b57cec5SDimitry Andric return StringRef(BufferStart + BeginOffset, Length); 1770b57cec5SDimitry Andric } 1780b57cec5SDimitry Andric 1790b57cec5SDimitry Andric const char *RawComment::extractBriefText(const ASTContext &Context) const { 1800b57cec5SDimitry Andric // Lazily initialize RawText using the accessor before using it. 1810b57cec5SDimitry Andric (void)getRawText(Context.getSourceManager()); 1820b57cec5SDimitry Andric 1830b57cec5SDimitry Andric // Since we will be copying the resulting text, all allocations made during 1840b57cec5SDimitry Andric // parsing are garbage after resulting string is formed. Thus we can use 1850b57cec5SDimitry Andric // a separate allocator for all temporary stuff. 1860b57cec5SDimitry Andric llvm::BumpPtrAllocator Allocator; 1870b57cec5SDimitry Andric 1880b57cec5SDimitry Andric comments::Lexer L(Allocator, Context.getDiagnostics(), 1890b57cec5SDimitry Andric Context.getCommentCommandTraits(), 1900b57cec5SDimitry Andric Range.getBegin(), 1910b57cec5SDimitry Andric RawText.begin(), RawText.end()); 1920b57cec5SDimitry Andric comments::BriefParser P(L, Context.getCommentCommandTraits()); 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric const std::string Result = P.Parse(); 1950b57cec5SDimitry Andric const unsigned BriefTextLength = Result.size(); 1960b57cec5SDimitry Andric char *BriefTextPtr = new (Context) char[BriefTextLength + 1]; 1970b57cec5SDimitry Andric memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1); 1980b57cec5SDimitry Andric BriefText = BriefTextPtr; 1990b57cec5SDimitry Andric BriefTextValid = true; 2000b57cec5SDimitry Andric 2010b57cec5SDimitry Andric return BriefTextPtr; 2020b57cec5SDimitry Andric } 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric comments::FullComment *RawComment::parse(const ASTContext &Context, 2050b57cec5SDimitry Andric const Preprocessor *PP, 2060b57cec5SDimitry Andric const Decl *D) const { 2070b57cec5SDimitry Andric // Lazily initialize RawText using the accessor before using it. 2080b57cec5SDimitry Andric (void)getRawText(Context.getSourceManager()); 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(), 2110b57cec5SDimitry Andric Context.getCommentCommandTraits(), 2120b57cec5SDimitry Andric getSourceRange().getBegin(), 2130b57cec5SDimitry Andric RawText.begin(), RawText.end()); 2140b57cec5SDimitry Andric comments::Sema S(Context.getAllocator(), Context.getSourceManager(), 2150b57cec5SDimitry Andric Context.getDiagnostics(), 2160b57cec5SDimitry Andric Context.getCommentCommandTraits(), 2170b57cec5SDimitry Andric PP); 2180b57cec5SDimitry Andric S.setDecl(D); 2190b57cec5SDimitry Andric comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(), 2200b57cec5SDimitry Andric Context.getDiagnostics(), 2210b57cec5SDimitry Andric Context.getCommentCommandTraits()); 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric return P.parseFullComment(); 2240b57cec5SDimitry Andric } 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric static bool onlyWhitespaceBetween(SourceManager &SM, 2270b57cec5SDimitry Andric SourceLocation Loc1, SourceLocation Loc2, 2280b57cec5SDimitry Andric unsigned MaxNewlinesAllowed) { 2290b57cec5SDimitry Andric std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1); 2300b57cec5SDimitry Andric std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2); 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric // Question does not make sense if locations are in different files. 2330b57cec5SDimitry Andric if (Loc1Info.first != Loc2Info.first) 2340b57cec5SDimitry Andric return false; 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric bool Invalid = false; 2370b57cec5SDimitry Andric const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data(); 2380b57cec5SDimitry Andric if (Invalid) 2390b57cec5SDimitry Andric return false; 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric unsigned NumNewlines = 0; 2420b57cec5SDimitry Andric assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!"); 2430b57cec5SDimitry Andric // Look for non-whitespace characters and remember any newlines seen. 2440b57cec5SDimitry Andric for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) { 2450b57cec5SDimitry Andric switch (Buffer[I]) { 2460b57cec5SDimitry Andric default: 2470b57cec5SDimitry Andric return false; 2480b57cec5SDimitry Andric case ' ': 2490b57cec5SDimitry Andric case '\t': 2500b57cec5SDimitry Andric case '\f': 2510b57cec5SDimitry Andric case '\v': 2520b57cec5SDimitry Andric break; 2530b57cec5SDimitry Andric case '\r': 2540b57cec5SDimitry Andric case '\n': 2550b57cec5SDimitry Andric ++NumNewlines; 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric // Check if we have found more than the maximum allowed number of 2580b57cec5SDimitry Andric // newlines. 2590b57cec5SDimitry Andric if (NumNewlines > MaxNewlinesAllowed) 2600b57cec5SDimitry Andric return false; 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric // Collapse \r\n and \n\r into a single newline. 2630b57cec5SDimitry Andric if (I + 1 != Loc2Info.second && 2640b57cec5SDimitry Andric (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') && 2650b57cec5SDimitry Andric Buffer[I] != Buffer[I + 1]) 2660b57cec5SDimitry Andric ++I; 2670b57cec5SDimitry Andric break; 2680b57cec5SDimitry Andric } 2690b57cec5SDimitry Andric } 2700b57cec5SDimitry Andric 2710b57cec5SDimitry Andric return true; 2720b57cec5SDimitry Andric } 2730b57cec5SDimitry Andric 2740b57cec5SDimitry Andric void RawCommentList::addComment(const RawComment &RC, 2750b57cec5SDimitry Andric const CommentOptions &CommentOpts, 2760b57cec5SDimitry Andric llvm::BumpPtrAllocator &Allocator) { 2770b57cec5SDimitry Andric if (RC.isInvalid()) 2780b57cec5SDimitry Andric return; 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric // Ordinary comments are not interesting for us. 2810b57cec5SDimitry Andric if (RC.isOrdinary() && !CommentOpts.ParseAllComments) 2820b57cec5SDimitry Andric return; 2830b57cec5SDimitry Andric 284a7dea167SDimitry Andric std::pair<FileID, unsigned> Loc = 285a7dea167SDimitry Andric SourceMgr.getDecomposedLoc(RC.getBeginLoc()); 286a7dea167SDimitry Andric 287a7dea167SDimitry Andric const FileID CommentFile = Loc.first; 288a7dea167SDimitry Andric const unsigned CommentOffset = Loc.second; 289a7dea167SDimitry Andric 2900b57cec5SDimitry Andric // If this is the first Doxygen comment, save it (because there isn't 2910b57cec5SDimitry Andric // anything to merge it with). 292a7dea167SDimitry Andric if (OrderedComments[CommentFile].empty()) { 293a7dea167SDimitry Andric OrderedComments[CommentFile][CommentOffset] = 294a7dea167SDimitry Andric new (Allocator) RawComment(RC); 2950b57cec5SDimitry Andric return; 2960b57cec5SDimitry Andric } 2970b57cec5SDimitry Andric 298a7dea167SDimitry Andric const RawComment &C1 = *OrderedComments[CommentFile].rbegin()->second; 2990b57cec5SDimitry Andric const RawComment &C2 = RC; 3000b57cec5SDimitry Andric 3010b57cec5SDimitry Andric // Merge comments only if there is only whitespace between them. 3020b57cec5SDimitry Andric // Can't merge trailing and non-trailing comments unless the second is 3030b57cec5SDimitry Andric // non-trailing ordinary in the same column, as in the case: 3040b57cec5SDimitry Andric // int x; // documents x 3050b57cec5SDimitry Andric // // more text 3060b57cec5SDimitry Andric // versus: 3070b57cec5SDimitry Andric // int x; // documents x 3080b57cec5SDimitry Andric // int y; // documents y 3090b57cec5SDimitry Andric // or: 3100b57cec5SDimitry Andric // int x; // documents x 3110b57cec5SDimitry Andric // // documents y 3120b57cec5SDimitry Andric // int y; 3130b57cec5SDimitry Andric // Merge comments if they are on same or consecutive lines. 3140b57cec5SDimitry Andric if ((C1.isTrailingComment() == C2.isTrailingComment() || 3150b57cec5SDimitry Andric (C1.isTrailingComment() && !C2.isTrailingComment() && 3160b57cec5SDimitry Andric isOrdinaryKind(C2.getKind()) && 3170b57cec5SDimitry Andric commentsStartOnSameColumn(SourceMgr, C1, C2))) && 3180b57cec5SDimitry Andric onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(), 3190b57cec5SDimitry Andric /*MaxNewlinesAllowed=*/1)) { 3200b57cec5SDimitry Andric SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc()); 321a7dea167SDimitry Andric *OrderedComments[CommentFile].rbegin()->second = 322a7dea167SDimitry Andric RawComment(SourceMgr, MergedRange, CommentOpts, true); 3230b57cec5SDimitry Andric } else { 324a7dea167SDimitry Andric OrderedComments[CommentFile][CommentOffset] = 325a7dea167SDimitry Andric new (Allocator) RawComment(RC); 3260b57cec5SDimitry Andric } 3270b57cec5SDimitry Andric } 3280b57cec5SDimitry Andric 329a7dea167SDimitry Andric const std::map<unsigned, RawComment *> * 330a7dea167SDimitry Andric RawCommentList::getCommentsInFile(FileID File) const { 331a7dea167SDimitry Andric auto CommentsInFile = OrderedComments.find(File); 332a7dea167SDimitry Andric if (CommentsInFile == OrderedComments.end()) 333a7dea167SDimitry Andric return nullptr; 3340b57cec5SDimitry Andric 335a7dea167SDimitry Andric return &CommentsInFile->second; 336a7dea167SDimitry Andric } 337a7dea167SDimitry Andric 338a7dea167SDimitry Andric bool RawCommentList::empty() const { return OrderedComments.empty(); } 339a7dea167SDimitry Andric 340a7dea167SDimitry Andric unsigned RawCommentList::getCommentBeginLine(RawComment *C, FileID File, 341a7dea167SDimitry Andric unsigned Offset) const { 342a7dea167SDimitry Andric auto Cached = CommentBeginLine.find(C); 343a7dea167SDimitry Andric if (Cached != CommentBeginLine.end()) 344a7dea167SDimitry Andric return Cached->second; 345a7dea167SDimitry Andric const unsigned Line = SourceMgr.getLineNumber(File, Offset); 346a7dea167SDimitry Andric CommentBeginLine[C] = Line; 347a7dea167SDimitry Andric return Line; 348a7dea167SDimitry Andric } 349a7dea167SDimitry Andric 350a7dea167SDimitry Andric unsigned RawCommentList::getCommentEndOffset(RawComment *C) const { 351a7dea167SDimitry Andric auto Cached = CommentEndOffset.find(C); 352a7dea167SDimitry Andric if (Cached != CommentEndOffset.end()) 353a7dea167SDimitry Andric return Cached->second; 354a7dea167SDimitry Andric const unsigned Offset = 355a7dea167SDimitry Andric SourceMgr.getDecomposedLoc(C->getSourceRange().getEnd()).second; 356a7dea167SDimitry Andric CommentEndOffset[C] = Offset; 357a7dea167SDimitry Andric return Offset; 3580b57cec5SDimitry Andric } 3590b57cec5SDimitry Andric 3600b57cec5SDimitry Andric std::string RawComment::getFormattedText(const SourceManager &SourceMgr, 3610b57cec5SDimitry Andric DiagnosticsEngine &Diags) const { 3620b57cec5SDimitry Andric llvm::StringRef CommentText = getRawText(SourceMgr); 3630b57cec5SDimitry Andric if (CommentText.empty()) 3640b57cec5SDimitry Andric return ""; 3650b57cec5SDimitry Andric 36681ad6265SDimitry Andric std::string Result; 36781ad6265SDimitry Andric for (const RawComment::CommentLine &Line : 36881ad6265SDimitry Andric getFormattedLines(SourceMgr, Diags)) 36981ad6265SDimitry Andric Result += Line.Text + "\n"; 37081ad6265SDimitry Andric 37181ad6265SDimitry Andric auto LastChar = Result.find_last_not_of('\n'); 37281ad6265SDimitry Andric Result.erase(LastChar + 1, Result.size()); 37381ad6265SDimitry Andric 37481ad6265SDimitry Andric return Result; 37581ad6265SDimitry Andric } 37681ad6265SDimitry Andric 37781ad6265SDimitry Andric std::vector<RawComment::CommentLine> 37881ad6265SDimitry Andric RawComment::getFormattedLines(const SourceManager &SourceMgr, 37981ad6265SDimitry Andric DiagnosticsEngine &Diags) const { 38081ad6265SDimitry Andric llvm::StringRef CommentText = getRawText(SourceMgr); 38181ad6265SDimitry Andric if (CommentText.empty()) 38281ad6265SDimitry Andric return {}; 38381ad6265SDimitry Andric 3840b57cec5SDimitry Andric llvm::BumpPtrAllocator Allocator; 3850b57cec5SDimitry Andric // We do not parse any commands, so CommentOptions are ignored by 3860b57cec5SDimitry Andric // comments::Lexer. Therefore, we just use default-constructed options. 3870b57cec5SDimitry Andric CommentOptions DefOpts; 3880b57cec5SDimitry Andric comments::CommandTraits EmptyTraits(Allocator, DefOpts); 3890b57cec5SDimitry Andric comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(), 3900b57cec5SDimitry Andric CommentText.begin(), CommentText.end(), 3910b57cec5SDimitry Andric /*ParseCommands=*/false); 3920b57cec5SDimitry Andric 39381ad6265SDimitry Andric std::vector<RawComment::CommentLine> Result; 3940b57cec5SDimitry Andric // A column number of the first non-whitespace token in the comment text. 3950b57cec5SDimitry Andric // We skip whitespace up to this column, but keep the whitespace after this 3960b57cec5SDimitry Andric // column. IndentColumn is calculated when lexing the first line and reused 3970b57cec5SDimitry Andric // for the rest of lines. 3980b57cec5SDimitry Andric unsigned IndentColumn = 0; 3990b57cec5SDimitry Andric 40081ad6265SDimitry Andric // Record the line number of the last processed comment line. 40181ad6265SDimitry Andric // For block-style comments, an extra newline token will be produced after 40281ad6265SDimitry Andric // the end-comment marker, e.g.: 40381ad6265SDimitry Andric // /** This is a multi-line comment block. 40481ad6265SDimitry Andric // The lexer will produce two newline tokens here > */ 40581ad6265SDimitry Andric // previousLine will record the line number when we previously saw a newline 40681ad6265SDimitry Andric // token and recorded a comment line. If we see another newline token on the 40781ad6265SDimitry Andric // same line, don't record anything in between. 40881ad6265SDimitry Andric unsigned PreviousLine = 0; 40981ad6265SDimitry Andric 4100b57cec5SDimitry Andric // Processes one line of the comment and adds it to the result. 4110b57cec5SDimitry Andric // Handles skipping the indent at the start of the line. 4120b57cec5SDimitry Andric // Returns false when eof is reached and true otherwise. 4130b57cec5SDimitry Andric auto LexLine = [&](bool IsFirstLine) -> bool { 4140b57cec5SDimitry Andric comments::Token Tok; 4150b57cec5SDimitry Andric // Lex the first token on the line. We handle it separately, because we to 4160b57cec5SDimitry Andric // fix up its indentation. 4170b57cec5SDimitry Andric L.lex(Tok); 4180b57cec5SDimitry Andric if (Tok.is(comments::tok::eof)) 4190b57cec5SDimitry Andric return false; 4200b57cec5SDimitry Andric if (Tok.is(comments::tok::newline)) { 42181ad6265SDimitry Andric PresumedLoc Loc = SourceMgr.getPresumedLoc(Tok.getLocation()); 42281ad6265SDimitry Andric if (Loc.getLine() != PreviousLine) { 42381ad6265SDimitry Andric Result.emplace_back("", Loc, Loc); 42481ad6265SDimitry Andric PreviousLine = Loc.getLine(); 42581ad6265SDimitry Andric } 4260b57cec5SDimitry Andric return true; 4270b57cec5SDimitry Andric } 42881ad6265SDimitry Andric SmallString<124> Line; 4290b57cec5SDimitry Andric llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr); 4300b57cec5SDimitry Andric bool LocInvalid = false; 4310b57cec5SDimitry Andric unsigned TokColumn = 4320b57cec5SDimitry Andric SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid); 4330b57cec5SDimitry Andric assert(!LocInvalid && "getFormattedText for invalid location"); 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric // Amount of leading whitespace in TokText. 4360b57cec5SDimitry Andric size_t WhitespaceLen = TokText.find_first_not_of(" \t"); 4370b57cec5SDimitry Andric if (WhitespaceLen == StringRef::npos) 4380b57cec5SDimitry Andric WhitespaceLen = TokText.size(); 4390b57cec5SDimitry Andric // Remember the amount of whitespace we skipped in the first line to remove 4400b57cec5SDimitry Andric // indent up to that column in the following lines. 4410b57cec5SDimitry Andric if (IsFirstLine) 4420b57cec5SDimitry Andric IndentColumn = TokColumn + WhitespaceLen; 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric // Amount of leading whitespace we actually want to skip. 4450b57cec5SDimitry Andric // For the first line we skip all the whitespace. 4460b57cec5SDimitry Andric // For the rest of the lines, we skip whitespace up to IndentColumn. 4470b57cec5SDimitry Andric unsigned SkipLen = 4480b57cec5SDimitry Andric IsFirstLine 4490b57cec5SDimitry Andric ? WhitespaceLen 4500b57cec5SDimitry Andric : std::min<size_t>( 4510b57cec5SDimitry Andric WhitespaceLen, 4520b57cec5SDimitry Andric std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0)); 4530b57cec5SDimitry Andric llvm::StringRef Trimmed = TokText.drop_front(SkipLen); 45481ad6265SDimitry Andric Line += Trimmed; 45581ad6265SDimitry Andric // Get the beginning location of the adjusted comment line. 45681ad6265SDimitry Andric PresumedLoc Begin = 45781ad6265SDimitry Andric SourceMgr.getPresumedLoc(Tok.getLocation().getLocWithOffset(SkipLen)); 45881ad6265SDimitry Andric 4590b57cec5SDimitry Andric // Lex all tokens in the rest of the line. 4600b57cec5SDimitry Andric for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) { 4610b57cec5SDimitry Andric if (Tok.is(comments::tok::newline)) { 46281ad6265SDimitry Andric // Get the ending location of the comment line. 46381ad6265SDimitry Andric PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation()); 46481ad6265SDimitry Andric if (End.getLine() != PreviousLine) { 46581ad6265SDimitry Andric Result.emplace_back(Line, Begin, End); 46681ad6265SDimitry Andric PreviousLine = End.getLine(); 46781ad6265SDimitry Andric } 4680b57cec5SDimitry Andric return true; 4690b57cec5SDimitry Andric } 47081ad6265SDimitry Andric Line += L.getSpelling(Tok, SourceMgr); 4710b57cec5SDimitry Andric } 47281ad6265SDimitry Andric PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation()); 47381ad6265SDimitry Andric Result.emplace_back(Line, Begin, End); 4740b57cec5SDimitry Andric // We've reached the end of file token. 4750b57cec5SDimitry Andric return false; 4760b57cec5SDimitry Andric }; 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric // Process first line separately to remember indent for the following lines. 47981ad6265SDimitry Andric if (!LexLine(/*IsFirstLine=*/true)) 4800b57cec5SDimitry Andric return Result; 4810b57cec5SDimitry Andric // Process the rest of the lines. 4820b57cec5SDimitry Andric while (LexLine(/*IsFirstLine=*/false)) 4830b57cec5SDimitry Andric ; 4840b57cec5SDimitry Andric return Result; 4850b57cec5SDimitry Andric } 486