1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/AST/CommentParser.h" 10 #include "clang/AST/CommentCommandTraits.h" 11 #include "clang/AST/CommentDiagnostic.h" 12 #include "clang/AST/CommentSema.h" 13 #include "clang/Basic/CharInfo.h" 14 #include "clang/Basic/SourceManager.h" 15 #include "llvm/Support/ErrorHandling.h" 16 17 namespace clang { 18 19 static inline bool isWhitespace(llvm::StringRef S) { 20 for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) { 21 if (!isWhitespace(*I)) 22 return false; 23 } 24 return true; 25 } 26 27 namespace comments { 28 29 /// Re-lexes a sequence of tok::text tokens. 30 class TextTokenRetokenizer { 31 llvm::BumpPtrAllocator &Allocator; 32 Parser &P; 33 34 /// This flag is set when there are no more tokens we can fetch from lexer. 35 bool NoMoreInterestingTokens; 36 37 /// Token buffer: tokens we have processed and lookahead. 38 SmallVector<Token, 16> Toks; 39 40 /// A position in \c Toks. 41 struct Position { 42 const char *BufferStart; 43 const char *BufferEnd; 44 const char *BufferPtr; 45 SourceLocation BufferStartLoc; 46 unsigned CurToken; 47 }; 48 49 /// Current position in Toks. 50 Position Pos; 51 52 bool isEnd() const { 53 return Pos.CurToken >= Toks.size(); 54 } 55 56 /// Sets up the buffer pointers to point to current token. 57 void setupBuffer() { 58 assert(!isEnd()); 59 const Token &Tok = Toks[Pos.CurToken]; 60 61 Pos.BufferStart = Tok.getText().begin(); 62 Pos.BufferEnd = Tok.getText().end(); 63 Pos.BufferPtr = Pos.BufferStart; 64 Pos.BufferStartLoc = Tok.getLocation(); 65 } 66 67 SourceLocation getSourceLocation() const { 68 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart; 69 return Pos.BufferStartLoc.getLocWithOffset(CharNo); 70 } 71 72 char peek() const { 73 assert(!isEnd()); 74 assert(Pos.BufferPtr != Pos.BufferEnd); 75 return *Pos.BufferPtr; 76 } 77 78 void consumeChar() { 79 assert(!isEnd()); 80 assert(Pos.BufferPtr != Pos.BufferEnd); 81 Pos.BufferPtr++; 82 if (Pos.BufferPtr == Pos.BufferEnd) { 83 Pos.CurToken++; 84 if (isEnd() && !addToken()) 85 return; 86 87 assert(!isEnd()); 88 setupBuffer(); 89 } 90 } 91 92 /// Extract a template type 93 bool lexTemplate(SmallString<32> &WordText) { 94 unsigned BracketCount = 0; 95 while (!isEnd()) { 96 const char C = peek(); 97 WordText.push_back(C); 98 consumeChar(); 99 switch (C) { 100 case '<': { 101 BracketCount++; 102 break; 103 } 104 case '>': { 105 BracketCount--; 106 if (!BracketCount) 107 return true; 108 break; 109 } 110 default: 111 break; 112 } 113 } 114 return false; 115 } 116 117 /// Add a token. 118 /// Returns true on success, false if there are no interesting tokens to 119 /// fetch from lexer. 120 bool addToken() { 121 if (NoMoreInterestingTokens) 122 return false; 123 124 if (P.Tok.is(tok::newline)) { 125 // If we see a single newline token between text tokens, skip it. 126 Token Newline = P.Tok; 127 P.consumeToken(); 128 if (P.Tok.isNot(tok::text)) { 129 P.putBack(Newline); 130 NoMoreInterestingTokens = true; 131 return false; 132 } 133 } 134 if (P.Tok.isNot(tok::text)) { 135 NoMoreInterestingTokens = true; 136 return false; 137 } 138 139 Toks.push_back(P.Tok); 140 P.consumeToken(); 141 if (Toks.size() == 1) 142 setupBuffer(); 143 return true; 144 } 145 146 void consumeWhitespace() { 147 while (!isEnd()) { 148 if (isWhitespace(peek())) 149 consumeChar(); 150 else 151 break; 152 } 153 } 154 155 void formTokenWithChars(Token &Result, 156 SourceLocation Loc, 157 const char *TokBegin, 158 unsigned TokLength, 159 StringRef Text) { 160 Result.setLocation(Loc); 161 Result.setKind(tok::text); 162 Result.setLength(TokLength); 163 #ifndef NDEBUG 164 Result.TextPtr = "<UNSET>"; 165 Result.IntVal = 7; 166 #endif 167 Result.setText(Text); 168 } 169 170 public: 171 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P): 172 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) { 173 Pos.CurToken = 0; 174 addToken(); 175 } 176 177 /// Extract a type argument 178 bool lexType(Token &Tok) { 179 if (isEnd()) 180 return false; 181 182 // Save current position in case we need to rollback because the type is 183 // empty. 184 Position SavedPos = Pos; 185 186 // Consume any leading whitespace. 187 consumeWhitespace(); 188 SmallString<32> WordText; 189 const char *WordBegin = Pos.BufferPtr; 190 SourceLocation Loc = getSourceLocation(); 191 192 while (!isEnd()) { 193 const char C = peek(); 194 // For non-whitespace characters we check if it's a template or otherwise 195 // continue reading the text into a word. 196 if (!isWhitespace(C)) { 197 if (C == '<') { 198 if (!lexTemplate(WordText)) 199 return false; 200 } else { 201 WordText.push_back(C); 202 consumeChar(); 203 } 204 } else { 205 consumeChar(); 206 break; 207 } 208 } 209 210 const unsigned Length = WordText.size(); 211 if (Length == 0) { 212 Pos = SavedPos; 213 return false; 214 } 215 216 char *TextPtr = Allocator.Allocate<char>(Length + 1); 217 218 memcpy(TextPtr, WordText.c_str(), Length + 1); 219 StringRef Text = StringRef(TextPtr, Length); 220 221 formTokenWithChars(Tok, Loc, WordBegin, Length, Text); 222 return true; 223 } 224 225 // Check if this line starts with @par or \par 226 bool startsWithParCommand() { 227 unsigned Offset = 1; 228 229 // Skip all whitespace characters at the beginning. 230 // This needs to backtrack because Pos has already advanced past the 231 // actual \par or @par command by the time this function is called. 232 while (isWhitespace(*(Pos.BufferPtr - Offset))) 233 Offset++; 234 235 // Once we've reached the whitespace, backtrack and check if the previous 236 // four characters are \par or @par. 237 llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4); 238 return LineStart.starts_with("\\par") || LineStart.starts_with("@par"); 239 } 240 241 /// Extract a par command argument-header. 242 bool lexParHeading(Token &Tok) { 243 if (isEnd()) 244 return false; 245 246 Position SavedPos = Pos; 247 248 consumeWhitespace(); 249 SmallString<32> WordText; 250 const char *WordBegin = Pos.BufferPtr; 251 SourceLocation Loc = getSourceLocation(); 252 253 if (!startsWithParCommand()) 254 return false; 255 256 // Read until the end of this token, which is effectively the end of the 257 // line. This gets us the content of the par header, if there is one. 258 while (!isEnd()) { 259 WordText.push_back(peek()); 260 if (Pos.BufferPtr + 1 == Pos.BufferEnd) { 261 consumeChar(); 262 break; 263 } 264 consumeChar(); 265 } 266 267 unsigned Length = WordText.size(); 268 if (Length == 0) { 269 Pos = SavedPos; 270 return false; 271 } 272 273 char *TextPtr = Allocator.Allocate<char>(Length + 1); 274 275 memcpy(TextPtr, WordText.c_str(), Length + 1); 276 StringRef Text = StringRef(TextPtr, Length); 277 278 formTokenWithChars(Tok, Loc, WordBegin, Length, Text); 279 return true; 280 } 281 282 /// Extract a word -- sequence of non-whitespace characters. 283 bool lexWord(Token &Tok) { 284 if (isEnd()) 285 return false; 286 287 Position SavedPos = Pos; 288 289 consumeWhitespace(); 290 SmallString<32> WordText; 291 const char *WordBegin = Pos.BufferPtr; 292 SourceLocation Loc = getSourceLocation(); 293 while (!isEnd()) { 294 const char C = peek(); 295 if (!isWhitespace(C)) { 296 WordText.push_back(C); 297 consumeChar(); 298 } else 299 break; 300 } 301 const unsigned Length = WordText.size(); 302 if (Length == 0) { 303 Pos = SavedPos; 304 return false; 305 } 306 307 char *TextPtr = Allocator.Allocate<char>(Length + 1); 308 309 memcpy(TextPtr, WordText.c_str(), Length + 1); 310 StringRef Text = StringRef(TextPtr, Length); 311 312 formTokenWithChars(Tok, Loc, WordBegin, Length, Text); 313 return true; 314 } 315 316 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) { 317 if (isEnd()) 318 return false; 319 320 Position SavedPos = Pos; 321 322 consumeWhitespace(); 323 SmallString<32> WordText; 324 const char *WordBegin = Pos.BufferPtr; 325 SourceLocation Loc = getSourceLocation(); 326 bool Error = false; 327 if (!isEnd()) { 328 const char C = peek(); 329 if (C == OpenDelim) { 330 WordText.push_back(C); 331 consumeChar(); 332 } else 333 Error = true; 334 } 335 char C = '\0'; 336 while (!Error && !isEnd()) { 337 C = peek(); 338 WordText.push_back(C); 339 consumeChar(); 340 if (C == CloseDelim) 341 break; 342 } 343 if (!Error && C != CloseDelim) 344 Error = true; 345 346 if (Error) { 347 Pos = SavedPos; 348 return false; 349 } 350 351 const unsigned Length = WordText.size(); 352 char *TextPtr = Allocator.Allocate<char>(Length + 1); 353 354 memcpy(TextPtr, WordText.c_str(), Length + 1); 355 StringRef Text = StringRef(TextPtr, Length); 356 357 formTokenWithChars(Tok, Loc, WordBegin, 358 Pos.BufferPtr - WordBegin, Text); 359 return true; 360 } 361 362 /// Put back tokens that we didn't consume. 363 void putBackLeftoverTokens() { 364 if (isEnd()) 365 return; 366 367 bool HavePartialTok = false; 368 Token PartialTok; 369 if (Pos.BufferPtr != Pos.BufferStart) { 370 formTokenWithChars(PartialTok, getSourceLocation(), 371 Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr, 372 StringRef(Pos.BufferPtr, 373 Pos.BufferEnd - Pos.BufferPtr)); 374 HavePartialTok = true; 375 Pos.CurToken++; 376 } 377 378 P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end())); 379 Pos.CurToken = Toks.size(); 380 381 if (HavePartialTok) 382 P.putBack(PartialTok); 383 } 384 }; 385 386 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator, 387 const SourceManager &SourceMgr, DiagnosticsEngine &Diags, 388 const CommandTraits &Traits): 389 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags), 390 Traits(Traits) { 391 consumeToken(); 392 } 393 394 void Parser::parseParamCommandArgs(ParamCommandComment *PC, 395 TextTokenRetokenizer &Retokenizer) { 396 Token Arg; 397 // Check if argument looks like direction specification: [dir] 398 // e.g., [in], [out], [in,out] 399 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) 400 S.actOnParamCommandDirectionArg(PC, 401 Arg.getLocation(), 402 Arg.getEndLocation(), 403 Arg.getText()); 404 405 if (Retokenizer.lexWord(Arg)) 406 S.actOnParamCommandParamNameArg(PC, 407 Arg.getLocation(), 408 Arg.getEndLocation(), 409 Arg.getText()); 410 } 411 412 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC, 413 TextTokenRetokenizer &Retokenizer) { 414 Token Arg; 415 if (Retokenizer.lexWord(Arg)) 416 S.actOnTParamCommandParamNameArg(TPC, 417 Arg.getLocation(), 418 Arg.getEndLocation(), 419 Arg.getText()); 420 } 421 422 ArrayRef<Comment::Argument> 423 Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) { 424 auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) 425 Comment::Argument[NumArgs]; 426 unsigned ParsedArgs = 0; 427 Token Arg; 428 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { 429 Args[ParsedArgs] = Comment::Argument{ 430 SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; 431 ParsedArgs++; 432 } 433 434 return llvm::ArrayRef(Args, ParsedArgs); 435 } 436 437 ArrayRef<Comment::Argument> 438 Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, 439 unsigned NumArgs) { 440 auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) 441 Comment::Argument[NumArgs]; 442 unsigned ParsedArgs = 0; 443 Token Arg; 444 445 while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) { 446 Args[ParsedArgs] = Comment::Argument{ 447 SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; 448 ParsedArgs++; 449 } 450 451 return llvm::ArrayRef(Args, ParsedArgs); 452 } 453 454 ArrayRef<Comment::Argument> 455 Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer, 456 unsigned NumArgs) { 457 assert(NumArgs > 0); 458 auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) 459 Comment::Argument[NumArgs]; 460 unsigned ParsedArgs = 0; 461 Token Arg; 462 463 while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Arg)) { 464 Args[ParsedArgs] = Comment::Argument{ 465 SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; 466 ParsedArgs++; 467 } 468 469 return llvm::ArrayRef(Args, ParsedArgs); 470 } 471 472 BlockCommandComment *Parser::parseBlockCommand() { 473 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); 474 475 ParamCommandComment *PC = nullptr; 476 TParamCommandComment *TPC = nullptr; 477 BlockCommandComment *BC = nullptr; 478 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); 479 CommandMarkerKind CommandMarker = 480 Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At; 481 if (Info->IsParamCommand) { 482 PC = S.actOnParamCommandStart(Tok.getLocation(), 483 Tok.getEndLocation(), 484 Tok.getCommandID(), 485 CommandMarker); 486 } else if (Info->IsTParamCommand) { 487 TPC = S.actOnTParamCommandStart(Tok.getLocation(), 488 Tok.getEndLocation(), 489 Tok.getCommandID(), 490 CommandMarker); 491 } else { 492 BC = S.actOnBlockCommandStart(Tok.getLocation(), 493 Tok.getEndLocation(), 494 Tok.getCommandID(), 495 CommandMarker); 496 } 497 consumeToken(); 498 499 if (isTokBlockCommand()) { 500 // Block command ahead. We can't nest block commands, so pretend that this 501 // command has an empty argument. 502 ParagraphComment *Paragraph = S.actOnParagraphComment(std::nullopt); 503 if (PC) { 504 S.actOnParamCommandFinish(PC, Paragraph); 505 return PC; 506 } else if (TPC) { 507 S.actOnTParamCommandFinish(TPC, Paragraph); 508 return TPC; 509 } else { 510 S.actOnBlockCommandFinish(BC, Paragraph); 511 return BC; 512 } 513 } 514 515 if (PC || TPC || Info->NumArgs > 0) { 516 // In order to parse command arguments we need to retokenize a few 517 // following text tokens. 518 TextTokenRetokenizer Retokenizer(Allocator, *this); 519 520 if (PC) 521 parseParamCommandArgs(PC, Retokenizer); 522 else if (TPC) 523 parseTParamCommandArgs(TPC, Retokenizer); 524 else if (Info->IsThrowsCommand) 525 S.actOnBlockCommandArgs( 526 BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs)); 527 else if (Info->IsParCommand) 528 S.actOnBlockCommandArgs(BC, 529 parseParCommandArgs(Retokenizer, Info->NumArgs)); 530 else 531 S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs)); 532 533 Retokenizer.putBackLeftoverTokens(); 534 } 535 536 // If there's a block command ahead, we will attach an empty paragraph to 537 // this command. 538 bool EmptyParagraph = false; 539 if (isTokBlockCommand()) 540 EmptyParagraph = true; 541 else if (Tok.is(tok::newline)) { 542 Token PrevTok = Tok; 543 consumeToken(); 544 EmptyParagraph = isTokBlockCommand(); 545 putBack(PrevTok); 546 } 547 548 ParagraphComment *Paragraph; 549 if (EmptyParagraph) 550 Paragraph = S.actOnParagraphComment(std::nullopt); 551 else { 552 BlockContentComment *Block = parseParagraphOrBlockCommand(); 553 // Since we have checked for a block command, we should have parsed a 554 // paragraph. 555 Paragraph = cast<ParagraphComment>(Block); 556 } 557 558 if (PC) { 559 S.actOnParamCommandFinish(PC, Paragraph); 560 return PC; 561 } else if (TPC) { 562 S.actOnTParamCommandFinish(TPC, Paragraph); 563 return TPC; 564 } else { 565 S.actOnBlockCommandFinish(BC, Paragraph); 566 return BC; 567 } 568 } 569 570 InlineCommandComment *Parser::parseInlineCommand() { 571 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); 572 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); 573 574 const Token CommandTok = Tok; 575 consumeToken(); 576 577 TextTokenRetokenizer Retokenizer(Allocator, *this); 578 ArrayRef<Comment::Argument> Args = 579 parseCommandArgs(Retokenizer, Info->NumArgs); 580 581 InlineCommandComment *IC = S.actOnInlineCommand( 582 CommandTok.getLocation(), CommandTok.getEndLocation(), 583 CommandTok.getCommandID(), Args); 584 585 if (Args.size() < Info->NumArgs) { 586 Diag(CommandTok.getEndLocation().getLocWithOffset(1), 587 diag::warn_doc_inline_command_not_enough_arguments) 588 << CommandTok.is(tok::at_command) << Info->Name << Args.size() 589 << Info->NumArgs 590 << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation()); 591 } 592 593 Retokenizer.putBackLeftoverTokens(); 594 595 return IC; 596 } 597 598 HTMLStartTagComment *Parser::parseHTMLStartTag() { 599 assert(Tok.is(tok::html_start_tag)); 600 HTMLStartTagComment *HST = 601 S.actOnHTMLStartTagStart(Tok.getLocation(), 602 Tok.getHTMLTagStartName()); 603 consumeToken(); 604 605 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs; 606 while (true) { 607 switch (Tok.getKind()) { 608 case tok::html_ident: { 609 Token Ident = Tok; 610 consumeToken(); 611 if (Tok.isNot(tok::html_equals)) { 612 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), 613 Ident.getHTMLIdent())); 614 continue; 615 } 616 Token Equals = Tok; 617 consumeToken(); 618 if (Tok.isNot(tok::html_quoted_string)) { 619 Diag(Tok.getLocation(), 620 diag::warn_doc_html_start_tag_expected_quoted_string) 621 << SourceRange(Equals.getLocation()); 622 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), 623 Ident.getHTMLIdent())); 624 while (Tok.is(tok::html_equals) || 625 Tok.is(tok::html_quoted_string)) 626 consumeToken(); 627 continue; 628 } 629 Attrs.push_back(HTMLStartTagComment::Attribute( 630 Ident.getLocation(), 631 Ident.getHTMLIdent(), 632 Equals.getLocation(), 633 SourceRange(Tok.getLocation(), 634 Tok.getEndLocation()), 635 Tok.getHTMLQuotedString())); 636 consumeToken(); 637 continue; 638 } 639 640 case tok::html_greater: 641 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 642 Tok.getLocation(), 643 /* IsSelfClosing = */ false); 644 consumeToken(); 645 return HST; 646 647 case tok::html_slash_greater: 648 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 649 Tok.getLocation(), 650 /* IsSelfClosing = */ true); 651 consumeToken(); 652 return HST; 653 654 case tok::html_equals: 655 case tok::html_quoted_string: 656 Diag(Tok.getLocation(), 657 diag::warn_doc_html_start_tag_expected_ident_or_greater); 658 while (Tok.is(tok::html_equals) || 659 Tok.is(tok::html_quoted_string)) 660 consumeToken(); 661 if (Tok.is(tok::html_ident) || 662 Tok.is(tok::html_greater) || 663 Tok.is(tok::html_slash_greater)) 664 continue; 665 666 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 667 SourceLocation(), 668 /* IsSelfClosing = */ false); 669 return HST; 670 671 default: 672 // Not a token from an HTML start tag. Thus HTML tag prematurely ended. 673 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 674 SourceLocation(), 675 /* IsSelfClosing = */ false); 676 bool StartLineInvalid; 677 const unsigned StartLine = SourceMgr.getPresumedLineNumber( 678 HST->getLocation(), 679 &StartLineInvalid); 680 bool EndLineInvalid; 681 const unsigned EndLine = SourceMgr.getPresumedLineNumber( 682 Tok.getLocation(), 683 &EndLineInvalid); 684 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) 685 Diag(Tok.getLocation(), 686 diag::warn_doc_html_start_tag_expected_ident_or_greater) 687 << HST->getSourceRange(); 688 else { 689 Diag(Tok.getLocation(), 690 diag::warn_doc_html_start_tag_expected_ident_or_greater); 691 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here) 692 << HST->getSourceRange(); 693 } 694 return HST; 695 } 696 } 697 } 698 699 HTMLEndTagComment *Parser::parseHTMLEndTag() { 700 assert(Tok.is(tok::html_end_tag)); 701 Token TokEndTag = Tok; 702 consumeToken(); 703 SourceLocation Loc; 704 if (Tok.is(tok::html_greater)) { 705 Loc = Tok.getLocation(); 706 consumeToken(); 707 } 708 709 return S.actOnHTMLEndTag(TokEndTag.getLocation(), 710 Loc, 711 TokEndTag.getHTMLTagEndName()); 712 } 713 714 BlockContentComment *Parser::parseParagraphOrBlockCommand() { 715 SmallVector<InlineContentComment *, 8> Content; 716 717 while (true) { 718 switch (Tok.getKind()) { 719 case tok::verbatim_block_begin: 720 case tok::verbatim_line_name: 721 case tok::eof: 722 break; // Block content or EOF ahead, finish this parapgaph. 723 724 case tok::unknown_command: 725 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), 726 Tok.getEndLocation(), 727 Tok.getUnknownCommandName())); 728 consumeToken(); 729 continue; 730 731 case tok::backslash_command: 732 case tok::at_command: { 733 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); 734 if (Info->IsBlockCommand) { 735 if (Content.size() == 0) 736 return parseBlockCommand(); 737 break; // Block command ahead, finish this parapgaph. 738 } 739 if (Info->IsVerbatimBlockEndCommand) { 740 Diag(Tok.getLocation(), 741 diag::warn_verbatim_block_end_without_start) 742 << Tok.is(tok::at_command) 743 << Info->Name 744 << SourceRange(Tok.getLocation(), Tok.getEndLocation()); 745 consumeToken(); 746 continue; 747 } 748 if (Info->IsUnknownCommand) { 749 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), 750 Tok.getEndLocation(), 751 Info->getID())); 752 consumeToken(); 753 continue; 754 } 755 assert(Info->IsInlineCommand); 756 Content.push_back(parseInlineCommand()); 757 continue; 758 } 759 760 case tok::newline: { 761 consumeToken(); 762 if (Tok.is(tok::newline) || Tok.is(tok::eof)) { 763 consumeToken(); 764 break; // Two newlines -- end of paragraph. 765 } 766 // Also allow [tok::newline, tok::text, tok::newline] if the middle 767 // tok::text is just whitespace. 768 if (Tok.is(tok::text) && isWhitespace(Tok.getText())) { 769 Token WhitespaceTok = Tok; 770 consumeToken(); 771 if (Tok.is(tok::newline) || Tok.is(tok::eof)) { 772 consumeToken(); 773 break; 774 } 775 // We have [tok::newline, tok::text, non-newline]. Put back tok::text. 776 putBack(WhitespaceTok); 777 } 778 if (Content.size() > 0) 779 Content.back()->addTrailingNewline(); 780 continue; 781 } 782 783 // Don't deal with HTML tag soup now. 784 case tok::html_start_tag: 785 Content.push_back(parseHTMLStartTag()); 786 continue; 787 788 case tok::html_end_tag: 789 Content.push_back(parseHTMLEndTag()); 790 continue; 791 792 case tok::text: 793 Content.push_back(S.actOnText(Tok.getLocation(), 794 Tok.getEndLocation(), 795 Tok.getText())); 796 consumeToken(); 797 continue; 798 799 case tok::verbatim_block_line: 800 case tok::verbatim_block_end: 801 case tok::verbatim_line_text: 802 case tok::html_ident: 803 case tok::html_equals: 804 case tok::html_quoted_string: 805 case tok::html_greater: 806 case tok::html_slash_greater: 807 llvm_unreachable("should not see this token"); 808 } 809 break; 810 } 811 812 return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content))); 813 } 814 815 VerbatimBlockComment *Parser::parseVerbatimBlock() { 816 assert(Tok.is(tok::verbatim_block_begin)); 817 818 VerbatimBlockComment *VB = 819 S.actOnVerbatimBlockStart(Tok.getLocation(), 820 Tok.getVerbatimBlockID()); 821 consumeToken(); 822 823 // Don't create an empty line if verbatim opening command is followed 824 // by a newline. 825 if (Tok.is(tok::newline)) 826 consumeToken(); 827 828 SmallVector<VerbatimBlockLineComment *, 8> Lines; 829 while (Tok.is(tok::verbatim_block_line) || 830 Tok.is(tok::newline)) { 831 VerbatimBlockLineComment *Line; 832 if (Tok.is(tok::verbatim_block_line)) { 833 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), 834 Tok.getVerbatimBlockText()); 835 consumeToken(); 836 if (Tok.is(tok::newline)) { 837 consumeToken(); 838 } 839 } else { 840 // Empty line, just a tok::newline. 841 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), ""); 842 consumeToken(); 843 } 844 Lines.push_back(Line); 845 } 846 847 if (Tok.is(tok::verbatim_block_end)) { 848 const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID()); 849 S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name, 850 S.copyArray(llvm::ArrayRef(Lines))); 851 consumeToken(); 852 } else { 853 // Unterminated \\verbatim block 854 S.actOnVerbatimBlockFinish(VB, SourceLocation(), "", 855 S.copyArray(llvm::ArrayRef(Lines))); 856 } 857 858 return VB; 859 } 860 861 VerbatimLineComment *Parser::parseVerbatimLine() { 862 assert(Tok.is(tok::verbatim_line_name)); 863 864 Token NameTok = Tok; 865 consumeToken(); 866 867 SourceLocation TextBegin; 868 StringRef Text; 869 // Next token might not be a tok::verbatim_line_text if verbatim line 870 // starting command comes just before a newline or comment end. 871 if (Tok.is(tok::verbatim_line_text)) { 872 TextBegin = Tok.getLocation(); 873 Text = Tok.getVerbatimLineText(); 874 } else { 875 TextBegin = NameTok.getEndLocation(); 876 Text = ""; 877 } 878 879 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), 880 NameTok.getVerbatimLineID(), 881 TextBegin, 882 Text); 883 consumeToken(); 884 return VL; 885 } 886 887 BlockContentComment *Parser::parseBlockContent() { 888 switch (Tok.getKind()) { 889 case tok::text: 890 case tok::unknown_command: 891 case tok::backslash_command: 892 case tok::at_command: 893 case tok::html_start_tag: 894 case tok::html_end_tag: 895 return parseParagraphOrBlockCommand(); 896 897 case tok::verbatim_block_begin: 898 return parseVerbatimBlock(); 899 900 case tok::verbatim_line_name: 901 return parseVerbatimLine(); 902 903 case tok::eof: 904 case tok::newline: 905 case tok::verbatim_block_line: 906 case tok::verbatim_block_end: 907 case tok::verbatim_line_text: 908 case tok::html_ident: 909 case tok::html_equals: 910 case tok::html_quoted_string: 911 case tok::html_greater: 912 case tok::html_slash_greater: 913 llvm_unreachable("should not see this token"); 914 } 915 llvm_unreachable("bogus token kind"); 916 } 917 918 FullComment *Parser::parseFullComment() { 919 // Skip newlines at the beginning of the comment. 920 while (Tok.is(tok::newline)) 921 consumeToken(); 922 923 SmallVector<BlockContentComment *, 8> Blocks; 924 while (Tok.isNot(tok::eof)) { 925 Blocks.push_back(parseBlockContent()); 926 927 // Skip extra newlines after paragraph end. 928 while (Tok.is(tok::newline)) 929 consumeToken(); 930 } 931 return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks))); 932 } 933 934 } // end namespace comments 935 } // end namespace clang 936