1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/AST/CommentParser.h"
10 #include "clang/AST/CommentCommandTraits.h"
11 #include "clang/AST/CommentDiagnostic.h"
12 #include "clang/AST/CommentSema.h"
13 #include "clang/Basic/CharInfo.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "llvm/Support/ErrorHandling.h"
16
17 namespace clang {
18
isWhitespace(llvm::StringRef S)19 static inline bool isWhitespace(llvm::StringRef S) {
20 for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
21 if (!isWhitespace(*I))
22 return false;
23 }
24 return true;
25 }
26
27 namespace comments {
28
29 /// Re-lexes a sequence of tok::text tokens.
30 class TextTokenRetokenizer {
31 llvm::BumpPtrAllocator &Allocator;
32 Parser &P;
33
34 /// This flag is set when there are no more tokens we can fetch from lexer.
35 bool NoMoreInterestingTokens;
36
37 /// Token buffer: tokens we have processed and lookahead.
38 SmallVector<Token, 16> Toks;
39
40 /// A position in \c Toks.
41 struct Position {
42 const char *BufferStart;
43 const char *BufferEnd;
44 const char *BufferPtr;
45 SourceLocation BufferStartLoc;
46 unsigned CurToken;
47 };
48
49 /// Current position in Toks.
50 Position Pos;
51
isEnd() const52 bool isEnd() const {
53 return Pos.CurToken >= Toks.size();
54 }
55
56 /// Sets up the buffer pointers to point to current token.
setupBuffer()57 void setupBuffer() {
58 assert(!isEnd());
59 const Token &Tok = Toks[Pos.CurToken];
60
61 Pos.BufferStart = Tok.getText().begin();
62 Pos.BufferEnd = Tok.getText().end();
63 Pos.BufferPtr = Pos.BufferStart;
64 Pos.BufferStartLoc = Tok.getLocation();
65 }
66
getSourceLocation() const67 SourceLocation getSourceLocation() const {
68 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69 return Pos.BufferStartLoc.getLocWithOffset(CharNo);
70 }
71
peek() const72 char peek() const {
73 assert(!isEnd());
74 assert(Pos.BufferPtr != Pos.BufferEnd);
75 return *Pos.BufferPtr;
76 }
77
consumeChar()78 void consumeChar() {
79 assert(!isEnd());
80 assert(Pos.BufferPtr != Pos.BufferEnd);
81 Pos.BufferPtr++;
82 if (Pos.BufferPtr == Pos.BufferEnd) {
83 Pos.CurToken++;
84 if (isEnd() && !addToken())
85 return;
86
87 assert(!isEnd());
88 setupBuffer();
89 }
90 }
91
92 /// Extract a template type
lexTemplate(SmallString<32> & WordText)93 bool lexTemplate(SmallString<32> &WordText) {
94 unsigned BracketCount = 0;
95 while (!isEnd()) {
96 const char C = peek();
97 WordText.push_back(C);
98 consumeChar();
99 switch (C) {
100 case '<': {
101 BracketCount++;
102 break;
103 }
104 case '>': {
105 BracketCount--;
106 if (!BracketCount)
107 return true;
108 break;
109 }
110 default:
111 break;
112 }
113 }
114 return false;
115 }
116
117 /// Add a token.
118 /// Returns true on success, false if there are no interesting tokens to
119 /// fetch from lexer.
addToken()120 bool addToken() {
121 if (NoMoreInterestingTokens)
122 return false;
123
124 if (P.Tok.is(tok::newline)) {
125 // If we see a single newline token between text tokens, skip it.
126 Token Newline = P.Tok;
127 P.consumeToken();
128 if (P.Tok.isNot(tok::text)) {
129 P.putBack(Newline);
130 NoMoreInterestingTokens = true;
131 return false;
132 }
133 }
134 if (P.Tok.isNot(tok::text)) {
135 NoMoreInterestingTokens = true;
136 return false;
137 }
138
139 Toks.push_back(P.Tok);
140 P.consumeToken();
141 if (Toks.size() == 1)
142 setupBuffer();
143 return true;
144 }
145
consumeWhitespace()146 void consumeWhitespace() {
147 while (!isEnd()) {
148 if (isWhitespace(peek()))
149 consumeChar();
150 else
151 break;
152 }
153 }
154
formTokenWithChars(Token & Result,SourceLocation Loc,const char * TokBegin,unsigned TokLength,StringRef Text)155 void formTokenWithChars(Token &Result,
156 SourceLocation Loc,
157 const char *TokBegin,
158 unsigned TokLength,
159 StringRef Text) {
160 Result.setLocation(Loc);
161 Result.setKind(tok::text);
162 Result.setLength(TokLength);
163 #ifndef NDEBUG
164 Result.TextPtr = "<UNSET>";
165 Result.IntVal = 7;
166 #endif
167 Result.setText(Text);
168 }
169
170 public:
TextTokenRetokenizer(llvm::BumpPtrAllocator & Allocator,Parser & P)171 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
172 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
173 Pos.CurToken = 0;
174 addToken();
175 }
176
177 /// Extract a type argument
lexType(Token & Tok)178 bool lexType(Token &Tok) {
179 if (isEnd())
180 return false;
181
182 // Save current position in case we need to rollback because the type is
183 // empty.
184 Position SavedPos = Pos;
185
186 // Consume any leading whitespace.
187 consumeWhitespace();
188 SmallString<32> WordText;
189 const char *WordBegin = Pos.BufferPtr;
190 SourceLocation Loc = getSourceLocation();
191
192 while (!isEnd()) {
193 const char C = peek();
194 // For non-whitespace characters we check if it's a template or otherwise
195 // continue reading the text into a word.
196 if (!isWhitespace(C)) {
197 if (C == '<') {
198 if (!lexTemplate(WordText))
199 return false;
200 } else {
201 WordText.push_back(C);
202 consumeChar();
203 }
204 } else {
205 consumeChar();
206 break;
207 }
208 }
209
210 const unsigned Length = WordText.size();
211 if (Length == 0) {
212 Pos = SavedPos;
213 return false;
214 }
215
216 char *TextPtr = Allocator.Allocate<char>(Length + 1);
217
218 memcpy(TextPtr, WordText.c_str(), Length + 1);
219 StringRef Text = StringRef(TextPtr, Length);
220
221 formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
222 return true;
223 }
224
225 // Check if this line starts with @par or \par
startsWithParCommand()226 bool startsWithParCommand() {
227 unsigned Offset = 1;
228
229 // Skip all whitespace characters at the beginning.
230 // This needs to backtrack because Pos has already advanced past the
231 // actual \par or @par command by the time this function is called.
232 while (isWhitespace(*(Pos.BufferPtr - Offset)))
233 Offset++;
234
235 // Once we've reached the whitespace, backtrack and check if the previous
236 // four characters are \par or @par.
237 llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);
238 return LineStart.starts_with("\\par") || LineStart.starts_with("@par");
239 }
240
241 /// Extract a par command argument-header.
lexParHeading(Token & Tok)242 bool lexParHeading(Token &Tok) {
243 if (isEnd())
244 return false;
245
246 Position SavedPos = Pos;
247
248 consumeWhitespace();
249 SmallString<32> WordText;
250 const char *WordBegin = Pos.BufferPtr;
251 SourceLocation Loc = getSourceLocation();
252
253 if (!startsWithParCommand())
254 return false;
255
256 // Read until the end of this token, which is effectively the end of the
257 // line. This gets us the content of the par header, if there is one.
258 while (!isEnd()) {
259 WordText.push_back(peek());
260 if (Pos.BufferPtr + 1 == Pos.BufferEnd) {
261 consumeChar();
262 break;
263 }
264 consumeChar();
265 }
266
267 unsigned Length = WordText.size();
268 if (Length == 0) {
269 Pos = SavedPos;
270 return false;
271 }
272
273 char *TextPtr = Allocator.Allocate<char>(Length + 1);
274
275 memcpy(TextPtr, WordText.c_str(), Length + 1);
276 StringRef Text = StringRef(TextPtr, Length);
277
278 formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
279 return true;
280 }
281
282 /// Extract a word -- sequence of non-whitespace characters.
lexWord(Token & Tok)283 bool lexWord(Token &Tok) {
284 if (isEnd())
285 return false;
286
287 Position SavedPos = Pos;
288
289 consumeWhitespace();
290 SmallString<32> WordText;
291 const char *WordBegin = Pos.BufferPtr;
292 SourceLocation Loc = getSourceLocation();
293 while (!isEnd()) {
294 const char C = peek();
295 if (!isWhitespace(C)) {
296 WordText.push_back(C);
297 consumeChar();
298 } else
299 break;
300 }
301 const unsigned Length = WordText.size();
302 if (Length == 0) {
303 Pos = SavedPos;
304 return false;
305 }
306
307 char *TextPtr = Allocator.Allocate<char>(Length + 1);
308
309 memcpy(TextPtr, WordText.c_str(), Length + 1);
310 StringRef Text = StringRef(TextPtr, Length);
311
312 formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
313 return true;
314 }
315
lexDelimitedSeq(Token & Tok,char OpenDelim,char CloseDelim)316 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
317 if (isEnd())
318 return false;
319
320 Position SavedPos = Pos;
321
322 consumeWhitespace();
323 SmallString<32> WordText;
324 const char *WordBegin = Pos.BufferPtr;
325 SourceLocation Loc = getSourceLocation();
326 bool Error = false;
327 if (!isEnd()) {
328 const char C = peek();
329 if (C == OpenDelim) {
330 WordText.push_back(C);
331 consumeChar();
332 } else
333 Error = true;
334 }
335 char C = '\0';
336 while (!Error && !isEnd()) {
337 C = peek();
338 WordText.push_back(C);
339 consumeChar();
340 if (C == CloseDelim)
341 break;
342 }
343 if (!Error && C != CloseDelim)
344 Error = true;
345
346 if (Error) {
347 Pos = SavedPos;
348 return false;
349 }
350
351 const unsigned Length = WordText.size();
352 char *TextPtr = Allocator.Allocate<char>(Length + 1);
353
354 memcpy(TextPtr, WordText.c_str(), Length + 1);
355 StringRef Text = StringRef(TextPtr, Length);
356
357 formTokenWithChars(Tok, Loc, WordBegin,
358 Pos.BufferPtr - WordBegin, Text);
359 return true;
360 }
361
362 /// Put back tokens that we didn't consume.
putBackLeftoverTokens()363 void putBackLeftoverTokens() {
364 if (isEnd())
365 return;
366
367 bool HavePartialTok = false;
368 Token PartialTok;
369 if (Pos.BufferPtr != Pos.BufferStart) {
370 formTokenWithChars(PartialTok, getSourceLocation(),
371 Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
372 StringRef(Pos.BufferPtr,
373 Pos.BufferEnd - Pos.BufferPtr));
374 HavePartialTok = true;
375 Pos.CurToken++;
376 }
377
378 P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
379 Pos.CurToken = Toks.size();
380
381 if (HavePartialTok)
382 P.putBack(PartialTok);
383 }
384 };
385
Parser(Lexer & L,Sema & S,llvm::BumpPtrAllocator & Allocator,const SourceManager & SourceMgr,DiagnosticsEngine & Diags,const CommandTraits & Traits)386 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
387 const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
388 const CommandTraits &Traits):
389 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
390 Traits(Traits) {
391 consumeToken();
392 }
393
parseParamCommandArgs(ParamCommandComment * PC,TextTokenRetokenizer & Retokenizer)394 void Parser::parseParamCommandArgs(ParamCommandComment *PC,
395 TextTokenRetokenizer &Retokenizer) {
396 Token Arg;
397 // Check if argument looks like direction specification: [dir]
398 // e.g., [in], [out], [in,out]
399 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
400 S.actOnParamCommandDirectionArg(PC,
401 Arg.getLocation(),
402 Arg.getEndLocation(),
403 Arg.getText());
404
405 if (Retokenizer.lexWord(Arg))
406 S.actOnParamCommandParamNameArg(PC,
407 Arg.getLocation(),
408 Arg.getEndLocation(),
409 Arg.getText());
410 }
411
parseTParamCommandArgs(TParamCommandComment * TPC,TextTokenRetokenizer & Retokenizer)412 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
413 TextTokenRetokenizer &Retokenizer) {
414 Token Arg;
415 if (Retokenizer.lexWord(Arg))
416 S.actOnTParamCommandParamNameArg(TPC,
417 Arg.getLocation(),
418 Arg.getEndLocation(),
419 Arg.getText());
420 }
421
422 ArrayRef<Comment::Argument>
parseCommandArgs(TextTokenRetokenizer & Retokenizer,unsigned NumArgs)423 Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
424 auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
425 Comment::Argument[NumArgs];
426 unsigned ParsedArgs = 0;
427 Token Arg;
428 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
429 Args[ParsedArgs] = Comment::Argument{
430 SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
431 ParsedArgs++;
432 }
433
434 return llvm::ArrayRef(Args, ParsedArgs);
435 }
436
437 ArrayRef<Comment::Argument>
parseThrowCommandArgs(TextTokenRetokenizer & Retokenizer,unsigned NumArgs)438 Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
439 unsigned NumArgs) {
440 auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
441 Comment::Argument[NumArgs];
442 unsigned ParsedArgs = 0;
443 Token Arg;
444
445 while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) {
446 Args[ParsedArgs] = Comment::Argument{
447 SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
448 ParsedArgs++;
449 }
450
451 return llvm::ArrayRef(Args, ParsedArgs);
452 }
453
454 ArrayRef<Comment::Argument>
parseParCommandArgs(TextTokenRetokenizer & Retokenizer,unsigned NumArgs)455 Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
456 unsigned NumArgs) {
457 assert(NumArgs > 0);
458 auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
459 Comment::Argument[NumArgs];
460 unsigned ParsedArgs = 0;
461 Token Arg;
462
463 while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Arg)) {
464 Args[ParsedArgs] = Comment::Argument{
465 SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
466 ParsedArgs++;
467 }
468
469 return llvm::ArrayRef(Args, ParsedArgs);
470 }
471
parseBlockCommand()472 BlockCommandComment *Parser::parseBlockCommand() {
473 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
474
475 ParamCommandComment *PC = nullptr;
476 TParamCommandComment *TPC = nullptr;
477 BlockCommandComment *BC = nullptr;
478 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
479 CommandMarkerKind CommandMarker =
480 Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
481 if (Info->IsParamCommand) {
482 PC = S.actOnParamCommandStart(Tok.getLocation(),
483 Tok.getEndLocation(),
484 Tok.getCommandID(),
485 CommandMarker);
486 } else if (Info->IsTParamCommand) {
487 TPC = S.actOnTParamCommandStart(Tok.getLocation(),
488 Tok.getEndLocation(),
489 Tok.getCommandID(),
490 CommandMarker);
491 } else {
492 BC = S.actOnBlockCommandStart(Tok.getLocation(),
493 Tok.getEndLocation(),
494 Tok.getCommandID(),
495 CommandMarker);
496 }
497 consumeToken();
498
499 if (isTokBlockCommand()) {
500 // Block command ahead. We can't nest block commands, so pretend that this
501 // command has an empty argument.
502 ParagraphComment *Paragraph = S.actOnParagraphComment(std::nullopt);
503 if (PC) {
504 S.actOnParamCommandFinish(PC, Paragraph);
505 return PC;
506 } else if (TPC) {
507 S.actOnTParamCommandFinish(TPC, Paragraph);
508 return TPC;
509 } else {
510 S.actOnBlockCommandFinish(BC, Paragraph);
511 return BC;
512 }
513 }
514
515 if (PC || TPC || Info->NumArgs > 0) {
516 // In order to parse command arguments we need to retokenize a few
517 // following text tokens.
518 TextTokenRetokenizer Retokenizer(Allocator, *this);
519
520 if (PC)
521 parseParamCommandArgs(PC, Retokenizer);
522 else if (TPC)
523 parseTParamCommandArgs(TPC, Retokenizer);
524 else if (Info->IsThrowsCommand)
525 S.actOnBlockCommandArgs(
526 BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
527 else if (Info->IsParCommand)
528 S.actOnBlockCommandArgs(BC,
529 parseParCommandArgs(Retokenizer, Info->NumArgs));
530 else
531 S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));
532
533 Retokenizer.putBackLeftoverTokens();
534 }
535
536 // If there's a block command ahead, we will attach an empty paragraph to
537 // this command.
538 bool EmptyParagraph = false;
539 if (isTokBlockCommand())
540 EmptyParagraph = true;
541 else if (Tok.is(tok::newline)) {
542 Token PrevTok = Tok;
543 consumeToken();
544 EmptyParagraph = isTokBlockCommand();
545 putBack(PrevTok);
546 }
547
548 ParagraphComment *Paragraph;
549 if (EmptyParagraph)
550 Paragraph = S.actOnParagraphComment(std::nullopt);
551 else {
552 BlockContentComment *Block = parseParagraphOrBlockCommand();
553 // Since we have checked for a block command, we should have parsed a
554 // paragraph.
555 Paragraph = cast<ParagraphComment>(Block);
556 }
557
558 if (PC) {
559 S.actOnParamCommandFinish(PC, Paragraph);
560 return PC;
561 } else if (TPC) {
562 S.actOnTParamCommandFinish(TPC, Paragraph);
563 return TPC;
564 } else {
565 S.actOnBlockCommandFinish(BC, Paragraph);
566 return BC;
567 }
568 }
569
parseInlineCommand()570 InlineCommandComment *Parser::parseInlineCommand() {
571 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
572 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
573
574 const Token CommandTok = Tok;
575 consumeToken();
576
577 TextTokenRetokenizer Retokenizer(Allocator, *this);
578 ArrayRef<Comment::Argument> Args =
579 parseCommandArgs(Retokenizer, Info->NumArgs);
580
581 InlineCommandComment *IC = S.actOnInlineCommand(
582 CommandTok.getLocation(), CommandTok.getEndLocation(),
583 CommandTok.getCommandID(), Args);
584
585 if (Args.size() < Info->NumArgs) {
586 Diag(CommandTok.getEndLocation().getLocWithOffset(1),
587 diag::warn_doc_inline_command_not_enough_arguments)
588 << CommandTok.is(tok::at_command) << Info->Name << Args.size()
589 << Info->NumArgs
590 << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
591 }
592
593 Retokenizer.putBackLeftoverTokens();
594
595 return IC;
596 }
597
parseHTMLStartTag()598 HTMLStartTagComment *Parser::parseHTMLStartTag() {
599 assert(Tok.is(tok::html_start_tag));
600 HTMLStartTagComment *HST =
601 S.actOnHTMLStartTagStart(Tok.getLocation(),
602 Tok.getHTMLTagStartName());
603 consumeToken();
604
605 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
606 while (true) {
607 switch (Tok.getKind()) {
608 case tok::html_ident: {
609 Token Ident = Tok;
610 consumeToken();
611 if (Tok.isNot(tok::html_equals)) {
612 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
613 Ident.getHTMLIdent()));
614 continue;
615 }
616 Token Equals = Tok;
617 consumeToken();
618 if (Tok.isNot(tok::html_quoted_string)) {
619 Diag(Tok.getLocation(),
620 diag::warn_doc_html_start_tag_expected_quoted_string)
621 << SourceRange(Equals.getLocation());
622 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
623 Ident.getHTMLIdent()));
624 while (Tok.is(tok::html_equals) ||
625 Tok.is(tok::html_quoted_string))
626 consumeToken();
627 continue;
628 }
629 Attrs.push_back(HTMLStartTagComment::Attribute(
630 Ident.getLocation(),
631 Ident.getHTMLIdent(),
632 Equals.getLocation(),
633 SourceRange(Tok.getLocation(),
634 Tok.getEndLocation()),
635 Tok.getHTMLQuotedString()));
636 consumeToken();
637 continue;
638 }
639
640 case tok::html_greater:
641 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
642 Tok.getLocation(),
643 /* IsSelfClosing = */ false);
644 consumeToken();
645 return HST;
646
647 case tok::html_slash_greater:
648 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
649 Tok.getLocation(),
650 /* IsSelfClosing = */ true);
651 consumeToken();
652 return HST;
653
654 case tok::html_equals:
655 case tok::html_quoted_string:
656 Diag(Tok.getLocation(),
657 diag::warn_doc_html_start_tag_expected_ident_or_greater);
658 while (Tok.is(tok::html_equals) ||
659 Tok.is(tok::html_quoted_string))
660 consumeToken();
661 if (Tok.is(tok::html_ident) ||
662 Tok.is(tok::html_greater) ||
663 Tok.is(tok::html_slash_greater))
664 continue;
665
666 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
667 SourceLocation(),
668 /* IsSelfClosing = */ false);
669 return HST;
670
671 default:
672 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
673 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
674 SourceLocation(),
675 /* IsSelfClosing = */ false);
676 bool StartLineInvalid;
677 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
678 HST->getLocation(),
679 &StartLineInvalid);
680 bool EndLineInvalid;
681 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
682 Tok.getLocation(),
683 &EndLineInvalid);
684 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
685 Diag(Tok.getLocation(),
686 diag::warn_doc_html_start_tag_expected_ident_or_greater)
687 << HST->getSourceRange();
688 else {
689 Diag(Tok.getLocation(),
690 diag::warn_doc_html_start_tag_expected_ident_or_greater);
691 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
692 << HST->getSourceRange();
693 }
694 return HST;
695 }
696 }
697 }
698
parseHTMLEndTag()699 HTMLEndTagComment *Parser::parseHTMLEndTag() {
700 assert(Tok.is(tok::html_end_tag));
701 Token TokEndTag = Tok;
702 consumeToken();
703 SourceLocation Loc;
704 if (Tok.is(tok::html_greater)) {
705 Loc = Tok.getLocation();
706 consumeToken();
707 }
708
709 return S.actOnHTMLEndTag(TokEndTag.getLocation(),
710 Loc,
711 TokEndTag.getHTMLTagEndName());
712 }
713
parseParagraphOrBlockCommand()714 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
715 SmallVector<InlineContentComment *, 8> Content;
716
717 while (true) {
718 switch (Tok.getKind()) {
719 case tok::verbatim_block_begin:
720 case tok::verbatim_line_name:
721 case tok::eof:
722 break; // Block content or EOF ahead, finish this parapgaph.
723
724 case tok::unknown_command:
725 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
726 Tok.getEndLocation(),
727 Tok.getUnknownCommandName()));
728 consumeToken();
729 continue;
730
731 case tok::backslash_command:
732 case tok::at_command: {
733 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
734 if (Info->IsBlockCommand) {
735 if (Content.size() == 0)
736 return parseBlockCommand();
737 break; // Block command ahead, finish this parapgaph.
738 }
739 if (Info->IsVerbatimBlockEndCommand) {
740 Diag(Tok.getLocation(),
741 diag::warn_verbatim_block_end_without_start)
742 << Tok.is(tok::at_command)
743 << Info->Name
744 << SourceRange(Tok.getLocation(), Tok.getEndLocation());
745 consumeToken();
746 continue;
747 }
748 if (Info->IsUnknownCommand) {
749 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
750 Tok.getEndLocation(),
751 Info->getID()));
752 consumeToken();
753 continue;
754 }
755 assert(Info->IsInlineCommand);
756 Content.push_back(parseInlineCommand());
757 continue;
758 }
759
760 case tok::newline: {
761 consumeToken();
762 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
763 consumeToken();
764 break; // Two newlines -- end of paragraph.
765 }
766 // Also allow [tok::newline, tok::text, tok::newline] if the middle
767 // tok::text is just whitespace.
768 if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
769 Token WhitespaceTok = Tok;
770 consumeToken();
771 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
772 consumeToken();
773 break;
774 }
775 // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
776 putBack(WhitespaceTok);
777 }
778 if (Content.size() > 0)
779 Content.back()->addTrailingNewline();
780 continue;
781 }
782
783 // Don't deal with HTML tag soup now.
784 case tok::html_start_tag:
785 Content.push_back(parseHTMLStartTag());
786 continue;
787
788 case tok::html_end_tag:
789 Content.push_back(parseHTMLEndTag());
790 continue;
791
792 case tok::text:
793 Content.push_back(S.actOnText(Tok.getLocation(),
794 Tok.getEndLocation(),
795 Tok.getText()));
796 consumeToken();
797 continue;
798
799 case tok::verbatim_block_line:
800 case tok::verbatim_block_end:
801 case tok::verbatim_line_text:
802 case tok::html_ident:
803 case tok::html_equals:
804 case tok::html_quoted_string:
805 case tok::html_greater:
806 case tok::html_slash_greater:
807 llvm_unreachable("should not see this token");
808 }
809 break;
810 }
811
812 return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content)));
813 }
814
parseVerbatimBlock()815 VerbatimBlockComment *Parser::parseVerbatimBlock() {
816 assert(Tok.is(tok::verbatim_block_begin));
817
818 VerbatimBlockComment *VB =
819 S.actOnVerbatimBlockStart(Tok.getLocation(),
820 Tok.getVerbatimBlockID());
821 consumeToken();
822
823 // Don't create an empty line if verbatim opening command is followed
824 // by a newline.
825 if (Tok.is(tok::newline))
826 consumeToken();
827
828 SmallVector<VerbatimBlockLineComment *, 8> Lines;
829 while (Tok.is(tok::verbatim_block_line) ||
830 Tok.is(tok::newline)) {
831 VerbatimBlockLineComment *Line;
832 if (Tok.is(tok::verbatim_block_line)) {
833 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
834 Tok.getVerbatimBlockText());
835 consumeToken();
836 if (Tok.is(tok::newline)) {
837 consumeToken();
838 }
839 } else {
840 // Empty line, just a tok::newline.
841 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
842 consumeToken();
843 }
844 Lines.push_back(Line);
845 }
846
847 if (Tok.is(tok::verbatim_block_end)) {
848 const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
849 S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name,
850 S.copyArray(llvm::ArrayRef(Lines)));
851 consumeToken();
852 } else {
853 // Unterminated \\verbatim block
854 S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
855 S.copyArray(llvm::ArrayRef(Lines)));
856 }
857
858 return VB;
859 }
860
parseVerbatimLine()861 VerbatimLineComment *Parser::parseVerbatimLine() {
862 assert(Tok.is(tok::verbatim_line_name));
863
864 Token NameTok = Tok;
865 consumeToken();
866
867 SourceLocation TextBegin;
868 StringRef Text;
869 // Next token might not be a tok::verbatim_line_text if verbatim line
870 // starting command comes just before a newline or comment end.
871 if (Tok.is(tok::verbatim_line_text)) {
872 TextBegin = Tok.getLocation();
873 Text = Tok.getVerbatimLineText();
874 } else {
875 TextBegin = NameTok.getEndLocation();
876 Text = "";
877 }
878
879 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
880 NameTok.getVerbatimLineID(),
881 TextBegin,
882 Text);
883 consumeToken();
884 return VL;
885 }
886
parseBlockContent()887 BlockContentComment *Parser::parseBlockContent() {
888 switch (Tok.getKind()) {
889 case tok::text:
890 case tok::unknown_command:
891 case tok::backslash_command:
892 case tok::at_command:
893 case tok::html_start_tag:
894 case tok::html_end_tag:
895 return parseParagraphOrBlockCommand();
896
897 case tok::verbatim_block_begin:
898 return parseVerbatimBlock();
899
900 case tok::verbatim_line_name:
901 return parseVerbatimLine();
902
903 case tok::eof:
904 case tok::newline:
905 case tok::verbatim_block_line:
906 case tok::verbatim_block_end:
907 case tok::verbatim_line_text:
908 case tok::html_ident:
909 case tok::html_equals:
910 case tok::html_quoted_string:
911 case tok::html_greater:
912 case tok::html_slash_greater:
913 llvm_unreachable("should not see this token");
914 }
915 llvm_unreachable("bogus token kind");
916 }
917
parseFullComment()918 FullComment *Parser::parseFullComment() {
919 // Skip newlines at the beginning of the comment.
920 while (Tok.is(tok::newline))
921 consumeToken();
922
923 SmallVector<BlockContentComment *, 8> Blocks;
924 while (Tok.isNot(tok::eof)) {
925 Blocks.push_back(parseBlockContent());
926
927 // Skip extra newlines after paragraph end.
928 while (Tok.is(tok::newline))
929 consumeToken();
930 }
931 return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks)));
932 }
933
934 } // end namespace comments
935 } // end namespace clang
936