xref: /freebsd/contrib/llvm-project/clang/lib/AST/CommentParser.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/AST/CommentParser.h"
10 #include "clang/AST/CommentCommandTraits.h"
11 #include "clang/AST/CommentDiagnostic.h"
12 #include "clang/AST/CommentSema.h"
13 #include "clang/Basic/CharInfo.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "llvm/Support/ErrorHandling.h"
16 
17 namespace clang {
18 
isWhitespace(llvm::StringRef S)19 static inline bool isWhitespace(llvm::StringRef S) {
20   for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
21     if (!isWhitespace(*I))
22       return false;
23   }
24   return true;
25 }
26 
27 namespace comments {
28 
29 /// Re-lexes a sequence of tok::text tokens.
30 class TextTokenRetokenizer {
31   llvm::BumpPtrAllocator &Allocator;
32   Parser &P;
33 
34   /// This flag is set when there are no more tokens we can fetch from lexer.
35   bool NoMoreInterestingTokens;
36 
37   /// Token buffer: tokens we have processed and lookahead.
38   SmallVector<Token, 16> Toks;
39 
40   /// A position in \c Toks.
41   struct Position {
42     const char *BufferStart;
43     const char *BufferEnd;
44     const char *BufferPtr;
45     SourceLocation BufferStartLoc;
46     unsigned CurToken;
47   };
48 
49   /// Current position in Toks.
50   Position Pos;
51 
isEnd() const52   bool isEnd() const {
53     return Pos.CurToken >= Toks.size();
54   }
55 
56   /// Sets up the buffer pointers to point to current token.
setupBuffer()57   void setupBuffer() {
58     assert(!isEnd());
59     const Token &Tok = Toks[Pos.CurToken];
60 
61     Pos.BufferStart = Tok.getText().begin();
62     Pos.BufferEnd = Tok.getText().end();
63     Pos.BufferPtr = Pos.BufferStart;
64     Pos.BufferStartLoc = Tok.getLocation();
65   }
66 
getSourceLocation() const67   SourceLocation getSourceLocation() const {
68     const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69     return Pos.BufferStartLoc.getLocWithOffset(CharNo);
70   }
71 
peek() const72   char peek() const {
73     assert(!isEnd());
74     assert(Pos.BufferPtr != Pos.BufferEnd);
75     return *Pos.BufferPtr;
76   }
77 
consumeChar()78   void consumeChar() {
79     assert(!isEnd());
80     assert(Pos.BufferPtr != Pos.BufferEnd);
81     Pos.BufferPtr++;
82     if (Pos.BufferPtr == Pos.BufferEnd) {
83       Pos.CurToken++;
84       if (isEnd() && !addToken())
85         return;
86 
87       assert(!isEnd());
88       setupBuffer();
89     }
90   }
91 
92   /// Extract a template type
lexTemplate(SmallString<32> & WordText)93   bool lexTemplate(SmallString<32> &WordText) {
94     unsigned BracketCount = 0;
95     while (!isEnd()) {
96       const char C = peek();
97       WordText.push_back(C);
98       consumeChar();
99       switch (C) {
100       case '<': {
101         BracketCount++;
102         break;
103       }
104       case '>': {
105         BracketCount--;
106         if (!BracketCount)
107           return true;
108         break;
109       }
110       default:
111         break;
112       }
113     }
114     return false;
115   }
116 
117   /// Add a token.
118   /// Returns true on success, false if there are no interesting tokens to
119   /// fetch from lexer.
addToken()120   bool addToken() {
121     if (NoMoreInterestingTokens)
122       return false;
123 
124     if (P.Tok.is(tok::newline)) {
125       // If we see a single newline token between text tokens, skip it.
126       Token Newline = P.Tok;
127       P.consumeToken();
128       if (P.Tok.isNot(tok::text)) {
129         P.putBack(Newline);
130         NoMoreInterestingTokens = true;
131         return false;
132       }
133     }
134     if (P.Tok.isNot(tok::text)) {
135       NoMoreInterestingTokens = true;
136       return false;
137     }
138 
139     Toks.push_back(P.Tok);
140     P.consumeToken();
141     if (Toks.size() == 1)
142       setupBuffer();
143     return true;
144   }
145 
consumeWhitespace()146   void consumeWhitespace() {
147     while (!isEnd()) {
148       if (isWhitespace(peek()))
149         consumeChar();
150       else
151         break;
152     }
153   }
154 
formTokenWithChars(Token & Result,SourceLocation Loc,const char * TokBegin,unsigned TokLength,StringRef Text)155   void formTokenWithChars(Token &Result,
156                           SourceLocation Loc,
157                           const char *TokBegin,
158                           unsigned TokLength,
159                           StringRef Text) {
160     Result.setLocation(Loc);
161     Result.setKind(tok::text);
162     Result.setLength(TokLength);
163 #ifndef NDEBUG
164     Result.TextPtr = "<UNSET>";
165     Result.IntVal = 7;
166 #endif
167     Result.setText(Text);
168   }
169 
170 public:
TextTokenRetokenizer(llvm::BumpPtrAllocator & Allocator,Parser & P)171   TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
172       Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
173     Pos.CurToken = 0;
174     addToken();
175   }
176 
177   /// Extract a type argument
lexType(Token & Tok)178   bool lexType(Token &Tok) {
179     if (isEnd())
180       return false;
181 
182     // Save current position in case we need to rollback because the type is
183     // empty.
184     Position SavedPos = Pos;
185 
186     // Consume any leading whitespace.
187     consumeWhitespace();
188     SmallString<32> WordText;
189     const char *WordBegin = Pos.BufferPtr;
190     SourceLocation Loc = getSourceLocation();
191 
192     while (!isEnd()) {
193       const char C = peek();
194       // For non-whitespace characters we check if it's a template or otherwise
195       // continue reading the text into a word.
196       if (!isWhitespace(C)) {
197         if (C == '<') {
198           if (!lexTemplate(WordText))
199             return false;
200         } else {
201           WordText.push_back(C);
202           consumeChar();
203         }
204       } else {
205         consumeChar();
206         break;
207       }
208     }
209 
210     const unsigned Length = WordText.size();
211     if (Length == 0) {
212       Pos = SavedPos;
213       return false;
214     }
215 
216     char *TextPtr = Allocator.Allocate<char>(Length + 1);
217 
218     memcpy(TextPtr, WordText.c_str(), Length + 1);
219     StringRef Text = StringRef(TextPtr, Length);
220 
221     formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
222     return true;
223   }
224 
225   // Check if this line starts with @par or \par
startsWithParCommand()226   bool startsWithParCommand() {
227     unsigned Offset = 1;
228 
229     // Skip all whitespace characters at the beginning.
230     // This needs to backtrack because Pos has already advanced past the
231     // actual \par or @par command by the time this function is called.
232     while (isWhitespace(*(Pos.BufferPtr - Offset)))
233       Offset++;
234 
235     // Once we've reached the whitespace, backtrack and check if the previous
236     // four characters are \par or @par.
237     llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);
238     return LineStart.starts_with("\\par") || LineStart.starts_with("@par");
239   }
240 
241   /// Extract a par command argument-header.
lexParHeading(Token & Tok)242   bool lexParHeading(Token &Tok) {
243     if (isEnd())
244       return false;
245 
246     Position SavedPos = Pos;
247 
248     consumeWhitespace();
249     SmallString<32> WordText;
250     const char *WordBegin = Pos.BufferPtr;
251     SourceLocation Loc = getSourceLocation();
252 
253     if (!startsWithParCommand())
254       return false;
255 
256     // Read until the end of this token, which is effectively the end of the
257     // line. This gets us the content of the par header, if there is one.
258     while (!isEnd()) {
259       WordText.push_back(peek());
260       if (Pos.BufferPtr + 1 == Pos.BufferEnd) {
261         consumeChar();
262         break;
263       }
264       consumeChar();
265     }
266 
267     unsigned Length = WordText.size();
268     if (Length == 0) {
269       Pos = SavedPos;
270       return false;
271     }
272 
273     char *TextPtr = Allocator.Allocate<char>(Length + 1);
274 
275     memcpy(TextPtr, WordText.c_str(), Length + 1);
276     StringRef Text = StringRef(TextPtr, Length);
277 
278     formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
279     return true;
280   }
281 
282   /// Extract a word -- sequence of non-whitespace characters.
lexWord(Token & Tok)283   bool lexWord(Token &Tok) {
284     if (isEnd())
285       return false;
286 
287     Position SavedPos = Pos;
288 
289     consumeWhitespace();
290     SmallString<32> WordText;
291     const char *WordBegin = Pos.BufferPtr;
292     SourceLocation Loc = getSourceLocation();
293     while (!isEnd()) {
294       const char C = peek();
295       if (!isWhitespace(C)) {
296         WordText.push_back(C);
297         consumeChar();
298       } else
299         break;
300     }
301     const unsigned Length = WordText.size();
302     if (Length == 0) {
303       Pos = SavedPos;
304       return false;
305     }
306 
307     char *TextPtr = Allocator.Allocate<char>(Length + 1);
308 
309     memcpy(TextPtr, WordText.c_str(), Length + 1);
310     StringRef Text = StringRef(TextPtr, Length);
311 
312     formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
313     return true;
314   }
315 
lexDelimitedSeq(Token & Tok,char OpenDelim,char CloseDelim)316   bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
317     if (isEnd())
318       return false;
319 
320     Position SavedPos = Pos;
321 
322     consumeWhitespace();
323     SmallString<32> WordText;
324     const char *WordBegin = Pos.BufferPtr;
325     SourceLocation Loc = getSourceLocation();
326     bool Error = false;
327     if (!isEnd()) {
328       const char C = peek();
329       if (C == OpenDelim) {
330         WordText.push_back(C);
331         consumeChar();
332       } else
333         Error = true;
334     }
335     char C = '\0';
336     while (!Error && !isEnd()) {
337       C = peek();
338       WordText.push_back(C);
339       consumeChar();
340       if (C == CloseDelim)
341         break;
342     }
343     if (!Error && C != CloseDelim)
344       Error = true;
345 
346     if (Error) {
347       Pos = SavedPos;
348       return false;
349     }
350 
351     const unsigned Length = WordText.size();
352     char *TextPtr = Allocator.Allocate<char>(Length + 1);
353 
354     memcpy(TextPtr, WordText.c_str(), Length + 1);
355     StringRef Text = StringRef(TextPtr, Length);
356 
357     formTokenWithChars(Tok, Loc, WordBegin,
358                        Pos.BufferPtr - WordBegin, Text);
359     return true;
360   }
361 
362   /// Put back tokens that we didn't consume.
putBackLeftoverTokens()363   void putBackLeftoverTokens() {
364     if (isEnd())
365       return;
366 
367     bool HavePartialTok = false;
368     Token PartialTok;
369     if (Pos.BufferPtr != Pos.BufferStart) {
370       formTokenWithChars(PartialTok, getSourceLocation(),
371                          Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
372                          StringRef(Pos.BufferPtr,
373                                    Pos.BufferEnd - Pos.BufferPtr));
374       HavePartialTok = true;
375       Pos.CurToken++;
376     }
377 
378     P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
379     Pos.CurToken = Toks.size();
380 
381     if (HavePartialTok)
382       P.putBack(PartialTok);
383   }
384 };
385 
Parser(Lexer & L,Sema & S,llvm::BumpPtrAllocator & Allocator,const SourceManager & SourceMgr,DiagnosticsEngine & Diags,const CommandTraits & Traits)386 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
387                const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
388                const CommandTraits &Traits):
389     L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
390     Traits(Traits) {
391   consumeToken();
392 }
393 
parseParamCommandArgs(ParamCommandComment * PC,TextTokenRetokenizer & Retokenizer)394 void Parser::parseParamCommandArgs(ParamCommandComment *PC,
395                                    TextTokenRetokenizer &Retokenizer) {
396   Token Arg;
397   // Check if argument looks like direction specification: [dir]
398   // e.g., [in], [out], [in,out]
399   if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
400     S.actOnParamCommandDirectionArg(PC,
401                                     Arg.getLocation(),
402                                     Arg.getEndLocation(),
403                                     Arg.getText());
404 
405   if (Retokenizer.lexWord(Arg))
406     S.actOnParamCommandParamNameArg(PC,
407                                     Arg.getLocation(),
408                                     Arg.getEndLocation(),
409                                     Arg.getText());
410 }
411 
parseTParamCommandArgs(TParamCommandComment * TPC,TextTokenRetokenizer & Retokenizer)412 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
413                                     TextTokenRetokenizer &Retokenizer) {
414   Token Arg;
415   if (Retokenizer.lexWord(Arg))
416     S.actOnTParamCommandParamNameArg(TPC,
417                                      Arg.getLocation(),
418                                      Arg.getEndLocation(),
419                                      Arg.getText());
420 }
421 
422 ArrayRef<Comment::Argument>
parseCommandArgs(TextTokenRetokenizer & Retokenizer,unsigned NumArgs)423 Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
424   auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
425       Comment::Argument[NumArgs];
426   unsigned ParsedArgs = 0;
427   Token Arg;
428   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
429     Args[ParsedArgs] = Comment::Argument{
430         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
431     ParsedArgs++;
432   }
433 
434   return llvm::ArrayRef(Args, ParsedArgs);
435 }
436 
437 ArrayRef<Comment::Argument>
parseThrowCommandArgs(TextTokenRetokenizer & Retokenizer,unsigned NumArgs)438 Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
439                               unsigned NumArgs) {
440   auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
441       Comment::Argument[NumArgs];
442   unsigned ParsedArgs = 0;
443   Token Arg;
444 
445   while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) {
446     Args[ParsedArgs] = Comment::Argument{
447         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
448     ParsedArgs++;
449   }
450 
451   return llvm::ArrayRef(Args, ParsedArgs);
452 }
453 
454 ArrayRef<Comment::Argument>
parseParCommandArgs(TextTokenRetokenizer & Retokenizer,unsigned NumArgs)455 Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
456                             unsigned NumArgs) {
457   assert(NumArgs > 0);
458   auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
459       Comment::Argument[NumArgs];
460   unsigned ParsedArgs = 0;
461   Token Arg;
462 
463   while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Arg)) {
464     Args[ParsedArgs] = Comment::Argument{
465         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
466     ParsedArgs++;
467   }
468 
469   return llvm::ArrayRef(Args, ParsedArgs);
470 }
471 
parseBlockCommand()472 BlockCommandComment *Parser::parseBlockCommand() {
473   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
474 
475   ParamCommandComment *PC = nullptr;
476   TParamCommandComment *TPC = nullptr;
477   BlockCommandComment *BC = nullptr;
478   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
479   CommandMarkerKind CommandMarker =
480       Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
481   if (Info->IsParamCommand) {
482     PC = S.actOnParamCommandStart(Tok.getLocation(),
483                                   Tok.getEndLocation(),
484                                   Tok.getCommandID(),
485                                   CommandMarker);
486   } else if (Info->IsTParamCommand) {
487     TPC = S.actOnTParamCommandStart(Tok.getLocation(),
488                                     Tok.getEndLocation(),
489                                     Tok.getCommandID(),
490                                     CommandMarker);
491   } else {
492     BC = S.actOnBlockCommandStart(Tok.getLocation(),
493                                   Tok.getEndLocation(),
494                                   Tok.getCommandID(),
495                                   CommandMarker);
496   }
497   consumeToken();
498 
499   if (isTokBlockCommand()) {
500     // Block command ahead.  We can't nest block commands, so pretend that this
501     // command has an empty argument.
502     ParagraphComment *Paragraph = S.actOnParagraphComment(std::nullopt);
503     if (PC) {
504       S.actOnParamCommandFinish(PC, Paragraph);
505       return PC;
506     } else if (TPC) {
507       S.actOnTParamCommandFinish(TPC, Paragraph);
508       return TPC;
509     } else {
510       S.actOnBlockCommandFinish(BC, Paragraph);
511       return BC;
512     }
513   }
514 
515   if (PC || TPC || Info->NumArgs > 0) {
516     // In order to parse command arguments we need to retokenize a few
517     // following text tokens.
518     TextTokenRetokenizer Retokenizer(Allocator, *this);
519 
520     if (PC)
521       parseParamCommandArgs(PC, Retokenizer);
522     else if (TPC)
523       parseTParamCommandArgs(TPC, Retokenizer);
524     else if (Info->IsThrowsCommand)
525       S.actOnBlockCommandArgs(
526           BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
527     else if (Info->IsParCommand)
528       S.actOnBlockCommandArgs(BC,
529                               parseParCommandArgs(Retokenizer, Info->NumArgs));
530     else
531       S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));
532 
533     Retokenizer.putBackLeftoverTokens();
534   }
535 
536   // If there's a block command ahead, we will attach an empty paragraph to
537   // this command.
538   bool EmptyParagraph = false;
539   if (isTokBlockCommand())
540     EmptyParagraph = true;
541   else if (Tok.is(tok::newline)) {
542     Token PrevTok = Tok;
543     consumeToken();
544     EmptyParagraph = isTokBlockCommand();
545     putBack(PrevTok);
546   }
547 
548   ParagraphComment *Paragraph;
549   if (EmptyParagraph)
550     Paragraph = S.actOnParagraphComment(std::nullopt);
551   else {
552     BlockContentComment *Block = parseParagraphOrBlockCommand();
553     // Since we have checked for a block command, we should have parsed a
554     // paragraph.
555     Paragraph = cast<ParagraphComment>(Block);
556   }
557 
558   if (PC) {
559     S.actOnParamCommandFinish(PC, Paragraph);
560     return PC;
561   } else if (TPC) {
562     S.actOnTParamCommandFinish(TPC, Paragraph);
563     return TPC;
564   } else {
565     S.actOnBlockCommandFinish(BC, Paragraph);
566     return BC;
567   }
568 }
569 
parseInlineCommand()570 InlineCommandComment *Parser::parseInlineCommand() {
571   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
572   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
573 
574   const Token CommandTok = Tok;
575   consumeToken();
576 
577   TextTokenRetokenizer Retokenizer(Allocator, *this);
578   ArrayRef<Comment::Argument> Args =
579       parseCommandArgs(Retokenizer, Info->NumArgs);
580 
581   InlineCommandComment *IC = S.actOnInlineCommand(
582       CommandTok.getLocation(), CommandTok.getEndLocation(),
583       CommandTok.getCommandID(), Args);
584 
585   if (Args.size() < Info->NumArgs) {
586     Diag(CommandTok.getEndLocation().getLocWithOffset(1),
587          diag::warn_doc_inline_command_not_enough_arguments)
588         << CommandTok.is(tok::at_command) << Info->Name << Args.size()
589         << Info->NumArgs
590         << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
591   }
592 
593   Retokenizer.putBackLeftoverTokens();
594 
595   return IC;
596 }
597 
parseHTMLStartTag()598 HTMLStartTagComment *Parser::parseHTMLStartTag() {
599   assert(Tok.is(tok::html_start_tag));
600   HTMLStartTagComment *HST =
601       S.actOnHTMLStartTagStart(Tok.getLocation(),
602                                Tok.getHTMLTagStartName());
603   consumeToken();
604 
605   SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
606   while (true) {
607     switch (Tok.getKind()) {
608     case tok::html_ident: {
609       Token Ident = Tok;
610       consumeToken();
611       if (Tok.isNot(tok::html_equals)) {
612         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
613                                                        Ident.getHTMLIdent()));
614         continue;
615       }
616       Token Equals = Tok;
617       consumeToken();
618       if (Tok.isNot(tok::html_quoted_string)) {
619         Diag(Tok.getLocation(),
620              diag::warn_doc_html_start_tag_expected_quoted_string)
621           << SourceRange(Equals.getLocation());
622         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
623                                                        Ident.getHTMLIdent()));
624         while (Tok.is(tok::html_equals) ||
625                Tok.is(tok::html_quoted_string))
626           consumeToken();
627         continue;
628       }
629       Attrs.push_back(HTMLStartTagComment::Attribute(
630                               Ident.getLocation(),
631                               Ident.getHTMLIdent(),
632                               Equals.getLocation(),
633                               SourceRange(Tok.getLocation(),
634                                           Tok.getEndLocation()),
635                               Tok.getHTMLQuotedString()));
636       consumeToken();
637       continue;
638     }
639 
640     case tok::html_greater:
641       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
642                                 Tok.getLocation(),
643                                 /* IsSelfClosing = */ false);
644       consumeToken();
645       return HST;
646 
647     case tok::html_slash_greater:
648       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
649                                 Tok.getLocation(),
650                                 /* IsSelfClosing = */ true);
651       consumeToken();
652       return HST;
653 
654     case tok::html_equals:
655     case tok::html_quoted_string:
656       Diag(Tok.getLocation(),
657            diag::warn_doc_html_start_tag_expected_ident_or_greater);
658       while (Tok.is(tok::html_equals) ||
659              Tok.is(tok::html_quoted_string))
660         consumeToken();
661       if (Tok.is(tok::html_ident) ||
662           Tok.is(tok::html_greater) ||
663           Tok.is(tok::html_slash_greater))
664         continue;
665 
666       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
667                                 SourceLocation(),
668                                 /* IsSelfClosing = */ false);
669       return HST;
670 
671     default:
672       // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
673       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
674                                 SourceLocation(),
675                                 /* IsSelfClosing = */ false);
676       bool StartLineInvalid;
677       const unsigned StartLine = SourceMgr.getPresumedLineNumber(
678                                                   HST->getLocation(),
679                                                   &StartLineInvalid);
680       bool EndLineInvalid;
681       const unsigned EndLine = SourceMgr.getPresumedLineNumber(
682                                                   Tok.getLocation(),
683                                                   &EndLineInvalid);
684       if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
685         Diag(Tok.getLocation(),
686              diag::warn_doc_html_start_tag_expected_ident_or_greater)
687           << HST->getSourceRange();
688       else {
689         Diag(Tok.getLocation(),
690              diag::warn_doc_html_start_tag_expected_ident_or_greater);
691         Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
692           << HST->getSourceRange();
693       }
694       return HST;
695     }
696   }
697 }
698 
parseHTMLEndTag()699 HTMLEndTagComment *Parser::parseHTMLEndTag() {
700   assert(Tok.is(tok::html_end_tag));
701   Token TokEndTag = Tok;
702   consumeToken();
703   SourceLocation Loc;
704   if (Tok.is(tok::html_greater)) {
705     Loc = Tok.getLocation();
706     consumeToken();
707   }
708 
709   return S.actOnHTMLEndTag(TokEndTag.getLocation(),
710                            Loc,
711                            TokEndTag.getHTMLTagEndName());
712 }
713 
parseParagraphOrBlockCommand()714 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
715   SmallVector<InlineContentComment *, 8> Content;
716 
717   while (true) {
718     switch (Tok.getKind()) {
719     case tok::verbatim_block_begin:
720     case tok::verbatim_line_name:
721     case tok::eof:
722       break; // Block content or EOF ahead, finish this parapgaph.
723 
724     case tok::unknown_command:
725       Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
726                                               Tok.getEndLocation(),
727                                               Tok.getUnknownCommandName()));
728       consumeToken();
729       continue;
730 
731     case tok::backslash_command:
732     case tok::at_command: {
733       const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
734       if (Info->IsBlockCommand) {
735         if (Content.size() == 0)
736           return parseBlockCommand();
737         break; // Block command ahead, finish this parapgaph.
738       }
739       if (Info->IsVerbatimBlockEndCommand) {
740         Diag(Tok.getLocation(),
741              diag::warn_verbatim_block_end_without_start)
742           << Tok.is(tok::at_command)
743           << Info->Name
744           << SourceRange(Tok.getLocation(), Tok.getEndLocation());
745         consumeToken();
746         continue;
747       }
748       if (Info->IsUnknownCommand) {
749         Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
750                                                 Tok.getEndLocation(),
751                                                 Info->getID()));
752         consumeToken();
753         continue;
754       }
755       assert(Info->IsInlineCommand);
756       Content.push_back(parseInlineCommand());
757       continue;
758     }
759 
760     case tok::newline: {
761       consumeToken();
762       if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
763         consumeToken();
764         break; // Two newlines -- end of paragraph.
765       }
766       // Also allow [tok::newline, tok::text, tok::newline] if the middle
767       // tok::text is just whitespace.
768       if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
769         Token WhitespaceTok = Tok;
770         consumeToken();
771         if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
772           consumeToken();
773           break;
774         }
775         // We have [tok::newline, tok::text, non-newline].  Put back tok::text.
776         putBack(WhitespaceTok);
777       }
778       if (Content.size() > 0)
779         Content.back()->addTrailingNewline();
780       continue;
781     }
782 
783     // Don't deal with HTML tag soup now.
784     case tok::html_start_tag:
785       Content.push_back(parseHTMLStartTag());
786       continue;
787 
788     case tok::html_end_tag:
789       Content.push_back(parseHTMLEndTag());
790       continue;
791 
792     case tok::text:
793       Content.push_back(S.actOnText(Tok.getLocation(),
794                                     Tok.getEndLocation(),
795                                     Tok.getText()));
796       consumeToken();
797       continue;
798 
799     case tok::verbatim_block_line:
800     case tok::verbatim_block_end:
801     case tok::verbatim_line_text:
802     case tok::html_ident:
803     case tok::html_equals:
804     case tok::html_quoted_string:
805     case tok::html_greater:
806     case tok::html_slash_greater:
807       llvm_unreachable("should not see this token");
808     }
809     break;
810   }
811 
812   return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content)));
813 }
814 
parseVerbatimBlock()815 VerbatimBlockComment *Parser::parseVerbatimBlock() {
816   assert(Tok.is(tok::verbatim_block_begin));
817 
818   VerbatimBlockComment *VB =
819       S.actOnVerbatimBlockStart(Tok.getLocation(),
820                                 Tok.getVerbatimBlockID());
821   consumeToken();
822 
823   // Don't create an empty line if verbatim opening command is followed
824   // by a newline.
825   if (Tok.is(tok::newline))
826     consumeToken();
827 
828   SmallVector<VerbatimBlockLineComment *, 8> Lines;
829   while (Tok.is(tok::verbatim_block_line) ||
830          Tok.is(tok::newline)) {
831     VerbatimBlockLineComment *Line;
832     if (Tok.is(tok::verbatim_block_line)) {
833       Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
834                                       Tok.getVerbatimBlockText());
835       consumeToken();
836       if (Tok.is(tok::newline)) {
837         consumeToken();
838       }
839     } else {
840       // Empty line, just a tok::newline.
841       Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
842       consumeToken();
843     }
844     Lines.push_back(Line);
845   }
846 
847   if (Tok.is(tok::verbatim_block_end)) {
848     const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
849     S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name,
850                                S.copyArray(llvm::ArrayRef(Lines)));
851     consumeToken();
852   } else {
853     // Unterminated \\verbatim block
854     S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
855                                S.copyArray(llvm::ArrayRef(Lines)));
856   }
857 
858   return VB;
859 }
860 
parseVerbatimLine()861 VerbatimLineComment *Parser::parseVerbatimLine() {
862   assert(Tok.is(tok::verbatim_line_name));
863 
864   Token NameTok = Tok;
865   consumeToken();
866 
867   SourceLocation TextBegin;
868   StringRef Text;
869   // Next token might not be a tok::verbatim_line_text if verbatim line
870   // starting command comes just before a newline or comment end.
871   if (Tok.is(tok::verbatim_line_text)) {
872     TextBegin = Tok.getLocation();
873     Text = Tok.getVerbatimLineText();
874   } else {
875     TextBegin = NameTok.getEndLocation();
876     Text = "";
877   }
878 
879   VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
880                                                 NameTok.getVerbatimLineID(),
881                                                 TextBegin,
882                                                 Text);
883   consumeToken();
884   return VL;
885 }
886 
parseBlockContent()887 BlockContentComment *Parser::parseBlockContent() {
888   switch (Tok.getKind()) {
889   case tok::text:
890   case tok::unknown_command:
891   case tok::backslash_command:
892   case tok::at_command:
893   case tok::html_start_tag:
894   case tok::html_end_tag:
895     return parseParagraphOrBlockCommand();
896 
897   case tok::verbatim_block_begin:
898     return parseVerbatimBlock();
899 
900   case tok::verbatim_line_name:
901     return parseVerbatimLine();
902 
903   case tok::eof:
904   case tok::newline:
905   case tok::verbatim_block_line:
906   case tok::verbatim_block_end:
907   case tok::verbatim_line_text:
908   case tok::html_ident:
909   case tok::html_equals:
910   case tok::html_quoted_string:
911   case tok::html_greater:
912   case tok::html_slash_greater:
913     llvm_unreachable("should not see this token");
914   }
915   llvm_unreachable("bogus token kind");
916 }
917 
parseFullComment()918 FullComment *Parser::parseFullComment() {
919   // Skip newlines at the beginning of the comment.
920   while (Tok.is(tok::newline))
921     consumeToken();
922 
923   SmallVector<BlockContentComment *, 8> Blocks;
924   while (Tok.isNot(tok::eof)) {
925     Blocks.push_back(parseBlockContent());
926 
927     // Skip extra newlines after paragraph end.
928     while (Tok.is(tok::newline))
929       consumeToken();
930   }
931   return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks)));
932 }
933 
934 } // end namespace comments
935 } // end namespace clang
936