Lines matching +full:non +full:- +full:comment in clang/lib/Lex/Lexer.cpp

1 //===- Lexer.cpp - C Language Family Lexer --------------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
56 //===----------------------------------------------------------------------===//
58 //===----------------------------------------------------------------------===//
60 /// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
65 return II->getObjCKeywordID() == objcKey; in isObjCAtKeyword()
69 /// getObjCKeywordID - Return the ObjC keyword kind.
74 return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; in getObjCKeywordID()
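For context, a brief usage sketch of these two helpers (the wrapper name isAtInterfaceKeyword is invented here for illustration; it is not part of Lexer.cpp):

    #include "clang/Basic/TokenKinds.h"
    #include "clang/Lex/Token.h"

    // After the '@' has been lexed, the following identifier token can be
    // classified with Token::isObjCAtKeyword, which consults the token's
    // IdentifierInfo as the comment above describes.
    static bool isAtInterfaceKeyword(const clang::Token &AfterAt) {
      return AfterAt.isObjCAtKeyword(clang::tok::objc_interface);
    }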
77 /// Determine whether the token kind starts a simple-type-specifier.
115 return getIdentifierInfo()->isKeyword(LangOpts); in isSimpleTypeSpecifier()
122 //===----------------------------------------------------------------------===//
124 //===----------------------------------------------------------------------===//
138 // Check whether we have a BOM in the beginning of the buffer. If yes - act in InitLexer()
139 // accordingly. Right now we support only UTF-8 with and without BOM, so, just in InitLexer()
140 // skip the UTF-8 BOM if it's present. in InitLexer()
143 StringRef Buf(BufferStart, BufferEnd - BufferStart); in InitLexer()
145 .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM in InitLexer()
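A minimal standalone sketch of the BOM handling described here, using plain std::string_view instead of the StringRef/StringSwitch machinery visible above (the helper name utf8BOMLength is invented for illustration):

    #include <cstddef>
    #include <string_view>

    // Return how many bytes of UTF-8 BOM to skip at the start of the buffer
    // (3 if a BOM is present, 0 otherwise).
    static std::size_t utf8BOMLength(std::string_view Buf) {
      constexpr std::string_view BOM = "\xEF\xBB\xBF";
      return Buf.substr(0, BOM.size()) == BOM ? BOM.size() : 0;
    }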
180 /// Lexer constructor - Create a new lexer object for the specified buffer
196 /// Lexer constructor - Create a new raw lexer object. This object is only
210 /// Lexer constructor - Create a new raw lexer object. This object is only
225 SetCommentRetentionState(PP->getCommentRetentionState()); in resetExtendedTokenMode()
228 /// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
259 L->BufferPtr = StrData; in Create_PragmaLexer()
260 L->BufferEnd = StrData+TokLen; in Create_PragmaLexer()
261 assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!"); in Create_PragmaLexer()
265 L->FileLoc = SM.createExpansionLoc(SM.getLocForStartOfFile(SpellingFID), in Create_PragmaLexer()
271 L->ParsingPreprocessorDirective = true; in Create_PragmaLexer()
274 L->Is_PragmaLexer = true; in Create_PragmaLexer()
279 this->IsAtPhysicalStartOfLine = IsAtStartOfLine; in seek()
280 this->IsAtStartOfLine = IsAtStartOfLine; in seek()
294 if ((i < e - 1) && (Str[i + 1] == '\n' || Str[i + 1] == '\r') && in StringifyImpl()
319 //===----------------------------------------------------------------------===//
321 //===----------------------------------------------------------------------===//
333 // Munch the encoding-prefix and opening double-quote. in getSpellingSlow()
339 if (Spelling[Length - 1] == '"') in getSpellingSlow()
344 // splicing do not occur within their d-char-sequence nor within their in getSpellingSlow()
345 // r-char-sequence. in getSpellingSlow()
347 Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') { in getSpellingSlow()
351 do --RawEnd; while (*RawEnd != '"'); in getSpellingSlow()
352 size_t RawLength = RawEnd - BufPtr + 1; in getSpellingSlow()
374 /// getSpelling() - Return the 'spelling' of this token. The spelling of a
376 /// after trigraph expansion and escaped-newline folding. In particular, this
415 /// getSpelling() - Return the 'spelling' of this token. The spelling of a
417 /// after trigraph expansion and escaped-newline folding. In particular, this
442 /// getSpelling - This method is used to get the spelling of a token into a
464 Buffer = II->getNameStart(); in getSpelling()
465 return II->getLength(); in getSpelling()
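A hedged usage sketch of the static getSpelling overload documented above (the wrapper name spellingOrEmpty is invented; error handling is one reasonable choice, not the only one):

    #include <string>
    #include "clang/Basic/LangOptions.h"
    #include "clang/Basic/SourceManager.h"
    #include "clang/Lex/Lexer.h"

    // Fetch the post-cleaning spelling of a token; return "" if the token's
    // source range could not be read.
    static std::string spellingOrEmpty(const clang::Token &Tok,
                                       const clang::SourceManager &SM,
                                       const clang::LangOptions &LangOpts) {
      bool Invalid = false;
      std::string S = clang::Lexer::getSpelling(Tok, SM, LangOpts, &Invalid);
      return Invalid ? std::string() : S;
    }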
495 /// MeasureTokenLength - Relex the token at the specified location and return
516 // all obviously single-char tokens. This could use in getRawToken()
550 for (; LexStart != BufStart; --LexStart) { in findBeginningOfLine()
582 SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); in getBeginningOfFileToken()
596 if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData) in getBeginningOfFileToken()
625 return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); in GetBeginningOfToken()
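These two entry points are commonly combined; a hedged usage sketch (the helper name tokenRangeAt is invented for illustration):

    #include "clang/Basic/LangOptions.h"
    #include "clang/Basic/SourceLocation.h"
    #include "clang/Basic/SourceManager.h"
    #include "clang/Lex/Lexer.h"

    // Compute the source range of the token containing Loc by relexing it,
    // as the comments above describe.
    static clang::SourceRange tokenRangeAt(clang::SourceLocation Loc,
                                           const clang::SourceManager &SM,
                                           const clang::LangOptions &LO) {
      clang::SourceLocation Begin =
          clang::Lexer::GetBeginningOfToken(Loc, SM, LO);
      unsigned Len = clang::Lexer::MeasureTokenLength(Begin, SM, LO);
      return clang::SourceRange(Begin, Begin.getLocWithOffset(Len));
    }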
666 MaxLineOffset = CurPtr - Buffer.begin(); in ComputePreamble()
690 unsigned TokOffset = TheTok.getLocation().getRawEncoding() - StartOffset; in ComputePreamble()
699 if (TheTok.getKind() == tok::comment) { in ComputePreamble()
766 } while (TheTok.getKind() == tok::comment); in ComputePreamble()
783 End = ActiveCommentLoc; // don't truncate a decl comment. in ComputePreamble()
787 return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(), in ComputePreamble()
813 --CharNo; in getTokenPrefixLength()
819 for (; CharNo; --CharNo) { in getTokenPrefixLength()
830 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; in getTokenPrefixLength()
863 Len = Len - Offset; in getLocForEndOfToken()
1057 return file.substr(beginInfo.second, EndOffs - beginInfo.second); in getSourceText()
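A hedged usage sketch of getSourceText as it is typically called from tools and diagnostics (the wrapper name textOfRange is invented for illustration):

    #include "clang/Basic/LangOptions.h"
    #include "clang/Basic/SourceLocation.h"
    #include "clang/Basic/SourceManager.h"
    #include "clang/Lex/Lexer.h"
    #include "llvm/ADT/StringRef.h"

    // Return the raw source text covered by a token-delimited range.
    static llvm::StringRef textOfRange(clang::SourceRange R,
                                       const clang::SourceManager &SM,
                                       const clang::LangOptions &LO) {
      return clang::Lexer::getSourceText(
          clang::CharSourceRange::getTokenRange(R), SM, LO);
    }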
1069 const SrcMgr::ExpansionInfo &Expansion = E->getExpansion(); in getImmediateMacroName()
1094 // Find the spelling location of the start of the non-argument expansion in getImmediateMacroName()
1121 // Find the spelling location of the start of the non-argument expansion in getImmediateMacroNameForDiagnostics()
1140 if (Str - 1 < BufferStart) in isNewLineEscaped()
1143 if ((Str[0] == '\n' && Str[-1] == '\r') || in isNewLineEscaped()
1144 (Str[0] == '\r' && Str[-1] == '\n')) { in isNewLineEscaped()
1145 if (Str - 2 < BufferStart) in isNewLineEscaped()
1147 --Str; in isNewLineEscaped()
1149 --Str; in isNewLineEscaped()
1151 // Rewind to first non-space character: in isNewLineEscaped()
1153 --Str; in isNewLineEscaped()
1172 StringRef Rest = Buffer.substr(Line - Buffer.data()); in getIndentationForLine()
1179 //===----------------------------------------------------------------------===//
1181 //===----------------------------------------------------------------------===//
1183 /// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the
1211 /// getSourceLocation - Return a source location identifier for the specified
1220 unsigned CharNo = Loc-BufferStart; in getSourceLocation()
1230 /// Diag - Forwarding function for diagnostics. This translate a source
1233 return PP->Diag(getSourceLocation(Loc), DiagID); in Diag()
1236 //===----------------------------------------------------------------------===//
1238 //===----------------------------------------------------------------------===//
1240 /// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
1253 case '-': return '~'; in GetTrigraphCharForLetter()
1257 /// DecodeTrigraphChar - If the specified character is a legal trigraph when
1267 if (L && !L->isLexingRawMode()) in DecodeTrigraphChar()
1268 L->Diag(CP-2, diag::trigraph_ignored); in DecodeTrigraphChar()
1272 if (L && !L->isLexingRawMode()) in DecodeTrigraphChar()
1273 L->Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1); in DecodeTrigraphChar()
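For reference, a standalone sketch of the full '??X' to replacement-character mapping that trigraph decoding applies (this reproduces the standard trigraph set, not the exact Lexer.cpp code):

    // Map the letter after a "??" pair to the character it encodes, or 0 if
    // the pair does not form a trigraph.
    static char trigraphCharForLetter(char Letter) {
      switch (Letter) {
      default:   return 0;     // not a trigraph
      case '=':  return '#';
      case ')':  return ']';
      case '(':  return '[';
      case '!':  return '|';
      case '\'': return '^';
      case '>':  return '}';
      case '/':  return '\\';
      case '<':  return '{';
      case '-':  return '~';
      }
    }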
1277 /// getEscapedNewLineSize - Return the size of the specified escaped newline,
1278 /// or 0 if it is not an escaped newline. P[-1] is known to be a "\" or a
1285 if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r') in getEscapedNewLineSize()
1290 Ptr[Size-1] != Ptr[Size]) in getEscapedNewLineSize()
1300 /// SkipEscapedNewLines - If P points to an escaped newline (or a series of
1301 /// them), skip over them and return the first non-escaped-newline found,
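A compact sketch of the escaped-newline measurement described above, assuming a nul-terminated buffer like the lexer's; it only accepts spaces and tabs before the newline and emits no diagnostics (the name escapedNewLineSize mirrors the real function but this is not its code):

    // P points just past a '\' (or a '??/' trigraph). Return how many
    // characters the escaped newline spans, or 0 if it is not one.
    static unsigned escapedNewLineSize(const char *P) {
      unsigned Size = 0;
      while (P[Size] == ' ' || P[Size] == '\t')
        ++Size;                                   // whitespace before the EOL
      if (P[Size] != '\n' && P[Size] != '\r')
        return 0;                                 // not an escaped newline
      ++Size;
      // Treat \r\n and \n\r as a single two-character newline.
      if ((P[Size] == '\n' || P[Size] == '\r') && P[Size] != P[Size - 1])
        ++Size;
      return Size;
    }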
1362 if (!Tok || Tok->isNot(TKind)) in findLocationAfterToken()
1364 SourceLocation TokenLoc = Tok->getLocation(); in findLocationAfterToken()
1369 const char *TokenEnd = SM.getCharacterData(TokenLoc) + Tok->getLength(); in findLocationAfterToken()
1386 return TokenLoc.getLocWithOffset(Tok->getLength() + NumWhitespaceChars); in findLocationAfterToken()
1389 /// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
1411 // Common case, backslash-char where the char is not whitespace. in getCharAndSizeSlow()
1419 if (Tok) Tok->setFlag(Token::NeedsCleaning); in getCharAndSizeSlow()
1446 if (Tok) Tok->setFlag(Token::NeedsCleaning); in getCharAndSizeSlow()
1459 /// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
1474 // Common case, backslash-char where the char is not whitespace. in getCharAndSizeSlowNoWarn()
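A very reduced sketch of the slow path these fragments describe: fold away backslash-newline splices before reporting the character and how many bytes it occupied. Trigraph decoding and the NeedsCleaning flag are omitted, and it reuses the escapedNewLineSize sketch shown earlier (the names PeekedChar and peekCharSkippingSplices are invented):

    struct PeekedChar { char Char; unsigned Size; };

    static PeekedChar peekCharSkippingSplices(const char *Ptr) {
      unsigned Size = 0;
      while (Ptr[0] == '\\') {
        unsigned Esc = escapedNewLineSize(Ptr + 1);
        if (Esc == 0)
          break;                       // a real backslash, not a splice
        Ptr += 1 + Esc;                // skip the '\' plus the newline
        Size += 1 + Esc;
      }
      return {Ptr[0], Size + 1};
    }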
1510 //===----------------------------------------------------------------------===//
1512 //===----------------------------------------------------------------------===//
1539 // To mitigate https://github.com/llvm/llvm-project/issues/54732,
1544 // https://www.unicode.org/L2/L2022/22230-math-profile.pdf
1566 // A non-leading codepoint must have the XID_Continue property. in isAllowedIDChar()
1660 /// After encountering UTF-8 character C and interpreting it as an identifier
1661 /// character, check whether it's a homoglyph for a common non-identifier
1677 {U'\u200c', 0}, // ZERO WIDTH NON-JOINER in maybeDiagnoseUTF8Homoglyph()
1684 {U'\u2212', '-'}, // MINUS SIGN in maybeDiagnoseUTF8Homoglyph()
1693 {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE in maybeDiagnoseUTF8Homoglyph()
1704 {U'\uff0d', '-'}, // FULLWIDTH HYPHEN-MINUS in maybeDiagnoseUTF8Homoglyph()
1709 {U'\uff1c', '<'}, // FULLWIDTH LESS-THAN SIGN in maybeDiagnoseUTF8Homoglyph()
1711 {U'\uff1e', '>'}, // FULLWIDTH GREATER-THAN SIGN in maybeDiagnoseUTF8Homoglyph()
1726 std::end(SortedHomoglyphs) - 1, HomoglyphPair{C, '\0'}); in maybeDiagnoseUTF8Homoglyph()
1727 if (Homoglyph->Character == C) { in maybeDiagnoseUTF8Homoglyph()
1728 if (Homoglyph->LooksLike) { in maybeDiagnoseUTF8Homoglyph()
1729 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0}; in maybeDiagnoseUTF8Homoglyph()
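A standalone sketch of the lookup these fragments show: a table sorted by codepoint, searched with std::lower_bound. Only the entries visible in this listing are reproduced, and a result of 0 covers both "invisible character" and "no known lookalike" (the names HomoglyphPair here and asciiLookalikeFor are illustrative, not the real table):

    #include <algorithm>
    #include <iterator>

    struct HomoglyphPair {
      char32_t Character;
      char LooksLike;
      bool operator<(const HomoglyphPair &R) const {
        return Character < R.Character;
      }
    };

    static char asciiLookalikeFor(char32_t C) {
      static const HomoglyphPair Sorted[] = {
          {U'\u200c', 0},    // ZERO WIDTH NON-JOINER
          {U'\u2212', '-'},  // MINUS SIGN
          {U'\ufeff', 0},    // ZERO WIDTH NO-BREAK SPACE
          {U'\uff0d', '-'},  // FULLWIDTH HYPHEN-MINUS
          {U'\uff1c', '<'},  // FULLWIDTH LESS-THAN SIGN
          {U'\uff1e', '>'},  // FULLWIDTH GREATER-THAN SIGN
      };
      const HomoglyphPair *It = std::lower_bound(
          std::begin(Sorted), std::end(Sorted), HomoglyphPair{C, 0});
      return (It != std::end(Sorted) && It->Character == C) ? It->LooksLike : 0;
    }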
1778 !PP->isPreprocessedOutput()) in tryConsumeIdentifierUCN()
1780 PP->getDiagnostics(), LangOpts, CodePoint, in tryConsumeIdentifierUCN()
1789 diagnoseExtensionInIdentifier(PP->getDiagnostics(), CodePoint, in tryConsumeIdentifierUCN()
1792 maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, in tryConsumeIdentifierUCN()
1798 if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') || in tryConsumeIdentifierUCN()
1799 (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U')) in tryConsumeIdentifierUCN()
1810 // If a UTF-8 codepoint appears immediately after an escaped new line, in tryConsumeIdentifierUTF8Char()
1815 const char *CharStart = CurPtr + FirstCodeUnitSize - 1; in tryConsumeIdentifierUTF8Char()
1831 !PP->isPreprocessedOutput()) in tryConsumeIdentifierUTF8Char()
1833 PP->getDiagnostics(), LangOpts, CodePoint, in tryConsumeIdentifierUTF8Char()
1841 PP->getDiagnostics(), CodePoint, in tryConsumeIdentifierUTF8Char()
1843 maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, in tryConsumeIdentifierUTF8Char()
1846 maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint, in tryConsumeIdentifierUTF8Char()
1850 // Once we successfully parsed some UTF-8, in tryConsumeIdentifierUTF8Char()

1863 !PP->isPreprocessedOutput()) { in LexUnicodeIdentifierStart()
1865 diagnoseExtensionInIdentifier(PP->getDiagnostics(), C, in LexUnicodeIdentifierStart()
1867 maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C, in LexUnicodeIdentifierStart()
1870 maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), C, in LexUnicodeIdentifierStart()
1879 !PP->isPreprocessedOutput() && !isASCII(*BufferPtr) && in LexUnicodeIdentifierStart()
1881 // Non-ASCII characters tend to creep into source code unintentionally. in LexUnicodeIdentifierStart()
1884 // Note that we can /only/ do this when the non-ASCII character is actually in LexUnicodeIdentifierStart()
1891 PP->getDiagnostics(), LangOpts, C, in LexUnicodeIdentifierStart()
1916 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) { in fastParseASCIIIdentifier()
1936 // Match [_A-Za-z0-9]*, we have already matched an identifier start. in LexIdentifierContinue()
1978 const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); in LexIdentifierContinue()
1979 // Note that we have to call PP->LookUpIdentifierInfo() even for code in LexIdentifierContinue()
1986 // Return the code-completion token. in LexIdentifierContinue()
1988 // Skip the code-completion char and all immediate identifier characters. in LexIdentifierContinue()
1991 // only simple cases (i.e. [a-zA-Z0-9_]) are supported to keep the code in LexIdentifierContinue()
2008 if (II->isHandleIdentifierCase()) in LexIdentifierContinue()
2009 return PP->HandleIdentifier(Result); in LexIdentifierContinue()
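A minimal sketch of the ASCII fast path mentioned above: advance over [_A-Za-z0-9] bytes one at a time (the real fastParseASCIIIdentifier additionally consumes the buffer a machine word at a time before falling back to a byte loop; the name scanAsciiIdentifierBody is invented):

    static const char *scanAsciiIdentifierBody(const char *CurPtr,
                                               const char *BufferEnd) {
      auto IsBodyChar = [](unsigned char C) {
        return C == '_' || (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
               (C >= '0' && C <= '9');
      };
      while (CurPtr != BufferEnd && IsBodyChar(*CurPtr))
        ++CurPtr;
      return CurPtr;
    }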
2014 /// isHexaLiteral - Return true if Start points to a hex constant.
2028 /// LexNumericConstant - Lex the remainder of an integer or floating point
2029 /// constant. From[-1] is the first character lexed. Return the end of the
2039 CurPtr -= Size; in LexNumericConstant()
2046 if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) { in LexNumericConstant()
2054 if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) { in LexNumericConstant()
2056 // not-quite-conforming extension. Only do so if this looks like it's in LexNumericConstant()
2057 // actually meant to be a hexfloat, and not if it has a ud-suffix. in LexNumericConstant()
2084 // If we have a UCN or UTF-8 character (perhaps in a ud-suffix), continue. in LexNumericConstant()
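A standalone sketch of the sign rule these fragments describe: a '+' or '-' continues a numeric constant only directly after a decimal exponent 'e'/'E', or after a hexfloat exponent 'p'/'P' when the literal began with 0x/0X. The MicrosoftExt, language-standard, and ud-suffix refinements the real lexer applies are omitted (the name signContinuesNumber is invented):

    // TokStart points at the first character of the numeric token; PrevCh is
    // the character before the sign, C is the sign candidate.
    static bool signContinuesNumber(const char *TokStart, char PrevCh, char C) {
      if (C != '+' && C != '-')
        return false;
      if (PrevCh == 'e' || PrevCh == 'E')
        return true;                               // 1e+12, 2.5E-3, ...
      bool IsHex =
          TokStart[0] == '0' && (TokStart[1] == 'x' || TokStart[1] == 'X');
      return (PrevCh == 'p' || PrevCh == 'P') && IsHex;   // 0x1.8p+3
    }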
2097 /// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
2098 /// in C++11, or warn on a ud-suffix in C++98.
2126 // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix in LexUDSuffix()
2127 // that does not start with an underscore is ill-formed. As a conforming in LexUDSuffix()
2129 // them. We assume a suffix beginning with a UCN or UTF-8 character is more in LexUDSuffix()
2130 // likely to be a ud-suffix than a macro, however, and accept that. in LexUDSuffix()
2189 /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
2212 (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. in LexStringLiteral()
2215 FormTokenWithChars(Result, CurPtr-1, tok::unknown); in LexStringLiteral()
2220 if (isCodeCompletionPoint(CurPtr-1)) { in LexStringLiteral()
2222 codeCompleteIncludedFile(AfterQuote, CurPtr - 1, /*IsAngled=*/false); in LexStringLiteral()
2224 PP->CodeCompleteNaturalLanguage(); in LexStringLiteral()
2225 FormTokenWithChars(Result, CurPtr - 1, tok::unknown); in LexStringLiteral()
2230 NulCharacter = CurPtr-1; in LexStringLiteral()
2235 // If we are in C++11, lex the optional ud-suffix. in LexStringLiteral()
2250 /// LexRawStringLiteral - Lex the remainder of a raw string literal, after
2257 // universal-character-names, and line splicing) are reverted. in LexRawStringLiteral()
2298 if (C == 0 && CurPtr-1 == BufferEnd) { in LexRawStringLiteral()
2299 --CurPtr; in LexRawStringLiteral()
2321 } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file. in LexRawStringLiteral()
2325 FormTokenWithChars(Result, CurPtr-1, tok::unknown); in LexRawStringLiteral()
2330 // If we are in C++11, lex the optional ud-suffix. in LexRawStringLiteral()
2341 /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
2355 (C == 0 && (CurPtr - 1 == BufferEnd))) { // End of file. in LexAngledStringLiteral()
2363 if (isCodeCompletionPoint(CurPtr - 1)) { in LexAngledStringLiteral()
2364 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1, /*IsAngled=*/true); in LexAngledStringLiteral()
2366 FormTokenWithChars(Result, CurPtr - 1, tok::unknown); in LexAngledStringLiteral()
2369 NulCharacter = CurPtr-1; in LexAngledStringLiteral()
2389 StringRef PartialPath(PathStart, CompletionPoint - PathStart); in codeCompleteIncludedFile()
2397 PP->setCodeCompletionIdentifierInfo(&PP->getIdentifierTable().get( in codeCompleteIncludedFile()
2398 StringRef(StartOfFilename, CompletionPoint - StartOfFilename))); in codeCompleteIncludedFile()
2412 PP->setCodeCompletionTokenRange( in codeCompleteIncludedFile()
2413 FileLoc.getLocWithOffset(StartOfFilename - BufferStart), in codeCompleteIncludedFile()
2414 FileLoc.getLocWithOffset(CompletionPoint - BufferStart)); in codeCompleteIncludedFile()
2415 PP->CodeCompleteIncludedFile(Dir, IsAngled); in codeCompleteIncludedFile()
2418 /// LexCharConstant - Lex the remainder of a character constant, after having
2448 (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. in LexCharConstant()
2451 FormTokenWithChars(Result, CurPtr-1, tok::unknown); in LexCharConstant()
2456 if (isCodeCompletionPoint(CurPtr-1)) { in LexCharConstant()
2457 PP->CodeCompleteNaturalLanguage(); in LexCharConstant()
2458 FormTokenWithChars(Result, CurPtr-1, tok::unknown); in LexCharConstant()
2463 NulCharacter = CurPtr-1; in LexCharConstant()
2468 // If we are in C++11, lex the optional ud-suffix. in LexCharConstant()
2483 /// SkipWhitespace - Efficiently skip over a series of whitespace characters.
2484 /// Update BufferPtr to point to the next non-whitespace character and return.
2489 // Whitespace - Skip it, then return the token after the whitespace. in SkipWhitespace()
2490 bool SawNewline = isVerticalWhitespace(CurPtr[-1]); in SkipWhitespace()
2501 setLastNewLine(CurPtr - 1); in SkipWhitespace()
2538 char PrevChar = CurPtr[-1]; in SkipWhitespace()
2547 if (auto *Handler = PP->getEmptylineHandler()) in SkipWhitespace()
2548 Handler->HandleEmptyline(SourceRange(getSourceLocation(NewLinePtr + 1), in SkipWhitespace()
2558 /// newline character that terminates the comment. Then update BufferPtr and
2576 // Scan over the body of the comment. The common case, when scanning, is that in SkipLineComment()
2577 // the comment contains normal ascii characters with nothing interesting in in SkipLineComment()
2581 // character that ends the line comment. in SkipLineComment()
2584 // Diagnose invalid UTF-8 if the corresponding warning is enabled, emitting a in SkipLineComment()
2585 // diagnostic only once per entire ill-formed subsequence to avoid in SkipLineComment()
2586 // emitting too many diagnostics (see http://unicode.org/review/pr-121.html). in SkipLineComment()
2594 C != '\n' && C != '\r') { // Newline or DOS-style newline. in SkipLineComment()
2617 const char *EscapePtr = CurPtr-1; in SkipLineComment()
2620 --EscapePtr; in SkipLineComment()
2627 else if (EscapePtr[0] == '/' && EscapePtr[-1] == '?' && in SkipLineComment()
2628 EscapePtr[-2] == '?' && LangOpts.Trigraphs) in SkipLineComment()
2629 // Trigraph-escaped newline. in SkipLineComment()
2630 CurPtr = EscapePtr-2; in SkipLineComment()
2657 // \n, then we had an escaped newline within the comment. Emit diagnostic in SkipLineComment()
2658 // unless the next line is also a // comment. in SkipLineComment()
2663 // Okay, we found a // comment that ends in a newline, if the next in SkipLineComment()
2664 // line is also a // comment, but has spaces, don't emit a diagnostic. in SkipLineComment()
2674 Diag(OldPtr-1, diag::ext_multi_line_line_comment); in SkipLineComment()
2680 --CurPtr; in SkipLineComment()
2684 if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) { in SkipLineComment()
2685 PP->CodeCompleteNaturalLanguage(); in SkipLineComment()
2691 // Found but did not consume the newline. Notify comment handlers about the in SkipLineComment()
2692 // comment unless we're in a #if 0 block. in SkipLineComment()
2694 PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr), in SkipLineComment()
2700 // If we are returning comments as tokens, return this comment as a token. in SkipLineComment()
2715 // comment above in that mode. in SkipLineComment()
2727 /// If in save-comment mode, package up this Line comment in an appropriate
2730 // If we're not in a preprocessor directive, just return the // comment in SaveLineComment()
2732 FormTokenWithChars(Result, CurPtr, tok::comment); in SaveLineComment()
2737 // If this Line-style comment is in a macro definition, transmogrify it into in SaveLineComment()
2738 // a C-style block comment. in SaveLineComment()
2740 std::string Spelling = PP->getSpelling(Result, &Invalid); in SaveLineComment()
2744 assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not line comment?"); in SaveLineComment()
2748 Result.setKind(tok::comment); in SaveLineComment()
2749 PP->CreateString(Spelling, Result, in SaveLineComment()
2754 /// isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline
2756 /// a diagnostic if so. We know that the newline is inside of a block comment.
2768 --CurPtr; in isEndOfBlockCommentWithEscapedNewLine()
2770 // If this is a two-character newline sequence, skip the other character. in isEndOfBlockCommentWithEscapedNewLine()
2772 // \n\n or \r\r -> not escaped newline. in isEndOfBlockCommentWithEscapedNewLine()
2775 // \n\r or \r\n -> skip the newline. in isEndOfBlockCommentWithEscapedNewLine()
2776 --CurPtr; in isEndOfBlockCommentWithEscapedNewLine()
2783 --CurPtr; in isEndOfBlockCommentWithEscapedNewLine()
2788 --CurPtr; in isEndOfBlockCommentWithEscapedNewLine()
2789 } else if (CurPtr[0] == '/' && CurPtr[-1] == '?' && CurPtr[-2] == '?') { in isEndOfBlockCommentWithEscapedNewLine()
2791 TrigraphPos = CurPtr - 2; in isEndOfBlockCommentWithEscapedNewLine()
2792 CurPtr -= 3; in isEndOfBlockCommentWithEscapedNewLine()
2798 // splicing we have a '*/' ending the comment. in isEndOfBlockCommentWithEscapedNewLine()
2810 if (!L->isLexingRawMode()) in isEndOfBlockCommentWithEscapedNewLine()
2811 L->Diag(TrigraphPos, diag::trigraph_ignored_block_comment); in isEndOfBlockCommentWithEscapedNewLine()
2814 if (!L->isLexingRawMode()) in isEndOfBlockCommentWithEscapedNewLine()
2815 L->Diag(TrigraphPos, diag::trigraph_ends_block_comment); in isEndOfBlockCommentWithEscapedNewLine()
2819 if (!L->isLexingRawMode()) in isEndOfBlockCommentWithEscapedNewLine()
2820 L->Diag(CurPtr + 1, diag::escaped_newline_block_comment_end); in isEndOfBlockCommentWithEscapedNewLine()
2823 if (SpacePos && !L->isLexingRawMode()) in isEndOfBlockCommentWithEscapedNewLine()
2824 L->Diag(SpacePos, diag::backslash_newline_space); in isEndOfBlockCommentWithEscapedNewLine()
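A rough sketch of the check performed above, handling only a single escaped newline (the real function loops over a series of them) and omitting all diagnostics; trigraphs are assumed enabled and the helper name endsBlockCommentViaSplice is invented:

    // CurPtr points at a '\n' or '\r' that sits immediately before a '/'.
    // The comment really ends here only if that newline is escaped by '\'
    // (or the '??/' trigraph) and the escape is preceded by '*', so that
    // line splicing glues "*" and "/" into "*/".
    static bool endsBlockCommentViaSplice(const char *CurPtr,
                                          const char *BufStart) {
      --CurPtr;                                   // back up off the newline
      if (CurPtr < BufStart)
        return false;
      // Skip the other half of a \r\n / \n\r pair; \n\n or \r\r is not escaped.
      if (*CurPtr == '\n' || *CurPtr == '\r') {
        if (*CurPtr == CurPtr[1])
          return false;
        --CurPtr;
      }
      // Whitespace is tolerated between the backslash and the newline.
      while (CurPtr >= BufStart && (*CurPtr == ' ' || *CurPtr == '\t'))
        --CurPtr;
      // The escape itself: a '\' or its '??/' trigraph spelling.
      if (CurPtr >= BufStart && *CurPtr == '\\')
        --CurPtr;
      else if (CurPtr - 2 >= BufStart && CurPtr[0] == '/' && CurPtr[-1] == '?' &&
               CurPtr[-2] == '?')
        CurPtr -= 3;
      else
        return false;
      // After splicing, a preceding '*' means the comment ends with "*/".
      return CurPtr >= BufStart && *CurPtr == '*';
    }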
2836 /// We have just read from input the / and * characters that started a comment.
2837 /// Read until we find the * and / characters that terminate the comment.
2839 /// comments, because they cannot cause the comment to end. The only thing
2840 /// that can happen is the comment could end with an escaped newline between
2861 --CurPtr; in SkipBlockComment()
2863 // KeepWhitespaceMode should return this broken comment as a token. Since in SkipBlockComment()
2864 // it isn't a well formed comment, just return it as an 'unknown' token. in SkipBlockComment()
2875 // then this slash does not end the block comment; it is part of it. in SkipBlockComment()
2880 // Diagnose invalid UTF-8 if the corresponding warning is enabled, emitting a in SkipBlockComment()
2881 // diagnostic only once per entire ill-formed subsequence to avoid in SkipBlockComment()
2882 // emitting too many diagnostics (see http://unicode.org/review/pr-121.html). in SkipBlockComment()
2886 // Skip over all non-interesting characters until we find end of buffer or a in SkipBlockComment()
2889 // If there is a code-completion point avoid the fast scan because it in SkipBlockComment()
2891 !(PP && PP->getCodeCompletionFileLoc() == FileLoc)) { in SkipBlockComment()
2892 // While not aligned to a 16-byte boundary. in SkipBlockComment()
2959 // Loop to scan the remainder, warning on invalid UTF-8 in SkipBlockComment()
2972 (const llvm::UTF8 *)CurPtr - 1, (const llvm::UTF8 *)BufferEnd); in SkipBlockComment()
2975 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment); in SkipBlockComment()
2979 CurPtr += Length - 1; in SkipBlockComment()
2986 if (CurPtr[-2] == '*') // We found the final */. We're done! in SkipBlockComment()
2989 if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) { in SkipBlockComment()
2990 if (isEndOfBlockCommentWithEscapedNewLine(CurPtr - 2, this, in SkipBlockComment()
2998 // If this is a /* inside of the comment, emit a warning. Don't do this in SkipBlockComment()
2999 // if this is a /*/, which will end the comment. This misses cases with in SkipBlockComment()
3002 Diag(CurPtr-1, diag::warn_nested_block_comment); in SkipBlockComment()
3009 // comment, which surely would confuse the parser. in SkipBlockComment()
3010 --CurPtr; in SkipBlockComment()
3012 // KeepWhitespaceMode should return this broken comment as a token. Since in SkipBlockComment()
3013 // it isn't a well formed comment, just return it as an 'unknown' token. in SkipBlockComment()
3021 } else if (C == '\0' && isCodeCompletionPoint(CurPtr-1)) { in SkipBlockComment()
3022 PP->CodeCompleteNaturalLanguage(); in SkipBlockComment()
3030 // Notify comment handlers about the comment unless we're in a #if 0 block. in SkipBlockComment()
3032 PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr), in SkipBlockComment()
3038 // If we are returning comments as tokens, return this comment as a token. in SkipBlockComment()
3040 FormTokenWithChars(Result, CurPtr, tok::comment); in SkipBlockComment()
3044 // It is common for the tokens immediately after a /**/ comment to be in SkipBlockComment()
3047 // have already returned above with the comment as a token. in SkipBlockComment()
3059 //===----------------------------------------------------------------------===//
3061 //===----------------------------------------------------------------------===//
3063 /// ReadToEndOfLine - Read the rest of the current preprocessor line as an
3071 // CurPtr - Cache BufferPtr in an automatic variable. in ReadToEndOfLine()
3078 Result->push_back(Char); in ReadToEndOfLine()
3082 if (CurPtr-1 != BufferEnd) { in ReadToEndOfLine()
3083 if (isCodeCompletionPoint(CurPtr-1)) { in ReadToEndOfLine()
3084 PP->CodeCompleteNaturalLanguage(); in ReadToEndOfLine()
3091 Result->push_back(Char); in ReadToEndOfLine()
3099 assert(CurPtr[-1] == Char && "Trigraphs for newline?"); in ReadToEndOfLine()
3100 BufferPtr = CurPtr-1; in ReadToEndOfLine()
3106 PP->CodeCompleteNaturalLanguage(); in ReadToEndOfLine()
3117 /// LexEndOfFile - CurPtr points to the end of this file. Handle this
3131 // Restore comment saving mode, in case it was disabled for directive. in LexEndOfFile()
3146 if (PP->isRecordingPreamble() && PP->isInPrimaryFile()) { in LexEndOfFile()
3147 PP->setRecordedPreambleConditionalStack(ConditionalStack); in LexEndOfFile()
3160 if (PP->getCodeCompletionFileLoc() != FileLoc) in LexEndOfFile()
3161 PP->Diag(ConditionalStack.back().IfLoc, in LexEndOfFile()
3166 // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue in LexEndOfFile()
3168 if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) { in LexEndOfFile()
3169 DiagnosticsEngine &Diags = PP->getDiagnostics(); in LexEndOfFile()
3176 // non-extension, user-requested "missing newline at EOF" warning. in LexEndOfFile()
3193 return PP->HandleEndOfFile(Result, isPragmaLexer()); in LexEndOfFile()
3196 /// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
3232 // Restore the lexer back to non-skipping mode. in isNextPPTokenLParen()
3245 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen); in FindConflictEnd()
3250 (RestOfBuffer[Pos - 1] != '\r' && RestOfBuffer[Pos - 1] != '\n')) { in FindConflictEnd()
3260 /// IsStartOfConflictMarker - If the specified pointer is the start of a version
3267 CurPtr[-1] != '\n' && CurPtr[-1] != '\r') in IsStartOfConflictMarker()
3271 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with("<<<<<<<") && in IsStartOfConflictMarker()
3272 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(">>>> ")) in IsStartOfConflictMarker()
3291 // end-of-conflict marker starts with \r or \n. in IsStartOfConflictMarker()
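A hedged sketch of the start-of-conflict-marker test these fragments describe: the marker must sit at the start of a line and spell either the Git-style "<<<<<<<" or the Perforce-style ">>>> " prefix. The real check additionally requires FindConflictEnd to locate a matching terminator further down, which is omitted here (the name looksLikeConflictMarkerStart is invented):

    #include <string_view>

    static bool looksLikeConflictMarkerStart(const char *CurPtr,
                                             const char *BufferStart,
                                             const char *BufferEnd) {
      // Must be at the start of the buffer or of a line.
      if (CurPtr != BufferStart && CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
        return false;
      std::string_view Rest(CurPtr, static_cast<std::size_t>(BufferEnd - CurPtr));
      return Rest.substr(0, 7) == "<<<<<<<" || Rest.substr(0, 5) == ">>>> ";
    }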
3304 /// HandleEndOfConflictMarker - If this is a '====' or '||||' or '>>>>', or if
3311 CurPtr[-1] != '\n' && CurPtr[-1] != '\r') in HandleEndOfConflictMarker()
3349 BufferEnd -= 1; // Scan until the second-to-last character. in findPlaceholderEnd()
3358 assert(CurPtr[-1] == '<' && CurPtr[0] == '#' && "Not a placeholder!"); in lexEditorPlaceholder()
3359 if (!PP || !PP->getPreprocessorOpts().LexEditorPlaceholders || LexingRawMode) in lexEditorPlaceholder()
3364 const char *Start = CurPtr - 1; in lexEditorPlaceholder()
3370 PP->LookUpIdentifierInfo(Result); in lexEditorPlaceholder()
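A sketch of the "<#...#>" end scan referenced above: walk forward looking for a "#>" pair, stopping one character short of the buffer end so that reading CurPtr[1] stays in bounds (the name placeholderEnd mirrors the real helper but this is an illustrative re-statement, not its code):

    static const char *placeholderEnd(const char *CurPtr,
                                      const char *BufferEnd) {
      if (CurPtr == BufferEnd)
        return nullptr;
      --BufferEnd;                       // so that CurPtr[1] stays in bounds
      for (; CurPtr != BufferEnd; ++CurPtr)
        if (CurPtr[0] == '#' && CurPtr[1] == '>')
          return CurPtr + 2;             // just past the terminating "#>"
      return nullptr;                    // unterminated placeholder
    }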
3377 if (PP && PP->isCodeCompletionEnabled()) { in isCodeCompletionPoint()
3378 SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart); in isCodeCompletionPoint()
3379 return Loc == PP->getCodeCompletionLoc(); in isCodeCompletionPoint()
3410 const char *KindLoc = &CurPtr[-1]; in tryReadNumericUCN()
3428 if (Value == -1U) { in tryReadNumericUCN()
3477 Diag(SlashLoc, PP->getLangOpts().CPlusPlus23 in tryReadNumericUCN()
3480 << /*delimited*/ 0 << (PP->getLangOpts().CPlusPlus ? 1 : 0); in tryReadNumericUCN()
3484 Result->setFlag(Token::HasUCN); in tryReadNumericUCN()
3488 if (CurPtr - StartPtr == (ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0))) in tryReadNumericUCN()
3509 const char *KindLoc = &CurPtr[-1]; in tryReadNamedUCN()
3551 << makeCharRange(*this, StartName, CurPtr - CharSize); in tryReadNamedUCN()
3555 makeCharRange(*this, StartName, CurPtr - CharSize), in tryReadNamedUCN()
3556 LooseMatch->Name); in tryReadNamedUCN()
3565 Diag(SlashLoc, PP->getLangOpts().CPlusPlus23 in tryReadNamedUCN()
3568 << /*named*/ 1 << (PP->getLangOpts().CPlusPlus ? 1 : 0); in tryReadNamedUCN()
3575 Match = LooseMatch->CodePoint; in tryReadNamedUCN()
3578 Result->setFlag(Token::HasUCN); in tryReadNamedUCN()
3582 if (CurPtr - StartPtr == (ptrdiff_t)(Buffer.size() + 3)) in tryReadNamedUCN()
3615 // - in the range D800 through DFFF inclusive; or in tryReadUCN()
3616 // - greater than 10FFFF. in tryReadUCN()
3617 // A universal-character-name outside the c-char-sequence of a character in tryReadUCN()
3618 // constant, or the s-char-sequence of a string-literal shall not designate in tryReadUCN()
3622 // universal-character-name corresponds to a surrogate code point (in the in tryReadUCN()
3623 // range 0xD800-0xDFFF, inclusive), the program is ill-formed. Additionally, in tryReadUCN()
3624 // if the hexadecimal value for a universal-character-name outside the in tryReadUCN()
3625 // c-char-sequence, s-char-sequence, or r-char-sequence of a character or in tryReadUCN()
3627 // ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a character in the in tryReadUCN()
3628 // basic source character set, the program is ill-formed. in tryReadUCN()
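A worked sketch of the range rules quoted above: surrogate code points and values past 0x10FFFF are never valid, and control characters (the C0 and C1 ranges) are rejected outside character and string literals. The basic-source-character-set check and the per-standard wording differences are deliberately left out, and the InLiteral flag is a simplification of the c-char/s-char/r-char conditions (the name ucnValueIsAllowed is invented):

    #include <cstdint>

    static bool ucnValueIsAllowed(std::uint32_t CodePoint, bool InLiteral) {
      if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF)
        return false;                                 // surrogate code point
      if (CodePoint > 0x10FFFF)
        return false;                                 // beyond Unicode's range
      if (!InLiteral &&
          (CodePoint <= 0x1F || (CodePoint >= 0x7F && CodePoint <= 0x9F)))
        return false;                                 // control character
      return true;
    }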
3660 if (!isLexingRawMode() && !PP->isPreprocessedOutput() && in CheckUnicodeWhitespace()
3710 /// LexTokenInternal - This implements a simple C family lexer. It is an
3720 // CurPtr - Cache BufferPtr in an automatic variable. in LexTokenInternal()
3754 if (CurPtr-1 == BufferEnd) in LexTokenInternal()
3755 return LexEndOfFile(Result, CurPtr-1); in LexTokenInternal()
3758 if (isCodeCompletionPoint(CurPtr-1)) { in LexTokenInternal()
3759 // Return the code-completion token. in LexTokenInternal()
3766 Diag(CurPtr-1, diag::null_in_file); in LexTokenInternal()
3779 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft); in LexTokenInternal()
3780 return LexEndOfFile(Result, CurPtr-1); in LexTokenInternal()
3798 // Restore comment saving mode, in case it was disabled for directive. in LexTokenInternal()
3805 NewLinePtr = CurPtr - 1; in LexTokenInternal()
3832 // If the next token is obviously a // or /* */ comment, skip it efficiently in LexTokenInternal()
3854 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
3859 // UTF-8 (C23/C++17) or UTF-16 (C11/C++11) character literal, or in LexTokenInternal()
3860 // UTF-8 or UTF-16 string literal (C11/C++11). in LexTokenInternal()
3862 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
3868 // UTF-16 string literal in LexTokenInternal()
3873 // UTF-16 character constant in LexTokenInternal()
3878 // UTF-16 raw string literal in LexTokenInternal()
3889 // UTF-8 string literal in LexTokenInternal()
3904 // UTF-8 raw string literal in LexTokenInternal()
3919 case 'U': // Identifier (e.g. Uber) or C11/C++11 UTF-32 string literal in LexTokenInternal()
3920 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
3926 // UTF-32 string literal in LexTokenInternal()
3931 // UTF-32 character constant in LexTokenInternal()
3936 // UTF-32 raw string literal in LexTokenInternal()
3949 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
3965 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
3999 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
4006 Diag(CurPtr-1, diag::ext_dollar_in_identifier); in LexTokenInternal()
4007 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
4017 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
4023 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
4054 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
4102 case '-': in LexTokenInternal()
4104 if (Char == '-') { // -- in LexTokenInternal()
4108 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { // C++ ->* in LexTokenInternal()
4112 } else if (Char == '>') { // -> in LexTokenInternal()
4115 } else if (Char == '=') { // -= in LexTokenInternal()
4136 if (Char == '/') { // Line comment. in LexTokenInternal()
4138 // want to lex this as a comment. There is one problem with this though, in LexTokenInternal()
4143 // If so, we will lex that as a "/" instead of the start of a comment. in LexTokenInternal()
4148 if (!(PP && PP->isPreprocessedOutput())) in LexTokenInternal()
4156 // It is common for the tokens immediately after a // comment to be in LexTokenInternal()
4163 if (Char == '*') { // /**/ comment. in LexTokenInternal()
4186 Kind = tok::r_brace; // '%>' -> '}' in LexTokenInternal()
4192 Kind = tok::hashhash; // '%:%:' -> '##' in LexTokenInternal()
4195 } else if (Char == '@' && LangOpts.MicrosoftExt) {// %:@ -> #@ -> Charize in LexTokenInternal()
4200 } else { // '%:' -> '#' in LexTokenInternal()
4204 // TODO: -fpreprocessed mode?? in LexTokenInternal()
4224 } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) { in LexTokenInternal()
4228 } else if (After == '<' && HandleEndOfConflictMarker(CurPtr-1)) { in LexTokenInternal()
4229 // If this is '<<<<' and we're in a Perforce-style conflict marker, in LexTokenInternal()
4261 } else if (LangOpts.Digraphs && Char == ':') { // '<:' -> '[' in LexTokenInternal()
4281 } else if (LangOpts.Digraphs && Char == '%') { // '<%' -> '{' in LexTokenInternal()
4302 } else if (After == '>' && IsStartOfConflictMarker(CurPtr-1)) { in LexTokenInternal()
4306 } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) { in LexTokenInternal()
4340 if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1)) in LexTokenInternal()
4351 Kind = tok::r_square; // ':>' -> ']' in LexTokenInternal()
4367 if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1)) in LexTokenInternal()
4384 } else if (Char == '@' && LangOpts.MicrosoftExt) { // #@ -> Charize in LexTokenInternal()
4393 // TODO: -fpreprocessed mode?? in LexTokenInternal()
4403 if (CurPtr[-1] == '@' && LangOpts.ObjC) in LexTokenInternal()
4439 --CurPtr; in LexTokenInternal()
4458 PP->isPreprocessedOutput()) { in LexTokenInternal()
4464 // Non-ASCII characters tend to creep into source code unintentionally. in LexTokenInternal()
4466 // just diagnose the invalid UTF-8, then drop the character. in LexTokenInternal()
4477 // Notify MIOpt that we read a non-whitespace/non-comment token. in LexTokenInternal()
4488 PP->HandleDirective(Result); in LexTokenInternal()
4490 if (PP->hadModuleLoaderFatalFailure()) in LexTokenInternal()
4554 PP->HandleDirective(Result); in LexDependencyDirectiveToken()
4560 const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); in LexDependencyDirectiveToken()
4561 if (II->isHandleIdentifierCase()) in LexDependencyDirectiveToken()
4562 return PP->HandleIdentifier(Result); in LexDependencyDirectiveToken()
4634 --NestedIfs; in LexDependencyDirectiveTokenWhileSkipping()