1 //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Implements # directive processing for the Preprocessor. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Basic/CharInfo.h" 15 #include "clang/Basic/DirectoryEntry.h" 16 #include "clang/Basic/FileManager.h" 17 #include "clang/Basic/IdentifierTable.h" 18 #include "clang/Basic/LangOptions.h" 19 #include "clang/Basic/Module.h" 20 #include "clang/Basic/SourceLocation.h" 21 #include "clang/Basic/SourceManager.h" 22 #include "clang/Basic/TargetInfo.h" 23 #include "clang/Basic/TokenKinds.h" 24 #include "clang/Lex/CodeCompletionHandler.h" 25 #include "clang/Lex/HeaderSearch.h" 26 #include "clang/Lex/HeaderSearchOptions.h" 27 #include "clang/Lex/LexDiagnostic.h" 28 #include "clang/Lex/LiteralSupport.h" 29 #include "clang/Lex/MacroInfo.h" 30 #include "clang/Lex/ModuleLoader.h" 31 #include "clang/Lex/ModuleMap.h" 32 #include "clang/Lex/PPCallbacks.h" 33 #include "clang/Lex/Pragma.h" 34 #include "clang/Lex/Preprocessor.h" 35 #include "clang/Lex/PreprocessorOptions.h" 36 #include "clang/Lex/Token.h" 37 #include "clang/Lex/VariadicMacroSupport.h" 38 #include "llvm/ADT/ArrayRef.h" 39 #include "llvm/ADT/STLExtras.h" 40 #include "llvm/ADT/ScopeExit.h" 41 #include "llvm/ADT/SmallString.h" 42 #include "llvm/ADT/SmallVector.h" 43 #include "llvm/ADT/StringExtras.h" 44 #include "llvm/ADT/StringRef.h" 45 #include "llvm/ADT/StringSwitch.h" 46 #include "llvm/Support/AlignOf.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include "llvm/Support/Path.h" 49 #include "llvm/Support/SaveAndRestore.h" 50 #include <algorithm> 51 
#include <cassert> 52 #include <cstring> 53 #include <new> 54 #include <optional> 55 #include <string> 56 #include <utility> 57 58 using namespace clang; 59 60 //===----------------------------------------------------------------------===// 61 // Utility Methods for Preprocessor Directive Handling. 62 //===----------------------------------------------------------------------===// 63 64 MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) { 65 static_assert(std::is_trivially_destructible_v<MacroInfo>, ""); 66 return new (BP) MacroInfo(L); 67 } 68 69 DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI, 70 SourceLocation Loc) { 71 return new (BP) DefMacroDirective(MI, Loc); 72 } 73 74 UndefMacroDirective * 75 Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) { 76 return new (BP) UndefMacroDirective(UndefLoc); 77 } 78 79 VisibilityMacroDirective * 80 Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc, 81 bool isPublic) { 82 return new (BP) VisibilityMacroDirective(Loc, isPublic); 83 } 84 85 /// Read and discard all tokens remaining on the current line until 86 /// the tok::eod token is found. 87 SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) { 88 SourceRange Res; 89 90 LexUnexpandedToken(Tmp); 91 Res.setBegin(Tmp.getLocation()); 92 while (Tmp.isNot(tok::eod)) { 93 assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens"); 94 LexUnexpandedToken(Tmp); 95 } 96 Res.setEnd(Tmp.getLocation()); 97 return Res; 98 } 99 100 /// Enumerates possible cases of #define/#undef a reserved identifier. 101 enum MacroDiag { 102 MD_NoWarn, //> Not a reserved identifier 103 MD_KeywordDef, //> Macro hides keyword, enabled by default 104 MD_ReservedMacro //> #define of #undef reserved id, disabled by default 105 }; 106 107 /// Enumerates possible %select values for the pp_err_elif_after_else and 108 /// pp_err_elif_without_if diagnostics. 
109 enum PPElifDiag { 110 PED_Elif, 111 PED_Elifdef, 112 PED_Elifndef 113 }; 114 115 static bool isFeatureTestMacro(StringRef MacroName) { 116 // list from: 117 // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html 118 // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160 119 // * man 7 feature_test_macros 120 // The list must be sorted for correct binary search. 121 static constexpr StringRef ReservedMacro[] = { 122 "_ATFILE_SOURCE", 123 "_BSD_SOURCE", 124 "_CRT_NONSTDC_NO_WARNINGS", 125 "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES", 126 "_CRT_SECURE_NO_WARNINGS", 127 "_FILE_OFFSET_BITS", 128 "_FORTIFY_SOURCE", 129 "_GLIBCXX_ASSERTIONS", 130 "_GLIBCXX_CONCEPT_CHECKS", 131 "_GLIBCXX_DEBUG", 132 "_GLIBCXX_DEBUG_PEDANTIC", 133 "_GLIBCXX_PARALLEL", 134 "_GLIBCXX_PARALLEL_ASSERTIONS", 135 "_GLIBCXX_SANITIZE_VECTOR", 136 "_GLIBCXX_USE_CXX11_ABI", 137 "_GLIBCXX_USE_DEPRECATED", 138 "_GNU_SOURCE", 139 "_ISOC11_SOURCE", 140 "_ISOC95_SOURCE", 141 "_ISOC99_SOURCE", 142 "_LARGEFILE64_SOURCE", 143 "_POSIX_C_SOURCE", 144 "_REENTRANT", 145 "_SVID_SOURCE", 146 "_THREAD_SAFE", 147 "_XOPEN_SOURCE", 148 "_XOPEN_SOURCE_EXTENDED", 149 "__STDCPP_WANT_MATH_SPEC_FUNCS__", 150 "__STDC_FORMAT_MACROS", 151 }; 152 return std::binary_search(std::begin(ReservedMacro), std::end(ReservedMacro), 153 MacroName); 154 } 155 156 static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr, 157 const MacroInfo *MI, 158 const StringRef MacroName) { 159 // If this is a macro with special handling (like __LINE__) then it's language 160 // defined. 
161 if (MI->isBuiltinMacro()) 162 return true; 163 // Builtin macros are defined in the builtin file 164 if (!SourceMgr.isWrittenInBuiltinFile(MI->getDefinitionLoc())) 165 return false; 166 // C defines macros starting with __STDC, and C++ defines macros starting with 167 // __STDCPP 168 if (MacroName.starts_with("__STDC")) 169 return true; 170 // C++ defines the __cplusplus macro 171 if (MacroName == "__cplusplus") 172 return true; 173 // C++ defines various feature-test macros starting with __cpp 174 if (MacroName.starts_with("__cpp")) 175 return true; 176 // Anything else isn't language-defined 177 return false; 178 } 179 180 static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) { 181 const LangOptions &Lang = PP.getLangOpts(); 182 StringRef Text = II->getName(); 183 if (isReservedInAllContexts(II->isReserved(Lang))) 184 return isFeatureTestMacro(Text) ? MD_NoWarn : MD_ReservedMacro; 185 if (II->isKeyword(Lang)) 186 return MD_KeywordDef; 187 if (Lang.CPlusPlus11 && (Text == "override" || Text == "final")) 188 return MD_KeywordDef; 189 return MD_NoWarn; 190 } 191 192 static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) { 193 const LangOptions &Lang = PP.getLangOpts(); 194 // Do not warn on keyword undef. It is generally harmless and widely used. 195 if (isReservedInAllContexts(II->isReserved(Lang))) 196 return MD_ReservedMacro; 197 return MD_NoWarn; 198 } 199 200 // Return true if we want to issue a diagnostic by default if we 201 // encounter this name in a #include with the wrong case. For now, 202 // this includes the standard C and C++ headers, Posix headers, 203 // and Boost headers. Improper case for these #includes is a 204 // potential portability issue. 205 static bool warnByDefaultOnWrongCase(StringRef Include) { 206 // If the first component of the path is "boost", treat this like a standard header 207 // for the purposes of diagnostics. 
208 if (::llvm::sys::path::begin(Include)->equals_insensitive("boost")) 209 return true; 210 211 // "condition_variable" is the longest standard header name at 18 characters. 212 // If the include file name is longer than that, it can't be a standard header. 213 static const size_t MaxStdHeaderNameLen = 18u; 214 if (Include.size() > MaxStdHeaderNameLen) 215 return false; 216 217 // Lowercase and normalize the search string. 218 SmallString<32> LowerInclude{Include}; 219 for (char &Ch : LowerInclude) { 220 // In the ASCII range? 221 if (static_cast<unsigned char>(Ch) > 0x7f) 222 return false; // Can't be a standard header 223 // ASCII lowercase: 224 if (Ch >= 'A' && Ch <= 'Z') 225 Ch += 'a' - 'A'; 226 // Normalize path separators for comparison purposes. 227 else if (::llvm::sys::path::is_separator(Ch)) 228 Ch = '/'; 229 } 230 231 // The standard C/C++ and Posix headers 232 return llvm::StringSwitch<bool>(LowerInclude) 233 // C library headers 234 .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true) 235 .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true) 236 .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true) 237 .Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stddef.h", true) 238 .Cases("stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true) 239 .Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true) 240 .Cases("wchar.h", "wctype.h", true) 241 242 // C++ headers for C library facilities 243 .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true) 244 .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true) 245 .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true) 246 .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true) 247 .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true) 248 .Case("cwctype", true) 249 250 // C++ library headers 251 .Cases("algorithm", "fstream", "list", "regex", "thread", true) 252 .Cases("array", 
"functional", "locale", "scoped_allocator", "tuple", true) 253 .Cases("atomic", "future", "map", "set", "type_traits", true) 254 .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true) 255 .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true) 256 .Cases("codecvt", "ios", "new", "stack", "unordered_map", true) 257 .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true) 258 .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true) 259 .Cases("deque", "istream", "queue", "string", "valarray", true) 260 .Cases("exception", "iterator", "random", "strstream", "vector", true) 261 .Cases("forward_list", "limits", "ratio", "system_error", true) 262 263 // POSIX headers (which aren't also C headers) 264 .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true) 265 .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true) 266 .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true) 267 .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true) 268 .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true) 269 .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true) 270 .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true) 271 .Cases("sys/resource.h", "sys/select.h", "sys/sem.h", "sys/shm.h", "sys/socket.h", true) 272 .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true) 273 .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true) 274 .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true) 275 .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true) 276 .Default(false); 277 } 278 279 /// Find a similar string in `Candidates`. 280 /// 281 /// \param LHS a string for a similar string in `Candidates` 282 /// 283 /// \param Candidates the candidates to find a similar string. 284 /// 285 /// \returns a similar string if exists. 
If no similar string exists, 286 /// returns std::nullopt. 287 static std::optional<StringRef> 288 findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) { 289 // We need to check if `Candidates` has the exact case-insensitive string 290 // because the Levenshtein distance match does not care about it. 291 for (StringRef C : Candidates) { 292 if (LHS.equals_insensitive(C)) { 293 return C; 294 } 295 } 296 297 // Keep going with the Levenshtein distance match. 298 // If the LHS size is less than 3, use the LHS size minus 1 and if not, 299 // use the LHS size divided by 3. 300 size_t Length = LHS.size(); 301 size_t MaxDist = Length < 3 ? Length - 1 : Length / 3; 302 303 std::optional<std::pair<StringRef, size_t>> SimilarStr; 304 for (StringRef C : Candidates) { 305 size_t CurDist = LHS.edit_distance(C, true); 306 if (CurDist <= MaxDist) { 307 if (!SimilarStr) { 308 // The first similar string found. 309 SimilarStr = {C, CurDist}; 310 } else if (CurDist < SimilarStr->second) { 311 // More similar string found. 312 SimilarStr = {C, CurDist}; 313 } 314 } 315 } 316 317 if (SimilarStr) { 318 return SimilarStr->first; 319 } else { 320 return std::nullopt; 321 } 322 } 323 324 bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 325 bool *ShadowFlag) { 326 // Missing macro name? 327 if (MacroNameTok.is(tok::eod)) 328 return Diag(MacroNameTok, diag::err_pp_missing_macro_name); 329 330 IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); 331 if (!II) 332 return Diag(MacroNameTok, diag::err_pp_macro_not_identifier); 333 334 if (II->isCPlusPlusOperatorKeyword()) { 335 // C++ 2.5p2: Alternative tokens behave the same as its primary token 336 // except for their spellings. 337 Diag(MacroNameTok, getLangOpts().MicrosoftExt 338 ? 
diag::ext_pp_operator_used_as_macro_name 339 : diag::err_pp_operator_used_as_macro_name) 340 << II << MacroNameTok.getKind(); 341 // Allow #defining |and| and friends for Microsoft compatibility or 342 // recovery when legacy C headers are included in C++. 343 } 344 345 if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) { 346 // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4. 347 return Diag(MacroNameTok, diag::err_defined_macro_name); 348 } 349 350 // If defining/undefining reserved identifier or a keyword, we need to issue 351 // a warning. 352 SourceLocation MacroNameLoc = MacroNameTok.getLocation(); 353 if (ShadowFlag) 354 *ShadowFlag = false; 355 if (!SourceMgr.isInSystemHeader(MacroNameLoc) && 356 (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) { 357 MacroDiag D = MD_NoWarn; 358 if (isDefineUndef == MU_Define) { 359 D = shouldWarnOnMacroDef(*this, II); 360 } 361 else if (isDefineUndef == MU_Undef) 362 D = shouldWarnOnMacroUndef(*this, II); 363 if (D == MD_KeywordDef) { 364 // We do not want to warn on some patterns widely used in configuration 365 // scripts. This requires analyzing next tokens, so do not issue warnings 366 // now, only inform caller. 367 if (ShadowFlag) 368 *ShadowFlag = true; 369 } 370 if (D == MD_ReservedMacro) 371 Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id); 372 } 373 374 // Okay, we got a good identifier. 375 return false; 376 } 377 378 /// Lex and validate a macro name, which occurs after a 379 /// \#define or \#undef. 380 /// 381 /// This sets the token kind to eod and discards the rest of the macro line if 382 /// the macro name is invalid. 383 /// 384 /// \param MacroNameTok Token that is expected to be a macro name. 385 /// \param isDefineUndef Context in which macro is used. 386 /// \param ShadowFlag Points to a flag that is set if macro shadows a keyword. 
387 void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 388 bool *ShadowFlag) { 389 // Read the token, don't allow macro expansion on it. 390 LexUnexpandedToken(MacroNameTok); 391 392 if (MacroNameTok.is(tok::code_completion)) { 393 if (CodeComplete) 394 CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define); 395 setCodeCompletionReached(); 396 LexUnexpandedToken(MacroNameTok); 397 } 398 399 if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag)) 400 return; 401 402 // Invalid macro name, read and discard the rest of the line and set the 403 // token kind to tok::eod if necessary. 404 if (MacroNameTok.isNot(tok::eod)) { 405 MacroNameTok.setKind(tok::eod); 406 DiscardUntilEndOfDirective(); 407 } 408 } 409 410 /// Ensure that the next token is a tok::eod token. 411 /// 412 /// If not, emit a diagnostic and consume up until the eod. If EnableMacros is 413 /// true, then we consider macros that expand to zero tokens as being ok. 414 /// 415 /// Returns the location of the end of the directive. 416 SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, 417 bool EnableMacros) { 418 Token Tmp; 419 // Lex unexpanded tokens for most directives: macros might expand to zero 420 // tokens, causing us to miss diagnosing invalid lines. Some directives (like 421 // #line) allow empty macros. 422 if (EnableMacros) 423 Lex(Tmp); 424 else 425 LexUnexpandedToken(Tmp); 426 427 // There should be no tokens after the directive, but we allow them as an 428 // extension. 429 while (Tmp.is(tok::comment)) // Skip comments in -C mode. 430 LexUnexpandedToken(Tmp); 431 432 if (Tmp.is(tok::eod)) 433 return Tmp.getLocation(); 434 435 // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89, 436 // or if this is a macro-style preprocessing directive, because it is more 437 // trouble than it is worth to insert /**/ and check that there is no /**/ 438 // in the range also. 
439 FixItHint Hint; 440 if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && 441 !CurTokenLexer) 442 Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); 443 Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; 444 return DiscardUntilEndOfDirective().getEnd(); 445 } 446 447 void Preprocessor::SuggestTypoedDirective(const Token &Tok, 448 StringRef Directive) const { 449 // If this is a `.S` file, treat unknown # directives as non-preprocessor 450 // directives. 451 if (getLangOpts().AsmPreprocessor) return; 452 453 std::vector<StringRef> Candidates = { 454 "if", "ifdef", "ifndef", "elif", "else", "endif" 455 }; 456 if (LangOpts.C23 || LangOpts.CPlusPlus23) 457 Candidates.insert(Candidates.end(), {"elifdef", "elifndef"}); 458 459 if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) { 460 // Directive cannot be coming from macro. 461 assert(Tok.getLocation().isFileID()); 462 CharSourceRange DirectiveRange = CharSourceRange::getCharRange( 463 Tok.getLocation(), 464 Tok.getLocation().getLocWithOffset(Directive.size())); 465 StringRef SuggValue = *Sugg; 466 467 auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue); 468 Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint; 469 } 470 } 471 472 /// SkipExcludedConditionalBlock - We just read a \#if or related directive and 473 /// decided that the subsequent tokens are in the \#if'd out portion of the 474 /// file. Lex the rest of the file, until we see an \#endif. If 475 /// FoundNonSkipPortion is true, then we have already emitted code for part of 476 /// this \#if directive, so \#else/\#elif blocks should never be entered. 477 /// If ElseOk is true, then \#else directives are ok, if not, then we have 478 /// already seen one so a \#else directive is a duplicate. When this returns, 479 /// the caller can lex the first valid token. 
void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                                SourceLocation IfTokenLoc,
                                                bool FoundNonSkipPortion,
                                                bool FoundElse,
                                                SourceLocation ElseLoc) {
  // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
  // not getting called recursively by storing the RecordedSkippedRanges
  // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
  // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
  // invalidated. If this changes and there is a need to call
  // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
  // change to do a second lookup in endLexPass function instead of reusing the
  // lookup pointer.
  assert(!SkippingExcludedConditionalBlock &&
         "calling SkipExcludedConditionalBlock recursively");
  llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);

  ++NumSkipped;
  assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
  assert(CurPPLexer && "Conditional PP block must be in a file!");
  assert(CurLexer && "Conditional PP block but no current lexer set!");

  // When resuming a skip that previously hit EOF (as recorded in
  // PreambleConditionalStack), only the recorded skip info is cleared;
  // otherwise a new conditional level is pushed for this #if.
  if (PreambleConditionalStack.reachedEOFWhileSkipping())
    PreambleConditionalStack.clearSkipInfo();
  else
    CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
                                     FoundNonSkipPortion, FoundElse);

  // Enter raw mode to disable identifier lookup (and thus macro expansion),
  // disabling warnings, etc.
  CurPPLexer->LexingRawMode = true;
  Token Tok;
  // Stays invalid unless the loop exits through an #endif or an entered #else.
  SourceLocation endLoc;

  /// Keeps track and caches skipped ranges and also retrieves a prior skipped
  /// range if the same block is re-visited.
  struct SkippingRangeStateTy {
    Preprocessor &PP;

    // Buffer position where the current skipping pass started; null while no
    // pass is in progress.
    const char *BeginPtr = nullptr;
    // Points at the RecordedSkippedRanges entry for BeginPtr.
    unsigned *SkipRangePtr = nullptr;

    SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}

    void beginLexPass() {
      if (BeginPtr)
        return; // continue skipping a block.

      // Initiate a skipping block and adjust the lexer if we already skipped it
      // before.
      BeginPtr = PP.CurLexer->getBufferLocation();
      SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
      // A nonzero entry is a length stored by an earlier endLexPass for this
      // start position; jump the lexer forward by that amount.
      if (*SkipRangePtr) {
        PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
                          /*IsAtStartOfLine*/ true);
      }
    }

    void endLexPass(const char *Hashptr) {
      if (!BeginPtr) {
        // Not doing normal lexing.
        assert(PP.CurLexer->isDependencyDirectivesLexer());
        return;
      }

      // Finished skipping a block, record the range if it's first time visited.
      if (!*SkipRangePtr) {
        *SkipRangePtr = Hashptr - BeginPtr;
      }
      // A revisited block must end at the same '#' as when it was recorded.
      assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
      BeginPtr = nullptr;
      SkipRangePtr = nullptr;
    }
  } SkippingRangeState(*this);

  // Each iteration finds the next directive (or EOF) and handles it. The loop
  // is left by `break` when a branch is entered, when the matching #endif is
  // found, or at EOF.
  while (true) {
    if (CurLexer->isDependencyDirectivesLexer()) {
      CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
    } else {
      SkippingRangeState.beginLexPass();
      // Lex raw tokens until a '#' at the start of a line or EOF.
      while (true) {
        CurLexer->Lex(Tok);

        if (Tok.is(tok::code_completion)) {
          setCodeCompletionReached();
          if (CodeComplete)
            CodeComplete->CodeCompleteInConditionalExclusion();
          continue;
        }

        // If this is the end of the buffer, we have an error.
        if (Tok.is(tok::eof)) {
          // We don't emit errors for unterminated conditionals here,
          // Lexer::LexEndOfFile can do that properly.
          // Just return and let the caller lex after this #include.
          if (PreambleConditionalStack.isRecording())
            PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
                                                      FoundNonSkipPortion,
                                                      FoundElse, ElseLoc);
          break;
        }

        // If this token is not a preprocessor directive, just skip it.
        if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
          continue;

        break;
      }
    }
    // EOF ends the skipping loop as a whole, not just the token scan above.
    if (Tok.is(tok::eof))
      break;

    // We just parsed a # character at the start of a line, so we're in
    // directive mode.  Tell the lexer this so any newlines we see will be
    // converted into an EOD token (this terminates the macro).
    CurPPLexer->ParsingPreprocessorDirective = true;
    if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);

    assert(Tok.is(tok::hash));
    // Buffer position of the '#' itself; passed to endLexPass as the end of
    // the skipped range.
    const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
    assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());

    // Read the next token, the directive flavor.
    LexUnexpandedToken(Tok);

    // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
    // something bogus), skip it.
    if (Tok.isNot(tok::raw_identifier)) {
      CurPPLexer->ParsingPreprocessorDirective = false;
      // Restore comment saving mode.
      if (CurLexer) CurLexer->resetExtendedTokenMode();
      continue;
    }

    // If the first letter isn't i or e, it isn't interesting to us.  We know
    // that this is safe in the face of spelling differences, because there is
    // no way to spell an i/e in a strange way that is another letter.  Skipping
    // this allows us to avoid looking up the identifier info for #define/#undef
    // and other common directives.
    StringRef RI = Tok.getRawIdentifier();

    char FirstChar = RI[0];
    if (FirstChar >= 'a' && FirstChar <= 'z' &&
        FirstChar != 'i' && FirstChar != 'e') {
      CurPPLexer->ParsingPreprocessorDirective = false;
      // Restore comment saving mode.
      if (CurLexer) CurLexer->resetExtendedTokenMode();
      continue;
    }

    // Get the identifier name without trigraphs or embedded newlines.  Note
    // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
    // when skipping.
    char DirectiveBuf[20];
    StringRef Directive;
    if (!Tok.needsCleaning() && RI.size() < 20) {
      Directive = RI;
    } else {
      std::string DirectiveStr = getSpelling(Tok);
      size_t IdLen = DirectiveStr.size();
      // Spellings of 20 or more characters are skipped without further
      // inspection (they also would not fit in DirectiveBuf).
      if (IdLen >= 20) {
        CurPPLexer->ParsingPreprocessorDirective = false;
        // Restore comment saving mode.
        if (CurLexer) CurLexer->resetExtendedTokenMode();
        continue;
      }
      // Copy into DirectiveBuf so Directive does not refer to DirectiveStr,
      // which goes out of scope at the end of this branch.
      memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
      Directive = StringRef(DirectiveBuf, IdLen);
    }

    if (Directive.starts_with("if")) {
      StringRef Sub = Directive.substr(2);
      if (Sub.empty() ||   // "if"
          Sub == "def" ||   // "ifdef"
          Sub == "ndef") {  // "ifndef"
        // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
        // bother parsing the condition.
        DiscardUntilEndOfDirective();
        CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
                                       /*foundnonskip*/false,
                                       /*foundelse*/false);
      } else {
        SuggestTypoedDirective(Tok, Directive);
      }
    } else if (Directive[0] == 'e') {
      StringRef Sub = Directive.substr(1);
      if (Sub == "ndif") {  // "endif"
        PPConditionalInfo CondInfo;
        CondInfo.WasSkipping = true; // Silence bogus warning.
        bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
        (void)InCond;  // Silence warning in no-asserts mode.
        assert(!InCond && "Can't be skipping if not in a conditional!");

        // If we popped the outermost skipping block, we're done skipping!
        if (!CondInfo.WasSkipping) {
          SkippingRangeState.endLexPass(Hashptr);
          // Restore the value of LexingRawMode so that trailing comments
          // are handled correctly, if we've reached the outermost block.
          CurPPLexer->LexingRawMode = false;
          endLoc = CheckEndOfDirective("endif");
          CurPPLexer->LexingRawMode = true;
          if (Callbacks)
            Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
          break;
        } else {
          DiscardUntilEndOfDirective();
        }
      } else if (Sub == "lse") { // "else".
        // #else directive in a skipping conditional.  If not in some other
        // skipping conditional, and if #else hasn't already been seen, enter it
        // as a non-skipping conditional.
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();

        if (!CondInfo.WasSkipping)
          SkippingRangeState.endLexPass(Hashptr);

        // If this is a #else with a #else before it, report the error.
        if (CondInfo.FoundElse)
          Diag(Tok, diag::pp_err_else_after_else);

        // Note that we've seen a #else in this conditional.
        CondInfo.FoundElse = true;

        // If the conditional is at the top level, and the #if block wasn't
        // entered, enter the #else block now.
        if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
          CondInfo.FoundNonSkip = true;
          // Restore the value of LexingRawMode so that trailing comments
          // are handled correctly.
          CurPPLexer->LexingRawMode = false;
          endLoc = CheckEndOfDirective("else");
          CurPPLexer->LexingRawMode = true;
          if (Callbacks)
            Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
          break;
        } else {
          DiscardUntilEndOfDirective();  // C99 6.10p4.
        }
      } else if (Sub == "lif") {  // "elif".
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();

        if (!CondInfo.WasSkipping)
          SkippingRangeState.endLexPass(Hashptr);

        // If this is a #elif with a #else before it, report the error.
        if (CondInfo.FoundElse)
          Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;

        // If this is in a skipping block or if we're already handled this #if
        // block, don't bother parsing the condition.
        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
          // FIXME: We should probably do at least some minimal parsing of the
          // condition to verify that it is well-formed. The current state
          // allows #elif* directives with completely malformed (or missing)
          // conditions.
          DiscardUntilEndOfDirective();
        } else {
          // Restore the value of LexingRawMode so that identifiers are
          // looked up, etc, inside the #elif expression.
          assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
          CurPPLexer->LexingRawMode = false;
          IdentifierInfo *IfNDefMacro = nullptr;
          DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
          // Stop if Lexer became invalid after hitting code completion token.
          if (!CurPPLexer)
            return;
          const bool CondValue = DER.Conditional;
          CurPPLexer->LexingRawMode = true;
          if (Callbacks) {
            Callbacks->Elif(
                Tok.getLocation(), DER.ExprRange,
                (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
                CondInfo.IfLoc);
          }
          // If this condition is true, enter it!
          if (CondValue) {
            CondInfo.FoundNonSkip = true;
            break;
          }
        }
      } else if (Sub == "lifdef" ||  // "elifdef"
                 Sub == "lifndef") { // "elifndef"
        bool IsElifDef = Sub == "lifdef";
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
        // Keep a copy of the directive token: its location is what is reported
        // to the Elifdef/Elifndef callbacks further down.
        Token DirectiveToken = Tok;

        if (!CondInfo.WasSkipping)
          SkippingRangeState.endLexPass(Hashptr);

        // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
        // if this branch is in a skipping block.
        unsigned DiagID;
        if (LangOpts.CPlusPlus)
          DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
                                        : diag::ext_cxx23_pp_directive;
        else
          DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
                                : diag::ext_c23_pp_directive;
        Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);

        // If this is a #elif with a #else before it, report the error.
        if (CondInfo.FoundElse)
          Diag(Tok, diag::pp_err_elif_after_else)
              << (IsElifDef ? PED_Elifdef : PED_Elifndef);

        // If this is in a skipping block or if we're already handled this #if
        // block, don't bother parsing the condition.
        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
          // FIXME: We should probably do at least some minimal parsing of the
          // condition to verify that it is well-formed. The current state
          // allows #elif* directives with completely malformed (or missing)
          // conditions.
          DiscardUntilEndOfDirective();
        } else {
          // Restore the value of LexingRawMode so that identifiers are
          // looked up, etc, inside the #elif[n]def expression.
          assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
          CurPPLexer->LexingRawMode = false;
          Token MacroNameTok;
          ReadMacroName(MacroNameTok);
          CurPPLexer->LexingRawMode = true;

          // If the macro name token is tok::eod, there was an error that was
          // already reported.
          if (MacroNameTok.is(tok::eod)) {
            // Skip code until we get to #endif.  This helps with recovery by
            // not emitting an error when the #endif is reached.
            continue;
          }

          emitMacroExpansionWarnings(MacroNameTok);

          CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");

          IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
          auto MD = getMacroDefinition(MII);
          MacroInfo *MI = MD.getMacroInfo();

          if (Callbacks) {
            if (IsElifDef) {
              Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,
                                 MD);
            } else {
              Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,
                                  MD);
            }
          }
          // If this condition is true, enter it!
          // (#elifdef is true when MI is non-null, #elifndef when it is null.)
          if (static_cast<bool>(MI) == IsElifDef) {
            CondInfo.FoundNonSkip = true;
            break;
          }
        }
      } else {
        SuggestTypoedDirective(Tok, Directive);
      }
    } else {
      SuggestTypoedDirective(Tok, Directive);
    }

    CurPPLexer->ParsingPreprocessorDirective = false;
    // Restore comment saving mode.
    if (CurLexer) CurLexer->resetExtendedTokenMode();
  }

  // Finally, if we are out of the conditional (saw an #endif or ran off the end
  // of the file), just stop skipping and return to lexing whatever came after
  // the #if block.
  CurPPLexer->LexingRawMode = false;

  // The last skipped range isn't actually skipped yet if it's truncated
  // by the end of the preamble; we'll resume parsing after the preamble.
  if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
    Callbacks->SourceRangeSkipped(
        SourceRange(HashTokenLoc, endLoc.isValid()
                                      ? endLoc
                                      : CurPPLexer->getSourceLocation()),
        Tok.getLocation());
}

/// Return the module that the code at \p Loc belongs to.
///
/// For a location outside the main file whose file entry is known, this is the
/// module that the module map associates with that file (\p AllowTextual is
/// forwarded to ModuleMap::findModuleForHeader). Otherwise it is the module
/// named by the CurrentModule language option, or null if that name is empty.
Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
                                           bool AllowTextual) {
  if (!SourceMgr.isInMainFile(Loc)) {
    // Try to determine the module of the include directive.
    // FIXME: Look into directly passing the FileEntry from LookupFile instead.
    FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
    if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(IDOfIncl)) {
      // The include comes from an included file.
      return HeaderInfo.getModuleMap()
          .findModuleForHeader(*EntryOfIncl, AllowTextual)
          .getModule();
    }
  }

  // This is either in the main file or not in a file at all. It belongs
  // to the current module, if there is one.
  return getLangOpts().CurrentModule.empty()
             ? nullptr
             : HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);
}

OptionalFileEntryRef
Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                               SourceLocation Loc) {
  Module *IncM = getModuleForLocation(
      IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);

  // Walk up through the include stack, looking through textual headers of M
  // until we hit a non-textual header that we can #include. (We assume textual
  // headers of a module with non-textual headers aren't meant to be used to
  // import entities from the module.)
  auto &SM = getSourceManager();
  while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
    auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
    auto FE = SM.getFileEntryRefForID(ID);
    if (!FE)
      break;

    // We want to find all possible modules that might contain this header, so
    // search all enclosing directories for module maps and load them.
    HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,
                            SourceMgr.isInSystemHeader(Loc));

    bool InPrivateHeader = false;
    for (auto Header : HeaderInfo.findAllModulesForHeader(*FE)) {
      if (!Header.isAccessibleFrom(IncM)) {
        // It's in a private header; we can't #include it.
        // FIXME: If there's a public header in some module that re-exports it,
        // then we could suggest including that, but it's not clear that's the
        // expected way to make this entity visible.
        InPrivateHeader = true;
        continue;
      }

      // Don't suggest explicitly excluded headers.
      if (Header.getRole() == ModuleMap::ExcludedHeader)
        continue;

      // We'll suggest including textual headers below if they're
      // include-guarded.
      if (Header.getRole() & ModuleMap::TextualHeader)
        continue;

      // If we have a module import syntax, we shouldn't include a header to
      // make a particular module visible. Let the caller know they should
      // suggest an import instead.
926 if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules) 927 return std::nullopt; 928 929 // If this is an accessible, non-textual header of M's top-level module 930 // that transitively includes the given location and makes the 931 // corresponding module visible, this is the thing to #include. 932 return *FE; 933 } 934 935 // FIXME: If we're bailing out due to a private header, we shouldn't suggest 936 // an import either. 937 if (InPrivateHeader) 938 return std::nullopt; 939 940 // If the header is includable and has an include guard, assume the 941 // intended way to expose its contents is by #include, not by importing a 942 // module that transitively includes it. 943 if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(*FE)) 944 return *FE; 945 946 Loc = SM.getIncludeLoc(ID); 947 } 948 949 return std::nullopt; 950 } 951 952 OptionalFileEntryRef Preprocessor::LookupFile( 953 SourceLocation FilenameLoc, StringRef Filename, bool isAngled, 954 ConstSearchDirIterator FromDir, const FileEntry *FromFile, 955 ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath, 956 SmallVectorImpl<char> *RelativePath, 957 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, 958 bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) { 959 ConstSearchDirIterator CurDirLocal = nullptr; 960 ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal; 961 962 Module *RequestingModule = getModuleForLocation( 963 FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes); 964 965 // If the header lookup mechanism may be relative to the current inclusion 966 // stack, record the parent #includes. 
967 SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers; 968 bool BuildSystemModule = false; 969 if (!FromDir && !FromFile) { 970 FileID FID = getCurrentFileLexer()->getFileID(); 971 OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID); 972 973 // If there is no file entry associated with this file, it must be the 974 // predefines buffer or the module includes buffer. Any other file is not 975 // lexed with a normal lexer, so it won't be scanned for preprocessor 976 // directives. 977 // 978 // If we have the predefines buffer, resolve #include references (which come 979 // from the -include command line argument) from the current working 980 // directory instead of relative to the main file. 981 // 982 // If we have the module includes buffer, resolve #include references (which 983 // come from header declarations in the module map) relative to the module 984 // map file. 985 if (!FileEnt) { 986 if (FID == SourceMgr.getMainFileID() && MainFileDir) { 987 auto IncludeDir = 988 HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir( 989 Filename, getCurrentModule()) 990 ? HeaderInfo.getModuleMap().getBuiltinDir() 991 : MainFileDir; 992 Includers.push_back(std::make_pair(std::nullopt, *IncludeDir)); 993 BuildSystemModule = getCurrentModule()->IsSystem; 994 } else if ((FileEnt = SourceMgr.getFileEntryRefForID( 995 SourceMgr.getMainFileID()))) { 996 auto CWD = FileMgr.getOptionalDirectoryRef("."); 997 Includers.push_back(std::make_pair(*FileEnt, *CWD)); 998 } 999 } else { 1000 Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir())); 1001 } 1002 1003 // MSVC searches the current include stack from top to bottom for 1004 // headers included by quoted include directives. 
1005 // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx 1006 if (LangOpts.MSVCCompat && !isAngled) { 1007 for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) { 1008 if (IsFileLexer(ISEntry)) 1009 if ((FileEnt = ISEntry.ThePPLexer->getFileEntry())) 1010 Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir())); 1011 } 1012 } 1013 } 1014 1015 CurDir = CurDirLookup; 1016 1017 if (FromFile) { 1018 // We're supposed to start looking from after a particular file. Search 1019 // the include path until we find that file or run out of files. 1020 ConstSearchDirIterator TmpCurDir = CurDir; 1021 ConstSearchDirIterator TmpFromDir = nullptr; 1022 while (OptionalFileEntryRef FE = HeaderInfo.LookupFile( 1023 Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir, 1024 Includers, SearchPath, RelativePath, RequestingModule, 1025 SuggestedModule, /*IsMapped=*/nullptr, 1026 /*IsFrameworkFound=*/nullptr, SkipCache)) { 1027 // Keep looking as if this file did a #include_next. 1028 TmpFromDir = TmpCurDir; 1029 ++TmpFromDir; 1030 if (&FE->getFileEntry() == FromFile) { 1031 // Found it. 1032 FromDir = TmpFromDir; 1033 CurDir = TmpCurDir; 1034 break; 1035 } 1036 } 1037 } 1038 1039 // Do a standard file entry lookup. 1040 OptionalFileEntryRef FE = HeaderInfo.LookupFile( 1041 Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath, 1042 RelativePath, RequestingModule, SuggestedModule, IsMapped, 1043 IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures); 1044 if (FE) 1045 return FE; 1046 1047 OptionalFileEntryRef CurFileEnt; 1048 // Otherwise, see if this is a subframework header. If so, this is relative 1049 // to one of the headers on the #include stack. Walk the list of the current 1050 // headers on the #include stack and pass them to HeaderInfo. 
1051 if (IsFileLexer()) { 1052 if ((CurFileEnt = CurPPLexer->getFileEntry())) { 1053 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader( 1054 Filename, *CurFileEnt, SearchPath, RelativePath, RequestingModule, 1055 SuggestedModule)) { 1056 return FE; 1057 } 1058 } 1059 } 1060 1061 for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) { 1062 if (IsFileLexer(ISEntry)) { 1063 if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) { 1064 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader( 1065 Filename, *CurFileEnt, SearchPath, RelativePath, 1066 RequestingModule, SuggestedModule)) { 1067 return FE; 1068 } 1069 } 1070 } 1071 } 1072 1073 // Otherwise, we really couldn't find the file. 1074 return std::nullopt; 1075 } 1076 1077 OptionalFileEntryRef 1078 Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, 1079 const FileEntry *LookupFromFile) { 1080 FileManager &FM = this->getFileManager(); 1081 if (llvm::sys::path::is_absolute(Filename)) { 1082 // lookup path or immediately fail 1083 llvm::Expected<FileEntryRef> ShouldBeEntry = 1084 FM.getFileRef(Filename, OpenFile); 1085 return llvm::expectedToOptional(std::move(ShouldBeEntry)); 1086 } 1087 1088 auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath, 1089 StringRef StartingFrom, StringRef FileName, 1090 bool RemoveInitialFileComponentFromLookupPath) { 1091 llvm::sys::path::native(StartingFrom, LookupPath); 1092 if (RemoveInitialFileComponentFromLookupPath) 1093 llvm::sys::path::remove_filename(LookupPath); 1094 if (!LookupPath.empty() && 1095 !llvm::sys::path::is_separator(LookupPath.back())) { 1096 LookupPath.push_back(llvm::sys::path::get_separator().front()); 1097 } 1098 LookupPath.append(FileName.begin(), FileName.end()); 1099 }; 1100 1101 // Otherwise, it's search time! 1102 SmallString<512> LookupPath; 1103 // Non-angled lookup 1104 if (!isAngled) { 1105 if (LookupFromFile) { 1106 // Use file-based lookup. 
1107 StringRef FullFileDir = LookupFromFile->tryGetRealPathName(); 1108 if (!FullFileDir.empty()) { 1109 SeparateComponents(LookupPath, FullFileDir, Filename, true); 1110 llvm::Expected<FileEntryRef> ShouldBeEntry = 1111 FM.getFileRef(LookupPath, OpenFile); 1112 if (ShouldBeEntry) 1113 return llvm::expectedToOptional(std::move(ShouldBeEntry)); 1114 llvm::consumeError(ShouldBeEntry.takeError()); 1115 } 1116 } 1117 1118 // Otherwise, do working directory lookup. 1119 LookupPath.clear(); 1120 auto MaybeWorkingDirEntry = FM.getDirectoryRef("."); 1121 if (MaybeWorkingDirEntry) { 1122 DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry; 1123 StringRef WorkingDir = WorkingDirEntry.getName(); 1124 if (!WorkingDir.empty()) { 1125 SeparateComponents(LookupPath, WorkingDir, Filename, false); 1126 llvm::Expected<FileEntryRef> ShouldBeEntry = 1127 FM.getFileRef(LookupPath, OpenFile); 1128 if (ShouldBeEntry) 1129 return llvm::expectedToOptional(std::move(ShouldBeEntry)); 1130 llvm::consumeError(ShouldBeEntry.takeError()); 1131 } 1132 } 1133 } 1134 1135 for (const auto &Entry : PPOpts->EmbedEntries) { 1136 LookupPath.clear(); 1137 SeparateComponents(LookupPath, Entry, Filename, false); 1138 llvm::Expected<FileEntryRef> ShouldBeEntry = 1139 FM.getFileRef(LookupPath, OpenFile); 1140 if (ShouldBeEntry) 1141 return llvm::expectedToOptional(std::move(ShouldBeEntry)); 1142 llvm::consumeError(ShouldBeEntry.takeError()); 1143 } 1144 return std::nullopt; 1145 } 1146 1147 //===----------------------------------------------------------------------===// 1148 // Preprocessor Directive Handling. 
1149 //===----------------------------------------------------------------------===// 1150 1151 class Preprocessor::ResetMacroExpansionHelper { 1152 public: 1153 ResetMacroExpansionHelper(Preprocessor *pp) 1154 : PP(pp), save(pp->DisableMacroExpansion) { 1155 if (pp->MacroExpansionInDirectivesOverride) 1156 pp->DisableMacroExpansion = false; 1157 } 1158 1159 ~ResetMacroExpansionHelper() { 1160 PP->DisableMacroExpansion = save; 1161 } 1162 1163 private: 1164 Preprocessor *PP; 1165 bool save; 1166 }; 1167 1168 /// Process a directive while looking for the through header or a #pragma 1169 /// hdrstop. The following directives are handled: 1170 /// #include (to check if it is the through header) 1171 /// #define (to warn about macros that don't match the PCH) 1172 /// #pragma (to check for pragma hdrstop). 1173 /// All other directives are completely discarded. 1174 void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result, 1175 SourceLocation HashLoc) { 1176 if (const IdentifierInfo *II = Result.getIdentifierInfo()) { 1177 if (II->getPPKeywordID() == tok::pp_define) { 1178 return HandleDefineDirective(Result, 1179 /*ImmediatelyAfterHeaderGuard=*/false); 1180 } 1181 if (SkippingUntilPCHThroughHeader && 1182 II->getPPKeywordID() == tok::pp_include) { 1183 return HandleIncludeDirective(HashLoc, Result); 1184 } 1185 if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) { 1186 Lex(Result); 1187 auto *II = Result.getIdentifierInfo(); 1188 if (II && II->getName() == "hdrstop") 1189 return HandlePragmaHdrstop(Result); 1190 } 1191 } 1192 DiscardUntilEndOfDirective(); 1193 } 1194 1195 /// HandleDirective - This callback is invoked when the lexer sees a # token 1196 /// at the start of a line. This consumes the directive, modifies the 1197 /// lexer/preprocessor state, and advances the lexer(s) so that the next token 1198 /// read is the correct one. 
1199 void Preprocessor::HandleDirective(Token &Result) { 1200 // FIXME: Traditional: # with whitespace before it not recognized by K&R? 1201 1202 // We just parsed a # character at the start of a line, so we're in directive 1203 // mode. Tell the lexer this so any newlines we see will be converted into an 1204 // EOD token (which terminates the directive). 1205 CurPPLexer->ParsingPreprocessorDirective = true; 1206 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false); 1207 1208 bool ImmediatelyAfterTopLevelIfndef = 1209 CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef(); 1210 CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef(); 1211 1212 ++NumDirectives; 1213 1214 // We are about to read a token. For the multiple-include optimization FA to 1215 // work, we have to remember if we had read any tokens *before* this 1216 // pp-directive. 1217 bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal(); 1218 1219 // Save the '#' token in case we need to return it later. 1220 Token SavedHash = Result; 1221 1222 // Read the next token, the directive flavor. This isn't expanded due to 1223 // C99 6.10.3p8. 1224 LexUnexpandedToken(Result); 1225 1226 // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.: 1227 // #define A(x) #x 1228 // A(abc 1229 // #warning blah 1230 // def) 1231 // If so, the user is relying on undefined behavior, emit a diagnostic. Do 1232 // not support this for #include-like directives, since that can result in 1233 // terrible diagnostics, and does not work in GCC. 
1234 if (InMacroArgs) { 1235 if (IdentifierInfo *II = Result.getIdentifierInfo()) { 1236 switch (II->getPPKeywordID()) { 1237 case tok::pp_include: 1238 case tok::pp_import: 1239 case tok::pp_include_next: 1240 case tok::pp___include_macros: 1241 case tok::pp_pragma: 1242 case tok::pp_embed: 1243 Diag(Result, diag::err_embedded_directive) << II->getName(); 1244 Diag(*ArgMacro, diag::note_macro_expansion_here) 1245 << ArgMacro->getIdentifierInfo(); 1246 DiscardUntilEndOfDirective(); 1247 return; 1248 default: 1249 break; 1250 } 1251 } 1252 Diag(Result, diag::ext_embedded_directive); 1253 } 1254 1255 // Temporarily enable macro expansion if set so 1256 // and reset to previous state when returning from this function. 1257 ResetMacroExpansionHelper helper(this); 1258 1259 if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop) 1260 return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation()); 1261 1262 switch (Result.getKind()) { 1263 case tok::eod: 1264 // Ignore the null directive with regards to the multiple-include 1265 // optimization, i.e. allow the null directive to appear outside of the 1266 // include guard and still enable the multiple-include optimization. 1267 CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective); 1268 return; // null directive. 1269 case tok::code_completion: 1270 setCodeCompletionReached(); 1271 if (CodeComplete) 1272 CodeComplete->CodeCompleteDirective( 1273 CurPPLexer->getConditionalStackDepth() > 0); 1274 return; 1275 case tok::numeric_constant: // # 7 GNU line marker directive. 1276 // In a .S file "# 4" may be a comment so don't treat it as a preprocessor 1277 // directive. However do permit it in the predefines file, as we use line 1278 // markers to mark the builtin macros as being in a system header. 
1279 if (getLangOpts().AsmPreprocessor && 1280 SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID()) 1281 break; 1282 return HandleDigitDirective(Result); 1283 default: 1284 IdentifierInfo *II = Result.getIdentifierInfo(); 1285 if (!II) break; // Not an identifier. 1286 1287 // Ask what the preprocessor keyword ID is. 1288 switch (II->getPPKeywordID()) { 1289 default: break; 1290 // C99 6.10.1 - Conditional Inclusion. 1291 case tok::pp_if: 1292 return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective); 1293 case tok::pp_ifdef: 1294 return HandleIfdefDirective(Result, SavedHash, false, 1295 true /*not valid for miopt*/); 1296 case tok::pp_ifndef: 1297 return HandleIfdefDirective(Result, SavedHash, true, 1298 ReadAnyTokensBeforeDirective); 1299 case tok::pp_elif: 1300 case tok::pp_elifdef: 1301 case tok::pp_elifndef: 1302 return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID()); 1303 1304 case tok::pp_else: 1305 return HandleElseDirective(Result, SavedHash); 1306 case tok::pp_endif: 1307 return HandleEndifDirective(Result); 1308 1309 // C99 6.10.2 - Source File Inclusion. 1310 case tok::pp_include: 1311 // Handle #include. 1312 return HandleIncludeDirective(SavedHash.getLocation(), Result); 1313 case tok::pp___include_macros: 1314 // Handle -imacros. 1315 return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result); 1316 1317 // C99 6.10.3 - Macro Replacement. 1318 case tok::pp_define: 1319 return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef); 1320 case tok::pp_undef: 1321 return HandleUndefDirective(); 1322 1323 // C99 6.10.4 - Line Control. 1324 case tok::pp_line: 1325 return HandleLineDirective(); 1326 1327 // C99 6.10.5 - Error Directive. 1328 case tok::pp_error: 1329 return HandleUserDiagnosticDirective(Result, false); 1330 1331 // C99 6.10.6 - Pragma Directive. 
1332 case tok::pp_pragma: 1333 return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()}); 1334 1335 // GNU Extensions. 1336 case tok::pp_import: 1337 return HandleImportDirective(SavedHash.getLocation(), Result); 1338 case tok::pp_include_next: 1339 return HandleIncludeNextDirective(SavedHash.getLocation(), Result); 1340 1341 case tok::pp_warning: 1342 if (LangOpts.CPlusPlus) 1343 Diag(Result, LangOpts.CPlusPlus23 1344 ? diag::warn_cxx23_compat_warning_directive 1345 : diag::ext_pp_warning_directive) 1346 << /*C++23*/ 1; 1347 else 1348 Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive 1349 : diag::ext_pp_warning_directive) 1350 << /*C23*/ 0; 1351 1352 return HandleUserDiagnosticDirective(Result, true); 1353 case tok::pp_ident: 1354 return HandleIdentSCCSDirective(Result); 1355 case tok::pp_sccs: 1356 return HandleIdentSCCSDirective(Result); 1357 case tok::pp_embed: 1358 return HandleEmbedDirective(SavedHash.getLocation(), Result, 1359 getCurrentFileLexer() 1360 ? *getCurrentFileLexer()->getFileEntry() 1361 : static_cast<FileEntry *>(nullptr)); 1362 case tok::pp_assert: 1363 //isExtension = true; // FIXME: implement #assert 1364 break; 1365 case tok::pp_unassert: 1366 //isExtension = true; // FIXME: implement #unassert 1367 break; 1368 1369 case tok::pp___public_macro: 1370 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility) 1371 return HandleMacroPublicDirective(Result); 1372 break; 1373 1374 case tok::pp___private_macro: 1375 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility) 1376 return HandleMacroPrivateDirective(); 1377 break; 1378 } 1379 break; 1380 } 1381 1382 // If this is a .S file, treat unknown # directives as non-preprocessor 1383 // directives. This is important because # may be a comment or introduce 1384 // various pseudo-ops. Just return the # token and push back the following 1385 // token to be lexed next time. 
1386 if (getLangOpts().AsmPreprocessor) { 1387 auto Toks = std::make_unique<Token[]>(2); 1388 // Return the # and the token after it. 1389 Toks[0] = SavedHash; 1390 Toks[1] = Result; 1391 1392 // If the second token is a hashhash token, then we need to translate it to 1393 // unknown so the token lexer doesn't try to perform token pasting. 1394 if (Result.is(tok::hashhash)) 1395 Toks[1].setKind(tok::unknown); 1396 1397 // Enter this token stream so that we re-lex the tokens. Make sure to 1398 // enable macro expansion, in case the token after the # is an identifier 1399 // that is expanded. 1400 EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false); 1401 return; 1402 } 1403 1404 // If we reached here, the preprocessing token is not valid! 1405 // Start suggesting if a similar directive found. 1406 Diag(Result, diag::err_pp_invalid_directive) << 0; 1407 1408 // Read the rest of the PP line. 1409 DiscardUntilEndOfDirective(); 1410 1411 // Okay, we're done parsing the directive. 1412 } 1413 1414 /// GetLineValue - Convert a numeric token into an unsigned value, emitting 1415 /// Diagnostic DiagID if it is invalid, and returning the value in Val. 1416 static bool GetLineValue(Token &DigitTok, unsigned &Val, 1417 unsigned DiagID, Preprocessor &PP, 1418 bool IsGNULineDirective=false) { 1419 if (DigitTok.isNot(tok::numeric_constant)) { 1420 PP.Diag(DigitTok, DiagID); 1421 1422 if (DigitTok.isNot(tok::eod)) 1423 PP.DiscardUntilEndOfDirective(); 1424 return true; 1425 } 1426 1427 SmallString<64> IntegerBuffer; 1428 IntegerBuffer.resize(DigitTok.getLength()); 1429 const char *DigitTokBegin = &IntegerBuffer[0]; 1430 bool Invalid = false; 1431 unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid); 1432 if (Invalid) 1433 return true; 1434 1435 // Verify that we have a simple digit-sequence, and compute the value. This 1436 // is always a simple digit string computed in decimal, so we do this manually 1437 // here. 
1438 Val = 0; 1439 for (unsigned i = 0; i != ActualLength; ++i) { 1440 // C++1y [lex.fcon]p1: 1441 // Optional separating single quotes in a digit-sequence are ignored 1442 if (DigitTokBegin[i] == '\'') 1443 continue; 1444 1445 if (!isDigit(DigitTokBegin[i])) { 1446 PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i), 1447 diag::err_pp_line_digit_sequence) << IsGNULineDirective; 1448 PP.DiscardUntilEndOfDirective(); 1449 return true; 1450 } 1451 1452 unsigned NextVal = Val*10+(DigitTokBegin[i]-'0'); 1453 if (NextVal < Val) { // overflow. 1454 PP.Diag(DigitTok, DiagID); 1455 PP.DiscardUntilEndOfDirective(); 1456 return true; 1457 } 1458 Val = NextVal; 1459 } 1460 1461 if (DigitTokBegin[0] == '0' && Val) 1462 PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal) 1463 << IsGNULineDirective; 1464 1465 return false; 1466 } 1467 1468 /// Handle a \#line directive: C99 6.10.4. 1469 /// 1470 /// The two acceptable forms are: 1471 /// \verbatim 1472 /// # line digit-sequence 1473 /// # line digit-sequence "s-char-sequence" 1474 /// \endverbatim 1475 void Preprocessor::HandleLineDirective() { 1476 // Read the line # and string argument. Per C99 6.10.4p5, these tokens are 1477 // expanded. 1478 Token DigitTok; 1479 Lex(DigitTok); 1480 1481 // Validate the number and convert it to an unsigned. 1482 unsigned LineNo; 1483 if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this)) 1484 return; 1485 1486 if (LineNo == 0) 1487 Diag(DigitTok, diag::ext_pp_line_zero); 1488 1489 // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a 1490 // number greater than 2147483647". C90 requires that the line # be <= 32767. 
1491 unsigned LineLimit = 32768U; 1492 if (LangOpts.C99 || LangOpts.CPlusPlus11) 1493 LineLimit = 2147483648U; 1494 if (LineNo >= LineLimit) 1495 Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit; 1496 else if (LangOpts.CPlusPlus11 && LineNo >= 32768U) 1497 Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big); 1498 1499 int FilenameID = -1; 1500 Token StrTok; 1501 Lex(StrTok); 1502 1503 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a 1504 // string followed by eod. 1505 if (StrTok.is(tok::eod)) 1506 ; // ok 1507 else if (StrTok.isNot(tok::string_literal)) { 1508 Diag(StrTok, diag::err_pp_line_invalid_filename); 1509 DiscardUntilEndOfDirective(); 1510 return; 1511 } else if (StrTok.hasUDSuffix()) { 1512 Diag(StrTok, diag::err_invalid_string_udl); 1513 DiscardUntilEndOfDirective(); 1514 return; 1515 } else { 1516 // Parse and validate the string, converting it into a unique ID. 1517 StringLiteralParser Literal(StrTok, *this); 1518 assert(Literal.isOrdinary() && "Didn't allow wide strings in"); 1519 if (Literal.hadError) { 1520 DiscardUntilEndOfDirective(); 1521 return; 1522 } 1523 if (Literal.Pascal) { 1524 Diag(StrTok, diag::err_pp_linemarker_invalid_filename); 1525 DiscardUntilEndOfDirective(); 1526 return; 1527 } 1528 FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); 1529 1530 // Verify that there is nothing after the string, other than EOD. Because 1531 // of C99 6.10.4p5, macros that expand to empty tokens are ok. 1532 CheckEndOfDirective("line", true); 1533 } 1534 1535 // Take the file kind of the file containing the #line directive. #line 1536 // directives are often used for generated sources from the same codebase, so 1537 // the new file should generally be classified the same way as the current 1538 // file. This is visible in GCC's pre-processed output, which rewrites #line 1539 // to GNU line markers. 
1540 SrcMgr::CharacteristicKind FileKind = 1541 SourceMgr.getFileCharacteristic(DigitTok.getLocation()); 1542 1543 SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false, 1544 false, FileKind); 1545 1546 if (Callbacks) 1547 Callbacks->FileChanged(CurPPLexer->getSourceLocation(), 1548 PPCallbacks::RenameFile, FileKind); 1549 } 1550 1551 /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line 1552 /// marker directive. 1553 static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, 1554 SrcMgr::CharacteristicKind &FileKind, 1555 Preprocessor &PP) { 1556 unsigned FlagVal; 1557 Token FlagTok; 1558 PP.Lex(FlagTok); 1559 if (FlagTok.is(tok::eod)) return false; 1560 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP)) 1561 return true; 1562 1563 if (FlagVal == 1) { 1564 IsFileEntry = true; 1565 1566 PP.Lex(FlagTok); 1567 if (FlagTok.is(tok::eod)) return false; 1568 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP)) 1569 return true; 1570 } else if (FlagVal == 2) { 1571 IsFileExit = true; 1572 1573 SourceManager &SM = PP.getSourceManager(); 1574 // If we are leaving the current presumed file, check to make sure the 1575 // presumed include stack isn't empty! 1576 FileID CurFileID = 1577 SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first; 1578 PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation()); 1579 if (PLoc.isInvalid()) 1580 return true; 1581 1582 // If there is no include loc (main file) or if the include loc is in a 1583 // different physical file, then we aren't in a "1" line marker flag region. 
1584 SourceLocation IncLoc = PLoc.getIncludeLoc(); 1585 if (IncLoc.isInvalid() || 1586 SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) { 1587 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop); 1588 PP.DiscardUntilEndOfDirective(); 1589 return true; 1590 } 1591 1592 PP.Lex(FlagTok); 1593 if (FlagTok.is(tok::eod)) return false; 1594 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP)) 1595 return true; 1596 } 1597 1598 // We must have 3 if there are still flags. 1599 if (FlagVal != 3) { 1600 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag); 1601 PP.DiscardUntilEndOfDirective(); 1602 return true; 1603 } 1604 1605 FileKind = SrcMgr::C_System; 1606 1607 PP.Lex(FlagTok); 1608 if (FlagTok.is(tok::eod)) return false; 1609 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP)) 1610 return true; 1611 1612 // We must have 4 if there is yet another flag. 1613 if (FlagVal != 4) { 1614 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag); 1615 PP.DiscardUntilEndOfDirective(); 1616 return true; 1617 } 1618 1619 FileKind = SrcMgr::C_ExternCSystem; 1620 1621 PP.Lex(FlagTok); 1622 if (FlagTok.is(tok::eod)) return false; 1623 1624 // There are no more valid flags here. 1625 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag); 1626 PP.DiscardUntilEndOfDirective(); 1627 return true; 1628 } 1629 1630 /// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is 1631 /// one of the following forms: 1632 /// 1633 /// # 42 1634 /// # 42 "file" ('1' | '2')? 1635 /// # 42 "file" ('1' | '2')? '3' '4'? 1636 /// 1637 void Preprocessor::HandleDigitDirective(Token &DigitTok) { 1638 // Validate the number and convert it to an unsigned. GNU does not have a 1639 // line # limit other than it fit in 32-bits. 
1640 unsigned LineNo; 1641 if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer, 1642 *this, true)) 1643 return; 1644 1645 Token StrTok; 1646 Lex(StrTok); 1647 1648 bool IsFileEntry = false, IsFileExit = false; 1649 int FilenameID = -1; 1650 SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User; 1651 1652 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a 1653 // string followed by eod. 1654 if (StrTok.is(tok::eod)) { 1655 Diag(StrTok, diag::ext_pp_gnu_line_directive); 1656 // Treat this like "#line NN", which doesn't change file characteristics. 1657 FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation()); 1658 } else if (StrTok.isNot(tok::string_literal)) { 1659 Diag(StrTok, diag::err_pp_linemarker_invalid_filename); 1660 DiscardUntilEndOfDirective(); 1661 return; 1662 } else if (StrTok.hasUDSuffix()) { 1663 Diag(StrTok, diag::err_invalid_string_udl); 1664 DiscardUntilEndOfDirective(); 1665 return; 1666 } else { 1667 // Parse and validate the string, converting it into a unique ID. 1668 StringLiteralParser Literal(StrTok, *this); 1669 assert(Literal.isOrdinary() && "Didn't allow wide strings in"); 1670 if (Literal.hadError) { 1671 DiscardUntilEndOfDirective(); 1672 return; 1673 } 1674 if (Literal.Pascal) { 1675 Diag(StrTok, diag::err_pp_linemarker_invalid_filename); 1676 DiscardUntilEndOfDirective(); 1677 return; 1678 } 1679 1680 // If a filename was present, read any flags that are present. 1681 if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this)) 1682 return; 1683 if (!SourceMgr.isWrittenInBuiltinFile(DigitTok.getLocation()) && 1684 !SourceMgr.isWrittenInCommandLineFile(DigitTok.getLocation())) 1685 Diag(StrTok, diag::ext_pp_gnu_line_directive); 1686 1687 // Exiting to an empty string means pop to the including file, so leave 1688 // FilenameID as -1 in that case. 
1689 if (!(IsFileExit && Literal.GetString().empty())) 1690 FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); 1691 } 1692 1693 // Create a line note with this information. 1694 SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry, 1695 IsFileExit, FileKind); 1696 1697 // If the preprocessor has callbacks installed, notify them of the #line 1698 // change. This is used so that the line marker comes out in -E mode for 1699 // example. 1700 if (Callbacks) { 1701 PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile; 1702 if (IsFileEntry) 1703 Reason = PPCallbacks::EnterFile; 1704 else if (IsFileExit) 1705 Reason = PPCallbacks::ExitFile; 1706 1707 Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind); 1708 } 1709 } 1710 1711 /// HandleUserDiagnosticDirective - Handle a #warning or #error directive. 1712 /// 1713 void Preprocessor::HandleUserDiagnosticDirective(Token &Tok, 1714 bool isWarning) { 1715 // Read the rest of the line raw. We do this because we don't want macros 1716 // to be expanded and we don't require that the tokens be valid preprocessing 1717 // tokens. For example, this is allowed: "#warning ` 'foo". GCC does 1718 // collapse multiple consecutive white space between tokens, but this isn't 1719 // specified by the standard. 1720 SmallString<128> Message; 1721 CurLexer->ReadToEndOfLine(&Message); 1722 1723 // Find the first non-whitespace character, so that we can make the 1724 // diagnostic more succinct. 1725 StringRef Msg = Message.str().ltrim(' '); 1726 1727 if (isWarning) 1728 Diag(Tok, diag::pp_hash_warning) << Msg; 1729 else 1730 Diag(Tok, diag::err_pp_hash_error) << Msg; 1731 } 1732 1733 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive. 1734 /// 1735 void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { 1736 // Yes, this directive is an extension. 1737 Diag(Tok, diag::ext_pp_ident_directive); 1738 1739 // Read the string argument. 
1740 Token StrTok; 1741 Lex(StrTok); 1742 1743 // If the token kind isn't a string, it's a malformed directive. 1744 if (StrTok.isNot(tok::string_literal) && 1745 StrTok.isNot(tok::wide_string_literal)) { 1746 Diag(StrTok, diag::err_pp_malformed_ident); 1747 if (StrTok.isNot(tok::eod)) 1748 DiscardUntilEndOfDirective(); 1749 return; 1750 } 1751 1752 if (StrTok.hasUDSuffix()) { 1753 Diag(StrTok, diag::err_invalid_string_udl); 1754 DiscardUntilEndOfDirective(); 1755 return; 1756 } 1757 1758 // Verify that there is nothing after the string, other than EOD. 1759 CheckEndOfDirective("ident"); 1760 1761 if (Callbacks) { 1762 bool Invalid = false; 1763 std::string Str = getSpelling(StrTok, &Invalid); 1764 if (!Invalid) 1765 Callbacks->Ident(Tok.getLocation(), Str); 1766 } 1767 } 1768 1769 /// Handle a #public directive. 1770 void Preprocessor::HandleMacroPublicDirective(Token &Tok) { 1771 Token MacroNameTok; 1772 ReadMacroName(MacroNameTok, MU_Undef); 1773 1774 // Error reading macro name? If so, diagnostic already issued. 1775 if (MacroNameTok.is(tok::eod)) 1776 return; 1777 1778 // Check to see if this is the last token on the #__public_macro line. 1779 CheckEndOfDirective("__public_macro"); 1780 1781 IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); 1782 // Okay, we finally have a valid identifier to undef. 1783 MacroDirective *MD = getLocalMacroDirective(II); 1784 1785 // If the macro is not defined, this is an error. 1786 if (!MD) { 1787 Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II; 1788 return; 1789 } 1790 1791 // Note that this macro has now been exported. 1792 appendMacroDirective(II, AllocateVisibilityMacroDirective( 1793 MacroNameTok.getLocation(), /*isPublic=*/true)); 1794 } 1795 1796 /// Handle a #private directive. 1797 void Preprocessor::HandleMacroPrivateDirective() { 1798 Token MacroNameTok; 1799 ReadMacroName(MacroNameTok, MU_Undef); 1800 1801 // Error reading macro name? If so, diagnostic already issued. 
1802 if (MacroNameTok.is(tok::eod)) 1803 return; 1804 1805 // Check to see if this is the last token on the #__private_macro line. 1806 CheckEndOfDirective("__private_macro"); 1807 1808 IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); 1809 // Okay, we finally have a valid identifier to undef. 1810 MacroDirective *MD = getLocalMacroDirective(II); 1811 1812 // If the macro is not defined, this is an error. 1813 if (!MD) { 1814 Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II; 1815 return; 1816 } 1817 1818 // Note that this macro has now been marked private. 1819 appendMacroDirective(II, AllocateVisibilityMacroDirective( 1820 MacroNameTok.getLocation(), /*isPublic=*/false)); 1821 } 1822 1823 //===----------------------------------------------------------------------===// 1824 // Preprocessor Include Directive Handling. 1825 //===----------------------------------------------------------------------===// 1826 1827 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully 1828 /// checked and spelled filename, e.g. as an operand of \#include. This returns 1829 /// true if the input filename was in <>'s or false if it were in ""'s. The 1830 /// caller is expected to provide a buffer that is large enough to hold the 1831 /// spelling of the filename, but is also expected to handle the case when 1832 /// this method decides to use a different buffer. 1833 bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, 1834 StringRef &Buffer) { 1835 // Get the text form of the filename. 1836 assert(!Buffer.empty() && "Can't have tokens with empty spellings!"); 1837 1838 // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and 1839 // C++20 [lex.header]/2: 1840 // 1841 // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then 1842 // in C: behavior is undefined 1843 // in C++: program is conditionally-supported with implementation-defined 1844 // semantics 1845 1846 // Make sure the filename is <x> or "x". 
1847 bool isAngled; 1848 if (Buffer[0] == '<') { 1849 if (Buffer.back() != '>') { 1850 Diag(Loc, diag::err_pp_expects_filename); 1851 Buffer = StringRef(); 1852 return true; 1853 } 1854 isAngled = true; 1855 } else if (Buffer[0] == '"') { 1856 if (Buffer.back() != '"') { 1857 Diag(Loc, diag::err_pp_expects_filename); 1858 Buffer = StringRef(); 1859 return true; 1860 } 1861 isAngled = false; 1862 } else { 1863 Diag(Loc, diag::err_pp_expects_filename); 1864 Buffer = StringRef(); 1865 return true; 1866 } 1867 1868 // Diagnose #include "" as invalid. 1869 if (Buffer.size() <= 2) { 1870 Diag(Loc, diag::err_pp_empty_filename); 1871 Buffer = StringRef(); 1872 return true; 1873 } 1874 1875 // Skip the brackets. 1876 Buffer = Buffer.substr(1, Buffer.size()-2); 1877 return isAngled; 1878 } 1879 1880 /// Push a token onto the token stream containing an annotation. 1881 void Preprocessor::EnterAnnotationToken(SourceRange Range, 1882 tok::TokenKind Kind, 1883 void *AnnotationVal) { 1884 // FIXME: Produce this as the current token directly, rather than 1885 // allocating a new token for it. 1886 auto Tok = std::make_unique<Token[]>(1); 1887 Tok[0].startToken(); 1888 Tok[0].setKind(Kind); 1889 Tok[0].setLocation(Range.getBegin()); 1890 Tok[0].setAnnotationEndLoc(Range.getEnd()); 1891 Tok[0].setAnnotationValue(AnnotationVal); 1892 EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false); 1893 } 1894 1895 /// Produce a diagnostic informing the user that a #include or similar 1896 /// was implicitly treated as a module import. 
1897 static void diagnoseAutoModuleImport( 1898 Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok, 1899 ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path, 1900 SourceLocation PathEnd) { 1901 SmallString<128> PathString; 1902 for (size_t I = 0, N = Path.size(); I != N; ++I) { 1903 if (I) 1904 PathString += '.'; 1905 PathString += Path[I].first->getName(); 1906 } 1907 1908 int IncludeKind = 0; 1909 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) { 1910 case tok::pp_include: 1911 IncludeKind = 0; 1912 break; 1913 1914 case tok::pp_import: 1915 IncludeKind = 1; 1916 break; 1917 1918 case tok::pp_include_next: 1919 IncludeKind = 2; 1920 break; 1921 1922 case tok::pp___include_macros: 1923 IncludeKind = 3; 1924 break; 1925 1926 default: 1927 llvm_unreachable("unknown include directive kind"); 1928 } 1929 1930 PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation) 1931 << IncludeKind << PathString; 1932 } 1933 1934 // Given a vector of path components and a string containing the real 1935 // path to the file, build a properly-cased replacement in the vector, 1936 // and return true if the replacement should be suggested. 1937 static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components, 1938 StringRef RealPathName, 1939 llvm::sys::path::Style Separator) { 1940 auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName); 1941 auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName); 1942 int Cnt = 0; 1943 bool SuggestReplacement = false; 1944 1945 auto IsSep = [Separator](StringRef Component) { 1946 return Component.size() == 1 && 1947 llvm::sys::path::is_separator(Component[0], Separator); 1948 }; 1949 1950 // Below is a best-effort to handle ".." in paths. It is admittedly 1951 // not 100% correct in the presence of symlinks. 1952 for (auto &Component : llvm::reverse(Components)) { 1953 if ("." == Component) { 1954 } else if (".." 
== Component) { 1955 ++Cnt; 1956 } else if (Cnt) { 1957 --Cnt; 1958 } else if (RealPathComponentIter != RealPathComponentEnd) { 1959 if (!IsSep(Component) && !IsSep(*RealPathComponentIter) && 1960 Component != *RealPathComponentIter) { 1961 // If these non-separator path components differ by more than just case, 1962 // then we may be looking at symlinked paths. Bail on this diagnostic to 1963 // avoid noisy false positives. 1964 SuggestReplacement = 1965 RealPathComponentIter->equals_insensitive(Component); 1966 if (!SuggestReplacement) 1967 break; 1968 Component = *RealPathComponentIter; 1969 } 1970 ++RealPathComponentIter; 1971 } 1972 } 1973 return SuggestReplacement; 1974 } 1975 1976 bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts, 1977 const TargetInfo &TargetInfo, 1978 const Module &M, 1979 DiagnosticsEngine &Diags) { 1980 Module::Requirement Requirement; 1981 Module::UnresolvedHeaderDirective MissingHeader; 1982 Module *ShadowingModule = nullptr; 1983 if (M.isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader, 1984 ShadowingModule)) 1985 return false; 1986 1987 if (MissingHeader.FileNameLoc.isValid()) { 1988 Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing) 1989 << MissingHeader.IsUmbrella << MissingHeader.FileName; 1990 } else if (ShadowingModule) { 1991 Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name; 1992 Diags.Report(ShadowingModule->DefinitionLoc, 1993 diag::note_previous_definition); 1994 } else { 1995 // FIXME: Track the location at which the requirement was specified, and 1996 // use it here. 
1997 Diags.Report(M.DefinitionLoc, diag::err_module_unavailable) 1998 << M.getFullModuleName() << Requirement.RequiredState 1999 << Requirement.FeatureName; 2000 } 2001 return true; 2002 } 2003 2004 std::pair<ConstSearchDirIterator, const FileEntry *> 2005 Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const { 2006 // #include_next is like #include, except that we start searching after 2007 // the current found directory. If we can't do this, issue a 2008 // diagnostic. 2009 ConstSearchDirIterator Lookup = CurDirLookup; 2010 const FileEntry *LookupFromFile = nullptr; 2011 2012 if (isInPrimaryFile() && LangOpts.IsHeaderFile) { 2013 // If the main file is a header, then it's either for PCH/AST generation, 2014 // or libclang opened it. Either way, handle it as a normal include below 2015 // and do not complain about include_next. 2016 } else if (isInPrimaryFile()) { 2017 Lookup = nullptr; 2018 Diag(IncludeNextTok, diag::pp_include_next_in_primary); 2019 } else if (CurLexerSubmodule) { 2020 // Start looking up in the directory *after* the one in which the current 2021 // file would be found, if any. 2022 assert(CurPPLexer && "#include_next directive in macro?"); 2023 if (auto FE = CurPPLexer->getFileEntry()) 2024 LookupFromFile = *FE; 2025 Lookup = nullptr; 2026 } else if (!Lookup) { 2027 // The current file was not found by walking the include path. Either it 2028 // is the primary file (handled above), or it was found by absolute path, 2029 // or it was found relative to such a file. 2030 // FIXME: Track enough information so we know which case we're in. 2031 Diag(IncludeNextTok, diag::pp_include_next_absolute_path); 2032 } else { 2033 // Start looking up in the next directory. 2034 ++Lookup; 2035 } 2036 2037 return {Lookup, LookupFromFile}; 2038 } 2039 2040 /// HandleIncludeDirective - The "\#include" tokens have just been read, read 2041 /// the file to be included from the lexer, then include it! 
/// This is a common
/// routine with functionality shared between \#include, \#include_next and
/// \#import.  LookupFrom is set when this is a \#include_next directive, it
/// specifies the file to start searching from.
///
/// \param HashLoc        Location of the '#'; used as the start of the source
///                       range of any annotation token that is entered.
/// \param IncludeTok     The directive-name token. On ImportAction::Failure it
///                       is turned into an eof token.
/// \param LookupFrom     Forwarded unchanged to HandleHeaderIncludeOrImport.
/// \param LookupFromFile Forwarded unchanged to HandleHeaderIncludeOrImport.
void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
                                          Token &IncludeTok,
                                          ConstSearchDirIterator LookupFrom,
                                          const FileEntry *LookupFromFile) {
  Token FilenameTok;
  // NOTE(review): a true result from LexHeaderName ends processing here with
  // no further diagnostic -- presumably it has already handled the situation;
  // confirm against LexHeaderName.
  if (LexHeaderName(FilenameTok))
    return;

  // Anything other than a header-name is an error. Skip the rest of the line
  // unless we are already at the end of the directive.
  if (FilenameTok.isNot(tok::header_name)) {
    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
    if (FilenameTok.isNot(tok::eod))
      DiscardUntilEndOfDirective();
    return;
  }

  // Verify that there is nothing after the filename, other than EOD.  Note
  // that we allow macros that expand to nothing after the filename, because
  // this falls into the category of "#include pp-tokens new-line" specified
  // in C99 6.10.2p4.
  SourceLocation EndLoc =
      CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);

  auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
                                            EndLoc, LookupFrom, LookupFromFile);
  // Module-related outcomes are surfaced as an annotation token covering
  // [HashLoc, EndLoc] that carries the module as its annotation value.
  switch (Action.Kind) {
  case ImportAction::None:
  case ImportAction::SkippedModuleImport:
    break;
  case ImportAction::ModuleBegin:
    EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
                         tok::annot_module_begin, Action.ModuleForHeader);
    break;
  case ImportAction::HeaderUnitImport:
    EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
                         Action.ModuleForHeader);
    break;
  case ImportAction::ModuleImport:
    EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
                         tok::annot_module_include, Action.ModuleForHeader);
    break;
  case ImportAction::Failure:
    assert(TheModuleLoader.HadFatalFailure &&
           "This should be an early exit only to a fatal error");
    // Set the flag explicitly as well, so it holds even in builds where the
    // assert above is compiled out.
    TheModuleLoader.HadFatalFailure = true;
    // Stop lexing: turn the directive token into eof and cut off the lexer.
    IncludeTok.setKind(tok::eof);
    CurLexer->cutOffLexing();
    return;
  }
}

/// Look up the file named by a \#include-like directive or header import,
/// trying progressively more forgiving lookups and emitting the matching
/// diagnostic.
///
/// The attempts, in order:
///   1. A lookup of \p LookupFilename as written.
///   2. If \p isAngled, a lookup as a quoted include; a hit is reported with a
///      fix-it that rewrites the name in quotes.
///   3. If LangOpts.SpellChecking, a lookup with leading and trailing
///      non-alphanumeric characters removed; a hit is reported with a fix-it
///      and \p Filename / \p LookupFilename are updated to the corrected
///      names.
/// Before attempt 2, the callbacks' FileNotFound hook and
/// SuppressIncludeNotFoundError may each end the search silently.
///
/// \p SuggestedModule and \p IsMapped are handed to every lookup attempt,
/// \p IsFrameworkFound only to the first one. \p SearchPath and
/// \p RelativePath are only passed down when callbacks are installed.
///
/// \returns the file that was found, or std::nullopt if none was.
OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
    ConstSearchDirIterator *CurDir, StringRef &Filename,
    SourceLocation FilenameLoc, CharSourceRange FilenameRange,
    const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
    bool &IsMapped, ConstSearchDirIterator LookupFrom,
    const FileEntry *LookupFromFile, StringRef &LookupFilename,
    SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
    ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
  // Runs the module map's header-inclusion check for a file that was found.
  // Does nothing when preprocessing assembly.
  auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
    if (LangOpts.AsmPreprocessor)
      return;

    Module *RequestingModule = getModuleForLocation(
        FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
    bool RequestingModuleIsModuleInterface =
        !SourceMgr.isInMainFile(FilenameLoc);

    HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
        RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
        Filename, FE);
  };

  // Attempt 1: look the name up exactly as requested.
  OptionalFileEntryRef File = LookupFile(
      FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir,
      Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
      &SuggestedModule, &IsMapped, &IsFrameworkFound);
  if (File) {
    DiagnoseHeaderInclusion(*File);
    return File;
  }

  // Give the clients a chance to silently skip this include.
  if (Callbacks && Callbacks->FileNotFound(Filename))
    return std::nullopt;

  if (SuppressIncludeNotFoundError)
    return std::nullopt;

  // If the file could not be located and it was included via angle
  // brackets, we can attempt a lookup as though it were a quoted path to
  // provide the user with a possible fixit.
  if (isAngled) {
    // Attempt 2. This File shadows the outer one, which stays empty.
    OptionalFileEntryRef File = LookupFile(
        FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir,
        Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
        &SuggestedModule, &IsMapped,
        /*IsFrameworkFound=*/nullptr);
    if (File) {
      DiagnoseHeaderInclusion(*File);
      Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
          << Filename << IsImportDecl
          << FixItHint::CreateReplacement(FilenameRange,
                                          "\"" + Filename.str() + "\"");
      return File;
    }
  }

  // Check for likely typos due to leading or trailing non-isAlphanumeric
  // characters
  // Remember the name as written: Filename may be overwritten below, and the
  // final diagnostics refer to the original spelling.
  StringRef OriginalFilename = Filename;
  if (LangOpts.SpellChecking) {
    // A heuristic to correct a typo file name by removing leading and
    // trailing non-isAlphanumeric characters.
    auto CorrectTypoFilename = [](llvm::StringRef Filename) {
      Filename = Filename.drop_until(isAlphanumeric);
      while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
        Filename = Filename.drop_back();
      }
      return Filename;
    };
    StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
    StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);

    // Attempt 3. Again a block-local File that shadows the outer one.
    OptionalFileEntryRef File = LookupFile(
        FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom,
        LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr,
        Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
        /*IsFrameworkFound=*/nullptr);
    if (File) {
      DiagnoseHeaderInclusion(*File);
      // The fix-it keeps the delimiter style the user wrote.
      auto Hint =
          isAngled ? FixItHint::CreateReplacement(
                         FilenameRange, "<" + TypoCorrectionName.str() + ">")
                   : FixItHint::CreateReplacement(
                         FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
      Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
          << OriginalFilename << TypoCorrectionName << Hint;
      // We found the file, so set the Filename to the name after typo
      // correction.
      Filename = TypoCorrectionName;
      LookupFilename = TypoCorrectionLookupName;
      return File;
    }
  }

  // If the file is still not found, just go with the vanilla diagnostic
  assert(!File && "expected missing file");
  Diag(FilenameTok, diag::err_pp_file_not_found)
      << OriginalFilename << FilenameRange;
  if (IsFrameworkFound) {
    // The first lookup reported a framework but the header is missing. Add a
    // note naming the header (the text after the first '/'), the framework
    // (the text before it) and the framework's cached directory.
    size_t SlashPos = OriginalFilename.find('/');
    assert(SlashPos != StringRef::npos &&
           "Include with framework name should have '/' in the filename");
    StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
    FrameworkCacheEntry &CacheEntry =
        HeaderInfo.LookupFrameworkCache(FrameworkName);
    assert(CacheEntry.Directory && "Found framework should be in cache");
    Diag(FilenameTok, diag::note_pp_framework_without_header)
        << OriginalFilename.substr(SlashPos + 1) << FrameworkName
        << CacheEntry.Directory->getName();
  }

  return std::nullopt;
}

/// Handle either a #include-like directive or an import declaration that names
/// a header file.
///
/// \param HashLoc The location of the '#' token for an include, or
/// SourceLocation() for an import declaration.
/// \param IncludeTok The include / include_next / import token.
/// \param FilenameTok The header-name token.
/// \param EndLoc The location at which any imported macros become visible.
/// \param LookupFrom For #include_next, the starting directory for the
/// directory lookup.
2220 /// \param LookupFromFile For #include_next, the starting file for the directory 2221 /// lookup. 2222 Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( 2223 SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok, 2224 SourceLocation EndLoc, ConstSearchDirIterator LookupFrom, 2225 const FileEntry *LookupFromFile) { 2226 SmallString<128> FilenameBuffer; 2227 StringRef Filename = getSpelling(FilenameTok, FilenameBuffer); 2228 SourceLocation CharEnd = FilenameTok.getEndLoc(); 2229 2230 CharSourceRange FilenameRange 2231 = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd); 2232 StringRef OriginalFilename = Filename; 2233 bool isAngled = 2234 GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); 2235 2236 // If GetIncludeFilenameSpelling set the start ptr to null, there was an 2237 // error. 2238 if (Filename.empty()) 2239 return {ImportAction::None}; 2240 2241 bool IsImportDecl = HashLoc.isInvalid(); 2242 SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc; 2243 2244 // Complain about attempts to #include files in an audit pragma. 2245 if (PragmaARCCFCodeAuditedInfo.second.isValid()) { 2246 Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl; 2247 Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here); 2248 2249 // Immediately leave the pragma. 2250 PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()}; 2251 } 2252 2253 // Complain about attempts to #include files in an assume-nonnull pragma. 2254 if (PragmaAssumeNonNullLoc.isValid()) { 2255 Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl; 2256 Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here); 2257 2258 // Immediately leave the pragma. 2259 PragmaAssumeNonNullLoc = SourceLocation(); 2260 } 2261 2262 if (HeaderInfo.HasIncludeAliasMap()) { 2263 // Map the filename with the brackets still attached. 
If the name doesn't 2264 // map to anything, fall back on the filename we've already gotten the 2265 // spelling for. 2266 StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename); 2267 if (!NewName.empty()) 2268 Filename = NewName; 2269 } 2270 2271 // Search include directories. 2272 bool IsMapped = false; 2273 bool IsFrameworkFound = false; 2274 ConstSearchDirIterator CurDir = nullptr; 2275 SmallString<1024> SearchPath; 2276 SmallString<1024> RelativePath; 2277 // We get the raw path only if we have 'Callbacks' to which we later pass 2278 // the path. 2279 ModuleMap::KnownHeader SuggestedModule; 2280 SourceLocation FilenameLoc = FilenameTok.getLocation(); 2281 StringRef LookupFilename = Filename; 2282 2283 // Normalize slashes when compiling with -fms-extensions on non-Windows. This 2284 // is unnecessary on Windows since the filesystem there handles backslashes. 2285 SmallString<128> NormalizedPath; 2286 llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native; 2287 if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) { 2288 NormalizedPath = Filename.str(); 2289 llvm::sys::path::native(NormalizedPath); 2290 LookupFilename = NormalizedPath; 2291 BackslashStyle = llvm::sys::path::Style::windows; 2292 } 2293 2294 OptionalFileEntryRef File = LookupHeaderIncludeOrImport( 2295 &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok, 2296 IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile, 2297 LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled); 2298 2299 if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) { 2300 if (File && isPCHThroughHeader(&File->getFileEntry())) 2301 SkippingUntilPCHThroughHeader = false; 2302 return {ImportAction::None}; 2303 } 2304 2305 // Should we enter the source file? 
Set to Skip if either the source file is 2306 // known to have no effect beyond its effect on module visibility -- that is, 2307 // if it's got an include guard that is already defined, set to Import if it 2308 // is a modular header we've already built and should import. 2309 2310 // For C++20 Modules 2311 // [cpp.include]/7 If the header identified by the header-name denotes an 2312 // importable header, it is implementation-defined whether the #include 2313 // preprocessing directive is instead replaced by an import directive. 2314 // For this implementation, the translation is permitted when we are parsing 2315 // the Global Module Fragment, and not otherwise (the cases where it would be 2316 // valid to replace an include with an import are highly constrained once in 2317 // named module purview; this choice avoids considerable complexity in 2318 // determining valid cases). 2319 2320 enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter; 2321 2322 if (PPOpts->SingleFileParseMode) 2323 Action = IncludeLimitReached; 2324 2325 // If we've reached the max allowed include depth, it is usually due to an 2326 // include cycle. Don't enter already processed files again as it can lead to 2327 // reaching the max allowed include depth again. 2328 if (Action == Enter && HasReachedMaxIncludeDepth && File && 2329 alreadyIncluded(*File)) 2330 Action = IncludeLimitReached; 2331 2332 // FIXME: We do not have a good way to disambiguate C++ clang modules from 2333 // C++ standard modules (other than use/non-use of Header Units). 
2334 2335 Module *ModuleToImport = SuggestedModule.getModule(); 2336 2337 bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport && 2338 !ModuleToImport->isForBuilding(getLangOpts()); 2339 2340 // Maybe a usable Header Unit 2341 bool UsableHeaderUnit = false; 2342 if (getLangOpts().CPlusPlusModules && ModuleToImport && 2343 ModuleToImport->isHeaderUnit()) { 2344 if (TrackGMFState.inGMF() || IsImportDecl) 2345 UsableHeaderUnit = true; 2346 else if (!IsImportDecl) { 2347 // This is a Header Unit that we do not include-translate 2348 ModuleToImport = nullptr; 2349 } 2350 } 2351 // Maybe a usable clang header module. 2352 bool UsableClangHeaderModule = 2353 (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && 2354 ModuleToImport && !ModuleToImport->isHeaderUnit(); 2355 2356 // Determine whether we should try to import the module for this #include, if 2357 // there is one. Don't do so if precompiled module support is disabled or we 2358 // are processing this module textually (because we're building the module). 2359 if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) { 2360 // If this include corresponds to a module but that module is 2361 // unavailable, diagnose the situation and bail out. 2362 // FIXME: Remove this; loadModule does the same check (but produces 2363 // slightly worse diagnostics). 2364 if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), *ModuleToImport, 2365 getDiagnostics())) { 2366 Diag(FilenameTok.getLocation(), 2367 diag::note_implicit_top_level_module_import_here) 2368 << ModuleToImport->getTopLevelModuleName(); 2369 return {ImportAction::None}; 2370 } 2371 2372 // Compute the module access path corresponding to this module. 2373 // FIXME: Should we have a second loadModule() overload to avoid this 2374 // extra lookup step? 
2375 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path; 2376 for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent) 2377 Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name), 2378 FilenameTok.getLocation())); 2379 std::reverse(Path.begin(), Path.end()); 2380 2381 // Warn that we're replacing the include/import with a module import. 2382 if (!IsImportDecl) 2383 diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd); 2384 2385 // Load the module to import its macros. We'll make the declarations 2386 // visible when the parser gets here. 2387 // FIXME: Pass ModuleToImport in here rather than converting it to a path 2388 // and making the module loader convert it back again. 2389 ModuleLoadResult Imported = TheModuleLoader.loadModule( 2390 IncludeTok.getLocation(), Path, Module::Hidden, 2391 /*IsInclusionDirective=*/true); 2392 assert((Imported == nullptr || Imported == ModuleToImport) && 2393 "the imported module is different than the suggested one"); 2394 2395 if (Imported) { 2396 Action = Import; 2397 } else if (Imported.isMissingExpected()) { 2398 markClangModuleAsAffecting( 2399 static_cast<Module *>(Imported)->getTopLevelModule()); 2400 // We failed to find a submodule that we assumed would exist (because it 2401 // was in the directory of an umbrella header, for instance), but no 2402 // actual module containing it exists (because the umbrella header is 2403 // incomplete). Treat this as a textual inclusion. 2404 ModuleToImport = nullptr; 2405 } else if (Imported.isConfigMismatch()) { 2406 // On a configuration mismatch, enter the header textually. We still know 2407 // that it's part of the corresponding module. 2408 } else { 2409 // We hit an error processing the import. Bail out. 2410 if (hadModuleLoaderFatalFailure()) { 2411 // With a fatal failure in the module loader, we abort parsing. 
2412 Token &Result = IncludeTok; 2413 assert(CurLexer && "#include but no current lexer set!"); 2414 Result.startToken(); 2415 CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof); 2416 CurLexer->cutOffLexing(); 2417 } 2418 return {ImportAction::None}; 2419 } 2420 } 2421 2422 // The #included file will be considered to be a system header if either it is 2423 // in a system include directory, or if the #includer is a system include 2424 // header. 2425 SrcMgr::CharacteristicKind FileCharacter = 2426 SourceMgr.getFileCharacteristic(FilenameTok.getLocation()); 2427 if (File) 2428 FileCharacter = std::max(HeaderInfo.getFileDirFlavor(*File), FileCharacter); 2429 2430 // If this is a '#import' or an import-declaration, don't re-enter the file. 2431 // 2432 // FIXME: If we have a suggested module for a '#include', and we've already 2433 // visited this file, don't bother entering it again. We know it has no 2434 // further effect. 2435 bool EnterOnce = 2436 IsImportDecl || 2437 IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import; 2438 2439 bool IsFirstIncludeOfFile = false; 2440 2441 // Ask HeaderInfo if we should enter this #include file. If not, #including 2442 // this file will have no effect. 2443 if (Action == Enter && File && 2444 !HeaderInfo.ShouldEnterIncludeFile(*this, *File, EnterOnce, 2445 getLangOpts().Modules, ModuleToImport, 2446 IsFirstIncludeOfFile)) { 2447 // C++ standard modules: 2448 // If we are not in the GMF, then we textually include only 2449 // clang modules: 2450 // Even if we've already preprocessed this header once and know that we 2451 // don't need to see its contents again, we still need to import it if it's 2452 // modular because we might not have imported it from this submodule before. 2453 // 2454 // FIXME: We don't do this when compiling a PCH because the AST 2455 // serialization layer can't cope with it. This means we get local 2456 // submodule visibility semantics wrong in that case. 
2457 if (UsableHeaderUnit && !getLangOpts().CompilingPCH) 2458 Action = TrackGMFState.inGMF() ? Import : Skip; 2459 else 2460 Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip; 2461 } 2462 2463 // Check for circular inclusion of the main file. 2464 // We can't generate a consistent preamble with regard to the conditional 2465 // stack if the main file is included again as due to the preamble bounds 2466 // some directives (e.g. #endif of a header guard) will never be seen. 2467 // Since this will lead to confusing errors, avoid the inclusion. 2468 if (Action == Enter && File && PreambleConditionalStack.isRecording() && 2469 SourceMgr.isMainFile(File->getFileEntry())) { 2470 Diag(FilenameTok.getLocation(), 2471 diag::err_pp_including_mainfile_in_preamble); 2472 return {ImportAction::None}; 2473 } 2474 2475 if (Callbacks && !IsImportDecl) { 2476 // Notify the callback object that we've seen an inclusion directive. 2477 // FIXME: Use a different callback for a pp-import? 2478 Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled, 2479 FilenameRange, File, SearchPath, RelativePath, 2480 SuggestedModule.getModule(), Action == Import, 2481 FileCharacter); 2482 if (Action == Skip && File) 2483 Callbacks->FileSkipped(*File, FilenameTok, FileCharacter); 2484 } 2485 2486 if (!File) 2487 return {ImportAction::None}; 2488 2489 // If this is a C++20 pp-import declaration, diagnose if we didn't find any 2490 // module corresponding to the named header. 2491 if (IsImportDecl && !ModuleToImport) { 2492 Diag(FilenameTok, diag::err_header_import_not_header_unit) 2493 << OriginalFilename << File->getName(); 2494 return {ImportAction::None}; 2495 } 2496 2497 // Issue a diagnostic if the name of the file on disk has a different case 2498 // than the one we're about to open. 
2499 const bool CheckIncludePathPortability = 2500 !IsMapped && !File->getFileEntry().tryGetRealPathName().empty(); 2501 2502 if (CheckIncludePathPortability) { 2503 StringRef Name = LookupFilename; 2504 StringRef NameWithoriginalSlashes = Filename; 2505 #if defined(_WIN32) 2506 // Skip UNC prefix if present. (tryGetRealPathName() always 2507 // returns a path with the prefix skipped.) 2508 bool NameWasUNC = Name.consume_front("\\\\?\\"); 2509 NameWithoriginalSlashes.consume_front("\\\\?\\"); 2510 #endif 2511 StringRef RealPathName = File->getFileEntry().tryGetRealPathName(); 2512 SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name), 2513 llvm::sys::path::end(Name)); 2514 #if defined(_WIN32) 2515 // -Wnonportable-include-path is designed to diagnose includes using 2516 // case even on systems with a case-insensitive file system. 2517 // On Windows, RealPathName always starts with an upper-case drive 2518 // letter for absolute paths, but Name might start with either 2519 // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell. 2520 // ("foo" will always have on-disk case, no matter which case was 2521 // used in the cd command). To not emit this warning solely for 2522 // the drive letter, whose case is dependent on if `cd` is used 2523 // with upper- or lower-case drive letters, always consider the 2524 // given drive letter case as correct for the purpose of this warning. 
2525 SmallString<128> FixedDriveRealPath; 2526 if (llvm::sys::path::is_absolute(Name) && 2527 llvm::sys::path::is_absolute(RealPathName) && 2528 toLowercase(Name[0]) == toLowercase(RealPathName[0]) && 2529 isLowercase(Name[0]) != isLowercase(RealPathName[0])) { 2530 assert(Components.size() >= 3 && "should have drive, backslash, name"); 2531 assert(Components[0].size() == 2 && "should start with drive"); 2532 assert(Components[0][1] == ':' && "should have colon"); 2533 FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str(); 2534 RealPathName = FixedDriveRealPath; 2535 } 2536 #endif 2537 2538 if (trySimplifyPath(Components, RealPathName, BackslashStyle)) { 2539 SmallString<128> Path; 2540 Path.reserve(Name.size()+2); 2541 Path.push_back(isAngled ? '<' : '"'); 2542 2543 const auto IsSep = [BackslashStyle](char c) { 2544 return llvm::sys::path::is_separator(c, BackslashStyle); 2545 }; 2546 2547 for (auto Component : Components) { 2548 // On POSIX, Components will contain a single '/' as first element 2549 // exactly if Name is an absolute path. 2550 // On Windows, it will contain "C:" followed by '\' for absolute paths. 2551 // The drive letter is optional for absolute paths on Windows, but 2552 // clang currently cannot process absolute paths in #include lines that 2553 // don't have a drive. 2554 // If the first entry in Components is a directory separator, 2555 // then the code at the bottom of this loop that keeps the original 2556 // directory separator style copies it. If the second entry is 2557 // a directory separator (the C:\ case), then that separator already 2558 // got copied when the C: was processed and we want to skip that entry. 2559 if (!(Component.size() == 1 && IsSep(Component[0]))) 2560 Path.append(Component); 2561 else if (Path.size() != 1) 2562 continue; 2563 2564 // Append the separator(s) the user used, or the close quote 2565 if (Path.size() > NameWithoriginalSlashes.size()) { 2566 Path.push_back(isAngled ? 
'>' : '"'); 2567 continue; 2568 } 2569 assert(IsSep(NameWithoriginalSlashes[Path.size()-1])); 2570 do 2571 Path.push_back(NameWithoriginalSlashes[Path.size()-1]); 2572 while (Path.size() <= NameWithoriginalSlashes.size() && 2573 IsSep(NameWithoriginalSlashes[Path.size()-1])); 2574 } 2575 2576 #if defined(_WIN32) 2577 // Restore UNC prefix if it was there. 2578 if (NameWasUNC) 2579 Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str(); 2580 #endif 2581 2582 // For user files and known standard headers, issue a diagnostic. 2583 // For other system headers, don't. They can be controlled separately. 2584 auto DiagId = 2585 (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) 2586 ? diag::pp_nonportable_path 2587 : diag::pp_nonportable_system_path; 2588 Diag(FilenameTok, DiagId) << Path << 2589 FixItHint::CreateReplacement(FilenameRange, Path); 2590 } 2591 } 2592 2593 switch (Action) { 2594 case Skip: 2595 // If we don't need to enter the file, stop now. 2596 if (ModuleToImport) 2597 return {ImportAction::SkippedModuleImport, ModuleToImport}; 2598 return {ImportAction::None}; 2599 2600 case IncludeLimitReached: 2601 // If we reached our include limit and don't want to enter any more files, 2602 // don't go any further. 2603 return {ImportAction::None}; 2604 2605 case Import: { 2606 // If this is a module import, make it visible if needed. 2607 assert(ModuleToImport && "no module to import"); 2608 2609 makeModuleVisible(ModuleToImport, EndLoc); 2610 2611 if (IncludeTok.getIdentifierInfo()->getPPKeywordID() == 2612 tok::pp___include_macros) 2613 return {ImportAction::None}; 2614 2615 return {ImportAction::ModuleImport, ModuleToImport}; 2616 } 2617 2618 case Enter: 2619 break; 2620 } 2621 2622 // Check that we don't have infinite #include recursion. 
2623 if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) { 2624 Diag(FilenameTok, diag::err_pp_include_too_deep); 2625 HasReachedMaxIncludeDepth = true; 2626 return {ImportAction::None}; 2627 } 2628 2629 if (isAngled && isInNamedModule()) 2630 Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview) 2631 << getNamedModuleName(); 2632 2633 // Look up the file, create a File ID for it. 2634 SourceLocation IncludePos = FilenameTok.getLocation(); 2635 // If the filename string was the result of macro expansions, set the include 2636 // position on the file where it will be included and after the expansions. 2637 if (IncludePos.isMacroID()) 2638 IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd(); 2639 FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter); 2640 if (!FID.isValid()) { 2641 TheModuleLoader.HadFatalFailure = true; 2642 return ImportAction::Failure; 2643 } 2644 2645 // If all is good, enter the new file! 2646 if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(), 2647 IsFirstIncludeOfFile)) 2648 return {ImportAction::None}; 2649 2650 // Determine if we're switching to building a new submodule, and which one. 2651 // This does not apply for C++20 modules header units. 2652 if (ModuleToImport && !ModuleToImport->isHeaderUnit()) { 2653 if (ModuleToImport->getTopLevelModule()->ShadowingModule) { 2654 // We are building a submodule that belongs to a shadowed module. This 2655 // means we find header files in the shadowed module. 2656 Diag(ModuleToImport->DefinitionLoc, 2657 diag::err_module_build_shadowed_submodule) 2658 << ModuleToImport->getFullModuleName(); 2659 Diag(ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc, 2660 diag::note_previous_definition); 2661 return {ImportAction::None}; 2662 } 2663 // When building a pch, -fmodule-name tells the compiler to textually 2664 // include headers in the specified module. We are not building the 2665 // specified module. 
//
    // FIXME: This is the wrong way to handle this. We should produce a PCH
    // that behaves the same as the header would behave in a compilation using
    // that PCH, which means we should enter the submodule. We need to teach
    // the AST serialization layer to deal with the resulting AST.
    if (getLangOpts().CompilingPCH &&
        ModuleToImport->isForBuilding(getLangOpts()))
      return {ImportAction::None};

    assert(!CurLexerSubmodule && "should not have marked this as a module yet");
    CurLexerSubmodule = ModuleToImport;

    // Let the macro handling code know that any future macros are within
    // the new submodule.
    EnterSubmodule(ModuleToImport, EndLoc, /*ForPragma*/ false);

    // Let the parser know that any future declarations are within the new
    // submodule.
    // FIXME: There's no point doing this if we're handling a #__include_macros
    // directive.
    return {ImportAction::ModuleBegin, ModuleToImport};
  }

  assert(!IsImportDecl && "failed to diagnose missing module for import decl");
  return {ImportAction::None};
}

/// HandleIncludeNextDirective - Implements \#include_next.
///
/// Emits the ext_pp_include_next_directive diagnostic, asks
/// getIncludeNextStart() where the lookup should resume (a search-directory
/// iterator plus a file to look up from), and then processes the directive
/// through HandleIncludeDirective with that starting point.
///
/// \param HashLoc Location of the '#' that introduced the directive.
/// \param IncludeNextTok The 'include_next' directive token.
void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
                                              Token &IncludeNextTok) {
  Diag(IncludeNextTok, diag::ext_pp_include_next_directive);

  ConstSearchDirIterator Lookup = nullptr;
  const FileEntry *LookupFromFile;
  // LookupFromFile is assigned by the std::tie below before it is read.
  std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok);

  return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
                                LookupFromFile);
}

/// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
///
/// Always reports err_pp_import_directive_ms at \p Tok and then discards the
/// remaining tokens of the directive.
void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
  // The Microsoft #import directive takes a type library and generates header
  // files from it, and includes those. This is beyond the scope of what clang
  // does, so we ignore it and error out. However, #import can optionally have
  // trailing attributes that span multiple lines. We're going to eat those
  // so we can continue processing from there.
  Diag(Tok, diag::err_pp_import_directive_ms );

  // Read tokens until we get to the end of the directive. Note that the
  // directive can be split over multiple lines using the backslash character.
  DiscardUntilEndOfDirective();
}

/// HandleImportDirective - Implements \#import.
///
/// When Objective-C is disabled, the directive is either handed to
/// HandleMicrosoftImportDirective (if MSVCCompat is set) or diagnosed with
/// ext_pp_import_directive. In every other case it is then processed through
/// HandleIncludeDirective.
///
/// \param HashLoc Location of the '#' that introduced the directive.
/// \param ImportTok The 'import' directive token.
void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
                                         Token &ImportTok) {
  if (!LangOpts.ObjC) {  // #import is standard for ObjC.
    if (LangOpts.MSVCCompat)
      return HandleMicrosoftImportDirective(ImportTok);
    Diag(ImportTok, diag::ext_pp_import_directive);
  }
  return HandleIncludeDirective(HashLoc, ImportTok);
}

/// HandleIncludeMacrosDirective - The -imacros command line option turns into a
/// pseudo directive in the predefines buffer. This handles it by sucking all
/// tokens through the preprocessor and discarding them (only keeping the side
/// effects on the preprocessor).
///
/// \param HashLoc Location of the '#' that introduced the directive.
/// \param IncludeMacrosTok The '__include_macros' directive token; its
/// location must be in the buffer named "<built-in>".
void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
                                                Token &IncludeMacrosTok) {
  // This directive should only occur in the predefines buffer. If not, emit an
  // error and reject it.
  SourceLocation Loc = IncludeMacrosTok.getLocation();
  if (SourceMgr.getBufferName(Loc) != "<built-in>") {
    Diag(IncludeMacrosTok.getLocation(),
         diag::pp_include_macros_out_of_predefines);
    DiscardUntilEndOfDirective();
    return;
  }

  // Treat this as a normal #include for checking purposes. If this is
  // successful, it will push a new lexer onto the include stack.
  HandleIncludeDirective(HashLoc, IncludeMacrosTok);

  // Lex and throw away every token up to and including the first '##'. The
  // assertion message indicates that the '##' marks the end of the -imacros
  // input; presumably it is emitted into the predefines buffer right after
  // the pseudo directive -- TODO confirm against the code that builds it.
  Token TmpTok;
  do {
    Lex(TmpTok);
    assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
  } while (TmpTok.isNot(tok::hashhash));
}

//===----------------------------------------------------------------------===//
// Preprocessor Macro Directive Handling.
//===----------------------------------------------------------------------===//

/// ReadMacroParameterList - The ( starting a parameter list of a macro
/// definition has just been read. Lex the rest of the parameters and the
/// closing ), updating MI with what we learn. Return true if an error occurs
/// parsing the param list.
///
/// \param MI Macro being defined; receives the parameter list and the
/// C99/GNU varargs flags. For '#define FOO()' nothing is stored in it.
/// \param Tok Scratch token used for lexing; on return it holds the last
/// token that was lexed.
bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
  SmallVector<IdentifierInfo*, 32> Parameters;

  while (true) {
    LexUnexpandedNonComment(Tok);
    switch (Tok.getKind()) {
    case tok::r_paren:
      // Found the end of the parameter list.
      if (Parameters.empty())  // #define FOO()
        return false;
      // Otherwise we have #define FOO(A,)
      Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
      return true;
    case tok::ellipsis:  // #define X(... -> C99 varargs
      if (!LangOpts.C99)
        Diag(Tok, LangOpts.CPlusPlus11 ?
             diag::warn_cxx98_compat_variadic_macro :
             diag::ext_variadic_macro);

      // OpenCL v1.2 s6.9.e: variadic macros are not supported.
      if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
        Diag(Tok, diag::ext_pp_opencl_variadic_macros);
      }

      // Lex the token after the ellipsis.
      LexUnexpandedNonComment(Tok);
      if (Tok.isNot(tok::r_paren)) {
        Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
        return true;
      }
      // Add the __VA_ARGS__ identifier as a parameter.
      Parameters.push_back(Ident__VA_ARGS__);
      MI->setIsC99Varargs();
      MI->setParameterList(Parameters, BP);
      return false;
    case tok::eod:  // #define X(
      Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
      return true;
    default:
      // Handle keywords and identifiers here to accept things like
      // #define Foo(for) for.
      IdentifierInfo *II = Tok.getIdentifierInfo();
      if (!II) {
        // #define X(1
        Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
        return true;
      }

      // If this is already used as a parameter, it is used multiple times (e.g.
      // #define X(A,A)).
      if (llvm::is_contained(Parameters, II)) {  // C99 6.10.3p6
        Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
        return true;
      }

      // Add the parameter to the macro info.
      Parameters.push_back(II);

      // Lex the token after the identifier.
      LexUnexpandedNonComment(Tok);

      switch (Tok.getKind()) {
      default:          // #define X(A B
        Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
        return true;
      case tok::r_paren: // #define X(A)
        MI->setParameterList(Parameters, BP);
        return false;
      case tok::comma:  // #define X(A,
        // Leaves only the inner switch; the enclosing loop then lexes the
        // next parameter.
        break;
      case tok::ellipsis:  // #define X(A... -> GCC extension
        // Diagnose extension.
        Diag(Tok, diag::ext_named_variadic_macro);

        // Lex the token after the ellipsis.
        LexUnexpandedNonComment(Tok);
        if (Tok.isNot(tok::r_paren)) {
          Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
          return true;
        }

        MI->setIsGNUVarargs();
        MI->setParameterList(Parameters, BP);
        return false;
      }
    }
  }
}

/// Returns true if the definition \p MI of the macro named by \p MacroName
/// has one of the shapes this function accepts:
///  - a single replacement token of the same kind as the macro name;
///  - a single replacement token that is a keyword under \p LOptions whose
///    spelling is the macro name decorated with underscores (see below);
///  - an empty replacement list, when the macro name is one of extern,
///    inline, static or const.
///
/// HandleDefineDirective uses the result to suppress
/// warn_pp_macro_hides_keyword.
static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
                                   const LangOptions &LOptions) {
  if (MI->getNumTokens() == 1) {
    const Token &Value = MI->getReplacementToken(0);

    // Macro that is identity, like '#define inline inline' is a valid pattern.
    if (MacroName.getKind() == Value.getKind())
      return true;

    // Macro that maps a keyword to the same keyword decorated with leading/
    // trailing underscores is a valid pattern:
    //    #define inline __inline
    //    #define inline __inline__
    //    #define inline _inline (in MS compatibility mode)
    // NOTE(review): nothing below tests for MS compatibility mode directly;
    // presumably the isKeyword(LOptions) check is what limits the
    // single-underscore form to that mode -- confirm.
    StringRef MacroText = MacroName.getIdentifierInfo()->getName();
    if (IdentifierInfo *II = Value.getIdentifierInfo()) {
      if (!II->isKeyword(LOptions))
        return false;
      StringRef ValueText = II->getName();
      StringRef TrimmedValue = ValueText;
      if (!ValueText.starts_with("__")) {
        // Exactly one leading underscore: strip it. A trailing "__" is not
        // stripped in this form.
        if (ValueText.starts_with("_"))
          TrimmedValue = TrimmedValue.drop_front(1);
        else
          return false;
      } else {
        // Leading "__": strip it together with an optional trailing "__".
        TrimmedValue = TrimmedValue.drop_front(2);
        if (TrimmedValue.ends_with("__"))
          TrimmedValue = TrimmedValue.drop_back(2);
      }
      return TrimmedValue == MacroText;
    } else {
      return false;
    }
  }

  // #define inline
  return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
                           tok::kw_const) &&
         MI->getNumTokens() == 0;
}

// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
// entire line) of the macro's tokens and adds them to MacroInfo, and while
// doing so performs certain validity checks including (but not limited to):
//   - # (stringization) is followed by a macro parameter
//
// MacroNameTok is the already-lexed macro name; its location becomes the
// definition location of the new MacroInfo. ImmediatelyAfterHeaderGuard
// makes a body-less definition get recorded with the current lexer's MIOpt
// as the macro defined by a potential header guard.
//
// Returns a nullptr if an invalid sequence of tokens is encountered or returns
// a pointer to a MacroInfo object.

MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
    const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {

  Token LastTok = MacroNameTok;
  // Create the new macro.
  MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());

  Token Tok;
  LexUnexpandedToken(Tok);

  // Ensure we consume the rest of the macro body if errors occur.
  // This runs on every exit from the function, including the early
  // 'return nullptr' error paths below.
  auto _ = llvm::make_scope_exit([&]() {
    // The flag indicates if we are still waiting for 'eod'.
    if (CurLexer->ParsingPreprocessorDirective)
      DiscardUntilEndOfDirective();
  });

  // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
  // within their appropriate context.
  VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);

  // If this is a function-like macro definition, parse the argument list,
  // marking each of the identifiers as being used as macro arguments. Also,
  // check other constraints on the first token of the macro body.
  if (Tok.is(tok::eod)) {
    if (ImmediatelyAfterHeaderGuard) {
      // Save this macro information since it may be part of a header guard.
      CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
                                        MacroNameTok.getLocation());
    }
    // If there is no body to this macro, we have no special handling here.
  } else if (Tok.hasLeadingSpace()) {
    // This is a normal token with leading space. Clear the leading space
    // marker on the first token to get proper expansion.
    Tok.clearFlag(Token::LeadingSpace);
  } else if (Tok.is(tok::l_paren)) {
    // This is a function-like macro definition. Read the argument list.
    MI->setIsFunctionLike();
    if (ReadMacroParameterList(MI, LastTok))
      return nullptr;

    // If this is a definition of an ISO C/C++ variadic function-like macro (not
    // using the GNU named varargs extension) inform our variadic scope guard
    // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
    // allowed only within the definition of a variadic macro.

    if (MI->isC99Varargs()) {
      VariadicMacroScopeGuard.enterScope();
    }

    // Read the first token after the arg list for down below.
    LexUnexpandedToken(Tok);
  } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
    // C99 requires whitespace between the macro definition and the body. Emit
    // a diagnostic for something like "#define X+".
    Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
  } else {
    // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
    // first character of a replacement list is not a character required by
    // subclause 5.2.1, then there shall be white-space separation between the
    // identifier and the replacement list.". 5.2.1 lists this set:
    // "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
    // is irrelevant here.
    bool isInvalid = false;
    if (Tok.is(tok::at)) // @ is not in the list above.
      isInvalid = true;
    else if (Tok.is(tok::unknown)) {
      // If we have an unknown token, it is something strange like "`". Since
      // all of valid characters would have lexed into a single character
      // token of some sort, we know this is not a valid case.
      isInvalid = true;
    }
    if (isInvalid)
      Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
    else
      Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
  }

  if (!Tok.is(tok::eod))
    LastTok = Tok;

  SmallVector<Token, 16> Tokens;

  // Read the rest of the macro body.
  if (MI->isObjectLike()) {
    // Object-like macros are very simple, just read their body.
    while (Tok.isNot(tok::eod)) {
      LastTok = Tok;
      Tokens.push_back(Tok);
      // Get the next token of the macro.
      LexUnexpandedToken(Tok);
    }
  } else {
    // Otherwise, read the body of a function-like macro. While we are at it,
    // check C99 6.10.3.2p1: ensure that # operators are followed by macro
    // parameters in function-like macro expansions.

    VAOptDefinitionContext VAOCtx(*this);

    while (Tok.isNot(tok::eod)) {
      LastTok = Tok;

      if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
        Tokens.push_back(Tok);

        if (VAOCtx.isVAOptToken(Tok)) {
          // If we're already within a VAOPT, emit an error.
          if (VAOCtx.isInVAOpt()) {
            Diag(Tok, diag::err_pp_vaopt_nested_use);
            return nullptr;
          }
          // Ensure VAOPT is followed by a '(' .
          LexUnexpandedToken(Tok);
          if (Tok.isNot(tok::l_paren)) {
            Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
            return nullptr;
          }
          Tokens.push_back(Tok);
          VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
          LexUnexpandedToken(Tok);
          if (Tok.is(tok::hashhash)) {
            Diag(Tok, diag::err_vaopt_paste_at_start);
            return nullptr;
          }
          continue;
        } else if (VAOCtx.isInVAOpt()) {
          if (Tok.is(tok::r_paren)) {
            if (VAOCtx.sawClosingParen()) {
              assert(Tokens.size() >= 3 &&
                     "Must have seen at least __VA_OPT__( "
                     "and a subsequent tok::r_paren");
              // Tokens.back() is the ')' pushed above, so size() - 2 is the
              // token immediately before it.
              if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
                Diag(Tok, diag::err_vaopt_paste_at_end);
                return nullptr;
              }
            }
          } else if (Tok.is(tok::l_paren)) {
            VAOCtx.sawOpeningParen(Tok.getLocation());
          }
        }
        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
        continue;
      }

      // If we're in -traditional mode, then we should ignore stringification
      // and token pasting. Mark the tokens as unknown so as not to confuse
      // things.
      if (getLangOpts().TraditionalCPP) {
        Tok.setKind(tok::unknown);
        Tokens.push_back(Tok);

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
        continue;
      }

      if (Tok.is(tok::hashhash)) {
        // If we see token pasting, check if it looks like the gcc comma
        // pasting extension. We'll use this information to suppress
        // diagnostics later on.

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);

        if (Tok.is(tok::eod)) {
          // LastTok still holds the '##'; it is stored as the final body
          // token.
          Tokens.push_back(LastTok);
          break;
        }

        if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
            Tokens[Tokens.size() - 1].is(tok::comma))
          MI->setHasCommaPasting();

        // Things look ok, add the '##' token to the macro.
        Tokens.push_back(LastTok);
        continue;
      }

      // Our Token is a stringization operator.
      // Get the next token of the macro.
      LexUnexpandedToken(Tok);

      // Check for a valid macro arg identifier or __VA_OPT__.
      if (!VAOCtx.isVAOptToken(Tok) &&
          (Tok.getIdentifierInfo() == nullptr ||
           MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {

        // If this is assembler-with-cpp mode, we accept random gibberish after
        // the '#' because '#' is often a comment character. However, change
        // the kind of the token to tok::unknown so that the preprocessor isn't
        // confused.
        if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
          LastTok.setKind(tok::unknown);
          Tokens.push_back(LastTok);
          continue;
        } else {
          Diag(Tok, diag::err_pp_stringize_not_parameter)
            << LastTok.is(tok::hashat);
          return nullptr;
        }
      }

      // Things look ok, add the '#' and param name tokens to the macro.
      Tokens.push_back(LastTok);

      // If the token following '#' is VAOPT, let the next iteration handle it
      // and check it for correctness, otherwise add the token and prime the
      // loop with the next one.
      if (!VAOCtx.isVAOptToken(Tok)) {
        Tokens.push_back(Tok);
        LastTok = Tok;

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
      }
    }
    // The directive ended while a __VA_OPT__( was still open.
    if (VAOCtx.isInVAOpt()) {
      assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
      Diag(Tok, diag::err_pp_expected_after)
        << LastTok.getKind() << tok::r_paren;
      Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
      return nullptr;
    }
  }
  MI->setDefinitionEndLoc(LastTok.getLocation());

  MI->setTokens(Tokens, BP);
  return MI;
}

/// Returns true if \p II is spelled "__strong", "__weak",
/// "__unsafe_unretained" or "__autoreleasing". HandleDefineDirective uses
/// this to ignore redefinitions of the predefined ownership-qualifier macros
/// when Objective-C is enabled.
static bool isObjCProtectedMacro(const IdentifierInfo *II) {
  return II->isStr("__strong") || II->isStr("__weak") ||
         II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
}

/// HandleDefineDirective - Implements \#define. This consumes the entire macro
/// line then lets the caller lex the next real token.
///
/// \param DefineTok The 'define' directive token. Only its location is used
/// here, to tell whether the directive is in a system header.
/// \param ImmediatelyAfterHeaderGuard Forwarded unchanged to
/// ReadOptionalMacroParameterListAndBody.
void Preprocessor::HandleDefineDirective(
    Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
  ++NumDefined;

  Token MacroNameTok;
  bool MacroShadowsKeyword;
  ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);

  // Error reading macro name? If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod))
    return;

  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
  // Issue a final pragma warning if we're defining a macro that has been
  // undefined and is being redefined.
  if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
    emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);

  // If we are supposed to keep comments in #defines, reenable comment saving
  // mode.
  if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);

  MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
      MacroNameTok, ImmediatelyAfterHeaderGuard);

  // A null result means the macro body was invalid.
  if (!MI) return;

  if (MacroShadowsKeyword &&
      !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
    Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
  }
  // Check that there is no paste (##) operator at the beginning or end of the
  // replacement list.
  unsigned NumTokens = MI->getNumTokens();
  if (NumTokens != 0) {
    if (MI->getReplacementToken(0).is(tok::hashhash)) {
      Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
      return;
    }
    if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
      Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
      return;
    }
  }

  // When skipping just warn about macros that do not match.
  if (SkippingUntilPCHThroughHeader) {
    const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
    if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
                             /*Syntactic=*/LangOpts.MicrosoftExt))
      Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
          << MacroNameTok.getIdentifierInfo();
    // Issue the diagnostic but allow the change if msvc extensions are enabled
    if (!LangOpts.MicrosoftExt)
      return;
  }

  // Finally, if this identifier already had a macro defined for it, verify that
  // the macro bodies are identical, and issue diagnostics if they are not.
  if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
    // Final macros are hard-mode: they always warn. Even if the bodies are
    // identical. Even if they are in system headers. Even if they are things we
    // would silently allow in the past.
    if (MacroNameTok.getIdentifierInfo()->isFinal())
      emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);

    // In Objective-C, ignore attempts to directly redefine the builtin
    // definitions of the ownership qualifiers. It's still possible to
    // #undef them.
    if (getLangOpts().ObjC &&
        SourceMgr.getFileID(OtherMI->getDefinitionLoc()) ==
            getPredefinesFileID() &&
        isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
      // Warn if it changes the tokens.
      if ((!getDiagnostics().getSuppressSystemWarnings() ||
           !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
          !MI->isIdenticalTo(*OtherMI, *this,
                             /*Syntactic=*/LangOpts.MicrosoftExt)) {
        Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
      }
      assert(!OtherMI->isWarnIfUnused());
      return;
    }

    // It is very common for system headers to have tons of macro redefinitions
    // and for warnings to be disabled in system headers. If this is the case,
    // then don't bother calling MacroInfo::isIdenticalTo.
    if (!getDiagnostics().getSuppressSystemWarnings() ||
        !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {

      if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
        Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);

      // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
      // C++ [cpp.predefined]p4, but allow it as an extension.
      if (isLanguageDefinedBuiltin(SourceMgr, OtherMI, II->getName()))
        Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
      // Macros must be identical. This means all tokens and whitespace
      // separation must be the same. C99 6.10.3p2.
      else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
               !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
        Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
          << MacroNameTok.getIdentifierInfo();
        Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
      }
    }
    // The previous definition is being replaced, so stop tracking it for the
    // unused-macro warning.
    if (OtherMI->isWarnIfUnused())
      WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
  }

  DefMacroDirective *MD =
      appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);

  assert(!MI->isUsed());
  // If we need warning for not using the macro, add its location in the
  // warn-because-unused-macro set. If it gets used it will be removed from set.
  if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
      !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
      !MacroExpansionInDirectivesOverride &&
      getSourceManager().getFileID(MI->getDefinitionLoc()) !=
          getPredefinesFileID()) {
    MI->setIsWarnIfUnused(true);
    WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
  }

  // If the callbacks want to know, tell them about the macro definition.
  if (Callbacks)
    Callbacks->MacroDefined(MacroNameTok, MD);

  // If we're in MS compatibility mode and the macro being defined is the
  // assert macro, implicitly add a macro definition for static_assert to work
  // around their broken assert.h header file in C. Only do so if there isn't
  // already a static_assert macro defined.
  if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
      MacroNameTok.getIdentifierInfo()->isStr("assert") &&
      !isMacroDefined("static_assert")) {
    // Note: this MI shadows the function-level MI for the rest of this scope.
    MacroInfo *MI = AllocateMacroInfo(SourceLocation());

    Token Tok;
    Tok.startToken();
    Tok.setKind(tok::kw__Static_assert);
    Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
    MI->setTokens({Tok}, BP);
    (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
  }
}

/// HandleUndefDirective - Implements \#undef.
///
/// An UndefMacroDirective is allocated and appended only when the name
/// currently has a macro definition; the MacroUndefined callback is invoked
/// either way.
void Preprocessor::HandleUndefDirective() {
  ++NumUndefined;

  Token MacroNameTok;
  ReadMacroName(MacroNameTok, MU_Undef);

  // Error reading macro name? If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod))
    return;

  // Check to see if this is the last token on the #undef line.
  CheckEndOfDirective("undef");

  // Okay, we have a valid identifier to undef.
  auto *II = MacroNameTok.getIdentifierInfo();
  auto MD = getMacroDefinition(II);
  UndefMacroDirective *Undef = nullptr;

  if (II->isFinal())
    emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);

  // If the macro is not defined, this is a noop undef.
  if (const MacroInfo *MI = MD.getMacroInfo()) {
    if (!MI->isUsed() && MI->isWarnIfUnused())
      Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);

    // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
    // C++ [cpp.predefined]p4, but allow it as an extension.
    if (isLanguageDefinedBuiltin(SourceMgr, MI, II->getName()))
      Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);

    if (MI->isWarnIfUnused())
      WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());

    Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
  }

  // If the callbacks want to know, tell them about the macro #undef.
  // Note: no matter if the macro was defined or not.
  if (Callbacks)
    Callbacks->MacroUndefined(MacroNameTok, MD, Undef);

  if (Undef)
    appendMacroDirective(II, Undef);
}

//===----------------------------------------------------------------------===//
// Preprocessor Conditional Directive Handling.
//===----------------------------------------------------------------------===//

/// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive. isIfndef
/// is true when this is a \#ifndef directive. ReadAnyTokensBeforeDirective is
/// true if any tokens have been returned or pp-directives activated before this
/// \#ifndef has been lexed.
///
/// \param Result The directive token; it is copied on entry and the copy's
/// location is used for the conditional stack, the callbacks and skipping.
/// \param HashToken The '#' token; its location is passed to
/// SkipExcludedConditionalBlock.
void Preprocessor::HandleIfdefDirective(Token &Result,
                                        const Token &HashToken,
                                        bool isIfndef,
                                        bool ReadAnyTokensBeforeDirective) {
  ++NumIf;
  Token DirectiveTok = Result;

  Token MacroNameTok;
  ReadMacroName(MacroNameTok);

  // Error reading macro name? If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod)) {
    // Skip code until we get to #endif. This helps with recovery by not
    // emitting an error when the #endif is reached.
    SkipExcludedConditionalBlock(HashToken.getLocation(),
                                 DirectiveTok.getLocation(),
                                 /*Foundnonskip*/ false, /*FoundElse*/ false);
    return;
  }

  emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true);

  // Check to see if this is the last token on the #if[n]def line.
  CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");

  IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
  auto MD = getMacroDefinition(MII);
  MacroInfo *MI = MD.getMacroInfo();

  if (CurPPLexer->getConditionalStackDepth() == 0) {
    // If the start of a top-level #ifdef and if the macro is not defined,
    // inform MIOpt that this might be the start of a proper include guard.
    // Otherwise it is some other form of unknown conditional which we can't
    // handle.
    if (!ReadAnyTokensBeforeDirective && !MI) {
      // NOTE(review): the assert implies callers never pass
      // ReadAnyTokensBeforeDirective == false for #ifdef -- verify against
      // the caller.
      assert(isIfndef && "#ifdef shouldn't reach here");
      CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
    } else
      CurPPLexer->MIOpt.EnterTopLevelConditional();
  }

  // If there is a macro, process it.
  if (MI)  // Mark it used.
    markMacroAsUsed(MI);

  if (Callbacks) {
    if (isIfndef)
      Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
    else
      Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
  }

  bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
    getSourceManager().isInMainFile(DirectiveTok.getLocation());

  // Should we include the stuff contained by this directive?
  if (PPOpts->SingleFileParseMode && !MI) {
    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
    // the directive blocks.
    CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
                                     /*wasskip*/false, /*foundnonskip*/false,
                                     /*foundelse*/false);
  } else if (!MI == isIfndef || RetainExcludedCB) {
    // '!MI == isIfndef' holds for #ifndef of an undefined macro and for
    // #ifdef of a defined one.
    // Yes, remember that we are inside a conditional, then lex the next token.
    CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
                                     /*wasskip*/false, /*foundnonskip*/true,
                                     /*foundelse*/false);
  } else {
    // No, skip the contents of this block.
    SkipExcludedConditionalBlock(HashToken.getLocation(),
                                 DirectiveTok.getLocation(),
                                 /*Foundnonskip*/ false,
                                 /*FoundElse*/ false);
  }
}

/// HandleIfDirective - Implements the \#if directive.
///
void Preprocessor::HandleIfDirective(Token &IfToken,
                                     const Token &HashToken,
                                     bool ReadAnyTokensBeforeDirective) {
  ++NumIf;

  // Parse and evaluate the conditional expression.
3433 IdentifierInfo *IfNDefMacro = nullptr; 3434 const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro); 3435 const bool ConditionalTrue = DER.Conditional; 3436 // Lexer might become invalid if we hit code completion point while evaluating 3437 // expression. 3438 if (!CurPPLexer) 3439 return; 3440 3441 // If this condition is equivalent to #ifndef X, and if this is the first 3442 // directive seen, handle it for the multiple-include optimization. 3443 if (CurPPLexer->getConditionalStackDepth() == 0) { 3444 if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue) 3445 // FIXME: Pass in the location of the macro name, not the 'if' token. 3446 CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation()); 3447 else 3448 CurPPLexer->MIOpt.EnterTopLevelConditional(); 3449 } 3450 3451 if (Callbacks) 3452 Callbacks->If( 3453 IfToken.getLocation(), DER.ExprRange, 3454 (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False)); 3455 3456 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks && 3457 getSourceManager().isInMainFile(IfToken.getLocation()); 3458 3459 // Should we include the stuff contained by this directive? 3460 if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) { 3461 // In 'single-file-parse mode' undefined identifiers trigger parsing of all 3462 // the directive blocks. 3463 CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false, 3464 /*foundnonskip*/false, /*foundelse*/false); 3465 } else if (ConditionalTrue || RetainExcludedCB) { 3466 // Yes, remember that we are inside a conditional, then lex the next token. 3467 CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false, 3468 /*foundnonskip*/true, /*foundelse*/false); 3469 } else { 3470 // No, skip the contents of this block. 
3471 SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(), 3472 /*Foundnonskip*/ false, 3473 /*FoundElse*/ false); 3474 } 3475 } 3476 3477 /// HandleEndifDirective - Implements the \#endif directive. 3478 /// 3479 void Preprocessor::HandleEndifDirective(Token &EndifToken) { 3480 ++NumEndif; 3481 3482 // Check that this is the whole directive. 3483 CheckEndOfDirective("endif"); 3484 3485 PPConditionalInfo CondInfo; 3486 if (CurPPLexer->popConditionalLevel(CondInfo)) { 3487 // No conditionals on the stack: this is an #endif without an #if. 3488 Diag(EndifToken, diag::err_pp_endif_without_if); 3489 return; 3490 } 3491 3492 // If this the end of a top-level #endif, inform MIOpt. 3493 if (CurPPLexer->getConditionalStackDepth() == 0) 3494 CurPPLexer->MIOpt.ExitTopLevelConditional(); 3495 3496 assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode && 3497 "This code should only be reachable in the non-skipping case!"); 3498 3499 if (Callbacks) 3500 Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc); 3501 } 3502 3503 /// HandleElseDirective - Implements the \#else directive. 3504 /// 3505 void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) { 3506 ++NumElse; 3507 3508 // #else directive in a non-skipping conditional... start skipping. 3509 CheckEndOfDirective("else"); 3510 3511 PPConditionalInfo CI; 3512 if (CurPPLexer->popConditionalLevel(CI)) { 3513 Diag(Result, diag::pp_err_else_without_if); 3514 return; 3515 } 3516 3517 // If this is a top-level #else, inform the MIOpt. 3518 if (CurPPLexer->getConditionalStackDepth() == 0) 3519 CurPPLexer->MIOpt.EnterTopLevelConditional(); 3520 3521 // If this is a #else with a #else before it, report the error. 
3522 if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else); 3523 3524 if (Callbacks) 3525 Callbacks->Else(Result.getLocation(), CI.IfLoc); 3526 3527 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks && 3528 getSourceManager().isInMainFile(Result.getLocation()); 3529 3530 if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) { 3531 // In 'single-file-parse mode' undefined identifiers trigger parsing of all 3532 // the directive blocks. 3533 CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false, 3534 /*foundnonskip*/false, /*foundelse*/true); 3535 return; 3536 } 3537 3538 // Finally, skip the rest of the contents of this block. 3539 SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc, 3540 /*Foundnonskip*/ true, 3541 /*FoundElse*/ true, Result.getLocation()); 3542 } 3543 3544 /// Implements the \#elif, \#elifdef, and \#elifndef directives. 3545 void Preprocessor::HandleElifFamilyDirective(Token &ElifToken, 3546 const Token &HashToken, 3547 tok::PPKeywordKind Kind) { 3548 PPElifDiag DirKind = Kind == tok::pp_elif ? PED_Elif 3549 : Kind == tok::pp_elifdef ? PED_Elifdef 3550 : PED_Elifndef; 3551 ++NumElse; 3552 3553 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode. 3554 switch (DirKind) { 3555 case PED_Elifdef: 3556 case PED_Elifndef: 3557 unsigned DiagID; 3558 if (LangOpts.CPlusPlus) 3559 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive 3560 : diag::ext_cxx23_pp_directive; 3561 else 3562 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive 3563 : diag::ext_c23_pp_directive; 3564 Diag(ElifToken, DiagID) << DirKind; 3565 break; 3566 default: 3567 break; 3568 } 3569 3570 // #elif directive in a non-skipping conditional... start skipping. 3571 // We don't care what the condition is, because we will always skip it (since 3572 // the block immediately before it was included). 
3573 SourceRange ConditionRange = DiscardUntilEndOfDirective(); 3574 3575 PPConditionalInfo CI; 3576 if (CurPPLexer->popConditionalLevel(CI)) { 3577 Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind; 3578 return; 3579 } 3580 3581 // If this is a top-level #elif, inform the MIOpt. 3582 if (CurPPLexer->getConditionalStackDepth() == 0) 3583 CurPPLexer->MIOpt.EnterTopLevelConditional(); 3584 3585 // If this is a #elif with a #else before it, report the error. 3586 if (CI.FoundElse) 3587 Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind; 3588 3589 if (Callbacks) { 3590 switch (Kind) { 3591 case tok::pp_elif: 3592 Callbacks->Elif(ElifToken.getLocation(), ConditionRange, 3593 PPCallbacks::CVK_NotEvaluated, CI.IfLoc); 3594 break; 3595 case tok::pp_elifdef: 3596 Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc); 3597 break; 3598 case tok::pp_elifndef: 3599 Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc); 3600 break; 3601 default: 3602 assert(false && "unexpected directive kind"); 3603 break; 3604 } 3605 } 3606 3607 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks && 3608 getSourceManager().isInMainFile(ElifToken.getLocation()); 3609 3610 if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) { 3611 // In 'single-file-parse mode' undefined identifiers trigger parsing of all 3612 // the directive blocks. 3613 CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false, 3614 /*foundnonskip*/false, /*foundelse*/false); 3615 return; 3616 } 3617 3618 // Finally, skip the rest of the contents of this block. 
3619 SkipExcludedConditionalBlock( 3620 HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true, 3621 /*FoundElse*/ CI.FoundElse, ElifToken.getLocation()); 3622 } 3623 3624 std::optional<LexEmbedParametersResult> 3625 Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) { 3626 LexEmbedParametersResult Result{}; 3627 SmallVector<Token, 2> ParameterTokens; 3628 tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod; 3629 3630 auto DiagMismatchedBracesAndSkipToEOD = 3631 [&](tok::TokenKind Expected, 3632 std::pair<tok::TokenKind, SourceLocation> Matches) { 3633 Diag(CurTok, diag::err_expected) << Expected; 3634 Diag(Matches.second, diag::note_matching) << Matches.first; 3635 if (CurTok.isNot(tok::eod)) 3636 DiscardUntilEndOfDirective(CurTok); 3637 }; 3638 3639 auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) { 3640 if (CurTok.isNot(Kind)) { 3641 Diag(CurTok, diag::err_expected) << Kind; 3642 if (CurTok.isNot(tok::eod)) 3643 DiscardUntilEndOfDirective(CurTok); 3644 return false; 3645 } 3646 return true; 3647 }; 3648 3649 // C23 6.10: 3650 // pp-parameter-name: 3651 // pp-standard-parameter 3652 // pp-prefixed-parameter 3653 // 3654 // pp-standard-parameter: 3655 // identifier 3656 // 3657 // pp-prefixed-parameter: 3658 // identifier :: identifier 3659 auto LexPPParameterName = [&]() -> std::optional<std::string> { 3660 // We expect the current token to be an identifier; if it's not, things 3661 // have gone wrong. 3662 if (!ExpectOrDiagAndSkipToEOD(tok::identifier)) 3663 return std::nullopt; 3664 3665 const IdentifierInfo *Prefix = CurTok.getIdentifierInfo(); 3666 3667 // Lex another token; it is either a :: or we're done with the parameter 3668 // name. 3669 LexNonComment(CurTok); 3670 if (CurTok.is(tok::coloncolon)) { 3671 // We found a ::, so lex another identifier token. 
3672 LexNonComment(CurTok); 3673 if (!ExpectOrDiagAndSkipToEOD(tok::identifier)) 3674 return std::nullopt; 3675 3676 const IdentifierInfo *Suffix = CurTok.getIdentifierInfo(); 3677 3678 // Lex another token so we're past the name. 3679 LexNonComment(CurTok); 3680 return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str(); 3681 } 3682 return Prefix->getName().str(); 3683 }; 3684 3685 // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by 3686 // this document as an identifier pp_param and an identifier of the form 3687 // __pp_param__ shall behave the same when used as a preprocessor parameter, 3688 // except for the spelling. 3689 auto NormalizeParameterName = [](StringRef Name) { 3690 if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__")) 3691 return Name.substr(2, Name.size() - 4); 3692 return Name; 3693 }; 3694 3695 auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> { 3696 // we have a limit parameter and its internals are processed using 3697 // evaluation rules from #if. 3698 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren)) 3699 return std::nullopt; 3700 3701 // We do not consume the ( because EvaluateDirectiveExpression will lex 3702 // the next token for us. 3703 IdentifierInfo *ParameterIfNDef = nullptr; 3704 bool EvaluatedDefined; 3705 DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression( 3706 ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false); 3707 3708 if (!LimitEvalResult.Value) { 3709 // If there was an error evaluating the directive expression, we expect 3710 // to be at the end of directive token. 3711 assert(CurTok.is(tok::eod) && "expect to be at the end of directive"); 3712 return std::nullopt; 3713 } 3714 3715 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren)) 3716 return std::nullopt; 3717 3718 // Eat the ). 3719 LexNonComment(CurTok); 3720 3721 // C23 6.10.3.2p2: The token defined shall not appear within the constant 3722 // expression. 
3723 if (EvaluatedDefined) { 3724 Diag(CurTok, diag::err_defined_in_pp_embed); 3725 return std::nullopt; 3726 } 3727 3728 if (LimitEvalResult.Value) { 3729 const llvm::APSInt &Result = *LimitEvalResult.Value; 3730 if (Result.isNegative()) { 3731 Diag(CurTok, diag::err_requires_positive_value) 3732 << toString(Result, 10) << /*positive*/ 0; 3733 return std::nullopt; 3734 } 3735 return Result.getLimitedValue(); 3736 } 3737 return std::nullopt; 3738 }; 3739 3740 auto GetMatchingCloseBracket = [](tok::TokenKind Kind) { 3741 switch (Kind) { 3742 case tok::l_paren: 3743 return tok::r_paren; 3744 case tok::l_brace: 3745 return tok::r_brace; 3746 case tok::l_square: 3747 return tok::r_square; 3748 default: 3749 llvm_unreachable("should not get here"); 3750 } 3751 }; 3752 3753 auto LexParenthesizedBalancedTokenSoup = 3754 [&](llvm::SmallVectorImpl<Token> &Tokens) { 3755 std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack; 3756 3757 // We expect the current token to be a left paren. 3758 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren)) 3759 return false; 3760 LexNonComment(CurTok); // Eat the ( 3761 3762 bool WaitingForInnerCloseParen = false; 3763 while (CurTok.isNot(tok::eod) && 3764 (WaitingForInnerCloseParen || CurTok.isNot(tok::r_paren))) { 3765 switch (CurTok.getKind()) { 3766 default: // Shutting up diagnostics about not fully-covered switch. 
3767 break; 3768 case tok::l_paren: 3769 WaitingForInnerCloseParen = true; 3770 [[fallthrough]]; 3771 case tok::l_brace: 3772 case tok::l_square: 3773 BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()}); 3774 break; 3775 case tok::r_paren: 3776 WaitingForInnerCloseParen = false; 3777 [[fallthrough]]; 3778 case tok::r_brace: 3779 case tok::r_square: { 3780 tok::TokenKind Matching = 3781 GetMatchingCloseBracket(BracketStack.back().first); 3782 if (BracketStack.empty() || CurTok.getKind() != Matching) { 3783 DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back()); 3784 return false; 3785 } 3786 BracketStack.pop_back(); 3787 } break; 3788 } 3789 Tokens.push_back(CurTok); 3790 LexNonComment(CurTok); 3791 } 3792 3793 // When we're done, we want to eat the closing paren. 3794 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren)) 3795 return false; 3796 3797 LexNonComment(CurTok); // Eat the ) 3798 return true; 3799 }; 3800 3801 LexNonComment(CurTok); // Prime the pump. 3802 while (!CurTok.isOneOf(EndTokenKind, tok::eod)) { 3803 SourceLocation ParamStartLoc = CurTok.getLocation(); 3804 std::optional<std::string> ParamName = LexPPParameterName(); 3805 if (!ParamName) 3806 return std::nullopt; 3807 StringRef Parameter = NormalizeParameterName(*ParamName); 3808 3809 // Lex the parameters (dependent on the parameter type we want!). 3810 // 3811 // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or 3812 // one time in the embed parameter sequence. 
3813 if (Parameter == "limit") { 3814 if (Result.MaybeLimitParam) 3815 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; 3816 3817 std::optional<size_t> Limit = LexParenthesizedIntegerExpr(); 3818 if (!Limit) 3819 return std::nullopt; 3820 Result.MaybeLimitParam = 3821 PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}}; 3822 } else if (Parameter == "clang::offset") { 3823 if (Result.MaybeOffsetParam) 3824 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; 3825 3826 std::optional<size_t> Offset = LexParenthesizedIntegerExpr(); 3827 if (!Offset) 3828 return std::nullopt; 3829 Result.MaybeOffsetParam = PPEmbedParameterOffset{ 3830 *Offset, {ParamStartLoc, CurTok.getLocation()}}; 3831 } else if (Parameter == "prefix") { 3832 if (Result.MaybePrefixParam) 3833 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; 3834 3835 SmallVector<Token, 4> Soup; 3836 if (!LexParenthesizedBalancedTokenSoup(Soup)) 3837 return std::nullopt; 3838 Result.MaybePrefixParam = PPEmbedParameterPrefix{ 3839 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}}; 3840 } else if (Parameter == "suffix") { 3841 if (Result.MaybeSuffixParam) 3842 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; 3843 3844 SmallVector<Token, 4> Soup; 3845 if (!LexParenthesizedBalancedTokenSoup(Soup)) 3846 return std::nullopt; 3847 Result.MaybeSuffixParam = PPEmbedParameterSuffix{ 3848 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}}; 3849 } else if (Parameter == "if_empty") { 3850 if (Result.MaybeIfEmptyParam) 3851 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; 3852 3853 SmallVector<Token, 4> Soup; 3854 if (!LexParenthesizedBalancedTokenSoup(Soup)) 3855 return std::nullopt; 3856 Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{ 3857 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}}; 3858 } else { 3859 ++Result.UnrecognizedParams; 3860 3861 // If there's a left paren, we need to parse a balanced token sequence 3862 // and just eat those tokens. 
3863 if (CurTok.is(tok::l_paren)) { 3864 SmallVector<Token, 4> Soup; 3865 if (!LexParenthesizedBalancedTokenSoup(Soup)) 3866 return std::nullopt; 3867 } 3868 if (!ForHasEmbed) { 3869 Diag(CurTok, diag::err_pp_unknown_parameter) << 1 << Parameter; 3870 return std::nullopt; 3871 } 3872 } 3873 } 3874 return Result; 3875 } 3876 3877 void Preprocessor::HandleEmbedDirectiveImpl( 3878 SourceLocation HashLoc, const LexEmbedParametersResult &Params, 3879 StringRef BinaryContents) { 3880 if (BinaryContents.empty()) { 3881 // If we have no binary contents, the only thing we need to emit are the 3882 // if_empty tokens, if any. 3883 // FIXME: this loses AST fidelity; nothing in the compiler will see that 3884 // these tokens came from #embed. We have to hack around this when printing 3885 // preprocessed output. The same is true for prefix and suffix tokens. 3886 if (Params.MaybeIfEmptyParam) { 3887 ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens; 3888 size_t TokCount = Toks.size(); 3889 auto NewToks = std::make_unique<Token[]>(TokCount); 3890 llvm::copy(Toks, NewToks.get()); 3891 EnterTokenStream(std::move(NewToks), TokCount, true, true); 3892 } 3893 return; 3894 } 3895 3896 size_t NumPrefixToks = Params.PrefixTokenCount(), 3897 NumSuffixToks = Params.SuffixTokenCount(); 3898 size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks; 3899 size_t CurIdx = 0; 3900 auto Toks = std::make_unique<Token[]>(TotalNumToks); 3901 3902 // Add the prefix tokens, if any. 3903 if (Params.MaybePrefixParam) { 3904 llvm::copy(Params.MaybePrefixParam->Tokens, &Toks[CurIdx]); 3905 CurIdx += NumPrefixToks; 3906 } 3907 3908 EmbedAnnotationData *Data = new (BP) EmbedAnnotationData; 3909 Data->BinaryData = BinaryContents; 3910 3911 Toks[CurIdx].startToken(); 3912 Toks[CurIdx].setKind(tok::annot_embed); 3913 Toks[CurIdx].setAnnotationRange(HashLoc); 3914 Toks[CurIdx++].setAnnotationValue(Data); 3915 3916 // Now add the suffix tokens, if any. 
3917 if (Params.MaybeSuffixParam) { 3918 llvm::copy(Params.MaybeSuffixParam->Tokens, &Toks[CurIdx]); 3919 CurIdx += NumSuffixToks; 3920 } 3921 3922 assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens"); 3923 EnterTokenStream(std::move(Toks), TotalNumToks, true, true); 3924 } 3925 3926 void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, 3927 const FileEntry *LookupFromFile) { 3928 // Give the usual extension/compatibility warnings. 3929 if (LangOpts.C23) 3930 Diag(EmbedTok, diag::warn_compat_pp_embed_directive); 3931 else 3932 Diag(EmbedTok, diag::ext_pp_embed_directive) 3933 << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0); 3934 3935 // Parse the filename header 3936 Token FilenameTok; 3937 if (LexHeaderName(FilenameTok)) 3938 return; 3939 3940 if (FilenameTok.isNot(tok::header_name)) { 3941 Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); 3942 if (FilenameTok.isNot(tok::eod)) 3943 DiscardUntilEndOfDirective(); 3944 return; 3945 } 3946 3947 // Parse the optional sequence of 3948 // directive-parameters: 3949 // identifier parameter-name-list[opt] directive-argument-list[opt] 3950 // directive-argument-list: 3951 // '(' balanced-token-sequence ')' 3952 // parameter-name-list: 3953 // '::' identifier parameter-name-list[opt] 3954 Token CurTok; 3955 std::optional<LexEmbedParametersResult> Params = 3956 LexEmbedParameters(CurTok, /*ForHasEmbed=*/false); 3957 3958 assert((Params || CurTok.is(tok::eod)) && 3959 "expected success or to be at the end of the directive"); 3960 if (!Params) 3961 return; 3962 3963 // Now, splat the data out! 3964 SmallString<128> FilenameBuffer; 3965 StringRef Filename = getSpelling(FilenameTok, FilenameBuffer); 3966 StringRef OriginalFilename = Filename; 3967 bool isAngled = 3968 GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); 3969 // If GetIncludeFilenameSpelling set the start ptr to null, there was an 3970 // error. 
3971 assert(!Filename.empty()); 3972 OptionalFileEntryRef MaybeFileRef = 3973 this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile); 3974 if (!MaybeFileRef) { 3975 // could not find file 3976 if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) { 3977 return; 3978 } 3979 Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; 3980 return; 3981 } 3982 std::optional<llvm::MemoryBufferRef> MaybeFile = 3983 getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef); 3984 if (!MaybeFile) { 3985 // could not find file 3986 Diag(FilenameTok, diag::err_cannot_open_file) 3987 << Filename << "a buffer to the contents could not be created"; 3988 return; 3989 } 3990 StringRef BinaryContents = MaybeFile->getBuffer(); 3991 3992 // The order is important between 'offset' and 'limit'; we want to offset 3993 // first and then limit second; otherwise we may reduce the notional resource 3994 // size to something too small to offset into. 3995 if (Params->MaybeOffsetParam) { 3996 // FIXME: just like with the limit() and if_empty() parameters, this loses 3997 // source fidelity in the AST; it has no idea that there was an offset 3998 // involved. 3999 // offsets all the way to the end of the file make for an empty file. 4000 BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset); 4001 } 4002 4003 if (Params->MaybeLimitParam) { 4004 // FIXME: just like with the clang::offset() and if_empty() parameters, 4005 // this loses source fidelity in the AST; it has no idea there was a limit 4006 // involved. 4007 BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit); 4008 } 4009 4010 if (Callbacks) 4011 Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef, 4012 *Params); 4013 HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents); 4014 } 4015