xref: /freebsd/contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Implements # directive processing for the Preprocessor.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/DirectoryEntry.h"
16 #include "clang/Basic/FileManager.h"
17 #include "clang/Basic/IdentifierTable.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/Module.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Basic/TargetInfo.h"
23 #include "clang/Basic/TokenKinds.h"
24 #include "clang/Lex/CodeCompletionHandler.h"
25 #include "clang/Lex/HeaderSearch.h"
26 #include "clang/Lex/HeaderSearchOptions.h"
27 #include "clang/Lex/LexDiagnostic.h"
28 #include "clang/Lex/LiteralSupport.h"
29 #include "clang/Lex/MacroInfo.h"
30 #include "clang/Lex/ModuleLoader.h"
31 #include "clang/Lex/ModuleMap.h"
32 #include "clang/Lex/PPCallbacks.h"
33 #include "clang/Lex/Pragma.h"
34 #include "clang/Lex/Preprocessor.h"
35 #include "clang/Lex/PreprocessorOptions.h"
36 #include "clang/Lex/Token.h"
37 #include "clang/Lex/VariadicMacroSupport.h"
38 #include "llvm/ADT/ArrayRef.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/ADT/ScopeExit.h"
41 #include "llvm/ADT/SmallString.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringExtras.h"
44 #include "llvm/ADT/StringRef.h"
45 #include "llvm/ADT/StringSwitch.h"
46 #include "llvm/Support/AlignOf.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/Path.h"
49 #include "llvm/Support/SaveAndRestore.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstring>
53 #include <new>
54 #include <optional>
55 #include <string>
56 #include <utility>
57 
58 using namespace clang;
59 
60 //===----------------------------------------------------------------------===//
61 // Utility Methods for Preprocessor Directive Handling.
62 //===----------------------------------------------------------------------===//
63 
AllocateMacroInfo(SourceLocation L)64 MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
65   static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
66   return new (BP) MacroInfo(L);
67 }
68 
AllocateDefMacroDirective(MacroInfo * MI,SourceLocation Loc)69 DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
70                                                            SourceLocation Loc) {
71   return new (BP) DefMacroDirective(MI, Loc);
72 }
73 
74 UndefMacroDirective *
AllocateUndefMacroDirective(SourceLocation UndefLoc)75 Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
76   return new (BP) UndefMacroDirective(UndefLoc);
77 }
78 
79 VisibilityMacroDirective *
AllocateVisibilityMacroDirective(SourceLocation Loc,bool isPublic)80 Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
81                                                bool isPublic) {
82   return new (BP) VisibilityMacroDirective(Loc, isPublic);
83 }
84 
85 /// Read and discard all tokens remaining on the current line until
86 /// the tok::eod token is found.
DiscardUntilEndOfDirective(Token & Tmp)87 SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) {
88   SourceRange Res;
89 
90   LexUnexpandedToken(Tmp);
91   Res.setBegin(Tmp.getLocation());
92   while (Tmp.isNot(tok::eod)) {
93     assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
94     LexUnexpandedToken(Tmp);
95   }
96   Res.setEnd(Tmp.getLocation());
97   return Res;
98 }
99 
/// Classifies a \#define / \#undef of an identifier for the purpose of
/// reserved-identifier and keyword-shadowing diagnostics.
enum MacroDiag {
  MD_NoWarn = 0,       ///< Not a reserved identifier.
  MD_KeywordDef = 1,   ///< Macro hides keyword, enabled by default.
  MD_ReservedMacro = 2 ///< \#define or \#undef reserved id, disabled by default.
};
106 
/// Index values passed to the %select in the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics. The numeric values are spelled out
/// because they are streamed into diagnostics as %select indices.
enum PPElifDiag {
  PED_Elif = 0,
  PED_Elifdef = 1,
  PED_Elifndef = 2
};
114 
isFeatureTestMacro(StringRef MacroName)115 static bool isFeatureTestMacro(StringRef MacroName) {
116   // list from:
117   // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
118   // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
119   // * man 7 feature_test_macros
120   // The list must be sorted for correct binary search.
121   static constexpr StringRef ReservedMacro[] = {
122       "_ATFILE_SOURCE",
123       "_BSD_SOURCE",
124       "_CRT_NONSTDC_NO_WARNINGS",
125       "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
126       "_CRT_SECURE_NO_WARNINGS",
127       "_FILE_OFFSET_BITS",
128       "_FORTIFY_SOURCE",
129       "_GLIBCXX_ASSERTIONS",
130       "_GLIBCXX_CONCEPT_CHECKS",
131       "_GLIBCXX_DEBUG",
132       "_GLIBCXX_DEBUG_PEDANTIC",
133       "_GLIBCXX_PARALLEL",
134       "_GLIBCXX_PARALLEL_ASSERTIONS",
135       "_GLIBCXX_SANITIZE_VECTOR",
136       "_GLIBCXX_USE_CXX11_ABI",
137       "_GLIBCXX_USE_DEPRECATED",
138       "_GNU_SOURCE",
139       "_ISOC11_SOURCE",
140       "_ISOC95_SOURCE",
141       "_ISOC99_SOURCE",
142       "_LARGEFILE64_SOURCE",
143       "_POSIX_C_SOURCE",
144       "_REENTRANT",
145       "_SVID_SOURCE",
146       "_THREAD_SAFE",
147       "_XOPEN_SOURCE",
148       "_XOPEN_SOURCE_EXTENDED",
149       "__STDCPP_WANT_MATH_SPEC_FUNCS__",
150       "__STDC_FORMAT_MACROS",
151   };
152   return std::binary_search(std::begin(ReservedMacro), std::end(ReservedMacro),
153                             MacroName);
154 }
155 
isLanguageDefinedBuiltin(const SourceManager & SourceMgr,const MacroInfo * MI,const StringRef MacroName)156 static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
157                                      const MacroInfo *MI,
158                                      const StringRef MacroName) {
159   // If this is a macro with special handling (like __LINE__) then it's language
160   // defined.
161   if (MI->isBuiltinMacro())
162     return true;
163   // Builtin macros are defined in the builtin file
164   if (!SourceMgr.isWrittenInBuiltinFile(MI->getDefinitionLoc()))
165     return false;
166   // C defines macros starting with __STDC, and C++ defines macros starting with
167   // __STDCPP
168   if (MacroName.starts_with("__STDC"))
169     return true;
170   // C++ defines the __cplusplus macro
171   if (MacroName == "__cplusplus")
172     return true;
173   // C++ defines various feature-test macros starting with __cpp
174   if (MacroName.starts_with("__cpp"))
175     return true;
176   // Anything else isn't language-defined
177   return false;
178 }
179 
shouldWarnOnMacroDef(Preprocessor & PP,IdentifierInfo * II)180 static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
181   const LangOptions &Lang = PP.getLangOpts();
182   StringRef Text = II->getName();
183   if (isReservedInAllContexts(II->isReserved(Lang)))
184     return isFeatureTestMacro(Text) ? MD_NoWarn : MD_ReservedMacro;
185   if (II->isKeyword(Lang))
186     return MD_KeywordDef;
187   if (Lang.CPlusPlus11 && (Text == "override" || Text == "final"))
188     return MD_KeywordDef;
189   return MD_NoWarn;
190 }
191 
shouldWarnOnMacroUndef(Preprocessor & PP,IdentifierInfo * II)192 static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
193   const LangOptions &Lang = PP.getLangOpts();
194   // Do not warn on keyword undef.  It is generally harmless and widely used.
195   if (isReservedInAllContexts(II->isReserved(Lang)))
196     return MD_ReservedMacro;
197   return MD_NoWarn;
198 }
199 
200 // Return true if we want to issue a diagnostic by default if we
201 // encounter this name in a #include with the wrong case. For now,
202 // this includes the standard C and C++ headers, Posix headers,
203 // and Boost headers. Improper case for these #includes is a
204 // potential portability issue.
warnByDefaultOnWrongCase(StringRef Include)205 static bool warnByDefaultOnWrongCase(StringRef Include) {
206   // If the first component of the path is "boost", treat this like a standard header
207   // for the purposes of diagnostics.
208   if (::llvm::sys::path::begin(Include)->equals_insensitive("boost"))
209     return true;
210 
211   // "condition_variable" is the longest standard header name at 18 characters.
212   // If the include file name is longer than that, it can't be a standard header.
213   static const size_t MaxStdHeaderNameLen = 18u;
214   if (Include.size() > MaxStdHeaderNameLen)
215     return false;
216 
217   // Lowercase and normalize the search string.
218   SmallString<32> LowerInclude{Include};
219   for (char &Ch : LowerInclude) {
220     // In the ASCII range?
221     if (static_cast<unsigned char>(Ch) > 0x7f)
222       return false; // Can't be a standard header
223     // ASCII lowercase:
224     if (Ch >= 'A' && Ch <= 'Z')
225       Ch += 'a' - 'A';
226     // Normalize path separators for comparison purposes.
227     else if (::llvm::sys::path::is_separator(Ch))
228       Ch = '/';
229   }
230 
231   // The standard C/C++ and Posix headers
232   return llvm::StringSwitch<bool>(LowerInclude)
233     // C library headers
234     .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
235     .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
236     .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
237     .Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stddef.h", true)
238     .Cases("stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true)
239     .Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true)
240     .Cases("wchar.h", "wctype.h", true)
241 
242     // C++ headers for C library facilities
243     .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
244     .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
245     .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
246     .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
247     .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
248     .Case("cwctype", true)
249 
250     // C++ library headers
251     .Cases("algorithm", "fstream", "list", "regex", "thread", true)
252     .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
253     .Cases("atomic", "future", "map", "set", "type_traits", true)
254     .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
255     .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
256     .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
257     .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
258     .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
259     .Cases("deque", "istream", "queue", "string", "valarray", true)
260     .Cases("exception", "iterator", "random", "strstream", "vector", true)
261     .Cases("forward_list", "limits", "ratio", "system_error", true)
262 
263     // POSIX headers (which aren't also C headers)
264     .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
265     .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
266     .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
267     .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
268     .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
269     .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
270     .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
271     .Cases("sys/resource.h", "sys/select.h",  "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
272     .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
273     .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
274     .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
275     .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
276     .Default(false);
277 }
278 
279 /// Find a similar string in `Candidates`.
280 ///
281 /// \param LHS a string for a similar string in `Candidates`
282 ///
283 /// \param Candidates the candidates to find a similar string.
284 ///
285 /// \returns a similar string if exists. If no similar string exists,
286 /// returns std::nullopt.
287 static std::optional<StringRef>
findSimilarStr(StringRef LHS,const std::vector<StringRef> & Candidates)288 findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
289   // We need to check if `Candidates` has the exact case-insensitive string
290   // because the Levenshtein distance match does not care about it.
291   for (StringRef C : Candidates) {
292     if (LHS.equals_insensitive(C)) {
293       return C;
294     }
295   }
296 
297   // Keep going with the Levenshtein distance match.
298   // If the LHS size is less than 3, use the LHS size minus 1 and if not,
299   // use the LHS size divided by 3.
300   size_t Length = LHS.size();
301   size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
302 
303   std::optional<std::pair<StringRef, size_t>> SimilarStr;
304   for (StringRef C : Candidates) {
305     size_t CurDist = LHS.edit_distance(C, true);
306     if (CurDist <= MaxDist) {
307       if (!SimilarStr) {
308         // The first similar string found.
309         SimilarStr = {C, CurDist};
310       } else if (CurDist < SimilarStr->second) {
311         // More similar string found.
312         SimilarStr = {C, CurDist};
313       }
314     }
315   }
316 
317   if (SimilarStr) {
318     return SimilarStr->first;
319   } else {
320     return std::nullopt;
321   }
322 }
323 
CheckMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)324 bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
325                                   bool *ShadowFlag) {
326   // Missing macro name?
327   if (MacroNameTok.is(tok::eod))
328     return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
329 
330   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
331   if (!II)
332     return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
333 
334   if (II->isCPlusPlusOperatorKeyword()) {
335     // C++ 2.5p2: Alternative tokens behave the same as its primary token
336     // except for their spellings.
337     Diag(MacroNameTok, getLangOpts().MicrosoftExt
338                            ? diag::ext_pp_operator_used_as_macro_name
339                            : diag::err_pp_operator_used_as_macro_name)
340         << II << MacroNameTok.getKind();
341     // Allow #defining |and| and friends for Microsoft compatibility or
342     // recovery when legacy C headers are included in C++.
343   }
344 
345   if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
346     // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
347     return Diag(MacroNameTok, diag::err_defined_macro_name);
348   }
349 
350   // If defining/undefining reserved identifier or a keyword, we need to issue
351   // a warning.
352   SourceLocation MacroNameLoc = MacroNameTok.getLocation();
353   if (ShadowFlag)
354     *ShadowFlag = false;
355   if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&
356       (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) {
357     MacroDiag D = MD_NoWarn;
358     if (isDefineUndef == MU_Define) {
359       D = shouldWarnOnMacroDef(*this, II);
360     }
361     else if (isDefineUndef == MU_Undef)
362       D = shouldWarnOnMacroUndef(*this, II);
363     if (D == MD_KeywordDef) {
364       // We do not want to warn on some patterns widely used in configuration
365       // scripts.  This requires analyzing next tokens, so do not issue warnings
366       // now, only inform caller.
367       if (ShadowFlag)
368         *ShadowFlag = true;
369     }
370     if (D == MD_ReservedMacro)
371       Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
372   }
373 
374   // Okay, we got a good identifier.
375   return false;
376 }
377 
378 /// Lex and validate a macro name, which occurs after a
379 /// \#define or \#undef.
380 ///
381 /// This sets the token kind to eod and discards the rest of the macro line if
382 /// the macro name is invalid.
383 ///
384 /// \param MacroNameTok Token that is expected to be a macro name.
385 /// \param isDefineUndef Context in which macro is used.
386 /// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
ReadMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)387 void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
388                                  bool *ShadowFlag) {
389   // Read the token, don't allow macro expansion on it.
390   LexUnexpandedToken(MacroNameTok);
391 
392   if (MacroNameTok.is(tok::code_completion)) {
393     if (CodeComplete)
394       CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);
395     setCodeCompletionReached();
396     LexUnexpandedToken(MacroNameTok);
397   }
398 
399   if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
400     return;
401 
402   // Invalid macro name, read and discard the rest of the line and set the
403   // token kind to tok::eod if necessary.
404   if (MacroNameTok.isNot(tok::eod)) {
405     MacroNameTok.setKind(tok::eod);
406     DiscardUntilEndOfDirective();
407   }
408 }
409 
410 /// Ensure that the next token is a tok::eod token.
411 ///
412 /// If not, emit a diagnostic and consume up until the eod.  If EnableMacros is
413 /// true, then we consider macros that expand to zero tokens as being ok.
414 ///
415 /// Returns the location of the end of the directive.
CheckEndOfDirective(const char * DirType,bool EnableMacros)416 SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
417                                                  bool EnableMacros) {
418   Token Tmp;
419   // Lex unexpanded tokens for most directives: macros might expand to zero
420   // tokens, causing us to miss diagnosing invalid lines.  Some directives (like
421   // #line) allow empty macros.
422   if (EnableMacros)
423     Lex(Tmp);
424   else
425     LexUnexpandedToken(Tmp);
426 
427   // There should be no tokens after the directive, but we allow them as an
428   // extension.
429   while (Tmp.is(tok::comment))  // Skip comments in -C mode.
430     LexUnexpandedToken(Tmp);
431 
432   if (Tmp.is(tok::eod))
433     return Tmp.getLocation();
434 
435   // Add a fixit in GNU/C99/C++ mode.  Don't offer a fixit for strict-C89,
436   // or if this is a macro-style preprocessing directive, because it is more
437   // trouble than it is worth to insert /**/ and check that there is no /**/
438   // in the range also.
439   FixItHint Hint;
440   if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
441       !CurTokenLexer)
442     Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
443   Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
444   return DiscardUntilEndOfDirective().getEnd();
445 }
446 
SuggestTypoedDirective(const Token & Tok,StringRef Directive) const447 void Preprocessor::SuggestTypoedDirective(const Token &Tok,
448                                           StringRef Directive) const {
449   // If this is a `.S` file, treat unknown # directives as non-preprocessor
450   // directives.
451   if (getLangOpts().AsmPreprocessor) return;
452 
453   std::vector<StringRef> Candidates = {
454       "if", "ifdef", "ifndef", "elif", "else", "endif"
455   };
456   if (LangOpts.C23 || LangOpts.CPlusPlus23)
457     Candidates.insert(Candidates.end(), {"elifdef", "elifndef"});
458 
459   if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {
460     // Directive cannot be coming from macro.
461     assert(Tok.getLocation().isFileID());
462     CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
463         Tok.getLocation(),
464         Tok.getLocation().getLocWithOffset(Directive.size()));
465     StringRef SuggValue = *Sugg;
466 
467     auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue);
468     Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
469   }
470 }
471 
472 /// SkipExcludedConditionalBlock - We just read a \#if or related directive and
473 /// decided that the subsequent tokens are in the \#if'd out portion of the
474 /// file.  Lex the rest of the file, until we see an \#endif.  If
475 /// FoundNonSkipPortion is true, then we have already emitted code for part of
476 /// this \#if directive, so \#else/\#elif blocks should never be entered.
477 /// If ElseOk is true, then \#else directives are ok, if not, then we have
478 /// already seen one so a \#else directive is a duplicate.  When this returns,
479 /// the caller can lex the first valid token.
SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,SourceLocation IfTokenLoc,bool FoundNonSkipPortion,bool FoundElse,SourceLocation ElseLoc)480 void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
481                                                 SourceLocation IfTokenLoc,
482                                                 bool FoundNonSkipPortion,
483                                                 bool FoundElse,
484                                                 SourceLocation ElseLoc) {
485   // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
486   // not getting called recursively by storing the RecordedSkippedRanges
487   // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
488   // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
489   // invalidated. If this changes and there is a need to call
490   // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
491   // change to do a second lookup in endLexPass function instead of reusing the
492   // lookup pointer.
493   assert(!SkippingExcludedConditionalBlock &&
494          "calling SkipExcludedConditionalBlock recursively");
495   llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
496 
497   ++NumSkipped;
498   assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
499   assert(CurPPLexer && "Conditional PP block must be in a file!");
500   assert(CurLexer && "Conditional PP block but no current lexer set!");
501 
502   if (PreambleConditionalStack.reachedEOFWhileSkipping())
503     PreambleConditionalStack.clearSkipInfo();
504   else
505     CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
506                                      FoundNonSkipPortion, FoundElse);
507 
508   // Enter raw mode to disable identifier lookup (and thus macro expansion),
509   // disabling warnings, etc.
510   CurPPLexer->LexingRawMode = true;
511   Token Tok;
512   SourceLocation endLoc;
513 
514   /// Keeps track and caches skipped ranges and also retrieves a prior skipped
515   /// range if the same block is re-visited.
516   struct SkippingRangeStateTy {
517     Preprocessor &PP;
518 
519     const char *BeginPtr = nullptr;
520     unsigned *SkipRangePtr = nullptr;
521 
522     SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
523 
524     void beginLexPass() {
525       if (BeginPtr)
526         return; // continue skipping a block.
527 
528       // Initiate a skipping block and adjust the lexer if we already skipped it
529       // before.
530       BeginPtr = PP.CurLexer->getBufferLocation();
531       SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
532       if (*SkipRangePtr) {
533         PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
534                           /*IsAtStartOfLine*/ true);
535       }
536     }
537 
538     void endLexPass(const char *Hashptr) {
539       if (!BeginPtr) {
540         // Not doing normal lexing.
541         assert(PP.CurLexer->isDependencyDirectivesLexer());
542         return;
543       }
544 
545       // Finished skipping a block, record the range if it's first time visited.
546       if (!*SkipRangePtr) {
547         *SkipRangePtr = Hashptr - BeginPtr;
548       }
549       assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
550       BeginPtr = nullptr;
551       SkipRangePtr = nullptr;
552     }
553   } SkippingRangeState(*this);
554 
555   while (true) {
556     if (CurLexer->isDependencyDirectivesLexer()) {
557       CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
558     } else {
559       SkippingRangeState.beginLexPass();
560       while (true) {
561         CurLexer->Lex(Tok);
562 
563         if (Tok.is(tok::code_completion)) {
564           setCodeCompletionReached();
565           if (CodeComplete)
566             CodeComplete->CodeCompleteInConditionalExclusion();
567           continue;
568         }
569 
570         // If this is the end of the buffer, we have an error.
571         if (Tok.is(tok::eof)) {
572           // We don't emit errors for unterminated conditionals here,
573           // Lexer::LexEndOfFile can do that properly.
574           // Just return and let the caller lex after this #include.
575           if (PreambleConditionalStack.isRecording())
576             PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
577                                                       FoundNonSkipPortion,
578                                                       FoundElse, ElseLoc);
579           break;
580         }
581 
582         // If this token is not a preprocessor directive, just skip it.
583         if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
584           continue;
585 
586         break;
587       }
588     }
589     if (Tok.is(tok::eof))
590       break;
591 
592     // We just parsed a # character at the start of a line, so we're in
593     // directive mode.  Tell the lexer this so any newlines we see will be
594     // converted into an EOD token (this terminates the macro).
595     CurPPLexer->ParsingPreprocessorDirective = true;
596     if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
597 
598     assert(Tok.is(tok::hash));
599     const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
600     assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
601 
602     // Read the next token, the directive flavor.
603     LexUnexpandedToken(Tok);
604 
605     // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
606     // something bogus), skip it.
607     if (Tok.isNot(tok::raw_identifier)) {
608       CurPPLexer->ParsingPreprocessorDirective = false;
609       // Restore comment saving mode.
610       if (CurLexer) CurLexer->resetExtendedTokenMode();
611       continue;
612     }
613 
614     // If the first letter isn't i or e, it isn't interesting to us.  We know that
615     // this is safe in the face of spelling differences, because there is no way
616     // to spell an i/e in a strange way that is another letter.  Skipping this
617     // allows us to avoid looking up the identifier info for #define/#undef and
618     // other common directives.
619     StringRef RI = Tok.getRawIdentifier();
620 
621     char FirstChar = RI[0];
622     if (FirstChar >= 'a' && FirstChar <= 'z' &&
623         FirstChar != 'i' && FirstChar != 'e') {
624       CurPPLexer->ParsingPreprocessorDirective = false;
625       // Restore comment saving mode.
626       if (CurLexer) CurLexer->resetExtendedTokenMode();
627       continue;
628     }
629 
630     // Get the identifier name without trigraphs or embedded newlines.  Note
631     // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
632     // when skipping.
633     char DirectiveBuf[20];
634     StringRef Directive;
635     if (!Tok.needsCleaning() && RI.size() < 20) {
636       Directive = RI;
637     } else {
638       std::string DirectiveStr = getSpelling(Tok);
639       size_t IdLen = DirectiveStr.size();
640       if (IdLen >= 20) {
641         CurPPLexer->ParsingPreprocessorDirective = false;
642         // Restore comment saving mode.
643         if (CurLexer) CurLexer->resetExtendedTokenMode();
644         continue;
645       }
646       memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
647       Directive = StringRef(DirectiveBuf, IdLen);
648     }
649 
650     if (Directive.starts_with("if")) {
651       StringRef Sub = Directive.substr(2);
652       if (Sub.empty() ||   // "if"
653           Sub == "def" ||   // "ifdef"
654           Sub == "ndef") {  // "ifndef"
655         // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
656         // bother parsing the condition.
657         DiscardUntilEndOfDirective();
658         CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
659                                        /*foundnonskip*/false,
660                                        /*foundelse*/false);
661       } else {
662         SuggestTypoedDirective(Tok, Directive);
663       }
664     } else if (Directive[0] == 'e') {
665       StringRef Sub = Directive.substr(1);
666       if (Sub == "ndif") {  // "endif"
667         PPConditionalInfo CondInfo;
668         CondInfo.WasSkipping = true; // Silence bogus warning.
669         bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
670         (void)InCond;  // Silence warning in no-asserts mode.
671         assert(!InCond && "Can't be skipping if not in a conditional!");
672 
673         // If we popped the outermost skipping block, we're done skipping!
674         if (!CondInfo.WasSkipping) {
675           SkippingRangeState.endLexPass(Hashptr);
676           // Restore the value of LexingRawMode so that trailing comments
677           // are handled correctly, if we've reached the outermost block.
678           CurPPLexer->LexingRawMode = false;
679           endLoc = CheckEndOfDirective("endif");
680           CurPPLexer->LexingRawMode = true;
681           if (Callbacks)
682             Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
683           break;
684         } else {
685           DiscardUntilEndOfDirective();
686         }
687       } else if (Sub == "lse") { // "else".
688         // #else directive in a skipping conditional.  If not in some other
689         // skipping conditional, and if #else hasn't already been seen, enter it
690         // as a non-skipping conditional.
691         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
692 
693         if (!CondInfo.WasSkipping)
694           SkippingRangeState.endLexPass(Hashptr);
695 
696         // If this is a #else with a #else before it, report the error.
697         if (CondInfo.FoundElse)
698           Diag(Tok, diag::pp_err_else_after_else);
699 
700         // Note that we've seen a #else in this conditional.
701         CondInfo.FoundElse = true;
702 
703         // If the conditional is at the top level, and the #if block wasn't
704         // entered, enter the #else block now.
705         if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
706           CondInfo.FoundNonSkip = true;
707           // Restore the value of LexingRawMode so that trailing comments
708           // are handled correctly.
709           CurPPLexer->LexingRawMode = false;
710           endLoc = CheckEndOfDirective("else");
711           CurPPLexer->LexingRawMode = true;
712           if (Callbacks)
713             Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
714           break;
715         } else {
716           DiscardUntilEndOfDirective();  // C99 6.10p4.
717         }
718       } else if (Sub == "lif") {  // "elif".
719         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
720 
721         if (!CondInfo.WasSkipping)
722           SkippingRangeState.endLexPass(Hashptr);
723 
724         // If this is a #elif with a #else before it, report the error.
725         if (CondInfo.FoundElse)
726           Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;
727 
728         // If this is in a skipping block or if we're already handled this #if
729         // block, don't bother parsing the condition.
730         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
731           // FIXME: We should probably do at least some minimal parsing of the
732           // condition to verify that it is well-formed. The current state
733           // allows #elif* directives with completely malformed (or missing)
734           // conditions.
735           DiscardUntilEndOfDirective();
736         } else {
737           // Restore the value of LexingRawMode so that identifiers are
738           // looked up, etc, inside the #elif expression.
739           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
740           CurPPLexer->LexingRawMode = false;
741           IdentifierInfo *IfNDefMacro = nullptr;
742           DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
743           // Stop if Lexer became invalid after hitting code completion token.
744           if (!CurPPLexer)
745             return;
746           const bool CondValue = DER.Conditional;
747           CurPPLexer->LexingRawMode = true;
748           if (Callbacks) {
749             Callbacks->Elif(
750                 Tok.getLocation(), DER.ExprRange,
751                 (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
752                 CondInfo.IfLoc);
753           }
754           // If this condition is true, enter it!
755           if (CondValue) {
756             CondInfo.FoundNonSkip = true;
757             break;
758           }
759         }
760       } else if (Sub == "lifdef" ||  // "elifdef"
761                  Sub == "lifndef") { // "elifndef"
762         bool IsElifDef = Sub == "lifdef";
763         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
764         Token DirectiveToken = Tok;
765 
766         if (!CondInfo.WasSkipping)
767           SkippingRangeState.endLexPass(Hashptr);
768 
769         // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
770         // if this branch is in a skipping block.
771         unsigned DiagID;
772         if (LangOpts.CPlusPlus)
773           DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
774                                         : diag::ext_cxx23_pp_directive;
775         else
776           DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
777                                 : diag::ext_c23_pp_directive;
778         Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
779 
780         // If this is a #elif with a #else before it, report the error.
781         if (CondInfo.FoundElse)
782           Diag(Tok, diag::pp_err_elif_after_else)
783               << (IsElifDef ? PED_Elifdef : PED_Elifndef);
784 
785         // If this is in a skipping block or if we're already handled this #if
786         // block, don't bother parsing the condition.
787         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
788           // FIXME: We should probably do at least some minimal parsing of the
789           // condition to verify that it is well-formed. The current state
790           // allows #elif* directives with completely malformed (or missing)
791           // conditions.
792           DiscardUntilEndOfDirective();
793         } else {
794           // Restore the value of LexingRawMode so that identifiers are
795           // looked up, etc, inside the #elif[n]def expression.
796           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
797           CurPPLexer->LexingRawMode = false;
798           Token MacroNameTok;
799           ReadMacroName(MacroNameTok);
800           CurPPLexer->LexingRawMode = true;
801 
802           // If the macro name token is tok::eod, there was an error that was
803           // already reported.
804           if (MacroNameTok.is(tok::eod)) {
805             // Skip code until we get to #endif.  This helps with recovery by
806             // not emitting an error when the #endif is reached.
807             continue;
808           }
809 
810           emitMacroExpansionWarnings(MacroNameTok);
811 
812           CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");
813 
814           IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
815           auto MD = getMacroDefinition(MII);
816           MacroInfo *MI = MD.getMacroInfo();
817 
818           if (Callbacks) {
819             if (IsElifDef) {
820               Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,
821                                  MD);
822             } else {
823               Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,
824                                   MD);
825             }
826           }
827           // If this condition is true, enter it!
828           if (static_cast<bool>(MI) == IsElifDef) {
829             CondInfo.FoundNonSkip = true;
830             break;
831           }
832         }
833       } else {
834         SuggestTypoedDirective(Tok, Directive);
835       }
836     } else {
837       SuggestTypoedDirective(Tok, Directive);
838     }
839 
840     CurPPLexer->ParsingPreprocessorDirective = false;
841     // Restore comment saving mode.
842     if (CurLexer) CurLexer->resetExtendedTokenMode();
843   }
844 
845   // Finally, if we are out of the conditional (saw an #endif or ran off the end
846   // of the file, just stop skipping and return to lexing whatever came after
847   // the #if block.
848   CurPPLexer->LexingRawMode = false;
849 
850   // The last skipped range isn't actually skipped yet if it's truncated
851   // by the end of the preamble; we'll resume parsing after the preamble.
852   if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
853     Callbacks->SourceRangeSkipped(
854         SourceRange(HashTokenLoc, endLoc.isValid()
855                                       ? endLoc
856                                       : CurPPLexer->getSourceLocation()),
857         Tok.getLocation());
858 }
859 
getModuleForLocation(SourceLocation Loc,bool AllowTextual)860 Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
861                                            bool AllowTextual) {
862   if (!SourceMgr.isInMainFile(Loc)) {
863     // Try to determine the module of the include directive.
864     // FIXME: Look into directly passing the FileEntry from LookupFile instead.
865     FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
866     if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(IDOfIncl)) {
867       // The include comes from an included file.
868       return HeaderInfo.getModuleMap()
869           .findModuleForHeader(*EntryOfIncl, AllowTextual)
870           .getModule();
871     }
872   }
873 
874   // This is either in the main file or not in a file at all. It belongs
875   // to the current module, if there is one.
876   return getLangOpts().CurrentModule.empty()
877              ? nullptr
878              : HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);
879 }
880 
881 OptionalFileEntryRef
getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,SourceLocation Loc)882 Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
883                                                SourceLocation Loc) {
884   Module *IncM = getModuleForLocation(
885       IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
886 
887   // Walk up through the include stack, looking through textual headers of M
888   // until we hit a non-textual header that we can #include. (We assume textual
889   // headers of a module with non-textual headers aren't meant to be used to
890   // import entities from the module.)
891   auto &SM = getSourceManager();
892   while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
893     auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
894     auto FE = SM.getFileEntryRefForID(ID);
895     if (!FE)
896       break;
897 
898     // We want to find all possible modules that might contain this header, so
899     // search all enclosing directories for module maps and load them.
900     HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,
901                             SourceMgr.isInSystemHeader(Loc));
902 
903     bool InPrivateHeader = false;
904     for (auto Header : HeaderInfo.findAllModulesForHeader(*FE)) {
905       if (!Header.isAccessibleFrom(IncM)) {
906         // It's in a private header; we can't #include it.
907         // FIXME: If there's a public header in some module that re-exports it,
908         // then we could suggest including that, but it's not clear that's the
909         // expected way to make this entity visible.
910         InPrivateHeader = true;
911         continue;
912       }
913 
914       // Don't suggest explicitly excluded headers.
915       if (Header.getRole() == ModuleMap::ExcludedHeader)
916         continue;
917 
918       // We'll suggest including textual headers below if they're
919       // include-guarded.
920       if (Header.getRole() & ModuleMap::TextualHeader)
921         continue;
922 
923       // If we have a module import syntax, we shouldn't include a header to
924       // make a particular module visible. Let the caller know they should
925       // suggest an import instead.
926       if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)
927         return std::nullopt;
928 
929       // If this is an accessible, non-textual header of M's top-level module
930       // that transitively includes the given location and makes the
931       // corresponding module visible, this is the thing to #include.
932       return *FE;
933     }
934 
935     // FIXME: If we're bailing out due to a private header, we shouldn't suggest
936     // an import either.
937     if (InPrivateHeader)
938       return std::nullopt;
939 
940     // If the header is includable and has an include guard, assume the
941     // intended way to expose its contents is by #include, not by importing a
942     // module that transitively includes it.
943     if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(*FE))
944       return *FE;
945 
946     Loc = SM.getIncludeLoc(ID);
947   }
948 
949   return std::nullopt;
950 }
951 
LookupFile(SourceLocation FilenameLoc,StringRef Filename,bool isAngled,ConstSearchDirIterator FromDir,const FileEntry * FromFile,ConstSearchDirIterator * CurDirArg,SmallVectorImpl<char> * SearchPath,SmallVectorImpl<char> * RelativePath,ModuleMap::KnownHeader * SuggestedModule,bool * IsMapped,bool * IsFrameworkFound,bool SkipCache,bool OpenFile,bool CacheFailures)952 OptionalFileEntryRef Preprocessor::LookupFile(
953     SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
954     ConstSearchDirIterator FromDir, const FileEntry *FromFile,
955     ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
956     SmallVectorImpl<char> *RelativePath,
957     ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
958     bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
959   ConstSearchDirIterator CurDirLocal = nullptr;
960   ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
961 
962   Module *RequestingModule = getModuleForLocation(
963       FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
964 
965   // If the header lookup mechanism may be relative to the current inclusion
966   // stack, record the parent #includes.
967   SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
968   bool BuildSystemModule = false;
969   if (!FromDir && !FromFile) {
970     FileID FID = getCurrentFileLexer()->getFileID();
971     OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);
972 
973     // If there is no file entry associated with this file, it must be the
974     // predefines buffer or the module includes buffer. Any other file is not
975     // lexed with a normal lexer, so it won't be scanned for preprocessor
976     // directives.
977     //
978     // If we have the predefines buffer, resolve #include references (which come
979     // from the -include command line argument) from the current working
980     // directory instead of relative to the main file.
981     //
982     // If we have the module includes buffer, resolve #include references (which
983     // come from header declarations in the module map) relative to the module
984     // map file.
985     if (!FileEnt) {
986       if (FID == SourceMgr.getMainFileID() && MainFileDir) {
987         auto IncludeDir =
988             HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
989                 Filename, getCurrentModule())
990                 ? HeaderInfo.getModuleMap().getBuiltinDir()
991                 : MainFileDir;
992         Includers.push_back(std::make_pair(std::nullopt, *IncludeDir));
993         BuildSystemModule = getCurrentModule()->IsSystem;
994       } else if ((FileEnt = SourceMgr.getFileEntryRefForID(
995                       SourceMgr.getMainFileID()))) {
996         auto CWD = FileMgr.getOptionalDirectoryRef(".");
997         Includers.push_back(std::make_pair(*FileEnt, *CWD));
998       }
999     } else {
1000       Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1001     }
1002 
1003     // MSVC searches the current include stack from top to bottom for
1004     // headers included by quoted include directives.
1005     // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
1006     if (LangOpts.MSVCCompat && !isAngled) {
1007       for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1008         if (IsFileLexer(ISEntry))
1009           if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
1010             Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1011       }
1012     }
1013   }
1014 
1015   CurDir = CurDirLookup;
1016 
1017   if (FromFile) {
1018     // We're supposed to start looking from after a particular file. Search
1019     // the include path until we find that file or run out of files.
1020     ConstSearchDirIterator TmpCurDir = CurDir;
1021     ConstSearchDirIterator TmpFromDir = nullptr;
1022     while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1023                Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir,
1024                Includers, SearchPath, RelativePath, RequestingModule,
1025                SuggestedModule, /*IsMapped=*/nullptr,
1026                /*IsFrameworkFound=*/nullptr, SkipCache)) {
1027       // Keep looking as if this file did a #include_next.
1028       TmpFromDir = TmpCurDir;
1029       ++TmpFromDir;
1030       if (&FE->getFileEntry() == FromFile) {
1031         // Found it.
1032         FromDir = TmpFromDir;
1033         CurDir = TmpCurDir;
1034         break;
1035       }
1036     }
1037   }
1038 
1039   // Do a standard file entry lookup.
1040   OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1041       Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath,
1042       RelativePath, RequestingModule, SuggestedModule, IsMapped,
1043       IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1044   if (FE)
1045     return FE;
1046 
1047   OptionalFileEntryRef CurFileEnt;
1048   // Otherwise, see if this is a subframework header.  If so, this is relative
1049   // to one of the headers on the #include stack.  Walk the list of the current
1050   // headers on the #include stack and pass them to HeaderInfo.
1051   if (IsFileLexer()) {
1052     if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1053       if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1054               Filename, *CurFileEnt, SearchPath, RelativePath, RequestingModule,
1055               SuggestedModule)) {
1056         return FE;
1057       }
1058     }
1059   }
1060 
1061   for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1062     if (IsFileLexer(ISEntry)) {
1063       if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1064         if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1065                 Filename, *CurFileEnt, SearchPath, RelativePath,
1066                 RequestingModule, SuggestedModule)) {
1067           return FE;
1068         }
1069       }
1070     }
1071   }
1072 
1073   // Otherwise, we really couldn't find the file.
1074   return std::nullopt;
1075 }
1076 
1077 OptionalFileEntryRef
LookupEmbedFile(StringRef Filename,bool isAngled,bool OpenFile,const FileEntry * LookupFromFile)1078 Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
1079                               const FileEntry *LookupFromFile) {
1080   FileManager &FM = this->getFileManager();
1081   if (llvm::sys::path::is_absolute(Filename)) {
1082     // lookup path or immediately fail
1083     llvm::Expected<FileEntryRef> ShouldBeEntry =
1084         FM.getFileRef(Filename, OpenFile);
1085     return llvm::expectedToOptional(std::move(ShouldBeEntry));
1086   }
1087 
1088   auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,
1089                                StringRef StartingFrom, StringRef FileName,
1090                                bool RemoveInitialFileComponentFromLookupPath) {
1091     llvm::sys::path::native(StartingFrom, LookupPath);
1092     if (RemoveInitialFileComponentFromLookupPath)
1093       llvm::sys::path::remove_filename(LookupPath);
1094     if (!LookupPath.empty() &&
1095         !llvm::sys::path::is_separator(LookupPath.back())) {
1096       LookupPath.push_back(llvm::sys::path::get_separator().front());
1097     }
1098     LookupPath.append(FileName.begin(), FileName.end());
1099   };
1100 
1101   // Otherwise, it's search time!
1102   SmallString<512> LookupPath;
1103   // Non-angled lookup
1104   if (!isAngled) {
1105     if (LookupFromFile) {
1106       // Use file-based lookup.
1107       StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
1108       if (!FullFileDir.empty()) {
1109         SeparateComponents(LookupPath, FullFileDir, Filename, true);
1110         llvm::Expected<FileEntryRef> ShouldBeEntry =
1111             FM.getFileRef(LookupPath, OpenFile);
1112         if (ShouldBeEntry)
1113           return llvm::expectedToOptional(std::move(ShouldBeEntry));
1114         llvm::consumeError(ShouldBeEntry.takeError());
1115       }
1116     }
1117 
1118     // Otherwise, do working directory lookup.
1119     LookupPath.clear();
1120     auto MaybeWorkingDirEntry = FM.getDirectoryRef(".");
1121     if (MaybeWorkingDirEntry) {
1122       DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
1123       StringRef WorkingDir = WorkingDirEntry.getName();
1124       if (!WorkingDir.empty()) {
1125         SeparateComponents(LookupPath, WorkingDir, Filename, false);
1126         llvm::Expected<FileEntryRef> ShouldBeEntry =
1127             FM.getFileRef(LookupPath, OpenFile);
1128         if (ShouldBeEntry)
1129           return llvm::expectedToOptional(std::move(ShouldBeEntry));
1130         llvm::consumeError(ShouldBeEntry.takeError());
1131       }
1132     }
1133   }
1134 
1135   for (const auto &Entry : PPOpts->EmbedEntries) {
1136     LookupPath.clear();
1137     SeparateComponents(LookupPath, Entry, Filename, false);
1138     llvm::Expected<FileEntryRef> ShouldBeEntry =
1139         FM.getFileRef(LookupPath, OpenFile);
1140     if (ShouldBeEntry)
1141       return llvm::expectedToOptional(std::move(ShouldBeEntry));
1142     llvm::consumeError(ShouldBeEntry.takeError());
1143   }
1144   return std::nullopt;
1145 }
1146 
1147 //===----------------------------------------------------------------------===//
1148 // Preprocessor Directive Handling.
1149 //===----------------------------------------------------------------------===//
1150 
1151 class Preprocessor::ResetMacroExpansionHelper {
1152 public:
ResetMacroExpansionHelper(Preprocessor * pp)1153   ResetMacroExpansionHelper(Preprocessor *pp)
1154     : PP(pp), save(pp->DisableMacroExpansion) {
1155     if (pp->MacroExpansionInDirectivesOverride)
1156       pp->DisableMacroExpansion = false;
1157   }
1158 
~ResetMacroExpansionHelper()1159   ~ResetMacroExpansionHelper() {
1160     PP->DisableMacroExpansion = save;
1161   }
1162 
1163 private:
1164   Preprocessor *PP;
1165   bool save;
1166 };
1167 
1168 /// Process a directive while looking for the through header or a #pragma
1169 /// hdrstop. The following directives are handled:
1170 /// #include (to check if it is the through header)
1171 /// #define (to warn about macros that don't match the PCH)
1172 /// #pragma (to check for pragma hdrstop).
1173 /// All other directives are completely discarded.
HandleSkippedDirectiveWhileUsingPCH(Token & Result,SourceLocation HashLoc)1174 void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1175                                                        SourceLocation HashLoc) {
1176   if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1177     if (II->getPPKeywordID() == tok::pp_define) {
1178       return HandleDefineDirective(Result,
1179                                    /*ImmediatelyAfterHeaderGuard=*/false);
1180     }
1181     if (SkippingUntilPCHThroughHeader &&
1182         II->getPPKeywordID() == tok::pp_include) {
1183       return HandleIncludeDirective(HashLoc, Result);
1184     }
1185     if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1186       Lex(Result);
1187       auto *II = Result.getIdentifierInfo();
1188       if (II && II->getName() == "hdrstop")
1189         return HandlePragmaHdrstop(Result);
1190     }
1191   }
1192   DiscardUntilEndOfDirective();
1193 }
1194 
1195 /// HandleDirective - This callback is invoked when the lexer sees a # token
1196 /// at the start of a line.  This consumes the directive, modifies the
1197 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
1198 /// read is the correct one.
HandleDirective(Token & Result)1199 void Preprocessor::HandleDirective(Token &Result) {
1200   // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1201 
1202   // We just parsed a # character at the start of a line, so we're in directive
1203   // mode.  Tell the lexer this so any newlines we see will be converted into an
1204   // EOD token (which terminates the directive).
1205   CurPPLexer->ParsingPreprocessorDirective = true;
1206   if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1207 
1208   bool ImmediatelyAfterTopLevelIfndef =
1209       CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1210   CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1211 
1212   ++NumDirectives;
1213 
1214   // We are about to read a token.  For the multiple-include optimization FA to
1215   // work, we have to remember if we had read any tokens *before* this
1216   // pp-directive.
1217   bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1218 
1219   // Save the '#' token in case we need to return it later.
1220   Token SavedHash = Result;
1221 
1222   // Read the next token, the directive flavor.  This isn't expanded due to
1223   // C99 6.10.3p8.
1224   LexUnexpandedToken(Result);
1225 
1226   // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
1227   //   #define A(x) #x
1228   //   A(abc
1229   //     #warning blah
1230   //   def)
1231   // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1232   // not support this for #include-like directives, since that can result in
1233   // terrible diagnostics, and does not work in GCC.
1234   if (InMacroArgs) {
1235     if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1236       switch (II->getPPKeywordID()) {
1237       case tok::pp_include:
1238       case tok::pp_import:
1239       case tok::pp_include_next:
1240       case tok::pp___include_macros:
1241       case tok::pp_pragma:
1242       case tok::pp_embed:
1243         Diag(Result, diag::err_embedded_directive) << II->getName();
1244         Diag(*ArgMacro, diag::note_macro_expansion_here)
1245             << ArgMacro->getIdentifierInfo();
1246         DiscardUntilEndOfDirective();
1247         return;
1248       default:
1249         break;
1250       }
1251     }
1252     Diag(Result, diag::ext_embedded_directive);
1253   }
1254 
1255   // Temporarily enable macro expansion if set so
1256   // and reset to previous state when returning from this function.
1257   ResetMacroExpansionHelper helper(this);
1258 
1259   if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1260     return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());
1261 
1262   switch (Result.getKind()) {
1263   case tok::eod:
1264     // Ignore the null directive with regards to the multiple-include
1265     // optimization, i.e. allow the null directive to appear outside of the
1266     // include guard and still enable the multiple-include optimization.
1267     CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1268     return;   // null directive.
1269   case tok::code_completion:
1270     setCodeCompletionReached();
1271     if (CodeComplete)
1272       CodeComplete->CodeCompleteDirective(
1273                                     CurPPLexer->getConditionalStackDepth() > 0);
1274     return;
1275   case tok::numeric_constant:  // # 7  GNU line marker directive.
1276     // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1277     // directive. However do permit it in the predefines file, as we use line
1278     // markers to mark the builtin macros as being in a system header.
1279     if (getLangOpts().AsmPreprocessor &&
1280         SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID())
1281       break;
1282     return HandleDigitDirective(Result);
1283   default:
1284     IdentifierInfo *II = Result.getIdentifierInfo();
1285     if (!II) break; // Not an identifier.
1286 
1287     // Ask what the preprocessor keyword ID is.
1288     switch (II->getPPKeywordID()) {
1289     default: break;
1290     // C99 6.10.1 - Conditional Inclusion.
1291     case tok::pp_if:
1292       return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
1293     case tok::pp_ifdef:
1294       return HandleIfdefDirective(Result, SavedHash, false,
1295                                   true /*not valid for miopt*/);
1296     case tok::pp_ifndef:
1297       return HandleIfdefDirective(Result, SavedHash, true,
1298                                   ReadAnyTokensBeforeDirective);
1299     case tok::pp_elif:
1300     case tok::pp_elifdef:
1301     case tok::pp_elifndef:
1302       return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID());
1303 
1304     case tok::pp_else:
1305       return HandleElseDirective(Result, SavedHash);
1306     case tok::pp_endif:
1307       return HandleEndifDirective(Result);
1308 
1309     // C99 6.10.2 - Source File Inclusion.
1310     case tok::pp_include:
1311       // Handle #include.
1312       return HandleIncludeDirective(SavedHash.getLocation(), Result);
1313     case tok::pp___include_macros:
1314       // Handle -imacros.
1315       return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
1316 
1317     // C99 6.10.3 - Macro Replacement.
1318     case tok::pp_define:
1319       return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
1320     case tok::pp_undef:
1321       return HandleUndefDirective();
1322 
1323     // C99 6.10.4 - Line Control.
1324     case tok::pp_line:
1325       return HandleLineDirective();
1326 
1327     // C99 6.10.5 - Error Directive.
1328     case tok::pp_error:
1329       return HandleUserDiagnosticDirective(Result, false);
1330 
1331     // C99 6.10.6 - Pragma Directive.
1332     case tok::pp_pragma:
1333       return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
1334 
1335     // GNU Extensions.
1336     case tok::pp_import:
1337       return HandleImportDirective(SavedHash.getLocation(), Result);
1338     case tok::pp_include_next:
1339       return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
1340 
1341     case tok::pp_warning:
1342       if (LangOpts.CPlusPlus)
1343         Diag(Result, LangOpts.CPlusPlus23
1344                          ? diag::warn_cxx23_compat_warning_directive
1345                          : diag::ext_pp_warning_directive)
1346             << /*C++23*/ 1;
1347       else
1348         Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1349                                   : diag::ext_pp_warning_directive)
1350             << /*C23*/ 0;
1351 
1352       return HandleUserDiagnosticDirective(Result, true);
1353     case tok::pp_ident:
1354       return HandleIdentSCCSDirective(Result);
1355     case tok::pp_sccs:
1356       return HandleIdentSCCSDirective(Result);
1357     case tok::pp_embed:
1358       return HandleEmbedDirective(SavedHash.getLocation(), Result,
1359                                   getCurrentFileLexer()
1360                                       ? *getCurrentFileLexer()->getFileEntry()
1361                                       : static_cast<FileEntry *>(nullptr));
1362     case tok::pp_assert:
1363       //isExtension = true;  // FIXME: implement #assert
1364       break;
1365     case tok::pp_unassert:
1366       //isExtension = true;  // FIXME: implement #unassert
1367       break;
1368 
1369     case tok::pp___public_macro:
1370       if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1371         return HandleMacroPublicDirective(Result);
1372       break;
1373 
1374     case tok::pp___private_macro:
1375       if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1376         return HandleMacroPrivateDirective();
1377       break;
1378     }
1379     break;
1380   }
1381 
1382   // If this is a .S file, treat unknown # directives as non-preprocessor
1383   // directives.  This is important because # may be a comment or introduce
1384   // various pseudo-ops.  Just return the # token and push back the following
1385   // token to be lexed next time.
1386   if (getLangOpts().AsmPreprocessor) {
1387     auto Toks = std::make_unique<Token[]>(2);
1388     // Return the # and the token after it.
1389     Toks[0] = SavedHash;
1390     Toks[1] = Result;
1391 
1392     // If the second token is a hashhash token, then we need to translate it to
1393     // unknown so the token lexer doesn't try to perform token pasting.
1394     if (Result.is(tok::hashhash))
1395       Toks[1].setKind(tok::unknown);
1396 
1397     // Enter this token stream so that we re-lex the tokens.  Make sure to
1398     // enable macro expansion, in case the token after the # is an identifier
1399     // that is expanded.
1400     EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
1401     return;
1402   }
1403 
1404   // If we reached here, the preprocessing token is not valid!
1405   // Start suggesting if a similar directive found.
1406   Diag(Result, diag::err_pp_invalid_directive) << 0;
1407 
1408   // Read the rest of the PP line.
1409   DiscardUntilEndOfDirective();
1410 
1411   // Okay, we're done parsing the directive.
1412 }
1413 
1414 /// GetLineValue - Convert a numeric token into an unsigned value, emitting
1415 /// Diagnostic DiagID if it is invalid, and returning the value in Val.
GetLineValue(Token & DigitTok,unsigned & Val,unsigned DiagID,Preprocessor & PP,bool IsGNULineDirective=false)1416 static bool GetLineValue(Token &DigitTok, unsigned &Val,
1417                          unsigned DiagID, Preprocessor &PP,
1418                          bool IsGNULineDirective=false) {
1419   if (DigitTok.isNot(tok::numeric_constant)) {
1420     PP.Diag(DigitTok, DiagID);
1421 
1422     if (DigitTok.isNot(tok::eod))
1423       PP.DiscardUntilEndOfDirective();
1424     return true;
1425   }
1426 
1427   SmallString<64> IntegerBuffer;
1428   IntegerBuffer.resize(DigitTok.getLength());
1429   const char *DigitTokBegin = &IntegerBuffer[0];
1430   bool Invalid = false;
1431   unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
1432   if (Invalid)
1433     return true;
1434 
1435   // Verify that we have a simple digit-sequence, and compute the value.  This
1436   // is always a simple digit string computed in decimal, so we do this manually
1437   // here.
1438   Val = 0;
1439   for (unsigned i = 0; i != ActualLength; ++i) {
1440     // C++1y [lex.fcon]p1:
1441     //   Optional separating single quotes in a digit-sequence are ignored
1442     if (DigitTokBegin[i] == '\'')
1443       continue;
1444 
1445     if (!isDigit(DigitTokBegin[i])) {
1446       PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
1447               diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1448       PP.DiscardUntilEndOfDirective();
1449       return true;
1450     }
1451 
1452     unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1453     if (NextVal < Val) { // overflow.
1454       PP.Diag(DigitTok, DiagID);
1455       PP.DiscardUntilEndOfDirective();
1456       return true;
1457     }
1458     Val = NextVal;
1459   }
1460 
1461   if (DigitTokBegin[0] == '0' && Val)
1462     PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
1463       << IsGNULineDirective;
1464 
1465   return false;
1466 }
1467 
1468 /// Handle a \#line directive: C99 6.10.4.
1469 ///
1470 /// The two acceptable forms are:
1471 /// \verbatim
1472 ///   # line digit-sequence
1473 ///   # line digit-sequence "s-char-sequence"
1474 /// \endverbatim
HandleLineDirective()1475 void Preprocessor::HandleLineDirective() {
1476   // Read the line # and string argument.  Per C99 6.10.4p5, these tokens are
1477   // expanded.
1478   Token DigitTok;
1479   Lex(DigitTok);
1480 
1481   // Validate the number and convert it to an unsigned.
1482   unsigned LineNo;
1483   if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
1484     return;
1485 
1486   if (LineNo == 0)
1487     Diag(DigitTok, diag::ext_pp_line_zero);
1488 
1489   // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1490   // number greater than 2147483647".  C90 requires that the line # be <= 32767.
1491   unsigned LineLimit = 32768U;
1492   if (LangOpts.C99 || LangOpts.CPlusPlus11)
1493     LineLimit = 2147483648U;
1494   if (LineNo >= LineLimit)
1495     Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
1496   else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1497     Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
1498 
1499   int FilenameID = -1;
1500   Token StrTok;
1501   Lex(StrTok);
1502 
1503   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1504   // string followed by eod.
1505   if (StrTok.is(tok::eod))
1506     ; // ok
1507   else if (StrTok.isNot(tok::string_literal)) {
1508     Diag(StrTok, diag::err_pp_line_invalid_filename);
1509     DiscardUntilEndOfDirective();
1510     return;
1511   } else if (StrTok.hasUDSuffix()) {
1512     Diag(StrTok, diag::err_invalid_string_udl);
1513     DiscardUntilEndOfDirective();
1514     return;
1515   } else {
1516     // Parse and validate the string, converting it into a unique ID.
1517     StringLiteralParser Literal(StrTok, *this);
1518     assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1519     if (Literal.hadError) {
1520       DiscardUntilEndOfDirective();
1521       return;
1522     }
1523     if (Literal.Pascal) {
1524       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1525       DiscardUntilEndOfDirective();
1526       return;
1527     }
1528     FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1529 
1530     // Verify that there is nothing after the string, other than EOD.  Because
1531     // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1532     CheckEndOfDirective("line", true);
1533   }
1534 
1535   // Take the file kind of the file containing the #line directive. #line
1536   // directives are often used for generated sources from the same codebase, so
1537   // the new file should generally be classified the same way as the current
1538   // file. This is visible in GCC's pre-processed output, which rewrites #line
1539   // to GNU line markers.
1540   SrcMgr::CharacteristicKind FileKind =
1541       SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1542 
1543   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
1544                         false, FileKind);
1545 
1546   if (Callbacks)
1547     Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
1548                            PPCallbacks::RenameFile, FileKind);
1549 }
1550 
1551 /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1552 /// marker directive.
ReadLineMarkerFlags(bool & IsFileEntry,bool & IsFileExit,SrcMgr::CharacteristicKind & FileKind,Preprocessor & PP)1553 static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1554                                 SrcMgr::CharacteristicKind &FileKind,
1555                                 Preprocessor &PP) {
1556   unsigned FlagVal;
1557   Token FlagTok;
1558   PP.Lex(FlagTok);
1559   if (FlagTok.is(tok::eod)) return false;
1560   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1561     return true;
1562 
1563   if (FlagVal == 1) {
1564     IsFileEntry = true;
1565 
1566     PP.Lex(FlagTok);
1567     if (FlagTok.is(tok::eod)) return false;
1568     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1569       return true;
1570   } else if (FlagVal == 2) {
1571     IsFileExit = true;
1572 
1573     SourceManager &SM = PP.getSourceManager();
1574     // If we are leaving the current presumed file, check to make sure the
1575     // presumed include stack isn't empty!
1576     FileID CurFileID =
1577       SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
1578     PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
1579     if (PLoc.isInvalid())
1580       return true;
1581 
1582     // If there is no include loc (main file) or if the include loc is in a
1583     // different physical file, then we aren't in a "1" line marker flag region.
1584     SourceLocation IncLoc = PLoc.getIncludeLoc();
1585     if (IncLoc.isInvalid() ||
1586         SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
1587       PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
1588       PP.DiscardUntilEndOfDirective();
1589       return true;
1590     }
1591 
1592     PP.Lex(FlagTok);
1593     if (FlagTok.is(tok::eod)) return false;
1594     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1595       return true;
1596   }
1597 
1598   // We must have 3 if there are still flags.
1599   if (FlagVal != 3) {
1600     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1601     PP.DiscardUntilEndOfDirective();
1602     return true;
1603   }
1604 
1605   FileKind = SrcMgr::C_System;
1606 
1607   PP.Lex(FlagTok);
1608   if (FlagTok.is(tok::eod)) return false;
1609   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1610     return true;
1611 
1612   // We must have 4 if there is yet another flag.
1613   if (FlagVal != 4) {
1614     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1615     PP.DiscardUntilEndOfDirective();
1616     return true;
1617   }
1618 
1619   FileKind = SrcMgr::C_ExternCSystem;
1620 
1621   PP.Lex(FlagTok);
1622   if (FlagTok.is(tok::eod)) return false;
1623 
1624   // There are no more valid flags here.
1625   PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1626   PP.DiscardUntilEndOfDirective();
1627   return true;
1628 }
1629 
1630 /// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1631 /// one of the following forms:
1632 ///
1633 ///     # 42
1634 ///     # 42 "file" ('1' | '2')?
1635 ///     # 42 "file" ('1' | '2')? '3' '4'?
1636 ///
HandleDigitDirective(Token & DigitTok)1637 void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1638   // Validate the number and convert it to an unsigned.  GNU does not have a
1639   // line # limit other than it fit in 32-bits.
1640   unsigned LineNo;
1641   if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
1642                    *this, true))
1643     return;
1644 
1645   Token StrTok;
1646   Lex(StrTok);
1647 
1648   bool IsFileEntry = false, IsFileExit = false;
1649   int FilenameID = -1;
1650   SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1651 
1652   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1653   // string followed by eod.
1654   if (StrTok.is(tok::eod)) {
1655     Diag(StrTok, diag::ext_pp_gnu_line_directive);
1656     // Treat this like "#line NN", which doesn't change file characteristics.
1657     FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1658   } else if (StrTok.isNot(tok::string_literal)) {
1659     Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1660     DiscardUntilEndOfDirective();
1661     return;
1662   } else if (StrTok.hasUDSuffix()) {
1663     Diag(StrTok, diag::err_invalid_string_udl);
1664     DiscardUntilEndOfDirective();
1665     return;
1666   } else {
1667     // Parse and validate the string, converting it into a unique ID.
1668     StringLiteralParser Literal(StrTok, *this);
1669     assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1670     if (Literal.hadError) {
1671       DiscardUntilEndOfDirective();
1672       return;
1673     }
1674     if (Literal.Pascal) {
1675       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1676       DiscardUntilEndOfDirective();
1677       return;
1678     }
1679 
1680     // If a filename was present, read any flags that are present.
1681     if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
1682       return;
1683     if (!SourceMgr.isWrittenInBuiltinFile(DigitTok.getLocation()) &&
1684         !SourceMgr.isWrittenInCommandLineFile(DigitTok.getLocation()))
1685       Diag(StrTok, diag::ext_pp_gnu_line_directive);
1686 
1687     // Exiting to an empty string means pop to the including file, so leave
1688     // FilenameID as -1 in that case.
1689     if (!(IsFileExit && Literal.GetString().empty()))
1690       FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1691   }
1692 
1693   // Create a line note with this information.
1694   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1695                         IsFileExit, FileKind);
1696 
1697   // If the preprocessor has callbacks installed, notify them of the #line
1698   // change.  This is used so that the line marker comes out in -E mode for
1699   // example.
1700   if (Callbacks) {
1701     PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1702     if (IsFileEntry)
1703       Reason = PPCallbacks::EnterFile;
1704     else if (IsFileExit)
1705       Reason = PPCallbacks::ExitFile;
1706 
1707     Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
1708   }
1709 }
1710 
1711 /// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1712 ///
HandleUserDiagnosticDirective(Token & Tok,bool isWarning)1713 void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1714                                                  bool isWarning) {
1715   // Read the rest of the line raw.  We do this because we don't want macros
1716   // to be expanded and we don't require that the tokens be valid preprocessing
1717   // tokens.  For example, this is allowed: "#warning `   'foo".  GCC does
1718   // collapse multiple consecutive white space between tokens, but this isn't
1719   // specified by the standard.
1720   SmallString<128> Message;
1721   CurLexer->ReadToEndOfLine(&Message);
1722 
1723   // Find the first non-whitespace character, so that we can make the
1724   // diagnostic more succinct.
1725   StringRef Msg = Message.str().ltrim(' ');
1726 
1727   if (isWarning)
1728     Diag(Tok, diag::pp_hash_warning) << Msg;
1729   else
1730     Diag(Tok, diag::err_pp_hash_error) << Msg;
1731 }
1732 
1733 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1734 ///
HandleIdentSCCSDirective(Token & Tok)1735 void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1736   // Yes, this directive is an extension.
1737   Diag(Tok, diag::ext_pp_ident_directive);
1738 
1739   // Read the string argument.
1740   Token StrTok;
1741   Lex(StrTok);
1742 
1743   // If the token kind isn't a string, it's a malformed directive.
1744   if (StrTok.isNot(tok::string_literal) &&
1745       StrTok.isNot(tok::wide_string_literal)) {
1746     Diag(StrTok, diag::err_pp_malformed_ident);
1747     if (StrTok.isNot(tok::eod))
1748       DiscardUntilEndOfDirective();
1749     return;
1750   }
1751 
1752   if (StrTok.hasUDSuffix()) {
1753     Diag(StrTok, diag::err_invalid_string_udl);
1754     DiscardUntilEndOfDirective();
1755     return;
1756   }
1757 
1758   // Verify that there is nothing after the string, other than EOD.
1759   CheckEndOfDirective("ident");
1760 
1761   if (Callbacks) {
1762     bool Invalid = false;
1763     std::string Str = getSpelling(StrTok, &Invalid);
1764     if (!Invalid)
1765       Callbacks->Ident(Tok.getLocation(), Str);
1766   }
1767 }
1768 
1769 /// Handle a #public directive.
HandleMacroPublicDirective(Token & Tok)1770 void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1771   Token MacroNameTok;
1772   ReadMacroName(MacroNameTok, MU_Undef);
1773 
1774   // Error reading macro name?  If so, diagnostic already issued.
1775   if (MacroNameTok.is(tok::eod))
1776     return;
1777 
1778   // Check to see if this is the last token on the #__public_macro line.
1779   CheckEndOfDirective("__public_macro");
1780 
1781   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1782   // Okay, we finally have a valid identifier to undef.
1783   MacroDirective *MD = getLocalMacroDirective(II);
1784 
1785   // If the macro is not defined, this is an error.
1786   if (!MD) {
1787     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1788     return;
1789   }
1790 
1791   // Note that this macro has now been exported.
1792   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1793                                 MacroNameTok.getLocation(), /*isPublic=*/true));
1794 }
1795 
1796 /// Handle a #private directive.
HandleMacroPrivateDirective()1797 void Preprocessor::HandleMacroPrivateDirective() {
1798   Token MacroNameTok;
1799   ReadMacroName(MacroNameTok, MU_Undef);
1800 
1801   // Error reading macro name?  If so, diagnostic already issued.
1802   if (MacroNameTok.is(tok::eod))
1803     return;
1804 
1805   // Check to see if this is the last token on the #__private_macro line.
1806   CheckEndOfDirective("__private_macro");
1807 
1808   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1809   // Okay, we finally have a valid identifier to undef.
1810   MacroDirective *MD = getLocalMacroDirective(II);
1811 
1812   // If the macro is not defined, this is an error.
1813   if (!MD) {
1814     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1815     return;
1816   }
1817 
1818   // Note that this macro has now been marked private.
1819   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1820                                MacroNameTok.getLocation(), /*isPublic=*/false));
1821 }
1822 
1823 //===----------------------------------------------------------------------===//
1824 // Preprocessor Include Directive Handling.
1825 //===----------------------------------------------------------------------===//
1826 
1827 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1828 /// checked and spelled filename, e.g. as an operand of \#include. This returns
1829 /// true if the input filename was in <>'s or false if it were in ""'s.  The
1830 /// caller is expected to provide a buffer that is large enough to hold the
1831 /// spelling of the filename, but is also expected to handle the case when
1832 /// this method decides to use a different buffer.
GetIncludeFilenameSpelling(SourceLocation Loc,StringRef & Buffer)1833 bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1834                                               StringRef &Buffer) {
1835   // Get the text form of the filename.
1836   assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1837 
1838   // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1839   // C++20 [lex.header]/2:
1840   //
1841   // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1842   //   in C: behavior is undefined
1843   //   in C++: program is conditionally-supported with implementation-defined
1844   //           semantics
1845 
1846   // Make sure the filename is <x> or "x".
1847   bool isAngled;
1848   if (Buffer[0] == '<') {
1849     if (Buffer.back() != '>') {
1850       Diag(Loc, diag::err_pp_expects_filename);
1851       Buffer = StringRef();
1852       return true;
1853     }
1854     isAngled = true;
1855   } else if (Buffer[0] == '"') {
1856     if (Buffer.back() != '"') {
1857       Diag(Loc, diag::err_pp_expects_filename);
1858       Buffer = StringRef();
1859       return true;
1860     }
1861     isAngled = false;
1862   } else {
1863     Diag(Loc, diag::err_pp_expects_filename);
1864     Buffer = StringRef();
1865     return true;
1866   }
1867 
1868   // Diagnose #include "" as invalid.
1869   if (Buffer.size() <= 2) {
1870     Diag(Loc, diag::err_pp_empty_filename);
1871     Buffer = StringRef();
1872     return true;
1873   }
1874 
1875   // Skip the brackets.
1876   Buffer = Buffer.substr(1, Buffer.size()-2);
1877   return isAngled;
1878 }
1879 
1880 /// Push a token onto the token stream containing an annotation.
EnterAnnotationToken(SourceRange Range,tok::TokenKind Kind,void * AnnotationVal)1881 void Preprocessor::EnterAnnotationToken(SourceRange Range,
1882                                         tok::TokenKind Kind,
1883                                         void *AnnotationVal) {
1884   // FIXME: Produce this as the current token directly, rather than
1885   // allocating a new token for it.
1886   auto Tok = std::make_unique<Token[]>(1);
1887   Tok[0].startToken();
1888   Tok[0].setKind(Kind);
1889   Tok[0].setLocation(Range.getBegin());
1890   Tok[0].setAnnotationEndLoc(Range.getEnd());
1891   Tok[0].setAnnotationValue(AnnotationVal);
1892   EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
1893 }
1894 
1895 /// Produce a diagnostic informing the user that a #include or similar
1896 /// was implicitly treated as a module import.
diagnoseAutoModuleImport(Preprocessor & PP,SourceLocation HashLoc,Token & IncludeTok,ArrayRef<std::pair<IdentifierInfo *,SourceLocation>> Path,SourceLocation PathEnd)1897 static void diagnoseAutoModuleImport(
1898     Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
1899     ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
1900     SourceLocation PathEnd) {
1901   SmallString<128> PathString;
1902   for (size_t I = 0, N = Path.size(); I != N; ++I) {
1903     if (I)
1904       PathString += '.';
1905     PathString += Path[I].first->getName();
1906   }
1907 
1908   int IncludeKind = 0;
1909   switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1910   case tok::pp_include:
1911     IncludeKind = 0;
1912     break;
1913 
1914   case tok::pp_import:
1915     IncludeKind = 1;
1916     break;
1917 
1918   case tok::pp_include_next:
1919     IncludeKind = 2;
1920     break;
1921 
1922   case tok::pp___include_macros:
1923     IncludeKind = 3;
1924     break;
1925 
1926   default:
1927     llvm_unreachable("unknown include directive kind");
1928   }
1929 
1930   PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation)
1931       << IncludeKind << PathString;
1932 }
1933 
1934 // Given a vector of path components and a string containing the real
1935 // path to the file, build a properly-cased replacement in the vector,
1936 // and return true if the replacement should be suggested.
trySimplifyPath(SmallVectorImpl<StringRef> & Components,StringRef RealPathName,llvm::sys::path::Style Separator)1937 static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1938                             StringRef RealPathName,
1939                             llvm::sys::path::Style Separator) {
1940   auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
1941   auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
1942   int Cnt = 0;
1943   bool SuggestReplacement = false;
1944 
1945   auto IsSep = [Separator](StringRef Component) {
1946     return Component.size() == 1 &&
1947            llvm::sys::path::is_separator(Component[0], Separator);
1948   };
1949 
1950   // Below is a best-effort to handle ".." in paths. It is admittedly
1951   // not 100% correct in the presence of symlinks.
1952   for (auto &Component : llvm::reverse(Components)) {
1953     if ("." == Component) {
1954     } else if (".." == Component) {
1955       ++Cnt;
1956     } else if (Cnt) {
1957       --Cnt;
1958     } else if (RealPathComponentIter != RealPathComponentEnd) {
1959       if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
1960           Component != *RealPathComponentIter) {
1961         // If these non-separator path components differ by more than just case,
1962         // then we may be looking at symlinked paths. Bail on this diagnostic to
1963         // avoid noisy false positives.
1964         SuggestReplacement =
1965             RealPathComponentIter->equals_insensitive(Component);
1966         if (!SuggestReplacement)
1967           break;
1968         Component = *RealPathComponentIter;
1969       }
1970       ++RealPathComponentIter;
1971     }
1972   }
1973   return SuggestReplacement;
1974 }
1975 
checkModuleIsAvailable(const LangOptions & LangOpts,const TargetInfo & TargetInfo,const Module & M,DiagnosticsEngine & Diags)1976 bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
1977                                           const TargetInfo &TargetInfo,
1978                                           const Module &M,
1979                                           DiagnosticsEngine &Diags) {
1980   Module::Requirement Requirement;
1981   Module::UnresolvedHeaderDirective MissingHeader;
1982   Module *ShadowingModule = nullptr;
1983   if (M.isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
1984                     ShadowingModule))
1985     return false;
1986 
1987   if (MissingHeader.FileNameLoc.isValid()) {
1988     Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
1989         << MissingHeader.IsUmbrella << MissingHeader.FileName;
1990   } else if (ShadowingModule) {
1991     Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name;
1992     Diags.Report(ShadowingModule->DefinitionLoc,
1993                  diag::note_previous_definition);
1994   } else {
1995     // FIXME: Track the location at which the requirement was specified, and
1996     // use it here.
1997     Diags.Report(M.DefinitionLoc, diag::err_module_unavailable)
1998         << M.getFullModuleName() << Requirement.RequiredState
1999         << Requirement.FeatureName;
2000   }
2001   return true;
2002 }
2003 
2004 std::pair<ConstSearchDirIterator, const FileEntry *>
getIncludeNextStart(const Token & IncludeNextTok) const2005 Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
2006   // #include_next is like #include, except that we start searching after
2007   // the current found directory.  If we can't do this, issue a
2008   // diagnostic.
2009   ConstSearchDirIterator Lookup = CurDirLookup;
2010   const FileEntry *LookupFromFile = nullptr;
2011 
2012   if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
2013     // If the main file is a header, then it's either for PCH/AST generation,
2014     // or libclang opened it. Either way, handle it as a normal include below
2015     // and do not complain about include_next.
2016   } else if (isInPrimaryFile()) {
2017     Lookup = nullptr;
2018     Diag(IncludeNextTok, diag::pp_include_next_in_primary);
2019   } else if (CurLexerSubmodule) {
2020     // Start looking up in the directory *after* the one in which the current
2021     // file would be found, if any.
2022     assert(CurPPLexer && "#include_next directive in macro?");
2023     if (auto FE = CurPPLexer->getFileEntry())
2024       LookupFromFile = *FE;
2025     Lookup = nullptr;
2026   } else if (!Lookup) {
2027     // The current file was not found by walking the include path. Either it
2028     // is the primary file (handled above), or it was found by absolute path,
2029     // or it was found relative to such a file.
2030     // FIXME: Track enough information so we know which case we're in.
2031     Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
2032   } else {
2033     // Start looking up in the next directory.
2034     ++Lookup;
2035   }
2036 
2037   return {Lookup, LookupFromFile};
2038 }
2039 
2040 /// HandleIncludeDirective - The "\#include" tokens have just been read, read
2041 /// the file to be included from the lexer, then include it!  This is a common
2042 /// routine with functionality shared between \#include, \#include_next and
2043 /// \#import.  LookupFrom is set when this is a \#include_next directive, it
2044 /// specifies the file to start searching from.
HandleIncludeDirective(SourceLocation HashLoc,Token & IncludeTok,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)2045 void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
2046                                           Token &IncludeTok,
2047                                           ConstSearchDirIterator LookupFrom,
2048                                           const FileEntry *LookupFromFile) {
2049   Token FilenameTok;
2050   if (LexHeaderName(FilenameTok))
2051     return;
2052 
2053   if (FilenameTok.isNot(tok::header_name)) {
2054     Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
2055     if (FilenameTok.isNot(tok::eod))
2056       DiscardUntilEndOfDirective();
2057     return;
2058   }
2059 
2060   // Verify that there is nothing after the filename, other than EOD.  Note
2061   // that we allow macros that expand to nothing after the filename, because
2062   // this falls into the category of "#include pp-tokens new-line" specified
2063   // in C99 6.10.2p4.
2064   SourceLocation EndLoc =
2065       CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
2066 
2067   auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
2068                                             EndLoc, LookupFrom, LookupFromFile);
2069   switch (Action.Kind) {
2070   case ImportAction::None:
2071   case ImportAction::SkippedModuleImport:
2072     break;
2073   case ImportAction::ModuleBegin:
2074     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2075                          tok::annot_module_begin, Action.ModuleForHeader);
2076     break;
2077   case ImportAction::HeaderUnitImport:
2078     EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
2079                          Action.ModuleForHeader);
2080     break;
2081   case ImportAction::ModuleImport:
2082     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2083                          tok::annot_module_include, Action.ModuleForHeader);
2084     break;
2085   case ImportAction::Failure:
2086     assert(TheModuleLoader.HadFatalFailure &&
2087            "This should be an early exit only to a fatal error");
2088     TheModuleLoader.HadFatalFailure = true;
2089     IncludeTok.setKind(tok::eof);
2090     CurLexer->cutOffLexing();
2091     return;
2092   }
2093 }
2094 
LookupHeaderIncludeOrImport(ConstSearchDirIterator * CurDir,StringRef & Filename,SourceLocation FilenameLoc,CharSourceRange FilenameRange,const Token & FilenameTok,bool & IsFrameworkFound,bool IsImportDecl,bool & IsMapped,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile,StringRef & LookupFilename,SmallVectorImpl<char> & RelativePath,SmallVectorImpl<char> & SearchPath,ModuleMap::KnownHeader & SuggestedModule,bool isAngled)2095 OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
2096     ConstSearchDirIterator *CurDir, StringRef &Filename,
2097     SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2098     const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2099     bool &IsMapped, ConstSearchDirIterator LookupFrom,
2100     const FileEntry *LookupFromFile, StringRef &LookupFilename,
2101     SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2102     ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
2103   auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
2104     if (LangOpts.AsmPreprocessor)
2105       return;
2106 
2107     Module *RequestingModule = getModuleForLocation(
2108         FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
2109     bool RequestingModuleIsModuleInterface =
2110         !SourceMgr.isInMainFile(FilenameLoc);
2111 
2112     HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
2113         RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
2114         Filename, FE);
2115   };
2116 
2117   OptionalFileEntryRef File = LookupFile(
2118       FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir,
2119       Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2120       &SuggestedModule, &IsMapped, &IsFrameworkFound);
2121   if (File) {
2122     DiagnoseHeaderInclusion(*File);
2123     return File;
2124   }
2125 
2126   // Give the clients a chance to silently skip this include.
2127   if (Callbacks && Callbacks->FileNotFound(Filename))
2128     return std::nullopt;
2129 
2130   if (SuppressIncludeNotFoundError)
2131     return std::nullopt;
2132 
2133   // If the file could not be located and it was included via angle
2134   // brackets, we can attempt a lookup as though it were a quoted path to
2135   // provide the user with a possible fixit.
2136   if (isAngled) {
2137     OptionalFileEntryRef File = LookupFile(
2138         FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir,
2139         Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2140         &SuggestedModule, &IsMapped,
2141         /*IsFrameworkFound=*/nullptr);
2142     if (File) {
2143       DiagnoseHeaderInclusion(*File);
2144       Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
2145           << Filename << IsImportDecl
2146           << FixItHint::CreateReplacement(FilenameRange,
2147                                           "\"" + Filename.str() + "\"");
2148       return File;
2149     }
2150   }
2151 
2152   // Check for likely typos due to leading or trailing non-isAlphanumeric
2153   // characters
2154   StringRef OriginalFilename = Filename;
2155   if (LangOpts.SpellChecking) {
2156     // A heuristic to correct a typo file name by removing leading and
2157     // trailing non-isAlphanumeric characters.
2158     auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2159       Filename = Filename.drop_until(isAlphanumeric);
2160       while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
2161         Filename = Filename.drop_back();
2162       }
2163       return Filename;
2164     };
2165     StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2166     StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2167 
2168     OptionalFileEntryRef File = LookupFile(
2169         FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom,
2170         LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr,
2171         Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
2172         /*IsFrameworkFound=*/nullptr);
2173     if (File) {
2174       DiagnoseHeaderInclusion(*File);
2175       auto Hint =
2176           isAngled ? FixItHint::CreateReplacement(
2177                          FilenameRange, "<" + TypoCorrectionName.str() + ">")
2178                    : FixItHint::CreateReplacement(
2179                          FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
2180       Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
2181           << OriginalFilename << TypoCorrectionName << Hint;
2182       // We found the file, so set the Filename to the name after typo
2183       // correction.
2184       Filename = TypoCorrectionName;
2185       LookupFilename = TypoCorrectionLookupName;
2186       return File;
2187     }
2188   }
2189 
2190   // If the file is still not found, just go with the vanilla diagnostic
2191   assert(!File && "expected missing file");
2192   Diag(FilenameTok, diag::err_pp_file_not_found)
2193       << OriginalFilename << FilenameRange;
2194   if (IsFrameworkFound) {
2195     size_t SlashPos = OriginalFilename.find('/');
2196     assert(SlashPos != StringRef::npos &&
2197            "Include with framework name should have '/' in the filename");
2198     StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
2199     FrameworkCacheEntry &CacheEntry =
2200         HeaderInfo.LookupFrameworkCache(FrameworkName);
2201     assert(CacheEntry.Directory && "Found framework should be in cache");
2202     Diag(FilenameTok, diag::note_pp_framework_without_header)
2203         << OriginalFilename.substr(SlashPos + 1) << FrameworkName
2204         << CacheEntry.Directory->getName();
2205   }
2206 
2207   return std::nullopt;
2208 }
2209 
2210 /// Handle either a #include-like directive or an import declaration that names
2211 /// a header file.
2212 ///
2213 /// \param HashLoc The location of the '#' token for an include, or
2214 ///        SourceLocation() for an import declaration.
2215 /// \param IncludeTok The include / include_next / import token.
2216 /// \param FilenameTok The header-name token.
2217 /// \param EndLoc The location at which any imported macros become visible.
2218 /// \param LookupFrom For #include_next, the starting directory for the
2219 ///        directory lookup.
2220 /// \param LookupFromFile For #include_next, the starting file for the directory
2221 ///        lookup.
     ///
     /// \returns An ImportAction telling the caller what happened:
     ///          - ImportAction::None when there is nothing further to do (the
     ///            header was not found or was rejected, the include was skipped
     ///            without a module, the include limit applies, or the file was
     ///            handled as a plain textual include);
     ///          - ImportAction::SkippedModuleImport (with the module) when the
     ///            file is skipped but belongs to a module;
     ///          - ImportAction::ModuleImport (with the module) when the include
     ///            was turned into a module import and made visible;
     ///          - ImportAction::ModuleBegin (with the module) when the header
     ///            was entered textually as the start of a submodule;
     ///          - ImportAction::Failure when no FileID could be created for the
     ///            file.
HandleHeaderIncludeOrImport(SourceLocation HashLoc,Token & IncludeTok,Token & FilenameTok,SourceLocation EndLoc,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)2222 Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2223     SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2224     SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2225     const FileEntry *LookupFromFile) {
2226   SmallString<128> FilenameBuffer;
2227   StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
2228   SourceLocation CharEnd = FilenameTok.getEndLoc();
2229 
2230   CharSourceRange FilenameRange
2231     = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
       // Remember the spelling exactly as written; the include-alias lookup and
       // the header-unit diagnostic below use it rather than Filename.
2232   StringRef OriginalFilename = Filename;
2233   bool isAngled =
2234     GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
2235 
2236   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2237   // error.
2238   if (Filename.empty())
2239     return {ImportAction::None};
2240 
       // An invalid HashLoc is how callers signal an import declaration (see the
       // \param HashLoc documentation); diagnostics then point at the import
       // token instead of a '#'.
2241   bool IsImportDecl = HashLoc.isInvalid();
2242   SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2243 
2244   // Complain about attempts to #include files in an audit pragma.
2245   if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
2246     Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2247     Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);
2248 
2249     // Immediately leave the pragma.
2250     PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
2251   }
2252 
2253   // Complain about attempts to #include files in an assume-nonnull pragma.
2254   if (PragmaAssumeNonNullLoc.isValid()) {
2255     Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2256     Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
2257 
2258     // Immediately leave the pragma.
2259     PragmaAssumeNonNullLoc = SourceLocation();
2260   }
2261 
2262   if (HeaderInfo.HasIncludeAliasMap()) {
2263     // Map the filename with the brackets still attached.  If the name doesn't
2264     // map to anything, fall back on the filename we've already gotten the
2265     // spelling for.
2266     StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
2267     if (!NewName.empty())
2268       Filename = NewName;
2269   }
2270 
2271   // Search include directories.
2272   bool IsMapped = false;
2273   bool IsFrameworkFound = false;
2274   ConstSearchDirIterator CurDir = nullptr;
2275   SmallString<1024> SearchPath;
2276   SmallString<1024> RelativePath;
2277   // We get the raw path only if we have 'Callbacks' to which we later pass
2278   // the path.
2279   ModuleMap::KnownHeader SuggestedModule;
2280   SourceLocation FilenameLoc = FilenameTok.getLocation();
2281   StringRef LookupFilename = Filename;
2282 
2283   // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2284   // is unnecessary on Windows since the filesystem there handles backslashes.
2285   SmallString<128> NormalizedPath;
2286   llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2287   if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
2288     NormalizedPath = Filename.str();
2289     llvm::sys::path::native(NormalizedPath);
2290     LookupFilename = NormalizedPath;
2291     BackslashStyle = llvm::sys::path::Style::windows;
2292   }
2293 
2294   OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2295       &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2296       IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2297       LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2298 
       // While skipping up to the PCH through header, never enter anything: just
       // stop skipping once this include turns out to be the through header.
2299   if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2300     if (File && isPCHThroughHeader(&File->getFileEntry()))
2301       SkippingUntilPCHThroughHeader = false;
2302     return {ImportAction::None};
2303   }
2304 
2305   // Should we enter the source file? Set to Skip if either the source file is
2306   // known to have no effect beyond its effect on module visibility -- that is,
2307   // if it's got an include guard that is already defined, set to Import if it
2308   // is a modular header we've already built and should import.
2309 
2310   // For C++20 Modules
2311   // [cpp.include]/7 If the header identified by the header-name denotes an
2312   // importable header, it is implementation-defined whether the #include
2313   // preprocessing directive is instead replaced by an import directive.
2314   // For this implementation, the translation is permitted when we are parsing
2315   // the Global Module Fragment, and not otherwise (the cases where it would be
2316   // valid to replace an include with an import are highly constrained once in
2317   // named module purview; this choice avoids considerable complexity in
2318   // determining valid cases).
2319 
2320   enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2321 
       // Single-file parse mode never enters an included file; it reuses the
       // IncludeLimitReached action, which returns without entering anything.
2322   if (PPOpts->SingleFileParseMode)
2323     Action = IncludeLimitReached;
2324 
2325   // If we've reached the max allowed include depth, it is usually due to an
2326   // include cycle. Don't enter already processed files again as it can lead to
2327   // reaching the max allowed include depth again.
2328   if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2329       alreadyIncluded(*File))
2330     Action = IncludeLimitReached;
2331 
2332   // FIXME: We do not have a good way to disambiguate C++ clang modules from
2333   // C++ standard modules (other than use/non-use of Header Units).
2334 
2335   Module *ModuleToImport = SuggestedModule.getModule();
2336 
2337   bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport &&
2338                                !ModuleToImport->isForBuilding(getLangOpts());
2339 
2340   // Maybe a usable Header Unit
2341   bool UsableHeaderUnit = false;
2342   if (getLangOpts().CPlusPlusModules && ModuleToImport &&
2343       ModuleToImport->isHeaderUnit()) {
2344     if (TrackGMFState.inGMF() || IsImportDecl)
2345       UsableHeaderUnit = true;
         // NOTE(review): !IsImportDecl is always true on this branch, since the
         // condition above already took every IsImportDecl case.
2346     else if (!IsImportDecl) {
2347       // This is a Header Unit that we do not include-translate
2348       ModuleToImport = nullptr;
2349     }
2350   }
2351   // Maybe a usable clang header module.
2352   bool UsableClangHeaderModule =
2353       (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
2354       ModuleToImport && !ModuleToImport->isHeaderUnit();
2355 
2356   // Determine whether we should try to import the module for this #include, if
2357   // there is one. Don't do so if precompiled module support is disabled or we
2358   // are processing this module textually (because we're building the module).
2359   if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2360     // If this include corresponds to a module but that module is
2361     // unavailable, diagnose the situation and bail out.
2362     // FIXME: Remove this; loadModule does the same check (but produces
2363     // slightly worse diagnostics).
2364     if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), *ModuleToImport,
2365                                getDiagnostics())) {
2366       Diag(FilenameTok.getLocation(),
2367            diag::note_implicit_top_level_module_import_here)
2368           << ModuleToImport->getTopLevelModuleName();
2369       return {ImportAction::None};
2370     }
2371 
2372     // Compute the module access path corresponding to this module.
2373     // FIXME: Should we have a second loadModule() overload to avoid this
2374     // extra lookup step?
         // The loop walks from the module up through its parents, so the path is
         // collected innermost-first and reversed afterwards.
2375     SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
2376     for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent)
2377       Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
2378                                     FilenameTok.getLocation()));
2379     std::reverse(Path.begin(), Path.end());
2380 
2381     // Warn that we're replacing the include/import with a module import.
2382     if (!IsImportDecl)
2383       diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);
2384 
2385     // Load the module to import its macros. We'll make the declarations
2386     // visible when the parser gets here.
2387     // FIXME: Pass ModuleToImport in here rather than converting it to a path
2388     // and making the module loader convert it back again.
2389     ModuleLoadResult Imported = TheModuleLoader.loadModule(
2390         IncludeTok.getLocation(), Path, Module::Hidden,
2391         /*IsInclusionDirective=*/true);
2392     assert((Imported == nullptr || Imported == ModuleToImport) &&
2393            "the imported module is different than the suggested one");
2394 
2395     if (Imported) {
2396       Action = Import;
2397     } else if (Imported.isMissingExpected()) {
2398       markClangModuleAsAffecting(
2399           static_cast<Module *>(Imported)->getTopLevelModule());
2400       // We failed to find a submodule that we assumed would exist (because it
2401       // was in the directory of an umbrella header, for instance), but no
2402       // actual module containing it exists (because the umbrella header is
2403       // incomplete).  Treat this as a textual inclusion.
2404       ModuleToImport = nullptr;
2405     } else if (Imported.isConfigMismatch()) {
2406       // On a configuration mismatch, enter the header textually. We still know
2407       // that it's part of the corresponding module.
2408     } else {
2409       // We hit an error processing the import. Bail out.
2410       if (hadModuleLoaderFatalFailure()) {
2411         // With a fatal failure in the module loader, we abort parsing.
2412         Token &Result = IncludeTok;
2413         assert(CurLexer && "#include but no current lexer set!");
2414         Result.startToken();
2415         CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
2416         CurLexer->cutOffLexing();
2417       }
2418       return {ImportAction::None};
2419     }
2420   }
2421 
2422   // The #included file will be considered to be a system header if either it is
2423   // in a system include directory, or if the #includer is a system include
2424   // header.
2425   SrcMgr::CharacteristicKind FileCharacter =
2426       SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
2427   if (File)
2428     FileCharacter = std::max(HeaderInfo.getFileDirFlavor(*File), FileCharacter);
2429 
2430   // If this is a '#import' or an import-declaration, don't re-enter the file.
2431   //
2432   // FIXME: If we have a suggested module for a '#include', and we've already
2433   // visited this file, don't bother entering it again. We know it has no
2434   // further effect.
2435   bool EnterOnce =
2436       IsImportDecl ||
2437       IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2438 
2439   bool IsFirstIncludeOfFile = false;
2440 
2441   // Ask HeaderInfo if we should enter this #include file.  If not, #including
2442   // this file will have no effect.
2443   if (Action == Enter && File &&
2444       !HeaderInfo.ShouldEnterIncludeFile(*this, *File, EnterOnce,
2445                                          getLangOpts().Modules, ModuleToImport,
2446                                          IsFirstIncludeOfFile)) {
2447     // C++ standard modules:
2448     // If we are not in the GMF, then we textually include only
2449     // clang modules:
2450     // Even if we've already preprocessed this header once and know that we
2451     // don't need to see its contents again, we still need to import it if it's
2452     // modular because we might not have imported it from this submodule before.
2453     //
2454     // FIXME: We don't do this when compiling a PCH because the AST
2455     // serialization layer can't cope with it. This means we get local
2456     // submodule visibility semantics wrong in that case.
2457     if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2458       Action = TrackGMFState.inGMF() ? Import : Skip;
2459     else
2460       Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip;
2461   }
2462 
2463   // Check for circular inclusion of the main file.
2464   // We can't generate a consistent preamble with regard to the conditional
2465   // stack if the main file is included again as due to the preamble bounds
2466   // some directives (e.g. #endif of a header guard) will never be seen.
2467   // Since this will lead to confusing errors, avoid the inclusion.
2468   if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2469       SourceMgr.isMainFile(File->getFileEntry())) {
2470     Diag(FilenameTok.getLocation(),
2471          diag::err_pp_including_mainfile_in_preamble);
2472     return {ImportAction::None};
2473   }
2474 
2475   if (Callbacks && !IsImportDecl) {
2476     // Notify the callback object that we've seen an inclusion directive.
2477     // FIXME: Use a different callback for a pp-import?
2478     Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled,
2479                                   FilenameRange, File, SearchPath, RelativePath,
2480                                   SuggestedModule.getModule(), Action == Import,
2481                                   FileCharacter);
         // Additionally report files that were found but will not be entered.
2482     if (Action == Skip && File)
2483       Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
2484   }
2485 
       // Everything below needs an actual file; the callbacks above are still
       // notified when the lookup failed.
2486   if (!File)
2487     return {ImportAction::None};
2488 
2489   // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2490   // module corresponding to the named header.
2491   if (IsImportDecl && !ModuleToImport) {
2492     Diag(FilenameTok, diag::err_header_import_not_header_unit)
2493       << OriginalFilename << File->getName();
2494     return {ImportAction::None};
2495   }
2496 
2497   // Issue a diagnostic if the name of the file on disk has a different case
2498   // than the one we're about to open.
2499   const bool CheckIncludePathPortability =
2500       !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2501 
2502   if (CheckIncludePathPortability) {
2503     StringRef Name = LookupFilename;
2504     StringRef NameWithoriginalSlashes = Filename;
2505 #if defined(_WIN32)
2506     // Skip UNC prefix if present. (tryGetRealPathName() always
2507     // returns a path with the prefix skipped.)
2508     bool NameWasUNC = Name.consume_front("\\\\?\\");
2509     NameWithoriginalSlashes.consume_front("\\\\?\\");
2510 #endif
2511     StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2512     SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
2513                                           llvm::sys::path::end(Name));
2514 #if defined(_WIN32)
2515     // -Wnonportable-include-path is designed to diagnose includes using
2516     // case even on systems with a case-insensitive file system.
2517     // On Windows, RealPathName always starts with an upper-case drive
2518     // letter for absolute paths, but Name might start with either
2519     // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2520     // ("foo" will always have on-disk case, no matter which case was
2521     // used in the cd command). To not emit this warning solely for
2522     // the drive letter, whose case is dependent on if `cd` is used
2523     // with upper- or lower-case drive letters, always consider the
2524     // given drive letter case as correct for the purpose of this warning.
2525     SmallString<128> FixedDriveRealPath;
2526     if (llvm::sys::path::is_absolute(Name) &&
2527         llvm::sys::path::is_absolute(RealPathName) &&
2528         toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2529         isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2530       assert(Components.size() >= 3 && "should have drive, backslash, name");
2531       assert(Components[0].size() == 2 && "should start with drive");
2532       assert(Components[0][1] == ':' && "should have colon");
2533       FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2534       RealPathName = FixedDriveRealPath;
2535     }
2536 #endif
2537 
2538     if (trySimplifyPath(Components, RealPathName, BackslashStyle)) {
           // Rebuild the header-name (delimiters included) from the adjusted
           // components so it can be offered as a fix-it replacement.
2539       SmallString<128> Path;
2540       Path.reserve(Name.size()+2);
2541       Path.push_back(isAngled ? '<' : '"');
2542 
2543       const auto IsSep = [BackslashStyle](char c) {
2544         return llvm::sys::path::is_separator(c, BackslashStyle);
2545       };
2546 
2547       for (auto Component : Components) {
2548         // On POSIX, Components will contain a single '/' as first element
2549         // exactly if Name is an absolute path.
2550         // On Windows, it will contain "C:" followed by '\' for absolute paths.
2551         // The drive letter is optional for absolute paths on Windows, but
2552         // clang currently cannot process absolute paths in #include lines that
2553         // don't have a drive.
2554         // If the first entry in Components is a directory separator,
2555         // then the code at the bottom of this loop that keeps the original
2556         // directory separator style copies it. If the second entry is
2557         // a directory separator (the C:\ case), then that separator already
2558         // got copied when the C: was processed and we want to skip that entry.
2559         if (!(Component.size() == 1 && IsSep(Component[0])))
2560           Path.append(Component);
2561         else if (Path.size() != 1)
2562           continue;
2563 
2564         // Append the separator(s) the user used, or the close quote
2565         if (Path.size() > NameWithoriginalSlashes.size()) {
2566           Path.push_back(isAngled ? '>' : '"');
2567           continue;
2568         }
             // Path holds one extra leading delimiter, so Path.size()-1 is the
             // index of the next unconsumed character of the user's spelling.
2569         assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2570         do
2571           Path.push_back(NameWithoriginalSlashes[Path.size()-1]);
2572         while (Path.size() <= NameWithoriginalSlashes.size() &&
2573                IsSep(NameWithoriginalSlashes[Path.size()-1]));
2574       }
2575 
2576 #if defined(_WIN32)
2577       // Restore UNC prefix if it was there.
2578       if (NameWasUNC)
2579         Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2580 #endif
2581 
2582       // For user files and known standard headers, issue a diagnostic.
2583       // For other system headers, don't. They can be controlled separately.
2584       auto DiagId =
2585           (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
2586               ? diag::pp_nonportable_path
2587               : diag::pp_nonportable_system_path;
2588       Diag(FilenameTok, DiagId) << Path <<
2589         FixItHint::CreateReplacement(FilenameRange, Path);
2590     }
2591   }
2592 
       // Act on the decision made above; only the Enter case continues past this
       // switch.
2593   switch (Action) {
2594   case Skip:
2595     // If we don't need to enter the file, stop now.
2596     if (ModuleToImport)
2597       return {ImportAction::SkippedModuleImport, ModuleToImport};
2598     return {ImportAction::None};
2599 
2600   case IncludeLimitReached:
2601     // If we reached our include limit and don't want to enter any more files,
2602     // don't go any further.
2603     return {ImportAction::None};
2604 
2605   case Import: {
2606     // If this is a module import, make it visible if needed.
2607     assert(ModuleToImport && "no module to import");
2608 
2609     makeModuleVisible(ModuleToImport, EndLoc);
2610 
         // For #__include_macros the module has been made visible, but no
         // ModuleImport action is reported to the caller.
2611     if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2612         tok::pp___include_macros)
2613       return {ImportAction::None};
2614 
2615     return {ImportAction::ModuleImport, ModuleToImport};
2616   }
2617 
2618   case Enter:
2619     break;
2620   }
2621 
2622   // Check that we don't have infinite #include recursion.
2623   if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2624     Diag(FilenameTok, diag::err_pp_include_too_deep);
2625     HasReachedMaxIncludeDepth = true;
2626     return {ImportAction::None};
2627   }
2628 
2629   if (isAngled && isInNamedModule())
2630     Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview)
2631         << getNamedModuleName();
2632 
2633   // Look up the file, create a File ID for it.
2634   SourceLocation IncludePos = FilenameTok.getLocation();
2635   // If the filename string was the result of macro expansions, set the include
2636   // position on the file where it will be included and after the expansions.
2637   if (IncludePos.isMacroID())
2638     IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
2639   FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
2640   if (!FID.isValid()) {
         // No FileID could be created: record this on the module loader as a
         // fatal failure and report Failure to the caller.
2641     TheModuleLoader.HadFatalFailure = true;
2642     return ImportAction::Failure;
2643   }
2644 
2645   // If all is good, enter the new file!
       // NOTE(review): a true result stops here with no action; presumably
       // EnterSourceFile returns true on error -- confirm against its declaration.
2646   if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(),
2647                       IsFirstIncludeOfFile))
2648     return {ImportAction::None};
2649 
2650   // Determine if we're switching to building a new submodule, and which one.
2651   // This does not apply for C++20 modules header units.
2652   if (ModuleToImport && !ModuleToImport->isHeaderUnit()) {
2653     if (ModuleToImport->getTopLevelModule()->ShadowingModule) {
2654       // We are building a submodule that belongs to a shadowed module. This
2655       // means we find header files in the shadowed module.
2656       Diag(ModuleToImport->DefinitionLoc,
2657            diag::err_module_build_shadowed_submodule)
2658           << ModuleToImport->getFullModuleName();
2659       Diag(ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2660            diag::note_previous_definition);
2661       return {ImportAction::None};
2662     }
2663     // When building a pch, -fmodule-name tells the compiler to textually
2664     // include headers in the specified module. We are not building the
2665     // specified module.
2666     //
2667     // FIXME: This is the wrong way to handle this. We should produce a PCH
2668     // that behaves the same as the header would behave in a compilation using
2669     // that PCH, which means we should enter the submodule. We need to teach
2670     // the AST serialization layer to deal with the resulting AST.
2671     if (getLangOpts().CompilingPCH &&
2672         ModuleToImport->isForBuilding(getLangOpts()))
2673       return {ImportAction::None};
2674 
2675     assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2676     CurLexerSubmodule = ModuleToImport;
2677 
2678     // Let the macro handling code know that any future macros are within
2679     // the new submodule.
2680     EnterSubmodule(ModuleToImport, EndLoc, /*ForPragma*/ false);
2681 
2682     // Let the parser know that any future declarations are within the new
2683     // submodule.
2684     // FIXME: There's no point doing this if we're handling a #__include_macros
2685     // directive.
2686     return {ImportAction::ModuleBegin, ModuleToImport};
2687   }
2688 
2689   assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2690   return {ImportAction::None};
2691 }
2692 
2693 /// HandleIncludeNextDirective - Implements \#include_next.
2694 ///
HandleIncludeNextDirective(SourceLocation HashLoc,Token & IncludeNextTok)2695 void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2696                                               Token &IncludeNextTok) {
2697   Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2698 
2699   ConstSearchDirIterator Lookup = nullptr;
2700   const FileEntry *LookupFromFile;
2701   std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2702 
2703   return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
2704                                 LookupFromFile);
2705 }
2706 
2707 /// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
HandleMicrosoftImportDirective(Token & Tok)2708 void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2709   // The Microsoft #import directive takes a type library and generates header
2710   // files from it, and includes those.  This is beyond the scope of what clang
2711   // does, so we ignore it and error out.  However, #import can optionally have
2712   // trailing attributes that span multiple lines.  We're going to eat those
2713   // so we can continue processing from there.
2714   Diag(Tok, diag::err_pp_import_directive_ms );
2715 
2716   // Read tokens until we get to the end of the directive.  Note that the
2717   // directive can be split over multiple lines using the backslash character.
2718   DiscardUntilEndOfDirective();
2719 }
2720 
2721 /// HandleImportDirective - Implements \#import.
2722 ///
HandleImportDirective(SourceLocation HashLoc,Token & ImportTok)2723 void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2724                                          Token &ImportTok) {
2725   if (!LangOpts.ObjC) {  // #import is standard for ObjC.
2726     if (LangOpts.MSVCCompat)
2727       return HandleMicrosoftImportDirective(ImportTok);
2728     Diag(ImportTok, diag::ext_pp_import_directive);
2729   }
2730   return HandleIncludeDirective(HashLoc, ImportTok);
2731 }
2732 
2733 /// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2734 /// pseudo directive in the predefines buffer.  This handles it by sucking all
2735 /// tokens through the preprocessor and discarding them (only keeping the side
2736 /// effects on the preprocessor).
HandleIncludeMacrosDirective(SourceLocation HashLoc,Token & IncludeMacrosTok)2737 void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2738                                                 Token &IncludeMacrosTok) {
2739   // This directive should only occur in the predefines buffer.  If not, emit an
2740   // error and reject it.
2741   SourceLocation Loc = IncludeMacrosTok.getLocation();
2742   if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2743     Diag(IncludeMacrosTok.getLocation(),
2744          diag::pp_include_macros_out_of_predefines);
2745     DiscardUntilEndOfDirective();
2746     return;
2747   }
2748 
2749   // Treat this as a normal #include for checking purposes.  If this is
2750   // successful, it will push a new lexer onto the include stack.
2751   HandleIncludeDirective(HashLoc, IncludeMacrosTok);
2752 
2753   Token TmpTok;
2754   do {
2755     Lex(TmpTok);
2756     assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2757   } while (TmpTok.isNot(tok::hashhash));
2758 }
2759 
2760 //===----------------------------------------------------------------------===//
2761 // Preprocessor Macro Directive Handling.
2762 //===----------------------------------------------------------------------===//
2763 
2764 /// ReadMacroParameterList - The ( starting a parameter list of a macro
2765 /// definition has just been read.  Lex the rest of the parameters and the
2766 /// closing ), updating MI with what we learn.  Return true if an error occurs
2767 /// parsing the param list.
ReadMacroParameterList(MacroInfo * MI,Token & Tok)2768 bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2769   SmallVector<IdentifierInfo*, 32> Parameters;
2770 
2771   while (true) {
2772     LexUnexpandedNonComment(Tok);
2773     switch (Tok.getKind()) {
2774     case tok::r_paren:
2775       // Found the end of the parameter list.
2776       if (Parameters.empty())  // #define FOO()
2777         return false;
2778       // Otherwise we have #define FOO(A,)
2779       Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2780       return true;
2781     case tok::ellipsis:  // #define X(... -> C99 varargs
2782       if (!LangOpts.C99)
2783         Diag(Tok, LangOpts.CPlusPlus11 ?
2784              diag::warn_cxx98_compat_variadic_macro :
2785              diag::ext_variadic_macro);
2786 
2787       // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2788       if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2789         Diag(Tok, diag::ext_pp_opencl_variadic_macros);
2790       }
2791 
2792       // Lex the token after the identifier.
2793       LexUnexpandedNonComment(Tok);
2794       if (Tok.isNot(tok::r_paren)) {
2795         Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2796         return true;
2797       }
2798       // Add the __VA_ARGS__ identifier as a parameter.
2799       Parameters.push_back(Ident__VA_ARGS__);
2800       MI->setIsC99Varargs();
2801       MI->setParameterList(Parameters, BP);
2802       return false;
2803     case tok::eod:  // #define X(
2804       Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2805       return true;
2806     default:
2807       // Handle keywords and identifiers here to accept things like
2808       // #define Foo(for) for.
2809       IdentifierInfo *II = Tok.getIdentifierInfo();
2810       if (!II) {
2811         // #define X(1
2812         Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2813         return true;
2814       }
2815 
2816       // If this is already used as a parameter, it is used multiple times (e.g.
2817       // #define X(A,A.
2818       if (llvm::is_contained(Parameters, II)) { // C99 6.10.3p6
2819         Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
2820         return true;
2821       }
2822 
2823       // Add the parameter to the macro info.
2824       Parameters.push_back(II);
2825 
2826       // Lex the token after the identifier.
2827       LexUnexpandedNonComment(Tok);
2828 
2829       switch (Tok.getKind()) {
2830       default:          // #define X(A B
2831         Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2832         return true;
2833       case tok::r_paren: // #define X(A)
2834         MI->setParameterList(Parameters, BP);
2835         return false;
2836       case tok::comma:  // #define X(A,
2837         break;
2838       case tok::ellipsis:  // #define X(A... -> GCC extension
2839         // Diagnose extension.
2840         Diag(Tok, diag::ext_named_variadic_macro);
2841 
2842         // Lex the token after the identifier.
2843         LexUnexpandedNonComment(Tok);
2844         if (Tok.isNot(tok::r_paren)) {
2845           Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2846           return true;
2847         }
2848 
2849         MI->setIsGNUVarargs();
2850         MI->setParameterList(Parameters, BP);
2851         return false;
2852       }
2853     }
2854   }
2855 }
2856 
isConfigurationPattern(Token & MacroName,MacroInfo * MI,const LangOptions & LOptions)2857 static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2858                                    const LangOptions &LOptions) {
2859   if (MI->getNumTokens() == 1) {
2860     const Token &Value = MI->getReplacementToken(0);
2861 
2862     // Macro that is identity, like '#define inline inline' is a valid pattern.
2863     if (MacroName.getKind() == Value.getKind())
2864       return true;
2865 
2866     // Macro that maps a keyword to the same keyword decorated with leading/
2867     // trailing underscores is a valid pattern:
2868     //    #define inline __inline
2869     //    #define inline __inline__
2870     //    #define inline _inline (in MS compatibility mode)
2871     StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2872     if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2873       if (!II->isKeyword(LOptions))
2874         return false;
2875       StringRef ValueText = II->getName();
2876       StringRef TrimmedValue = ValueText;
2877       if (!ValueText.starts_with("__")) {
2878         if (ValueText.starts_with("_"))
2879           TrimmedValue = TrimmedValue.drop_front(1);
2880         else
2881           return false;
2882       } else {
2883         TrimmedValue = TrimmedValue.drop_front(2);
2884         if (TrimmedValue.ends_with("__"))
2885           TrimmedValue = TrimmedValue.drop_back(2);
2886       }
2887       return TrimmedValue == MacroText;
2888     } else {
2889       return false;
2890     }
2891   }
2892 
2893   // #define inline
2894   return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
2895                            tok::kw_const) &&
2896          MI->getNumTokens() == 0;
2897 }
2898 
2899 // ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2900 // entire line) of the macro's tokens and adds them to MacroInfo, and while
2901 // doing so performs certain validity checks including (but not limited to):
2902 //   - # (stringization) is followed by a macro parameter
2903 //
2904 //  Returns a nullptr if an invalid sequence of tokens is encountered or returns
2905 //  a pointer to a MacroInfo object.
2906 
ReadOptionalMacroParameterListAndBody(const Token & MacroNameTok,const bool ImmediatelyAfterHeaderGuard)2907 MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
2908     const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
2909 
2910   Token LastTok = MacroNameTok;
2911   // Create the new macro.
2912   MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());
2913 
2914   Token Tok;
2915   LexUnexpandedToken(Tok);
2916 
2917   // Ensure we consume the rest of the macro body if errors occur.
2918   auto _ = llvm::make_scope_exit([&]() {
2919     // The flag indicates if we are still waiting for 'eod'.
2920     if (CurLexer->ParsingPreprocessorDirective)
2921       DiscardUntilEndOfDirective();
2922   });
2923 
2924   // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
2925   // within their appropriate context.
2926   VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
2927 
2928   // If this is a function-like macro definition, parse the argument list,
2929   // marking each of the identifiers as being used as macro arguments.  Also,
2930   // check other constraints on the first token of the macro body.
2931   if (Tok.is(tok::eod)) {
2932     if (ImmediatelyAfterHeaderGuard) {
2933       // Save this macro information since it may part of a header guard.
2934       CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
2935                                         MacroNameTok.getLocation());
2936     }
2937     // If there is no body to this macro, we have no special handling here.
2938   } else if (Tok.hasLeadingSpace()) {
2939     // This is a normal token with leading space.  Clear the leading space
2940     // marker on the first token to get proper expansion.
2941     Tok.clearFlag(Token::LeadingSpace);
2942   } else if (Tok.is(tok::l_paren)) {
2943     // This is a function-like macro definition.  Read the argument list.
2944     MI->setIsFunctionLike();
2945     if (ReadMacroParameterList(MI, LastTok))
2946       return nullptr;
2947 
2948     // If this is a definition of an ISO C/C++ variadic function-like macro (not
2949     // using the GNU named varargs extension) inform our variadic scope guard
2950     // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
2951     // allowed only within the definition of a variadic macro.
2952 
2953     if (MI->isC99Varargs()) {
2954       VariadicMacroScopeGuard.enterScope();
2955     }
2956 
2957     // Read the first token after the arg list for down below.
2958     LexUnexpandedToken(Tok);
2959   } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
2960     // C99 requires whitespace between the macro definition and the body.  Emit
2961     // a diagnostic for something like "#define X+".
2962     Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
2963   } else {
2964     // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
2965     // first character of a replacement list is not a character required by
2966     // subclause 5.2.1, then there shall be white-space separation between the
2967     // identifier and the replacement list.".  5.2.1 lists this set:
2968     //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
2969     // is irrelevant here.
2970     bool isInvalid = false;
2971     if (Tok.is(tok::at)) // @ is not in the list above.
2972       isInvalid = true;
2973     else if (Tok.is(tok::unknown)) {
2974       // If we have an unknown token, it is something strange like "`".  Since
2975       // all of valid characters would have lexed into a single character
2976       // token of some sort, we know this is not a valid case.
2977       isInvalid = true;
2978     }
2979     if (isInvalid)
2980       Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
2981     else
2982       Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
2983   }
2984 
2985   if (!Tok.is(tok::eod))
2986     LastTok = Tok;
2987 
2988   SmallVector<Token, 16> Tokens;
2989 
2990   // Read the rest of the macro body.
2991   if (MI->isObjectLike()) {
2992     // Object-like macros are very simple, just read their body.
2993     while (Tok.isNot(tok::eod)) {
2994       LastTok = Tok;
2995       Tokens.push_back(Tok);
2996       // Get the next token of the macro.
2997       LexUnexpandedToken(Tok);
2998     }
2999   } else {
3000     // Otherwise, read the body of a function-like macro.  While we are at it,
3001     // check C99 6.10.3.2p1: ensure that # operators are followed by macro
3002     // parameters in function-like macro expansions.
3003 
3004     VAOptDefinitionContext VAOCtx(*this);
3005 
3006     while (Tok.isNot(tok::eod)) {
3007       LastTok = Tok;
3008 
3009       if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
3010         Tokens.push_back(Tok);
3011 
3012         if (VAOCtx.isVAOptToken(Tok)) {
3013           // If we're already within a VAOPT, emit an error.
3014           if (VAOCtx.isInVAOpt()) {
3015             Diag(Tok, diag::err_pp_vaopt_nested_use);
3016             return nullptr;
3017           }
3018           // Ensure VAOPT is followed by a '(' .
3019           LexUnexpandedToken(Tok);
3020           if (Tok.isNot(tok::l_paren)) {
3021             Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
3022             return nullptr;
3023           }
3024           Tokens.push_back(Tok);
3025           VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
3026           LexUnexpandedToken(Tok);
3027           if (Tok.is(tok::hashhash)) {
3028             Diag(Tok, diag::err_vaopt_paste_at_start);
3029             return nullptr;
3030           }
3031           continue;
3032         } else if (VAOCtx.isInVAOpt()) {
3033           if (Tok.is(tok::r_paren)) {
3034             if (VAOCtx.sawClosingParen()) {
3035               assert(Tokens.size() >= 3 &&
3036                      "Must have seen at least __VA_OPT__( "
3037                      "and a subsequent tok::r_paren");
3038               if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
3039                 Diag(Tok, diag::err_vaopt_paste_at_end);
3040                 return nullptr;
3041               }
3042             }
3043           } else if (Tok.is(tok::l_paren)) {
3044             VAOCtx.sawOpeningParen(Tok.getLocation());
3045           }
3046         }
3047         // Get the next token of the macro.
3048         LexUnexpandedToken(Tok);
3049         continue;
3050       }
3051 
3052       // If we're in -traditional mode, then we should ignore stringification
3053       // and token pasting. Mark the tokens as unknown so as not to confuse
3054       // things.
3055       if (getLangOpts().TraditionalCPP) {
3056         Tok.setKind(tok::unknown);
3057         Tokens.push_back(Tok);
3058 
3059         // Get the next token of the macro.
3060         LexUnexpandedToken(Tok);
3061         continue;
3062       }
3063 
3064       if (Tok.is(tok::hashhash)) {
3065         // If we see token pasting, check if it looks like the gcc comma
3066         // pasting extension.  We'll use this information to suppress
3067         // diagnostics later on.
3068 
3069         // Get the next token of the macro.
3070         LexUnexpandedToken(Tok);
3071 
3072         if (Tok.is(tok::eod)) {
3073           Tokens.push_back(LastTok);
3074           break;
3075         }
3076 
3077         if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
3078             Tokens[Tokens.size() - 1].is(tok::comma))
3079           MI->setHasCommaPasting();
3080 
3081         // Things look ok, add the '##' token to the macro.
3082         Tokens.push_back(LastTok);
3083         continue;
3084       }
3085 
3086       // Our Token is a stringization operator.
3087       // Get the next token of the macro.
3088       LexUnexpandedToken(Tok);
3089 
3090       // Check for a valid macro arg identifier or __VA_OPT__.
3091       if (!VAOCtx.isVAOptToken(Tok) &&
3092           (Tok.getIdentifierInfo() == nullptr ||
3093            MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {
3094 
3095         // If this is assembler-with-cpp mode, we accept random gibberish after
3096         // the '#' because '#' is often a comment character.  However, change
3097         // the kind of the token to tok::unknown so that the preprocessor isn't
3098         // confused.
3099         if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
3100           LastTok.setKind(tok::unknown);
3101           Tokens.push_back(LastTok);
3102           continue;
3103         } else {
3104           Diag(Tok, diag::err_pp_stringize_not_parameter)
3105             << LastTok.is(tok::hashat);
3106           return nullptr;
3107         }
3108       }
3109 
3110       // Things look ok, add the '#' and param name tokens to the macro.
3111       Tokens.push_back(LastTok);
3112 
3113       // If the token following '#' is VAOPT, let the next iteration handle it
3114       // and check it for correctness, otherwise add the token and prime the
3115       // loop with the next one.
3116       if (!VAOCtx.isVAOptToken(Tok)) {
3117         Tokens.push_back(Tok);
3118         LastTok = Tok;
3119 
3120         // Get the next token of the macro.
3121         LexUnexpandedToken(Tok);
3122       }
3123     }
3124     if (VAOCtx.isInVAOpt()) {
3125       assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
3126       Diag(Tok, diag::err_pp_expected_after)
3127         << LastTok.getKind() << tok::r_paren;
3128       Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
3129       return nullptr;
3130     }
3131   }
3132   MI->setDefinitionEndLoc(LastTok.getLocation());
3133 
3134   MI->setTokens(Tokens, BP);
3135   return MI;
3136 }
3137 
isObjCProtectedMacro(const IdentifierInfo * II)3138 static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3139   return II->isStr("__strong") || II->isStr("__weak") ||
3140          II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
3141 }
3142 
3143 /// HandleDefineDirective - Implements \#define.  This consumes the entire macro
3144 /// line then lets the caller lex the next real token.
HandleDefineDirective(Token & DefineTok,const bool ImmediatelyAfterHeaderGuard)3145 void Preprocessor::HandleDefineDirective(
3146     Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3147   ++NumDefined;
3148 
3149   Token MacroNameTok;
3150   bool MacroShadowsKeyword;
3151   ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
3152 
3153   // Error reading macro name?  If so, diagnostic already issued.
3154   if (MacroNameTok.is(tok::eod))
3155     return;
3156 
3157   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3158   // Issue a final pragma warning if we're defining a macro that was has been
3159   // undefined and is being redefined.
3160   if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3161     emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3162 
3163   // If we are supposed to keep comments in #defines, reenable comment saving
3164   // mode.
3165   if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3166 
3167   MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3168       MacroNameTok, ImmediatelyAfterHeaderGuard);
3169 
3170   if (!MI) return;
3171 
3172   if (MacroShadowsKeyword &&
3173       !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
3174     Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
3175   }
3176   // Check that there is no paste (##) operator at the beginning or end of the
3177   // replacement list.
3178   unsigned NumTokens = MI->getNumTokens();
3179   if (NumTokens != 0) {
3180     if (MI->getReplacementToken(0).is(tok::hashhash)) {
3181       Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
3182       return;
3183     }
3184     if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
3185       Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
3186       return;
3187     }
3188   }
3189 
3190   // When skipping just warn about macros that do not match.
3191   if (SkippingUntilPCHThroughHeader) {
3192     const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
3193     if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
3194                              /*Syntactic=*/LangOpts.MicrosoftExt))
3195       Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
3196           << MacroNameTok.getIdentifierInfo();
3197     // Issue the diagnostic but allow the change if msvc extensions are enabled
3198     if (!LangOpts.MicrosoftExt)
3199       return;
3200   }
3201 
3202   // Finally, if this identifier already had a macro defined for it, verify that
3203   // the macro bodies are identical, and issue diagnostics if they are not.
3204   if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
3205     // Final macros are hard-mode: they always warn. Even if the bodies are
3206     // identical. Even if they are in system headers. Even if they are things we
3207     // would silently allow in the past.
3208     if (MacroNameTok.getIdentifierInfo()->isFinal())
3209       emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3210 
3211     // In Objective-C, ignore attempts to directly redefine the builtin
3212     // definitions of the ownership qualifiers.  It's still possible to
3213     // #undef them.
3214     if (getLangOpts().ObjC &&
3215         SourceMgr.getFileID(OtherMI->getDefinitionLoc()) ==
3216             getPredefinesFileID() &&
3217         isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
3218       // Warn if it changes the tokens.
3219       if ((!getDiagnostics().getSuppressSystemWarnings() ||
3220            !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
3221           !MI->isIdenticalTo(*OtherMI, *this,
3222                              /*Syntactic=*/LangOpts.MicrosoftExt)) {
3223         Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
3224       }
3225       assert(!OtherMI->isWarnIfUnused());
3226       return;
3227     }
3228 
3229     // It is very common for system headers to have tons of macro redefinitions
3230     // and for warnings to be disabled in system headers.  If this is the case,
3231     // then don't bother calling MacroInfo::isIdenticalTo.
3232     if (!getDiagnostics().getSuppressSystemWarnings() ||
3233         !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
3234 
3235       if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3236         Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
3237 
3238       // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3239       // C++ [cpp.predefined]p4, but allow it as an extension.
3240       if (isLanguageDefinedBuiltin(SourceMgr, OtherMI, II->getName()))
3241         Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
3242       // Macros must be identical.  This means all tokens and whitespace
3243       // separation must be the same.  C99 6.10.3p2.
3244       else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3245                !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
3246         Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
3247           << MacroNameTok.getIdentifierInfo();
3248         Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
3249       }
3250     }
3251     if (OtherMI->isWarnIfUnused())
3252       WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
3253   }
3254 
3255   DefMacroDirective *MD =
3256       appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
3257 
3258   assert(!MI->isUsed());
3259   // If we need warning for not using the macro, add its location in the
3260   // warn-because-unused-macro set. If it gets used it will be removed from set.
3261   if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
3262       !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
3263       !MacroExpansionInDirectivesOverride &&
3264       getSourceManager().getFileID(MI->getDefinitionLoc()) !=
3265           getPredefinesFileID()) {
3266     MI->setIsWarnIfUnused(true);
3267     WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
3268   }
3269 
3270   // If the callbacks want to know, tell them about the macro definition.
3271   if (Callbacks)
3272     Callbacks->MacroDefined(MacroNameTok, MD);
3273 
3274   // If we're in MS compatibility mode and the macro being defined is the
3275   // assert macro, implicitly add a macro definition for static_assert to work
3276   // around their broken assert.h header file in C. Only do so if there isn't
3277   // already a static_assert macro defined.
3278   if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
3279       MacroNameTok.getIdentifierInfo()->isStr("assert") &&
3280       !isMacroDefined("static_assert")) {
3281     MacroInfo *MI = AllocateMacroInfo(SourceLocation());
3282 
3283     Token Tok;
3284     Tok.startToken();
3285     Tok.setKind(tok::kw__Static_assert);
3286     Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
3287     MI->setTokens({Tok}, BP);
3288     (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
3289   }
3290 }
3291 
3292 /// HandleUndefDirective - Implements \#undef.
3293 ///
HandleUndefDirective()3294 void Preprocessor::HandleUndefDirective() {
3295   ++NumUndefined;
3296 
3297   Token MacroNameTok;
3298   ReadMacroName(MacroNameTok, MU_Undef);
3299 
3300   // Error reading macro name?  If so, diagnostic already issued.
3301   if (MacroNameTok.is(tok::eod))
3302     return;
3303 
3304   // Check to see if this is the last token on the #undef line.
3305   CheckEndOfDirective("undef");
3306 
3307   // Okay, we have a valid identifier to undef.
3308   auto *II = MacroNameTok.getIdentifierInfo();
3309   auto MD = getMacroDefinition(II);
3310   UndefMacroDirective *Undef = nullptr;
3311 
3312   if (II->isFinal())
3313     emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);
3314 
3315   // If the macro is not defined, this is a noop undef.
3316   if (const MacroInfo *MI = MD.getMacroInfo()) {
3317     if (!MI->isUsed() && MI->isWarnIfUnused())
3318       Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
3319 
3320     // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3321     // C++ [cpp.predefined]p4, but allow it as an extension.
3322     if (isLanguageDefinedBuiltin(SourceMgr, MI, II->getName()))
3323       Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
3324 
3325     if (MI->isWarnIfUnused())
3326       WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
3327 
3328     Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
3329   }
3330 
3331   // If the callbacks want to know, tell them about the macro #undef.
3332   // Note: no matter if the macro was defined or not.
3333   if (Callbacks)
3334     Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3335 
3336   if (Undef)
3337     appendMacroDirective(II, Undef);
3338 }
3339 
3340 //===----------------------------------------------------------------------===//
3341 // Preprocessor Conditional Directive Handling.
3342 //===----------------------------------------------------------------------===//
3343 
3344 /// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive.  isIfndef
3345 /// is true when this is a \#ifndef directive.  ReadAnyTokensBeforeDirective is
3346 /// true if any tokens have been returned or pp-directives activated before this
3347 /// \#ifndef has been lexed.
3348 ///
HandleIfdefDirective(Token & Result,const Token & HashToken,bool isIfndef,bool ReadAnyTokensBeforeDirective)3349 void Preprocessor::HandleIfdefDirective(Token &Result,
3350                                         const Token &HashToken,
3351                                         bool isIfndef,
3352                                         bool ReadAnyTokensBeforeDirective) {
3353   ++NumIf;
3354   Token DirectiveTok = Result;
3355 
3356   Token MacroNameTok;
3357   ReadMacroName(MacroNameTok);
3358 
3359   // Error reading macro name?  If so, diagnostic already issued.
3360   if (MacroNameTok.is(tok::eod)) {
3361     // Skip code until we get to #endif.  This helps with recovery by not
3362     // emitting an error when the #endif is reached.
3363     SkipExcludedConditionalBlock(HashToken.getLocation(),
3364                                  DirectiveTok.getLocation(),
3365                                  /*Foundnonskip*/ false, /*FoundElse*/ false);
3366     return;
3367   }
3368 
3369   emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true);
3370 
3371   // Check to see if this is the last token on the #if[n]def line.
3372   CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
3373 
3374   IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3375   auto MD = getMacroDefinition(MII);
3376   MacroInfo *MI = MD.getMacroInfo();
3377 
3378   if (CurPPLexer->getConditionalStackDepth() == 0) {
3379     // If the start of a top-level #ifdef and if the macro is not defined,
3380     // inform MIOpt that this might be the start of a proper include guard.
3381     // Otherwise it is some other form of unknown conditional which we can't
3382     // handle.
3383     if (!ReadAnyTokensBeforeDirective && !MI) {
3384       assert(isIfndef && "#ifdef shouldn't reach here");
3385       CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
3386     } else
3387       CurPPLexer->MIOpt.EnterTopLevelConditional();
3388   }
3389 
3390   // If there is a macro, process it.
3391   if (MI)  // Mark it used.
3392     markMacroAsUsed(MI);
3393 
3394   if (Callbacks) {
3395     if (isIfndef)
3396       Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
3397     else
3398       Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
3399   }
3400 
3401   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3402     getSourceManager().isInMainFile(DirectiveTok.getLocation());
3403 
3404   // Should we include the stuff contained by this directive?
3405   if (PPOpts->SingleFileParseMode && !MI) {
3406     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3407     // the directive blocks.
3408     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3409                                      /*wasskip*/false, /*foundnonskip*/false,
3410                                      /*foundelse*/false);
3411   } else if (!MI == isIfndef || RetainExcludedCB) {
3412     // Yes, remember that we are inside a conditional, then lex the next token.
3413     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3414                                      /*wasskip*/false, /*foundnonskip*/true,
3415                                      /*foundelse*/false);
3416   } else {
3417     // No, skip the contents of this block.
3418     SkipExcludedConditionalBlock(HashToken.getLocation(),
3419                                  DirectiveTok.getLocation(),
3420                                  /*Foundnonskip*/ false,
3421                                  /*FoundElse*/ false);
3422   }
3423 }
3424 
3425 /// HandleIfDirective - Implements the \#if directive.
3426 ///
HandleIfDirective(Token & IfToken,const Token & HashToken,bool ReadAnyTokensBeforeDirective)3427 void Preprocessor::HandleIfDirective(Token &IfToken,
3428                                      const Token &HashToken,
3429                                      bool ReadAnyTokensBeforeDirective) {
3430   ++NumIf;
3431 
3432   // Parse and evaluate the conditional expression.
3433   IdentifierInfo *IfNDefMacro = nullptr;
3434   const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3435   const bool ConditionalTrue = DER.Conditional;
3436   // Lexer might become invalid if we hit code completion point while evaluating
3437   // expression.
3438   if (!CurPPLexer)
3439     return;
3440 
3441   // If this condition is equivalent to #ifndef X, and if this is the first
3442   // directive seen, handle it for the multiple-include optimization.
3443   if (CurPPLexer->getConditionalStackDepth() == 0) {
3444     if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3445       // FIXME: Pass in the location of the macro name, not the 'if' token.
3446       CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
3447     else
3448       CurPPLexer->MIOpt.EnterTopLevelConditional();
3449   }
3450 
3451   if (Callbacks)
3452     Callbacks->If(
3453         IfToken.getLocation(), DER.ExprRange,
3454         (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3455 
3456   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3457     getSourceManager().isInMainFile(IfToken.getLocation());
3458 
3459   // Should we include the stuff contained by this directive?
3460   if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
3461     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3462     // the directive blocks.
3463     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3464                                      /*foundnonskip*/false, /*foundelse*/false);
3465   } else if (ConditionalTrue || RetainExcludedCB) {
3466     // Yes, remember that we are inside a conditional, then lex the next token.
3467     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3468                                    /*foundnonskip*/true, /*foundelse*/false);
3469   } else {
3470     // No, skip the contents of this block.
3471     SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
3472                                  /*Foundnonskip*/ false,
3473                                  /*FoundElse*/ false);
3474   }
3475 }
3476 
3477 /// HandleEndifDirective - Implements the \#endif directive.
3478 ///
HandleEndifDirective(Token & EndifToken)3479 void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3480   ++NumEndif;
3481 
3482   // Check that this is the whole directive.
3483   CheckEndOfDirective("endif");
3484 
3485   PPConditionalInfo CondInfo;
3486   if (CurPPLexer->popConditionalLevel(CondInfo)) {
3487     // No conditionals on the stack: this is an #endif without an #if.
3488     Diag(EndifToken, diag::err_pp_endif_without_if);
3489     return;
3490   }
3491 
3492   // If this the end of a top-level #endif, inform MIOpt.
3493   if (CurPPLexer->getConditionalStackDepth() == 0)
3494     CurPPLexer->MIOpt.ExitTopLevelConditional();
3495 
3496   assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3497          "This code should only be reachable in the non-skipping case!");
3498 
3499   if (Callbacks)
3500     Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
3501 }
3502 
3503 /// HandleElseDirective - Implements the \#else directive.
3504 ///
HandleElseDirective(Token & Result,const Token & HashToken)3505 void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3506   ++NumElse;
3507 
3508   // #else directive in a non-skipping conditional... start skipping.
3509   CheckEndOfDirective("else");
3510 
3511   PPConditionalInfo CI;
3512   if (CurPPLexer->popConditionalLevel(CI)) {
3513     Diag(Result, diag::pp_err_else_without_if);
3514     return;
3515   }
3516 
3517   // If this is a top-level #else, inform the MIOpt.
3518   if (CurPPLexer->getConditionalStackDepth() == 0)
3519     CurPPLexer->MIOpt.EnterTopLevelConditional();
3520 
3521   // If this is a #else with a #else before it, report the error.
3522   if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
3523 
3524   if (Callbacks)
3525     Callbacks->Else(Result.getLocation(), CI.IfLoc);
3526 
3527   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3528     getSourceManager().isInMainFile(Result.getLocation());
3529 
3530   if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3531     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3532     // the directive blocks.
3533     CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
3534                                      /*foundnonskip*/false, /*foundelse*/true);
3535     return;
3536   }
3537 
3538   // Finally, skip the rest of the contents of this block.
3539   SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
3540                                /*Foundnonskip*/ true,
3541                                /*FoundElse*/ true, Result.getLocation());
3542 }
3543 
3544 /// Implements the \#elif, \#elifdef, and \#elifndef directives.
HandleElifFamilyDirective(Token & ElifToken,const Token & HashToken,tok::PPKeywordKind Kind)3545 void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3546                                              const Token &HashToken,
3547                                              tok::PPKeywordKind Kind) {
3548   PPElifDiag DirKind = Kind == tok::pp_elif      ? PED_Elif
3549                        : Kind == tok::pp_elifdef ? PED_Elifdef
3550                                                  : PED_Elifndef;
3551   ++NumElse;
3552 
3553   // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3554   switch (DirKind) {
3555   case PED_Elifdef:
3556   case PED_Elifndef:
3557     unsigned DiagID;
3558     if (LangOpts.CPlusPlus)
3559       DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3560                                     : diag::ext_cxx23_pp_directive;
3561     else
3562       DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3563                             : diag::ext_c23_pp_directive;
3564     Diag(ElifToken, DiagID) << DirKind;
3565     break;
3566   default:
3567     break;
3568   }
3569 
3570   // #elif directive in a non-skipping conditional... start skipping.
3571   // We don't care what the condition is, because we will always skip it (since
3572   // the block immediately before it was included).
3573   SourceRange ConditionRange = DiscardUntilEndOfDirective();
3574 
3575   PPConditionalInfo CI;
3576   if (CurPPLexer->popConditionalLevel(CI)) {
3577     Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind;
3578     return;
3579   }
3580 
3581   // If this is a top-level #elif, inform the MIOpt.
3582   if (CurPPLexer->getConditionalStackDepth() == 0)
3583     CurPPLexer->MIOpt.EnterTopLevelConditional();
3584 
3585   // If this is a #elif with a #else before it, report the error.
3586   if (CI.FoundElse)
3587     Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind;
3588 
3589   if (Callbacks) {
3590     switch (Kind) {
3591     case tok::pp_elif:
3592       Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
3593                       PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
3594       break;
3595     case tok::pp_elifdef:
3596       Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3597       break;
3598     case tok::pp_elifndef:
3599       Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3600       break;
3601     default:
3602       assert(false && "unexpected directive kind");
3603       break;
3604     }
3605   }
3606 
3607   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3608     getSourceManager().isInMainFile(ElifToken.getLocation());
3609 
3610   if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3611     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3612     // the directive blocks.
3613     CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
3614                                      /*foundnonskip*/false, /*foundelse*/false);
3615     return;
3616   }
3617 
3618   // Finally, skip the rest of the contents of this block.
3619   SkipExcludedConditionalBlock(
3620       HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
3621       /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
3622 }
3623 
3624 std::optional<LexEmbedParametersResult>
LexEmbedParameters(Token & CurTok,bool ForHasEmbed)3625 Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
3626   LexEmbedParametersResult Result{};
3627   SmallVector<Token, 2> ParameterTokens;
3628   tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
3629 
3630   auto DiagMismatchedBracesAndSkipToEOD =
3631       [&](tok::TokenKind Expected,
3632           std::pair<tok::TokenKind, SourceLocation> Matches) {
3633         Diag(CurTok, diag::err_expected) << Expected;
3634         Diag(Matches.second, diag::note_matching) << Matches.first;
3635         if (CurTok.isNot(tok::eod))
3636           DiscardUntilEndOfDirective(CurTok);
3637       };
3638 
3639   auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
3640     if (CurTok.isNot(Kind)) {
3641       Diag(CurTok, diag::err_expected) << Kind;
3642       if (CurTok.isNot(tok::eod))
3643         DiscardUntilEndOfDirective(CurTok);
3644       return false;
3645     }
3646     return true;
3647   };
3648 
3649   // C23 6.10:
3650   // pp-parameter-name:
3651   //   pp-standard-parameter
3652   //   pp-prefixed-parameter
3653   //
3654   // pp-standard-parameter:
3655   //   identifier
3656   //
3657   // pp-prefixed-parameter:
3658   //   identifier :: identifier
3659   auto LexPPParameterName = [&]() -> std::optional<std::string> {
3660     // We expect the current token to be an identifier; if it's not, things
3661     // have gone wrong.
3662     if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3663       return std::nullopt;
3664 
3665     const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
3666 
3667     // Lex another token; it is either a :: or we're done with the parameter
3668     // name.
3669     LexNonComment(CurTok);
3670     if (CurTok.is(tok::coloncolon)) {
3671       // We found a ::, so lex another identifier token.
3672       LexNonComment(CurTok);
3673       if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3674         return std::nullopt;
3675 
3676       const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
3677 
3678       // Lex another token so we're past the name.
3679       LexNonComment(CurTok);
3680       return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
3681     }
3682     return Prefix->getName().str();
3683   };
3684 
3685   // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
3686   // this document as an identifier pp_param and an identifier of the form
3687   // __pp_param__ shall behave the same when used as a preprocessor parameter,
3688   // except for the spelling.
3689   auto NormalizeParameterName = [](StringRef Name) {
3690     if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__"))
3691       return Name.substr(2, Name.size() - 4);
3692     return Name;
3693   };
3694 
3695   auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
3696     // we have a limit parameter and its internals are processed using
3697     // evaluation rules from #if.
3698     if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3699       return std::nullopt;
3700 
3701     // We do not consume the ( because EvaluateDirectiveExpression will lex
3702     // the next token for us.
3703     IdentifierInfo *ParameterIfNDef = nullptr;
3704     bool EvaluatedDefined;
3705     DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
3706         ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false);
3707 
3708     if (!LimitEvalResult.Value) {
3709       // If there was an error evaluating the directive expression, we expect
3710       // to be at the end of directive token.
3711       assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
3712       return std::nullopt;
3713     }
3714 
3715     if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3716       return std::nullopt;
3717 
3718     // Eat the ).
3719     LexNonComment(CurTok);
3720 
3721     // C23 6.10.3.2p2: The token defined shall not appear within the constant
3722     // expression.
3723     if (EvaluatedDefined) {
3724       Diag(CurTok, diag::err_defined_in_pp_embed);
3725       return std::nullopt;
3726     }
3727 
3728     if (LimitEvalResult.Value) {
3729       const llvm::APSInt &Result = *LimitEvalResult.Value;
3730       if (Result.isNegative()) {
3731         Diag(CurTok, diag::err_requires_positive_value)
3732             << toString(Result, 10) << /*positive*/ 0;
3733         return std::nullopt;
3734       }
3735       return Result.getLimitedValue();
3736     }
3737     return std::nullopt;
3738   };
3739 
3740   auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
3741     switch (Kind) {
3742     case tok::l_paren:
3743       return tok::r_paren;
3744     case tok::l_brace:
3745       return tok::r_brace;
3746     case tok::l_square:
3747       return tok::r_square;
3748     default:
3749       llvm_unreachable("should not get here");
3750     }
3751   };
3752 
3753   auto LexParenthesizedBalancedTokenSoup =
3754       [&](llvm::SmallVectorImpl<Token> &Tokens) {
3755         std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
3756 
3757         // We expect the current token to be a left paren.
3758         if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3759           return false;
3760         LexNonComment(CurTok); // Eat the (
3761 
3762         bool WaitingForInnerCloseParen = false;
3763         while (CurTok.isNot(tok::eod) &&
3764                (WaitingForInnerCloseParen || CurTok.isNot(tok::r_paren))) {
3765           switch (CurTok.getKind()) {
3766           default: // Shutting up diagnostics about not fully-covered switch.
3767             break;
3768           case tok::l_paren:
3769             WaitingForInnerCloseParen = true;
3770             [[fallthrough]];
3771           case tok::l_brace:
3772           case tok::l_square:
3773             BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()});
3774             break;
3775           case tok::r_paren:
3776             WaitingForInnerCloseParen = false;
3777             [[fallthrough]];
3778           case tok::r_brace:
3779           case tok::r_square: {
3780             tok::TokenKind Matching =
3781                 GetMatchingCloseBracket(BracketStack.back().first);
3782             if (BracketStack.empty() || CurTok.getKind() != Matching) {
3783               DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
3784               return false;
3785             }
3786             BracketStack.pop_back();
3787           } break;
3788           }
3789           Tokens.push_back(CurTok);
3790           LexNonComment(CurTok);
3791         }
3792 
3793         // When we're done, we want to eat the closing paren.
3794         if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3795           return false;
3796 
3797         LexNonComment(CurTok); // Eat the )
3798         return true;
3799       };
3800 
3801   LexNonComment(CurTok); // Prime the pump.
3802   while (!CurTok.isOneOf(EndTokenKind, tok::eod)) {
3803     SourceLocation ParamStartLoc = CurTok.getLocation();
3804     std::optional<std::string> ParamName = LexPPParameterName();
3805     if (!ParamName)
3806       return std::nullopt;
3807     StringRef Parameter = NormalizeParameterName(*ParamName);
3808 
3809     // Lex the parameters (dependent on the parameter type we want!).
3810     //
3811     // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
3812     // one time in the embed parameter sequence.
3813     if (Parameter == "limit") {
3814       if (Result.MaybeLimitParam)
3815         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3816 
3817       std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
3818       if (!Limit)
3819         return std::nullopt;
3820       Result.MaybeLimitParam =
3821           PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
3822     } else if (Parameter == "clang::offset") {
3823       if (Result.MaybeOffsetParam)
3824         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3825 
3826       std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
3827       if (!Offset)
3828         return std::nullopt;
3829       Result.MaybeOffsetParam = PPEmbedParameterOffset{
3830           *Offset, {ParamStartLoc, CurTok.getLocation()}};
3831     } else if (Parameter == "prefix") {
3832       if (Result.MaybePrefixParam)
3833         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3834 
3835       SmallVector<Token, 4> Soup;
3836       if (!LexParenthesizedBalancedTokenSoup(Soup))
3837         return std::nullopt;
3838       Result.MaybePrefixParam = PPEmbedParameterPrefix{
3839           std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3840     } else if (Parameter == "suffix") {
3841       if (Result.MaybeSuffixParam)
3842         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3843 
3844       SmallVector<Token, 4> Soup;
3845       if (!LexParenthesizedBalancedTokenSoup(Soup))
3846         return std::nullopt;
3847       Result.MaybeSuffixParam = PPEmbedParameterSuffix{
3848           std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3849     } else if (Parameter == "if_empty") {
3850       if (Result.MaybeIfEmptyParam)
3851         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3852 
3853       SmallVector<Token, 4> Soup;
3854       if (!LexParenthesizedBalancedTokenSoup(Soup))
3855         return std::nullopt;
3856       Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
3857           std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3858     } else {
3859       ++Result.UnrecognizedParams;
3860 
3861       // If there's a left paren, we need to parse a balanced token sequence
3862       // and just eat those tokens.
3863       if (CurTok.is(tok::l_paren)) {
3864         SmallVector<Token, 4> Soup;
3865         if (!LexParenthesizedBalancedTokenSoup(Soup))
3866           return std::nullopt;
3867       }
3868       if (!ForHasEmbed) {
3869         Diag(CurTok, diag::err_pp_unknown_parameter) << 1 << Parameter;
3870         return std::nullopt;
3871       }
3872     }
3873   }
3874   return Result;
3875 }
3876 
HandleEmbedDirectiveImpl(SourceLocation HashLoc,const LexEmbedParametersResult & Params,StringRef BinaryContents)3877 void Preprocessor::HandleEmbedDirectiveImpl(
3878     SourceLocation HashLoc, const LexEmbedParametersResult &Params,
3879     StringRef BinaryContents) {
3880   if (BinaryContents.empty()) {
3881     // If we have no binary contents, the only thing we need to emit are the
3882     // if_empty tokens, if any.
3883     // FIXME: this loses AST fidelity; nothing in the compiler will see that
3884     // these tokens came from #embed. We have to hack around this when printing
3885     // preprocessed output. The same is true for prefix and suffix tokens.
3886     if (Params.MaybeIfEmptyParam) {
3887       ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens;
3888       size_t TokCount = Toks.size();
3889       auto NewToks = std::make_unique<Token[]>(TokCount);
3890       llvm::copy(Toks, NewToks.get());
3891       EnterTokenStream(std::move(NewToks), TokCount, true, true);
3892     }
3893     return;
3894   }
3895 
3896   size_t NumPrefixToks = Params.PrefixTokenCount(),
3897          NumSuffixToks = Params.SuffixTokenCount();
3898   size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;
3899   size_t CurIdx = 0;
3900   auto Toks = std::make_unique<Token[]>(TotalNumToks);
3901 
3902   // Add the prefix tokens, if any.
3903   if (Params.MaybePrefixParam) {
3904     llvm::copy(Params.MaybePrefixParam->Tokens, &Toks[CurIdx]);
3905     CurIdx += NumPrefixToks;
3906   }
3907 
3908   EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
3909   Data->BinaryData = BinaryContents;
3910 
3911   Toks[CurIdx].startToken();
3912   Toks[CurIdx].setKind(tok::annot_embed);
3913   Toks[CurIdx].setAnnotationRange(HashLoc);
3914   Toks[CurIdx++].setAnnotationValue(Data);
3915 
3916   // Now add the suffix tokens, if any.
3917   if (Params.MaybeSuffixParam) {
3918     llvm::copy(Params.MaybeSuffixParam->Tokens, &Toks[CurIdx]);
3919     CurIdx += NumSuffixToks;
3920   }
3921 
3922   assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");
3923   EnterTokenStream(std::move(Toks), TotalNumToks, true, true);
3924 }
3925 
HandleEmbedDirective(SourceLocation HashLoc,Token & EmbedTok,const FileEntry * LookupFromFile)3926 void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
3927                                         const FileEntry *LookupFromFile) {
3928   // Give the usual extension/compatibility warnings.
3929   if (LangOpts.C23)
3930     Diag(EmbedTok, diag::warn_compat_pp_embed_directive);
3931   else
3932     Diag(EmbedTok, diag::ext_pp_embed_directive)
3933         << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
3934 
3935   // Parse the filename header
3936   Token FilenameTok;
3937   if (LexHeaderName(FilenameTok))
3938     return;
3939 
3940   if (FilenameTok.isNot(tok::header_name)) {
3941     Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
3942     if (FilenameTok.isNot(tok::eod))
3943       DiscardUntilEndOfDirective();
3944     return;
3945   }
3946 
3947   // Parse the optional sequence of
3948   // directive-parameters:
3949   //     identifier parameter-name-list[opt] directive-argument-list[opt]
3950   // directive-argument-list:
3951   //    '(' balanced-token-sequence ')'
3952   // parameter-name-list:
3953   //    '::' identifier parameter-name-list[opt]
3954   Token CurTok;
3955   std::optional<LexEmbedParametersResult> Params =
3956       LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
3957 
3958   assert((Params || CurTok.is(tok::eod)) &&
3959          "expected success or to be at the end of the directive");
3960   if (!Params)
3961     return;
3962 
3963   // Now, splat the data out!
3964   SmallString<128> FilenameBuffer;
3965   StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
3966   StringRef OriginalFilename = Filename;
3967   bool isAngled =
3968       GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
3969   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
3970   // error.
3971   assert(!Filename.empty());
3972   OptionalFileEntryRef MaybeFileRef =
3973       this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile);
3974   if (!MaybeFileRef) {
3975     // could not find file
3976     if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
3977       return;
3978     }
3979     Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
3980     return;
3981   }
3982   std::optional<llvm::MemoryBufferRef> MaybeFile =
3983       getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef);
3984   if (!MaybeFile) {
3985     // could not find file
3986     Diag(FilenameTok, diag::err_cannot_open_file)
3987         << Filename << "a buffer to the contents could not be created";
3988     return;
3989   }
3990   StringRef BinaryContents = MaybeFile->getBuffer();
3991 
3992   // The order is important between 'offset' and 'limit'; we want to offset
3993   // first and then limit second; otherwise we may reduce the notional resource
3994   // size to something too small to offset into.
3995   if (Params->MaybeOffsetParam) {
3996     // FIXME: just like with the limit() and if_empty() parameters, this loses
3997     // source fidelity in the AST; it has no idea that there was an offset
3998     // involved.
3999     // offsets all the way to the end of the file make for an empty file.
4000     BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset);
4001   }
4002 
4003   if (Params->MaybeLimitParam) {
4004     // FIXME: just like with the clang::offset() and if_empty() parameters,
4005     // this loses source fidelity in the AST; it has no idea there was a limit
4006     // involved.
4007     BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit);
4008   }
4009 
4010   if (Callbacks)
4011     Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,
4012                               *Params);
4013   HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents);
4014 }
4015