xref: /freebsd/contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Implements # directive processing for the Preprocessor.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/AttributeCommonInfo.h"
15 #include "clang/Basic/Attributes.h"
16 #include "clang/Basic/CharInfo.h"
17 #include "clang/Basic/DirectoryEntry.h"
18 #include "clang/Basic/FileManager.h"
19 #include "clang/Basic/IdentifierTable.h"
20 #include "clang/Basic/LangOptions.h"
21 #include "clang/Basic/Module.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/CodeCompletionHandler.h"
27 #include "clang/Lex/HeaderSearch.h"
28 #include "clang/Lex/LexDiagnostic.h"
29 #include "clang/Lex/LiteralSupport.h"
30 #include "clang/Lex/MacroInfo.h"
31 #include "clang/Lex/ModuleLoader.h"
32 #include "clang/Lex/ModuleMap.h"
33 #include "clang/Lex/PPCallbacks.h"
34 #include "clang/Lex/Pragma.h"
35 #include "clang/Lex/Preprocessor.h"
36 #include "clang/Lex/PreprocessorOptions.h"
37 #include "clang/Lex/Token.h"
38 #include "clang/Lex/VariadicMacroSupport.h"
39 #include "llvm/ADT/ArrayRef.h"
40 #include "llvm/ADT/STLExtras.h"
41 #include "llvm/ADT/ScopeExit.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringExtras.h"
44 #include "llvm/ADT/StringRef.h"
45 #include "llvm/ADT/StringSwitch.h"
46 #include "llvm/Support/ErrorHandling.h"
47 #include "llvm/Support/Path.h"
48 #include "llvm/Support/SaveAndRestore.h"
49 #include <algorithm>
50 #include <cassert>
51 #include <cstring>
52 #include <optional>
53 #include <string>
54 #include <utility>
55 
56 using namespace clang;
57 
58 //===----------------------------------------------------------------------===//
59 // Utility Methods for Preprocessor Directive Handling.
60 //===----------------------------------------------------------------------===//
61 
AllocateMacroInfo(SourceLocation L)62 MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
63   static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
64   return new (BP) MacroInfo(L);
65 }
66 
AllocateDefMacroDirective(MacroInfo * MI,SourceLocation Loc)67 DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
68                                                            SourceLocation Loc) {
69   return new (BP) DefMacroDirective(MI, Loc);
70 }
71 
72 UndefMacroDirective *
AllocateUndefMacroDirective(SourceLocation UndefLoc)73 Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
74   return new (BP) UndefMacroDirective(UndefLoc);
75 }
76 
77 VisibilityMacroDirective *
AllocateVisibilityMacroDirective(SourceLocation Loc,bool isPublic)78 Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
79                                                bool isPublic) {
80   return new (BP) VisibilityMacroDirective(Loc, isPublic);
81 }
82 
83 /// Read and discard all tokens remaining on the current line until
84 /// the tok::eod token is found.
DiscardUntilEndOfDirective(Token & Tmp)85 SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) {
86   SourceRange Res;
87 
88   LexUnexpandedToken(Tmp);
89   Res.setBegin(Tmp.getLocation());
90   while (Tmp.isNot(tok::eod)) {
91     assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
92     LexUnexpandedToken(Tmp);
93   }
94   Res.setEnd(Tmp.getLocation());
95   return Res;
96 }
97 
/// Classifies what, if anything, should be diagnosed when a reserved
/// identifier is the subject of a #define or #undef.
enum MacroDiag {
  /// Not a reserved identifier; nothing to report.
  MD_NoWarn,
  /// The macro hides a keyword; the warning is enabled by default.
  MD_KeywordDef,
  /// #define or #undef of a reserved identifier; the warning is disabled by
  /// default.
  MD_ReservedMacro,
  /// The identifier is a reserved attribute name.
  MD_ReservedAttributeIdentifier
};
105 
/// Selector values fed to the %select in the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics.  The numeric values are part of the
/// contract with those diagnostics, so they are spelled out explicitly.
enum PPElifDiag {
  PED_Elif = 0,    ///< \#elif
  PED_Elifdef = 1, ///< \#elifdef
  PED_Elifndef = 2 ///< \#elifndef
};
113 
isFeatureTestMacro(StringRef MacroName)114 static bool isFeatureTestMacro(StringRef MacroName) {
115   // list from:
116   // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
117   // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
118   // * man 7 feature_test_macros
119   // The list must be sorted for correct binary search.
120   static constexpr StringRef ReservedMacro[] = {
121       "_ATFILE_SOURCE",
122       "_BSD_SOURCE",
123       "_CRT_NONSTDC_NO_WARNINGS",
124       "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
125       "_CRT_SECURE_NO_WARNINGS",
126       "_FILE_OFFSET_BITS",
127       "_FORTIFY_SOURCE",
128       "_GLIBCXX_ASSERTIONS",
129       "_GLIBCXX_CONCEPT_CHECKS",
130       "_GLIBCXX_DEBUG",
131       "_GLIBCXX_DEBUG_PEDANTIC",
132       "_GLIBCXX_PARALLEL",
133       "_GLIBCXX_PARALLEL_ASSERTIONS",
134       "_GLIBCXX_SANITIZE_VECTOR",
135       "_GLIBCXX_USE_CXX11_ABI",
136       "_GLIBCXX_USE_DEPRECATED",
137       "_GNU_SOURCE",
138       "_ISOC11_SOURCE",
139       "_ISOC95_SOURCE",
140       "_ISOC99_SOURCE",
141       "_LARGEFILE64_SOURCE",
142       "_POSIX_C_SOURCE",
143       "_REENTRANT",
144       "_SVID_SOURCE",
145       "_THREAD_SAFE",
146       "_XOPEN_SOURCE",
147       "_XOPEN_SOURCE_EXTENDED",
148       "__STDCPP_WANT_MATH_SPEC_FUNCS__",
149       "__STDC_FORMAT_MACROS",
150   };
151   return llvm::binary_search(ReservedMacro, MacroName);
152 }
153 
isLanguageDefinedBuiltin(const SourceManager & SourceMgr,const MacroInfo * MI,const StringRef MacroName)154 static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
155                                      const MacroInfo *MI,
156                                      const StringRef MacroName) {
157   // If this is a macro with special handling (like __LINE__) then it's language
158   // defined.
159   if (MI->isBuiltinMacro())
160     return true;
161   // Builtin macros are defined in the builtin file
162   if (!SourceMgr.isWrittenInBuiltinFile(MI->getDefinitionLoc()))
163     return false;
164   // C defines macros starting with __STDC, and C++ defines macros starting with
165   // __STDCPP
166   if (MacroName.starts_with("__STDC"))
167     return true;
168   // C++ defines the __cplusplus macro
169   if (MacroName == "__cplusplus")
170     return true;
171   // C++ defines various feature-test macros starting with __cpp
172   if (MacroName.starts_with("__cpp"))
173     return true;
174   // Anything else isn't language-defined
175   return false;
176 }
177 
isReservedCXXAttributeName(Preprocessor & PP,IdentifierInfo * II)178 static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) {
179   const LangOptions &Lang = PP.getLangOpts();
180   if (Lang.CPlusPlus &&
181       hasAttribute(AttributeCommonInfo::AS_CXX11, /* Scope*/ nullptr, II,
182                    PP.getTargetInfo(), Lang, /*CheckPlugins*/ false) > 0) {
183     AttributeCommonInfo::AttrArgsInfo AttrArgsInfo =
184         AttributeCommonInfo::getCXX11AttrArgsInfo(II);
185     if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required)
186       return PP.isNextPPTokenOneOf(tok::l_paren);
187 
188     return !PP.isNextPPTokenOneOf(tok::l_paren) ||
189            AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional;
190   }
191   return false;
192 }
193 
shouldWarnOnMacroDef(Preprocessor & PP,IdentifierInfo * II)194 static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
195   const LangOptions &Lang = PP.getLangOpts();
196   StringRef Text = II->getName();
197   if (isReservedInAllContexts(II->isReserved(Lang)))
198     return isFeatureTestMacro(Text) ? MD_NoWarn : MD_ReservedMacro;
199   if (II->isKeyword(Lang))
200     return MD_KeywordDef;
201   if (Lang.CPlusPlus11 && (Text == "override" || Text == "final"))
202     return MD_KeywordDef;
203   if (isReservedCXXAttributeName(PP, II))
204     return MD_ReservedAttributeIdentifier;
205   return MD_NoWarn;
206 }
207 
shouldWarnOnMacroUndef(Preprocessor & PP,IdentifierInfo * II)208 static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
209   const LangOptions &Lang = PP.getLangOpts();
210   // Do not warn on keyword undef.  It is generally harmless and widely used.
211   if (isReservedInAllContexts(II->isReserved(Lang)))
212     return MD_ReservedMacro;
213   if (isReservedCXXAttributeName(PP, II))
214     return MD_ReservedAttributeIdentifier;
215   return MD_NoWarn;
216 }
217 
218 // Return true if we want to issue a diagnostic by default if we
219 // encounter this name in a #include with the wrong case. For now,
220 // this includes the standard C and C++ headers, Posix headers,
221 // and Boost headers. Improper case for these #includes is a
222 // potential portability issue.
warnByDefaultOnWrongCase(StringRef Include)223 static bool warnByDefaultOnWrongCase(StringRef Include) {
224   // If the first component of the path is "boost", treat this like a standard header
225   // for the purposes of diagnostics.
226   if (::llvm::sys::path::begin(Include)->equals_insensitive("boost"))
227     return true;
228 
229   // "condition_variable" is the longest standard header name at 18 characters.
230   // If the include file name is longer than that, it can't be a standard header.
231   static const size_t MaxStdHeaderNameLen = 18u;
232   if (Include.size() > MaxStdHeaderNameLen)
233     return false;
234 
235   // Lowercase and normalize the search string.
236   SmallString<32> LowerInclude{Include};
237   for (char &Ch : LowerInclude) {
238     // In the ASCII range?
239     if (static_cast<unsigned char>(Ch) > 0x7f)
240       return false; // Can't be a standard header
241     // ASCII lowercase:
242     if (Ch >= 'A' && Ch <= 'Z')
243       Ch += 'a' - 'A';
244     // Normalize path separators for comparison purposes.
245     else if (::llvm::sys::path::is_separator(Ch))
246       Ch = '/';
247   }
248 
249   // The standard C/C++ and Posix headers
250   return llvm::StringSwitch<bool>(LowerInclude)
251     // C library headers
252     .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
253     .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
254     .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
255     .Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stdcountof.h", true)
256     .Cases("stddef.h", "stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true)
257     .Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true)
258     .Cases("wchar.h", "wctype.h", true)
259 
260     // C++ headers for C library facilities
261     .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
262     .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
263     .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
264     .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
265     .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
266     .Case("cwctype", true)
267 
268     // C++ library headers
269     .Cases("algorithm", "fstream", "list", "regex", "thread", true)
270     .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
271     .Cases("atomic", "future", "map", "set", "type_traits", true)
272     .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
273     .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
274     .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
275     .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
276     .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
277     .Cases("deque", "istream", "queue", "string", "valarray", true)
278     .Cases("exception", "iterator", "random", "strstream", "vector", true)
279     .Cases("forward_list", "limits", "ratio", "system_error", true)
280 
281     // POSIX headers (which aren't also C headers)
282     .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
283     .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
284     .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
285     .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
286     .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
287     .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
288     .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
289     .Cases("sys/resource.h", "sys/select.h",  "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
290     .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
291     .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
292     .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
293     .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
294     .Default(false);
295 }
296 
297 /// Find a similar string in `Candidates`.
298 ///
299 /// \param LHS a string for a similar string in `Candidates`
300 ///
301 /// \param Candidates the candidates to find a similar string.
302 ///
303 /// \returns a similar string if exists. If no similar string exists,
304 /// returns std::nullopt.
305 static std::optional<StringRef>
findSimilarStr(StringRef LHS,const std::vector<StringRef> & Candidates)306 findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
307   // We need to check if `Candidates` has the exact case-insensitive string
308   // because the Levenshtein distance match does not care about it.
309   for (StringRef C : Candidates) {
310     if (LHS.equals_insensitive(C)) {
311       return C;
312     }
313   }
314 
315   // Keep going with the Levenshtein distance match.
316   // If the LHS size is less than 3, use the LHS size minus 1 and if not,
317   // use the LHS size divided by 3.
318   size_t Length = LHS.size();
319   size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
320 
321   std::optional<std::pair<StringRef, size_t>> SimilarStr;
322   for (StringRef C : Candidates) {
323     size_t CurDist = LHS.edit_distance(C, true);
324     if (CurDist <= MaxDist) {
325       if (!SimilarStr) {
326         // The first similar string found.
327         SimilarStr = {C, CurDist};
328       } else if (CurDist < SimilarStr->second) {
329         // More similar string found.
330         SimilarStr = {C, CurDist};
331       }
332     }
333   }
334 
335   if (SimilarStr) {
336     return SimilarStr->first;
337   } else {
338     return std::nullopt;
339   }
340 }
341 
CheckMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)342 bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
343                                   bool *ShadowFlag) {
344   // Missing macro name?
345   if (MacroNameTok.is(tok::eod))
346     return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
347 
348   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
349   if (!II)
350     return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
351 
352   if (II->isCPlusPlusOperatorKeyword()) {
353     // C++ 2.5p2: Alternative tokens behave the same as its primary token
354     // except for their spellings.
355     Diag(MacroNameTok, getLangOpts().MicrosoftExt
356                            ? diag::ext_pp_operator_used_as_macro_name
357                            : diag::err_pp_operator_used_as_macro_name)
358         << II << MacroNameTok.getKind();
359     // Allow #defining |and| and friends for Microsoft compatibility or
360     // recovery when legacy C headers are included in C++.
361   }
362 
363   if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
364     // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
365     return Diag(MacroNameTok, diag::err_defined_macro_name);
366   }
367 
368   // If defining/undefining reserved identifier or a keyword, we need to issue
369   // a warning.
370   SourceLocation MacroNameLoc = MacroNameTok.getLocation();
371   if (ShadowFlag)
372     *ShadowFlag = false;
373   // Macro names with reserved identifiers are accepted if built-in or passed
374   // through the command line (the later may be present if -dD was used to
375   // generate the preprocessed file).
376   // NB: isInPredefinedFile() is relatively expensive, so keep it at the end
377   // of the condition.
378   if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&
379       !SourceMgr.isInPredefinedFile(MacroNameLoc)) {
380     MacroDiag D = MD_NoWarn;
381     if (isDefineUndef == MU_Define) {
382       D = shouldWarnOnMacroDef(*this, II);
383     }
384     else if (isDefineUndef == MU_Undef)
385       D = shouldWarnOnMacroUndef(*this, II);
386     if (D == MD_KeywordDef) {
387       // We do not want to warn on some patterns widely used in configuration
388       // scripts.  This requires analyzing next tokens, so do not issue warnings
389       // now, only inform caller.
390       if (ShadowFlag)
391         *ShadowFlag = true;
392     }
393     if (D == MD_ReservedMacro)
394       Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
395     if (D == MD_ReservedAttributeIdentifier)
396       Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_attribute_id)
397           << II->getName();
398   }
399 
400   // Okay, we got a good identifier.
401   return false;
402 }
403 
404 /// Lex and validate a macro name, which occurs after a
405 /// \#define or \#undef.
406 ///
407 /// This sets the token kind to eod and discards the rest of the macro line if
408 /// the macro name is invalid.
409 ///
410 /// \param MacroNameTok Token that is expected to be a macro name.
411 /// \param isDefineUndef Context in which macro is used.
412 /// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
ReadMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)413 void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
414                                  bool *ShadowFlag) {
415   // Read the token, don't allow macro expansion on it.
416   LexUnexpandedToken(MacroNameTok);
417 
418   if (MacroNameTok.is(tok::code_completion)) {
419     if (CodeComplete)
420       CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);
421     setCodeCompletionReached();
422     LexUnexpandedToken(MacroNameTok);
423   }
424 
425   if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
426     return;
427 
428   // Invalid macro name, read and discard the rest of the line and set the
429   // token kind to tok::eod if necessary.
430   if (MacroNameTok.isNot(tok::eod)) {
431     MacroNameTok.setKind(tok::eod);
432     DiscardUntilEndOfDirective();
433   }
434 }
435 
436 /// Ensure that the next token is a tok::eod token.
437 ///
438 /// If not, emit a diagnostic and consume up until the eod.  If EnableMacros is
439 /// true, then we consider macros that expand to zero tokens as being ok.
440 ///
441 /// Returns the location of the end of the directive.
CheckEndOfDirective(const char * DirType,bool EnableMacros)442 SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
443                                                  bool EnableMacros) {
444   Token Tmp;
445   // Lex unexpanded tokens for most directives: macros might expand to zero
446   // tokens, causing us to miss diagnosing invalid lines.  Some directives (like
447   // #line) allow empty macros.
448   if (EnableMacros)
449     Lex(Tmp);
450   else
451     LexUnexpandedToken(Tmp);
452 
453   // There should be no tokens after the directive, but we allow them as an
454   // extension.
455   while (Tmp.is(tok::comment))  // Skip comments in -C mode.
456     LexUnexpandedToken(Tmp);
457 
458   if (Tmp.is(tok::eod))
459     return Tmp.getLocation();
460 
461   // Add a fixit in GNU/C99/C++ mode.  Don't offer a fixit for strict-C89,
462   // or if this is a macro-style preprocessing directive, because it is more
463   // trouble than it is worth to insert /**/ and check that there is no /**/
464   // in the range also.
465   FixItHint Hint;
466   if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
467       !CurTokenLexer)
468     Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
469   Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
470   return DiscardUntilEndOfDirective().getEnd();
471 }
472 
SuggestTypoedDirective(const Token & Tok,StringRef Directive) const473 void Preprocessor::SuggestTypoedDirective(const Token &Tok,
474                                           StringRef Directive) const {
475   // If this is a `.S` file, treat unknown # directives as non-preprocessor
476   // directives.
477   if (getLangOpts().AsmPreprocessor) return;
478 
479   std::vector<StringRef> Candidates = {
480       "if", "ifdef", "ifndef", "elif", "else", "endif"
481   };
482   if (LangOpts.C23 || LangOpts.CPlusPlus23)
483     Candidates.insert(Candidates.end(), {"elifdef", "elifndef"});
484 
485   if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {
486     // Directive cannot be coming from macro.
487     assert(Tok.getLocation().isFileID());
488     CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
489         Tok.getLocation(),
490         Tok.getLocation().getLocWithOffset(Directive.size()));
491     StringRef SuggValue = *Sugg;
492 
493     auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue);
494     Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
495   }
496 }
497 
498 /// SkipExcludedConditionalBlock - We just read a \#if or related directive and
499 /// decided that the subsequent tokens are in the \#if'd out portion of the
500 /// file.  Lex the rest of the file, until we see an \#endif.  If
501 /// FoundNonSkipPortion is true, then we have already emitted code for part of
502 /// this \#if directive, so \#else/\#elif blocks should never be entered.
503 /// If FoundElse is false, then \#else directives are ok; if it is true, we have
504 /// already seen one so a \#else directive is a duplicate.  When this returns,
505 /// the caller can lex the first valid token.
SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,SourceLocation IfTokenLoc,bool FoundNonSkipPortion,bool FoundElse,SourceLocation ElseLoc)506 void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
507                                                 SourceLocation IfTokenLoc,
508                                                 bool FoundNonSkipPortion,
509                                                 bool FoundElse,
510                                                 SourceLocation ElseLoc) {
511   // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
512   // not getting called recursively by storing the RecordedSkippedRanges
513   // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
514   // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
515   // invalidated. If this changes and there is a need to call
516   // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
517   // change to do a second lookup in endLexPass function instead of reusing the
518   // lookup pointer.
519   assert(!SkippingExcludedConditionalBlock &&
520          "calling SkipExcludedConditionalBlock recursively");
521   llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
522 
523   ++NumSkipped;
524   assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
525   assert(CurPPLexer && "Conditional PP block must be in a file!");
526   assert(CurLexer && "Conditional PP block but no current lexer set!");
527 
528   if (PreambleConditionalStack.reachedEOFWhileSkipping())
529     PreambleConditionalStack.clearSkipInfo();
530   else
531     CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
532                                      FoundNonSkipPortion, FoundElse);
533 
534   // Enter raw mode to disable identifier lookup (and thus macro expansion),
535   // disabling warnings, etc.
536   CurPPLexer->LexingRawMode = true;
537   Token Tok;
538   SourceLocation endLoc;
539 
540   /// Keeps track and caches skipped ranges and also retrieves a prior skipped
541   /// range if the same block is re-visited.
542   struct SkippingRangeStateTy {
543     Preprocessor &PP;
544 
545     const char *BeginPtr = nullptr;
546     unsigned *SkipRangePtr = nullptr;
547 
548     SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
549 
550     void beginLexPass() {
551       if (BeginPtr)
552         return; // continue skipping a block.
553 
554       // Initiate a skipping block and adjust the lexer if we already skipped it
555       // before.
556       BeginPtr = PP.CurLexer->getBufferLocation();
557       SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
558       if (*SkipRangePtr) {
559         PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
560                           /*IsAtStartOfLine*/ true);
561       }
562     }
563 
564     void endLexPass(const char *Hashptr) {
565       if (!BeginPtr) {
566         // Not doing normal lexing.
567         assert(PP.CurLexer->isDependencyDirectivesLexer());
568         return;
569       }
570 
571       // Finished skipping a block, record the range if it's first time visited.
572       if (!*SkipRangePtr) {
573         *SkipRangePtr = Hashptr - BeginPtr;
574       }
575       assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
576       BeginPtr = nullptr;
577       SkipRangePtr = nullptr;
578     }
579   } SkippingRangeState(*this);
580 
581   while (true) {
582     if (CurLexer->isDependencyDirectivesLexer()) {
583       CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
584     } else {
585       SkippingRangeState.beginLexPass();
586       while (true) {
587         CurLexer->Lex(Tok);
588 
589         if (Tok.is(tok::code_completion)) {
590           setCodeCompletionReached();
591           if (CodeComplete)
592             CodeComplete->CodeCompleteInConditionalExclusion();
593           continue;
594         }
595 
596         // If this is the end of the buffer, we have an error.
597         if (Tok.is(tok::eof)) {
598           // We don't emit errors for unterminated conditionals here,
599           // Lexer::LexEndOfFile can do that properly.
600           // Just return and let the caller lex after this #include.
601           if (PreambleConditionalStack.isRecording())
602             PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
603                                                       FoundNonSkipPortion,
604                                                       FoundElse, ElseLoc);
605           break;
606         }
607 
608         // If this token is not a preprocessor directive, just skip it.
609         if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
610           continue;
611 
612         break;
613       }
614     }
615     if (Tok.is(tok::eof))
616       break;
617 
618     // We just parsed a # character at the start of a line, so we're in
619     // directive mode.  Tell the lexer this so any newlines we see will be
620     // converted into an EOD token (this terminates the macro).
621     CurPPLexer->ParsingPreprocessorDirective = true;
622     if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
623 
624     assert(Tok.is(tok::hash));
625     const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
626     assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
627 
628     // Read the next token, the directive flavor.
629     LexUnexpandedToken(Tok);
630 
631     // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
632     // something bogus), skip it.
633     if (Tok.isNot(tok::raw_identifier)) {
634       CurPPLexer->ParsingPreprocessorDirective = false;
635       // Restore comment saving mode.
636       if (CurLexer) CurLexer->resetExtendedTokenMode();
637       continue;
638     }
639 
640     // If the first letter isn't i or e, it isn't interesting to us.  We know that
641     // this is safe in the face of spelling differences, because there is no way
642     // to spell an i/e in a strange way that is another letter.  Skipping this
643     // allows us to avoid looking up the identifier info for #define/#undef and
644     // other common directives.
645     StringRef RI = Tok.getRawIdentifier();
646 
647     char FirstChar = RI[0];
648     if (FirstChar >= 'a' && FirstChar <= 'z' &&
649         FirstChar != 'i' && FirstChar != 'e') {
650       CurPPLexer->ParsingPreprocessorDirective = false;
651       // Restore comment saving mode.
652       if (CurLexer) CurLexer->resetExtendedTokenMode();
653       continue;
654     }
655 
656     // Get the identifier name without trigraphs or embedded newlines.  Note
657     // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
658     // when skipping.
659     char DirectiveBuf[20];
660     StringRef Directive;
661     if (!Tok.needsCleaning() && RI.size() < 20) {
662       Directive = RI;
663     } else {
664       std::string DirectiveStr = getSpelling(Tok);
665       size_t IdLen = DirectiveStr.size();
666       if (IdLen >= 20) {
667         CurPPLexer->ParsingPreprocessorDirective = false;
668         // Restore comment saving mode.
669         if (CurLexer) CurLexer->resetExtendedTokenMode();
670         continue;
671       }
672       memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
673       Directive = StringRef(DirectiveBuf, IdLen);
674     }
675 
676     if (Directive.starts_with("if")) {
677       StringRef Sub = Directive.substr(2);
678       if (Sub.empty() ||   // "if"
679           Sub == "def" ||   // "ifdef"
680           Sub == "ndef") {  // "ifndef"
681         // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
682         // bother parsing the condition.
683         DiscardUntilEndOfDirective();
684         CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
685                                        /*foundnonskip*/false,
686                                        /*foundelse*/false);
687       } else {
688         SuggestTypoedDirective(Tok, Directive);
689       }
690     } else if (Directive[0] == 'e') {
691       StringRef Sub = Directive.substr(1);
692       if (Sub == "ndif") {  // "endif"
693         PPConditionalInfo CondInfo;
694         CondInfo.WasSkipping = true; // Silence bogus warning.
695         bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
696         (void)InCond;  // Silence warning in no-asserts mode.
697         assert(!InCond && "Can't be skipping if not in a conditional!");
698 
699         // If we popped the outermost skipping block, we're done skipping!
700         if (!CondInfo.WasSkipping) {
701           SkippingRangeState.endLexPass(Hashptr);
702           // Restore the value of LexingRawMode so that trailing comments
703           // are handled correctly, if we've reached the outermost block.
704           CurPPLexer->LexingRawMode = false;
705           endLoc = CheckEndOfDirective("endif");
706           CurPPLexer->LexingRawMode = true;
707           if (Callbacks)
708             Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
709           break;
710         } else {
711           DiscardUntilEndOfDirective();
712         }
713       } else if (Sub == "lse") { // "else".
714         // #else directive in a skipping conditional.  If not in some other
715         // skipping conditional, and if #else hasn't already been seen, enter it
716         // as a non-skipping conditional.
717         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
718 
719         if (!CondInfo.WasSkipping)
720           SkippingRangeState.endLexPass(Hashptr);
721 
722         // If this is a #else with a #else before it, report the error.
723         if (CondInfo.FoundElse)
724           Diag(Tok, diag::pp_err_else_after_else);
725 
726         // Note that we've seen a #else in this conditional.
727         CondInfo.FoundElse = true;
728 
729         // If the conditional is at the top level, and the #if block wasn't
730         // entered, enter the #else block now.
731         if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
732           CondInfo.FoundNonSkip = true;
733           // Restore the value of LexingRawMode so that trailing comments
734           // are handled correctly.
735           CurPPLexer->LexingRawMode = false;
736           endLoc = CheckEndOfDirective("else");
737           CurPPLexer->LexingRawMode = true;
738           if (Callbacks)
739             Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
740           break;
741         } else {
742           DiscardUntilEndOfDirective();  // C99 6.10p4.
743         }
744       } else if (Sub == "lif") {  // "elif".
745         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
746 
747         if (!CondInfo.WasSkipping)
748           SkippingRangeState.endLexPass(Hashptr);
749 
750         // If this is a #elif with a #else before it, report the error.
751         if (CondInfo.FoundElse)
752           Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;
753 
754         // If this is in a skipping block or if we've already handled this #if
755         // block, don't bother parsing the condition.
756         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
757           // FIXME: We should probably do at least some minimal parsing of the
758           // condition to verify that it is well-formed. The current state
759           // allows #elif* directives with completely malformed (or missing)
760           // conditions.
761           DiscardUntilEndOfDirective();
762         } else {
763           // Restore the value of LexingRawMode so that identifiers are
764           // looked up, etc, inside the #elif expression.
765           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
766           CurPPLexer->LexingRawMode = false;
767           IdentifierInfo *IfNDefMacro = nullptr;
768           DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
769           // Stop if Lexer became invalid after hitting code completion token.
770           if (!CurPPLexer)
771             return;
772           const bool CondValue = DER.Conditional;
773           CurPPLexer->LexingRawMode = true;
774           if (Callbacks) {
775             Callbacks->Elif(
776                 Tok.getLocation(), DER.ExprRange,
777                 (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
778                 CondInfo.IfLoc);
779           }
780           // If this condition is true, enter it!
781           if (CondValue) {
782             CondInfo.FoundNonSkip = true;
783             break;
784           }
785         }
786       } else if (Sub == "lifdef" ||  // "elifdef"
787                  Sub == "lifndef") { // "elifndef"
788         bool IsElifDef = Sub == "lifdef";
789         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
790         Token DirectiveToken = Tok;
791 
792         if (!CondInfo.WasSkipping)
793           SkippingRangeState.endLexPass(Hashptr);
794 
795         // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
796         // if this branch is in a skipping block.
797         unsigned DiagID;
798         if (LangOpts.CPlusPlus)
799           DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
800                                         : diag::ext_cxx23_pp_directive;
801         else
802           DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
803                                 : diag::ext_c23_pp_directive;
804         Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
805 
806         // If this is a #elif with a #else before it, report the error.
807         if (CondInfo.FoundElse)
808           Diag(Tok, diag::pp_err_elif_after_else)
809               << (IsElifDef ? PED_Elifdef : PED_Elifndef);
810 
811         // If this is in a skipping block or if we've already handled this #if
812         // block, don't bother parsing the condition.
813         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
814           // FIXME: We should probably do at least some minimal parsing of the
815           // condition to verify that it is well-formed. The current state
816           // allows #elif* directives with completely malformed (or missing)
817           // conditions.
818           DiscardUntilEndOfDirective();
819         } else {
820           // Restore the value of LexingRawMode so that identifiers are
821           // looked up, etc, inside the #elif[n]def expression.
822           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
823           CurPPLexer->LexingRawMode = false;
824           Token MacroNameTok;
825           ReadMacroName(MacroNameTok);
826           CurPPLexer->LexingRawMode = true;
827 
828           // If the macro name token is tok::eod, there was an error that was
829           // already reported.
830           if (MacroNameTok.is(tok::eod)) {
831             // Skip code until we get to #endif.  This helps with recovery by
832             // not emitting an error when the #endif is reached.
833             continue;
834           }
835 
836           emitMacroExpansionWarnings(MacroNameTok);
837 
838           CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");
839 
840           IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
841           auto MD = getMacroDefinition(MII);
842           MacroInfo *MI = MD.getMacroInfo();
843 
844           if (Callbacks) {
845             if (IsElifDef) {
846               Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,
847                                  MD);
848             } else {
849               Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,
850                                   MD);
851             }
852           }
853           // If this condition is true, enter it!
854           if (static_cast<bool>(MI) == IsElifDef) {
855             CondInfo.FoundNonSkip = true;
856             break;
857           }
858         }
859       } else {
860         SuggestTypoedDirective(Tok, Directive);
861       }
862     } else {
863       SuggestTypoedDirective(Tok, Directive);
864     }
865 
866     CurPPLexer->ParsingPreprocessorDirective = false;
867     // Restore comment saving mode.
868     if (CurLexer) CurLexer->resetExtendedTokenMode();
869   }
870 
871   // Finally, if we are out of the conditional (saw an #endif or ran off the end
872   // of the file), just stop skipping and return to lexing whatever came after
873   // the #if block.
874   CurPPLexer->LexingRawMode = false;
875 
876   // The last skipped range isn't actually skipped yet if it's truncated
877   // by the end of the preamble; we'll resume parsing after the preamble.
878   if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
879     Callbacks->SourceRangeSkipped(
880         SourceRange(HashTokenLoc, endLoc.isValid()
881                                       ? endLoc
882                                       : CurPPLexer->getSourceLocation()),
883         Tok.getLocation());
884 }
885 
getModuleForLocation(SourceLocation Loc,bool AllowTextual)886 Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
887                                            bool AllowTextual) {
888   if (!SourceMgr.isInMainFile(Loc)) {
889     // Try to determine the module of the include directive.
890     // FIXME: Look into directly passing the FileEntry from LookupFile instead.
891     FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
892     if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(IDOfIncl)) {
893       // The include comes from an included file.
894       return HeaderInfo.getModuleMap()
895           .findModuleForHeader(*EntryOfIncl, AllowTextual)
896           .getModule();
897     }
898   }
899 
900   // This is either in the main file or not in a file at all. It belongs
901   // to the current module, if there is one.
902   return getLangOpts().CurrentModule.empty()
903              ? nullptr
904              : HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);
905 }
906 
907 OptionalFileEntryRef
getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,SourceLocation Loc)908 Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
909                                                SourceLocation Loc) {
910   Module *IncM = getModuleForLocation(
911       IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
912 
913   // Walk up through the include stack, looking through textual headers of M
914   // until we hit a non-textual header that we can #include. (We assume textual
915   // headers of a module with non-textual headers aren't meant to be used to
916   // import entities from the module.)
917   auto &SM = getSourceManager();
918   while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
919     auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
920     auto FE = SM.getFileEntryRefForID(ID);
921     if (!FE)
922       break;
923 
924     // We want to find all possible modules that might contain this header, so
925     // search all enclosing directories for module maps and load them.
926     HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,
927                             SourceMgr.isInSystemHeader(Loc));
928 
929     bool InPrivateHeader = false;
930     for (auto Header : HeaderInfo.findAllModulesForHeader(*FE)) {
931       if (!Header.isAccessibleFrom(IncM)) {
932         // It's in a private header; we can't #include it.
933         // FIXME: If there's a public header in some module that re-exports it,
934         // then we could suggest including that, but it's not clear that's the
935         // expected way to make this entity visible.
936         InPrivateHeader = true;
937         continue;
938       }
939 
940       // Don't suggest explicitly excluded headers.
941       if (Header.getRole() == ModuleMap::ExcludedHeader)
942         continue;
943 
944       // We'll suggest including textual headers below if they're
945       // include-guarded.
946       if (Header.getRole() & ModuleMap::TextualHeader)
947         continue;
948 
949       // If we have a module import syntax, we shouldn't include a header to
950       // make a particular module visible. Let the caller know they should
951       // suggest an import instead.
952       if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)
953         return std::nullopt;
954 
955       // If this is an accessible, non-textual header of M's top-level module
956       // that transitively includes the given location and makes the
957       // corresponding module visible, this is the thing to #include.
958       return *FE;
959     }
960 
961     // FIXME: If we're bailing out due to a private header, we shouldn't suggest
962     // an import either.
963     if (InPrivateHeader)
964       return std::nullopt;
965 
966     // If the header is includable and has an include guard, assume the
967     // intended way to expose its contents is by #include, not by importing a
968     // module that transitively includes it.
969     if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(*FE))
970       return *FE;
971 
972     Loc = SM.getIncludeLoc(ID);
973   }
974 
975   return std::nullopt;
976 }
977 
LookupFile(SourceLocation FilenameLoc,StringRef Filename,bool isAngled,ConstSearchDirIterator FromDir,const FileEntry * FromFile,ConstSearchDirIterator * CurDirArg,SmallVectorImpl<char> * SearchPath,SmallVectorImpl<char> * RelativePath,ModuleMap::KnownHeader * SuggestedModule,bool * IsMapped,bool * IsFrameworkFound,bool SkipCache,bool OpenFile,bool CacheFailures)978 OptionalFileEntryRef Preprocessor::LookupFile(
979     SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
980     ConstSearchDirIterator FromDir, const FileEntry *FromFile,
981     ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
982     SmallVectorImpl<char> *RelativePath,
983     ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
984     bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
985   ConstSearchDirIterator CurDirLocal = nullptr;
986   ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
987 
988   Module *RequestingModule = getModuleForLocation(
989       FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
990 
991   // If the header lookup mechanism may be relative to the current inclusion
992   // stack, record the parent #includes.
993   SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
994   bool BuildSystemModule = false;
995   if (!FromDir && !FromFile) {
996     FileID FID = getCurrentFileLexer()->getFileID();
997     OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);
998 
999     // If there is no file entry associated with this file, it must be the
1000     // predefines buffer or the module includes buffer. Any other file is not
1001     // lexed with a normal lexer, so it won't be scanned for preprocessor
1002     // directives.
1003     //
1004     // If we have the predefines buffer, resolve #include references (which come
1005     // from the -include command line argument) from the current working
1006     // directory instead of relative to the main file.
1007     //
1008     // If we have the module includes buffer, resolve #include references (which
1009     // come from header declarations in the module map) relative to the module
1010     // map file.
1011     if (!FileEnt) {
1012       if (FID == SourceMgr.getMainFileID() && MainFileDir) {
1013         auto IncludeDir =
1014             HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
1015                 Filename, getCurrentModule())
1016                 ? HeaderInfo.getModuleMap().getBuiltinDir()
1017                 : MainFileDir;
1018         Includers.push_back(std::make_pair(std::nullopt, *IncludeDir));
1019         BuildSystemModule = getCurrentModule()->IsSystem;
1020       } else if ((FileEnt = SourceMgr.getFileEntryRefForID(
1021                       SourceMgr.getMainFileID()))) {
1022         auto CWD = FileMgr.getOptionalDirectoryRef(".");
1023         Includers.push_back(std::make_pair(*FileEnt, *CWD));
1024       }
1025     } else {
1026       Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1027     }
1028 
1029     // MSVC searches the current include stack from top to bottom for
1030     // headers included by quoted include directives.
1031     // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
1032     if (LangOpts.MSVCCompat && !isAngled) {
1033       for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1034         if (IsFileLexer(ISEntry))
1035           if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
1036             Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1037       }
1038     }
1039   }
1040 
1041   CurDir = CurDirLookup;
1042 
1043   if (FromFile) {
1044     // We're supposed to start looking from after a particular file. Search
1045     // the include path until we find that file or run out of files.
1046     ConstSearchDirIterator TmpCurDir = CurDir;
1047     ConstSearchDirIterator TmpFromDir = nullptr;
1048     while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1049                Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir,
1050                Includers, SearchPath, RelativePath, RequestingModule,
1051                SuggestedModule, /*IsMapped=*/nullptr,
1052                /*IsFrameworkFound=*/nullptr, SkipCache)) {
1053       // Keep looking as if this file did a #include_next.
1054       TmpFromDir = TmpCurDir;
1055       ++TmpFromDir;
1056       if (&FE->getFileEntry() == FromFile) {
1057         // Found it.
1058         FromDir = TmpFromDir;
1059         CurDir = TmpCurDir;
1060         break;
1061       }
1062     }
1063   }
1064 
1065   // Do a standard file entry lookup.
1066   OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1067       Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath,
1068       RelativePath, RequestingModule, SuggestedModule, IsMapped,
1069       IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1070   if (FE)
1071     return FE;
1072 
1073   OptionalFileEntryRef CurFileEnt;
1074   // Otherwise, see if this is a subframework header.  If so, this is relative
1075   // to one of the headers on the #include stack.  Walk the list of the current
1076   // headers on the #include stack and pass them to HeaderInfo.
1077   if (IsFileLexer()) {
1078     if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1079       if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1080               Filename, *CurFileEnt, SearchPath, RelativePath, RequestingModule,
1081               SuggestedModule)) {
1082         return FE;
1083       }
1084     }
1085   }
1086 
1087   for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1088     if (IsFileLexer(ISEntry)) {
1089       if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1090         if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1091                 Filename, *CurFileEnt, SearchPath, RelativePath,
1092                 RequestingModule, SuggestedModule)) {
1093           return FE;
1094         }
1095       }
1096     }
1097   }
1098 
1099   // Otherwise, we really couldn't find the file.
1100   return std::nullopt;
1101 }
1102 
1103 OptionalFileEntryRef
LookupEmbedFile(StringRef Filename,bool isAngled,bool OpenFile,const FileEntry * LookupFromFile)1104 Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
1105                               const FileEntry *LookupFromFile) {
1106   FileManager &FM = this->getFileManager();
1107   if (llvm::sys::path::is_absolute(Filename)) {
1108     // lookup path or immediately fail
1109     llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1110         Filename, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1111     return llvm::expectedToOptional(std::move(ShouldBeEntry));
1112   }
1113 
1114   auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,
1115                                StringRef StartingFrom, StringRef FileName,
1116                                bool RemoveInitialFileComponentFromLookupPath) {
1117     llvm::sys::path::native(StartingFrom, LookupPath);
1118     if (RemoveInitialFileComponentFromLookupPath)
1119       llvm::sys::path::remove_filename(LookupPath);
1120     if (!LookupPath.empty() &&
1121         !llvm::sys::path::is_separator(LookupPath.back())) {
1122       LookupPath.push_back(llvm::sys::path::get_separator().front());
1123     }
1124     LookupPath.append(FileName.begin(), FileName.end());
1125   };
1126 
1127   // Otherwise, it's search time!
1128   SmallString<512> LookupPath;
1129   // Non-angled lookup
1130   if (!isAngled) {
1131     if (LookupFromFile) {
1132       // Use file-based lookup.
1133       StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
1134       if (!FullFileDir.empty()) {
1135         SeparateComponents(LookupPath, FullFileDir, Filename, true);
1136         llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1137             LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1138         if (ShouldBeEntry)
1139           return llvm::expectedToOptional(std::move(ShouldBeEntry));
1140         llvm::consumeError(ShouldBeEntry.takeError());
1141       }
1142     }
1143 
1144     // Otherwise, do working directory lookup.
1145     LookupPath.clear();
1146     auto MaybeWorkingDirEntry = FM.getDirectoryRef(".");
1147     if (MaybeWorkingDirEntry) {
1148       DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
1149       StringRef WorkingDir = WorkingDirEntry.getName();
1150       if (!WorkingDir.empty()) {
1151         SeparateComponents(LookupPath, WorkingDir, Filename, false);
1152         llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1153             LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1154         if (ShouldBeEntry)
1155           return llvm::expectedToOptional(std::move(ShouldBeEntry));
1156         llvm::consumeError(ShouldBeEntry.takeError());
1157       }
1158     }
1159   }
1160 
1161   for (const auto &Entry : PPOpts.EmbedEntries) {
1162     LookupPath.clear();
1163     SeparateComponents(LookupPath, Entry, Filename, false);
1164     llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1165         LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1166     if (ShouldBeEntry)
1167       return llvm::expectedToOptional(std::move(ShouldBeEntry));
1168     llvm::consumeError(ShouldBeEntry.takeError());
1169   }
1170   return std::nullopt;
1171 }
1172 
1173 //===----------------------------------------------------------------------===//
1174 // Preprocessor Directive Handling.
1175 //===----------------------------------------------------------------------===//
1176 
1177 class Preprocessor::ResetMacroExpansionHelper {
1178 public:
ResetMacroExpansionHelper(Preprocessor * pp)1179   ResetMacroExpansionHelper(Preprocessor *pp)
1180     : PP(pp), save(pp->DisableMacroExpansion) {
1181     if (pp->MacroExpansionInDirectivesOverride)
1182       pp->DisableMacroExpansion = false;
1183   }
1184 
~ResetMacroExpansionHelper()1185   ~ResetMacroExpansionHelper() {
1186     PP->DisableMacroExpansion = save;
1187   }
1188 
1189 private:
1190   Preprocessor *PP;
1191   bool save;
1192 };
1193 
1194 /// Process a directive while looking for the through header or a #pragma
1195 /// hdrstop. The following directives are handled:
1196 /// #include (to check if it is the through header)
1197 /// #define (to warn about macros that don't match the PCH)
1198 /// #pragma (to check for pragma hdrstop).
1199 /// All other directives are completely discarded.
HandleSkippedDirectiveWhileUsingPCH(Token & Result,SourceLocation HashLoc)1200 void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1201                                                        SourceLocation HashLoc) {
1202   if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1203     if (II->getPPKeywordID() == tok::pp_define) {
1204       return HandleDefineDirective(Result,
1205                                    /*ImmediatelyAfterHeaderGuard=*/false);
1206     }
1207     if (SkippingUntilPCHThroughHeader &&
1208         II->getPPKeywordID() == tok::pp_include) {
1209       return HandleIncludeDirective(HashLoc, Result);
1210     }
1211     if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1212       Lex(Result);
1213       auto *II = Result.getIdentifierInfo();
1214       if (II && II->getName() == "hdrstop")
1215         return HandlePragmaHdrstop(Result);
1216     }
1217   }
1218   DiscardUntilEndOfDirective();
1219 }
1220 
1221 /// HandleDirective - This callback is invoked when the lexer sees a # token
1222 /// at the start of a line.  This consumes the directive, modifies the
1223 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
1224 /// read is the correct one.
HandleDirective(Token & Result)1225 void Preprocessor::HandleDirective(Token &Result) {
1226   // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1227 
1228   // We just parsed a # character at the start of a line, so we're in directive
1229   // mode.  Tell the lexer this so any newlines we see will be converted into an
1230   // EOD token (which terminates the directive).
1231   CurPPLexer->ParsingPreprocessorDirective = true;
1232   if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1233 
1234   bool ImmediatelyAfterTopLevelIfndef =
1235       CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1236   CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1237 
1238   ++NumDirectives;
1239 
1240   // We are about to read a token.  For the multiple-include optimization FA to
1241   // work, we have to remember if we had read any tokens *before* this
1242   // pp-directive.
1243   bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1244 
1245   // Save the '#' token in case we need to return it later.
1246   Token SavedHash = Result;
1247 
1248   // Read the next token, the directive flavor.  This isn't expanded due to
1249   // C99 6.10.3p8.
1250   LexUnexpandedToken(Result);
1251 
1252   // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
1253   //   #define A(x) #x
1254   //   A(abc
1255   //     #warning blah
1256   //   def)
1257   // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1258   // not support this for #include-like directives, since that can result in
1259   // terrible diagnostics, and does not work in GCC.
1260   if (InMacroArgs) {
1261     if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1262       switch (II->getPPKeywordID()) {
1263       case tok::pp_include:
1264       case tok::pp_import:
1265       case tok::pp_include_next:
1266       case tok::pp___include_macros:
1267       case tok::pp_pragma:
1268       case tok::pp_embed:
1269         Diag(Result, diag::err_embedded_directive) << II->getName();
1270         Diag(*ArgMacro, diag::note_macro_expansion_here)
1271             << ArgMacro->getIdentifierInfo();
1272         DiscardUntilEndOfDirective();
1273         return;
1274       default:
1275         break;
1276       }
1277     }
1278     Diag(Result, diag::ext_embedded_directive);
1279   }
1280 
1281   // Temporarily enable macro expansion if set so
1282   // and reset to previous state when returning from this function.
1283   ResetMacroExpansionHelper helper(this);
1284 
1285   if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1286     return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());
1287 
1288   switch (Result.getKind()) {
1289   case tok::eod:
1290     // Ignore the null directive with regards to the multiple-include
1291     // optimization, i.e. allow the null directive to appear outside of the
1292     // include guard and still enable the multiple-include optimization.
1293     CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1294     return;   // null directive.
1295   case tok::code_completion:
1296     setCodeCompletionReached();
1297     if (CodeComplete)
1298       CodeComplete->CodeCompleteDirective(
1299                                     CurPPLexer->getConditionalStackDepth() > 0);
1300     return;
1301   case tok::numeric_constant:  // # 7  GNU line marker directive.
1302     // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1303     // directive. However do permit it in the predefines file, as we use line
1304     // markers to mark the builtin macros as being in a system header.
1305     if (getLangOpts().AsmPreprocessor &&
1306         SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID())
1307       break;
1308     return HandleDigitDirective(Result);
1309   default:
1310     IdentifierInfo *II = Result.getIdentifierInfo();
1311     if (!II) break; // Not an identifier.
1312 
1313     // Ask what the preprocessor keyword ID is.
1314     switch (II->getPPKeywordID()) {
1315     default: break;
1316     // C99 6.10.1 - Conditional Inclusion.
1317     case tok::pp_if:
1318       return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
1319     case tok::pp_ifdef:
1320       return HandleIfdefDirective(Result, SavedHash, false,
1321                                   true /*not valid for miopt*/);
1322     case tok::pp_ifndef:
1323       return HandleIfdefDirective(Result, SavedHash, true,
1324                                   ReadAnyTokensBeforeDirective);
1325     case tok::pp_elif:
1326     case tok::pp_elifdef:
1327     case tok::pp_elifndef:
1328       return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID());
1329 
1330     case tok::pp_else:
1331       return HandleElseDirective(Result, SavedHash);
1332     case tok::pp_endif:
1333       return HandleEndifDirective(Result);
1334 
1335     // C99 6.10.2 - Source File Inclusion.
1336     case tok::pp_include:
1337       // Handle #include.
1338       return HandleIncludeDirective(SavedHash.getLocation(), Result);
1339     case tok::pp___include_macros:
1340       // Handle -imacros.
1341       return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
1342 
1343     // C99 6.10.3 - Macro Replacement.
1344     case tok::pp_define:
1345       return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
1346     case tok::pp_undef:
1347       return HandleUndefDirective();
1348 
1349     // C99 6.10.4 - Line Control.
1350     case tok::pp_line:
1351       return HandleLineDirective();
1352 
1353     // C99 6.10.5 - Error Directive.
1354     case tok::pp_error:
1355       return HandleUserDiagnosticDirective(Result, false);
1356 
1357     // C99 6.10.6 - Pragma Directive.
1358     case tok::pp_pragma:
1359       return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
1360 
1361     // GNU Extensions.
1362     case tok::pp_import:
1363       return HandleImportDirective(SavedHash.getLocation(), Result);
1364     case tok::pp_include_next:
1365       return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
1366 
1367     case tok::pp_warning:
1368       if (LangOpts.CPlusPlus)
1369         Diag(Result, LangOpts.CPlusPlus23
1370                          ? diag::warn_cxx23_compat_warning_directive
1371                          : diag::ext_pp_warning_directive)
1372             << /*C++23*/ 1;
1373       else
1374         Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1375                                   : diag::ext_pp_warning_directive)
1376             << /*C23*/ 0;
1377 
1378       return HandleUserDiagnosticDirective(Result, true);
1379     case tok::pp_ident:
1380       return HandleIdentSCCSDirective(Result);
1381     case tok::pp_sccs:
1382       return HandleIdentSCCSDirective(Result);
1383     case tok::pp_embed:
1384       return HandleEmbedDirective(SavedHash.getLocation(), Result,
1385                                   getCurrentFileLexer()
1386                                       ? *getCurrentFileLexer()->getFileEntry()
1387                                       : static_cast<FileEntry *>(nullptr));
1388     case tok::pp_assert:
1389       //isExtension = true;  // FIXME: implement #assert
1390       break;
1391     case tok::pp_unassert:
1392       //isExtension = true;  // FIXME: implement #unassert
1393       break;
1394 
1395     case tok::pp___public_macro:
1396       if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1397         return HandleMacroPublicDirective(Result);
1398       break;
1399 
1400     case tok::pp___private_macro:
1401       if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1402         return HandleMacroPrivateDirective();
1403       break;
1404     }
1405     break;
1406   }
1407 
1408   // If this is a .S file, treat unknown # directives as non-preprocessor
1409   // directives.  This is important because # may be a comment or introduce
1410   // various pseudo-ops.  Just return the # token and push back the following
1411   // token to be lexed next time.
1412   if (getLangOpts().AsmPreprocessor) {
1413     auto Toks = std::make_unique<Token[]>(2);
1414     // Return the # and the token after it.
1415     Toks[0] = SavedHash;
1416     Toks[1] = Result;
1417 
1418     // If the second token is a hashhash token, then we need to translate it to
1419     // unknown so the token lexer doesn't try to perform token pasting.
1420     if (Result.is(tok::hashhash))
1421       Toks[1].setKind(tok::unknown);
1422 
1423     // Enter this token stream so that we re-lex the tokens.  Make sure to
1424     // enable macro expansion, in case the token after the # is an identifier
1425     // that is expanded.
1426     EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
1427     return;
1428   }
1429 
1430   // If we reached here, the preprocessing token is not valid!
1431   // Start suggesting if a similar directive found.
1432   Diag(Result, diag::err_pp_invalid_directive) << 0;
1433 
1434   // Read the rest of the PP line.
1435   DiscardUntilEndOfDirective();
1436 
1437   // Okay, we're done parsing the directive.
1438 }
1439 
1440 /// GetLineValue - Convert a numeric token into an unsigned value, emitting
1441 /// Diagnostic DiagID if it is invalid, and returning the value in Val.
GetLineValue(Token & DigitTok,unsigned & Val,unsigned DiagID,Preprocessor & PP,bool IsGNULineDirective=false)1442 static bool GetLineValue(Token &DigitTok, unsigned &Val,
1443                          unsigned DiagID, Preprocessor &PP,
1444                          bool IsGNULineDirective=false) {
1445   if (DigitTok.isNot(tok::numeric_constant)) {
1446     PP.Diag(DigitTok, DiagID);
1447 
1448     if (DigitTok.isNot(tok::eod))
1449       PP.DiscardUntilEndOfDirective();
1450     return true;
1451   }
1452 
1453   SmallString<64> IntegerBuffer;
1454   IntegerBuffer.resize(DigitTok.getLength());
1455   const char *DigitTokBegin = &IntegerBuffer[0];
1456   bool Invalid = false;
1457   unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
1458   if (Invalid)
1459     return true;
1460 
1461   // Verify that we have a simple digit-sequence, and compute the value.  This
1462   // is always a simple digit string computed in decimal, so we do this manually
1463   // here.
1464   Val = 0;
1465   for (unsigned i = 0; i != ActualLength; ++i) {
1466     // C++1y [lex.fcon]p1:
1467     //   Optional separating single quotes in a digit-sequence are ignored
1468     if (DigitTokBegin[i] == '\'')
1469       continue;
1470 
1471     if (!isDigit(DigitTokBegin[i])) {
1472       PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
1473               diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1474       PP.DiscardUntilEndOfDirective();
1475       return true;
1476     }
1477 
1478     unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1479     if (NextVal < Val) { // overflow.
1480       PP.Diag(DigitTok, DiagID);
1481       PP.DiscardUntilEndOfDirective();
1482       return true;
1483     }
1484     Val = NextVal;
1485   }
1486 
1487   if (DigitTokBegin[0] == '0' && Val)
1488     PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
1489       << IsGNULineDirective;
1490 
1491   return false;
1492 }
1493 
1494 /// Handle a \#line directive: C99 6.10.4.
1495 ///
1496 /// The two acceptable forms are:
1497 /// \verbatim
1498 ///   # line digit-sequence
1499 ///   # line digit-sequence "s-char-sequence"
1500 /// \endverbatim
HandleLineDirective()1501 void Preprocessor::HandleLineDirective() {
1502   // Read the line # and string argument.  Per C99 6.10.4p5, these tokens are
1503   // expanded.
1504   Token DigitTok;
1505   Lex(DigitTok);
1506 
1507   // Validate the number and convert it to an unsigned.
1508   unsigned LineNo;
1509   if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
1510     return;
1511 
1512   if (LineNo == 0)
1513     Diag(DigitTok, diag::ext_pp_line_zero);
1514 
1515   // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1516   // number greater than 2147483647".  C90 requires that the line # be <= 32767.
1517   unsigned LineLimit = 32768U;
1518   if (LangOpts.C99 || LangOpts.CPlusPlus11)
1519     LineLimit = 2147483648U;
1520   if (LineNo >= LineLimit)
1521     Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
1522   else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1523     Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
1524 
1525   int FilenameID = -1;
1526   Token StrTok;
1527   Lex(StrTok);
1528 
1529   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1530   // string followed by eod.
1531   if (StrTok.is(tok::eod))
1532     ; // ok
1533   else if (StrTok.isNot(tok::string_literal)) {
1534     Diag(StrTok, diag::err_pp_line_invalid_filename);
1535     DiscardUntilEndOfDirective();
1536     return;
1537   } else if (StrTok.hasUDSuffix()) {
1538     Diag(StrTok, diag::err_invalid_string_udl);
1539     DiscardUntilEndOfDirective();
1540     return;
1541   } else {
1542     // Parse and validate the string, converting it into a unique ID.
1543     StringLiteralParser Literal(StrTok, *this);
1544     assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1545     if (Literal.hadError) {
1546       DiscardUntilEndOfDirective();
1547       return;
1548     }
1549     if (Literal.Pascal) {
1550       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1551       DiscardUntilEndOfDirective();
1552       return;
1553     }
1554     FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1555 
1556     // Verify that there is nothing after the string, other than EOD.  Because
1557     // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1558     CheckEndOfDirective("line", true);
1559   }
1560 
1561   // Take the file kind of the file containing the #line directive. #line
1562   // directives are often used for generated sources from the same codebase, so
1563   // the new file should generally be classified the same way as the current
1564   // file. This is visible in GCC's pre-processed output, which rewrites #line
1565   // to GNU line markers.
1566   SrcMgr::CharacteristicKind FileKind =
1567       SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1568 
1569   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
1570                         false, FileKind);
1571 
1572   if (Callbacks)
1573     Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
1574                            PPCallbacks::RenameFile, FileKind);
1575 }
1576 
1577 /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1578 /// marker directive.
ReadLineMarkerFlags(bool & IsFileEntry,bool & IsFileExit,SrcMgr::CharacteristicKind & FileKind,Preprocessor & PP)1579 static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1580                                 SrcMgr::CharacteristicKind &FileKind,
1581                                 Preprocessor &PP) {
1582   unsigned FlagVal;
1583   Token FlagTok;
1584   PP.Lex(FlagTok);
1585   if (FlagTok.is(tok::eod)) return false;
1586   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1587     return true;
1588 
1589   if (FlagVal == 1) {
1590     IsFileEntry = true;
1591 
1592     PP.Lex(FlagTok);
1593     if (FlagTok.is(tok::eod)) return false;
1594     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1595       return true;
1596   } else if (FlagVal == 2) {
1597     IsFileExit = true;
1598 
1599     SourceManager &SM = PP.getSourceManager();
1600     // If we are leaving the current presumed file, check to make sure the
1601     // presumed include stack isn't empty!
1602     FileID CurFileID =
1603       SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
1604     PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
1605     if (PLoc.isInvalid())
1606       return true;
1607 
1608     // If there is no include loc (main file) or if the include loc is in a
1609     // different physical file, then we aren't in a "1" line marker flag region.
1610     SourceLocation IncLoc = PLoc.getIncludeLoc();
1611     if (IncLoc.isInvalid() ||
1612         SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
1613       PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
1614       PP.DiscardUntilEndOfDirective();
1615       return true;
1616     }
1617 
1618     PP.Lex(FlagTok);
1619     if (FlagTok.is(tok::eod)) return false;
1620     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1621       return true;
1622   }
1623 
1624   // We must have 3 if there are still flags.
1625   if (FlagVal != 3) {
1626     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1627     PP.DiscardUntilEndOfDirective();
1628     return true;
1629   }
1630 
1631   FileKind = SrcMgr::C_System;
1632 
1633   PP.Lex(FlagTok);
1634   if (FlagTok.is(tok::eod)) return false;
1635   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1636     return true;
1637 
1638   // We must have 4 if there is yet another flag.
1639   if (FlagVal != 4) {
1640     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1641     PP.DiscardUntilEndOfDirective();
1642     return true;
1643   }
1644 
1645   FileKind = SrcMgr::C_ExternCSystem;
1646 
1647   PP.Lex(FlagTok);
1648   if (FlagTok.is(tok::eod)) return false;
1649 
1650   // There are no more valid flags here.
1651   PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1652   PP.DiscardUntilEndOfDirective();
1653   return true;
1654 }
1655 
1656 /// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1657 /// one of the following forms:
1658 ///
1659 ///     # 42
1660 ///     # 42 "file" ('1' | '2')?
1661 ///     # 42 "file" ('1' | '2')? '3' '4'?
1662 ///
HandleDigitDirective(Token & DigitTok)1663 void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1664   // Validate the number and convert it to an unsigned.  GNU does not have a
1665   // line # limit other than it fit in 32-bits.
1666   unsigned LineNo;
1667   if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
1668                    *this, true))
1669     return;
1670 
1671   Token StrTok;
1672   Lex(StrTok);
1673 
1674   bool IsFileEntry = false, IsFileExit = false;
1675   int FilenameID = -1;
1676   SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1677 
1678   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1679   // string followed by eod.
1680   if (StrTok.is(tok::eod)) {
1681     Diag(StrTok, diag::ext_pp_gnu_line_directive);
1682     // Treat this like "#line NN", which doesn't change file characteristics.
1683     FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1684   } else if (StrTok.isNot(tok::string_literal)) {
1685     Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1686     DiscardUntilEndOfDirective();
1687     return;
1688   } else if (StrTok.hasUDSuffix()) {
1689     Diag(StrTok, diag::err_invalid_string_udl);
1690     DiscardUntilEndOfDirective();
1691     return;
1692   } else {
1693     // Parse and validate the string, converting it into a unique ID.
1694     StringLiteralParser Literal(StrTok, *this);
1695     assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1696     if (Literal.hadError) {
1697       DiscardUntilEndOfDirective();
1698       return;
1699     }
1700     if (Literal.Pascal) {
1701       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1702       DiscardUntilEndOfDirective();
1703       return;
1704     }
1705 
1706     // If a filename was present, read any flags that are present.
1707     if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
1708       return;
1709     if (!SourceMgr.isInPredefinedFile(DigitTok.getLocation()))
1710       Diag(StrTok, diag::ext_pp_gnu_line_directive);
1711 
1712     // Exiting to an empty string means pop to the including file, so leave
1713     // FilenameID as -1 in that case.
1714     if (!(IsFileExit && Literal.GetString().empty()))
1715       FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1716   }
1717 
1718   // Create a line note with this information.
1719   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1720                         IsFileExit, FileKind);
1721 
1722   // If the preprocessor has callbacks installed, notify them of the #line
1723   // change.  This is used so that the line marker comes out in -E mode for
1724   // example.
1725   if (Callbacks) {
1726     PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1727     if (IsFileEntry)
1728       Reason = PPCallbacks::EnterFile;
1729     else if (IsFileExit)
1730       Reason = PPCallbacks::ExitFile;
1731 
1732     Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
1733   }
1734 }
1735 
1736 /// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1737 ///
HandleUserDiagnosticDirective(Token & Tok,bool isWarning)1738 void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1739                                                  bool isWarning) {
1740   // Read the rest of the line raw.  We do this because we don't want macros
1741   // to be expanded and we don't require that the tokens be valid preprocessing
1742   // tokens.  For example, this is allowed: "#warning `   'foo".  GCC does
1743   // collapse multiple consecutive white space between tokens, but this isn't
1744   // specified by the standard.
1745   SmallString<128> Message;
1746   CurLexer->ReadToEndOfLine(&Message);
1747 
1748   // Find the first non-whitespace character, so that we can make the
1749   // diagnostic more succinct.
1750   StringRef Msg = Message.str().ltrim(' ');
1751 
1752   if (isWarning)
1753     Diag(Tok, diag::pp_hash_warning) << Msg;
1754   else
1755     Diag(Tok, diag::err_pp_hash_error) << Msg;
1756 }
1757 
1758 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1759 ///
HandleIdentSCCSDirective(Token & Tok)1760 void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1761   // Yes, this directive is an extension.
1762   Diag(Tok, diag::ext_pp_ident_directive);
1763 
1764   // Read the string argument.
1765   Token StrTok;
1766   Lex(StrTok);
1767 
1768   // If the token kind isn't a string, it's a malformed directive.
1769   if (StrTok.isNot(tok::string_literal) &&
1770       StrTok.isNot(tok::wide_string_literal)) {
1771     Diag(StrTok, diag::err_pp_malformed_ident);
1772     if (StrTok.isNot(tok::eod))
1773       DiscardUntilEndOfDirective();
1774     return;
1775   }
1776 
1777   if (StrTok.hasUDSuffix()) {
1778     Diag(StrTok, diag::err_invalid_string_udl);
1779     DiscardUntilEndOfDirective();
1780     return;
1781   }
1782 
1783   // Verify that there is nothing after the string, other than EOD.
1784   CheckEndOfDirective("ident");
1785 
1786   if (Callbacks) {
1787     bool Invalid = false;
1788     std::string Str = getSpelling(StrTok, &Invalid);
1789     if (!Invalid)
1790       Callbacks->Ident(Tok.getLocation(), Str);
1791   }
1792 }
1793 
1794 /// Handle a #public directive.
HandleMacroPublicDirective(Token & Tok)1795 void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1796   Token MacroNameTok;
1797   ReadMacroName(MacroNameTok, MU_Undef);
1798 
1799   // Error reading macro name?  If so, diagnostic already issued.
1800   if (MacroNameTok.is(tok::eod))
1801     return;
1802 
1803   // Check to see if this is the last token on the #__public_macro line.
1804   CheckEndOfDirective("__public_macro");
1805 
1806   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1807   // Okay, we finally have a valid identifier to undef.
1808   MacroDirective *MD = getLocalMacroDirective(II);
1809 
1810   // If the macro is not defined, this is an error.
1811   if (!MD) {
1812     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1813     return;
1814   }
1815 
1816   // Note that this macro has now been exported.
1817   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1818                                 MacroNameTok.getLocation(), /*isPublic=*/true));
1819 }
1820 
1821 /// Handle a #private directive.
HandleMacroPrivateDirective()1822 void Preprocessor::HandleMacroPrivateDirective() {
1823   Token MacroNameTok;
1824   ReadMacroName(MacroNameTok, MU_Undef);
1825 
1826   // Error reading macro name?  If so, diagnostic already issued.
1827   if (MacroNameTok.is(tok::eod))
1828     return;
1829 
1830   // Check to see if this is the last token on the #__private_macro line.
1831   CheckEndOfDirective("__private_macro");
1832 
1833   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1834   // Okay, we finally have a valid identifier to undef.
1835   MacroDirective *MD = getLocalMacroDirective(II);
1836 
1837   // If the macro is not defined, this is an error.
1838   if (!MD) {
1839     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1840     return;
1841   }
1842 
1843   // Note that this macro has now been marked private.
1844   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1845                                MacroNameTok.getLocation(), /*isPublic=*/false));
1846 }
1847 
1848 //===----------------------------------------------------------------------===//
1849 // Preprocessor Include Directive Handling.
1850 //===----------------------------------------------------------------------===//
1851 
1852 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1853 /// checked and spelled filename, e.g. as an operand of \#include. This returns
1854 /// true if the input filename was in <>'s or false if it were in ""'s.  The
1855 /// caller is expected to provide a buffer that is large enough to hold the
1856 /// spelling of the filename, but is also expected to handle the case when
1857 /// this method decides to use a different buffer.
GetIncludeFilenameSpelling(SourceLocation Loc,StringRef & Buffer)1858 bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1859                                               StringRef &Buffer) {
1860   // Get the text form of the filename.
1861   assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1862 
1863   // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1864   // C++20 [lex.header]/2:
1865   //
1866   // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1867   //   in C: behavior is undefined
1868   //   in C++: program is conditionally-supported with implementation-defined
1869   //           semantics
1870 
1871   // Make sure the filename is <x> or "x".
1872   bool isAngled;
1873   if (Buffer[0] == '<') {
1874     if (Buffer.back() != '>') {
1875       Diag(Loc, diag::err_pp_expects_filename);
1876       Buffer = StringRef();
1877       return true;
1878     }
1879     isAngled = true;
1880   } else if (Buffer[0] == '"') {
1881     if (Buffer.back() != '"') {
1882       Diag(Loc, diag::err_pp_expects_filename);
1883       Buffer = StringRef();
1884       return true;
1885     }
1886     isAngled = false;
1887   } else {
1888     Diag(Loc, diag::err_pp_expects_filename);
1889     Buffer = StringRef();
1890     return true;
1891   }
1892 
1893   // Diagnose #include "" as invalid.
1894   if (Buffer.size() <= 2) {
1895     Diag(Loc, diag::err_pp_empty_filename);
1896     Buffer = StringRef();
1897     return true;
1898   }
1899 
1900   // Skip the brackets.
1901   Buffer = Buffer.substr(1, Buffer.size()-2);
1902   return isAngled;
1903 }
1904 
1905 /// Push a token onto the token stream containing an annotation.
EnterAnnotationToken(SourceRange Range,tok::TokenKind Kind,void * AnnotationVal)1906 void Preprocessor::EnterAnnotationToken(SourceRange Range,
1907                                         tok::TokenKind Kind,
1908                                         void *AnnotationVal) {
1909   // FIXME: Produce this as the current token directly, rather than
1910   // allocating a new token for it.
1911   auto Tok = std::make_unique<Token[]>(1);
1912   Tok[0].startToken();
1913   Tok[0].setKind(Kind);
1914   Tok[0].setLocation(Range.getBegin());
1915   Tok[0].setAnnotationEndLoc(Range.getEnd());
1916   Tok[0].setAnnotationValue(AnnotationVal);
1917   EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
1918 }
1919 
1920 /// Produce a diagnostic informing the user that a #include or similar
1921 /// was implicitly treated as a module import.
diagnoseAutoModuleImport(Preprocessor & PP,SourceLocation HashLoc,Token & IncludeTok,ArrayRef<IdentifierLoc> Path,SourceLocation PathEnd)1922 static void diagnoseAutoModuleImport(Preprocessor &PP, SourceLocation HashLoc,
1923                                      Token &IncludeTok,
1924                                      ArrayRef<IdentifierLoc> Path,
1925                                      SourceLocation PathEnd) {
1926   SmallString<128> PathString;
1927   for (size_t I = 0, N = Path.size(); I != N; ++I) {
1928     if (I)
1929       PathString += '.';
1930     PathString += Path[I].getIdentifierInfo()->getName();
1931   }
1932 
1933   int IncludeKind = 0;
1934   switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1935   case tok::pp_include:
1936     IncludeKind = 0;
1937     break;
1938 
1939   case tok::pp_import:
1940     IncludeKind = 1;
1941     break;
1942 
1943   case tok::pp_include_next:
1944     IncludeKind = 2;
1945     break;
1946 
1947   case tok::pp___include_macros:
1948     IncludeKind = 3;
1949     break;
1950 
1951   default:
1952     llvm_unreachable("unknown include directive kind");
1953   }
1954 
1955   PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation)
1956       << IncludeKind << PathString;
1957 }
1958 
1959 // Given a vector of path components and a string containing the real
1960 // path to the file, build a properly-cased replacement in the vector,
1961 // and return true if the replacement should be suggested.
trySimplifyPath(SmallVectorImpl<StringRef> & Components,StringRef RealPathName,llvm::sys::path::Style Separator)1962 static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1963                             StringRef RealPathName,
1964                             llvm::sys::path::Style Separator) {
1965   auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
1966   auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
1967   int Cnt = 0;
1968   bool SuggestReplacement = false;
1969 
1970   auto IsSep = [Separator](StringRef Component) {
1971     return Component.size() == 1 &&
1972            llvm::sys::path::is_separator(Component[0], Separator);
1973   };
1974 
1975   // Below is a best-effort to handle ".." in paths. It is admittedly
1976   // not 100% correct in the presence of symlinks.
1977   for (auto &Component : llvm::reverse(Components)) {
1978     if ("." == Component) {
1979     } else if (".." == Component) {
1980       ++Cnt;
1981     } else if (Cnt) {
1982       --Cnt;
1983     } else if (RealPathComponentIter != RealPathComponentEnd) {
1984       if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
1985           Component != *RealPathComponentIter) {
1986         // If these non-separator path components differ by more than just case,
1987         // then we may be looking at symlinked paths. Bail on this diagnostic to
1988         // avoid noisy false positives.
1989         SuggestReplacement =
1990             RealPathComponentIter->equals_insensitive(Component);
1991         if (!SuggestReplacement)
1992           break;
1993         Component = *RealPathComponentIter;
1994       }
1995       ++RealPathComponentIter;
1996     }
1997   }
1998   return SuggestReplacement;
1999 }
2000 
checkModuleIsAvailable(const LangOptions & LangOpts,const TargetInfo & TargetInfo,const Module & M,DiagnosticsEngine & Diags)2001 bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
2002                                           const TargetInfo &TargetInfo,
2003                                           const Module &M,
2004                                           DiagnosticsEngine &Diags) {
2005   Module::Requirement Requirement;
2006   Module::UnresolvedHeaderDirective MissingHeader;
2007   Module *ShadowingModule = nullptr;
2008   if (M.isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
2009                     ShadowingModule))
2010     return false;
2011 
2012   if (MissingHeader.FileNameLoc.isValid()) {
2013     Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
2014         << MissingHeader.IsUmbrella << MissingHeader.FileName;
2015   } else if (ShadowingModule) {
2016     Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name;
2017     Diags.Report(ShadowingModule->DefinitionLoc,
2018                  diag::note_previous_definition);
2019   } else {
2020     // FIXME: Track the location at which the requirement was specified, and
2021     // use it here.
2022     Diags.Report(M.DefinitionLoc, diag::err_module_unavailable)
2023         << M.getFullModuleName() << Requirement.RequiredState
2024         << Requirement.FeatureName;
2025   }
2026   return true;
2027 }
2028 
2029 std::pair<ConstSearchDirIterator, const FileEntry *>
getIncludeNextStart(const Token & IncludeNextTok) const2030 Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
2031   // #include_next is like #include, except that we start searching after
2032   // the current found directory.  If we can't do this, issue a
2033   // diagnostic.
2034   ConstSearchDirIterator Lookup = CurDirLookup;
2035   const FileEntry *LookupFromFile = nullptr;
2036 
2037   if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
2038     // If the main file is a header, then it's either for PCH/AST generation,
2039     // or libclang opened it. Either way, handle it as a normal include below
2040     // and do not complain about include_next.
2041   } else if (isInPrimaryFile()) {
2042     Lookup = nullptr;
2043     Diag(IncludeNextTok, diag::pp_include_next_in_primary);
2044   } else if (CurLexerSubmodule) {
2045     // Start looking up in the directory *after* the one in which the current
2046     // file would be found, if any.
2047     assert(CurPPLexer && "#include_next directive in macro?");
2048     if (auto FE = CurPPLexer->getFileEntry())
2049       LookupFromFile = *FE;
2050     Lookup = nullptr;
2051   } else if (!Lookup) {
2052     // The current file was not found by walking the include path. Either it
2053     // is the primary file (handled above), or it was found by absolute path,
2054     // or it was found relative to such a file.
2055     // FIXME: Track enough information so we know which case we're in.
2056     Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
2057   } else {
2058     // Start looking up in the next directory.
2059     ++Lookup;
2060   }
2061 
2062   return {Lookup, LookupFromFile};
2063 }
2064 
2065 /// HandleIncludeDirective - The "\#include" tokens have just been read, read
2066 /// the file to be included from the lexer, then include it!  This is a common
2067 /// routine with functionality shared between \#include, \#include_next and
2068 /// \#import.  LookupFrom is set when this is a \#include_next directive, it
2069 /// specifies the file to start searching from.
HandleIncludeDirective(SourceLocation HashLoc,Token & IncludeTok,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)2070 void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
2071                                           Token &IncludeTok,
2072                                           ConstSearchDirIterator LookupFrom,
2073                                           const FileEntry *LookupFromFile) {
2074   Token FilenameTok;
2075   if (LexHeaderName(FilenameTok))
2076     return;
2077 
2078   if (FilenameTok.isNot(tok::header_name)) {
2079     if (FilenameTok.is(tok::identifier) && PPOpts.SingleFileParseMode) {
2080       // If we saw #include IDENTIFIER and lexing didn't turn in into a header
2081       // name, it was undefined. In 'single-file-parse' mode, just skip the
2082       // directive without emitting diagnostics - the identifier might be
2083       // normally defined in previously-skipped include directive.
2084       DiscardUntilEndOfDirective();
2085       return;
2086     }
2087 
2088     Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
2089     if (FilenameTok.isNot(tok::eod))
2090       DiscardUntilEndOfDirective();
2091     return;
2092   }
2093 
2094   // Verify that there is nothing after the filename, other than EOD.  Note
2095   // that we allow macros that expand to nothing after the filename, because
2096   // this falls into the category of "#include pp-tokens new-line" specified
2097   // in C99 6.10.2p4.
2098   SourceLocation EndLoc =
2099       CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
2100 
2101   auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
2102                                             EndLoc, LookupFrom, LookupFromFile);
2103   switch (Action.Kind) {
2104   case ImportAction::None:
2105   case ImportAction::SkippedModuleImport:
2106     break;
2107   case ImportAction::ModuleBegin:
2108     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2109                          tok::annot_module_begin, Action.ModuleForHeader);
2110     break;
2111   case ImportAction::HeaderUnitImport:
2112     EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
2113                          Action.ModuleForHeader);
2114     break;
2115   case ImportAction::ModuleImport:
2116     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2117                          tok::annot_module_include, Action.ModuleForHeader);
2118     break;
2119   case ImportAction::Failure:
2120     assert(TheModuleLoader.HadFatalFailure &&
2121            "This should be an early exit only to a fatal error");
2122     TheModuleLoader.HadFatalFailure = true;
2123     IncludeTok.setKind(tok::eof);
2124     CurLexer->cutOffLexing();
2125     return;
2126   }
2127 }
2128 
LookupHeaderIncludeOrImport(ConstSearchDirIterator * CurDir,StringRef & Filename,SourceLocation FilenameLoc,CharSourceRange FilenameRange,const Token & FilenameTok,bool & IsFrameworkFound,bool IsImportDecl,bool & IsMapped,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile,StringRef & LookupFilename,SmallVectorImpl<char> & RelativePath,SmallVectorImpl<char> & SearchPath,ModuleMap::KnownHeader & SuggestedModule,bool isAngled)2129 OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
2130     ConstSearchDirIterator *CurDir, StringRef &Filename,
2131     SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2132     const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2133     bool &IsMapped, ConstSearchDirIterator LookupFrom,
2134     const FileEntry *LookupFromFile, StringRef &LookupFilename,
2135     SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2136     ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
2137   auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
2138     if (LangOpts.AsmPreprocessor)
2139       return;
2140 
2141     Module *RequestingModule = getModuleForLocation(
2142         FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
2143     bool RequestingModuleIsModuleInterface =
2144         !SourceMgr.isInMainFile(FilenameLoc);
2145 
2146     HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
2147         RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
2148         Filename, FE);
2149   };
2150 
2151   OptionalFileEntryRef File = LookupFile(
2152       FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir,
2153       Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2154       &SuggestedModule, &IsMapped, &IsFrameworkFound);
2155   if (File) {
2156     DiagnoseHeaderInclusion(*File);
2157     return File;
2158   }
2159 
2160   // Give the clients a chance to silently skip this include.
2161   if (Callbacks && Callbacks->FileNotFound(Filename))
2162     return std::nullopt;
2163 
2164   if (SuppressIncludeNotFoundError)
2165     return std::nullopt;
2166 
2167   // If the file could not be located and it was included via angle
2168   // brackets, we can attempt a lookup as though it were a quoted path to
2169   // provide the user with a possible fixit.
2170   if (isAngled) {
2171     OptionalFileEntryRef File = LookupFile(
2172         FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir,
2173         Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2174         &SuggestedModule, &IsMapped,
2175         /*IsFrameworkFound=*/nullptr);
2176     if (File) {
2177       DiagnoseHeaderInclusion(*File);
2178       Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
2179           << Filename << IsImportDecl
2180           << FixItHint::CreateReplacement(FilenameRange,
2181                                           "\"" + Filename.str() + "\"");
2182       return File;
2183     }
2184   }
2185 
2186   // Check for likely typos due to leading or trailing non-isAlphanumeric
2187   // characters
2188   StringRef OriginalFilename = Filename;
2189   if (LangOpts.SpellChecking) {
2190     // A heuristic to correct a typo file name by removing leading and
2191     // trailing non-isAlphanumeric characters.
2192     auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2193       Filename = Filename.drop_until(isAlphanumeric);
2194       while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
2195         Filename = Filename.drop_back();
2196       }
2197       return Filename;
2198     };
2199     StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2200     StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2201 
2202     OptionalFileEntryRef File = LookupFile(
2203         FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom,
2204         LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr,
2205         Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
2206         /*IsFrameworkFound=*/nullptr);
2207     if (File) {
2208       DiagnoseHeaderInclusion(*File);
2209       auto Hint =
2210           isAngled ? FixItHint::CreateReplacement(
2211                          FilenameRange, "<" + TypoCorrectionName.str() + ">")
2212                    : FixItHint::CreateReplacement(
2213                          FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
2214       Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
2215           << OriginalFilename << TypoCorrectionName << Hint;
2216       // We found the file, so set the Filename to the name after typo
2217       // correction.
2218       Filename = TypoCorrectionName;
2219       LookupFilename = TypoCorrectionLookupName;
2220       return File;
2221     }
2222   }
2223 
2224   // If the file is still not found, just go with the vanilla diagnostic
2225   assert(!File && "expected missing file");
2226   Diag(FilenameTok, diag::err_pp_file_not_found)
2227       << OriginalFilename << FilenameRange;
2228   if (IsFrameworkFound) {
2229     size_t SlashPos = OriginalFilename.find('/');
2230     assert(SlashPos != StringRef::npos &&
2231            "Include with framework name should have '/' in the filename");
2232     StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
2233     FrameworkCacheEntry &CacheEntry =
2234         HeaderInfo.LookupFrameworkCache(FrameworkName);
2235     assert(CacheEntry.Directory && "Found framework should be in cache");
2236     Diag(FilenameTok, diag::note_pp_framework_without_header)
2237         << OriginalFilename.substr(SlashPos + 1) << FrameworkName
2238         << CacheEntry.Directory->getName();
2239   }
2240 
2241   return std::nullopt;
2242 }
2243 
2244 /// Handle either a #include-like directive or an import declaration that names
2245 /// a header file.
2246 ///
2247 /// \param HashLoc The location of the '#' token for an include, or
2248 ///        SourceLocation() for an import declaration.
2249 /// \param IncludeTok The include / include_next / import token.
2250 /// \param FilenameTok The header-name token.
2251 /// \param EndLoc The location at which any imported macros become visible.
2252 /// \param LookupFrom For #include_next, the starting directory for the
2253 ///        directory lookup.
2254 /// \param LookupFromFile For #include_next, the starting file for the directory
2255 ///        lookup.
     ///
     /// \returns an ImportAction telling the caller what happened: None when
     ///          nothing further is required (this includes most error paths),
     ///          SkippedModuleImport, ModuleImport or ModuleBegin paired with the
     ///          module concerned, or Failure when no valid FileID could be
     ///          created for the header.
HandleHeaderIncludeOrImport(SourceLocation HashLoc,Token & IncludeTok,Token & FilenameTok,SourceLocation EndLoc,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)2256 Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2257     SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2258     SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2259     const FileEntry *LookupFromFile) {
2260   SmallString<128> FilenameBuffer;
2261   StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
2262   SourceLocation CharEnd = FilenameTok.getEndLoc();
2263 
2264   CharSourceRange FilenameRange
2265     = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
       // Keep the spelling with its delimiters; it is used for the include alias
       // map and for one of the diagnostics below.
2266   StringRef OriginalFilename = Filename;
2267   bool isAngled =
2268     GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
2269 
2270   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2271   // error.
2272   if (Filename.empty())
2273     return {ImportAction::None};
2274 
       // An import declaration is signalled by an invalid HashLoc; diagnostics
       // about the directive as a whole then point at the import token instead.
2275   bool IsImportDecl = HashLoc.isInvalid();
2276   SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2277 
2278   // Complain about attempts to #include files in an audit pragma.
2279   if (PragmaARCCFCodeAuditedInfo.getLoc().isValid()) {
2280     Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2281     Diag(PragmaARCCFCodeAuditedInfo.getLoc(), diag::note_pragma_entered_here);
2282 
2283     // Immediately leave the pragma.
2284     PragmaARCCFCodeAuditedInfo = IdentifierLoc();
2285   }
2286 
2287   // Complain about attempts to #include files in an assume-nonnull pragma.
2288   if (PragmaAssumeNonNullLoc.isValid()) {
2289     Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2290     Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
2291 
2292     // Immediately leave the pragma.
2293     PragmaAssumeNonNullLoc = SourceLocation();
2294   }
2295 
2296   if (HeaderInfo.HasIncludeAliasMap()) {
2297     // Map the filename with the brackets still attached.  If the name doesn't
2298     // map to anything, fall back on the filename we've already gotten the
2299     // spelling for.
2300     StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
2301     if (!NewName.empty())
2302       Filename = NewName;
2303   }
2304 
2305   // Search include directories.
2306   bool IsMapped = false;
2307   bool IsFrameworkFound = false;
2308   ConstSearchDirIterator CurDir = nullptr;
2309   SmallString<1024> SearchPath;
2310   SmallString<1024> RelativePath;
2311   // SearchPath and RelativePath are only filled in if we have 'Callbacks' to
2312   // which we later pass them.
2313   ModuleMap::KnownHeader SuggestedModule;
2314   SourceLocation FilenameLoc = FilenameTok.getLocation();
2315   StringRef LookupFilename = Filename;
2316 
2317   // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2318   // is unnecessary on Windows since the filesystem there handles backslashes.
2319   SmallString<128> NormalizedPath;
2320   llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2321   if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
2322     NormalizedPath = Filename.str();
2323     llvm::sys::path::native(NormalizedPath);
2324     LookupFilename = NormalizedPath;
         // From here on BackslashStyle is the style used to interpret the
         // separators the user wrote (see the portability check below).
2325     BackslashStyle = llvm::sys::path::Style::windows;
2326   }
2327 
2328   OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2329       &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2330       IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2331       LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2332 
       // While skipping up to the PCH through header, no include is acted upon;
       // the skipping flag is cleared once the through header itself is seen.
2333   if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2334     if (File && isPCHThroughHeader(&File->getFileEntry()))
2335       SkippingUntilPCHThroughHeader = false;
2336     return {ImportAction::None};
2337   }
2338 
2339   // Should we enter the source file? Set to Skip if the source file is known
2340   // to have no effect beyond its effect on module visibility -- that is, if
2341   // it's got an include guard that is already defined. Set to Import if it
2342   // is a modular header we've already built and should import.
2343 
2344   // For C++20 Modules
2345   // [cpp.include]/7 If the header identified by the header-name denotes an
2346   // importable header, it is implementation-defined whether the #include
2347   // preprocessing directive is instead replaced by an import directive.
2348   // For this implementation, the translation is permitted when we are parsing
2349   // the Global Module Fragment, and not otherwise (the cases where it would be
2350   // valid to replace an include with an import are highly constrained once in
2351   // named module purview; this choice avoids considerable complexity in
2352   // determining valid cases).
2353 
2354   enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2355 
       // In single-file-parse mode the IncludeLimitReached action is reused: every
       // 'Action == Enter' check below is bypassed and the final switch returns
       // without entering the file.
2356   if (PPOpts.SingleFileParseMode)
2357     Action = IncludeLimitReached;
2358 
2359   // If we've reached the max allowed include depth, it is usually due to an
2360   // include cycle. Don't enter already processed files again as it can lead to
2361   // reaching the max allowed include depth again.
2362   if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2363       alreadyIncluded(*File))
2364     Action = IncludeLimitReached;
2365 
2366   // FIXME: We do not have a good way to disambiguate C++ clang modules from
2367   // C++ standard modules (other than use/non-use of Header Units).
2368 
2369   Module *ModuleToImport = SuggestedModule.getModule();
2370 
2371   bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport &&
2372                                !ModuleToImport->isForBuilding(getLangOpts());
2373 
2374   // Maybe a usable Header Unit
2375   bool UsableHeaderUnit = false;
2376   if (getLangOpts().CPlusPlusModules && ModuleToImport &&
2377       ModuleToImport->isHeaderUnit()) {
2378     if (TrackGMFState.inGMF() || IsImportDecl)
2379       UsableHeaderUnit = true;
2380     else if (!IsImportDecl) {
2381       // This is a Header Unit that we do not include-translate
2382       ModuleToImport = nullptr;
2383     }
2384   }
2385   // Maybe a usable clang header module.
2386   bool UsableClangHeaderModule =
2387       (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
2388       ModuleToImport && !ModuleToImport->isHeaderUnit();
2389 
2390   // Determine whether we should try to import the module for this #include, if
2391   // there is one. Don't do so if precompiled module support is disabled or we
2392   // are processing this module textually (because we're building the module).
2393   if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2394     // If this include corresponds to a module but that module is
2395     // unavailable, diagnose the situation and bail out.
2396     // FIXME: Remove this; loadModule does the same check (but produces
2397     // slightly worse diagnostics).
2398     if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), *ModuleToImport,
2399                                getDiagnostics())) {
2400       Diag(FilenameTok.getLocation(),
2401            diag::note_implicit_top_level_module_import_here)
2402           << ModuleToImport->getTopLevelModuleName();
2403       return {ImportAction::None};
2404     }
2405 
2406     // Compute the module access path corresponding to this module.
2407     // FIXME: Should we have a second loadModule() overload to avoid this
2408     // extra lookup step?
2409     SmallVector<IdentifierLoc, 2> Path;
         // Walk from the module up through its parents, then reverse so that the
         // path runs from the outermost module down to ModuleToImport.
2410     for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent)
2411       Path.emplace_back(FilenameTok.getLocation(),
2412                         getIdentifierInfo(Mod->Name));
2413     std::reverse(Path.begin(), Path.end());
2414 
2415     // Warn that we're replacing the include/import with a module import.
2416     if (!IsImportDecl)
2417       diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);
2418 
2419     // Load the module to import its macros. We'll make the declarations
2420     // visible when the parser gets here.
2421     // FIXME: Pass ModuleToImport in here rather than converting it to a path
2422     // and making the module loader convert it back again.
2423     ModuleLoadResult Imported = TheModuleLoader.loadModule(
2424         IncludeTok.getLocation(), Path, Module::Hidden,
2425         /*IsInclusionDirective=*/true);
2426     assert((Imported == nullptr || Imported == ModuleToImport) &&
2427            "the imported module is different than the suggested one");
2428 
2429     if (Imported) {
2430       Action = Import;
2431     } else if (Imported.isMissingExpected()) {
2432       markClangModuleAsAffecting(
2433           static_cast<Module *>(Imported)->getTopLevelModule());
2434       // We failed to find a submodule that we assumed would exist (because it
2435       // was in the directory of an umbrella header, for instance), but no
2436       // actual module containing it exists (because the umbrella header is
2437       // incomplete).  Treat this as a textual inclusion.
2438       ModuleToImport = nullptr;
2439     } else if (Imported.isConfigMismatch()) {
2440       // On a configuration mismatch, enter the header textually. We still know
2441       // that it's part of the corresponding module.
2442     } else {
2443       // We hit an error processing the import. Bail out.
2444       if (hadModuleLoaderFatalFailure()) {
2445         // With a fatal failure in the module loader, we abort parsing.
             // IncludeTok is overwritten with an eof token formed at the end of
             // the current lexer's buffer, and further lexing is cut off.
2446         Token &Result = IncludeTok;
2447         assert(CurLexer && "#include but no current lexer set!");
2448         Result.startToken();
2449         CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
2450         CurLexer->cutOffLexing();
2451       }
2452       return {ImportAction::None};
2453     }
2454   }
2455 
2456   // The #included file will be considered to be a system header if either it is
2457   // in a system include directory, or if the #includer is a system include
2458   // header.
2459   SrcMgr::CharacteristicKind FileCharacter =
2460       SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
       // Take the greater of the includer's characteristic and the one derived
       // from the directory the header was found in.
2461   if (File)
2462     FileCharacter = std::max(HeaderInfo.getFileDirFlavor(*File), FileCharacter);
2463 
2464   // If this is a '#import' or an import-declaration, don't re-enter the file.
2465   //
2466   // FIXME: If we have a suggested module for a '#include', and we've already
2467   // visited this file, don't bother entering it again. We know it has no
2468   // further effect.
2469   bool EnterOnce =
2470       IsImportDecl ||
2471       IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2472 
       // Passed to ShouldEnterIncludeFile below and later handed on to
       // EnterSourceFile.
2473   bool IsFirstIncludeOfFile = false;
2474 
2475   // Ask HeaderInfo if we should enter this #include file.  If not, #including
2476   // this file will have no effect.
2477   if (Action == Enter && File &&
2478       !HeaderInfo.ShouldEnterIncludeFile(*this, *File, EnterOnce,
2479                                          getLangOpts().Modules, ModuleToImport,
2480                                          IsFirstIncludeOfFile)) {
2481     // C++ standard modules:
2482     // If we are not in the GMF, then we textually include only
2483     // clang modules:
2484     // Even if we've already preprocessed this header once and know that we
2485     // don't need to see its contents again, we still need to import it if it's
2486     // modular because we might not have imported it from this submodule before.
2487     //
2488     // FIXME: We don't do this when compiling a PCH because the AST
2489     // serialization layer can't cope with it. This means we get local
2490     // submodule visibility semantics wrong in that case.
2491     if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2492       Action = TrackGMFState.inGMF() ? Import : Skip;
2493     else
2494       Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip;
2495   }
2496 
2497   // Check for circular inclusion of the main file.
2498   // We can't generate a consistent preamble with regard to the conditional
2499   // stack if the main file is included again as due to the preamble bounds
2500   // some directives (e.g. #endif of a header guard) will never be seen.
2501   // Since this will lead to confusing errors, avoid the inclusion.
2502   if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2503       SourceMgr.isMainFile(File->getFileEntry())) {
2504     Diag(FilenameTok.getLocation(),
2505          diag::err_pp_including_mainfile_in_preamble);
2506     return {ImportAction::None};
2507   }
2508 
2509   if (Callbacks && !IsImportDecl) {
2510     // Notify the callback object that we've seen an inclusion directive.
2511     // FIXME: Use a different callback for a pp-import?
2512     Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled,
2513                                   FilenameRange, File, SearchPath, RelativePath,
2514                                   SuggestedModule.getModule(), Action == Import,
2515                                   FileCharacter);
2516     if (Action == Skip && File)
2517       Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
2518   }
2519 
       // The missing-file check is deferred until after the callback
       // notification above, which receives the (possibly empty) File.
2520   if (!File)
2521     return {ImportAction::None};
2522 
2523   // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2524   // module corresponding to the named header.
2525   if (IsImportDecl && !ModuleToImport) {
2526     Diag(FilenameTok, diag::err_header_import_not_header_unit)
2527       << OriginalFilename << File->getName();
2528     return {ImportAction::None};
2529   }
2530 
2531   // Issue a diagnostic if the name of the file on disk has a different case
2532   // than the one we're about to open.
2533   const bool CheckIncludePathPortability =
2534       !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2535 
2536   if (CheckIncludePathPortability) {
         // Name is the spelling that was looked up (possibly slash-normalized);
         // NameWithoriginalSlashes keeps the separators as the user wrote them.
2537     StringRef Name = LookupFilename;
2538     StringRef NameWithoriginalSlashes = Filename;
2539 #if defined(_WIN32)
2540     // Skip UNC prefix if present. (tryGetRealPathName() always
2541     // returns a path with the prefix skipped.)
2542     bool NameWasUNC = Name.consume_front("\\\\?\\");
2543     NameWithoriginalSlashes.consume_front("\\\\?\\");
2544 #endif
2545     StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2546     SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
2547                                           llvm::sys::path::end(Name));
2548 #if defined(_WIN32)
2549     // -Wnonportable-include-path is designed to diagnose includes using
2550     // case even on systems with a case-insensitive file system.
2551     // On Windows, RealPathName always starts with an upper-case drive
2552     // letter for absolute paths, but Name might start with either
2553     // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2554     // ("foo" will always have on-disk case, no matter which case was
2555     // used in the cd command). To not emit this warning solely for
2556     // the drive letter, whose case is dependent on if `cd` is used
2557     // with upper- or lower-case drive letters, always consider the
2558     // given drive letter case as correct for the purpose of this warning.
2559     SmallString<128> FixedDriveRealPath;
2560     if (llvm::sys::path::is_absolute(Name) &&
2561         llvm::sys::path::is_absolute(RealPathName) &&
2562         toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2563         isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2564       assert(Components.size() >= 3 && "should have drive, backslash, name");
2565       assert(Components[0].size() == 2 && "should start with drive");
2566       assert(Components[0][1] == ':' && "should have colon");
2567       FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2568       RealPathName = FixedDriveRealPath;
2569     }
2570 #endif
2571 
2572     if (trySimplifyPath(Components, RealPathName, BackslashStyle)) {
           // Rebuild the header name, delimiters included, from Components while
           // reusing the separator characters the user originally wrote.
2573       SmallString<128> Path;
2574       Path.reserve(Name.size()+2);
2575       Path.push_back(isAngled ? '<' : '"');
2576 
2577       const auto IsSep = [BackslashStyle](char c) {
2578         return llvm::sys::path::is_separator(c, BackslashStyle);
2579       };
2580 
2581       for (auto Component : Components) {
2582         // On POSIX, Components will contain a single '/' as first element
2583         // exactly if Name is an absolute path.
2584         // On Windows, it will contain "C:" followed by '\' for absolute paths.
2585         // The drive letter is optional for absolute paths on Windows, but
2586         // clang currently cannot process absolute paths in #include lines that
2587         // don't have a drive.
2588         // If the first entry in Components is a directory separator,
2589         // then the code at the bottom of this loop that keeps the original
2590         // directory separator style copies it. If the second entry is
2591         // a directory separator (the C:\ case), then that separator already
2592         // got copied when the C: was processed and we want to skip that entry.
2593         if (!(Component.size() == 1 && IsSep(Component[0])))
2594           Path.append(Component);
2595         else if (Path.size() != 1)
2596           continue;
2597 
2598         // Append the separator(s) the user used, or the close quote
             // Path carries one extra leading delimiter character, so Path.size()-1
             // is the index in NameWithoriginalSlashes of the next character to
             // copy.
2599         if (Path.size() > NameWithoriginalSlashes.size()) {
2600           Path.push_back(isAngled ? '>' : '"');
2601           continue;
2602         }
2603         assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2604         do
2605           Path.push_back(NameWithoriginalSlashes[Path.size()-1]);
2606         while (Path.size() <= NameWithoriginalSlashes.size() &&
2607                IsSep(NameWithoriginalSlashes[Path.size()-1]));
2608       }
2609 
2610 #if defined(_WIN32)
2611       // Restore UNC prefix if it was there.
2612       if (NameWasUNC)
2613         Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2614 #endif
2615 
2616       // For user files and known standard headers, issue a diagnostic.
2617       // For other system headers, don't. They can be controlled separately.
2618       auto DiagId =
2619           (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
2620               ? diag::pp_nonportable_path
2621               : diag::pp_nonportable_system_path;
2622       Diag(FilenameTok, DiagId) << Path <<
2623         FixItHint::CreateReplacement(FilenameRange, Path);
2624     }
2625   }
2626 
2627   switch (Action) {
2628   case Skip:
2629     // If we don't need to enter the file, stop now.
2630     if (ModuleToImport)
2631       return {ImportAction::SkippedModuleImport, ModuleToImport};
2632     return {ImportAction::None};
2633 
2634   case IncludeLimitReached:
2635     // If we reached our include limit and don't want to enter any more files,
2636     // don't go any further.
2637     return {ImportAction::None};
2638 
2639   case Import: {
2640     // If this is a module import, make it visible if needed.
2641     assert(ModuleToImport && "no module to import");
2642 
2643     makeModuleVisible(ModuleToImport, EndLoc);
2644 
         // For #__include_macros the module has been made visible above, but no
         // module-import action is handed back to the caller.
2645     if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2646         tok::pp___include_macros)
2647       return {ImportAction::None};
2648 
2649     return {ImportAction::ModuleImport, ModuleToImport};
2650   }
2651 
2652   case Enter:
2653     break;
2654   }
2655 
2656   // Check that we don't have infinite #include recursion.
2657   if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2658     Diag(FilenameTok, diag::err_pp_include_too_deep);
2659     HasReachedMaxIncludeDepth = true;
2660     return {ImportAction::None};
2661   }
2662 
2663   if (isAngled && isInNamedModule())
2664     Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview)
2665         << getNamedModuleName();
2666 
2667   // Look up the file, create a File ID for it.
2668   SourceLocation IncludePos = FilenameTok.getLocation();
2669   // If the filename string was the result of macro expansions, set the include
2670   // position on the file where it will be included and after the expansions.
2671   if (IncludePos.isMacroID())
2672     IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
2673   FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
2674   if (!FID.isValid()) {
         // Record the failure as fatal before reporting it; the handling of
         // ImportAction::Failure earlier in this file asserts that this flag is
         // set.
2675     TheModuleLoader.HadFatalFailure = true;
2676     return ImportAction::Failure;
2677   }
2678 
2679   // If all is good, enter the new file!
       // A true result from EnterSourceFile makes us stop here with no further
       // action.
2680   if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(),
2681                       IsFirstIncludeOfFile))
2682     return {ImportAction::None};
2683 
2684   // Determine if we're switching to building a new submodule, and which one.
2685   // This does not apply for C++20 modules header units.
2686   if (ModuleToImport && !ModuleToImport->isHeaderUnit()) {
2687     if (ModuleToImport->getTopLevelModule()->ShadowingModule) {
2688       // We are building a submodule that belongs to a shadowed module. This
2689       // means we find header files in the shadowed module.
2690       Diag(ModuleToImport->DefinitionLoc,
2691            diag::err_module_build_shadowed_submodule)
2692           << ModuleToImport->getFullModuleName();
2693       Diag(ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2694            diag::note_previous_definition);
2695       return {ImportAction::None};
2696     }
2697     // When building a pch, -fmodule-name tells the compiler to textually
2698     // include headers in the specified module. We are not building the
2699     // specified module.
2700     //
2701     // FIXME: This is the wrong way to handle this. We should produce a PCH
2702     // that behaves the same as the header would behave in a compilation using
2703     // that PCH, which means we should enter the submodule. We need to teach
2704     // the AST serialization layer to deal with the resulting AST.
2705     if (getLangOpts().CompilingPCH &&
2706         ModuleToImport->isForBuilding(getLangOpts()))
2707       return {ImportAction::None};
2708 
2709     assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2710     CurLexerSubmodule = ModuleToImport;
2711 
2712     // Let the macro handling code know that any future macros are within
2713     // the new submodule.
2714     EnterSubmodule(ModuleToImport, EndLoc, /*ForPragma*/ false);
2715 
2716     // Let the parser know that any future declarations are within the new
2717     // submodule.
2718     // FIXME: There's no point doing this if we're handling a #__include_macros
2719     // directive.
2720     return {ImportAction::ModuleBegin, ModuleToImport};
2721   }
2722 
2723   assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2724   return {ImportAction::None};
2725 }
2726 
2727 /// HandleIncludeNextDirective - Implements \#include_next.
2728 ///
HandleIncludeNextDirective(SourceLocation HashLoc,Token & IncludeNextTok)2729 void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2730                                               Token &IncludeNextTok) {
2731   Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2732 
2733   ConstSearchDirIterator Lookup = nullptr;
2734   const FileEntry *LookupFromFile;
2735   std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2736 
2737   return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
2738                                 LookupFromFile);
2739 }
2740 
2741 /// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
HandleMicrosoftImportDirective(Token & Tok)2742 void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2743   // The Microsoft #import directive takes a type library and generates header
2744   // files from it, and includes those.  This is beyond the scope of what clang
2745   // does, so we ignore it and error out.  However, #import can optionally have
2746   // trailing attributes that span multiple lines.  We're going to eat those
2747   // so we can continue processing from there.
2748   Diag(Tok, diag::err_pp_import_directive_ms );
2749 
2750   // Read tokens until we get to the end of the directive.  Note that the
2751   // directive can be split over multiple lines using the backslash character.
2752   DiscardUntilEndOfDirective();
2753 }
2754 
2755 /// HandleImportDirective - Implements \#import.
2756 ///
HandleImportDirective(SourceLocation HashLoc,Token & ImportTok)2757 void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2758                                          Token &ImportTok) {
2759   if (!LangOpts.ObjC) {  // #import is standard for ObjC.
2760     if (LangOpts.MSVCCompat)
2761       return HandleMicrosoftImportDirective(ImportTok);
2762     Diag(ImportTok, diag::ext_pp_import_directive);
2763   }
2764   return HandleIncludeDirective(HashLoc, ImportTok);
2765 }
2766 
2767 /// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2768 /// pseudo directive in the predefines buffer.  This handles it by sucking all
2769 /// tokens through the preprocessor and discarding them (only keeping the side
2770 /// effects on the preprocessor).
HandleIncludeMacrosDirective(SourceLocation HashLoc,Token & IncludeMacrosTok)2771 void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2772                                                 Token &IncludeMacrosTok) {
2773   // This directive should only occur in the predefines buffer.  If not, emit an
2774   // error and reject it.
2775   SourceLocation Loc = IncludeMacrosTok.getLocation();
2776   if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2777     Diag(IncludeMacrosTok.getLocation(),
2778          diag::pp_include_macros_out_of_predefines);
2779     DiscardUntilEndOfDirective();
2780     return;
2781   }
2782 
2783   // Treat this as a normal #include for checking purposes.  If this is
2784   // successful, it will push a new lexer onto the include stack.
2785   HandleIncludeDirective(HashLoc, IncludeMacrosTok);
2786 
2787   Token TmpTok;
2788   do {
2789     Lex(TmpTok);
2790     assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2791   } while (TmpTok.isNot(tok::hashhash));
2792 }
2793 
2794 //===----------------------------------------------------------------------===//
2795 // Preprocessor Macro Directive Handling.
2796 //===----------------------------------------------------------------------===//
2797 
2798 /// ReadMacroParameterList - The ( starting a parameter list of a macro
2799 /// definition has just been read.  Lex the rest of the parameters and the
2800 /// closing ), updating MI with what we learn.  Return true if an error occurs
2801 /// parsing the param list.
ReadMacroParameterList(MacroInfo * MI,Token & Tok)2802 bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2803   SmallVector<IdentifierInfo*, 32> Parameters;
2804 
2805   while (true) {
2806     LexUnexpandedNonComment(Tok);
2807     switch (Tok.getKind()) {
2808     case tok::r_paren:
2809       // Found the end of the parameter list.
2810       if (Parameters.empty())  // #define FOO()
2811         return false;
2812       // Otherwise we have #define FOO(A,)
2813       Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2814       return true;
2815     case tok::ellipsis:  // #define X(... -> C99 varargs
2816       if (!LangOpts.C99)
2817         Diag(Tok, LangOpts.CPlusPlus11 ?
2818              diag::warn_cxx98_compat_variadic_macro :
2819              diag::ext_variadic_macro);
2820 
2821       // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2822       if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2823         Diag(Tok, diag::ext_pp_opencl_variadic_macros);
2824       }
2825 
2826       // Lex the token after the identifier.
2827       LexUnexpandedNonComment(Tok);
2828       if (Tok.isNot(tok::r_paren)) {
2829         Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2830         return true;
2831       }
2832       // Add the __VA_ARGS__ identifier as a parameter.
2833       Parameters.push_back(Ident__VA_ARGS__);
2834       MI->setIsC99Varargs();
2835       MI->setParameterList(Parameters, BP);
2836       return false;
2837     case tok::eod:  // #define X(
2838       Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2839       return true;
2840     default:
2841       // Handle keywords and identifiers here to accept things like
2842       // #define Foo(for) for.
2843       IdentifierInfo *II = Tok.getIdentifierInfo();
2844       if (!II) {
2845         // #define X(1
2846         Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2847         return true;
2848       }
2849 
2850       // If this is already used as a parameter, it is used multiple times (e.g.
2851       // #define X(A,A.
2852       if (llvm::is_contained(Parameters, II)) { // C99 6.10.3p6
2853         Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
2854         return true;
2855       }
2856 
2857       // Add the parameter to the macro info.
2858       Parameters.push_back(II);
2859 
2860       // Lex the token after the identifier.
2861       LexUnexpandedNonComment(Tok);
2862 
2863       switch (Tok.getKind()) {
2864       default:          // #define X(A B
2865         Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2866         return true;
2867       case tok::r_paren: // #define X(A)
2868         MI->setParameterList(Parameters, BP);
2869         return false;
2870       case tok::comma:  // #define X(A,
2871         break;
2872       case tok::ellipsis:  // #define X(A... -> GCC extension
2873         // Diagnose extension.
2874         Diag(Tok, diag::ext_named_variadic_macro);
2875 
2876         // Lex the token after the identifier.
2877         LexUnexpandedNonComment(Tok);
2878         if (Tok.isNot(tok::r_paren)) {
2879           Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2880           return true;
2881         }
2882 
2883         MI->setIsGNUVarargs();
2884         MI->setParameterList(Parameters, BP);
2885         return false;
2886       }
2887     }
2888   }
2889 }
2890 
isConfigurationPattern(Token & MacroName,MacroInfo * MI,const LangOptions & LOptions)2891 static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2892                                    const LangOptions &LOptions) {
2893   if (MI->getNumTokens() == 1) {
2894     const Token &Value = MI->getReplacementToken(0);
2895 
2896     // Macro that is identity, like '#define inline inline' is a valid pattern.
2897     if (MacroName.getKind() == Value.getKind())
2898       return true;
2899 
2900     // Macro that maps a keyword to the same keyword decorated with leading/
2901     // trailing underscores is a valid pattern:
2902     //    #define inline __inline
2903     //    #define inline __inline__
2904     //    #define inline _inline (in MS compatibility mode)
2905     StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2906     if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2907       if (!II->isKeyword(LOptions))
2908         return false;
2909       StringRef ValueText = II->getName();
2910       StringRef TrimmedValue = ValueText;
2911       if (!ValueText.starts_with("__")) {
2912         if (ValueText.starts_with("_"))
2913           TrimmedValue = TrimmedValue.drop_front(1);
2914         else
2915           return false;
2916       } else {
2917         TrimmedValue = TrimmedValue.drop_front(2);
2918         if (TrimmedValue.ends_with("__"))
2919           TrimmedValue = TrimmedValue.drop_back(2);
2920       }
2921       return TrimmedValue == MacroText;
2922     } else {
2923       return false;
2924     }
2925   }
2926 
2927   // #define inline
2928   return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
2929                            tok::kw_const) &&
2930          MI->getNumTokens() == 0;
2931 }
2932 
2933 // ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2934 // entire line) of the macro's tokens and adds them to MacroInfo, and while
2935 // doing so performs certain validity checks including (but not limited to):
2936 //   - # (stringization) is followed by a macro parameter
2937 //
2938 //  Returns a nullptr if an invalid sequence of tokens is encountered or returns
2939 //  a pointer to a MacroInfo object.
2940 
ReadOptionalMacroParameterListAndBody(const Token & MacroNameTok,const bool ImmediatelyAfterHeaderGuard)2941 MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
2942     const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
2943 
2944   Token LastTok = MacroNameTok;
2945   // Create the new macro.
2946   MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());
2947 
2948   Token Tok;
2949   LexUnexpandedToken(Tok);
2950 
2951   // Ensure we consume the rest of the macro body if errors occur.
2952   auto _ = llvm::make_scope_exit([&]() {
2953     // The flag indicates if we are still waiting for 'eod'.
2954     if (CurLexer->ParsingPreprocessorDirective)
2955       DiscardUntilEndOfDirective();
2956   });
2957 
2958   // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
2959   // within their appropriate context.
2960   VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
2961 
2962   // If this is a function-like macro definition, parse the argument list,
2963   // marking each of the identifiers as being used as macro arguments.  Also,
2964   // check other constraints on the first token of the macro body.
2965   if (Tok.is(tok::eod)) {
2966     if (ImmediatelyAfterHeaderGuard) {
2967       // Save this macro information since it may part of a header guard.
2968       CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
2969                                         MacroNameTok.getLocation());
2970     }
2971     // If there is no body to this macro, we have no special handling here.
2972   } else if (Tok.hasLeadingSpace()) {
2973     // This is a normal token with leading space.  Clear the leading space
2974     // marker on the first token to get proper expansion.
2975     Tok.clearFlag(Token::LeadingSpace);
2976   } else if (Tok.is(tok::l_paren)) {
2977     // This is a function-like macro definition.  Read the argument list.
2978     MI->setIsFunctionLike();
2979     if (ReadMacroParameterList(MI, LastTok))
2980       return nullptr;
2981 
2982     // If this is a definition of an ISO C/C++ variadic function-like macro (not
2983     // using the GNU named varargs extension) inform our variadic scope guard
2984     // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
2985     // allowed only within the definition of a variadic macro.
2986 
2987     if (MI->isC99Varargs()) {
2988       VariadicMacroScopeGuard.enterScope();
2989     }
2990 
2991     // Read the first token after the arg list for down below.
2992     LexUnexpandedToken(Tok);
2993   } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
2994     // C99 requires whitespace between the macro definition and the body.  Emit
2995     // a diagnostic for something like "#define X+".
2996     Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
2997   } else {
2998     // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
2999     // first character of a replacement list is not a character required by
3000     // subclause 5.2.1, then there shall be white-space separation between the
3001     // identifier and the replacement list.".  5.2.1 lists this set:
3002     //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
3003     // is irrelevant here.
3004     bool isInvalid = false;
3005     if (Tok.is(tok::at)) // @ is not in the list above.
3006       isInvalid = true;
3007     else if (Tok.is(tok::unknown)) {
3008       // If we have an unknown token, it is something strange like "`".  Since
3009       // all of valid characters would have lexed into a single character
3010       // token of some sort, we know this is not a valid case.
3011       isInvalid = true;
3012     }
3013     if (isInvalid)
3014       Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
3015     else
3016       Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
3017   }
3018 
3019   if (!Tok.is(tok::eod))
3020     LastTok = Tok;
3021 
3022   SmallVector<Token, 16> Tokens;
3023 
3024   // Read the rest of the macro body.
3025   if (MI->isObjectLike()) {
3026     // Object-like macros are very simple, just read their body.
3027     while (Tok.isNot(tok::eod)) {
3028       LastTok = Tok;
3029       Tokens.push_back(Tok);
3030       // Get the next token of the macro.
3031       LexUnexpandedToken(Tok);
3032     }
3033   } else {
3034     // Otherwise, read the body of a function-like macro.  While we are at it,
3035     // check C99 6.10.3.2p1: ensure that # operators are followed by macro
3036     // parameters in function-like macro expansions.
3037 
3038     VAOptDefinitionContext VAOCtx(*this);
3039 
3040     while (Tok.isNot(tok::eod)) {
3041       LastTok = Tok;
3042 
3043       if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
3044         Tokens.push_back(Tok);
3045 
3046         if (VAOCtx.isVAOptToken(Tok)) {
3047           // If we're already within a VAOPT, emit an error.
3048           if (VAOCtx.isInVAOpt()) {
3049             Diag(Tok, diag::err_pp_vaopt_nested_use);
3050             return nullptr;
3051           }
3052           // Ensure VAOPT is followed by a '(' .
3053           LexUnexpandedToken(Tok);
3054           if (Tok.isNot(tok::l_paren)) {
3055             Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
3056             return nullptr;
3057           }
3058           Tokens.push_back(Tok);
3059           VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
3060           LexUnexpandedToken(Tok);
3061           if (Tok.is(tok::hashhash)) {
3062             Diag(Tok, diag::err_vaopt_paste_at_start);
3063             return nullptr;
3064           }
3065           continue;
3066         } else if (VAOCtx.isInVAOpt()) {
3067           if (Tok.is(tok::r_paren)) {
3068             if (VAOCtx.sawClosingParen()) {
3069               assert(Tokens.size() >= 3 &&
3070                      "Must have seen at least __VA_OPT__( "
3071                      "and a subsequent tok::r_paren");
3072               if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
3073                 Diag(Tok, diag::err_vaopt_paste_at_end);
3074                 return nullptr;
3075               }
3076             }
3077           } else if (Tok.is(tok::l_paren)) {
3078             VAOCtx.sawOpeningParen(Tok.getLocation());
3079           }
3080         }
3081         // Get the next token of the macro.
3082         LexUnexpandedToken(Tok);
3083         continue;
3084       }
3085 
3086       // If we're in -traditional mode, then we should ignore stringification
3087       // and token pasting. Mark the tokens as unknown so as not to confuse
3088       // things.
3089       if (getLangOpts().TraditionalCPP) {
3090         Tok.setKind(tok::unknown);
3091         Tokens.push_back(Tok);
3092 
3093         // Get the next token of the macro.
3094         LexUnexpandedToken(Tok);
3095         continue;
3096       }
3097 
3098       if (Tok.is(tok::hashhash)) {
3099         // If we see token pasting, check if it looks like the gcc comma
3100         // pasting extension.  We'll use this information to suppress
3101         // diagnostics later on.
3102 
3103         // Get the next token of the macro.
3104         LexUnexpandedToken(Tok);
3105 
3106         if (Tok.is(tok::eod)) {
3107           Tokens.push_back(LastTok);
3108           break;
3109         }
3110 
3111         if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
3112             Tokens[Tokens.size() - 1].is(tok::comma))
3113           MI->setHasCommaPasting();
3114 
3115         // Things look ok, add the '##' token to the macro.
3116         Tokens.push_back(LastTok);
3117         continue;
3118       }
3119 
3120       // Our Token is a stringization operator.
3121       // Get the next token of the macro.
3122       LexUnexpandedToken(Tok);
3123 
3124       // Check for a valid macro arg identifier or __VA_OPT__.
3125       if (!VAOCtx.isVAOptToken(Tok) &&
3126           (Tok.getIdentifierInfo() == nullptr ||
3127            MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {
3128 
3129         // If this is assembler-with-cpp mode, we accept random gibberish after
3130         // the '#' because '#' is often a comment character.  However, change
3131         // the kind of the token to tok::unknown so that the preprocessor isn't
3132         // confused.
3133         if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
3134           LastTok.setKind(tok::unknown);
3135           Tokens.push_back(LastTok);
3136           continue;
3137         } else {
3138           Diag(Tok, diag::err_pp_stringize_not_parameter)
3139             << LastTok.is(tok::hashat);
3140           return nullptr;
3141         }
3142       }
3143 
3144       // Things look ok, add the '#' and param name tokens to the macro.
3145       Tokens.push_back(LastTok);
3146 
3147       // If the token following '#' is VAOPT, let the next iteration handle it
3148       // and check it for correctness, otherwise add the token and prime the
3149       // loop with the next one.
3150       if (!VAOCtx.isVAOptToken(Tok)) {
3151         Tokens.push_back(Tok);
3152         LastTok = Tok;
3153 
3154         // Get the next token of the macro.
3155         LexUnexpandedToken(Tok);
3156       }
3157     }
3158     if (VAOCtx.isInVAOpt()) {
3159       assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
3160       Diag(Tok, diag::err_pp_expected_after)
3161         << LastTok.getKind() << tok::r_paren;
3162       Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
3163       return nullptr;
3164     }
3165   }
3166   MI->setDefinitionEndLoc(LastTok.getLocation());
3167 
3168   MI->setTokens(Tokens, BP);
3169   return MI;
3170 }
3171 
isObjCProtectedMacro(const IdentifierInfo * II)3172 static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3173   return II->isStr("__strong") || II->isStr("__weak") ||
3174          II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
3175 }
3176 
3177 /// HandleDefineDirective - Implements \#define.  This consumes the entire macro
3178 /// line then lets the caller lex the next real token.
HandleDefineDirective(Token & DefineTok,const bool ImmediatelyAfterHeaderGuard)3179 void Preprocessor::HandleDefineDirective(
3180     Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3181   ++NumDefined;
3182 
3183   Token MacroNameTok;
3184   bool MacroShadowsKeyword;
3185   ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
3186 
3187   // Error reading macro name?  If so, diagnostic already issued.
3188   if (MacroNameTok.is(tok::eod))
3189     return;
3190 
3191   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3192   // Issue a final pragma warning if we're defining a macro that was has been
3193   // undefined and is being redefined.
3194   if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3195     emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3196 
3197   // If we are supposed to keep comments in #defines, reenable comment saving
3198   // mode.
3199   if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3200 
3201   MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3202       MacroNameTok, ImmediatelyAfterHeaderGuard);
3203 
3204   if (!MI) return;
3205 
3206   if (MacroShadowsKeyword &&
3207       !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
3208     Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
3209   }
3210   // Check that there is no paste (##) operator at the beginning or end of the
3211   // replacement list.
3212   unsigned NumTokens = MI->getNumTokens();
3213   if (NumTokens != 0) {
3214     if (MI->getReplacementToken(0).is(tok::hashhash)) {
3215       Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
3216       return;
3217     }
3218     if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
3219       Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
3220       return;
3221     }
3222   }
3223 
3224   // When skipping just warn about macros that do not match.
3225   if (SkippingUntilPCHThroughHeader) {
3226     const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
3227     if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
3228                              /*Syntactic=*/LangOpts.MicrosoftExt))
3229       Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
3230           << MacroNameTok.getIdentifierInfo();
3231     // Issue the diagnostic but allow the change if msvc extensions are enabled
3232     if (!LangOpts.MicrosoftExt)
3233       return;
3234   }
3235 
3236   // Finally, if this identifier already had a macro defined for it, verify that
3237   // the macro bodies are identical, and issue diagnostics if they are not.
3238   if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
3239     // Final macros are hard-mode: they always warn. Even if the bodies are
3240     // identical. Even if they are in system headers. Even if they are things we
3241     // would silently allow in the past.
3242     if (MacroNameTok.getIdentifierInfo()->isFinal())
3243       emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3244 
3245     // In Objective-C, ignore attempts to directly redefine the builtin
3246     // definitions of the ownership qualifiers.  It's still possible to
3247     // #undef them.
3248     if (getLangOpts().ObjC &&
3249         SourceMgr.getFileID(OtherMI->getDefinitionLoc()) ==
3250             getPredefinesFileID() &&
3251         isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
3252       // Warn if it changes the tokens.
3253       if ((!getDiagnostics().getSuppressSystemWarnings() ||
3254            !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
3255           !MI->isIdenticalTo(*OtherMI, *this,
3256                              /*Syntactic=*/LangOpts.MicrosoftExt)) {
3257         Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
3258       }
3259       assert(!OtherMI->isWarnIfUnused());
3260       return;
3261     }
3262 
3263     // It is very common for system headers to have tons of macro redefinitions
3264     // and for warnings to be disabled in system headers.  If this is the case,
3265     // then don't bother calling MacroInfo::isIdenticalTo.
3266     if (!getDiagnostics().getSuppressSystemWarnings() ||
3267         !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
3268 
3269       if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3270         Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
3271 
3272       // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3273       // C++ [cpp.predefined]p4, but allow it as an extension.
3274       if (isLanguageDefinedBuiltin(SourceMgr, OtherMI, II->getName()))
3275         Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
3276       // Macros must be identical.  This means all tokens and whitespace
3277       // separation must be the same.  C99 6.10.3p2.
3278       else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3279                !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
3280         Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
3281           << MacroNameTok.getIdentifierInfo();
3282         Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
3283       }
3284     }
3285     if (OtherMI->isWarnIfUnused())
3286       WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
3287   }
3288 
3289   DefMacroDirective *MD =
3290       appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
3291 
3292   assert(!MI->isUsed());
3293   // If we need warning for not using the macro, add its location in the
3294   // warn-because-unused-macro set. If it gets used it will be removed from set.
3295   if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
3296       !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
3297       !MacroExpansionInDirectivesOverride &&
3298       getSourceManager().getFileID(MI->getDefinitionLoc()) !=
3299           getPredefinesFileID()) {
3300     MI->setIsWarnIfUnused(true);
3301     WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
3302   }
3303 
3304   // If the callbacks want to know, tell them about the macro definition.
3305   if (Callbacks)
3306     Callbacks->MacroDefined(MacroNameTok, MD);
3307 }
3308 
3309 /// HandleUndefDirective - Implements \#undef.
3310 ///
HandleUndefDirective()3311 void Preprocessor::HandleUndefDirective() {
3312   ++NumUndefined;
3313 
3314   Token MacroNameTok;
3315   ReadMacroName(MacroNameTok, MU_Undef);
3316 
3317   // Error reading macro name?  If so, diagnostic already issued.
3318   if (MacroNameTok.is(tok::eod))
3319     return;
3320 
3321   // Check to see if this is the last token on the #undef line.
3322   CheckEndOfDirective("undef");
3323 
3324   // Okay, we have a valid identifier to undef.
3325   auto *II = MacroNameTok.getIdentifierInfo();
3326   auto MD = getMacroDefinition(II);
3327   UndefMacroDirective *Undef = nullptr;
3328 
3329   if (II->isFinal())
3330     emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);
3331 
3332   // If the macro is not defined, this is a noop undef.
3333   if (const MacroInfo *MI = MD.getMacroInfo()) {
3334     if (!MI->isUsed() && MI->isWarnIfUnused())
3335       Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
3336 
3337     // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3338     // C++ [cpp.predefined]p4, but allow it as an extension.
3339     if (isLanguageDefinedBuiltin(SourceMgr, MI, II->getName()))
3340       Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
3341 
3342     if (MI->isWarnIfUnused())
3343       WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
3344 
3345     Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
3346   }
3347 
3348   // If the callbacks want to know, tell them about the macro #undef.
3349   // Note: no matter if the macro was defined or not.
3350   if (Callbacks)
3351     Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3352 
3353   if (Undef)
3354     appendMacroDirective(II, Undef);
3355 }
3356 
3357 //===----------------------------------------------------------------------===//
3358 // Preprocessor Conditional Directive Handling.
3359 //===----------------------------------------------------------------------===//
3360 
3361 /// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive.  isIfndef
3362 /// is true when this is a \#ifndef directive.  ReadAnyTokensBeforeDirective is
3363 /// true if any tokens have been returned or pp-directives activated before this
3364 /// \#ifndef has been lexed.
3365 ///
HandleIfdefDirective(Token & Result,const Token & HashToken,bool isIfndef,bool ReadAnyTokensBeforeDirective)3366 void Preprocessor::HandleIfdefDirective(Token &Result,
3367                                         const Token &HashToken,
3368                                         bool isIfndef,
3369                                         bool ReadAnyTokensBeforeDirective) {
3370   ++NumIf;
3371   Token DirectiveTok = Result;
3372 
3373   Token MacroNameTok;
3374   ReadMacroName(MacroNameTok);
3375 
3376   // Error reading macro name?  If so, diagnostic already issued.
3377   if (MacroNameTok.is(tok::eod)) {
3378     // Skip code until we get to #endif.  This helps with recovery by not
3379     // emitting an error when the #endif is reached.
3380     SkipExcludedConditionalBlock(HashToken.getLocation(),
3381                                  DirectiveTok.getLocation(),
3382                                  /*Foundnonskip*/ false, /*FoundElse*/ false);
3383     return;
3384   }
3385 
3386   emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true);
3387 
3388   // Check to see if this is the last token on the #if[n]def line.
3389   CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
3390 
3391   IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3392   auto MD = getMacroDefinition(MII);
3393   MacroInfo *MI = MD.getMacroInfo();
3394 
3395   if (CurPPLexer->getConditionalStackDepth() == 0) {
3396     // If the start of a top-level #ifdef and if the macro is not defined,
3397     // inform MIOpt that this might be the start of a proper include guard.
3398     // Otherwise it is some other form of unknown conditional which we can't
3399     // handle.
3400     if (!ReadAnyTokensBeforeDirective && !MI) {
3401       assert(isIfndef && "#ifdef shouldn't reach here");
3402       CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
3403     } else
3404       CurPPLexer->MIOpt.EnterTopLevelConditional();
3405   }
3406 
3407   // If there is a macro, process it.
3408   if (MI)  // Mark it used.
3409     markMacroAsUsed(MI);
3410 
3411   if (Callbacks) {
3412     if (isIfndef)
3413       Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
3414     else
3415       Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
3416   }
3417 
3418   bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3419     getSourceManager().isInMainFile(DirectiveTok.getLocation());
3420 
3421   // Should we include the stuff contained by this directive?
3422   if (PPOpts.SingleFileParseMode && !MI) {
3423     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3424     // the directive blocks.
3425     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3426                                      /*wasskip*/false, /*foundnonskip*/false,
3427                                      /*foundelse*/false);
3428   } else if (!MI == isIfndef || RetainExcludedCB) {
3429     // Yes, remember that we are inside a conditional, then lex the next token.
3430     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3431                                      /*wasskip*/false, /*foundnonskip*/true,
3432                                      /*foundelse*/false);
3433   } else {
3434     // No, skip the contents of this block.
3435     SkipExcludedConditionalBlock(HashToken.getLocation(),
3436                                  DirectiveTok.getLocation(),
3437                                  /*Foundnonskip*/ false,
3438                                  /*FoundElse*/ false);
3439   }
3440 }
3441 
3442 /// HandleIfDirective - Implements the \#if directive.
3443 ///
HandleIfDirective(Token & IfToken,const Token & HashToken,bool ReadAnyTokensBeforeDirective)3444 void Preprocessor::HandleIfDirective(Token &IfToken,
3445                                      const Token &HashToken,
3446                                      bool ReadAnyTokensBeforeDirective) {
3447   ++NumIf;
3448 
3449   // Parse and evaluate the conditional expression.
3450   IdentifierInfo *IfNDefMacro = nullptr;
3451   const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3452   const bool ConditionalTrue = DER.Conditional;
3453   // Lexer might become invalid if we hit code completion point while evaluating
3454   // expression.
3455   if (!CurPPLexer)
3456     return;
3457 
3458   // If this condition is equivalent to #ifndef X, and if this is the first
3459   // directive seen, handle it for the multiple-include optimization.
3460   if (CurPPLexer->getConditionalStackDepth() == 0) {
3461     if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3462       // FIXME: Pass in the location of the macro name, not the 'if' token.
3463       CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
3464     else
3465       CurPPLexer->MIOpt.EnterTopLevelConditional();
3466   }
3467 
3468   if (Callbacks)
3469     Callbacks->If(
3470         IfToken.getLocation(), DER.ExprRange,
3471         (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3472 
3473   bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3474     getSourceManager().isInMainFile(IfToken.getLocation());
3475 
3476   // Should we include the stuff contained by this directive?
3477   if (PPOpts.SingleFileParseMode && DER.IncludedUndefinedIds) {
3478     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3479     // the directive blocks.
3480     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3481                                      /*foundnonskip*/false, /*foundelse*/false);
3482   } else if (ConditionalTrue || RetainExcludedCB) {
3483     // Yes, remember that we are inside a conditional, then lex the next token.
3484     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3485                                    /*foundnonskip*/true, /*foundelse*/false);
3486   } else {
3487     // No, skip the contents of this block.
3488     SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
3489                                  /*Foundnonskip*/ false,
3490                                  /*FoundElse*/ false);
3491   }
3492 }
3493 
3494 /// HandleEndifDirective - Implements the \#endif directive.
3495 ///
HandleEndifDirective(Token & EndifToken)3496 void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3497   ++NumEndif;
3498 
3499   // Check that this is the whole directive.
3500   CheckEndOfDirective("endif");
3501 
3502   PPConditionalInfo CondInfo;
3503   if (CurPPLexer->popConditionalLevel(CondInfo)) {
3504     // No conditionals on the stack: this is an #endif without an #if.
3505     Diag(EndifToken, diag::err_pp_endif_without_if);
3506     return;
3507   }
3508 
3509   // If this the end of a top-level #endif, inform MIOpt.
3510   if (CurPPLexer->getConditionalStackDepth() == 0)
3511     CurPPLexer->MIOpt.ExitTopLevelConditional();
3512 
3513   assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3514          "This code should only be reachable in the non-skipping case!");
3515 
3516   if (Callbacks)
3517     Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
3518 }
3519 
3520 /// HandleElseDirective - Implements the \#else directive.
3521 ///
HandleElseDirective(Token & Result,const Token & HashToken)3522 void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3523   ++NumElse;
3524 
3525   // #else directive in a non-skipping conditional... start skipping.
3526   CheckEndOfDirective("else");
3527 
3528   PPConditionalInfo CI;
3529   if (CurPPLexer->popConditionalLevel(CI)) {
3530     Diag(Result, diag::pp_err_else_without_if);
3531     return;
3532   }
3533 
3534   // If this is a top-level #else, inform the MIOpt.
3535   if (CurPPLexer->getConditionalStackDepth() == 0)
3536     CurPPLexer->MIOpt.EnterTopLevelConditional();
3537 
3538   // If this is a #else with a #else before it, report the error.
3539   if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
3540 
3541   if (Callbacks)
3542     Callbacks->Else(Result.getLocation(), CI.IfLoc);
3543 
3544   bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3545     getSourceManager().isInMainFile(Result.getLocation());
3546 
3547   if ((PPOpts.SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3548     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3549     // the directive blocks.
3550     CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
3551                                      /*foundnonskip*/false, /*foundelse*/true);
3552     return;
3553   }
3554 
3555   // Finally, skip the rest of the contents of this block.
3556   SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
3557                                /*Foundnonskip*/ true,
3558                                /*FoundElse*/ true, Result.getLocation());
3559 }
3560 
3561 /// Implements the \#elif, \#elifdef, and \#elifndef directives.
HandleElifFamilyDirective(Token & ElifToken,const Token & HashToken,tok::PPKeywordKind Kind)3562 void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3563                                              const Token &HashToken,
3564                                              tok::PPKeywordKind Kind) {
3565   PPElifDiag DirKind = Kind == tok::pp_elif      ? PED_Elif
3566                        : Kind == tok::pp_elifdef ? PED_Elifdef
3567                                                  : PED_Elifndef;
3568   ++NumElse;
3569 
3570   // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3571   switch (DirKind) {
3572   case PED_Elifdef:
3573   case PED_Elifndef:
3574     unsigned DiagID;
3575     if (LangOpts.CPlusPlus)
3576       DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3577                                     : diag::ext_cxx23_pp_directive;
3578     else
3579       DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3580                             : diag::ext_c23_pp_directive;
3581     Diag(ElifToken, DiagID) << DirKind;
3582     break;
3583   default:
3584     break;
3585   }
3586 
3587   // #elif directive in a non-skipping conditional... start skipping.
3588   // We don't care what the condition is, because we will always skip it (since
3589   // the block immediately before it was included).
3590   SourceRange ConditionRange = DiscardUntilEndOfDirective();
3591 
3592   PPConditionalInfo CI;
3593   if (CurPPLexer->popConditionalLevel(CI)) {
3594     Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind;
3595     return;
3596   }
3597 
3598   // If this is a top-level #elif, inform the MIOpt.
3599   if (CurPPLexer->getConditionalStackDepth() == 0)
3600     CurPPLexer->MIOpt.EnterTopLevelConditional();
3601 
3602   // If this is a #elif with a #else before it, report the error.
3603   if (CI.FoundElse)
3604     Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind;
3605 
3606   if (Callbacks) {
3607     switch (Kind) {
3608     case tok::pp_elif:
3609       Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
3610                       PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
3611       break;
3612     case tok::pp_elifdef:
3613       Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3614       break;
3615     case tok::pp_elifndef:
3616       Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3617       break;
3618     default:
3619       assert(false && "unexpected directive kind");
3620       break;
3621     }
3622   }
3623 
3624   bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3625     getSourceManager().isInMainFile(ElifToken.getLocation());
3626 
3627   if ((PPOpts.SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3628     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3629     // the directive blocks.
3630     CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
3631                                      /*foundnonskip*/false, /*foundelse*/false);
3632     return;
3633   }
3634 
3635   // Finally, skip the rest of the contents of this block.
3636   SkipExcludedConditionalBlock(
3637       HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
3638       /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
3639 }
3640 
3641 std::optional<LexEmbedParametersResult>
LexEmbedParameters(Token & CurTok,bool ForHasEmbed)3642 Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
3643   LexEmbedParametersResult Result{};
3644   tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
3645 
3646   auto DiagMismatchedBracesAndSkipToEOD =
3647       [&](tok::TokenKind Expected,
3648           std::pair<tok::TokenKind, SourceLocation> Matches) {
3649         Diag(CurTok, diag::err_expected) << Expected;
3650         Diag(Matches.second, diag::note_matching) << Matches.first;
3651         if (CurTok.isNot(tok::eod))
3652           DiscardUntilEndOfDirective(CurTok);
3653       };
3654 
3655   auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
3656     if (CurTok.isNot(Kind)) {
3657       Diag(CurTok, diag::err_expected) << Kind;
3658       if (CurTok.isNot(tok::eod))
3659         DiscardUntilEndOfDirective(CurTok);
3660       return false;
3661     }
3662     return true;
3663   };
3664 
3665   // C23 6.10:
3666   // pp-parameter-name:
3667   //   pp-standard-parameter
3668   //   pp-prefixed-parameter
3669   //
3670   // pp-standard-parameter:
3671   //   identifier
3672   //
3673   // pp-prefixed-parameter:
3674   //   identifier :: identifier
3675   auto LexPPParameterName = [&]() -> std::optional<std::string> {
3676     // We expect the current token to be an identifier; if it's not, things
3677     // have gone wrong.
3678     if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3679       return std::nullopt;
3680 
3681     const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
3682 
3683     // Lex another token; it is either a :: or we're done with the parameter
3684     // name.
3685     LexNonComment(CurTok);
3686     if (CurTok.is(tok::coloncolon)) {
3687       // We found a ::, so lex another identifier token.
3688       LexNonComment(CurTok);
3689       if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3690         return std::nullopt;
3691 
3692       const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
3693 
3694       // Lex another token so we're past the name.
3695       LexNonComment(CurTok);
3696       return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
3697     }
3698     return Prefix->getName().str();
3699   };
3700 
3701   // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
3702   // this document as an identifier pp_param and an identifier of the form
3703   // __pp_param__ shall behave the same when used as a preprocessor parameter,
3704   // except for the spelling.
3705   auto NormalizeParameterName = [](StringRef Name) {
3706     if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__"))
3707       return Name.substr(2, Name.size() - 4);
3708     return Name;
3709   };
3710 
3711   auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
3712     // we have a limit parameter and its internals are processed using
3713     // evaluation rules from #if.
3714     if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3715       return std::nullopt;
3716 
3717     // We do not consume the ( because EvaluateDirectiveExpression will lex
3718     // the next token for us.
3719     IdentifierInfo *ParameterIfNDef = nullptr;
3720     bool EvaluatedDefined;
3721     DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
3722         ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false);
3723 
3724     if (!LimitEvalResult.Value) {
3725       // If there was an error evaluating the directive expression, we expect
3726       // to be at the end of directive token.
3727       assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
3728       return std::nullopt;
3729     }
3730 
3731     if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3732       return std::nullopt;
3733 
3734     // Eat the ).
3735     LexNonComment(CurTok);
3736 
3737     // C23 6.10.3.2p2: The token defined shall not appear within the constant
3738     // expression.
3739     if (EvaluatedDefined) {
3740       Diag(CurTok, diag::err_defined_in_pp_embed);
3741       return std::nullopt;
3742     }
3743 
3744     if (LimitEvalResult.Value) {
3745       const llvm::APSInt &Result = *LimitEvalResult.Value;
3746       if (Result.isNegative()) {
3747         Diag(CurTok, diag::err_requires_positive_value)
3748             << toString(Result, 10) << /*positive*/ 0;
3749         return std::nullopt;
3750       }
3751       return Result.getLimitedValue();
3752     }
3753     return std::nullopt;
3754   };
3755 
3756   auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
3757     switch (Kind) {
3758     case tok::l_paren:
3759       return tok::r_paren;
3760     case tok::l_brace:
3761       return tok::r_brace;
3762     case tok::l_square:
3763       return tok::r_square;
3764     default:
3765       llvm_unreachable("should not get here");
3766     }
3767   };
3768 
3769   auto LexParenthesizedBalancedTokenSoup =
3770       [&](llvm::SmallVectorImpl<Token> &Tokens) {
3771         std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
3772 
3773         // We expect the current token to be a left paren.
3774         if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3775           return false;
3776         LexNonComment(CurTok); // Eat the (
3777 
3778         bool WaitingForInnerCloseParen = false;
3779         while (CurTok.isNot(tok::eod) &&
3780                (WaitingForInnerCloseParen || CurTok.isNot(tok::r_paren))) {
3781           switch (CurTok.getKind()) {
3782           default: // Shutting up diagnostics about not fully-covered switch.
3783             break;
3784           case tok::l_paren:
3785             WaitingForInnerCloseParen = true;
3786             [[fallthrough]];
3787           case tok::l_brace:
3788           case tok::l_square:
3789             BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()});
3790             break;
3791           case tok::r_paren:
3792             WaitingForInnerCloseParen = false;
3793             [[fallthrough]];
3794           case tok::r_brace:
3795           case tok::r_square: {
3796             tok::TokenKind Matching =
3797                 GetMatchingCloseBracket(BracketStack.back().first);
3798             if (BracketStack.empty() || CurTok.getKind() != Matching) {
3799               DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
3800               return false;
3801             }
3802             BracketStack.pop_back();
3803           } break;
3804           }
3805           Tokens.push_back(CurTok);
3806           LexNonComment(CurTok);
3807         }
3808 
3809         // When we're done, we want to eat the closing paren.
3810         if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3811           return false;
3812 
3813         LexNonComment(CurTok); // Eat the )
3814         return true;
3815       };
3816 
3817   LexNonComment(CurTok); // Prime the pump.
3818   while (!CurTok.isOneOf(EndTokenKind, tok::eod)) {
3819     SourceLocation ParamStartLoc = CurTok.getLocation();
3820     std::optional<std::string> ParamName = LexPPParameterName();
3821     if (!ParamName)
3822       return std::nullopt;
3823     StringRef Parameter = NormalizeParameterName(*ParamName);
3824 
3825     // Lex the parameters (dependent on the parameter type we want!).
3826     //
3827     // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
3828     // one time in the embed parameter sequence.
3829     if (Parameter == "limit") {
3830       if (Result.MaybeLimitParam)
3831         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3832 
3833       std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
3834       if (!Limit)
3835         return std::nullopt;
3836       Result.MaybeLimitParam =
3837           PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
3838     } else if (Parameter == "clang::offset") {
3839       if (Result.MaybeOffsetParam)
3840         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3841 
3842       std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
3843       if (!Offset)
3844         return std::nullopt;
3845       Result.MaybeOffsetParam = PPEmbedParameterOffset{
3846           *Offset, {ParamStartLoc, CurTok.getLocation()}};
3847     } else if (Parameter == "prefix") {
3848       if (Result.MaybePrefixParam)
3849         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3850 
3851       SmallVector<Token, 4> Soup;
3852       if (!LexParenthesizedBalancedTokenSoup(Soup))
3853         return std::nullopt;
3854       Result.MaybePrefixParam = PPEmbedParameterPrefix{
3855           std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3856     } else if (Parameter == "suffix") {
3857       if (Result.MaybeSuffixParam)
3858         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3859 
3860       SmallVector<Token, 4> Soup;
3861       if (!LexParenthesizedBalancedTokenSoup(Soup))
3862         return std::nullopt;
3863       Result.MaybeSuffixParam = PPEmbedParameterSuffix{
3864           std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3865     } else if (Parameter == "if_empty") {
3866       if (Result.MaybeIfEmptyParam)
3867         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3868 
3869       SmallVector<Token, 4> Soup;
3870       if (!LexParenthesizedBalancedTokenSoup(Soup))
3871         return std::nullopt;
3872       Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
3873           std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3874     } else {
3875       ++Result.UnrecognizedParams;
3876 
3877       // If there's a left paren, we need to parse a balanced token sequence
3878       // and just eat those tokens.
3879       if (CurTok.is(tok::l_paren)) {
3880         SmallVector<Token, 4> Soup;
3881         if (!LexParenthesizedBalancedTokenSoup(Soup))
3882           return std::nullopt;
3883       }
3884       if (!ForHasEmbed) {
3885         Diag(ParamStartLoc, diag::err_pp_unknown_parameter) << 1 << Parameter;
3886         if (CurTok.isNot(tok::eod))
3887           DiscardUntilEndOfDirective(CurTok);
3888         return std::nullopt;
3889       }
3890     }
3891   }
3892   return Result;
3893 }
3894 
HandleEmbedDirectiveImpl(SourceLocation HashLoc,const LexEmbedParametersResult & Params,StringRef BinaryContents,StringRef FileName)3895 void Preprocessor::HandleEmbedDirectiveImpl(
3896     SourceLocation HashLoc, const LexEmbedParametersResult &Params,
3897     StringRef BinaryContents, StringRef FileName) {
3898   if (BinaryContents.empty()) {
3899     // If we have no binary contents, the only thing we need to emit are the
3900     // if_empty tokens, if any.
3901     // FIXME: this loses AST fidelity; nothing in the compiler will see that
3902     // these tokens came from #embed. We have to hack around this when printing
3903     // preprocessed output. The same is true for prefix and suffix tokens.
3904     if (Params.MaybeIfEmptyParam) {
3905       ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens;
3906       size_t TokCount = Toks.size();
3907       auto NewToks = std::make_unique<Token[]>(TokCount);
3908       llvm::copy(Toks, NewToks.get());
3909       EnterTokenStream(std::move(NewToks), TokCount, true, true);
3910     }
3911     return;
3912   }
3913 
3914   size_t NumPrefixToks = Params.PrefixTokenCount(),
3915          NumSuffixToks = Params.SuffixTokenCount();
3916   size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;
3917   size_t CurIdx = 0;
3918   auto Toks = std::make_unique<Token[]>(TotalNumToks);
3919 
3920   // Add the prefix tokens, if any.
3921   if (Params.MaybePrefixParam) {
3922     llvm::copy(Params.MaybePrefixParam->Tokens, &Toks[CurIdx]);
3923     CurIdx += NumPrefixToks;
3924   }
3925 
3926   EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
3927   Data->BinaryData = BinaryContents;
3928   Data->FileName = FileName;
3929 
3930   Toks[CurIdx].startToken();
3931   Toks[CurIdx].setKind(tok::annot_embed);
3932   Toks[CurIdx].setAnnotationRange(HashLoc);
3933   Toks[CurIdx++].setAnnotationValue(Data);
3934 
3935   // Now add the suffix tokens, if any.
3936   if (Params.MaybeSuffixParam) {
3937     llvm::copy(Params.MaybeSuffixParam->Tokens, &Toks[CurIdx]);
3938     CurIdx += NumSuffixToks;
3939   }
3940 
3941   assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");
3942   EnterTokenStream(std::move(Toks), TotalNumToks, true, true);
3943 }
3944 
HandleEmbedDirective(SourceLocation HashLoc,Token & EmbedTok,const FileEntry * LookupFromFile)3945 void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
3946                                         const FileEntry *LookupFromFile) {
3947   // Give the usual extension/compatibility warnings.
3948   if (LangOpts.C23)
3949     Diag(EmbedTok, diag::warn_compat_pp_embed_directive);
3950   else
3951     Diag(EmbedTok, diag::ext_pp_embed_directive)
3952         << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
3953 
3954   // Parse the filename header
3955   Token FilenameTok;
3956   if (LexHeaderName(FilenameTok))
3957     return;
3958 
3959   if (FilenameTok.isNot(tok::header_name)) {
3960     Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
3961     if (FilenameTok.isNot(tok::eod))
3962       DiscardUntilEndOfDirective();
3963     return;
3964   }
3965 
3966   // Parse the optional sequence of
3967   // directive-parameters:
3968   //     identifier parameter-name-list[opt] directive-argument-list[opt]
3969   // directive-argument-list:
3970   //    '(' balanced-token-sequence ')'
3971   // parameter-name-list:
3972   //    '::' identifier parameter-name-list[opt]
3973   Token CurTok;
3974   std::optional<LexEmbedParametersResult> Params =
3975       LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
3976 
3977   assert((Params || CurTok.is(tok::eod)) &&
3978          "expected success or to be at the end of the directive");
3979   if (!Params)
3980     return;
3981 
3982   // Now, splat the data out!
3983   SmallString<128> FilenameBuffer;
3984   StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
3985   StringRef OriginalFilename = Filename;
3986   bool isAngled =
3987       GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
3988   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
3989   // error.
3990   assert(!Filename.empty());
3991   OptionalFileEntryRef MaybeFileRef =
3992       this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile);
3993   if (!MaybeFileRef) {
3994     // could not find file
3995     if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
3996       return;
3997     }
3998     Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
3999     return;
4000   }
4001 
4002   if (MaybeFileRef->isDeviceFile()) {
4003     Diag(FilenameTok, diag::err_pp_embed_device_file) << Filename;
4004     return;
4005   }
4006 
4007   std::optional<llvm::MemoryBufferRef> MaybeFile =
4008       getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef);
4009   if (!MaybeFile) {
4010     // could not find file
4011     Diag(FilenameTok, diag::err_cannot_open_file)
4012         << Filename << "a buffer to the contents could not be created";
4013     return;
4014   }
4015   StringRef BinaryContents = MaybeFile->getBuffer();
4016 
4017   // The order is important between 'offset' and 'limit'; we want to offset
4018   // first and then limit second; otherwise we may reduce the notional resource
4019   // size to something too small to offset into.
4020   if (Params->MaybeOffsetParam) {
4021     // FIXME: just like with the limit() and if_empty() parameters, this loses
4022     // source fidelity in the AST; it has no idea that there was an offset
4023     // involved.
4024     // offsets all the way to the end of the file make for an empty file.
4025     BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset);
4026   }
4027 
4028   if (Params->MaybeLimitParam) {
4029     // FIXME: just like with the clang::offset() and if_empty() parameters,
4030     // this loses source fidelity in the AST; it has no idea there was a limit
4031     // involved.
4032     BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit);
4033   }
4034 
4035   if (Callbacks)
4036     Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,
4037                               *Params);
4038   // getSpelling() may return a buffer from the token itself or it may use the
4039   // SmallString buffer we provided. getSpelling() may also return a string that
4040   // is actually longer than FilenameTok.getLength(), so we first pass a
4041   // locally created buffer to getSpelling() to get the string of real length
4042   // and then we allocate a long living buffer because the buffer we used
4043   // previously will only live till the end of this function and we need
4044   // filename info to live longer.
4045   void *Mem = BP.Allocate(OriginalFilename.size(), alignof(char *));
4046   memcpy(Mem, OriginalFilename.data(), OriginalFilename.size());
4047   StringRef FilenameToGo =
4048       StringRef(static_cast<char *>(Mem), OriginalFilename.size());
4049   HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents, FilenameToGo);
4050 }
4051