xref: /freebsd/contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  ///
9  /// \file
10  /// Implements # directive processing for the Preprocessor.
11  ///
12  //===----------------------------------------------------------------------===//
13  
14  #include "clang/Basic/CharInfo.h"
15  #include "clang/Basic/DirectoryEntry.h"
16  #include "clang/Basic/FileManager.h"
17  #include "clang/Basic/IdentifierTable.h"
18  #include "clang/Basic/LangOptions.h"
19  #include "clang/Basic/Module.h"
20  #include "clang/Basic/SourceLocation.h"
21  #include "clang/Basic/SourceManager.h"
22  #include "clang/Basic/TargetInfo.h"
23  #include "clang/Basic/TokenKinds.h"
24  #include "clang/Lex/CodeCompletionHandler.h"
25  #include "clang/Lex/HeaderSearch.h"
26  #include "clang/Lex/HeaderSearchOptions.h"
27  #include "clang/Lex/LexDiagnostic.h"
28  #include "clang/Lex/LiteralSupport.h"
29  #include "clang/Lex/MacroInfo.h"
30  #include "clang/Lex/ModuleLoader.h"
31  #include "clang/Lex/ModuleMap.h"
32  #include "clang/Lex/PPCallbacks.h"
33  #include "clang/Lex/Pragma.h"
34  #include "clang/Lex/Preprocessor.h"
35  #include "clang/Lex/PreprocessorOptions.h"
36  #include "clang/Lex/Token.h"
37  #include "clang/Lex/VariadicMacroSupport.h"
38  #include "llvm/ADT/ArrayRef.h"
39  #include "llvm/ADT/STLExtras.h"
40  #include "llvm/ADT/ScopeExit.h"
41  #include "llvm/ADT/SmallString.h"
42  #include "llvm/ADT/SmallVector.h"
43  #include "llvm/ADT/StringExtras.h"
44  #include "llvm/ADT/StringRef.h"
45  #include "llvm/ADT/StringSwitch.h"
46  #include "llvm/Support/AlignOf.h"
47  #include "llvm/Support/ErrorHandling.h"
48  #include "llvm/Support/Path.h"
49  #include "llvm/Support/SaveAndRestore.h"
50  #include <algorithm>
51  #include <cassert>
52  #include <cstring>
53  #include <new>
54  #include <optional>
55  #include <string>
56  #include <utility>
57  
58  using namespace clang;
59  
60  //===----------------------------------------------------------------------===//
61  // Utility Methods for Preprocessor Directive Handling.
62  //===----------------------------------------------------------------------===//
63  
AllocateMacroInfo(SourceLocation L)64  MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
65    static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
66    return new (BP) MacroInfo(L);
67  }
68  
AllocateDefMacroDirective(MacroInfo * MI,SourceLocation Loc)69  DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
70                                                             SourceLocation Loc) {
71    return new (BP) DefMacroDirective(MI, Loc);
72  }
73  
74  UndefMacroDirective *
AllocateUndefMacroDirective(SourceLocation UndefLoc)75  Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
76    return new (BP) UndefMacroDirective(UndefLoc);
77  }
78  
79  VisibilityMacroDirective *
AllocateVisibilityMacroDirective(SourceLocation Loc,bool isPublic)80  Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
81                                                 bool isPublic) {
82    return new (BP) VisibilityMacroDirective(Loc, isPublic);
83  }
84  
85  /// Read and discard all tokens remaining on the current line until
86  /// the tok::eod token is found.
DiscardUntilEndOfDirective(Token & Tmp)87  SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) {
88    SourceRange Res;
89  
90    LexUnexpandedToken(Tmp);
91    Res.setBegin(Tmp.getLocation());
92    while (Tmp.isNot(tok::eod)) {
93      assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
94      LexUnexpandedToken(Tmp);
95    }
96    Res.setEnd(Tmp.getLocation());
97    return Res;
98  }
99  
/// Classifies a macro name that is being #define'd or #undef'd according to
/// which (if any) reserved-identifier / keyword diagnostic applies to it.
enum MacroDiag {
  MD_NoWarn = 0,       ///< Not a reserved identifier.
  MD_KeywordDef = 1,   ///< Macro hides a keyword; enabled by default.
  MD_ReservedMacro = 2 ///< #define or #undef of a reserved identifier;
                       ///< disabled by default.
};
106  
/// Values fed to the %select in the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics. The numeric values are spelled out
/// because they are streamed into the diagnostic as the selector.
enum PPElifDiag {
  PED_Elif = 0,
  PED_Elifdef = 1,
  PED_Elifndef = 2
};
114  
isFeatureTestMacro(StringRef MacroName)115  static bool isFeatureTestMacro(StringRef MacroName) {
116    // list from:
117    // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
118    // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
119    // * man 7 feature_test_macros
120    // The list must be sorted for correct binary search.
121    static constexpr StringRef ReservedMacro[] = {
122        "_ATFILE_SOURCE",
123        "_BSD_SOURCE",
124        "_CRT_NONSTDC_NO_WARNINGS",
125        "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
126        "_CRT_SECURE_NO_WARNINGS",
127        "_FILE_OFFSET_BITS",
128        "_FORTIFY_SOURCE",
129        "_GLIBCXX_ASSERTIONS",
130        "_GLIBCXX_CONCEPT_CHECKS",
131        "_GLIBCXX_DEBUG",
132        "_GLIBCXX_DEBUG_PEDANTIC",
133        "_GLIBCXX_PARALLEL",
134        "_GLIBCXX_PARALLEL_ASSERTIONS",
135        "_GLIBCXX_SANITIZE_VECTOR",
136        "_GLIBCXX_USE_CXX11_ABI",
137        "_GLIBCXX_USE_DEPRECATED",
138        "_GNU_SOURCE",
139        "_ISOC11_SOURCE",
140        "_ISOC95_SOURCE",
141        "_ISOC99_SOURCE",
142        "_LARGEFILE64_SOURCE",
143        "_POSIX_C_SOURCE",
144        "_REENTRANT",
145        "_SVID_SOURCE",
146        "_THREAD_SAFE",
147        "_XOPEN_SOURCE",
148        "_XOPEN_SOURCE_EXTENDED",
149        "__STDCPP_WANT_MATH_SPEC_FUNCS__",
150        "__STDC_FORMAT_MACROS",
151    };
152    return std::binary_search(std::begin(ReservedMacro), std::end(ReservedMacro),
153                              MacroName);
154  }
155  
isLanguageDefinedBuiltin(const SourceManager & SourceMgr,const MacroInfo * MI,const StringRef MacroName)156  static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
157                                       const MacroInfo *MI,
158                                       const StringRef MacroName) {
159    // If this is a macro with special handling (like __LINE__) then it's language
160    // defined.
161    if (MI->isBuiltinMacro())
162      return true;
163    // Builtin macros are defined in the builtin file
164    if (!SourceMgr.isWrittenInBuiltinFile(MI->getDefinitionLoc()))
165      return false;
166    // C defines macros starting with __STDC, and C++ defines macros starting with
167    // __STDCPP
168    if (MacroName.starts_with("__STDC"))
169      return true;
170    // C++ defines the __cplusplus macro
171    if (MacroName == "__cplusplus")
172      return true;
173    // C++ defines various feature-test macros starting with __cpp
174    if (MacroName.starts_with("__cpp"))
175      return true;
176    // Anything else isn't language-defined
177    return false;
178  }
179  
shouldWarnOnMacroDef(Preprocessor & PP,IdentifierInfo * II)180  static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
181    const LangOptions &Lang = PP.getLangOpts();
182    StringRef Text = II->getName();
183    if (isReservedInAllContexts(II->isReserved(Lang)))
184      return isFeatureTestMacro(Text) ? MD_NoWarn : MD_ReservedMacro;
185    if (II->isKeyword(Lang))
186      return MD_KeywordDef;
187    if (Lang.CPlusPlus11 && (Text == "override" || Text == "final"))
188      return MD_KeywordDef;
189    return MD_NoWarn;
190  }
191  
shouldWarnOnMacroUndef(Preprocessor & PP,IdentifierInfo * II)192  static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
193    const LangOptions &Lang = PP.getLangOpts();
194    // Do not warn on keyword undef.  It is generally harmless and widely used.
195    if (isReservedInAllContexts(II->isReserved(Lang)))
196      return MD_ReservedMacro;
197    return MD_NoWarn;
198  }
199  
200  // Return true if we want to issue a diagnostic by default if we
201  // encounter this name in a #include with the wrong case. For now,
202  // this includes the standard C and C++ headers, Posix headers,
203  // and Boost headers. Improper case for these #includes is a
204  // potential portability issue.
warnByDefaultOnWrongCase(StringRef Include)205  static bool warnByDefaultOnWrongCase(StringRef Include) {
206    // If the first component of the path is "boost", treat this like a standard header
207    // for the purposes of diagnostics.
208    if (::llvm::sys::path::begin(Include)->equals_insensitive("boost"))
209      return true;
210  
211    // "condition_variable" is the longest standard header name at 18 characters.
212    // If the include file name is longer than that, it can't be a standard header.
213    static const size_t MaxStdHeaderNameLen = 18u;
214    if (Include.size() > MaxStdHeaderNameLen)
215      return false;
216  
217    // Lowercase and normalize the search string.
218    SmallString<32> LowerInclude{Include};
219    for (char &Ch : LowerInclude) {
220      // In the ASCII range?
221      if (static_cast<unsigned char>(Ch) > 0x7f)
222        return false; // Can't be a standard header
223      // ASCII lowercase:
224      if (Ch >= 'A' && Ch <= 'Z')
225        Ch += 'a' - 'A';
226      // Normalize path separators for comparison purposes.
227      else if (::llvm::sys::path::is_separator(Ch))
228        Ch = '/';
229    }
230  
231    // The standard C/C++ and Posix headers
232    return llvm::StringSwitch<bool>(LowerInclude)
233      // C library headers
234      .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
235      .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
236      .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
237      .Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stddef.h", true)
238      .Cases("stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true)
239      .Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true)
240      .Cases("wchar.h", "wctype.h", true)
241  
242      // C++ headers for C library facilities
243      .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
244      .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
245      .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
246      .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
247      .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
248      .Case("cwctype", true)
249  
250      // C++ library headers
251      .Cases("algorithm", "fstream", "list", "regex", "thread", true)
252      .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
253      .Cases("atomic", "future", "map", "set", "type_traits", true)
254      .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
255      .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
256      .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
257      .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
258      .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
259      .Cases("deque", "istream", "queue", "string", "valarray", true)
260      .Cases("exception", "iterator", "random", "strstream", "vector", true)
261      .Cases("forward_list", "limits", "ratio", "system_error", true)
262  
263      // POSIX headers (which aren't also C headers)
264      .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
265      .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
266      .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
267      .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
268      .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
269      .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
270      .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
271      .Cases("sys/resource.h", "sys/select.h",  "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
272      .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
273      .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
274      .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
275      .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
276      .Default(false);
277  }
278  
279  /// Find a similar string in `Candidates`.
280  ///
281  /// \param LHS a string for a similar string in `Candidates`
282  ///
283  /// \param Candidates the candidates to find a similar string.
284  ///
285  /// \returns a similar string if exists. If no similar string exists,
286  /// returns std::nullopt.
287  static std::optional<StringRef>
findSimilarStr(StringRef LHS,const std::vector<StringRef> & Candidates)288  findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
289    // We need to check if `Candidates` has the exact case-insensitive string
290    // because the Levenshtein distance match does not care about it.
291    for (StringRef C : Candidates) {
292      if (LHS.equals_insensitive(C)) {
293        return C;
294      }
295    }
296  
297    // Keep going with the Levenshtein distance match.
298    // If the LHS size is less than 3, use the LHS size minus 1 and if not,
299    // use the LHS size divided by 3.
300    size_t Length = LHS.size();
301    size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
302  
303    std::optional<std::pair<StringRef, size_t>> SimilarStr;
304    for (StringRef C : Candidates) {
305      size_t CurDist = LHS.edit_distance(C, true);
306      if (CurDist <= MaxDist) {
307        if (!SimilarStr) {
308          // The first similar string found.
309          SimilarStr = {C, CurDist};
310        } else if (CurDist < SimilarStr->second) {
311          // More similar string found.
312          SimilarStr = {C, CurDist};
313        }
314      }
315    }
316  
317    if (SimilarStr) {
318      return SimilarStr->first;
319    } else {
320      return std::nullopt;
321    }
322  }
323  
CheckMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)324  bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
325                                    bool *ShadowFlag) {
326    // Missing macro name?
327    if (MacroNameTok.is(tok::eod))
328      return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
329  
330    IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
331    if (!II)
332      return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
333  
334    if (II->isCPlusPlusOperatorKeyword()) {
335      // C++ 2.5p2: Alternative tokens behave the same as its primary token
336      // except for their spellings.
337      Diag(MacroNameTok, getLangOpts().MicrosoftExt
338                             ? diag::ext_pp_operator_used_as_macro_name
339                             : diag::err_pp_operator_used_as_macro_name)
340          << II << MacroNameTok.getKind();
341      // Allow #defining |and| and friends for Microsoft compatibility or
342      // recovery when legacy C headers are included in C++.
343    }
344  
345    if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
346      // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
347      return Diag(MacroNameTok, diag::err_defined_macro_name);
348    }
349  
350    // If defining/undefining reserved identifier or a keyword, we need to issue
351    // a warning.
352    SourceLocation MacroNameLoc = MacroNameTok.getLocation();
353    if (ShadowFlag)
354      *ShadowFlag = false;
355    if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&
356        (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) {
357      MacroDiag D = MD_NoWarn;
358      if (isDefineUndef == MU_Define) {
359        D = shouldWarnOnMacroDef(*this, II);
360      }
361      else if (isDefineUndef == MU_Undef)
362        D = shouldWarnOnMacroUndef(*this, II);
363      if (D == MD_KeywordDef) {
364        // We do not want to warn on some patterns widely used in configuration
365        // scripts.  This requires analyzing next tokens, so do not issue warnings
366        // now, only inform caller.
367        if (ShadowFlag)
368          *ShadowFlag = true;
369      }
370      if (D == MD_ReservedMacro)
371        Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
372    }
373  
374    // Okay, we got a good identifier.
375    return false;
376  }
377  
378  /// Lex and validate a macro name, which occurs after a
379  /// \#define or \#undef.
380  ///
381  /// This sets the token kind to eod and discards the rest of the macro line if
382  /// the macro name is invalid.
383  ///
384  /// \param MacroNameTok Token that is expected to be a macro name.
385  /// \param isDefineUndef Context in which macro is used.
386  /// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
ReadMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)387  void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
388                                   bool *ShadowFlag) {
389    // Read the token, don't allow macro expansion on it.
390    LexUnexpandedToken(MacroNameTok);
391  
392    if (MacroNameTok.is(tok::code_completion)) {
393      if (CodeComplete)
394        CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);
395      setCodeCompletionReached();
396      LexUnexpandedToken(MacroNameTok);
397    }
398  
399    if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
400      return;
401  
402    // Invalid macro name, read and discard the rest of the line and set the
403    // token kind to tok::eod if necessary.
404    if (MacroNameTok.isNot(tok::eod)) {
405      MacroNameTok.setKind(tok::eod);
406      DiscardUntilEndOfDirective();
407    }
408  }
409  
410  /// Ensure that the next token is a tok::eod token.
411  ///
412  /// If not, emit a diagnostic and consume up until the eod.  If EnableMacros is
413  /// true, then we consider macros that expand to zero tokens as being ok.
414  ///
415  /// Returns the location of the end of the directive.
CheckEndOfDirective(const char * DirType,bool EnableMacros)416  SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
417                                                   bool EnableMacros) {
418    Token Tmp;
419    // Lex unexpanded tokens for most directives: macros might expand to zero
420    // tokens, causing us to miss diagnosing invalid lines.  Some directives (like
421    // #line) allow empty macros.
422    if (EnableMacros)
423      Lex(Tmp);
424    else
425      LexUnexpandedToken(Tmp);
426  
427    // There should be no tokens after the directive, but we allow them as an
428    // extension.
429    while (Tmp.is(tok::comment))  // Skip comments in -C mode.
430      LexUnexpandedToken(Tmp);
431  
432    if (Tmp.is(tok::eod))
433      return Tmp.getLocation();
434  
435    // Add a fixit in GNU/C99/C++ mode.  Don't offer a fixit for strict-C89,
436    // or if this is a macro-style preprocessing directive, because it is more
437    // trouble than it is worth to insert /**/ and check that there is no /**/
438    // in the range also.
439    FixItHint Hint;
440    if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
441        !CurTokenLexer)
442      Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
443    Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
444    return DiscardUntilEndOfDirective().getEnd();
445  }
446  
SuggestTypoedDirective(const Token & Tok,StringRef Directive) const447  void Preprocessor::SuggestTypoedDirective(const Token &Tok,
448                                            StringRef Directive) const {
449    // If this is a `.S` file, treat unknown # directives as non-preprocessor
450    // directives.
451    if (getLangOpts().AsmPreprocessor) return;
452  
453    std::vector<StringRef> Candidates = {
454        "if", "ifdef", "ifndef", "elif", "else", "endif"
455    };
456    if (LangOpts.C23 || LangOpts.CPlusPlus23)
457      Candidates.insert(Candidates.end(), {"elifdef", "elifndef"});
458  
459    if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {
460      // Directive cannot be coming from macro.
461      assert(Tok.getLocation().isFileID());
462      CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
463          Tok.getLocation(),
464          Tok.getLocation().getLocWithOffset(Directive.size()));
465      StringRef SuggValue = *Sugg;
466  
467      auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue);
468      Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
469    }
470  }
471  
472  /// SkipExcludedConditionalBlock - We just read a \#if or related directive and
473  /// decided that the subsequent tokens are in the \#if'd out portion of the
474  /// file.  Lex the rest of the file, until we see an \#endif.  If
475  /// FoundNonSkipPortion is true, then we have already emitted code for part of
476  /// this \#if directive, so \#else/\#elif blocks should never be entered.
477  /// If ElseOk is true, then \#else directives are ok, if not, then we have
478  /// already seen one so a \#else directive is a duplicate.  When this returns,
479  /// the caller can lex the first valid token.
SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,SourceLocation IfTokenLoc,bool FoundNonSkipPortion,bool FoundElse,SourceLocation ElseLoc)480  void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
481                                                  SourceLocation IfTokenLoc,
482                                                  bool FoundNonSkipPortion,
483                                                  bool FoundElse,
484                                                  SourceLocation ElseLoc) {
485    // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
486    // not getting called recursively by storing the RecordedSkippedRanges
487    // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
488    // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
489    // invalidated. If this changes and there is a need to call
490    // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
491    // change to do a second lookup in endLexPass function instead of reusing the
492    // lookup pointer.
493    assert(!SkippingExcludedConditionalBlock &&
494           "calling SkipExcludedConditionalBlock recursively");
495    llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
496  
497    ++NumSkipped;
498    assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
499    assert(CurPPLexer && "Conditional PP block must be in a file!");
500    assert(CurLexer && "Conditional PP block but no current lexer set!");
501  
502    if (PreambleConditionalStack.reachedEOFWhileSkipping())
503      PreambleConditionalStack.clearSkipInfo();
504    else
505      CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
506                                       FoundNonSkipPortion, FoundElse);
507  
508    // Enter raw mode to disable identifier lookup (and thus macro expansion),
509    // disabling warnings, etc.
510    CurPPLexer->LexingRawMode = true;
511    Token Tok;
512    SourceLocation endLoc;
513  
514    /// Keeps track and caches skipped ranges and also retrieves a prior skipped
515    /// range if the same block is re-visited.
516    struct SkippingRangeStateTy {
517      Preprocessor &PP;
518  
519      const char *BeginPtr = nullptr;
520      unsigned *SkipRangePtr = nullptr;
521  
522      SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
523  
524      void beginLexPass() {
525        if (BeginPtr)
526          return; // continue skipping a block.
527  
528        // Initiate a skipping block and adjust the lexer if we already skipped it
529        // before.
530        BeginPtr = PP.CurLexer->getBufferLocation();
531        SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
532        if (*SkipRangePtr) {
533          PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
534                            /*IsAtStartOfLine*/ true);
535        }
536      }
537  
538      void endLexPass(const char *Hashptr) {
539        if (!BeginPtr) {
540          // Not doing normal lexing.
541          assert(PP.CurLexer->isDependencyDirectivesLexer());
542          return;
543        }
544  
545        // Finished skipping a block, record the range if it's first time visited.
546        if (!*SkipRangePtr) {
547          *SkipRangePtr = Hashptr - BeginPtr;
548        }
549        assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
550        BeginPtr = nullptr;
551        SkipRangePtr = nullptr;
552      }
553    } SkippingRangeState(*this);
554  
555    while (true) {
556      if (CurLexer->isDependencyDirectivesLexer()) {
557        CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
558      } else {
559        SkippingRangeState.beginLexPass();
560        while (true) {
561          CurLexer->Lex(Tok);
562  
563          if (Tok.is(tok::code_completion)) {
564            setCodeCompletionReached();
565            if (CodeComplete)
566              CodeComplete->CodeCompleteInConditionalExclusion();
567            continue;
568          }
569  
570          // If this is the end of the buffer, we have an error.
571          if (Tok.is(tok::eof)) {
572            // We don't emit errors for unterminated conditionals here,
573            // Lexer::LexEndOfFile can do that properly.
574            // Just return and let the caller lex after this #include.
575            if (PreambleConditionalStack.isRecording())
576              PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
577                                                        FoundNonSkipPortion,
578                                                        FoundElse, ElseLoc);
579            break;
580          }
581  
582          // If this token is not a preprocessor directive, just skip it.
583          if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
584            continue;
585  
586          break;
587        }
588      }
589      if (Tok.is(tok::eof))
590        break;
591  
592      // We just parsed a # character at the start of a line, so we're in
593      // directive mode.  Tell the lexer this so any newlines we see will be
594      // converted into an EOD token (this terminates the macro).
595      CurPPLexer->ParsingPreprocessorDirective = true;
596      if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
597  
598      assert(Tok.is(tok::hash));
599      const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
600      assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
601  
602      // Read the next token, the directive flavor.
603      LexUnexpandedToken(Tok);
604  
605      // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
606      // something bogus), skip it.
607      if (Tok.isNot(tok::raw_identifier)) {
608        CurPPLexer->ParsingPreprocessorDirective = false;
609        // Restore comment saving mode.
610        if (CurLexer) CurLexer->resetExtendedTokenMode();
611        continue;
612      }
613  
614      // If the first letter isn't i or e, it isn't intesting to us.  We know that
615      // this is safe in the face of spelling differences, because there is no way
616      // to spell an i/e in a strange way that is another letter.  Skipping this
617      // allows us to avoid looking up the identifier info for #define/#undef and
618      // other common directives.
619      StringRef RI = Tok.getRawIdentifier();
620  
621      char FirstChar = RI[0];
622      if (FirstChar >= 'a' && FirstChar <= 'z' &&
623          FirstChar != 'i' && FirstChar != 'e') {
624        CurPPLexer->ParsingPreprocessorDirective = false;
625        // Restore comment saving mode.
626        if (CurLexer) CurLexer->resetExtendedTokenMode();
627        continue;
628      }
629  
630      // Get the identifier name without trigraphs or embedded newlines.  Note
631      // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
632      // when skipping.
633      char DirectiveBuf[20];
634      StringRef Directive;
635      if (!Tok.needsCleaning() && RI.size() < 20) {
636        Directive = RI;
637      } else {
638        std::string DirectiveStr = getSpelling(Tok);
639        size_t IdLen = DirectiveStr.size();
640        if (IdLen >= 20) {
641          CurPPLexer->ParsingPreprocessorDirective = false;
642          // Restore comment saving mode.
643          if (CurLexer) CurLexer->resetExtendedTokenMode();
644          continue;
645        }
646        memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
647        Directive = StringRef(DirectiveBuf, IdLen);
648      }
649  
650      if (Directive.starts_with("if")) {
651        StringRef Sub = Directive.substr(2);
652        if (Sub.empty() ||   // "if"
653            Sub == "def" ||   // "ifdef"
654            Sub == "ndef") {  // "ifndef"
655          // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
656          // bother parsing the condition.
657          DiscardUntilEndOfDirective();
658          CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
659                                         /*foundnonskip*/false,
660                                         /*foundelse*/false);
661        } else {
662          SuggestTypoedDirective(Tok, Directive);
663        }
664      } else if (Directive[0] == 'e') {
665        StringRef Sub = Directive.substr(1);
666        if (Sub == "ndif") {  // "endif"
667          PPConditionalInfo CondInfo;
668          CondInfo.WasSkipping = true; // Silence bogus warning.
669          bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
670          (void)InCond;  // Silence warning in no-asserts mode.
671          assert(!InCond && "Can't be skipping if not in a conditional!");
672  
673          // If we popped the outermost skipping block, we're done skipping!
674          if (!CondInfo.WasSkipping) {
675            SkippingRangeState.endLexPass(Hashptr);
676            // Restore the value of LexingRawMode so that trailing comments
677            // are handled correctly, if we've reached the outermost block.
678            CurPPLexer->LexingRawMode = false;
679            endLoc = CheckEndOfDirective("endif");
680            CurPPLexer->LexingRawMode = true;
681            if (Callbacks)
682              Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
683            break;
684          } else {
685            DiscardUntilEndOfDirective();
686          }
687        } else if (Sub == "lse") { // "else".
688          // #else directive in a skipping conditional.  If not in some other
689          // skipping conditional, and if #else hasn't already been seen, enter it
690          // as a non-skipping conditional.
691          PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
692  
693          if (!CondInfo.WasSkipping)
694            SkippingRangeState.endLexPass(Hashptr);
695  
696          // If this is a #else with a #else before it, report the error.
697          if (CondInfo.FoundElse)
698            Diag(Tok, diag::pp_err_else_after_else);
699  
700          // Note that we've seen a #else in this conditional.
701          CondInfo.FoundElse = true;
702  
703          // If the conditional is at the top level, and the #if block wasn't
704          // entered, enter the #else block now.
705          if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
706            CondInfo.FoundNonSkip = true;
707            // Restore the value of LexingRawMode so that trailing comments
708            // are handled correctly.
709            CurPPLexer->LexingRawMode = false;
710            endLoc = CheckEndOfDirective("else");
711            CurPPLexer->LexingRawMode = true;
712            if (Callbacks)
713              Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
714            break;
715          } else {
716            DiscardUntilEndOfDirective();  // C99 6.10p4.
717          }
718        } else if (Sub == "lif") {  // "elif".
719          PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
720  
721          if (!CondInfo.WasSkipping)
722            SkippingRangeState.endLexPass(Hashptr);
723  
724          // If this is a #elif with a #else before it, report the error.
725          if (CondInfo.FoundElse)
726            Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;
727  
728          // If this is in a skipping block or if we're already handled this #if
729          // block, don't bother parsing the condition.
730          if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
731            // FIXME: We should probably do at least some minimal parsing of the
732            // condition to verify that it is well-formed. The current state
733            // allows #elif* directives with completely malformed (or missing)
734            // conditions.
735            DiscardUntilEndOfDirective();
736          } else {
737            // Restore the value of LexingRawMode so that identifiers are
738            // looked up, etc, inside the #elif expression.
739            assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
740            CurPPLexer->LexingRawMode = false;
741            IdentifierInfo *IfNDefMacro = nullptr;
742            DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
743            // Stop if Lexer became invalid after hitting code completion token.
744            if (!CurPPLexer)
745              return;
746            const bool CondValue = DER.Conditional;
747            CurPPLexer->LexingRawMode = true;
748            if (Callbacks) {
749              Callbacks->Elif(
750                  Tok.getLocation(), DER.ExprRange,
751                  (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
752                  CondInfo.IfLoc);
753            }
754            // If this condition is true, enter it!
755            if (CondValue) {
756              CondInfo.FoundNonSkip = true;
757              break;
758            }
759          }
760        } else if (Sub == "lifdef" ||  // "elifdef"
761                   Sub == "lifndef") { // "elifndef"
762          bool IsElifDef = Sub == "lifdef";
763          PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
764          Token DirectiveToken = Tok;
765  
766          if (!CondInfo.WasSkipping)
767            SkippingRangeState.endLexPass(Hashptr);
768  
769          // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
770          // if this branch is in a skipping block.
771          unsigned DiagID;
772          if (LangOpts.CPlusPlus)
773            DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
774                                          : diag::ext_cxx23_pp_directive;
775          else
776            DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
777                                  : diag::ext_c23_pp_directive;
778          Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
779  
780          // If this is a #elif with a #else before it, report the error.
781          if (CondInfo.FoundElse)
782            Diag(Tok, diag::pp_err_elif_after_else)
783                << (IsElifDef ? PED_Elifdef : PED_Elifndef);
784  
785          // If this is in a skipping block or if we're already handled this #if
786          // block, don't bother parsing the condition.
787          if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
788            // FIXME: We should probably do at least some minimal parsing of the
789            // condition to verify that it is well-formed. The current state
790            // allows #elif* directives with completely malformed (or missing)
791            // conditions.
792            DiscardUntilEndOfDirective();
793          } else {
794            // Restore the value of LexingRawMode so that identifiers are
795            // looked up, etc, inside the #elif[n]def expression.
796            assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
797            CurPPLexer->LexingRawMode = false;
798            Token MacroNameTok;
799            ReadMacroName(MacroNameTok);
800            CurPPLexer->LexingRawMode = true;
801  
802            // If the macro name token is tok::eod, there was an error that was
803            // already reported.
804            if (MacroNameTok.is(tok::eod)) {
805              // Skip code until we get to #endif.  This helps with recovery by
806              // not emitting an error when the #endif is reached.
807              continue;
808            }
809  
810            emitMacroExpansionWarnings(MacroNameTok);
811  
812            CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");
813  
814            IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
815            auto MD = getMacroDefinition(MII);
816            MacroInfo *MI = MD.getMacroInfo();
817  
818            if (Callbacks) {
819              if (IsElifDef) {
820                Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,
821                                   MD);
822              } else {
823                Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,
824                                    MD);
825              }
826            }
827            // If this condition is true, enter it!
828            if (static_cast<bool>(MI) == IsElifDef) {
829              CondInfo.FoundNonSkip = true;
830              break;
831            }
832          }
833        } else {
834          SuggestTypoedDirective(Tok, Directive);
835        }
836      } else {
837        SuggestTypoedDirective(Tok, Directive);
838      }
839  
840      CurPPLexer->ParsingPreprocessorDirective = false;
841      // Restore comment saving mode.
842      if (CurLexer) CurLexer->resetExtendedTokenMode();
843    }
844  
845    // Finally, if we are out of the conditional (saw an #endif or ran off the end
846    // of the file, just stop skipping and return to lexing whatever came after
847    // the #if block.
848    CurPPLexer->LexingRawMode = false;
849  
850    // The last skipped range isn't actually skipped yet if it's truncated
851    // by the end of the preamble; we'll resume parsing after the preamble.
852    if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
853      Callbacks->SourceRangeSkipped(
854          SourceRange(HashTokenLoc, endLoc.isValid()
855                                        ? endLoc
856                                        : CurPPLexer->getSourceLocation()),
857          Tok.getLocation());
858  }
859  
getModuleForLocation(SourceLocation Loc,bool AllowTextual)860  Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
861                                             bool AllowTextual) {
862    if (!SourceMgr.isInMainFile(Loc)) {
863      // Try to determine the module of the include directive.
864      // FIXME: Look into directly passing the FileEntry from LookupFile instead.
865      FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
866      if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(IDOfIncl)) {
867        // The include comes from an included file.
868        return HeaderInfo.getModuleMap()
869            .findModuleForHeader(*EntryOfIncl, AllowTextual)
870            .getModule();
871      }
872    }
873  
874    // This is either in the main file or not in a file at all. It belongs
875    // to the current module, if there is one.
876    return getLangOpts().CurrentModule.empty()
877               ? nullptr
878               : HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);
879  }
880  
881  OptionalFileEntryRef
getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,SourceLocation Loc)882  Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
883                                                 SourceLocation Loc) {
884    Module *IncM = getModuleForLocation(
885        IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
886  
887    // Walk up through the include stack, looking through textual headers of M
888    // until we hit a non-textual header that we can #include. (We assume textual
889    // headers of a module with non-textual headers aren't meant to be used to
890    // import entities from the module.)
891    auto &SM = getSourceManager();
892    while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
893      auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
894      auto FE = SM.getFileEntryRefForID(ID);
895      if (!FE)
896        break;
897  
898      // We want to find all possible modules that might contain this header, so
899      // search all enclosing directories for module maps and load them.
900      HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,
901                              SourceMgr.isInSystemHeader(Loc));
902  
903      bool InPrivateHeader = false;
904      for (auto Header : HeaderInfo.findAllModulesForHeader(*FE)) {
905        if (!Header.isAccessibleFrom(IncM)) {
906          // It's in a private header; we can't #include it.
907          // FIXME: If there's a public header in some module that re-exports it,
908          // then we could suggest including that, but it's not clear that's the
909          // expected way to make this entity visible.
910          InPrivateHeader = true;
911          continue;
912        }
913  
914        // Don't suggest explicitly excluded headers.
915        if (Header.getRole() == ModuleMap::ExcludedHeader)
916          continue;
917  
918        // We'll suggest including textual headers below if they're
919        // include-guarded.
920        if (Header.getRole() & ModuleMap::TextualHeader)
921          continue;
922  
923        // If we have a module import syntax, we shouldn't include a header to
924        // make a particular module visible. Let the caller know they should
925        // suggest an import instead.
926        if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)
927          return std::nullopt;
928  
929        // If this is an accessible, non-textual header of M's top-level module
930        // that transitively includes the given location and makes the
931        // corresponding module visible, this is the thing to #include.
932        return *FE;
933      }
934  
935      // FIXME: If we're bailing out due to a private header, we shouldn't suggest
936      // an import either.
937      if (InPrivateHeader)
938        return std::nullopt;
939  
940      // If the header is includable and has an include guard, assume the
941      // intended way to expose its contents is by #include, not by importing a
942      // module that transitively includes it.
943      if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(*FE))
944        return *FE;
945  
946      Loc = SM.getIncludeLoc(ID);
947    }
948  
949    return std::nullopt;
950  }
951  
LookupFile(SourceLocation FilenameLoc,StringRef Filename,bool isAngled,ConstSearchDirIterator FromDir,const FileEntry * FromFile,ConstSearchDirIterator * CurDirArg,SmallVectorImpl<char> * SearchPath,SmallVectorImpl<char> * RelativePath,ModuleMap::KnownHeader * SuggestedModule,bool * IsMapped,bool * IsFrameworkFound,bool SkipCache,bool OpenFile,bool CacheFailures)952  OptionalFileEntryRef Preprocessor::LookupFile(
953      SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
954      ConstSearchDirIterator FromDir, const FileEntry *FromFile,
955      ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
956      SmallVectorImpl<char> *RelativePath,
957      ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
958      bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
959    ConstSearchDirIterator CurDirLocal = nullptr;
960    ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
961  
962    Module *RequestingModule = getModuleForLocation(
963        FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
964  
965    // If the header lookup mechanism may be relative to the current inclusion
966    // stack, record the parent #includes.
967    SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
968    bool BuildSystemModule = false;
969    if (!FromDir && !FromFile) {
970      FileID FID = getCurrentFileLexer()->getFileID();
971      OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);
972  
973      // If there is no file entry associated with this file, it must be the
974      // predefines buffer or the module includes buffer. Any other file is not
975      // lexed with a normal lexer, so it won't be scanned for preprocessor
976      // directives.
977      //
978      // If we have the predefines buffer, resolve #include references (which come
979      // from the -include command line argument) from the current working
980      // directory instead of relative to the main file.
981      //
982      // If we have the module includes buffer, resolve #include references (which
983      // come from header declarations in the module map) relative to the module
984      // map file.
985      if (!FileEnt) {
986        if (FID == SourceMgr.getMainFileID() && MainFileDir) {
987          auto IncludeDir =
988              HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
989                  Filename, getCurrentModule())
990                  ? HeaderInfo.getModuleMap().getBuiltinDir()
991                  : MainFileDir;
992          Includers.push_back(std::make_pair(std::nullopt, *IncludeDir));
993          BuildSystemModule = getCurrentModule()->IsSystem;
994        } else if ((FileEnt = SourceMgr.getFileEntryRefForID(
995                        SourceMgr.getMainFileID()))) {
996          auto CWD = FileMgr.getOptionalDirectoryRef(".");
997          Includers.push_back(std::make_pair(*FileEnt, *CWD));
998        }
999      } else {
1000        Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1001      }
1002  
1003      // MSVC searches the current include stack from top to bottom for
1004      // headers included by quoted include directives.
1005      // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
1006      if (LangOpts.MSVCCompat && !isAngled) {
1007        for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1008          if (IsFileLexer(ISEntry))
1009            if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
1010              Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1011        }
1012      }
1013    }
1014  
1015    CurDir = CurDirLookup;
1016  
1017    if (FromFile) {
1018      // We're supposed to start looking from after a particular file. Search
1019      // the include path until we find that file or run out of files.
1020      ConstSearchDirIterator TmpCurDir = CurDir;
1021      ConstSearchDirIterator TmpFromDir = nullptr;
1022      while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1023                 Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir,
1024                 Includers, SearchPath, RelativePath, RequestingModule,
1025                 SuggestedModule, /*IsMapped=*/nullptr,
1026                 /*IsFrameworkFound=*/nullptr, SkipCache)) {
1027        // Keep looking as if this file did a #include_next.
1028        TmpFromDir = TmpCurDir;
1029        ++TmpFromDir;
1030        if (&FE->getFileEntry() == FromFile) {
1031          // Found it.
1032          FromDir = TmpFromDir;
1033          CurDir = TmpCurDir;
1034          break;
1035        }
1036      }
1037    }
1038  
1039    // Do a standard file entry lookup.
1040    OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1041        Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath,
1042        RelativePath, RequestingModule, SuggestedModule, IsMapped,
1043        IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1044    if (FE)
1045      return FE;
1046  
1047    OptionalFileEntryRef CurFileEnt;
1048    // Otherwise, see if this is a subframework header.  If so, this is relative
1049    // to one of the headers on the #include stack.  Walk the list of the current
1050    // headers on the #include stack and pass them to HeaderInfo.
1051    if (IsFileLexer()) {
1052      if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1053        if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1054                Filename, *CurFileEnt, SearchPath, RelativePath, RequestingModule,
1055                SuggestedModule)) {
1056          return FE;
1057        }
1058      }
1059    }
1060  
1061    for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1062      if (IsFileLexer(ISEntry)) {
1063        if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1064          if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1065                  Filename, *CurFileEnt, SearchPath, RelativePath,
1066                  RequestingModule, SuggestedModule)) {
1067            return FE;
1068          }
1069        }
1070      }
1071    }
1072  
1073    // Otherwise, we really couldn't find the file.
1074    return std::nullopt;
1075  }
1076  
1077  OptionalFileEntryRef
LookupEmbedFile(StringRef Filename,bool isAngled,bool OpenFile,const FileEntry * LookupFromFile)1078  Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
1079                                const FileEntry *LookupFromFile) {
1080    FileManager &FM = this->getFileManager();
1081    if (llvm::sys::path::is_absolute(Filename)) {
1082      // lookup path or immediately fail
1083      llvm::Expected<FileEntryRef> ShouldBeEntry =
1084          FM.getFileRef(Filename, OpenFile);
1085      return llvm::expectedToOptional(std::move(ShouldBeEntry));
1086    }
1087  
1088    auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,
1089                                 StringRef StartingFrom, StringRef FileName,
1090                                 bool RemoveInitialFileComponentFromLookupPath) {
1091      llvm::sys::path::native(StartingFrom, LookupPath);
1092      if (RemoveInitialFileComponentFromLookupPath)
1093        llvm::sys::path::remove_filename(LookupPath);
1094      if (!LookupPath.empty() &&
1095          !llvm::sys::path::is_separator(LookupPath.back())) {
1096        LookupPath.push_back(llvm::sys::path::get_separator().front());
1097      }
1098      LookupPath.append(FileName.begin(), FileName.end());
1099    };
1100  
1101    // Otherwise, it's search time!
1102    SmallString<512> LookupPath;
1103    // Non-angled lookup
1104    if (!isAngled) {
1105      if (LookupFromFile) {
1106        // Use file-based lookup.
1107        StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
1108        if (!FullFileDir.empty()) {
1109          SeparateComponents(LookupPath, FullFileDir, Filename, true);
1110          llvm::Expected<FileEntryRef> ShouldBeEntry =
1111              FM.getFileRef(LookupPath, OpenFile);
1112          if (ShouldBeEntry)
1113            return llvm::expectedToOptional(std::move(ShouldBeEntry));
1114          llvm::consumeError(ShouldBeEntry.takeError());
1115        }
1116      }
1117  
1118      // Otherwise, do working directory lookup.
1119      LookupPath.clear();
1120      auto MaybeWorkingDirEntry = FM.getDirectoryRef(".");
1121      if (MaybeWorkingDirEntry) {
1122        DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
1123        StringRef WorkingDir = WorkingDirEntry.getName();
1124        if (!WorkingDir.empty()) {
1125          SeparateComponents(LookupPath, WorkingDir, Filename, false);
1126          llvm::Expected<FileEntryRef> ShouldBeEntry =
1127              FM.getFileRef(LookupPath, OpenFile);
1128          if (ShouldBeEntry)
1129            return llvm::expectedToOptional(std::move(ShouldBeEntry));
1130          llvm::consumeError(ShouldBeEntry.takeError());
1131        }
1132      }
1133    }
1134  
1135    for (const auto &Entry : PPOpts->EmbedEntries) {
1136      LookupPath.clear();
1137      SeparateComponents(LookupPath, Entry, Filename, false);
1138      llvm::Expected<FileEntryRef> ShouldBeEntry =
1139          FM.getFileRef(LookupPath, OpenFile);
1140      if (ShouldBeEntry)
1141        return llvm::expectedToOptional(std::move(ShouldBeEntry));
1142      llvm::consumeError(ShouldBeEntry.takeError());
1143    }
1144    return std::nullopt;
1145  }
1146  
1147  //===----------------------------------------------------------------------===//
1148  // Preprocessor Directive Handling.
1149  //===----------------------------------------------------------------------===//
1150  
1151  class Preprocessor::ResetMacroExpansionHelper {
1152  public:
ResetMacroExpansionHelper(Preprocessor * pp)1153    ResetMacroExpansionHelper(Preprocessor *pp)
1154      : PP(pp), save(pp->DisableMacroExpansion) {
1155      if (pp->MacroExpansionInDirectivesOverride)
1156        pp->DisableMacroExpansion = false;
1157    }
1158  
~ResetMacroExpansionHelper()1159    ~ResetMacroExpansionHelper() {
1160      PP->DisableMacroExpansion = save;
1161    }
1162  
1163  private:
1164    Preprocessor *PP;
1165    bool save;
1166  };
1167  
1168  /// Process a directive while looking for the through header or a #pragma
1169  /// hdrstop. The following directives are handled:
1170  /// #include (to check if it is the through header)
1171  /// #define (to warn about macros that don't match the PCH)
1172  /// #pragma (to check for pragma hdrstop).
1173  /// All other directives are completely discarded.
HandleSkippedDirectiveWhileUsingPCH(Token & Result,SourceLocation HashLoc)1174  void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1175                                                         SourceLocation HashLoc) {
1176    if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1177      if (II->getPPKeywordID() == tok::pp_define) {
1178        return HandleDefineDirective(Result,
1179                                     /*ImmediatelyAfterHeaderGuard=*/false);
1180      }
1181      if (SkippingUntilPCHThroughHeader &&
1182          II->getPPKeywordID() == tok::pp_include) {
1183        return HandleIncludeDirective(HashLoc, Result);
1184      }
1185      if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1186        Lex(Result);
1187        auto *II = Result.getIdentifierInfo();
1188        if (II && II->getName() == "hdrstop")
1189          return HandlePragmaHdrstop(Result);
1190      }
1191    }
1192    DiscardUntilEndOfDirective();
1193  }
1194  
1195  /// HandleDirective - This callback is invoked when the lexer sees a # token
1196  /// at the start of a line.  This consumes the directive, modifies the
1197  /// lexer/preprocessor state, and advances the lexer(s) so that the next token
1198  /// read is the correct one.
HandleDirective(Token & Result)1199  void Preprocessor::HandleDirective(Token &Result) {
1200    // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1201  
1202    // We just parsed a # character at the start of a line, so we're in directive
1203    // mode.  Tell the lexer this so any newlines we see will be converted into an
1204    // EOD token (which terminates the directive).
1205    CurPPLexer->ParsingPreprocessorDirective = true;
1206    if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1207  
1208    bool ImmediatelyAfterTopLevelIfndef =
1209        CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1210    CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1211  
1212    ++NumDirectives;
1213  
1214    // We are about to read a token.  For the multiple-include optimization FA to
1215    // work, we have to remember if we had read any tokens *before* this
1216    // pp-directive.
1217    bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1218  
1219    // Save the '#' token in case we need to return it later.
1220    Token SavedHash = Result;
1221  
1222    // Read the next token, the directive flavor.  This isn't expanded due to
1223    // C99 6.10.3p8.
1224    LexUnexpandedToken(Result);
1225  
1226    // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
1227    //   #define A(x) #x
1228    //   A(abc
1229    //     #warning blah
1230    //   def)
1231    // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1232    // not support this for #include-like directives, since that can result in
1233    // terrible diagnostics, and does not work in GCC.
1234    if (InMacroArgs) {
1235      if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1236        switch (II->getPPKeywordID()) {
1237        case tok::pp_include:
1238        case tok::pp_import:
1239        case tok::pp_include_next:
1240        case tok::pp___include_macros:
1241        case tok::pp_pragma:
1242        case tok::pp_embed:
1243          Diag(Result, diag::err_embedded_directive) << II->getName();
1244          Diag(*ArgMacro, diag::note_macro_expansion_here)
1245              << ArgMacro->getIdentifierInfo();
1246          DiscardUntilEndOfDirective();
1247          return;
1248        default:
1249          break;
1250        }
1251      }
1252      Diag(Result, diag::ext_embedded_directive);
1253    }
1254  
1255    // Temporarily enable macro expansion if set so
1256    // and reset to previous state when returning from this function.
1257    ResetMacroExpansionHelper helper(this);
1258  
1259    if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1260      return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());
1261  
1262    switch (Result.getKind()) {
1263    case tok::eod:
1264      // Ignore the null directive with regards to the multiple-include
1265      // optimization, i.e. allow the null directive to appear outside of the
1266      // include guard and still enable the multiple-include optimization.
1267      CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1268      return;   // null directive.
1269    case tok::code_completion:
1270      setCodeCompletionReached();
1271      if (CodeComplete)
1272        CodeComplete->CodeCompleteDirective(
1273                                      CurPPLexer->getConditionalStackDepth() > 0);
1274      return;
1275    case tok::numeric_constant:  // # 7  GNU line marker directive.
1276      // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1277      // directive. However do permit it in the predefines file, as we use line
1278      // markers to mark the builtin macros as being in a system header.
1279      if (getLangOpts().AsmPreprocessor &&
1280          SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID())
1281        break;
1282      return HandleDigitDirective(Result);
1283    default:
1284      IdentifierInfo *II = Result.getIdentifierInfo();
1285      if (!II) break; // Not an identifier.
1286  
1287      // Ask what the preprocessor keyword ID is.
1288      switch (II->getPPKeywordID()) {
1289      default: break;
1290      // C99 6.10.1 - Conditional Inclusion.
1291      case tok::pp_if:
1292        return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
1293      case tok::pp_ifdef:
1294        return HandleIfdefDirective(Result, SavedHash, false,
1295                                    true /*not valid for miopt*/);
1296      case tok::pp_ifndef:
1297        return HandleIfdefDirective(Result, SavedHash, true,
1298                                    ReadAnyTokensBeforeDirective);
1299      case tok::pp_elif:
1300      case tok::pp_elifdef:
1301      case tok::pp_elifndef:
1302        return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID());
1303  
1304      case tok::pp_else:
1305        return HandleElseDirective(Result, SavedHash);
1306      case tok::pp_endif:
1307        return HandleEndifDirective(Result);
1308  
1309      // C99 6.10.2 - Source File Inclusion.
1310      case tok::pp_include:
1311        // Handle #include.
1312        return HandleIncludeDirective(SavedHash.getLocation(), Result);
1313      case tok::pp___include_macros:
1314        // Handle -imacros.
1315        return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
1316  
1317      // C99 6.10.3 - Macro Replacement.
1318      case tok::pp_define:
1319        return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
1320      case tok::pp_undef:
1321        return HandleUndefDirective();
1322  
1323      // C99 6.10.4 - Line Control.
1324      case tok::pp_line:
1325        return HandleLineDirective();
1326  
1327      // C99 6.10.5 - Error Directive.
1328      case tok::pp_error:
1329        return HandleUserDiagnosticDirective(Result, false);
1330  
1331      // C99 6.10.6 - Pragma Directive.
1332      case tok::pp_pragma:
1333        return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
1334  
1335      // GNU Extensions.
1336      case tok::pp_import:
1337        return HandleImportDirective(SavedHash.getLocation(), Result);
1338      case tok::pp_include_next:
1339        return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
1340  
1341      case tok::pp_warning:
1342        if (LangOpts.CPlusPlus)
1343          Diag(Result, LangOpts.CPlusPlus23
1344                           ? diag::warn_cxx23_compat_warning_directive
1345                           : diag::ext_pp_warning_directive)
1346              << /*C++23*/ 1;
1347        else
1348          Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1349                                    : diag::ext_pp_warning_directive)
1350              << /*C23*/ 0;
1351  
1352        return HandleUserDiagnosticDirective(Result, true);
1353      case tok::pp_ident:
1354        return HandleIdentSCCSDirective(Result);
1355      case tok::pp_sccs:
1356        return HandleIdentSCCSDirective(Result);
1357      case tok::pp_embed:
1358        return HandleEmbedDirective(SavedHash.getLocation(), Result,
1359                                    getCurrentFileLexer()
1360                                        ? *getCurrentFileLexer()->getFileEntry()
1361                                        : static_cast<FileEntry *>(nullptr));
1362      case tok::pp_assert:
1363        //isExtension = true;  // FIXME: implement #assert
1364        break;
1365      case tok::pp_unassert:
1366        //isExtension = true;  // FIXME: implement #unassert
1367        break;
1368  
1369      case tok::pp___public_macro:
1370        if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1371          return HandleMacroPublicDirective(Result);
1372        break;
1373  
1374      case tok::pp___private_macro:
1375        if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1376          return HandleMacroPrivateDirective();
1377        break;
1378      }
1379      break;
1380    }
1381  
1382    // If this is a .S file, treat unknown # directives as non-preprocessor
1383    // directives.  This is important because # may be a comment or introduce
1384    // various pseudo-ops.  Just return the # token and push back the following
1385    // token to be lexed next time.
1386    if (getLangOpts().AsmPreprocessor) {
1387      auto Toks = std::make_unique<Token[]>(2);
1388      // Return the # and the token after it.
1389      Toks[0] = SavedHash;
1390      Toks[1] = Result;
1391  
1392      // If the second token is a hashhash token, then we need to translate it to
1393      // unknown so the token lexer doesn't try to perform token pasting.
1394      if (Result.is(tok::hashhash))
1395        Toks[1].setKind(tok::unknown);
1396  
1397      // Enter this token stream so that we re-lex the tokens.  Make sure to
1398      // enable macro expansion, in case the token after the # is an identifier
1399      // that is expanded.
1400      EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
1401      return;
1402    }
1403  
1404    // If we reached here, the preprocessing token is not valid!
1405    // Start suggesting if a similar directive found.
1406    Diag(Result, diag::err_pp_invalid_directive) << 0;
1407  
1408    // Read the rest of the PP line.
1409    DiscardUntilEndOfDirective();
1410  
1411    // Okay, we're done parsing the directive.
1412  }
1413  
1414  /// GetLineValue - Convert a numeric token into an unsigned value, emitting
1415  /// Diagnostic DiagID if it is invalid, and returning the value in Val.
GetLineValue(Token & DigitTok,unsigned & Val,unsigned DiagID,Preprocessor & PP,bool IsGNULineDirective=false)1416  static bool GetLineValue(Token &DigitTok, unsigned &Val,
1417                           unsigned DiagID, Preprocessor &PP,
1418                           bool IsGNULineDirective=false) {
1419    if (DigitTok.isNot(tok::numeric_constant)) {
1420      PP.Diag(DigitTok, DiagID);
1421  
1422      if (DigitTok.isNot(tok::eod))
1423        PP.DiscardUntilEndOfDirective();
1424      return true;
1425    }
1426  
1427    SmallString<64> IntegerBuffer;
1428    IntegerBuffer.resize(DigitTok.getLength());
1429    const char *DigitTokBegin = &IntegerBuffer[0];
1430    bool Invalid = false;
1431    unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
1432    if (Invalid)
1433      return true;
1434  
1435    // Verify that we have a simple digit-sequence, and compute the value.  This
1436    // is always a simple digit string computed in decimal, so we do this manually
1437    // here.
1438    Val = 0;
1439    for (unsigned i = 0; i != ActualLength; ++i) {
1440      // C++1y [lex.fcon]p1:
1441      //   Optional separating single quotes in a digit-sequence are ignored
1442      if (DigitTokBegin[i] == '\'')
1443        continue;
1444  
1445      if (!isDigit(DigitTokBegin[i])) {
1446        PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
1447                diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1448        PP.DiscardUntilEndOfDirective();
1449        return true;
1450      }
1451  
1452      unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1453      if (NextVal < Val) { // overflow.
1454        PP.Diag(DigitTok, DiagID);
1455        PP.DiscardUntilEndOfDirective();
1456        return true;
1457      }
1458      Val = NextVal;
1459    }
1460  
1461    if (DigitTokBegin[0] == '0' && Val)
1462      PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
1463        << IsGNULineDirective;
1464  
1465    return false;
1466  }
1467  
1468  /// Handle a \#line directive: C99 6.10.4.
1469  ///
1470  /// The two acceptable forms are:
1471  /// \verbatim
1472  ///   # line digit-sequence
1473  ///   # line digit-sequence "s-char-sequence"
1474  /// \endverbatim
HandleLineDirective()1475  void Preprocessor::HandleLineDirective() {
1476    // Read the line # and string argument.  Per C99 6.10.4p5, these tokens are
1477    // expanded.
1478    Token DigitTok;
1479    Lex(DigitTok);
1480  
1481    // Validate the number and convert it to an unsigned.
1482    unsigned LineNo;
1483    if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
1484      return;
1485  
1486    if (LineNo == 0)
1487      Diag(DigitTok, diag::ext_pp_line_zero);
1488  
1489    // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1490    // number greater than 2147483647".  C90 requires that the line # be <= 32767.
1491    unsigned LineLimit = 32768U;
1492    if (LangOpts.C99 || LangOpts.CPlusPlus11)
1493      LineLimit = 2147483648U;
1494    if (LineNo >= LineLimit)
1495      Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
1496    else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1497      Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
1498  
1499    int FilenameID = -1;
1500    Token StrTok;
1501    Lex(StrTok);
1502  
1503    // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1504    // string followed by eod.
1505    if (StrTok.is(tok::eod))
1506      ; // ok
1507    else if (StrTok.isNot(tok::string_literal)) {
1508      Diag(StrTok, diag::err_pp_line_invalid_filename);
1509      DiscardUntilEndOfDirective();
1510      return;
1511    } else if (StrTok.hasUDSuffix()) {
1512      Diag(StrTok, diag::err_invalid_string_udl);
1513      DiscardUntilEndOfDirective();
1514      return;
1515    } else {
1516      // Parse and validate the string, converting it into a unique ID.
1517      StringLiteralParser Literal(StrTok, *this);
1518      assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1519      if (Literal.hadError) {
1520        DiscardUntilEndOfDirective();
1521        return;
1522      }
1523      if (Literal.Pascal) {
1524        Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1525        DiscardUntilEndOfDirective();
1526        return;
1527      }
1528      FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1529  
1530      // Verify that there is nothing after the string, other than EOD.  Because
1531      // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1532      CheckEndOfDirective("line", true);
1533    }
1534  
1535    // Take the file kind of the file containing the #line directive. #line
1536    // directives are often used for generated sources from the same codebase, so
1537    // the new file should generally be classified the same way as the current
1538    // file. This is visible in GCC's pre-processed output, which rewrites #line
1539    // to GNU line markers.
1540    SrcMgr::CharacteristicKind FileKind =
1541        SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1542  
1543    SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
1544                          false, FileKind);
1545  
1546    if (Callbacks)
1547      Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
1548                             PPCallbacks::RenameFile, FileKind);
1549  }
1550  
1551  /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1552  /// marker directive.
ReadLineMarkerFlags(bool & IsFileEntry,bool & IsFileExit,SrcMgr::CharacteristicKind & FileKind,Preprocessor & PP)1553  static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1554                                  SrcMgr::CharacteristicKind &FileKind,
1555                                  Preprocessor &PP) {
1556    unsigned FlagVal;
1557    Token FlagTok;
1558    PP.Lex(FlagTok);
1559    if (FlagTok.is(tok::eod)) return false;
1560    if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1561      return true;
1562  
1563    if (FlagVal == 1) {
1564      IsFileEntry = true;
1565  
1566      PP.Lex(FlagTok);
1567      if (FlagTok.is(tok::eod)) return false;
1568      if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1569        return true;
1570    } else if (FlagVal == 2) {
1571      IsFileExit = true;
1572  
1573      SourceManager &SM = PP.getSourceManager();
1574      // If we are leaving the current presumed file, check to make sure the
1575      // presumed include stack isn't empty!
1576      FileID CurFileID =
1577        SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
1578      PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
1579      if (PLoc.isInvalid())
1580        return true;
1581  
1582      // If there is no include loc (main file) or if the include loc is in a
1583      // different physical file, then we aren't in a "1" line marker flag region.
1584      SourceLocation IncLoc = PLoc.getIncludeLoc();
1585      if (IncLoc.isInvalid() ||
1586          SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
1587        PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
1588        PP.DiscardUntilEndOfDirective();
1589        return true;
1590      }
1591  
1592      PP.Lex(FlagTok);
1593      if (FlagTok.is(tok::eod)) return false;
1594      if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1595        return true;
1596    }
1597  
1598    // We must have 3 if there are still flags.
1599    if (FlagVal != 3) {
1600      PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1601      PP.DiscardUntilEndOfDirective();
1602      return true;
1603    }
1604  
1605    FileKind = SrcMgr::C_System;
1606  
1607    PP.Lex(FlagTok);
1608    if (FlagTok.is(tok::eod)) return false;
1609    if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1610      return true;
1611  
1612    // We must have 4 if there is yet another flag.
1613    if (FlagVal != 4) {
1614      PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1615      PP.DiscardUntilEndOfDirective();
1616      return true;
1617    }
1618  
1619    FileKind = SrcMgr::C_ExternCSystem;
1620  
1621    PP.Lex(FlagTok);
1622    if (FlagTok.is(tok::eod)) return false;
1623  
1624    // There are no more valid flags here.
1625    PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1626    PP.DiscardUntilEndOfDirective();
1627    return true;
1628  }
1629  
1630  /// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1631  /// one of the following forms:
1632  ///
1633  ///     # 42
1634  ///     # 42 "file" ('1' | '2')?
1635  ///     # 42 "file" ('1' | '2')? '3' '4'?
1636  ///
HandleDigitDirective(Token & DigitTok)1637  void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1638    // Validate the number and convert it to an unsigned.  GNU does not have a
1639    // line # limit other than it fit in 32-bits.
1640    unsigned LineNo;
1641    if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
1642                     *this, true))
1643      return;
1644  
1645    Token StrTok;
1646    Lex(StrTok);
1647  
1648    bool IsFileEntry = false, IsFileExit = false;
1649    int FilenameID = -1;
1650    SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1651  
1652    // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1653    // string followed by eod.
1654    if (StrTok.is(tok::eod)) {
1655      Diag(StrTok, diag::ext_pp_gnu_line_directive);
1656      // Treat this like "#line NN", which doesn't change file characteristics.
1657      FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1658    } else if (StrTok.isNot(tok::string_literal)) {
1659      Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1660      DiscardUntilEndOfDirective();
1661      return;
1662    } else if (StrTok.hasUDSuffix()) {
1663      Diag(StrTok, diag::err_invalid_string_udl);
1664      DiscardUntilEndOfDirective();
1665      return;
1666    } else {
1667      // Parse and validate the string, converting it into a unique ID.
1668      StringLiteralParser Literal(StrTok, *this);
1669      assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1670      if (Literal.hadError) {
1671        DiscardUntilEndOfDirective();
1672        return;
1673      }
1674      if (Literal.Pascal) {
1675        Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1676        DiscardUntilEndOfDirective();
1677        return;
1678      }
1679  
1680      // If a filename was present, read any flags that are present.
1681      if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
1682        return;
1683      if (!SourceMgr.isWrittenInBuiltinFile(DigitTok.getLocation()) &&
1684          !SourceMgr.isWrittenInCommandLineFile(DigitTok.getLocation()))
1685        Diag(StrTok, diag::ext_pp_gnu_line_directive);
1686  
1687      // Exiting to an empty string means pop to the including file, so leave
1688      // FilenameID as -1 in that case.
1689      if (!(IsFileExit && Literal.GetString().empty()))
1690        FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1691    }
1692  
1693    // Create a line note with this information.
1694    SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1695                          IsFileExit, FileKind);
1696  
1697    // If the preprocessor has callbacks installed, notify them of the #line
1698    // change.  This is used so that the line marker comes out in -E mode for
1699    // example.
1700    if (Callbacks) {
1701      PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1702      if (IsFileEntry)
1703        Reason = PPCallbacks::EnterFile;
1704      else if (IsFileExit)
1705        Reason = PPCallbacks::ExitFile;
1706  
1707      Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
1708    }
1709  }
1710  
1711  /// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1712  ///
HandleUserDiagnosticDirective(Token & Tok,bool isWarning)1713  void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1714                                                   bool isWarning) {
1715    // Read the rest of the line raw.  We do this because we don't want macros
1716    // to be expanded and we don't require that the tokens be valid preprocessing
1717    // tokens.  For example, this is allowed: "#warning `   'foo".  GCC does
1718    // collapse multiple consecutive white space between tokens, but this isn't
1719    // specified by the standard.
1720    SmallString<128> Message;
1721    CurLexer->ReadToEndOfLine(&Message);
1722  
1723    // Find the first non-whitespace character, so that we can make the
1724    // diagnostic more succinct.
1725    StringRef Msg = Message.str().ltrim(' ');
1726  
1727    if (isWarning)
1728      Diag(Tok, diag::pp_hash_warning) << Msg;
1729    else
1730      Diag(Tok, diag::err_pp_hash_error) << Msg;
1731  }
1732  
1733  /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1734  ///
HandleIdentSCCSDirective(Token & Tok)1735  void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1736    // Yes, this directive is an extension.
1737    Diag(Tok, diag::ext_pp_ident_directive);
1738  
1739    // Read the string argument.
1740    Token StrTok;
1741    Lex(StrTok);
1742  
1743    // If the token kind isn't a string, it's a malformed directive.
1744    if (StrTok.isNot(tok::string_literal) &&
1745        StrTok.isNot(tok::wide_string_literal)) {
1746      Diag(StrTok, diag::err_pp_malformed_ident);
1747      if (StrTok.isNot(tok::eod))
1748        DiscardUntilEndOfDirective();
1749      return;
1750    }
1751  
1752    if (StrTok.hasUDSuffix()) {
1753      Diag(StrTok, diag::err_invalid_string_udl);
1754      DiscardUntilEndOfDirective();
1755      return;
1756    }
1757  
1758    // Verify that there is nothing after the string, other than EOD.
1759    CheckEndOfDirective("ident");
1760  
1761    if (Callbacks) {
1762      bool Invalid = false;
1763      std::string Str = getSpelling(StrTok, &Invalid);
1764      if (!Invalid)
1765        Callbacks->Ident(Tok.getLocation(), Str);
1766    }
1767  }
1768  
1769  /// Handle a #public directive.
HandleMacroPublicDirective(Token & Tok)1770  void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1771    Token MacroNameTok;
1772    ReadMacroName(MacroNameTok, MU_Undef);
1773  
1774    // Error reading macro name?  If so, diagnostic already issued.
1775    if (MacroNameTok.is(tok::eod))
1776      return;
1777  
1778    // Check to see if this is the last token on the #__public_macro line.
1779    CheckEndOfDirective("__public_macro");
1780  
1781    IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1782    // Okay, we finally have a valid identifier to undef.
1783    MacroDirective *MD = getLocalMacroDirective(II);
1784  
1785    // If the macro is not defined, this is an error.
1786    if (!MD) {
1787      Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1788      return;
1789    }
1790  
1791    // Note that this macro has now been exported.
1792    appendMacroDirective(II, AllocateVisibilityMacroDirective(
1793                                  MacroNameTok.getLocation(), /*isPublic=*/true));
1794  }
1795  
1796  /// Handle a #private directive.
HandleMacroPrivateDirective()1797  void Preprocessor::HandleMacroPrivateDirective() {
1798    Token MacroNameTok;
1799    ReadMacroName(MacroNameTok, MU_Undef);
1800  
1801    // Error reading macro name?  If so, diagnostic already issued.
1802    if (MacroNameTok.is(tok::eod))
1803      return;
1804  
1805    // Check to see if this is the last token on the #__private_macro line.
1806    CheckEndOfDirective("__private_macro");
1807  
1808    IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1809    // Okay, we finally have a valid identifier to undef.
1810    MacroDirective *MD = getLocalMacroDirective(II);
1811  
1812    // If the macro is not defined, this is an error.
1813    if (!MD) {
1814      Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1815      return;
1816    }
1817  
1818    // Note that this macro has now been marked private.
1819    appendMacroDirective(II, AllocateVisibilityMacroDirective(
1820                                 MacroNameTok.getLocation(), /*isPublic=*/false));
1821  }
1822  
1823  //===----------------------------------------------------------------------===//
1824  // Preprocessor Include Directive Handling.
1825  //===----------------------------------------------------------------------===//
1826  
1827  /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1828  /// checked and spelled filename, e.g. as an operand of \#include. This returns
1829  /// true if the input filename was in <>'s or false if it were in ""'s.  The
1830  /// caller is expected to provide a buffer that is large enough to hold the
1831  /// spelling of the filename, but is also expected to handle the case when
1832  /// this method decides to use a different buffer.
GetIncludeFilenameSpelling(SourceLocation Loc,StringRef & Buffer)1833  bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1834                                                StringRef &Buffer) {
1835    // Get the text form of the filename.
1836    assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1837  
1838    // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1839    // C++20 [lex.header]/2:
1840    //
1841    // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1842    //   in C: behavior is undefined
1843    //   in C++: program is conditionally-supported with implementation-defined
1844    //           semantics
1845  
1846    // Make sure the filename is <x> or "x".
1847    bool isAngled;
1848    if (Buffer[0] == '<') {
1849      if (Buffer.back() != '>') {
1850        Diag(Loc, diag::err_pp_expects_filename);
1851        Buffer = StringRef();
1852        return true;
1853      }
1854      isAngled = true;
1855    } else if (Buffer[0] == '"') {
1856      if (Buffer.back() != '"') {
1857        Diag(Loc, diag::err_pp_expects_filename);
1858        Buffer = StringRef();
1859        return true;
1860      }
1861      isAngled = false;
1862    } else {
1863      Diag(Loc, diag::err_pp_expects_filename);
1864      Buffer = StringRef();
1865      return true;
1866    }
1867  
1868    // Diagnose #include "" as invalid.
1869    if (Buffer.size() <= 2) {
1870      Diag(Loc, diag::err_pp_empty_filename);
1871      Buffer = StringRef();
1872      return true;
1873    }
1874  
1875    // Skip the brackets.
1876    Buffer = Buffer.substr(1, Buffer.size()-2);
1877    return isAngled;
1878  }
1879  
1880  /// Push a token onto the token stream containing an annotation.
EnterAnnotationToken(SourceRange Range,tok::TokenKind Kind,void * AnnotationVal)1881  void Preprocessor::EnterAnnotationToken(SourceRange Range,
1882                                          tok::TokenKind Kind,
1883                                          void *AnnotationVal) {
1884    // FIXME: Produce this as the current token directly, rather than
1885    // allocating a new token for it.
1886    auto Tok = std::make_unique<Token[]>(1);
1887    Tok[0].startToken();
1888    Tok[0].setKind(Kind);
1889    Tok[0].setLocation(Range.getBegin());
1890    Tok[0].setAnnotationEndLoc(Range.getEnd());
1891    Tok[0].setAnnotationValue(AnnotationVal);
1892    EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
1893  }
1894  
1895  /// Produce a diagnostic informing the user that a #include or similar
1896  /// was implicitly treated as a module import.
diagnoseAutoModuleImport(Preprocessor & PP,SourceLocation HashLoc,Token & IncludeTok,ArrayRef<std::pair<IdentifierInfo *,SourceLocation>> Path,SourceLocation PathEnd)1897  static void diagnoseAutoModuleImport(
1898      Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
1899      ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
1900      SourceLocation PathEnd) {
1901    SmallString<128> PathString;
1902    for (size_t I = 0, N = Path.size(); I != N; ++I) {
1903      if (I)
1904        PathString += '.';
1905      PathString += Path[I].first->getName();
1906    }
1907  
1908    int IncludeKind = 0;
1909    switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1910    case tok::pp_include:
1911      IncludeKind = 0;
1912      break;
1913  
1914    case tok::pp_import:
1915      IncludeKind = 1;
1916      break;
1917  
1918    case tok::pp_include_next:
1919      IncludeKind = 2;
1920      break;
1921  
1922    case tok::pp___include_macros:
1923      IncludeKind = 3;
1924      break;
1925  
1926    default:
1927      llvm_unreachable("unknown include directive kind");
1928    }
1929  
1930    PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation)
1931        << IncludeKind << PathString;
1932  }
1933  
1934  // Given a vector of path components and a string containing the real
1935  // path to the file, build a properly-cased replacement in the vector,
1936  // and return true if the replacement should be suggested.
trySimplifyPath(SmallVectorImpl<StringRef> & Components,StringRef RealPathName,llvm::sys::path::Style Separator)1937  static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1938                              StringRef RealPathName,
1939                              llvm::sys::path::Style Separator) {
1940    auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
1941    auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
1942    int Cnt = 0;
1943    bool SuggestReplacement = false;
1944  
1945    auto IsSep = [Separator](StringRef Component) {
1946      return Component.size() == 1 &&
1947             llvm::sys::path::is_separator(Component[0], Separator);
1948    };
1949  
1950    // Below is a best-effort to handle ".." in paths. It is admittedly
1951    // not 100% correct in the presence of symlinks.
1952    for (auto &Component : llvm::reverse(Components)) {
1953      if ("." == Component) {
1954      } else if (".." == Component) {
1955        ++Cnt;
1956      } else if (Cnt) {
1957        --Cnt;
1958      } else if (RealPathComponentIter != RealPathComponentEnd) {
1959        if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
1960            Component != *RealPathComponentIter) {
1961          // If these non-separator path components differ by more than just case,
1962          // then we may be looking at symlinked paths. Bail on this diagnostic to
1963          // avoid noisy false positives.
1964          SuggestReplacement =
1965              RealPathComponentIter->equals_insensitive(Component);
1966          if (!SuggestReplacement)
1967            break;
1968          Component = *RealPathComponentIter;
1969        }
1970        ++RealPathComponentIter;
1971      }
1972    }
1973    return SuggestReplacement;
1974  }
1975  
checkModuleIsAvailable(const LangOptions & LangOpts,const TargetInfo & TargetInfo,const Module & M,DiagnosticsEngine & Diags)1976  bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
1977                                            const TargetInfo &TargetInfo,
1978                                            const Module &M,
1979                                            DiagnosticsEngine &Diags) {
1980    Module::Requirement Requirement;
1981    Module::UnresolvedHeaderDirective MissingHeader;
1982    Module *ShadowingModule = nullptr;
1983    if (M.isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
1984                      ShadowingModule))
1985      return false;
1986  
1987    if (MissingHeader.FileNameLoc.isValid()) {
1988      Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
1989          << MissingHeader.IsUmbrella << MissingHeader.FileName;
1990    } else if (ShadowingModule) {
1991      Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name;
1992      Diags.Report(ShadowingModule->DefinitionLoc,
1993                   diag::note_previous_definition);
1994    } else {
1995      // FIXME: Track the location at which the requirement was specified, and
1996      // use it here.
1997      Diags.Report(M.DefinitionLoc, diag::err_module_unavailable)
1998          << M.getFullModuleName() << Requirement.RequiredState
1999          << Requirement.FeatureName;
2000    }
2001    return true;
2002  }
2003  
2004  std::pair<ConstSearchDirIterator, const FileEntry *>
getIncludeNextStart(const Token & IncludeNextTok) const2005  Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
2006    // #include_next is like #include, except that we start searching after
2007    // the current found directory.  If we can't do this, issue a
2008    // diagnostic.
2009    ConstSearchDirIterator Lookup = CurDirLookup;
2010    const FileEntry *LookupFromFile = nullptr;
2011  
2012    if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
2013      // If the main file is a header, then it's either for PCH/AST generation,
2014      // or libclang opened it. Either way, handle it as a normal include below
2015      // and do not complain about include_next.
2016    } else if (isInPrimaryFile()) {
2017      Lookup = nullptr;
2018      Diag(IncludeNextTok, diag::pp_include_next_in_primary);
2019    } else if (CurLexerSubmodule) {
2020      // Start looking up in the directory *after* the one in which the current
2021      // file would be found, if any.
2022      assert(CurPPLexer && "#include_next directive in macro?");
2023      if (auto FE = CurPPLexer->getFileEntry())
2024        LookupFromFile = *FE;
2025      Lookup = nullptr;
2026    } else if (!Lookup) {
2027      // The current file was not found by walking the include path. Either it
2028      // is the primary file (handled above), or it was found by absolute path,
2029      // or it was found relative to such a file.
2030      // FIXME: Track enough information so we know which case we're in.
2031      Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
2032    } else {
2033      // Start looking up in the next directory.
2034      ++Lookup;
2035    }
2036  
2037    return {Lookup, LookupFromFile};
2038  }
2039  
2040  /// HandleIncludeDirective - The "\#include" tokens have just been read, read
2041  /// the file to be included from the lexer, then include it!  This is a common
2042  /// routine with functionality shared between \#include, \#include_next and
2043  /// \#import.  LookupFrom is set when this is a \#include_next directive, it
2044  /// specifies the file to start searching from.
HandleIncludeDirective(SourceLocation HashLoc,Token & IncludeTok,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)2045  void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
2046                                            Token &IncludeTok,
2047                                            ConstSearchDirIterator LookupFrom,
2048                                            const FileEntry *LookupFromFile) {
2049    Token FilenameTok;
2050    if (LexHeaderName(FilenameTok))
2051      return;
2052  
2053    if (FilenameTok.isNot(tok::header_name)) {
2054      Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
2055      if (FilenameTok.isNot(tok::eod))
2056        DiscardUntilEndOfDirective();
2057      return;
2058    }
2059  
2060    // Verify that there is nothing after the filename, other than EOD.  Note
2061    // that we allow macros that expand to nothing after the filename, because
2062    // this falls into the category of "#include pp-tokens new-line" specified
2063    // in C99 6.10.2p4.
2064    SourceLocation EndLoc =
2065        CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
2066  
2067    auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
2068                                              EndLoc, LookupFrom, LookupFromFile);
2069    switch (Action.Kind) {
2070    case ImportAction::None:
2071    case ImportAction::SkippedModuleImport:
2072      break;
2073    case ImportAction::ModuleBegin:
2074      EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2075                           tok::annot_module_begin, Action.ModuleForHeader);
2076      break;
2077    case ImportAction::HeaderUnitImport:
2078      EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
2079                           Action.ModuleForHeader);
2080      break;
2081    case ImportAction::ModuleImport:
2082      EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2083                           tok::annot_module_include, Action.ModuleForHeader);
2084      break;
2085    case ImportAction::Failure:
2086      assert(TheModuleLoader.HadFatalFailure &&
2087             "This should be an early exit only to a fatal error");
2088      TheModuleLoader.HadFatalFailure = true;
2089      IncludeTok.setKind(tok::eof);
2090      CurLexer->cutOffLexing();
2091      return;
2092    }
2093  }
2094  
LookupHeaderIncludeOrImport(ConstSearchDirIterator * CurDir,StringRef & Filename,SourceLocation FilenameLoc,CharSourceRange FilenameRange,const Token & FilenameTok,bool & IsFrameworkFound,bool IsImportDecl,bool & IsMapped,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile,StringRef & LookupFilename,SmallVectorImpl<char> & RelativePath,SmallVectorImpl<char> & SearchPath,ModuleMap::KnownHeader & SuggestedModule,bool isAngled)2095  OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
2096      ConstSearchDirIterator *CurDir, StringRef &Filename,
2097      SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2098      const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2099      bool &IsMapped, ConstSearchDirIterator LookupFrom,
2100      const FileEntry *LookupFromFile, StringRef &LookupFilename,
2101      SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2102      ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
2103    auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
2104      if (LangOpts.AsmPreprocessor)
2105        return;
2106  
2107      Module *RequestingModule = getModuleForLocation(
2108          FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
2109      bool RequestingModuleIsModuleInterface =
2110          !SourceMgr.isInMainFile(FilenameLoc);
2111  
2112      HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
2113          RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
2114          Filename, FE);
2115    };
2116  
2117    OptionalFileEntryRef File = LookupFile(
2118        FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir,
2119        Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2120        &SuggestedModule, &IsMapped, &IsFrameworkFound);
2121    if (File) {
2122      DiagnoseHeaderInclusion(*File);
2123      return File;
2124    }
2125  
2126    // Give the clients a chance to silently skip this include.
2127    if (Callbacks && Callbacks->FileNotFound(Filename))
2128      return std::nullopt;
2129  
2130    if (SuppressIncludeNotFoundError)
2131      return std::nullopt;
2132  
2133    // If the file could not be located and it was included via angle
2134    // brackets, we can attempt a lookup as though it were a quoted path to
2135    // provide the user with a possible fixit.
2136    if (isAngled) {
2137      OptionalFileEntryRef File = LookupFile(
2138          FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir,
2139          Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2140          &SuggestedModule, &IsMapped,
2141          /*IsFrameworkFound=*/nullptr);
2142      if (File) {
2143        DiagnoseHeaderInclusion(*File);
2144        Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
2145            << Filename << IsImportDecl
2146            << FixItHint::CreateReplacement(FilenameRange,
2147                                            "\"" + Filename.str() + "\"");
2148        return File;
2149      }
2150    }
2151  
2152    // Check for likely typos due to leading or trailing non-isAlphanumeric
2153    // characters
2154    StringRef OriginalFilename = Filename;
2155    if (LangOpts.SpellChecking) {
2156      // A heuristic to correct a typo file name by removing leading and
2157      // trailing non-isAlphanumeric characters.
2158      auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2159        Filename = Filename.drop_until(isAlphanumeric);
2160        while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
2161          Filename = Filename.drop_back();
2162        }
2163        return Filename;
2164      };
2165      StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2166      StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2167  
2168      OptionalFileEntryRef File = LookupFile(
2169          FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom,
2170          LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr,
2171          Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
2172          /*IsFrameworkFound=*/nullptr);
2173      if (File) {
2174        DiagnoseHeaderInclusion(*File);
2175        auto Hint =
2176            isAngled ? FixItHint::CreateReplacement(
2177                           FilenameRange, "<" + TypoCorrectionName.str() + ">")
2178                     : FixItHint::CreateReplacement(
2179                           FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
2180        Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
2181            << OriginalFilename << TypoCorrectionName << Hint;
2182        // We found the file, so set the Filename to the name after typo
2183        // correction.
2184        Filename = TypoCorrectionName;
2185        LookupFilename = TypoCorrectionLookupName;
2186        return File;
2187      }
2188    }
2189  
2190    // If the file is still not found, just go with the vanilla diagnostic
2191    assert(!File && "expected missing file");
2192    Diag(FilenameTok, diag::err_pp_file_not_found)
2193        << OriginalFilename << FilenameRange;
2194    if (IsFrameworkFound) {
2195      size_t SlashPos = OriginalFilename.find('/');
2196      assert(SlashPos != StringRef::npos &&
2197             "Include with framework name should have '/' in the filename");
2198      StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
2199      FrameworkCacheEntry &CacheEntry =
2200          HeaderInfo.LookupFrameworkCache(FrameworkName);
2201      assert(CacheEntry.Directory && "Found framework should be in cache");
2202      Diag(FilenameTok, diag::note_pp_framework_without_header)
2203          << OriginalFilename.substr(SlashPos + 1) << FrameworkName
2204          << CacheEntry.Directory->getName();
2205    }
2206  
2207    return std::nullopt;
2208  }
2209  
2210  /// Handle either a #include-like directive or an import declaration that names
2211  /// a header file.
2212  ///
2213  /// \param HashLoc The location of the '#' token for an include, or
2214  ///        SourceLocation() for an import declaration.
2215  /// \param IncludeTok The include / include_next / import token.
2216  /// \param FilenameTok The header-name token.
2217  /// \param EndLoc The location at which any imported macros become visible.
2218  /// \param LookupFrom For #include_next, the starting directory for the
2219  ///        directory lookup.
2220  /// \param LookupFromFile For #include_next, the starting file for the directory
2221  ///        lookup.
HandleHeaderIncludeOrImport(SourceLocation HashLoc,Token & IncludeTok,Token & FilenameTok,SourceLocation EndLoc,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)2222  Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2223      SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2224      SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2225      const FileEntry *LookupFromFile) {
2226    SmallString<128> FilenameBuffer;
2227    StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
2228    SourceLocation CharEnd = FilenameTok.getEndLoc();
2229  
2230    CharSourceRange FilenameRange
2231      = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
2232    StringRef OriginalFilename = Filename;
2233    bool isAngled =
2234      GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
2235  
2236    // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2237    // error.
2238    if (Filename.empty())
2239      return {ImportAction::None};
2240  
2241    bool IsImportDecl = HashLoc.isInvalid();
2242    SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2243  
2244    // Complain about attempts to #include files in an audit pragma.
2245    if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
2246      Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2247      Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);
2248  
2249      // Immediately leave the pragma.
2250      PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
2251    }
2252  
2253    // Complain about attempts to #include files in an assume-nonnull pragma.
2254    if (PragmaAssumeNonNullLoc.isValid()) {
2255      Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2256      Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
2257  
2258      // Immediately leave the pragma.
2259      PragmaAssumeNonNullLoc = SourceLocation();
2260    }
2261  
2262    if (HeaderInfo.HasIncludeAliasMap()) {
2263      // Map the filename with the brackets still attached.  If the name doesn't
2264      // map to anything, fall back on the filename we've already gotten the
2265      // spelling for.
2266      StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
2267      if (!NewName.empty())
2268        Filename = NewName;
2269    }
2270  
2271    // Search include directories.
2272    bool IsMapped = false;
2273    bool IsFrameworkFound = false;
2274    ConstSearchDirIterator CurDir = nullptr;
2275    SmallString<1024> SearchPath;
2276    SmallString<1024> RelativePath;
2277    // We get the raw path only if we have 'Callbacks' to which we later pass
2278    // the path.
2279    ModuleMap::KnownHeader SuggestedModule;
2280    SourceLocation FilenameLoc = FilenameTok.getLocation();
2281    StringRef LookupFilename = Filename;
2282  
2283    // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2284    // is unnecessary on Windows since the filesystem there handles backslashes.
2285    SmallString<128> NormalizedPath;
2286    llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2287    if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
2288      NormalizedPath = Filename.str();
2289      llvm::sys::path::native(NormalizedPath);
2290      LookupFilename = NormalizedPath;
2291      BackslashStyle = llvm::sys::path::Style::windows;
2292    }
2293  
2294    OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2295        &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2296        IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2297        LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2298  
2299    if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2300      if (File && isPCHThroughHeader(&File->getFileEntry()))
2301        SkippingUntilPCHThroughHeader = false;
2302      return {ImportAction::None};
2303    }
2304  
2305    // Should we enter the source file? Set to Skip if either the source file is
2306    // known to have no effect beyond its effect on module visibility -- that is,
2307    // if it's got an include guard that is already defined, set to Import if it
2308    // is a modular header we've already built and should import.
2309  
2310    // For C++20 Modules
2311    // [cpp.include]/7 If the header identified by the header-name denotes an
2312    // importable header, it is implementation-defined whether the #include
2313    // preprocessing directive is instead replaced by an import directive.
2314    // For this implementation, the translation is permitted when we are parsing
2315    // the Global Module Fragment, and not otherwise (the cases where it would be
2316    // valid to replace an include with an import are highly constrained once in
2317    // named module purview; this choice avoids considerable complexity in
2318    // determining valid cases).
2319  
2320    enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2321  
2322    if (PPOpts->SingleFileParseMode)
2323      Action = IncludeLimitReached;
2324  
2325    // If we've reached the max allowed include depth, it is usually due to an
2326    // include cycle. Don't enter already processed files again as it can lead to
2327    // reaching the max allowed include depth again.
2328    if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2329        alreadyIncluded(*File))
2330      Action = IncludeLimitReached;
2331  
2332    // FIXME: We do not have a good way to disambiguate C++ clang modules from
2333    // C++ standard modules (other than use/non-use of Header Units).
2334  
2335    Module *ModuleToImport = SuggestedModule.getModule();
2336  
2337    bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport &&
2338                                 !ModuleToImport->isForBuilding(getLangOpts());
2339  
2340    // Maybe a usable Header Unit
2341    bool UsableHeaderUnit = false;
2342    if (getLangOpts().CPlusPlusModules && ModuleToImport &&
2343        ModuleToImport->isHeaderUnit()) {
2344      if (TrackGMFState.inGMF() || IsImportDecl)
2345        UsableHeaderUnit = true;
2346      else if (!IsImportDecl) {
2347        // This is a Header Unit that we do not include-translate
2348        ModuleToImport = nullptr;
2349      }
2350    }
2351    // Maybe a usable clang header module.
2352    bool UsableClangHeaderModule =
2353        (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
2354        ModuleToImport && !ModuleToImport->isHeaderUnit();
2355  
2356    // Determine whether we should try to import the module for this #include, if
2357    // there is one. Don't do so if precompiled module support is disabled or we
2358    // are processing this module textually (because we're building the module).
2359    if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2360      // If this include corresponds to a module but that module is
2361      // unavailable, diagnose the situation and bail out.
2362      // FIXME: Remove this; loadModule does the same check (but produces
2363      // slightly worse diagnostics).
2364      if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), *ModuleToImport,
2365                                 getDiagnostics())) {
2366        Diag(FilenameTok.getLocation(),
2367             diag::note_implicit_top_level_module_import_here)
2368            << ModuleToImport->getTopLevelModuleName();
2369        return {ImportAction::None};
2370      }
2371  
2372      // Compute the module access path corresponding to this module.
2373      // FIXME: Should we have a second loadModule() overload to avoid this
2374      // extra lookup step?
2375      SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
2376      for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent)
2377        Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
2378                                      FilenameTok.getLocation()));
2379      std::reverse(Path.begin(), Path.end());
2380  
2381      // Warn that we're replacing the include/import with a module import.
2382      if (!IsImportDecl)
2383        diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);
2384  
2385      // Load the module to import its macros. We'll make the declarations
2386      // visible when the parser gets here.
2387      // FIXME: Pass ModuleToImport in here rather than converting it to a path
2388      // and making the module loader convert it back again.
2389      ModuleLoadResult Imported = TheModuleLoader.loadModule(
2390          IncludeTok.getLocation(), Path, Module::Hidden,
2391          /*IsInclusionDirective=*/true);
2392      assert((Imported == nullptr || Imported == ModuleToImport) &&
2393             "the imported module is different than the suggested one");
2394  
2395      if (Imported) {
2396        Action = Import;
2397      } else if (Imported.isMissingExpected()) {
2398        markClangModuleAsAffecting(
2399            static_cast<Module *>(Imported)->getTopLevelModule());
2400        // We failed to find a submodule that we assumed would exist (because it
2401        // was in the directory of an umbrella header, for instance), but no
2402        // actual module containing it exists (because the umbrella header is
2403        // incomplete).  Treat this as a textual inclusion.
2404        ModuleToImport = nullptr;
2405      } else if (Imported.isConfigMismatch()) {
2406        // On a configuration mismatch, enter the header textually. We still know
2407        // that it's part of the corresponding module.
2408      } else {
2409        // We hit an error processing the import. Bail out.
2410        if (hadModuleLoaderFatalFailure()) {
2411          // With a fatal failure in the module loader, we abort parsing.
2412          Token &Result = IncludeTok;
2413          assert(CurLexer && "#include but no current lexer set!");
2414          Result.startToken();
2415          CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
2416          CurLexer->cutOffLexing();
2417        }
2418        return {ImportAction::None};
2419      }
2420    }
2421  
2422    // The #included file will be considered to be a system header if either it is
2423    // in a system include directory, or if the #includer is a system include
2424    // header.
2425    SrcMgr::CharacteristicKind FileCharacter =
2426        SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
2427    if (File)
2428      FileCharacter = std::max(HeaderInfo.getFileDirFlavor(*File), FileCharacter);
2429  
2430    // If this is a '#import' or an import-declaration, don't re-enter the file.
2431    //
2432    // FIXME: If we have a suggested module for a '#include', and we've already
2433    // visited this file, don't bother entering it again. We know it has no
2434    // further effect.
2435    bool EnterOnce =
2436        IsImportDecl ||
2437        IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2438  
2439    bool IsFirstIncludeOfFile = false;
2440  
2441    // Ask HeaderInfo if we should enter this #include file.  If not, #including
2442    // this file will have no effect.
2443    if (Action == Enter && File &&
2444        !HeaderInfo.ShouldEnterIncludeFile(*this, *File, EnterOnce,
2445                                           getLangOpts().Modules, ModuleToImport,
2446                                           IsFirstIncludeOfFile)) {
2447      // C++ standard modules:
2448      // If we are not in the GMF, then we textually include only
2449      // clang modules:
2450      // Even if we've already preprocessed this header once and know that we
2451      // don't need to see its contents again, we still need to import it if it's
2452      // modular because we might not have imported it from this submodule before.
2453      //
2454      // FIXME: We don't do this when compiling a PCH because the AST
2455      // serialization layer can't cope with it. This means we get local
2456      // submodule visibility semantics wrong in that case.
2457      if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2458        Action = TrackGMFState.inGMF() ? Import : Skip;
2459      else
2460        Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip;
2461    }
2462  
2463    // Check for circular inclusion of the main file.
2464    // We can't generate a consistent preamble with regard to the conditional
2465    // stack if the main file is included again as due to the preamble bounds
2466    // some directives (e.g. #endif of a header guard) will never be seen.
2467    // Since this will lead to confusing errors, avoid the inclusion.
2468    if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2469        SourceMgr.isMainFile(File->getFileEntry())) {
2470      Diag(FilenameTok.getLocation(),
2471           diag::err_pp_including_mainfile_in_preamble);
2472      return {ImportAction::None};
2473    }
2474  
2475    if (Callbacks && !IsImportDecl) {
2476      // Notify the callback object that we've seen an inclusion directive.
2477      // FIXME: Use a different callback for a pp-import?
2478      Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled,
2479                                    FilenameRange, File, SearchPath, RelativePath,
2480                                    SuggestedModule.getModule(), Action == Import,
2481                                    FileCharacter);
2482      if (Action == Skip && File)
2483        Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
2484    }
2485  
2486    if (!File)
2487      return {ImportAction::None};
2488  
2489    // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2490    // module corresponding to the named header.
2491    if (IsImportDecl && !ModuleToImport) {
2492      Diag(FilenameTok, diag::err_header_import_not_header_unit)
2493        << OriginalFilename << File->getName();
2494      return {ImportAction::None};
2495    }
2496  
2497    // Issue a diagnostic if the name of the file on disk has a different case
2498    // than the one we're about to open.
2499    const bool CheckIncludePathPortability =
2500        !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2501  
2502    if (CheckIncludePathPortability) {
2503      StringRef Name = LookupFilename;
2504      StringRef NameWithoriginalSlashes = Filename;
2505  #if defined(_WIN32)
2506      // Skip UNC prefix if present. (tryGetRealPathName() always
2507      // returns a path with the prefix skipped.)
2508      bool NameWasUNC = Name.consume_front("\\\\?\\");
2509      NameWithoriginalSlashes.consume_front("\\\\?\\");
2510  #endif
2511      StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2512      SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
2513                                            llvm::sys::path::end(Name));
2514  #if defined(_WIN32)
2515      // -Wnonportable-include-path is designed to diagnose includes using
2516      // case even on systems with a case-insensitive file system.
2517      // On Windows, RealPathName always starts with an upper-case drive
2518      // letter for absolute paths, but Name might start with either
2519      // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2520      // ("foo" will always have on-disk case, no matter which case was
2521      // used in the cd command). To not emit this warning solely for
2522      // the drive letter, whose case is dependent on if `cd` is used
2523      // with upper- or lower-case drive letters, always consider the
2524      // given drive letter case as correct for the purpose of this warning.
2525      SmallString<128> FixedDriveRealPath;
2526      if (llvm::sys::path::is_absolute(Name) &&
2527          llvm::sys::path::is_absolute(RealPathName) &&
2528          toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2529          isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2530        assert(Components.size() >= 3 && "should have drive, backslash, name");
2531        assert(Components[0].size() == 2 && "should start with drive");
2532        assert(Components[0][1] == ':' && "should have colon");
2533        FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2534        RealPathName = FixedDriveRealPath;
2535      }
2536  #endif
2537  
2538      if (trySimplifyPath(Components, RealPathName, BackslashStyle)) {
2539        SmallString<128> Path;
2540        Path.reserve(Name.size()+2);
2541        Path.push_back(isAngled ? '<' : '"');
2542  
2543        const auto IsSep = [BackslashStyle](char c) {
2544          return llvm::sys::path::is_separator(c, BackslashStyle);
2545        };
2546  
2547        for (auto Component : Components) {
2548          // On POSIX, Components will contain a single '/' as first element
2549          // exactly if Name is an absolute path.
2550          // On Windows, it will contain "C:" followed by '\' for absolute paths.
2551          // The drive letter is optional for absolute paths on Windows, but
2552          // clang currently cannot process absolute paths in #include lines that
2553          // don't have a drive.
2554          // If the first entry in Components is a directory separator,
2555          // then the code at the bottom of this loop that keeps the original
2556          // directory separator style copies it. If the second entry is
2557          // a directory separator (the C:\ case), then that separator already
2558          // got copied when the C: was processed and we want to skip that entry.
2559          if (!(Component.size() == 1 && IsSep(Component[0])))
2560            Path.append(Component);
2561          else if (Path.size() != 1)
2562            continue;
2563  
2564          // Append the separator(s) the user used, or the close quote
2565          if (Path.size() > NameWithoriginalSlashes.size()) {
2566            Path.push_back(isAngled ? '>' : '"');
2567            continue;
2568          }
2569          assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2570          do
2571            Path.push_back(NameWithoriginalSlashes[Path.size()-1]);
2572          while (Path.size() <= NameWithoriginalSlashes.size() &&
2573                 IsSep(NameWithoriginalSlashes[Path.size()-1]));
2574        }
2575  
2576  #if defined(_WIN32)
2577        // Restore UNC prefix if it was there.
2578        if (NameWasUNC)
2579          Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2580  #endif
2581  
2582        // For user files and known standard headers, issue a diagnostic.
2583        // For other system headers, don't. They can be controlled separately.
2584        auto DiagId =
2585            (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
2586                ? diag::pp_nonportable_path
2587                : diag::pp_nonportable_system_path;
2588        Diag(FilenameTok, DiagId) << Path <<
2589          FixItHint::CreateReplacement(FilenameRange, Path);
2590      }
2591    }
2592  
2593    switch (Action) {
2594    case Skip:
2595      // If we don't need to enter the file, stop now.
2596      if (ModuleToImport)
2597        return {ImportAction::SkippedModuleImport, ModuleToImport};
2598      return {ImportAction::None};
2599  
2600    case IncludeLimitReached:
2601      // If we reached our include limit and don't want to enter any more files,
2602      // don't go any further.
2603      return {ImportAction::None};
2604  
2605    case Import: {
2606      // If this is a module import, make it visible if needed.
2607      assert(ModuleToImport && "no module to import");
2608  
2609      makeModuleVisible(ModuleToImport, EndLoc);
2610  
2611      if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2612          tok::pp___include_macros)
2613        return {ImportAction::None};
2614  
2615      return {ImportAction::ModuleImport, ModuleToImport};
2616    }
2617  
2618    case Enter:
2619      break;
2620    }
2621  
2622    // Check that we don't have infinite #include recursion.
2623    if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2624      Diag(FilenameTok, diag::err_pp_include_too_deep);
2625      HasReachedMaxIncludeDepth = true;
2626      return {ImportAction::None};
2627    }
2628  
2629    if (isAngled && isInNamedModule())
2630      Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview)
2631          << getNamedModuleName();
2632  
2633    // Look up the file, create a File ID for it.
2634    SourceLocation IncludePos = FilenameTok.getLocation();
2635    // If the filename string was the result of macro expansions, set the include
2636    // position on the file where it will be included and after the expansions.
2637    if (IncludePos.isMacroID())
2638      IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
2639    FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
2640    if (!FID.isValid()) {
2641      TheModuleLoader.HadFatalFailure = true;
2642      return ImportAction::Failure;
2643    }
2644  
2645    // If all is good, enter the new file!
2646    if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(),
2647                        IsFirstIncludeOfFile))
2648      return {ImportAction::None};
2649  
2650    // Determine if we're switching to building a new submodule, and which one.
2651    // This does not apply for C++20 modules header units.
2652    if (ModuleToImport && !ModuleToImport->isHeaderUnit()) {
2653      if (ModuleToImport->getTopLevelModule()->ShadowingModule) {
2654        // We are building a submodule that belongs to a shadowed module. This
2655        // means we find header files in the shadowed module.
2656        Diag(ModuleToImport->DefinitionLoc,
2657             diag::err_module_build_shadowed_submodule)
2658            << ModuleToImport->getFullModuleName();
2659        Diag(ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2660             diag::note_previous_definition);
2661        return {ImportAction::None};
2662      }
2663      // When building a pch, -fmodule-name tells the compiler to textually
2664      // include headers in the specified module. We are not building the
2665      // specified module.
2666      //
2667      // FIXME: This is the wrong way to handle this. We should produce a PCH
2668      // that behaves the same as the header would behave in a compilation using
2669      // that PCH, which means we should enter the submodule. We need to teach
2670      // the AST serialization layer to deal with the resulting AST.
2671      if (getLangOpts().CompilingPCH &&
2672          ModuleToImport->isForBuilding(getLangOpts()))
2673        return {ImportAction::None};
2674  
2675      assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2676      CurLexerSubmodule = ModuleToImport;
2677  
2678      // Let the macro handling code know that any future macros are within
2679      // the new submodule.
2680      EnterSubmodule(ModuleToImport, EndLoc, /*ForPragma*/ false);
2681  
2682      // Let the parser know that any future declarations are within the new
2683      // submodule.
2684      // FIXME: There's no point doing this if we're handling a #__include_macros
2685      // directive.
2686      return {ImportAction::ModuleBegin, ModuleToImport};
2687    }
2688  
2689    assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2690    return {ImportAction::None};
2691  }
2692  
2693  /// HandleIncludeNextDirective - Implements \#include_next.
2694  ///
HandleIncludeNextDirective(SourceLocation HashLoc,Token & IncludeNextTok)2695  void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2696                                                Token &IncludeNextTok) {
2697    Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2698  
2699    ConstSearchDirIterator Lookup = nullptr;
2700    const FileEntry *LookupFromFile;
2701    std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2702  
2703    return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
2704                                  LookupFromFile);
2705  }
2706  
2707  /// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
HandleMicrosoftImportDirective(Token & Tok)2708  void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2709    // The Microsoft #import directive takes a type library and generates header
2710    // files from it, and includes those.  This is beyond the scope of what clang
2711    // does, so we ignore it and error out.  However, #import can optionally have
2712    // trailing attributes that span multiple lines.  We're going to eat those
2713    // so we can continue processing from there.
2714    Diag(Tok, diag::err_pp_import_directive_ms );
2715  
2716    // Read tokens until we get to the end of the directive.  Note that the
2717    // directive can be split over multiple lines using the backslash character.
2718    DiscardUntilEndOfDirective();
2719  }
2720  
2721  /// HandleImportDirective - Implements \#import.
2722  ///
HandleImportDirective(SourceLocation HashLoc,Token & ImportTok)2723  void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2724                                           Token &ImportTok) {
2725    if (!LangOpts.ObjC) {  // #import is standard for ObjC.
2726      if (LangOpts.MSVCCompat)
2727        return HandleMicrosoftImportDirective(ImportTok);
2728      Diag(ImportTok, diag::ext_pp_import_directive);
2729    }
2730    return HandleIncludeDirective(HashLoc, ImportTok);
2731  }
2732  
2733  /// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2734  /// pseudo directive in the predefines buffer.  This handles it by sucking all
2735  /// tokens through the preprocessor and discarding them (only keeping the side
2736  /// effects on the preprocessor).
HandleIncludeMacrosDirective(SourceLocation HashLoc,Token & IncludeMacrosTok)2737  void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2738                                                  Token &IncludeMacrosTok) {
2739    // This directive should only occur in the predefines buffer.  If not, emit an
2740    // error and reject it.
2741    SourceLocation Loc = IncludeMacrosTok.getLocation();
2742    if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2743      Diag(IncludeMacrosTok.getLocation(),
2744           diag::pp_include_macros_out_of_predefines);
2745      DiscardUntilEndOfDirective();
2746      return;
2747    }
2748  
2749    // Treat this as a normal #include for checking purposes.  If this is
2750    // successful, it will push a new lexer onto the include stack.
2751    HandleIncludeDirective(HashLoc, IncludeMacrosTok);
2752  
2753    Token TmpTok;
2754    do {
2755      Lex(TmpTok);
2756      assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2757    } while (TmpTok.isNot(tok::hashhash));
2758  }
2759  
2760  //===----------------------------------------------------------------------===//
2761  // Preprocessor Macro Directive Handling.
2762  //===----------------------------------------------------------------------===//
2763  
2764  /// ReadMacroParameterList - The ( starting a parameter list of a macro
2765  /// definition has just been read.  Lex the rest of the parameters and the
2766  /// closing ), updating MI with what we learn.  Return true if an error occurs
2767  /// parsing the param list.
ReadMacroParameterList(MacroInfo * MI,Token & Tok)2768  bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2769    SmallVector<IdentifierInfo*, 32> Parameters;
2770  
2771    while (true) {
2772      LexUnexpandedNonComment(Tok);
2773      switch (Tok.getKind()) {
2774      case tok::r_paren:
2775        // Found the end of the parameter list.
2776        if (Parameters.empty())  // #define FOO()
2777          return false;
2778        // Otherwise we have #define FOO(A,)
2779        Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2780        return true;
2781      case tok::ellipsis:  // #define X(... -> C99 varargs
2782        if (!LangOpts.C99)
2783          Diag(Tok, LangOpts.CPlusPlus11 ?
2784               diag::warn_cxx98_compat_variadic_macro :
2785               diag::ext_variadic_macro);
2786  
2787        // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2788        if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2789          Diag(Tok, diag::ext_pp_opencl_variadic_macros);
2790        }
2791  
2792        // Lex the token after the identifier.
2793        LexUnexpandedNonComment(Tok);
2794        if (Tok.isNot(tok::r_paren)) {
2795          Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2796          return true;
2797        }
2798        // Add the __VA_ARGS__ identifier as a parameter.
2799        Parameters.push_back(Ident__VA_ARGS__);
2800        MI->setIsC99Varargs();
2801        MI->setParameterList(Parameters, BP);
2802        return false;
2803      case tok::eod:  // #define X(
2804        Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2805        return true;
2806      default:
2807        // Handle keywords and identifiers here to accept things like
2808        // #define Foo(for) for.
2809        IdentifierInfo *II = Tok.getIdentifierInfo();
2810        if (!II) {
2811          // #define X(1
2812          Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2813          return true;
2814        }
2815  
2816        // If this is already used as a parameter, it is used multiple times (e.g.
2817        // #define X(A,A.
2818        if (llvm::is_contained(Parameters, II)) { // C99 6.10.3p6
2819          Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
2820          return true;
2821        }
2822  
2823        // Add the parameter to the macro info.
2824        Parameters.push_back(II);
2825  
2826        // Lex the token after the identifier.
2827        LexUnexpandedNonComment(Tok);
2828  
2829        switch (Tok.getKind()) {
2830        default:          // #define X(A B
2831          Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2832          return true;
2833        case tok::r_paren: // #define X(A)
2834          MI->setParameterList(Parameters, BP);
2835          return false;
2836        case tok::comma:  // #define X(A,
2837          break;
2838        case tok::ellipsis:  // #define X(A... -> GCC extension
2839          // Diagnose extension.
2840          Diag(Tok, diag::ext_named_variadic_macro);
2841  
2842          // Lex the token after the identifier.
2843          LexUnexpandedNonComment(Tok);
2844          if (Tok.isNot(tok::r_paren)) {
2845            Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2846            return true;
2847          }
2848  
2849          MI->setIsGNUVarargs();
2850          MI->setParameterList(Parameters, BP);
2851          return false;
2852        }
2853      }
2854    }
2855  }
2856  
isConfigurationPattern(Token & MacroName,MacroInfo * MI,const LangOptions & LOptions)2857  static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2858                                     const LangOptions &LOptions) {
2859    if (MI->getNumTokens() == 1) {
2860      const Token &Value = MI->getReplacementToken(0);
2861  
2862      // Macro that is identity, like '#define inline inline' is a valid pattern.
2863      if (MacroName.getKind() == Value.getKind())
2864        return true;
2865  
2866      // Macro that maps a keyword to the same keyword decorated with leading/
2867      // trailing underscores is a valid pattern:
2868      //    #define inline __inline
2869      //    #define inline __inline__
2870      //    #define inline _inline (in MS compatibility mode)
2871      StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2872      if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2873        if (!II->isKeyword(LOptions))
2874          return false;
2875        StringRef ValueText = II->getName();
2876        StringRef TrimmedValue = ValueText;
2877        if (!ValueText.starts_with("__")) {
2878          if (ValueText.starts_with("_"))
2879            TrimmedValue = TrimmedValue.drop_front(1);
2880          else
2881            return false;
2882        } else {
2883          TrimmedValue = TrimmedValue.drop_front(2);
2884          if (TrimmedValue.ends_with("__"))
2885            TrimmedValue = TrimmedValue.drop_back(2);
2886        }
2887        return TrimmedValue == MacroText;
2888      } else {
2889        return false;
2890      }
2891    }
2892  
2893    // #define inline
2894    return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
2895                             tok::kw_const) &&
2896           MI->getNumTokens() == 0;
2897  }
2898  
2899  // ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2900  // entire line) of the macro's tokens and adds them to MacroInfo, and while
2901  // doing so performs certain validity checks including (but not limited to):
2902  //   - # (stringization) is followed by a macro parameter
2903  //
2904  //  Returns a nullptr if an invalid sequence of tokens is encountered or returns
2905  //  a pointer to a MacroInfo object.
2906  
ReadOptionalMacroParameterListAndBody(const Token & MacroNameTok,const bool ImmediatelyAfterHeaderGuard)2907  MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
2908      const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
2909  
2910    Token LastTok = MacroNameTok;
2911    // Create the new macro.
2912    MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());
2913  
2914    Token Tok;
2915    LexUnexpandedToken(Tok);
2916  
2917    // Ensure we consume the rest of the macro body if errors occur.
2918    auto _ = llvm::make_scope_exit([&]() {
2919      // The flag indicates if we are still waiting for 'eod'.
2920      if (CurLexer->ParsingPreprocessorDirective)
2921        DiscardUntilEndOfDirective();
2922    });
2923  
2924    // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
2925    // within their appropriate context.
2926    VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
2927  
2928    // If this is a function-like macro definition, parse the argument list,
2929    // marking each of the identifiers as being used as macro arguments.  Also,
2930    // check other constraints on the first token of the macro body.
2931    if (Tok.is(tok::eod)) {
2932      if (ImmediatelyAfterHeaderGuard) {
2933        // Save this macro information since it may part of a header guard.
2934        CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
2935                                          MacroNameTok.getLocation());
2936      }
2937      // If there is no body to this macro, we have no special handling here.
2938    } else if (Tok.hasLeadingSpace()) {
2939      // This is a normal token with leading space.  Clear the leading space
2940      // marker on the first token to get proper expansion.
2941      Tok.clearFlag(Token::LeadingSpace);
2942    } else if (Tok.is(tok::l_paren)) {
2943      // This is a function-like macro definition.  Read the argument list.
2944      MI->setIsFunctionLike();
2945      if (ReadMacroParameterList(MI, LastTok))
2946        return nullptr;
2947  
2948      // If this is a definition of an ISO C/C++ variadic function-like macro (not
2949      // using the GNU named varargs extension) inform our variadic scope guard
2950      // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
2951      // allowed only within the definition of a variadic macro.
2952  
2953      if (MI->isC99Varargs()) {
2954        VariadicMacroScopeGuard.enterScope();
2955      }
2956  
2957      // Read the first token after the arg list for down below.
2958      LexUnexpandedToken(Tok);
2959    } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
2960      // C99 requires whitespace between the macro definition and the body.  Emit
2961      // a diagnostic for something like "#define X+".
2962      Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
2963    } else {
2964      // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
2965      // first character of a replacement list is not a character required by
2966      // subclause 5.2.1, then there shall be white-space separation between the
2967      // identifier and the replacement list.".  5.2.1 lists this set:
2968      //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
2969      // is irrelevant here.
2970      bool isInvalid = false;
2971      if (Tok.is(tok::at)) // @ is not in the list above.
2972        isInvalid = true;
2973      else if (Tok.is(tok::unknown)) {
2974        // If we have an unknown token, it is something strange like "`".  Since
2975        // all of valid characters would have lexed into a single character
2976        // token of some sort, we know this is not a valid case.
2977        isInvalid = true;
2978      }
2979      if (isInvalid)
2980        Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
2981      else
2982        Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
2983    }
2984  
2985    if (!Tok.is(tok::eod))
2986      LastTok = Tok;
2987  
2988    SmallVector<Token, 16> Tokens;
2989  
2990    // Read the rest of the macro body.
2991    if (MI->isObjectLike()) {
2992      // Object-like macros are very simple, just read their body.
2993      while (Tok.isNot(tok::eod)) {
2994        LastTok = Tok;
2995        Tokens.push_back(Tok);
2996        // Get the next token of the macro.
2997        LexUnexpandedToken(Tok);
2998      }
2999    } else {
3000      // Otherwise, read the body of a function-like macro.  While we are at it,
3001      // check C99 6.10.3.2p1: ensure that # operators are followed by macro
3002      // parameters in function-like macro expansions.
3003  
3004      VAOptDefinitionContext VAOCtx(*this);
3005  
3006      while (Tok.isNot(tok::eod)) {
3007        LastTok = Tok;
3008  
3009        if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
3010          Tokens.push_back(Tok);
3011  
3012          if (VAOCtx.isVAOptToken(Tok)) {
3013            // If we're already within a VAOPT, emit an error.
3014            if (VAOCtx.isInVAOpt()) {
3015              Diag(Tok, diag::err_pp_vaopt_nested_use);
3016              return nullptr;
3017            }
3018            // Ensure VAOPT is followed by a '(' .
3019            LexUnexpandedToken(Tok);
3020            if (Tok.isNot(tok::l_paren)) {
3021              Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
3022              return nullptr;
3023            }
3024            Tokens.push_back(Tok);
3025            VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
3026            LexUnexpandedToken(Tok);
3027            if (Tok.is(tok::hashhash)) {
3028              Diag(Tok, diag::err_vaopt_paste_at_start);
3029              return nullptr;
3030            }
3031            continue;
3032          } else if (VAOCtx.isInVAOpt()) {
3033            if (Tok.is(tok::r_paren)) {
3034              if (VAOCtx.sawClosingParen()) {
3035                assert(Tokens.size() >= 3 &&
3036                       "Must have seen at least __VA_OPT__( "
3037                       "and a subsequent tok::r_paren");
3038                if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
3039                  Diag(Tok, diag::err_vaopt_paste_at_end);
3040                  return nullptr;
3041                }
3042              }
3043            } else if (Tok.is(tok::l_paren)) {
3044              VAOCtx.sawOpeningParen(Tok.getLocation());
3045            }
3046          }
3047          // Get the next token of the macro.
3048          LexUnexpandedToken(Tok);
3049          continue;
3050        }
3051  
3052        // If we're in -traditional mode, then we should ignore stringification
3053        // and token pasting. Mark the tokens as unknown so as not to confuse
3054        // things.
3055        if (getLangOpts().TraditionalCPP) {
3056          Tok.setKind(tok::unknown);
3057          Tokens.push_back(Tok);
3058  
3059          // Get the next token of the macro.
3060          LexUnexpandedToken(Tok);
3061          continue;
3062        }
3063  
3064        if (Tok.is(tok::hashhash)) {
3065          // If we see token pasting, check if it looks like the gcc comma
3066          // pasting extension.  We'll use this information to suppress
3067          // diagnostics later on.
3068  
3069          // Get the next token of the macro.
3070          LexUnexpandedToken(Tok);
3071  
3072          if (Tok.is(tok::eod)) {
3073            Tokens.push_back(LastTok);
3074            break;
3075          }
3076  
3077          if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
3078              Tokens[Tokens.size() - 1].is(tok::comma))
3079            MI->setHasCommaPasting();
3080  
3081          // Things look ok, add the '##' token to the macro.
3082          Tokens.push_back(LastTok);
3083          continue;
3084        }
3085  
3086        // Our Token is a stringization operator.
3087        // Get the next token of the macro.
3088        LexUnexpandedToken(Tok);
3089  
3090        // Check for a valid macro arg identifier or __VA_OPT__.
3091        if (!VAOCtx.isVAOptToken(Tok) &&
3092            (Tok.getIdentifierInfo() == nullptr ||
3093             MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {
3094  
3095          // If this is assembler-with-cpp mode, we accept random gibberish after
3096          // the '#' because '#' is often a comment character.  However, change
3097          // the kind of the token to tok::unknown so that the preprocessor isn't
3098          // confused.
3099          if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
3100            LastTok.setKind(tok::unknown);
3101            Tokens.push_back(LastTok);
3102            continue;
3103          } else {
3104            Diag(Tok, diag::err_pp_stringize_not_parameter)
3105              << LastTok.is(tok::hashat);
3106            return nullptr;
3107          }
3108        }
3109  
3110        // Things look ok, add the '#' and param name tokens to the macro.
3111        Tokens.push_back(LastTok);
3112  
3113        // If the token following '#' is VAOPT, let the next iteration handle it
3114        // and check it for correctness, otherwise add the token and prime the
3115        // loop with the next one.
3116        if (!VAOCtx.isVAOptToken(Tok)) {
3117          Tokens.push_back(Tok);
3118          LastTok = Tok;
3119  
3120          // Get the next token of the macro.
3121          LexUnexpandedToken(Tok);
3122        }
3123      }
3124      if (VAOCtx.isInVAOpt()) {
3125        assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
3126        Diag(Tok, diag::err_pp_expected_after)
3127          << LastTok.getKind() << tok::r_paren;
3128        Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
3129        return nullptr;
3130      }
3131    }
3132    MI->setDefinitionEndLoc(LastTok.getLocation());
3133  
3134    MI->setTokens(Tokens, BP);
3135    return MI;
3136  }
3137  
isObjCProtectedMacro(const IdentifierInfo * II)3138  static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3139    return II->isStr("__strong") || II->isStr("__weak") ||
3140           II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
3141  }
3142  
3143  /// HandleDefineDirective - Implements \#define.  This consumes the entire macro
3144  /// line then lets the caller lex the next real token.
HandleDefineDirective(Token & DefineTok,const bool ImmediatelyAfterHeaderGuard)3145  void Preprocessor::HandleDefineDirective(
3146      Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3147    ++NumDefined;
3148  
3149    Token MacroNameTok;
3150    bool MacroShadowsKeyword;
3151    ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
3152  
3153    // Error reading macro name?  If so, diagnostic already issued.
3154    if (MacroNameTok.is(tok::eod))
3155      return;
3156  
3157    IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3158    // Issue a final pragma warning if we're defining a macro that was has been
3159    // undefined and is being redefined.
3160    if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3161      emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3162  
3163    // If we are supposed to keep comments in #defines, reenable comment saving
3164    // mode.
3165    if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3166  
3167    MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3168        MacroNameTok, ImmediatelyAfterHeaderGuard);
3169  
3170    if (!MI) return;
3171  
3172    if (MacroShadowsKeyword &&
3173        !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
3174      Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
3175    }
3176    // Check that there is no paste (##) operator at the beginning or end of the
3177    // replacement list.
3178    unsigned NumTokens = MI->getNumTokens();
3179    if (NumTokens != 0) {
3180      if (MI->getReplacementToken(0).is(tok::hashhash)) {
3181        Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
3182        return;
3183      }
3184      if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
3185        Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
3186        return;
3187      }
3188    }
3189  
3190    // When skipping just warn about macros that do not match.
3191    if (SkippingUntilPCHThroughHeader) {
3192      const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
3193      if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
3194                               /*Syntactic=*/LangOpts.MicrosoftExt))
3195        Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
3196            << MacroNameTok.getIdentifierInfo();
3197      // Issue the diagnostic but allow the change if msvc extensions are enabled
3198      if (!LangOpts.MicrosoftExt)
3199        return;
3200    }
3201  
3202    // Finally, if this identifier already had a macro defined for it, verify that
3203    // the macro bodies are identical, and issue diagnostics if they are not.
3204    if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
3205      // Final macros are hard-mode: they always warn. Even if the bodies are
3206      // identical. Even if they are in system headers. Even if they are things we
3207      // would silently allow in the past.
3208      if (MacroNameTok.getIdentifierInfo()->isFinal())
3209        emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3210  
3211      // In Objective-C, ignore attempts to directly redefine the builtin
3212      // definitions of the ownership qualifiers.  It's still possible to
3213      // #undef them.
3214      if (getLangOpts().ObjC &&
3215          SourceMgr.getFileID(OtherMI->getDefinitionLoc()) ==
3216              getPredefinesFileID() &&
3217          isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
3218        // Warn if it changes the tokens.
3219        if ((!getDiagnostics().getSuppressSystemWarnings() ||
3220             !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
3221            !MI->isIdenticalTo(*OtherMI, *this,
3222                               /*Syntactic=*/LangOpts.MicrosoftExt)) {
3223          Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
3224        }
3225        assert(!OtherMI->isWarnIfUnused());
3226        return;
3227      }
3228  
3229      // It is very common for system headers to have tons of macro redefinitions
3230      // and for warnings to be disabled in system headers.  If this is the case,
3231      // then don't bother calling MacroInfo::isIdenticalTo.
3232      if (!getDiagnostics().getSuppressSystemWarnings() ||
3233          !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
3234  
3235        if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3236          Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
3237  
3238        // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3239        // C++ [cpp.predefined]p4, but allow it as an extension.
3240        if (isLanguageDefinedBuiltin(SourceMgr, OtherMI, II->getName()))
3241          Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
3242        // Macros must be identical.  This means all tokens and whitespace
3243        // separation must be the same.  C99 6.10.3p2.
3244        else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3245                 !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
3246          Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
3247            << MacroNameTok.getIdentifierInfo();
3248          Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
3249        }
3250      }
3251      if (OtherMI->isWarnIfUnused())
3252        WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
3253    }
3254  
3255    DefMacroDirective *MD =
3256        appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
3257  
3258    assert(!MI->isUsed());
3259    // If we need warning for not using the macro, add its location in the
3260    // warn-because-unused-macro set. If it gets used it will be removed from set.
3261    if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
3262        !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
3263        !MacroExpansionInDirectivesOverride &&
3264        getSourceManager().getFileID(MI->getDefinitionLoc()) !=
3265            getPredefinesFileID()) {
3266      MI->setIsWarnIfUnused(true);
3267      WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
3268    }
3269  
3270    // If the callbacks want to know, tell them about the macro definition.
3271    if (Callbacks)
3272      Callbacks->MacroDefined(MacroNameTok, MD);
3273  
3274    // If we're in MS compatibility mode and the macro being defined is the
3275    // assert macro, implicitly add a macro definition for static_assert to work
3276    // around their broken assert.h header file in C. Only do so if there isn't
3277    // already a static_assert macro defined.
3278    if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
3279        MacroNameTok.getIdentifierInfo()->isStr("assert") &&
3280        !isMacroDefined("static_assert")) {
3281      MacroInfo *MI = AllocateMacroInfo(SourceLocation());
3282  
3283      Token Tok;
3284      Tok.startToken();
3285      Tok.setKind(tok::kw__Static_assert);
3286      Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
3287      MI->setTokens({Tok}, BP);
3288      (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
3289    }
3290  }
3291  
3292  /// HandleUndefDirective - Implements \#undef.
3293  ///
HandleUndefDirective()3294  void Preprocessor::HandleUndefDirective() {
3295    ++NumUndefined;
3296  
3297    Token MacroNameTok;
3298    ReadMacroName(MacroNameTok, MU_Undef);
3299  
3300    // Error reading macro name?  If so, diagnostic already issued.
3301    if (MacroNameTok.is(tok::eod))
3302      return;
3303  
3304    // Check to see if this is the last token on the #undef line.
3305    CheckEndOfDirective("undef");
3306  
3307    // Okay, we have a valid identifier to undef.
3308    auto *II = MacroNameTok.getIdentifierInfo();
3309    auto MD = getMacroDefinition(II);
3310    UndefMacroDirective *Undef = nullptr;
3311  
3312    if (II->isFinal())
3313      emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);
3314  
3315    // If the macro is not defined, this is a noop undef.
3316    if (const MacroInfo *MI = MD.getMacroInfo()) {
3317      if (!MI->isUsed() && MI->isWarnIfUnused())
3318        Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
3319  
3320      // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3321      // C++ [cpp.predefined]p4, but allow it as an extension.
3322      if (isLanguageDefinedBuiltin(SourceMgr, MI, II->getName()))
3323        Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
3324  
3325      if (MI->isWarnIfUnused())
3326        WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
3327  
3328      Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
3329    }
3330  
3331    // If the callbacks want to know, tell them about the macro #undef.
3332    // Note: no matter if the macro was defined or not.
3333    if (Callbacks)
3334      Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3335  
3336    if (Undef)
3337      appendMacroDirective(II, Undef);
3338  }
3339  
3340  //===----------------------------------------------------------------------===//
3341  // Preprocessor Conditional Directive Handling.
3342  //===----------------------------------------------------------------------===//
3343  
3344  /// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive.  isIfndef
3345  /// is true when this is a \#ifndef directive.  ReadAnyTokensBeforeDirective is
3346  /// true if any tokens have been returned or pp-directives activated before this
3347  /// \#ifndef has been lexed.
3348  ///
HandleIfdefDirective(Token & Result,const Token & HashToken,bool isIfndef,bool ReadAnyTokensBeforeDirective)3349  void Preprocessor::HandleIfdefDirective(Token &Result,
3350                                          const Token &HashToken,
3351                                          bool isIfndef,
3352                                          bool ReadAnyTokensBeforeDirective) {
3353    ++NumIf;
3354    Token DirectiveTok = Result;
3355  
3356    Token MacroNameTok;
3357    ReadMacroName(MacroNameTok);
3358  
3359    // Error reading macro name?  If so, diagnostic already issued.
3360    if (MacroNameTok.is(tok::eod)) {
3361      // Skip code until we get to #endif.  This helps with recovery by not
3362      // emitting an error when the #endif is reached.
3363      SkipExcludedConditionalBlock(HashToken.getLocation(),
3364                                   DirectiveTok.getLocation(),
3365                                   /*Foundnonskip*/ false, /*FoundElse*/ false);
3366      return;
3367    }
3368  
3369    emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true);
3370  
3371    // Check to see if this is the last token on the #if[n]def line.
3372    CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
3373  
3374    IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3375    auto MD = getMacroDefinition(MII);
3376    MacroInfo *MI = MD.getMacroInfo();
3377  
3378    if (CurPPLexer->getConditionalStackDepth() == 0) {
3379      // If the start of a top-level #ifdef and if the macro is not defined,
3380      // inform MIOpt that this might be the start of a proper include guard.
3381      // Otherwise it is some other form of unknown conditional which we can't
3382      // handle.
3383      if (!ReadAnyTokensBeforeDirective && !MI) {
3384        assert(isIfndef && "#ifdef shouldn't reach here");
3385        CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
3386      } else
3387        CurPPLexer->MIOpt.EnterTopLevelConditional();
3388    }
3389  
3390    // If there is a macro, process it.
3391    if (MI)  // Mark it used.
3392      markMacroAsUsed(MI);
3393  
3394    if (Callbacks) {
3395      if (isIfndef)
3396        Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
3397      else
3398        Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
3399    }
3400  
3401    bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3402      getSourceManager().isInMainFile(DirectiveTok.getLocation());
3403  
3404    // Should we include the stuff contained by this directive?
3405    if (PPOpts->SingleFileParseMode && !MI) {
3406      // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3407      // the directive blocks.
3408      CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3409                                       /*wasskip*/false, /*foundnonskip*/false,
3410                                       /*foundelse*/false);
3411    } else if (!MI == isIfndef || RetainExcludedCB) {
3412      // Yes, remember that we are inside a conditional, then lex the next token.
3413      CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3414                                       /*wasskip*/false, /*foundnonskip*/true,
3415                                       /*foundelse*/false);
3416    } else {
3417      // No, skip the contents of this block.
3418      SkipExcludedConditionalBlock(HashToken.getLocation(),
3419                                   DirectiveTok.getLocation(),
3420                                   /*Foundnonskip*/ false,
3421                                   /*FoundElse*/ false);
3422    }
3423  }
3424  
3425  /// HandleIfDirective - Implements the \#if directive.
3426  ///
HandleIfDirective(Token & IfToken,const Token & HashToken,bool ReadAnyTokensBeforeDirective)3427  void Preprocessor::HandleIfDirective(Token &IfToken,
3428                                       const Token &HashToken,
3429                                       bool ReadAnyTokensBeforeDirective) {
3430    ++NumIf;
3431  
3432    // Parse and evaluate the conditional expression.
3433    IdentifierInfo *IfNDefMacro = nullptr;
3434    const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3435    const bool ConditionalTrue = DER.Conditional;
3436    // Lexer might become invalid if we hit code completion point while evaluating
3437    // expression.
3438    if (!CurPPLexer)
3439      return;
3440  
3441    // If this condition is equivalent to #ifndef X, and if this is the first
3442    // directive seen, handle it for the multiple-include optimization.
3443    if (CurPPLexer->getConditionalStackDepth() == 0) {
3444      if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3445        // FIXME: Pass in the location of the macro name, not the 'if' token.
3446        CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
3447      else
3448        CurPPLexer->MIOpt.EnterTopLevelConditional();
3449    }
3450  
3451    if (Callbacks)
3452      Callbacks->If(
3453          IfToken.getLocation(), DER.ExprRange,
3454          (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3455  
3456    bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3457      getSourceManager().isInMainFile(IfToken.getLocation());
3458  
3459    // Should we include the stuff contained by this directive?
3460    if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
3461      // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3462      // the directive blocks.
3463      CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3464                                       /*foundnonskip*/false, /*foundelse*/false);
3465    } else if (ConditionalTrue || RetainExcludedCB) {
3466      // Yes, remember that we are inside a conditional, then lex the next token.
3467      CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3468                                     /*foundnonskip*/true, /*foundelse*/false);
3469    } else {
3470      // No, skip the contents of this block.
3471      SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
3472                                   /*Foundnonskip*/ false,
3473                                   /*FoundElse*/ false);
3474    }
3475  }
3476  
3477  /// HandleEndifDirective - Implements the \#endif directive.
3478  ///
HandleEndifDirective(Token & EndifToken)3479  void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3480    ++NumEndif;
3481  
3482    // Check that this is the whole directive.
3483    CheckEndOfDirective("endif");
3484  
3485    PPConditionalInfo CondInfo;
3486    if (CurPPLexer->popConditionalLevel(CondInfo)) {
3487      // No conditionals on the stack: this is an #endif without an #if.
3488      Diag(EndifToken, diag::err_pp_endif_without_if);
3489      return;
3490    }
3491  
3492    // If this the end of a top-level #endif, inform MIOpt.
3493    if (CurPPLexer->getConditionalStackDepth() == 0)
3494      CurPPLexer->MIOpt.ExitTopLevelConditional();
3495  
3496    assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3497           "This code should only be reachable in the non-skipping case!");
3498  
3499    if (Callbacks)
3500      Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
3501  }
3502  
3503  /// HandleElseDirective - Implements the \#else directive.
3504  ///
HandleElseDirective(Token & Result,const Token & HashToken)3505  void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3506    ++NumElse;
3507  
3508    // #else directive in a non-skipping conditional... start skipping.
3509    CheckEndOfDirective("else");
3510  
3511    PPConditionalInfo CI;
3512    if (CurPPLexer->popConditionalLevel(CI)) {
3513      Diag(Result, diag::pp_err_else_without_if);
3514      return;
3515    }
3516  
3517    // If this is a top-level #else, inform the MIOpt.
3518    if (CurPPLexer->getConditionalStackDepth() == 0)
3519      CurPPLexer->MIOpt.EnterTopLevelConditional();
3520  
3521    // If this is a #else with a #else before it, report the error.
3522    if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
3523  
3524    if (Callbacks)
3525      Callbacks->Else(Result.getLocation(), CI.IfLoc);
3526  
3527    bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3528      getSourceManager().isInMainFile(Result.getLocation());
3529  
3530    if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3531      // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3532      // the directive blocks.
3533      CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
3534                                       /*foundnonskip*/false, /*foundelse*/true);
3535      return;
3536    }
3537  
3538    // Finally, skip the rest of the contents of this block.
3539    SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
3540                                 /*Foundnonskip*/ true,
3541                                 /*FoundElse*/ true, Result.getLocation());
3542  }
3543  
3544  /// Implements the \#elif, \#elifdef, and \#elifndef directives.
HandleElifFamilyDirective(Token & ElifToken,const Token & HashToken,tok::PPKeywordKind Kind)3545  void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3546                                               const Token &HashToken,
3547                                               tok::PPKeywordKind Kind) {
3548    PPElifDiag DirKind = Kind == tok::pp_elif      ? PED_Elif
3549                         : Kind == tok::pp_elifdef ? PED_Elifdef
3550                                                   : PED_Elifndef;
3551    ++NumElse;
3552  
3553    // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3554    switch (DirKind) {
3555    case PED_Elifdef:
3556    case PED_Elifndef:
3557      unsigned DiagID;
3558      if (LangOpts.CPlusPlus)
3559        DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3560                                      : diag::ext_cxx23_pp_directive;
3561      else
3562        DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3563                              : diag::ext_c23_pp_directive;
3564      Diag(ElifToken, DiagID) << DirKind;
3565      break;
3566    default:
3567      break;
3568    }
3569  
3570    // #elif directive in a non-skipping conditional... start skipping.
3571    // We don't care what the condition is, because we will always skip it (since
3572    // the block immediately before it was included).
3573    SourceRange ConditionRange = DiscardUntilEndOfDirective();
3574  
3575    PPConditionalInfo CI;
3576    if (CurPPLexer->popConditionalLevel(CI)) {
3577      Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind;
3578      return;
3579    }
3580  
3581    // If this is a top-level #elif, inform the MIOpt.
3582    if (CurPPLexer->getConditionalStackDepth() == 0)
3583      CurPPLexer->MIOpt.EnterTopLevelConditional();
3584  
3585    // If this is a #elif with a #else before it, report the error.
3586    if (CI.FoundElse)
3587      Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind;
3588  
3589    if (Callbacks) {
3590      switch (Kind) {
3591      case tok::pp_elif:
3592        Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
3593                        PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
3594        break;
3595      case tok::pp_elifdef:
3596        Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3597        break;
3598      case tok::pp_elifndef:
3599        Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3600        break;
3601      default:
3602        assert(false && "unexpected directive kind");
3603        break;
3604      }
3605    }
3606  
3607    bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3608      getSourceManager().isInMainFile(ElifToken.getLocation());
3609  
3610    if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3611      // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3612      // the directive blocks.
3613      CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
3614                                       /*foundnonskip*/false, /*foundelse*/false);
3615      return;
3616    }
3617  
3618    // Finally, skip the rest of the contents of this block.
3619    SkipExcludedConditionalBlock(
3620        HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
3621        /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
3622  }
3623  
3624  std::optional<LexEmbedParametersResult>
LexEmbedParameters(Token & CurTok,bool ForHasEmbed)3625  Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
3626    LexEmbedParametersResult Result{};
3627    SmallVector<Token, 2> ParameterTokens;
3628    tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
3629  
3630    auto DiagMismatchedBracesAndSkipToEOD =
3631        [&](tok::TokenKind Expected,
3632            std::pair<tok::TokenKind, SourceLocation> Matches) {
3633          Diag(CurTok, diag::err_expected) << Expected;
3634          Diag(Matches.second, diag::note_matching) << Matches.first;
3635          if (CurTok.isNot(tok::eod))
3636            DiscardUntilEndOfDirective(CurTok);
3637        };
3638  
3639    auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
3640      if (CurTok.isNot(Kind)) {
3641        Diag(CurTok, diag::err_expected) << Kind;
3642        if (CurTok.isNot(tok::eod))
3643          DiscardUntilEndOfDirective(CurTok);
3644        return false;
3645      }
3646      return true;
3647    };
3648  
3649    // C23 6.10:
3650    // pp-parameter-name:
3651    //   pp-standard-parameter
3652    //   pp-prefixed-parameter
3653    //
3654    // pp-standard-parameter:
3655    //   identifier
3656    //
3657    // pp-prefixed-parameter:
3658    //   identifier :: identifier
3659    auto LexPPParameterName = [&]() -> std::optional<std::string> {
3660      // We expect the current token to be an identifier; if it's not, things
3661      // have gone wrong.
3662      if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3663        return std::nullopt;
3664  
3665      const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
3666  
3667      // Lex another token; it is either a :: or we're done with the parameter
3668      // name.
3669      LexNonComment(CurTok);
3670      if (CurTok.is(tok::coloncolon)) {
3671        // We found a ::, so lex another identifier token.
3672        LexNonComment(CurTok);
3673        if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3674          return std::nullopt;
3675  
3676        const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
3677  
3678        // Lex another token so we're past the name.
3679        LexNonComment(CurTok);
3680        return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
3681      }
3682      return Prefix->getName().str();
3683    };
3684  
3685    // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
3686    // this document as an identifier pp_param and an identifier of the form
3687    // __pp_param__ shall behave the same when used as a preprocessor parameter,
3688    // except for the spelling.
3689    auto NormalizeParameterName = [](StringRef Name) {
3690      if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__"))
3691        return Name.substr(2, Name.size() - 4);
3692      return Name;
3693    };
3694  
3695    auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
3696      // we have a limit parameter and its internals are processed using
3697      // evaluation rules from #if.
3698      if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3699        return std::nullopt;
3700  
3701      // We do not consume the ( because EvaluateDirectiveExpression will lex
3702      // the next token for us.
3703      IdentifierInfo *ParameterIfNDef = nullptr;
3704      bool EvaluatedDefined;
3705      DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
3706          ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false);
3707  
3708      if (!LimitEvalResult.Value) {
3709        // If there was an error evaluating the directive expression, we expect
3710        // to be at the end of directive token.
3711        assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
3712        return std::nullopt;
3713      }
3714  
3715      if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3716        return std::nullopt;
3717  
3718      // Eat the ).
3719      LexNonComment(CurTok);
3720  
3721      // C23 6.10.3.2p2: The token defined shall not appear within the constant
3722      // expression.
3723      if (EvaluatedDefined) {
3724        Diag(CurTok, diag::err_defined_in_pp_embed);
3725        return std::nullopt;
3726      }
3727  
3728      if (LimitEvalResult.Value) {
3729        const llvm::APSInt &Result = *LimitEvalResult.Value;
3730        if (Result.isNegative()) {
3731          Diag(CurTok, diag::err_requires_positive_value)
3732              << toString(Result, 10) << /*positive*/ 0;
3733          return std::nullopt;
3734        }
3735        return Result.getLimitedValue();
3736      }
3737      return std::nullopt;
3738    };
3739  
3740    auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
3741      switch (Kind) {
3742      case tok::l_paren:
3743        return tok::r_paren;
3744      case tok::l_brace:
3745        return tok::r_brace;
3746      case tok::l_square:
3747        return tok::r_square;
3748      default:
3749        llvm_unreachable("should not get here");
3750      }
3751    };
3752  
3753    auto LexParenthesizedBalancedTokenSoup =
3754        [&](llvm::SmallVectorImpl<Token> &Tokens) {
3755          std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
3756  
3757          // We expect the current token to be a left paren.
3758          if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3759            return false;
3760          LexNonComment(CurTok); // Eat the (
3761  
3762          bool WaitingForInnerCloseParen = false;
3763          while (CurTok.isNot(tok::eod) &&
3764                 (WaitingForInnerCloseParen || CurTok.isNot(tok::r_paren))) {
3765            switch (CurTok.getKind()) {
3766            default: // Shutting up diagnostics about not fully-covered switch.
3767              break;
3768            case tok::l_paren:
3769              WaitingForInnerCloseParen = true;
3770              [[fallthrough]];
3771            case tok::l_brace:
3772            case tok::l_square:
3773              BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()});
3774              break;
3775            case tok::r_paren:
3776              WaitingForInnerCloseParen = false;
3777              [[fallthrough]];
3778            case tok::r_brace:
3779            case tok::r_square: {
3780              tok::TokenKind Matching =
3781                  GetMatchingCloseBracket(BracketStack.back().first);
3782              if (BracketStack.empty() || CurTok.getKind() != Matching) {
3783                DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
3784                return false;
3785              }
3786              BracketStack.pop_back();
3787            } break;
3788            }
3789            Tokens.push_back(CurTok);
3790            LexNonComment(CurTok);
3791          }
3792  
3793          // When we're done, we want to eat the closing paren.
3794          if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3795            return false;
3796  
3797          LexNonComment(CurTok); // Eat the )
3798          return true;
3799        };
3800  
3801    LexNonComment(CurTok); // Prime the pump.
3802    while (!CurTok.isOneOf(EndTokenKind, tok::eod)) {
3803      SourceLocation ParamStartLoc = CurTok.getLocation();
3804      std::optional<std::string> ParamName = LexPPParameterName();
3805      if (!ParamName)
3806        return std::nullopt;
3807      StringRef Parameter = NormalizeParameterName(*ParamName);
3808  
3809      // Lex the parameters (dependent on the parameter type we want!).
3810      //
3811      // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
3812      // one time in the embed parameter sequence.
3813      if (Parameter == "limit") {
3814        if (Result.MaybeLimitParam)
3815          Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3816  
3817        std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
3818        if (!Limit)
3819          return std::nullopt;
3820        Result.MaybeLimitParam =
3821            PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
3822      } else if (Parameter == "clang::offset") {
3823        if (Result.MaybeOffsetParam)
3824          Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3825  
3826        std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
3827        if (!Offset)
3828          return std::nullopt;
3829        Result.MaybeOffsetParam = PPEmbedParameterOffset{
3830            *Offset, {ParamStartLoc, CurTok.getLocation()}};
3831      } else if (Parameter == "prefix") {
3832        if (Result.MaybePrefixParam)
3833          Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3834  
3835        SmallVector<Token, 4> Soup;
3836        if (!LexParenthesizedBalancedTokenSoup(Soup))
3837          return std::nullopt;
3838        Result.MaybePrefixParam = PPEmbedParameterPrefix{
3839            std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3840      } else if (Parameter == "suffix") {
3841        if (Result.MaybeSuffixParam)
3842          Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3843  
3844        SmallVector<Token, 4> Soup;
3845        if (!LexParenthesizedBalancedTokenSoup(Soup))
3846          return std::nullopt;
3847        Result.MaybeSuffixParam = PPEmbedParameterSuffix{
3848            std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3849      } else if (Parameter == "if_empty") {
3850        if (Result.MaybeIfEmptyParam)
3851          Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3852  
3853        SmallVector<Token, 4> Soup;
3854        if (!LexParenthesizedBalancedTokenSoup(Soup))
3855          return std::nullopt;
3856        Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
3857            std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3858      } else {
3859        ++Result.UnrecognizedParams;
3860  
3861        // If there's a left paren, we need to parse a balanced token sequence
3862        // and just eat those tokens.
3863        if (CurTok.is(tok::l_paren)) {
3864          SmallVector<Token, 4> Soup;
3865          if (!LexParenthesizedBalancedTokenSoup(Soup))
3866            return std::nullopt;
3867        }
3868        if (!ForHasEmbed) {
3869          Diag(CurTok, diag::err_pp_unknown_parameter) << 1 << Parameter;
3870          return std::nullopt;
3871        }
3872      }
3873    }
3874    return Result;
3875  }
3876  
HandleEmbedDirectiveImpl(SourceLocation HashLoc,const LexEmbedParametersResult & Params,StringRef BinaryContents)3877  void Preprocessor::HandleEmbedDirectiveImpl(
3878      SourceLocation HashLoc, const LexEmbedParametersResult &Params,
3879      StringRef BinaryContents) {
3880    if (BinaryContents.empty()) {
3881      // If we have no binary contents, the only thing we need to emit are the
3882      // if_empty tokens, if any.
3883      // FIXME: this loses AST fidelity; nothing in the compiler will see that
3884      // these tokens came from #embed. We have to hack around this when printing
3885      // preprocessed output. The same is true for prefix and suffix tokens.
3886      if (Params.MaybeIfEmptyParam) {
3887        ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens;
3888        size_t TokCount = Toks.size();
3889        auto NewToks = std::make_unique<Token[]>(TokCount);
3890        llvm::copy(Toks, NewToks.get());
3891        EnterTokenStream(std::move(NewToks), TokCount, true, true);
3892      }
3893      return;
3894    }
3895  
3896    size_t NumPrefixToks = Params.PrefixTokenCount(),
3897           NumSuffixToks = Params.SuffixTokenCount();
3898    size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;
3899    size_t CurIdx = 0;
3900    auto Toks = std::make_unique<Token[]>(TotalNumToks);
3901  
3902    // Add the prefix tokens, if any.
3903    if (Params.MaybePrefixParam) {
3904      llvm::copy(Params.MaybePrefixParam->Tokens, &Toks[CurIdx]);
3905      CurIdx += NumPrefixToks;
3906    }
3907  
3908    EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
3909    Data->BinaryData = BinaryContents;
3910  
3911    Toks[CurIdx].startToken();
3912    Toks[CurIdx].setKind(tok::annot_embed);
3913    Toks[CurIdx].setAnnotationRange(HashLoc);
3914    Toks[CurIdx++].setAnnotationValue(Data);
3915  
3916    // Now add the suffix tokens, if any.
3917    if (Params.MaybeSuffixParam) {
3918      llvm::copy(Params.MaybeSuffixParam->Tokens, &Toks[CurIdx]);
3919      CurIdx += NumSuffixToks;
3920    }
3921  
3922    assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");
3923    EnterTokenStream(std::move(Toks), TotalNumToks, true, true);
3924  }
3925  
HandleEmbedDirective(SourceLocation HashLoc,Token & EmbedTok,const FileEntry * LookupFromFile)3926  void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
3927                                          const FileEntry *LookupFromFile) {
3928    // Give the usual extension/compatibility warnings.
3929    if (LangOpts.C23)
3930      Diag(EmbedTok, diag::warn_compat_pp_embed_directive);
3931    else
3932      Diag(EmbedTok, diag::ext_pp_embed_directive)
3933          << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
3934  
3935    // Parse the filename header
3936    Token FilenameTok;
3937    if (LexHeaderName(FilenameTok))
3938      return;
3939  
3940    if (FilenameTok.isNot(tok::header_name)) {
3941      Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
3942      if (FilenameTok.isNot(tok::eod))
3943        DiscardUntilEndOfDirective();
3944      return;
3945    }
3946  
3947    // Parse the optional sequence of
3948    // directive-parameters:
3949    //     identifier parameter-name-list[opt] directive-argument-list[opt]
3950    // directive-argument-list:
3951    //    '(' balanced-token-sequence ')'
3952    // parameter-name-list:
3953    //    '::' identifier parameter-name-list[opt]
3954    Token CurTok;
3955    std::optional<LexEmbedParametersResult> Params =
3956        LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
3957  
3958    assert((Params || CurTok.is(tok::eod)) &&
3959           "expected success or to be at the end of the directive");
3960    if (!Params)
3961      return;
3962  
3963    // Now, splat the data out!
3964    SmallString<128> FilenameBuffer;
3965    StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
3966    StringRef OriginalFilename = Filename;
3967    bool isAngled =
3968        GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
3969    // If GetIncludeFilenameSpelling set the start ptr to null, there was an
3970    // error.
3971    assert(!Filename.empty());
3972    OptionalFileEntryRef MaybeFileRef =
3973        this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile);
3974    if (!MaybeFileRef) {
3975      // could not find file
3976      if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
3977        return;
3978      }
3979      Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
3980      return;
3981    }
3982    std::optional<llvm::MemoryBufferRef> MaybeFile =
3983        getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef);
3984    if (!MaybeFile) {
3985      // could not find file
3986      Diag(FilenameTok, diag::err_cannot_open_file)
3987          << Filename << "a buffer to the contents could not be created";
3988      return;
3989    }
3990    StringRef BinaryContents = MaybeFile->getBuffer();
3991  
3992    // The order is important between 'offset' and 'limit'; we want to offset
3993    // first and then limit second; otherwise we may reduce the notional resource
3994    // size to something too small to offset into.
3995    if (Params->MaybeOffsetParam) {
3996      // FIXME: just like with the limit() and if_empty() parameters, this loses
3997      // source fidelity in the AST; it has no idea that there was an offset
3998      // involved.
3999      // offsets all the way to the end of the file make for an empty file.
4000      BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset);
4001    }
4002  
4003    if (Params->MaybeLimitParam) {
4004      // FIXME: just like with the clang::offset() and if_empty() parameters,
4005      // this loses source fidelity in the AST; it has no idea there was a limit
4006      // involved.
4007      BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit);
4008    }
4009  
4010    if (Callbacks)
4011      Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,
4012                                *Params);
4013    HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents);
4014  }
4015