xref: /freebsd/contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
1 //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Implements # directive processing for the Preprocessor.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/IdentifierTable.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/Module.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "clang/Lex/CodeCompletionHandler.h"
23 #include "clang/Lex/HeaderSearch.h"
24 #include "clang/Lex/LexDiagnostic.h"
25 #include "clang/Lex/LiteralSupport.h"
26 #include "clang/Lex/MacroInfo.h"
27 #include "clang/Lex/ModuleLoader.h"
28 #include "clang/Lex/ModuleMap.h"
29 #include "clang/Lex/PPCallbacks.h"
30 #include "clang/Lex/Pragma.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "clang/Lex/PreprocessorOptions.h"
33 #include "clang/Lex/Token.h"
34 #include "clang/Lex/VariadicMacroSupport.h"
35 #include "llvm/ADT/ArrayRef.h"
36 #include "llvm/ADT/ScopeExit.h"
37 #include "llvm/ADT/SmallString.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/ADT/StringSwitch.h"
41 #include "llvm/ADT/StringRef.h"
42 #include "llvm/Support/AlignOf.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include "llvm/Support/Path.h"
45 #include <algorithm>
46 #include <cassert>
47 #include <cstring>
48 #include <new>
49 #include <string>
50 #include <utility>
51 
52 using namespace clang;
53 
54 //===----------------------------------------------------------------------===//
55 // Utility Methods for Preprocessor Directive Handling.
56 //===----------------------------------------------------------------------===//
57 
58 MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
59   auto *MIChain = new (BP) MacroInfoChain{L, MIChainHead};
60   MIChainHead = MIChain;
61   return &MIChain->MI;
62 }
63 
64 DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
65                                                            SourceLocation Loc) {
66   return new (BP) DefMacroDirective(MI, Loc);
67 }
68 
69 UndefMacroDirective *
70 Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
71   return new (BP) UndefMacroDirective(UndefLoc);
72 }
73 
74 VisibilityMacroDirective *
75 Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
76                                                bool isPublic) {
77   return new (BP) VisibilityMacroDirective(Loc, isPublic);
78 }
79 
80 /// Read and discard all tokens remaining on the current line until
81 /// the tok::eod token is found.
82 SourceRange Preprocessor::DiscardUntilEndOfDirective() {
83   Token Tmp;
84   SourceRange Res;
85 
86   LexUnexpandedToken(Tmp);
87   Res.setBegin(Tmp.getLocation());
88   while (Tmp.isNot(tok::eod)) {
89     assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
90     LexUnexpandedToken(Tmp);
91   }
92   Res.setEnd(Tmp.getLocation());
93   return Res;
94 }
95 
96 /// Enumerates the possible outcomes of checking the name in a #define/#undef
/// against reserved identifiers and keywords.
97 enum MacroDiag {
98   MD_NoWarn,        ///< Not a reserved identifier
99   MD_KeywordDef,    ///< Macro hides keyword, enabled by default
100   MD_ReservedMacro  ///< #define or #undef of a reserved id, disabled by default
101 };
102 
103 /// Checks if the specified identifier is reserved in the specified
104 /// language.
105 /// This function does not check if the identifier is a keyword.
106 static bool isReservedId(StringRef Text, const LangOptions &Lang) {
107   // C++ [macro.names], C11 7.1.3:
108   // All identifiers that begin with an underscore and either an uppercase
109   // letter or another underscore are always reserved for any use.
110   if (Text.size() >= 2 && Text[0] == '_' &&
111       (isUppercase(Text[1]) || Text[1] == '_'))
112       return true;
113   // C++ [global.names]
114   // Each name that contains a double underscore ... is reserved to the
115   // implementation for any use.
116   if (Lang.CPlusPlus) {
117     if (Text.find("__") != StringRef::npos)
118       return true;
119   }
120   return false;
121 }
122 
123 // The -fmodule-name option tells the compiler to textually include headers in
124 // the specified module, meaning clang won't build the specified module. This is
125 // useful in a number of situations, for instance, when building a library that
126 // vends a module map, one might want to avoid hitting intermediate build
127 // products containimg the the module map or avoid finding the system installed
128 // modulemap for that library.
129 static bool isForModuleBuilding(Module *M, StringRef CurrentModule,
130                                 StringRef ModuleName) {
131   StringRef TopLevelName = M->getTopLevelModuleName();
132 
133   // When building framework Foo, we wanna make sure that Foo *and* Foo_Private
134   // are textually included and no modules are built for both.
135   if (M->getTopLevelModule()->IsFramework && CurrentModule == ModuleName &&
136       !CurrentModule.endswith("_Private") && TopLevelName.endswith("_Private"))
137     TopLevelName = TopLevelName.drop_back(8);
138 
139   return TopLevelName == CurrentModule;
140 }
141 
142 static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
143   const LangOptions &Lang = PP.getLangOpts();
144   StringRef Text = II->getName();
145   if (isReservedId(Text, Lang))
146     return MD_ReservedMacro;
147   if (II->isKeyword(Lang))
148     return MD_KeywordDef;
149   if (Lang.CPlusPlus11 && (Text.equals("override") || Text.equals("final")))
150     return MD_KeywordDef;
151   return MD_NoWarn;
152 }
153 
154 static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
155   const LangOptions &Lang = PP.getLangOpts();
156   StringRef Text = II->getName();
157   // Do not warn on keyword undef.  It is generally harmless and widely used.
158   if (isReservedId(Text, Lang))
159     return MD_ReservedMacro;
160   return MD_NoWarn;
161 }
162 
163 // Return true if we want to issue a diagnostic by default if we
164 // encounter this name in a #include with the wrong case. For now,
165 // this includes the standard C and C++ headers, Posix headers,
166 // and Boost headers. Improper case for these #includes is a
167 // potential portability issue.
168 static bool warnByDefaultOnWrongCase(StringRef Include) {
169   // If the first component of the path is "boost", treat this like a standard header
170   // for the purposes of diagnostics.
171   if (::llvm::sys::path::begin(Include)->equals_lower("boost"))
172     return true;
173 
174   // "condition_variable" is the longest standard header name at 18 characters.
175   // If the include file name is longer than that, it can't be a standard header.
176   static const size_t MaxStdHeaderNameLen = 18u;
177   if (Include.size() > MaxStdHeaderNameLen)
178     return false;
179 
180   // Lowercase and normalize the search string.
181   SmallString<32> LowerInclude{Include};
182   for (char &Ch : LowerInclude) {
183     // In the ASCII range?
184     if (static_cast<unsigned char>(Ch) > 0x7f)
185       return false; // Can't be a standard header
186     // ASCII lowercase:
187     if (Ch >= 'A' && Ch <= 'Z')
188       Ch += 'a' - 'A';
189     // Normalize path separators for comparison purposes.
190     else if (::llvm::sys::path::is_separator(Ch))
191       Ch = '/';
192   }
193 
194   // The standard C/C++ and Posix headers
195   return llvm::StringSwitch<bool>(LowerInclude)
196     // C library headers
197     .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
198     .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
199     .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
200     .Cases("stdatomic.h", "stdbool.h", "stddef.h", "stdint.h", "stdio.h", true)
201     .Cases("stdlib.h", "stdnoreturn.h", "string.h", "tgmath.h", "threads.h", true)
202     .Cases("time.h", "uchar.h", "wchar.h", "wctype.h", true)
203 
204     // C++ headers for C library facilities
205     .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
206     .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
207     .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
208     .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
209     .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
210     .Case("cwctype", true)
211 
212     // C++ library headers
213     .Cases("algorithm", "fstream", "list", "regex", "thread", true)
214     .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
215     .Cases("atomic", "future", "map", "set", "type_traits", true)
216     .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
217     .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
218     .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
219     .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
220     .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
221     .Cases("deque", "istream", "queue", "string", "valarray", true)
222     .Cases("exception", "iterator", "random", "strstream", "vector", true)
223     .Cases("forward_list", "limits", "ratio", "system_error", true)
224 
225     // POSIX headers (which aren't also C headers)
226     .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
227     .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
228     .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
229     .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
230     .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
231     .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
232     .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
233     .Cases("sys/resource.h", "sys/select.h",  "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
234     .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
235     .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
236     .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
237     .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
238     .Default(false);
239 }
240 
241 bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
242                                   bool *ShadowFlag) {
243   // Missing macro name?
244   if (MacroNameTok.is(tok::eod))
245     return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
246 
247   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
248   if (!II)
249     return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
250 
251   if (II->isCPlusPlusOperatorKeyword()) {
252     // C++ 2.5p2: Alternative tokens behave the same as its primary token
253     // except for their spellings.
254     Diag(MacroNameTok, getLangOpts().MicrosoftExt
255                            ? diag::ext_pp_operator_used_as_macro_name
256                            : diag::err_pp_operator_used_as_macro_name)
257         << II << MacroNameTok.getKind();
258     // Allow #defining |and| and friends for Microsoft compatibility or
259     // recovery when legacy C headers are included in C++.
260   }
261 
262   if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
263     // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
264     return Diag(MacroNameTok, diag::err_defined_macro_name);
265   }
266 
267   if (isDefineUndef == MU_Undef) {
268     auto *MI = getMacroInfo(II);
269     if (MI && MI->isBuiltinMacro()) {
270       // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4
271       // and C++ [cpp.predefined]p4], but allow it as an extension.
272       Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
273     }
274   }
275 
276   // If defining/undefining reserved identifier or a keyword, we need to issue
277   // a warning.
278   SourceLocation MacroNameLoc = MacroNameTok.getLocation();
279   if (ShadowFlag)
280     *ShadowFlag = false;
281   if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&
282       (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) {
283     MacroDiag D = MD_NoWarn;
284     if (isDefineUndef == MU_Define) {
285       D = shouldWarnOnMacroDef(*this, II);
286     }
287     else if (isDefineUndef == MU_Undef)
288       D = shouldWarnOnMacroUndef(*this, II);
289     if (D == MD_KeywordDef) {
290       // We do not want to warn on some patterns widely used in configuration
291       // scripts.  This requires analyzing next tokens, so do not issue warnings
292       // now, only inform caller.
293       if (ShadowFlag)
294         *ShadowFlag = true;
295     }
296     if (D == MD_ReservedMacro)
297       Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
298   }
299 
300   // Okay, we got a good identifier.
301   return false;
302 }
303 
304 /// Lex and validate a macro name, which occurs after a
305 /// \#define or \#undef.
306 ///
307 /// This sets the token kind to eod and discards the rest of the macro line if
308 /// the macro name is invalid.
309 ///
310 /// \param MacroNameTok Token that is expected to be a macro name.
311 /// \param isDefineUndef Context in which macro is used.
312 /// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
313 void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
314                                  bool *ShadowFlag) {
315   // Read the token, don't allow macro expansion on it.
316   LexUnexpandedToken(MacroNameTok);
317 
318   if (MacroNameTok.is(tok::code_completion)) {
319     if (CodeComplete)
320       CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);
321     setCodeCompletionReached();
322     LexUnexpandedToken(MacroNameTok);
323   }
324 
325   if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
326     return;
327 
328   // Invalid macro name, read and discard the rest of the line and set the
329   // token kind to tok::eod if necessary.
330   if (MacroNameTok.isNot(tok::eod)) {
331     MacroNameTok.setKind(tok::eod);
332     DiscardUntilEndOfDirective();
333   }
334 }
335 
336 /// Ensure that the next token is a tok::eod token.
337 ///
338 /// If not, emit a diagnostic and consume up until the eod.  If EnableMacros is
339 /// true, then we consider macros that expand to zero tokens as being ok.
340 ///
341 /// Returns the location of the end of the directive.
342 SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
343                                                  bool EnableMacros) {
344   Token Tmp;
345   // Lex unexpanded tokens for most directives: macros might expand to zero
346   // tokens, causing us to miss diagnosing invalid lines.  Some directives (like
347   // #line) allow empty macros.
348   if (EnableMacros)
349     Lex(Tmp);
350   else
351     LexUnexpandedToken(Tmp);
352 
353   // There should be no tokens after the directive, but we allow them as an
354   // extension.
355   while (Tmp.is(tok::comment))  // Skip comments in -C mode.
356     LexUnexpandedToken(Tmp);
357 
358   if (Tmp.is(tok::eod))
359     return Tmp.getLocation();
360 
361   // Add a fixit in GNU/C99/C++ mode.  Don't offer a fixit for strict-C89,
362   // or if this is a macro-style preprocessing directive, because it is more
363   // trouble than it is worth to insert /**/ and check that there is no /**/
364   // in the range also.
365   FixItHint Hint;
366   if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
367       !CurTokenLexer)
368     Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
369   Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
370   return DiscardUntilEndOfDirective().getEnd();
371 }
372 
373 Optional<unsigned> Preprocessor::getSkippedRangeForExcludedConditionalBlock(
374     SourceLocation HashLoc) {
375   if (!ExcludedConditionalDirectiveSkipMappings)
376     return None;
377   if (!HashLoc.isFileID())
378     return None;
379 
380   std::pair<FileID, unsigned> HashFileOffset =
381       SourceMgr.getDecomposedLoc(HashLoc);
382   const llvm::MemoryBuffer *Buf = SourceMgr.getBuffer(HashFileOffset.first);
383   auto It = ExcludedConditionalDirectiveSkipMappings->find(Buf);
384   if (It == ExcludedConditionalDirectiveSkipMappings->end())
385     return None;
386 
387   const PreprocessorSkippedRangeMapping &SkippedRanges = *It->getSecond();
388   // Check if the offset of '#' is mapped in the skipped ranges.
389   auto MappingIt = SkippedRanges.find(HashFileOffset.second);
390   if (MappingIt == SkippedRanges.end())
391     return None;
392 
393   unsigned BytesToSkip = MappingIt->getSecond();
394   unsigned CurLexerBufferOffset = CurLexer->getCurrentBufferOffset();
395   assert(CurLexerBufferOffset >= HashFileOffset.second &&
396          "lexer is before the hash?");
397   // Take into account the fact that the lexer has already advanced, so the
398   // number of bytes to skip must be adjusted.
399   unsigned LengthDiff = CurLexerBufferOffset - HashFileOffset.second;
400   assert(BytesToSkip >= LengthDiff && "lexer is after the skipped range?");
401   return BytesToSkip - LengthDiff;
402 }
403 
404 /// SkipExcludedConditionalBlock - We just read a \#if or related directive and
405 /// decided that the subsequent tokens are in the \#if'd out portion of the
406 /// file.  Lex the rest of the file in raw mode until we reach the matching
407 /// \#endif, a \#else that must be entered, a \#elif whose condition is true,
408 /// or the end of the file.  If FoundNonSkipPortion is true, then we have
409 /// already emitted code for part of this \#if directive, so \#else/\#elif
410 /// blocks should never be entered.  If FoundElse is true, then we have already
411 /// seen a \#else, so another \#else directive is a duplicate.  When this
/// returns, the caller can lex the first valid token.
///
/// \param HashTokenLoc Location of the '#' of the directive that started the
///        skipped region; used to look up a precomputed skip length and as
///        the start of the range reported to SourceRangeSkipped.
/// \param IfTokenLoc Location recorded in the conditional level pushed for
///        this block.
/// \param FoundNonSkipPortion See above.
/// \param FoundElse See above.
/// \param ElseLoc Only stored into the preamble skip info when the end of the
///        file is reached while a preamble is being recorded.
412 void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
413                                                 SourceLocation IfTokenLoc,
414                                                 bool FoundNonSkipPortion,
415                                                 bool FoundElse,
416                                                 SourceLocation ElseLoc) {
417   ++NumSkipped;
418   assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?");
419 
  // When the preamble conditional stack says EOF was reached while skipping,
  // only clear that saved skip info; otherwise push a new conditional level
  // for the block being skipped.
420   if (PreambleConditionalStack.reachedEOFWhileSkipping())
421     PreambleConditionalStack.clearSkipInfo();
422   else
423     CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
424                                      FoundNonSkipPortion, FoundElse);
425 
426   // Enter raw mode to disable identifier lookup (and thus macro expansion),
427   // disabling warnings, etc.
428   CurPPLexer->LexingRawMode = true;
429   Token Tok;
430   if (auto SkipLength =
431           getSkippedRangeForExcludedConditionalBlock(HashTokenLoc)) {
432     // Skip to the next '#endif' / '#else' / '#elif'.
433     CurLexer->skipOver(*SkipLength);
434   }
  // Set only when the loop stops at an #endif or an entered #else; otherwise
  // left invalid so the lexer's current location is reported instead.
435   SourceLocation endLoc;
436   while (true) {
437     CurLexer->Lex(Tok);
438 
439     if (Tok.is(tok::code_completion)) {
440       if (CodeComplete)
441         CodeComplete->CodeCompleteInConditionalExclusion();
442       setCodeCompletionReached();
443       continue;
444     }
445 
446     // If this is the end of the buffer, we have an error.
447     if (Tok.is(tok::eof)) {
448       // We don't emit errors for unterminated conditionals here,
449       // Lexer::LexEndOfFile can do that properly.
450       // Just return and let the caller lex after this #include.
451       if (PreambleConditionalStack.isRecording())
452         PreambleConditionalStack.SkipInfo.emplace(
453             HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc);
454       break;
455     }
456 
457     // If this token is not a preprocessor directive, just skip it.
458     if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
459       continue;
460 
461     // We just parsed a # character at the start of a line, so we're in
462     // directive mode.  Tell the lexer this so any newlines we see will be
463     // converted into an EOD token (this terminates the macro).
464     CurPPLexer->ParsingPreprocessorDirective = true;
465     if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
466 
467 
468     // Read the next token, the directive flavor.
469     LexUnexpandedToken(Tok);
470 
471     // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
472     // something bogus), skip it.
473     if (Tok.isNot(tok::raw_identifier)) {
474       CurPPLexer->ParsingPreprocessorDirective = false;
475       // Restore comment saving mode.
476       if (CurLexer) CurLexer->resetExtendedTokenMode();
477       continue;
478     }
479 
480     // If the first letter isn't i or e, it isn't interesting to us.  We know
481     // that this is safe in the face of spelling differences, because there is
482     // no way to spell an i/e in a strange way that is another letter.  Skipping
483     // this allows us to avoid looking up the identifier info for #define/#undef
484     // and other common directives.
485     StringRef RI = Tok.getRawIdentifier();
486 
487     char FirstChar = RI[0];
488     if (FirstChar >= 'a' && FirstChar <= 'z' &&
489         FirstChar != 'i' && FirstChar != 'e') {
490       CurPPLexer->ParsingPreprocessorDirective = false;
491       // Restore comment saving mode.
492       if (CurLexer) CurLexer->resetExtendedTokenMode();
493       continue;
494     }
495 
496     // Get the identifier name without trigraphs or embedded newlines.  Note
497     // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
498     // when skipping.  Spellings of 20 or more characters are ignored outright;
    // every directive name matched below is far shorter than that.
499     char DirectiveBuf[20];
500     StringRef Directive;
501     if (!Tok.needsCleaning() && RI.size() < 20) {
502       Directive = RI;
503     } else {
504       std::string DirectiveStr = getSpelling(Tok);
505       size_t IdLen = DirectiveStr.size();
506       if (IdLen >= 20) {
507         CurPPLexer->ParsingPreprocessorDirective = false;
508         // Restore comment saving mode.
509         if (CurLexer) CurLexer->resetExtendedTokenMode();
510         continue;
511       }
      // Copy into DirectiveBuf so Directive does not refer to DirectiveStr,
      // which goes out of scope at the end of this else-branch.
512       memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
513       Directive = StringRef(DirectiveBuf, IdLen);
514     }
515 
516     if (Directive.startswith("if")) {
517       StringRef Sub = Directive.substr(2);
518       if (Sub.empty() ||   // "if"
519           Sub == "def" ||   // "ifdef"
520           Sub == "ndef") {  // "ifndef"
521         // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
522         // bother parsing the condition.
523         DiscardUntilEndOfDirective();
524         CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
525                                        /*foundnonskip*/false,
526                                        /*foundelse*/false);
527       }
528     } else if (Directive[0] == 'e') {
529       StringRef Sub = Directive.substr(1);
530       if (Sub == "ndif") {  // "endif"
531         PPConditionalInfo CondInfo;
532         CondInfo.WasSkipping = true; // Silence bogus warning.
        // NOTE(review): despite its name, InCond is asserted to be false here;
        // popConditionalLevel presumably returns true when there was no level
        // to pop -- confirm against the lexer's declaration.
533         bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
534         (void)InCond;  // Silence warning in no-asserts mode.
535         assert(!InCond && "Can't be skipping if not in a conditional!");
536 
537         // If we popped the outermost skipping block, we're done skipping!
538         if (!CondInfo.WasSkipping) {
539           // Restore the value of LexingRawMode so that trailing comments
540           // are handled correctly, if we've reached the outermost block.
541           CurPPLexer->LexingRawMode = false;
542           endLoc = CheckEndOfDirective("endif");
543           CurPPLexer->LexingRawMode = true;
544           if (Callbacks)
545             Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
546           break;
547         } else {
548           DiscardUntilEndOfDirective();
549         }
550       } else if (Sub == "lse") { // "else".
551         // #else directive in a skipping conditional.  If not in some other
552         // skipping conditional, and if #else hasn't already been seen, enter it
553         // as a non-skipping conditional.
554         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
555 
556         // If this is a #else with a #else before it, report the error.
557         if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_else_after_else);
558 
559         // Note that we've seen a #else in this conditional.
560         CondInfo.FoundElse = true;
561 
562         // If the conditional is at the top level, and the #if block wasn't
563         // entered, enter the #else block now.
564         if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
565           CondInfo.FoundNonSkip = true;
566           // Restore the value of LexingRawMode so that trailing comments
567           // are handled correctly.
568           CurPPLexer->LexingRawMode = false;
569           endLoc = CheckEndOfDirective("else");
570           CurPPLexer->LexingRawMode = true;
571           if (Callbacks)
572             Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
573           break;
574         } else {
575           DiscardUntilEndOfDirective();  // C99 6.10p4.
576         }
577       } else if (Sub == "lif") {  // "elif".
578         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
579 
580         // If this is a #elif with a #else before it, report the error.
581         if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else);
582 
583         // If this is in a skipping block or if we've already handled this #if
584         // block, don't bother parsing the condition.
585         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
586           DiscardUntilEndOfDirective();
587         } else {
588           // Restore the value of LexingRawMode so that identifiers are
589           // looked up, etc, inside the #elif expression.
590           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
591           CurPPLexer->LexingRawMode = false;
592           IdentifierInfo *IfNDefMacro = nullptr;
593           DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
594           const bool CondValue = DER.Conditional;
595           CurPPLexer->LexingRawMode = true;
596           if (Callbacks) {
597             Callbacks->Elif(
598                 Tok.getLocation(), DER.ExprRange,
599                 (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
600                 CondInfo.IfLoc);
601           }
602           // If this condition is true, enter it!
603           if (CondValue) {
604             CondInfo.FoundNonSkip = true;
605             break;
606           }
607         }
608       }
609     }
610 
    // NOTE(review): the break paths above bypass this reset; presumably lexing
    // the directive's eod token has already ended directive mode -- confirm.
611     CurPPLexer->ParsingPreprocessorDirective = false;
612     // Restore comment saving mode.
613     if (CurLexer) CurLexer->resetExtendedTokenMode();
614   }
615 
616   // Finally, if we are out of the conditional (saw an #endif or ran off the end
617   // of the file), just stop skipping and return to lexing whatever came after
618   // the #if block.
619   CurPPLexer->LexingRawMode = false;
620 
621   // The last skipped range isn't actually skipped yet if it's truncated
622   // by the end of the preamble; we'll resume parsing after the preamble.
623   if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
624     Callbacks->SourceRangeSkipped(
625         SourceRange(HashTokenLoc, endLoc.isValid()
626                                       ? endLoc
627                                       : CurPPLexer->getSourceLocation()),
628         Tok.getLocation());
629 }
630 
631 Module *Preprocessor::getModuleForLocation(SourceLocation Loc) {
632   if (!SourceMgr.isInMainFile(Loc)) {
633     // Try to determine the module of the include directive.
634     // FIXME: Look into directly passing the FileEntry from LookupFile instead.
635     FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
636     if (const FileEntry *EntryOfIncl = SourceMgr.getFileEntryForID(IDOfIncl)) {
637       // The include comes from an included file.
638       return HeaderInfo.getModuleMap()
639           .findModuleForHeader(EntryOfIncl)
640           .getModule();
641     }
642   }
643 
644   // This is either in the main file or not in a file at all. It belongs
645   // to the current module, if there is one.
646   return getLangOpts().CurrentModule.empty()
647              ? nullptr
648              : HeaderInfo.lookupModule(getLangOpts().CurrentModule);
649 }
650 
651 const FileEntry *
652 Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
653                                                SourceLocation Loc) {
654   Module *IncM = getModuleForLocation(IncLoc);
655 
656   // Walk up through the include stack, looking through textual headers of M
657   // until we hit a non-textual header that we can #include. (We assume textual
658   // headers of a module with non-textual headers aren't meant to be used to
659   // import entities from the module.)
660   auto &SM = getSourceManager();
661   while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
662     auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
663     auto *FE = SM.getFileEntryForID(ID);
664     if (!FE)
665       break;
666 
667     // We want to find all possible modules that might contain this header, so
668     // search all enclosing directories for module maps and load them.
669     HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,
670                             SourceMgr.isInSystemHeader(Loc));
671 
672     bool InPrivateHeader = false;
673     for (auto Header : HeaderInfo.findAllModulesForHeader(FE)) {
674       if (!Header.isAccessibleFrom(IncM)) {
675         // It's in a private header; we can't #include it.
676         // FIXME: If there's a public header in some module that re-exports it,
677         // then we could suggest including that, but it's not clear that's the
678         // expected way to make this entity visible.
679         InPrivateHeader = true;
680         continue;
681       }
682 
683       // We'll suggest including textual headers below if they're
684       // include-guarded.
685       if (Header.getRole() & ModuleMap::TextualHeader)
686         continue;
687 
688       // If we have a module import syntax, we shouldn't include a header to
689       // make a particular module visible. Let the caller know they should
690       // suggest an import instead.
691       if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules ||
692           getLangOpts().ModulesTS)
693         return nullptr;
694 
695       // If this is an accessible, non-textual header of M's top-level module
696       // that transitively includes the given location and makes the
697       // corresponding module visible, this is the thing to #include.
698       return FE;
699     }
700 
701     // FIXME: If we're bailing out due to a private header, we shouldn't suggest
702     // an import either.
703     if (InPrivateHeader)
704       return nullptr;
705 
706     // If the header is includable and has an include guard, assume the
707     // intended way to expose its contents is by #include, not by importing a
708     // module that transitively includes it.
709     if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE))
710       return FE;
711 
712     Loc = SM.getIncludeLoc(ID);
713   }
714 
715   return nullptr;
716 }
717 
718 Optional<FileEntryRef> Preprocessor::LookupFile(
719     SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
720     const DirectoryLookup *FromDir, const FileEntry *FromFile,
721     const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath,
722     SmallVectorImpl<char> *RelativePath,
723     ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
724     bool *IsFrameworkFound, bool SkipCache) {
725   Module *RequestingModule = getModuleForLocation(FilenameLoc);
726   bool RequestingModuleIsModuleInterface = !SourceMgr.isInMainFile(FilenameLoc);
727 
728   // If the header lookup mechanism may be relative to the current inclusion
729   // stack, record the parent #includes.
730   SmallVector<std::pair<const FileEntry *, const DirectoryEntry *>, 16>
731       Includers;
732   bool BuildSystemModule = false;
733   if (!FromDir && !FromFile) {
734     FileID FID = getCurrentFileLexer()->getFileID();
735     const FileEntry *FileEnt = SourceMgr.getFileEntryForID(FID);
736 
737     // If there is no file entry associated with this file, it must be the
738     // predefines buffer or the module includes buffer. Any other file is not
739     // lexed with a normal lexer, so it won't be scanned for preprocessor
740     // directives.
741     //
742     // If we have the predefines buffer, resolve #include references (which come
743     // from the -include command line argument) from the current working
744     // directory instead of relative to the main file.
745     //
746     // If we have the module includes buffer, resolve #include references (which
747     // come from header declarations in the module map) relative to the module
748     // map file.
749     if (!FileEnt) {
750       if (FID == SourceMgr.getMainFileID() && MainFileDir) {
751         Includers.push_back(std::make_pair(nullptr, MainFileDir));
752         BuildSystemModule = getCurrentModule()->IsSystem;
753       } else if ((FileEnt =
754                     SourceMgr.getFileEntryForID(SourceMgr.getMainFileID())))
755         Includers.push_back(std::make_pair(FileEnt, *FileMgr.getDirectory(".")));
756     } else {
757       Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
758     }
759 
760     // MSVC searches the current include stack from top to bottom for
761     // headers included by quoted include directives.
762     // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
763     if (LangOpts.MSVCCompat && !isAngled) {
764       for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
765         if (IsFileLexer(ISEntry))
766           if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
767             Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
768       }
769     }
770   }
771 
772   CurDir = CurDirLookup;
773 
774   if (FromFile) {
775     // We're supposed to start looking from after a particular file. Search
776     // the include path until we find that file or run out of files.
777     const DirectoryLookup *TmpCurDir = CurDir;
778     const DirectoryLookup *TmpFromDir = nullptr;
779     while (Optional<FileEntryRef> FE = HeaderInfo.LookupFile(
780                Filename, FilenameLoc, isAngled, TmpFromDir, TmpCurDir,
781                Includers, SearchPath, RelativePath, RequestingModule,
782                SuggestedModule, /*IsMapped=*/nullptr,
783                /*IsFrameworkFound=*/nullptr, SkipCache)) {
784       // Keep looking as if this file did a #include_next.
785       TmpFromDir = TmpCurDir;
786       ++TmpFromDir;
787       if (&FE->getFileEntry() == FromFile) {
788         // Found it.
789         FromDir = TmpFromDir;
790         CurDir = TmpCurDir;
791         break;
792       }
793     }
794   }
795 
796   // Do a standard file entry lookup.
797   Optional<FileEntryRef> FE = HeaderInfo.LookupFile(
798       Filename, FilenameLoc, isAngled, FromDir, CurDir, Includers, SearchPath,
799       RelativePath, RequestingModule, SuggestedModule, IsMapped,
800       IsFrameworkFound, SkipCache, BuildSystemModule);
801   if (FE) {
802     if (SuggestedModule && !LangOpts.AsmPreprocessor)
803       HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
804           RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
805           Filename, &FE->getFileEntry());
806     return FE;
807   }
808 
809   const FileEntry *CurFileEnt;
810   // Otherwise, see if this is a subframework header.  If so, this is relative
811   // to one of the headers on the #include stack.  Walk the list of the current
812   // headers on the #include stack and pass them to HeaderInfo.
813   if (IsFileLexer()) {
814     if ((CurFileEnt = CurPPLexer->getFileEntry())) {
815       if (Optional<FileEntryRef> FE = HeaderInfo.LookupSubframeworkHeader(
816               Filename, CurFileEnt, SearchPath, RelativePath, RequestingModule,
817               SuggestedModule)) {
818         if (SuggestedModule && !LangOpts.AsmPreprocessor)
819           HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
820               RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
821               Filename, &FE->getFileEntry());
822         return FE;
823       }
824     }
825   }
826 
827   for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
828     if (IsFileLexer(ISEntry)) {
829       if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
830         if (Optional<FileEntryRef> FE = HeaderInfo.LookupSubframeworkHeader(
831                 Filename, CurFileEnt, SearchPath, RelativePath,
832                 RequestingModule, SuggestedModule)) {
833           if (SuggestedModule && !LangOpts.AsmPreprocessor)
834             HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
835                 RequestingModule, RequestingModuleIsModuleInterface,
836                 FilenameLoc, Filename, &FE->getFileEntry());
837           return FE;
838         }
839       }
840     }
841   }
842 
843   // Otherwise, we really couldn't find the file.
844   return None;
845 }
846 
847 //===----------------------------------------------------------------------===//
848 // Preprocessor Directive Handling.
849 //===----------------------------------------------------------------------===//
850 
851 class Preprocessor::ResetMacroExpansionHelper {
852 public:
853   ResetMacroExpansionHelper(Preprocessor *pp)
854     : PP(pp), save(pp->DisableMacroExpansion) {
855     if (pp->MacroExpansionInDirectivesOverride)
856       pp->DisableMacroExpansion = false;
857   }
858 
859   ~ResetMacroExpansionHelper() {
860     PP->DisableMacroExpansion = save;
861   }
862 
863 private:
864   Preprocessor *PP;
865   bool save;
866 };
867 
868 /// Process a directive while looking for the through header or a #pragma
869 /// hdrstop. The following directives are handled:
870 /// #include (to check if it is the through header)
871 /// #define (to warn about macros that don't match the PCH)
872 /// #pragma (to check for pragma hdrstop).
873 /// All other directives are completely discarded.
874 void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
875                                                        SourceLocation HashLoc) {
876   if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
877     if (II->getPPKeywordID() == tok::pp_define) {
878       return HandleDefineDirective(Result,
879                                    /*ImmediatelyAfterHeaderGuard=*/false);
880     }
881     if (SkippingUntilPCHThroughHeader &&
882         II->getPPKeywordID() == tok::pp_include) {
883       return HandleIncludeDirective(HashLoc, Result);
884     }
885     if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
886       Lex(Result);
887       auto *II = Result.getIdentifierInfo();
888       if (II && II->getName() == "hdrstop")
889         return HandlePragmaHdrstop(Result);
890     }
891   }
892   DiscardUntilEndOfDirective();
893 }
894 
895 /// HandleDirective - This callback is invoked when the lexer sees a # token
896 /// at the start of a line.  This consumes the directive, modifies the
897 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
898 /// read is the correct one.
899 void Preprocessor::HandleDirective(Token &Result) {
900   // FIXME: Traditional: # with whitespace before it not recognized by K&R?
901 
902   // We just parsed a # character at the start of a line, so we're in directive
903   // mode.  Tell the lexer this so any newlines we see will be converted into an
904   // EOD token (which terminates the directive).
905   CurPPLexer->ParsingPreprocessorDirective = true;
906   if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
907 
908   bool ImmediatelyAfterTopLevelIfndef =
909       CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
910   CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
911 
912   ++NumDirectives;
913 
914   // We are about to read a token.  For the multiple-include optimization FA to
915   // work, we have to remember if we had read any tokens *before* this
916   // pp-directive.
917   bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
918 
919   // Save the '#' token in case we need to return it later.
920   Token SavedHash = Result;
921 
922   // Read the next token, the directive flavor.  This isn't expanded due to
923   // C99 6.10.3p8.
924   LexUnexpandedToken(Result);
925 
926   // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
927   //   #define A(x) #x
928   //   A(abc
929   //     #warning blah
930   //   def)
931   // If so, the user is relying on undefined behavior, emit a diagnostic. Do
932   // not support this for #include-like directives, since that can result in
933   // terrible diagnostics, and does not work in GCC.
934   if (InMacroArgs) {
935     if (IdentifierInfo *II = Result.getIdentifierInfo()) {
936       switch (II->getPPKeywordID()) {
937       case tok::pp_include:
938       case tok::pp_import:
939       case tok::pp_include_next:
940       case tok::pp___include_macros:
941       case tok::pp_pragma:
942         Diag(Result, diag::err_embedded_directive) << II->getName();
943         Diag(*ArgMacro, diag::note_macro_expansion_here)
944             << ArgMacro->getIdentifierInfo();
945         DiscardUntilEndOfDirective();
946         return;
947       default:
948         break;
949       }
950     }
951     Diag(Result, diag::ext_embedded_directive);
952   }
953 
954   // Temporarily enable macro expansion if set so
955   // and reset to previous state when returning from this function.
956   ResetMacroExpansionHelper helper(this);
957 
958   if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
959     return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());
960 
961   switch (Result.getKind()) {
962   case tok::eod:
963     return;   // null directive.
964   case tok::code_completion:
965     if (CodeComplete)
966       CodeComplete->CodeCompleteDirective(
967                                     CurPPLexer->getConditionalStackDepth() > 0);
968     setCodeCompletionReached();
969     return;
970   case tok::numeric_constant:  // # 7  GNU line marker directive.
971     if (getLangOpts().AsmPreprocessor)
972       break;  // # 4 is not a preprocessor directive in .S files.
973     return HandleDigitDirective(Result);
974   default:
975     IdentifierInfo *II = Result.getIdentifierInfo();
976     if (!II) break; // Not an identifier.
977 
978     // Ask what the preprocessor keyword ID is.
979     switch (II->getPPKeywordID()) {
980     default: break;
981     // C99 6.10.1 - Conditional Inclusion.
982     case tok::pp_if:
983       return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
984     case tok::pp_ifdef:
985       return HandleIfdefDirective(Result, SavedHash, false,
986                                   true /*not valid for miopt*/);
987     case tok::pp_ifndef:
988       return HandleIfdefDirective(Result, SavedHash, true,
989                                   ReadAnyTokensBeforeDirective);
990     case tok::pp_elif:
991       return HandleElifDirective(Result, SavedHash);
992     case tok::pp_else:
993       return HandleElseDirective(Result, SavedHash);
994     case tok::pp_endif:
995       return HandleEndifDirective(Result);
996 
997     // C99 6.10.2 - Source File Inclusion.
998     case tok::pp_include:
999       // Handle #include.
1000       return HandleIncludeDirective(SavedHash.getLocation(), Result);
1001     case tok::pp___include_macros:
1002       // Handle -imacros.
1003       return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
1004 
1005     // C99 6.10.3 - Macro Replacement.
1006     case tok::pp_define:
1007       return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
1008     case tok::pp_undef:
1009       return HandleUndefDirective();
1010 
1011     // C99 6.10.4 - Line Control.
1012     case tok::pp_line:
1013       return HandleLineDirective();
1014 
1015     // C99 6.10.5 - Error Directive.
1016     case tok::pp_error:
1017       return HandleUserDiagnosticDirective(Result, false);
1018 
1019     // C99 6.10.6 - Pragma Directive.
1020     case tok::pp_pragma:
1021       return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
1022 
1023     // GNU Extensions.
1024     case tok::pp_import:
1025       return HandleImportDirective(SavedHash.getLocation(), Result);
1026     case tok::pp_include_next:
1027       return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
1028 
1029     case tok::pp_warning:
1030       Diag(Result, diag::ext_pp_warning_directive);
1031       return HandleUserDiagnosticDirective(Result, true);
1032     case tok::pp_ident:
1033       return HandleIdentSCCSDirective(Result);
1034     case tok::pp_sccs:
1035       return HandleIdentSCCSDirective(Result);
1036     case tok::pp_assert:
1037       //isExtension = true;  // FIXME: implement #assert
1038       break;
1039     case tok::pp_unassert:
1040       //isExtension = true;  // FIXME: implement #unassert
1041       break;
1042 
1043     case tok::pp___public_macro:
1044       if (getLangOpts().Modules)
1045         return HandleMacroPublicDirective(Result);
1046       break;
1047 
1048     case tok::pp___private_macro:
1049       if (getLangOpts().Modules)
1050         return HandleMacroPrivateDirective();
1051       break;
1052     }
1053     break;
1054   }
1055 
1056   // If this is a .S file, treat unknown # directives as non-preprocessor
1057   // directives.  This is important because # may be a comment or introduce
1058   // various pseudo-ops.  Just return the # token and push back the following
1059   // token to be lexed next time.
1060   if (getLangOpts().AsmPreprocessor) {
1061     auto Toks = std::make_unique<Token[]>(2);
1062     // Return the # and the token after it.
1063     Toks[0] = SavedHash;
1064     Toks[1] = Result;
1065 
1066     // If the second token is a hashhash token, then we need to translate it to
1067     // unknown so the token lexer doesn't try to perform token pasting.
1068     if (Result.is(tok::hashhash))
1069       Toks[1].setKind(tok::unknown);
1070 
1071     // Enter this token stream so that we re-lex the tokens.  Make sure to
1072     // enable macro expansion, in case the token after the # is an identifier
1073     // that is expanded.
1074     EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
1075     return;
1076   }
1077 
1078   // If we reached here, the preprocessing token is not valid!
1079   Diag(Result, diag::err_pp_invalid_directive);
1080 
1081   // Read the rest of the PP line.
1082   DiscardUntilEndOfDirective();
1083 
1084   // Okay, we're done parsing the directive.
1085 }
1086 
1087 /// GetLineValue - Convert a numeric token into an unsigned value, emitting
1088 /// Diagnostic DiagID if it is invalid, and returning the value in Val.
1089 static bool GetLineValue(Token &DigitTok, unsigned &Val,
1090                          unsigned DiagID, Preprocessor &PP,
1091                          bool IsGNULineDirective=false) {
1092   if (DigitTok.isNot(tok::numeric_constant)) {
1093     PP.Diag(DigitTok, DiagID);
1094 
1095     if (DigitTok.isNot(tok::eod))
1096       PP.DiscardUntilEndOfDirective();
1097     return true;
1098   }
1099 
1100   SmallString<64> IntegerBuffer;
1101   IntegerBuffer.resize(DigitTok.getLength());
1102   const char *DigitTokBegin = &IntegerBuffer[0];
1103   bool Invalid = false;
1104   unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
1105   if (Invalid)
1106     return true;
1107 
1108   // Verify that we have a simple digit-sequence, and compute the value.  This
1109   // is always a simple digit string computed in decimal, so we do this manually
1110   // here.
1111   Val = 0;
1112   for (unsigned i = 0; i != ActualLength; ++i) {
1113     // C++1y [lex.fcon]p1:
1114     //   Optional separating single quotes in a digit-sequence are ignored
1115     if (DigitTokBegin[i] == '\'')
1116       continue;
1117 
1118     if (!isDigit(DigitTokBegin[i])) {
1119       PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
1120               diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1121       PP.DiscardUntilEndOfDirective();
1122       return true;
1123     }
1124 
1125     unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1126     if (NextVal < Val) { // overflow.
1127       PP.Diag(DigitTok, DiagID);
1128       PP.DiscardUntilEndOfDirective();
1129       return true;
1130     }
1131     Val = NextVal;
1132   }
1133 
1134   if (DigitTokBegin[0] == '0' && Val)
1135     PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
1136       << IsGNULineDirective;
1137 
1138   return false;
1139 }
1140 
1141 /// Handle a \#line directive: C99 6.10.4.
1142 ///
1143 /// The two acceptable forms are:
1144 /// \verbatim
1145 ///   # line digit-sequence
1146 ///   # line digit-sequence "s-char-sequence"
1147 /// \endverbatim
1148 void Preprocessor::HandleLineDirective() {
1149   // Read the line # and string argument.  Per C99 6.10.4p5, these tokens are
1150   // expanded.
1151   Token DigitTok;
1152   Lex(DigitTok);
1153 
1154   // Validate the number and convert it to an unsigned.
1155   unsigned LineNo;
1156   if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
1157     return;
1158 
1159   if (LineNo == 0)
1160     Diag(DigitTok, diag::ext_pp_line_zero);
1161 
1162   // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1163   // number greater than 2147483647".  C90 requires that the line # be <= 32767.
1164   unsigned LineLimit = 32768U;
1165   if (LangOpts.C99 || LangOpts.CPlusPlus11)
1166     LineLimit = 2147483648U;
1167   if (LineNo >= LineLimit)
1168     Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
1169   else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1170     Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
1171 
1172   int FilenameID = -1;
1173   Token StrTok;
1174   Lex(StrTok);
1175 
1176   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1177   // string followed by eod.
1178   if (StrTok.is(tok::eod))
1179     ; // ok
1180   else if (StrTok.isNot(tok::string_literal)) {
1181     Diag(StrTok, diag::err_pp_line_invalid_filename);
1182     DiscardUntilEndOfDirective();
1183     return;
1184   } else if (StrTok.hasUDSuffix()) {
1185     Diag(StrTok, diag::err_invalid_string_udl);
1186     DiscardUntilEndOfDirective();
1187     return;
1188   } else {
1189     // Parse and validate the string, converting it into a unique ID.
1190     StringLiteralParser Literal(StrTok, *this);
1191     assert(Literal.isAscii() && "Didn't allow wide strings in");
1192     if (Literal.hadError) {
1193       DiscardUntilEndOfDirective();
1194       return;
1195     }
1196     if (Literal.Pascal) {
1197       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1198       DiscardUntilEndOfDirective();
1199       return;
1200     }
1201     FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1202 
1203     // Verify that there is nothing after the string, other than EOD.  Because
1204     // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1205     CheckEndOfDirective("line", true);
1206   }
1207 
1208   // Take the file kind of the file containing the #line directive. #line
1209   // directives are often used for generated sources from the same codebase, so
1210   // the new file should generally be classified the same way as the current
1211   // file. This is visible in GCC's pre-processed output, which rewrites #line
1212   // to GNU line markers.
1213   SrcMgr::CharacteristicKind FileKind =
1214       SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1215 
1216   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
1217                         false, FileKind);
1218 
1219   if (Callbacks)
1220     Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
1221                            PPCallbacks::RenameFile, FileKind);
1222 }
1223 
1224 /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1225 /// marker directive.
1226 static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1227                                 SrcMgr::CharacteristicKind &FileKind,
1228                                 Preprocessor &PP) {
1229   unsigned FlagVal;
1230   Token FlagTok;
1231   PP.Lex(FlagTok);
1232   if (FlagTok.is(tok::eod)) return false;
1233   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1234     return true;
1235 
1236   if (FlagVal == 1) {
1237     IsFileEntry = true;
1238 
1239     PP.Lex(FlagTok);
1240     if (FlagTok.is(tok::eod)) return false;
1241     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1242       return true;
1243   } else if (FlagVal == 2) {
1244     IsFileExit = true;
1245 
1246     SourceManager &SM = PP.getSourceManager();
1247     // If we are leaving the current presumed file, check to make sure the
1248     // presumed include stack isn't empty!
1249     FileID CurFileID =
1250       SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
1251     PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
1252     if (PLoc.isInvalid())
1253       return true;
1254 
1255     // If there is no include loc (main file) or if the include loc is in a
1256     // different physical file, then we aren't in a "1" line marker flag region.
1257     SourceLocation IncLoc = PLoc.getIncludeLoc();
1258     if (IncLoc.isInvalid() ||
1259         SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
1260       PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
1261       PP.DiscardUntilEndOfDirective();
1262       return true;
1263     }
1264 
1265     PP.Lex(FlagTok);
1266     if (FlagTok.is(tok::eod)) return false;
1267     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1268       return true;
1269   }
1270 
1271   // We must have 3 if there are still flags.
1272   if (FlagVal != 3) {
1273     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1274     PP.DiscardUntilEndOfDirective();
1275     return true;
1276   }
1277 
1278   FileKind = SrcMgr::C_System;
1279 
1280   PP.Lex(FlagTok);
1281   if (FlagTok.is(tok::eod)) return false;
1282   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1283     return true;
1284 
1285   // We must have 4 if there is yet another flag.
1286   if (FlagVal != 4) {
1287     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1288     PP.DiscardUntilEndOfDirective();
1289     return true;
1290   }
1291 
1292   FileKind = SrcMgr::C_ExternCSystem;
1293 
1294   PP.Lex(FlagTok);
1295   if (FlagTok.is(tok::eod)) return false;
1296 
1297   // There are no more valid flags here.
1298   PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1299   PP.DiscardUntilEndOfDirective();
1300   return true;
1301 }
1302 
1303 /// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1304 /// one of the following forms:
1305 ///
1306 ///     # 42
1307 ///     # 42 "file" ('1' | '2')?
1308 ///     # 42 "file" ('1' | '2')? '3' '4'?
1309 ///
1310 void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1311   // Validate the number and convert it to an unsigned.  GNU does not have a
1312   // line # limit other than it fit in 32-bits.
1313   unsigned LineNo;
1314   if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
1315                    *this, true))
1316     return;
1317 
1318   Token StrTok;
1319   Lex(StrTok);
1320 
1321   bool IsFileEntry = false, IsFileExit = false;
1322   int FilenameID = -1;
1323   SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1324 
1325   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1326   // string followed by eod.
1327   if (StrTok.is(tok::eod)) {
1328     // Treat this like "#line NN", which doesn't change file characteristics.
1329     FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1330   } else if (StrTok.isNot(tok::string_literal)) {
1331     Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1332     DiscardUntilEndOfDirective();
1333     return;
1334   } else if (StrTok.hasUDSuffix()) {
1335     Diag(StrTok, diag::err_invalid_string_udl);
1336     DiscardUntilEndOfDirective();
1337     return;
1338   } else {
1339     // Parse and validate the string, converting it into a unique ID.
1340     StringLiteralParser Literal(StrTok, *this);
1341     assert(Literal.isAscii() && "Didn't allow wide strings in");
1342     if (Literal.hadError) {
1343       DiscardUntilEndOfDirective();
1344       return;
1345     }
1346     if (Literal.Pascal) {
1347       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1348       DiscardUntilEndOfDirective();
1349       return;
1350     }
1351     FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1352 
1353     // If a filename was present, read any flags that are present.
1354     if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
1355       return;
1356   }
1357 
1358   // Create a line note with this information.
1359   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1360                         IsFileExit, FileKind);
1361 
1362   // If the preprocessor has callbacks installed, notify them of the #line
1363   // change.  This is used so that the line marker comes out in -E mode for
1364   // example.
1365   if (Callbacks) {
1366     PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1367     if (IsFileEntry)
1368       Reason = PPCallbacks::EnterFile;
1369     else if (IsFileExit)
1370       Reason = PPCallbacks::ExitFile;
1371 
1372     Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
1373   }
1374 }
1375 
1376 /// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1377 ///
1378 void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1379                                                  bool isWarning) {
1380   // Read the rest of the line raw.  We do this because we don't want macros
1381   // to be expanded and we don't require that the tokens be valid preprocessing
1382   // tokens.  For example, this is allowed: "#warning `   'foo".  GCC does
1383   // collapse multiple consecutive white space between tokens, but this isn't
1384   // specified by the standard.
1385   SmallString<128> Message;
1386   CurLexer->ReadToEndOfLine(&Message);
1387 
1388   // Find the first non-whitespace character, so that we can make the
1389   // diagnostic more succinct.
1390   StringRef Msg = StringRef(Message).ltrim(' ');
1391 
1392   if (isWarning)
1393     Diag(Tok, diag::pp_hash_warning) << Msg;
1394   else
1395     Diag(Tok, diag::err_pp_hash_error) << Msg;
1396 }
1397 
1398 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1399 ///
1400 void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1401   // Yes, this directive is an extension.
1402   Diag(Tok, diag::ext_pp_ident_directive);
1403 
1404   // Read the string argument.
1405   Token StrTok;
1406   Lex(StrTok);
1407 
1408   // If the token kind isn't a string, it's a malformed directive.
1409   if (StrTok.isNot(tok::string_literal) &&
1410       StrTok.isNot(tok::wide_string_literal)) {
1411     Diag(StrTok, diag::err_pp_malformed_ident);
1412     if (StrTok.isNot(tok::eod))
1413       DiscardUntilEndOfDirective();
1414     return;
1415   }
1416 
1417   if (StrTok.hasUDSuffix()) {
1418     Diag(StrTok, diag::err_invalid_string_udl);
1419     DiscardUntilEndOfDirective();
1420     return;
1421   }
1422 
1423   // Verify that there is nothing after the string, other than EOD.
1424   CheckEndOfDirective("ident");
1425 
1426   if (Callbacks) {
1427     bool Invalid = false;
1428     std::string Str = getSpelling(StrTok, &Invalid);
1429     if (!Invalid)
1430       Callbacks->Ident(Tok.getLocation(), Str);
1431   }
1432 }
1433 
1434 /// Handle a #public directive.
1435 void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1436   Token MacroNameTok;
1437   ReadMacroName(MacroNameTok, MU_Undef);
1438 
1439   // Error reading macro name?  If so, diagnostic already issued.
1440   if (MacroNameTok.is(tok::eod))
1441     return;
1442 
1443   // Check to see if this is the last token on the #__public_macro line.
1444   CheckEndOfDirective("__public_macro");
1445 
1446   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1447   // Okay, we finally have a valid identifier to undef.
1448   MacroDirective *MD = getLocalMacroDirective(II);
1449 
1450   // If the macro is not defined, this is an error.
1451   if (!MD) {
1452     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1453     return;
1454   }
1455 
1456   // Note that this macro has now been exported.
1457   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1458                                 MacroNameTok.getLocation(), /*isPublic=*/true));
1459 }
1460 
1461 /// Handle a #private directive.
1462 void Preprocessor::HandleMacroPrivateDirective() {
1463   Token MacroNameTok;
1464   ReadMacroName(MacroNameTok, MU_Undef);
1465 
1466   // Error reading macro name?  If so, diagnostic already issued.
1467   if (MacroNameTok.is(tok::eod))
1468     return;
1469 
1470   // Check to see if this is the last token on the #__private_macro line.
1471   CheckEndOfDirective("__private_macro");
1472 
1473   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1474   // Okay, we finally have a valid identifier to undef.
1475   MacroDirective *MD = getLocalMacroDirective(II);
1476 
1477   // If the macro is not defined, this is an error.
1478   if (!MD) {
1479     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1480     return;
1481   }
1482 
1483   // Note that this macro has now been marked private.
1484   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1485                                MacroNameTok.getLocation(), /*isPublic=*/false));
1486 }
1487 
1488 //===----------------------------------------------------------------------===//
1489 // Preprocessor Include Directive Handling.
1490 //===----------------------------------------------------------------------===//
1491 
1492 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1493 /// checked and spelled filename, e.g. as an operand of \#include. This returns
1494 /// true if the input filename was in <>'s or false if it were in ""'s.  The
1495 /// caller is expected to provide a buffer that is large enough to hold the
1496 /// spelling of the filename, but is also expected to handle the case when
1497 /// this method decides to use a different buffer.
1498 bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1499                                               StringRef &Buffer) {
1500   // Get the text form of the filename.
1501   assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1502 
1503   // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1504   // C++20 [lex.header]/2:
1505   //
1506   // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1507   //   in C: behavior is undefined
1508   //   in C++: program is conditionally-supported with implementation-defined
1509   //           semantics
1510 
1511   // Make sure the filename is <x> or "x".
1512   bool isAngled;
1513   if (Buffer[0] == '<') {
1514     if (Buffer.back() != '>') {
1515       Diag(Loc, diag::err_pp_expects_filename);
1516       Buffer = StringRef();
1517       return true;
1518     }
1519     isAngled = true;
1520   } else if (Buffer[0] == '"') {
1521     if (Buffer.back() != '"') {
1522       Diag(Loc, diag::err_pp_expects_filename);
1523       Buffer = StringRef();
1524       return true;
1525     }
1526     isAngled = false;
1527   } else {
1528     Diag(Loc, diag::err_pp_expects_filename);
1529     Buffer = StringRef();
1530     return true;
1531   }
1532 
1533   // Diagnose #include "" as invalid.
1534   if (Buffer.size() <= 2) {
1535     Diag(Loc, diag::err_pp_empty_filename);
1536     Buffer = StringRef();
1537     return true;
1538   }
1539 
1540   // Skip the brackets.
1541   Buffer = Buffer.substr(1, Buffer.size()-2);
1542   return isAngled;
1543 }
1544 
1545 /// Push a token onto the token stream containing an annotation.
1546 void Preprocessor::EnterAnnotationToken(SourceRange Range,
1547                                         tok::TokenKind Kind,
1548                                         void *AnnotationVal) {
1549   // FIXME: Produce this as the current token directly, rather than
1550   // allocating a new token for it.
1551   auto Tok = std::make_unique<Token[]>(1);
1552   Tok[0].startToken();
1553   Tok[0].setKind(Kind);
1554   Tok[0].setLocation(Range.getBegin());
1555   Tok[0].setAnnotationEndLoc(Range.getEnd());
1556   Tok[0].setAnnotationValue(AnnotationVal);
1557   EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
1558 }
1559 
1560 /// Produce a diagnostic informing the user that a #include or similar
1561 /// was implicitly treated as a module import.
1562 static void diagnoseAutoModuleImport(
1563     Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
1564     ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
1565     SourceLocation PathEnd) {
1566   StringRef ImportKeyword;
1567   if (PP.getLangOpts().ObjC)
1568     ImportKeyword = "@import";
1569   else if (PP.getLangOpts().ModulesTS || PP.getLangOpts().CPlusPlusModules)
1570     ImportKeyword = "import";
1571   else
1572     return; // no import syntax available
1573 
1574   SmallString<128> PathString;
1575   for (size_t I = 0, N = Path.size(); I != N; ++I) {
1576     if (I)
1577       PathString += '.';
1578     PathString += Path[I].first->getName();
1579   }
1580   int IncludeKind = 0;
1581 
1582   switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1583   case tok::pp_include:
1584     IncludeKind = 0;
1585     break;
1586 
1587   case tok::pp_import:
1588     IncludeKind = 1;
1589     break;
1590 
1591   case tok::pp_include_next:
1592     IncludeKind = 2;
1593     break;
1594 
1595   case tok::pp___include_macros:
1596     IncludeKind = 3;
1597     break;
1598 
1599   default:
1600     llvm_unreachable("unknown include directive kind");
1601   }
1602 
1603   CharSourceRange ReplaceRange(SourceRange(HashLoc, PathEnd),
1604                                /*IsTokenRange=*/false);
1605   PP.Diag(HashLoc, diag::warn_auto_module_import)
1606       << IncludeKind << PathString
1607       << FixItHint::CreateReplacement(
1608              ReplaceRange, (ImportKeyword + " " + PathString + ";").str());
1609 }
1610 
1611 // Given a vector of path components and a string containing the real
1612 // path to the file, build a properly-cased replacement in the vector,
1613 // and return true if the replacement should be suggested.
1614 static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1615                             StringRef RealPathName) {
1616   auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
1617   auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
1618   int Cnt = 0;
1619   bool SuggestReplacement = false;
1620   // Below is a best-effort to handle ".." in paths. It is admittedly
1621   // not 100% correct in the presence of symlinks.
1622   for (auto &Component : llvm::reverse(Components)) {
1623     if ("." == Component) {
1624     } else if (".." == Component) {
1625       ++Cnt;
1626     } else if (Cnt) {
1627       --Cnt;
1628     } else if (RealPathComponentIter != RealPathComponentEnd) {
1629       if (Component != *RealPathComponentIter) {
1630         // If these path components differ by more than just case, then we
1631         // may be looking at symlinked paths. Bail on this diagnostic to avoid
1632         // noisy false positives.
1633         SuggestReplacement = RealPathComponentIter->equals_lower(Component);
1634         if (!SuggestReplacement)
1635           break;
1636         Component = *RealPathComponentIter;
1637       }
1638       ++RealPathComponentIter;
1639     }
1640   }
1641   return SuggestReplacement;
1642 }
1643 
1644 bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
1645                                           const TargetInfo &TargetInfo,
1646                                           DiagnosticsEngine &Diags, Module *M) {
1647   Module::Requirement Requirement;
1648   Module::UnresolvedHeaderDirective MissingHeader;
1649   Module *ShadowingModule = nullptr;
1650   if (M->isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
1651                      ShadowingModule))
1652     return false;
1653 
1654   if (MissingHeader.FileNameLoc.isValid()) {
1655     Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
1656         << MissingHeader.IsUmbrella << MissingHeader.FileName;
1657   } else if (ShadowingModule) {
1658     Diags.Report(M->DefinitionLoc, diag::err_module_shadowed) << M->Name;
1659     Diags.Report(ShadowingModule->DefinitionLoc,
1660                  diag::note_previous_definition);
1661   } else {
1662     // FIXME: Track the location at which the requirement was specified, and
1663     // use it here.
1664     Diags.Report(M->DefinitionLoc, diag::err_module_unavailable)
1665         << M->getFullModuleName() << Requirement.second << Requirement.first;
1666   }
1667   return true;
1668 }
1669 
1670 /// HandleIncludeDirective - The "\#include" tokens have just been read, read
1671 /// the file to be included from the lexer, then include it!  This is a common
1672 /// routine with functionality shared between \#include, \#include_next and
1673 /// \#import.  LookupFrom is set when this is a \#include_next directive, it
1674 /// specifies the file to start searching from.
1675 void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
1676                                           Token &IncludeTok,
1677                                           const DirectoryLookup *LookupFrom,
1678                                           const FileEntry *LookupFromFile) {
1679   Token FilenameTok;
1680   if (LexHeaderName(FilenameTok))
1681     return;
1682 
1683   if (FilenameTok.isNot(tok::header_name)) {
1684     Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
1685     if (FilenameTok.isNot(tok::eod))
1686       DiscardUntilEndOfDirective();
1687     return;
1688   }
1689 
1690   // Verify that there is nothing after the filename, other than EOD.  Note
1691   // that we allow macros that expand to nothing after the filename, because
1692   // this falls into the category of "#include pp-tokens new-line" specified
1693   // in C99 6.10.2p4.
1694   SourceLocation EndLoc =
1695       CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
1696 
1697   auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
1698                                             EndLoc, LookupFrom, LookupFromFile);
1699   switch (Action.Kind) {
1700   case ImportAction::None:
1701   case ImportAction::SkippedModuleImport:
1702     break;
1703   case ImportAction::ModuleBegin:
1704     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
1705                          tok::annot_module_begin, Action.ModuleForHeader);
1706     break;
1707   case ImportAction::ModuleImport:
1708     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
1709                          tok::annot_module_include, Action.ModuleForHeader);
1710     break;
1711   case ImportAction::Failure:
1712     assert(TheModuleLoader.HadFatalFailure &&
1713            "This should be an early exit only to a fatal error");
1714     TheModuleLoader.HadFatalFailure = true;
1715     IncludeTok.setKind(tok::eof);
1716     CurLexer->cutOffLexing();
1717     return;
1718   }
1719 }
1720 
1721 Optional<FileEntryRef> Preprocessor::LookupHeaderIncludeOrImport(
1722     const DirectoryLookup *&CurDir, StringRef& Filename,
1723     SourceLocation FilenameLoc, CharSourceRange FilenameRange,
1724     const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
1725     bool &IsMapped, const DirectoryLookup *LookupFrom,
1726     const FileEntry *LookupFromFile, StringRef& LookupFilename,
1727     SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
1728     ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
1729   Optional<FileEntryRef> File = LookupFile(
1730       FilenameLoc, LookupFilename,
1731       isAngled, LookupFrom, LookupFromFile, CurDir,
1732       Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
1733       &SuggestedModule, &IsMapped, &IsFrameworkFound);
1734   if (File)
1735     return File;
1736 
1737   if (Callbacks) {
1738     // Give the clients a chance to recover.
1739     SmallString<128> RecoveryPath;
1740     if (Callbacks->FileNotFound(Filename, RecoveryPath)) {
1741       if (auto DE = FileMgr.getOptionalDirectoryRef(RecoveryPath)) {
1742         // Add the recovery path to the list of search paths.
1743         DirectoryLookup DL(*DE, SrcMgr::C_User, false);
1744         HeaderInfo.AddSearchPath(DL, isAngled);
1745 
1746         // Try the lookup again, skipping the cache.
1747         Optional<FileEntryRef> File = LookupFile(
1748             FilenameLoc,
1749             LookupFilename, isAngled,
1750             LookupFrom, LookupFromFile, CurDir, nullptr, nullptr,
1751             &SuggestedModule, &IsMapped, /*IsFrameworkFound=*/nullptr,
1752             /*SkipCache*/ true);
1753         if (File)
1754           return File;
1755       }
1756     }
1757   }
1758 
1759   if (SuppressIncludeNotFoundError)
1760     return None;
1761 
1762   // If the file could not be located and it was included via angle
1763   // brackets, we can attempt a lookup as though it were a quoted path to
1764   // provide the user with a possible fixit.
1765   if (isAngled) {
1766     Optional<FileEntryRef> File = LookupFile(
1767         FilenameLoc, LookupFilename,
1768         false, LookupFrom, LookupFromFile, CurDir,
1769         Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
1770         &SuggestedModule, &IsMapped,
1771         /*IsFrameworkFound=*/nullptr);
1772     if (File) {
1773       Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
1774           << Filename << IsImportDecl
1775           << FixItHint::CreateReplacement(FilenameRange,
1776                                           "\"" + Filename.str() + "\"");
1777       return File;
1778     }
1779   }
1780 
1781   // Check for likely typos due to leading or trailing non-isAlphanumeric
1782   // characters
1783   StringRef OriginalFilename = Filename;
1784   if (LangOpts.SpellChecking) {
1785     // A heuristic to correct a typo file name by removing leading and
1786     // trailing non-isAlphanumeric characters.
1787     auto CorrectTypoFilename = [](llvm::StringRef Filename) {
1788       Filename = Filename.drop_until(isAlphanumeric);
1789       while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
1790         Filename = Filename.drop_back();
1791       }
1792       return Filename;
1793     };
1794     StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
1795     StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
1796 
1797     Optional<FileEntryRef> File = LookupFile(
1798         FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom, LookupFromFile,
1799         CurDir, Callbacks ? &SearchPath : nullptr,
1800         Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
1801         /*IsFrameworkFound=*/nullptr);
1802     if (File) {
1803       auto Hint =
1804           isAngled ? FixItHint::CreateReplacement(
1805                          FilenameRange, "<" + TypoCorrectionName.str() + ">")
1806                    : FixItHint::CreateReplacement(
1807                          FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
1808       Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
1809           << OriginalFilename << TypoCorrectionName << Hint;
1810       // We found the file, so set the Filename to the name after typo
1811       // correction.
1812       Filename = TypoCorrectionName;
1813       LookupFilename = TypoCorrectionLookupName;
1814       return File;
1815     }
1816   }
1817 
1818   // If the file is still not found, just go with the vanilla diagnostic
1819   assert(!File.hasValue() && "expected missing file");
1820   Diag(FilenameTok, diag::err_pp_file_not_found)
1821       << OriginalFilename << FilenameRange;
1822   if (IsFrameworkFound) {
1823     size_t SlashPos = OriginalFilename.find('/');
1824     assert(SlashPos != StringRef::npos &&
1825            "Include with framework name should have '/' in the filename");
1826     StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
1827     FrameworkCacheEntry &CacheEntry =
1828         HeaderInfo.LookupFrameworkCache(FrameworkName);
1829     assert(CacheEntry.Directory && "Found framework should be in cache");
1830     Diag(FilenameTok, diag::note_pp_framework_without_header)
1831         << OriginalFilename.substr(SlashPos + 1) << FrameworkName
1832         << CacheEntry.Directory->getName();
1833   }
1834 
1835   return None;
1836 }
1837 
1838 /// Handle either a #include-like directive or an import declaration that names
1839 /// a header file.
1840 ///
1841 /// \param HashLoc The location of the '#' token for an include, or
1842 ///        SourceLocation() for an import declaration.
1843 /// \param IncludeTok The include / include_next / import token.
1844 /// \param FilenameTok The header-name token.
1845 /// \param EndLoc The location at which any imported macros become visible.
1846 /// \param LookupFrom For #include_next, the starting directory for the
1847 ///        directory lookup.
1848 /// \param LookupFromFile For #include_next, the starting file for the directory
1849 ///        lookup.
1850 Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
1851     SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
1852     SourceLocation EndLoc, const DirectoryLookup *LookupFrom,
1853     const FileEntry *LookupFromFile) {
1854   SmallString<128> FilenameBuffer;
1855   StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
1856   SourceLocation CharEnd = FilenameTok.getEndLoc();
1857 
1858   CharSourceRange FilenameRange
1859     = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
1860   StringRef OriginalFilename = Filename;
1861   bool isAngled =
1862     GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
1863 
1864   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
1865   // error.
1866   if (Filename.empty())
1867     return {ImportAction::None};
1868 
1869   bool IsImportDecl = HashLoc.isInvalid();
1870   SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
1871 
1872   // Complain about attempts to #include files in an audit pragma.
1873   if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
1874     Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
1875     Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);
1876 
1877     // Immediately leave the pragma.
1878     PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
1879   }
1880 
1881   // Complain about attempts to #include files in an assume-nonnull pragma.
1882   if (PragmaAssumeNonNullLoc.isValid()) {
1883     Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
1884     Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
1885 
1886     // Immediately leave the pragma.
1887     PragmaAssumeNonNullLoc = SourceLocation();
1888   }
1889 
1890   if (HeaderInfo.HasIncludeAliasMap()) {
1891     // Map the filename with the brackets still attached.  If the name doesn't
1892     // map to anything, fall back on the filename we've already gotten the
1893     // spelling for.
1894     StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
1895     if (!NewName.empty())
1896       Filename = NewName;
1897   }
1898 
1899   // Search include directories.
1900   bool IsMapped = false;
1901   bool IsFrameworkFound = false;
1902   const DirectoryLookup *CurDir;
1903   SmallString<1024> SearchPath;
1904   SmallString<1024> RelativePath;
1905   // We get the raw path only if we have 'Callbacks' to which we later pass
1906   // the path.
1907   ModuleMap::KnownHeader SuggestedModule;
1908   SourceLocation FilenameLoc = FilenameTok.getLocation();
1909   StringRef LookupFilename = Filename;
1910 
1911 #ifdef _WIN32
1912   llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::windows;
1913 #else
1914   // Normalize slashes when compiling with -fms-extensions on non-Windows. This
1915   // is unnecessary on Windows since the filesystem there handles backslashes.
1916   SmallString<128> NormalizedPath;
1917   llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::posix;
1918   if (LangOpts.MicrosoftExt) {
1919     NormalizedPath = Filename.str();
1920     llvm::sys::path::native(NormalizedPath);
1921     LookupFilename = NormalizedPath;
1922     BackslashStyle = llvm::sys::path::Style::windows;
1923   }
1924 #endif
1925 
1926   Optional<FileEntryRef> File = LookupHeaderIncludeOrImport(
1927       CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
1928       IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
1929       LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
1930 
1931   if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
1932     if (File && isPCHThroughHeader(&File->getFileEntry()))
1933       SkippingUntilPCHThroughHeader = false;
1934     return {ImportAction::None};
1935   }
1936 
1937   // Should we enter the source file? Set to Skip if either the source file is
1938   // known to have no effect beyond its effect on module visibility -- that is,
1939   // if it's got an include guard that is already defined, set to Import if it
1940   // is a modular header we've already built and should import.
1941   enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
1942 
1943   if (PPOpts->SingleFileParseMode)
1944     Action = IncludeLimitReached;
1945 
1946   // If we've reached the max allowed include depth, it is usually due to an
1947   // include cycle. Don't enter already processed files again as it can lead to
1948   // reaching the max allowed include depth again.
1949   if (Action == Enter && HasReachedMaxIncludeDepth && File &&
1950       HeaderInfo.getFileInfo(&File->getFileEntry()).NumIncludes)
1951     Action = IncludeLimitReached;
1952 
1953   // Determine whether we should try to import the module for this #include, if
1954   // there is one. Don't do so if precompiled module support is disabled or we
1955   // are processing this module textually (because we're building the module).
1956   if (Action == Enter && File && SuggestedModule && getLangOpts().Modules &&
1957       !isForModuleBuilding(SuggestedModule.getModule(),
1958                            getLangOpts().CurrentModule,
1959                            getLangOpts().ModuleName)) {
1960     // If this include corresponds to a module but that module is
1961     // unavailable, diagnose the situation and bail out.
1962     // FIXME: Remove this; loadModule does the same check (but produces
1963     // slightly worse diagnostics).
1964     if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), getDiagnostics(),
1965                                SuggestedModule.getModule())) {
1966       Diag(FilenameTok.getLocation(),
1967            diag::note_implicit_top_level_module_import_here)
1968           << SuggestedModule.getModule()->getTopLevelModuleName();
1969       return {ImportAction::None};
1970     }
1971 
1972     // Compute the module access path corresponding to this module.
1973     // FIXME: Should we have a second loadModule() overload to avoid this
1974     // extra lookup step?
1975     SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
1976     for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent)
1977       Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
1978                                     FilenameTok.getLocation()));
1979     std::reverse(Path.begin(), Path.end());
1980 
1981     // Warn that we're replacing the include/import with a module import.
1982     if (!IsImportDecl)
1983       diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);
1984 
1985     // Load the module to import its macros. We'll make the declarations
1986     // visible when the parser gets here.
1987     // FIXME: Pass SuggestedModule in here rather than converting it to a path
1988     // and making the module loader convert it back again.
1989     ModuleLoadResult Imported = TheModuleLoader.loadModule(
1990         IncludeTok.getLocation(), Path, Module::Hidden,
1991         /*IsInclusionDirective=*/true);
1992     assert((Imported == nullptr || Imported == SuggestedModule.getModule()) &&
1993            "the imported module is different than the suggested one");
1994 
1995     if (Imported) {
1996       Action = Import;
1997     } else if (Imported.isMissingExpected()) {
1998       // We failed to find a submodule that we assumed would exist (because it
1999       // was in the directory of an umbrella header, for instance), but no
2000       // actual module containing it exists (because the umbrella header is
2001       // incomplete).  Treat this as a textual inclusion.
2002       SuggestedModule = ModuleMap::KnownHeader();
2003     } else if (Imported.isConfigMismatch()) {
2004       // On a configuration mismatch, enter the header textually. We still know
2005       // that it's part of the corresponding module.
2006     } else {
2007       // We hit an error processing the import. Bail out.
2008       if (hadModuleLoaderFatalFailure()) {
2009         // With a fatal failure in the module loader, we abort parsing.
2010         Token &Result = IncludeTok;
2011         assert(CurLexer && "#include but no current lexer set!");
2012         Result.startToken();
2013         CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
2014         CurLexer->cutOffLexing();
2015       }
2016       return {ImportAction::None};
2017     }
2018   }
2019 
2020   // The #included file will be considered to be a system header if either it is
2021   // in a system include directory, or if the #includer is a system include
2022   // header.
2023   SrcMgr::CharacteristicKind FileCharacter =
2024       SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
2025   if (File)
2026     FileCharacter = std::max(HeaderInfo.getFileDirFlavor(&File->getFileEntry()),
2027                              FileCharacter);
2028 
2029   // If this is a '#import' or an import-declaration, don't re-enter the file.
2030   //
2031   // FIXME: If we have a suggested module for a '#include', and we've already
2032   // visited this file, don't bother entering it again. We know it has no
2033   // further effect.
2034   bool EnterOnce =
2035       IsImportDecl ||
2036       IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2037 
2038   // Ask HeaderInfo if we should enter this #include file.  If not, #including
2039   // this file will have no effect.
2040   if (Action == Enter && File &&
2041       !HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(),
2042                                          EnterOnce, getLangOpts().Modules,
2043                                          SuggestedModule.getModule())) {
2044     // Even if we've already preprocessed this header once and know that we
2045     // don't need to see its contents again, we still need to import it if it's
2046     // modular because we might not have imported it from this submodule before.
2047     //
2048     // FIXME: We don't do this when compiling a PCH because the AST
2049     // serialization layer can't cope with it. This means we get local
2050     // submodule visibility semantics wrong in that case.
2051     Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
2052   }
2053 
2054   // Check for circular inclusion of the main file.
2055   // We can't generate a consistent preamble with regard to the conditional
2056   // stack if the main file is included again as due to the preamble bounds
2057   // some directives (e.g. #endif of a header guard) will never be seen.
2058   // Since this will lead to confusing errors, avoid the inclusion.
2059   if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2060       SourceMgr.isMainFile(*File)) {
2061     Diag(FilenameTok.getLocation(),
2062          diag::err_pp_including_mainfile_in_preamble);
2063     return {ImportAction::None};
2064   }
2065 
2066   if (Callbacks && !IsImportDecl) {
2067     // Notify the callback object that we've seen an inclusion directive.
2068     // FIXME: Use a different callback for a pp-import?
2069     Callbacks->InclusionDirective(
2070         HashLoc, IncludeTok, LookupFilename, isAngled, FilenameRange,
2071         File ? &File->getFileEntry() : nullptr, SearchPath, RelativePath,
2072         Action == Import ? SuggestedModule.getModule() : nullptr,
2073         FileCharacter);
2074     if (Action == Skip && File)
2075       Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
2076   }
2077 
2078   if (!File)
2079     return {ImportAction::None};
2080 
2081   // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2082   // module corresponding to the named header.
2083   if (IsImportDecl && !SuggestedModule) {
2084     Diag(FilenameTok, diag::err_header_import_not_header_unit)
2085       << OriginalFilename << File->getName();
2086     return {ImportAction::None};
2087   }
2088 
2089   // Issue a diagnostic if the name of the file on disk has a different case
2090   // than the one we're about to open.
2091   const bool CheckIncludePathPortability =
2092       !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2093 
2094   if (CheckIncludePathPortability) {
2095     StringRef Name = LookupFilename;
2096     StringRef NameWithoriginalSlashes = Filename;
2097 #if defined(_WIN32)
2098     // Skip UNC prefix if present. (tryGetRealPathName() always
2099     // returns a path with the prefix skipped.)
2100     bool NameWasUNC = Name.consume_front("\\\\?\\");
2101     NameWithoriginalSlashes.consume_front("\\\\?\\");
2102 #endif
2103     StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2104     SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
2105                                           llvm::sys::path::end(Name));
2106 #if defined(_WIN32)
2107     // -Wnonportable-include-path is designed to diagnose includes using
2108     // case even on systems with a case-insensitive file system.
2109     // On Windows, RealPathName always starts with an upper-case drive
2110     // letter for absolute paths, but Name might start with either
2111     // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2112     // ("foo" will always have on-disk case, no matter which case was
2113     // used in the cd command). To not emit this warning solely for
2114     // the drive letter, whose case is dependent on if `cd` is used
2115     // with upper- or lower-case drive letters, always consider the
2116     // given drive letter case as correct for the purpose of this warning.
2117     SmallString<128> FixedDriveRealPath;
2118     if (llvm::sys::path::is_absolute(Name) &&
2119         llvm::sys::path::is_absolute(RealPathName) &&
2120         toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2121         isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2122       assert(Components.size() >= 3 && "should have drive, backslash, name");
2123       assert(Components[0].size() == 2 && "should start with drive");
2124       assert(Components[0][1] == ':' && "should have colon");
2125       FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2126       RealPathName = FixedDriveRealPath;
2127     }
2128 #endif
2129 
2130     if (trySimplifyPath(Components, RealPathName)) {
2131       SmallString<128> Path;
2132       Path.reserve(Name.size()+2);
2133       Path.push_back(isAngled ? '<' : '"');
2134 
2135       const auto IsSep = [BackslashStyle](char c) {
2136         return llvm::sys::path::is_separator(c, BackslashStyle);
2137       };
2138 
2139       for (auto Component : Components) {
2140         // On POSIX, Components will contain a single '/' as first element
2141         // exactly if Name is an absolute path.
2142         // On Windows, it will contain "C:" followed by '\' for absolute paths.
2143         // The drive letter is optional for absolute paths on Windows, but
2144         // clang currently cannot process absolute paths in #include lines that
2145         // don't have a drive.
2146         // If the first entry in Components is a directory separator,
2147         // then the code at the bottom of this loop that keeps the original
2148         // directory separator style copies it. If the second entry is
2149         // a directory separator (the C:\ case), then that separator already
2150         // got copied when the C: was processed and we want to skip that entry.
2151         if (!(Component.size() == 1 && IsSep(Component[0])))
2152           Path.append(Component);
2153         else if (!Path.empty())
2154           continue;
2155 
2156         // Append the separator(s) the user used, or the close quote
2157         if (Path.size() > NameWithoriginalSlashes.size()) {
2158           Path.push_back(isAngled ? '>' : '"');
2159           continue;
2160         }
2161         assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2162         do
2163           Path.push_back(NameWithoriginalSlashes[Path.size()-1]);
2164         while (Path.size() <= NameWithoriginalSlashes.size() &&
2165                IsSep(NameWithoriginalSlashes[Path.size()-1]));
2166       }
2167 
2168 #if defined(_WIN32)
2169       // Restore UNC prefix if it was there.
2170       if (NameWasUNC)
2171         Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2172 #endif
2173 
2174       // For user files and known standard headers, issue a diagnostic.
2175       // For other system headers, don't. They can be controlled separately.
2176       auto DiagId =
2177           (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
2178               ? diag::pp_nonportable_path
2179               : diag::pp_nonportable_system_path;
2180       Diag(FilenameTok, DiagId) << Path <<
2181         FixItHint::CreateReplacement(FilenameRange, Path);
2182     }
2183   }
2184 
2185   switch (Action) {
2186   case Skip:
2187     // If we don't need to enter the file, stop now.
2188     if (Module *M = SuggestedModule.getModule())
2189       return {ImportAction::SkippedModuleImport, M};
2190     return {ImportAction::None};
2191 
2192   case IncludeLimitReached:
2193     // If we reached our include limit and don't want to enter any more files,
2194     // don't go any further.
2195     return {ImportAction::None};
2196 
2197   case Import: {
2198     // If this is a module import, make it visible if needed.
2199     Module *M = SuggestedModule.getModule();
2200     assert(M && "no module to import");
2201 
2202     makeModuleVisible(M, EndLoc);
2203 
2204     if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2205         tok::pp___include_macros)
2206       return {ImportAction::None};
2207 
2208     return {ImportAction::ModuleImport, M};
2209   }
2210 
2211   case Enter:
2212     break;
2213   }
2214 
2215   // Check that we don't have infinite #include recursion.
2216   if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2217     Diag(FilenameTok, diag::err_pp_include_too_deep);
2218     HasReachedMaxIncludeDepth = true;
2219     return {ImportAction::None};
2220   }
2221 
2222   // Look up the file, create a File ID for it.
2223   SourceLocation IncludePos = FilenameTok.getLocation();
2224   // If the filename string was the result of macro expansions, set the include
2225   // position on the file where it will be included and after the expansions.
2226   if (IncludePos.isMacroID())
2227     IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
2228   FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
2229   if (!FID.isValid()) {
2230     TheModuleLoader.HadFatalFailure = true;
2231     return ImportAction::Failure;
2232   }
2233 
2234   // If all is good, enter the new file!
2235   if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation()))
2236     return {ImportAction::None};
2237 
2238   // Determine if we're switching to building a new submodule, and which one.
2239   if (auto *M = SuggestedModule.getModule()) {
2240     if (M->getTopLevelModule()->ShadowingModule) {
2241       // We are building a submodule that belongs to a shadowed module. This
2242       // means we find header files in the shadowed module.
2243       Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule)
2244         << M->getFullModuleName();
2245       Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2246            diag::note_previous_definition);
2247       return {ImportAction::None};
2248     }
2249     // When building a pch, -fmodule-name tells the compiler to textually
2250     // include headers in the specified module. We are not building the
2251     // specified module.
2252     //
2253     // FIXME: This is the wrong way to handle this. We should produce a PCH
2254     // that behaves the same as the header would behave in a compilation using
2255     // that PCH, which means we should enter the submodule. We need to teach
2256     // the AST serialization layer to deal with the resulting AST.
2257     if (getLangOpts().CompilingPCH &&
2258         isForModuleBuilding(M, getLangOpts().CurrentModule,
2259                             getLangOpts().ModuleName))
2260       return {ImportAction::None};
2261 
2262     assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2263     CurLexerSubmodule = M;
2264 
2265     // Let the macro handling code know that any future macros are within
2266     // the new submodule.
2267     EnterSubmodule(M, EndLoc, /*ForPragma*/false);
2268 
2269     // Let the parser know that any future declarations are within the new
2270     // submodule.
2271     // FIXME: There's no point doing this if we're handling a #__include_macros
2272     // directive.
2273     return {ImportAction::ModuleBegin, M};
2274   }
2275 
2276   assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2277   return {ImportAction::None};
2278 }
2279 
2280 /// HandleIncludeNextDirective - Implements \#include_next.
2281 ///
2282 void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2283                                               Token &IncludeNextTok) {
2284   Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2285 
2286   // #include_next is like #include, except that we start searching after
2287   // the current found directory.  If we can't do this, issue a
2288   // diagnostic.
2289   const DirectoryLookup *Lookup = CurDirLookup;
2290   const FileEntry *LookupFromFile = nullptr;
2291   if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
2292     // If the main file is a header, then it's either for PCH/AST generation,
2293     // or libclang opened it. Either way, handle it as a normal include below
2294     // and do not complain about include_next.
2295   } else if (isInPrimaryFile()) {
2296     Lookup = nullptr;
2297     Diag(IncludeNextTok, diag::pp_include_next_in_primary);
2298   } else if (CurLexerSubmodule) {
2299     // Start looking up in the directory *after* the one in which the current
2300     // file would be found, if any.
2301     assert(CurPPLexer && "#include_next directive in macro?");
2302     LookupFromFile = CurPPLexer->getFileEntry();
2303     Lookup = nullptr;
2304   } else if (!Lookup) {
2305     // The current file was not found by walking the include path. Either it
2306     // is the primary file (handled above), or it was found by absolute path,
2307     // or it was found relative to such a file.
2308     // FIXME: Track enough information so we know which case we're in.
2309     Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
2310   } else {
2311     // Start looking up in the next directory.
2312     ++Lookup;
2313   }
2314 
2315   return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
2316                                 LookupFromFile);
2317 }
2318 
2319 /// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
2320 void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2321   // The Microsoft #import directive takes a type library and generates header
2322   // files from it, and includes those.  This is beyond the scope of what clang
2323   // does, so we ignore it and error out.  However, #import can optionally have
2324   // trailing attributes that span multiple lines.  We're going to eat those
2325   // so we can continue processing from there.
2326   Diag(Tok, diag::err_pp_import_directive_ms );
2327 
2328   // Read tokens until we get to the end of the directive.  Note that the
2329   // directive can be split over multiple lines using the backslash character.
2330   DiscardUntilEndOfDirective();
2331 }
2332 
2333 /// HandleImportDirective - Implements \#import.
2334 ///
2335 void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2336                                          Token &ImportTok) {
2337   if (!LangOpts.ObjC) {  // #import is standard for ObjC.
2338     if (LangOpts.MSVCCompat)
2339       return HandleMicrosoftImportDirective(ImportTok);
2340     Diag(ImportTok, diag::ext_pp_import_directive);
2341   }
2342   return HandleIncludeDirective(HashLoc, ImportTok);
2343 }
2344 
2345 /// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2346 /// pseudo directive in the predefines buffer.  This handles it by sucking all
2347 /// tokens through the preprocessor and discarding them (only keeping the side
2348 /// effects on the preprocessor).
2349 void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2350                                                 Token &IncludeMacrosTok) {
2351   // This directive should only occur in the predefines buffer.  If not, emit an
2352   // error and reject it.
2353   SourceLocation Loc = IncludeMacrosTok.getLocation();
2354   if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2355     Diag(IncludeMacrosTok.getLocation(),
2356          diag::pp_include_macros_out_of_predefines);
2357     DiscardUntilEndOfDirective();
2358     return;
2359   }
2360 
2361   // Treat this as a normal #include for checking purposes.  If this is
2362   // successful, it will push a new lexer onto the include stack.
2363   HandleIncludeDirective(HashLoc, IncludeMacrosTok);
2364 
2365   Token TmpTok;
2366   do {
2367     Lex(TmpTok);
2368     assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2369   } while (TmpTok.isNot(tok::hashhash));
2370 }
2371 
2372 //===----------------------------------------------------------------------===//
2373 // Preprocessor Macro Directive Handling.
2374 //===----------------------------------------------------------------------===//
2375 
2376 /// ReadMacroParameterList - The ( starting a parameter list of a macro
2377 /// definition has just been read.  Lex the rest of the parameters and the
2378 /// closing ), updating MI with what we learn.  Return true if an error occurs
2379 /// parsing the param list.
2380 bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2381   SmallVector<IdentifierInfo*, 32> Parameters;
2382 
2383   while (true) {
2384     LexUnexpandedToken(Tok);
2385     switch (Tok.getKind()) {
2386     case tok::r_paren:
2387       // Found the end of the parameter list.
2388       if (Parameters.empty())  // #define FOO()
2389         return false;
2390       // Otherwise we have #define FOO(A,)
2391       Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2392       return true;
2393     case tok::ellipsis:  // #define X(... -> C99 varargs
2394       if (!LangOpts.C99)
2395         Diag(Tok, LangOpts.CPlusPlus11 ?
2396              diag::warn_cxx98_compat_variadic_macro :
2397              diag::ext_variadic_macro);
2398 
2399       // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2400       if (LangOpts.OpenCL) {
2401         Diag(Tok, diag::ext_pp_opencl_variadic_macros);
2402       }
2403 
2404       // Lex the token after the identifier.
2405       LexUnexpandedToken(Tok);
2406       if (Tok.isNot(tok::r_paren)) {
2407         Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2408         return true;
2409       }
2410       // Add the __VA_ARGS__ identifier as a parameter.
2411       Parameters.push_back(Ident__VA_ARGS__);
2412       MI->setIsC99Varargs();
2413       MI->setParameterList(Parameters, BP);
2414       return false;
2415     case tok::eod:  // #define X(
2416       Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2417       return true;
2418     default:
2419       // Handle keywords and identifiers here to accept things like
2420       // #define Foo(for) for.
2421       IdentifierInfo *II = Tok.getIdentifierInfo();
2422       if (!II) {
2423         // #define X(1
2424         Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2425         return true;
2426       }
2427 
2428       // If this is already used as a parameter, it is used multiple times (e.g.
2429       // #define X(A,A.
2430       if (llvm::find(Parameters, II) != Parameters.end()) { // C99 6.10.3p6
2431         Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
2432         return true;
2433       }
2434 
2435       // Add the parameter to the macro info.
2436       Parameters.push_back(II);
2437 
2438       // Lex the token after the identifier.
2439       LexUnexpandedToken(Tok);
2440 
2441       switch (Tok.getKind()) {
2442       default:          // #define X(A B
2443         Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2444         return true;
2445       case tok::r_paren: // #define X(A)
2446         MI->setParameterList(Parameters, BP);
2447         return false;
2448       case tok::comma:  // #define X(A,
2449         break;
2450       case tok::ellipsis:  // #define X(A... -> GCC extension
2451         // Diagnose extension.
2452         Diag(Tok, diag::ext_named_variadic_macro);
2453 
2454         // Lex the token after the identifier.
2455         LexUnexpandedToken(Tok);
2456         if (Tok.isNot(tok::r_paren)) {
2457           Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2458           return true;
2459         }
2460 
2461         MI->setIsGNUVarargs();
2462         MI->setParameterList(Parameters, BP);
2463         return false;
2464       }
2465     }
2466   }
2467 }
2468 
2469 static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2470                                    const LangOptions &LOptions) {
2471   if (MI->getNumTokens() == 1) {
2472     const Token &Value = MI->getReplacementToken(0);
2473 
2474     // Macro that is identity, like '#define inline inline' is a valid pattern.
2475     if (MacroName.getKind() == Value.getKind())
2476       return true;
2477 
2478     // Macro that maps a keyword to the same keyword decorated with leading/
2479     // trailing underscores is a valid pattern:
2480     //    #define inline __inline
2481     //    #define inline __inline__
2482     //    #define inline _inline (in MS compatibility mode)
2483     StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2484     if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2485       if (!II->isKeyword(LOptions))
2486         return false;
2487       StringRef ValueText = II->getName();
2488       StringRef TrimmedValue = ValueText;
2489       if (!ValueText.startswith("__")) {
2490         if (ValueText.startswith("_"))
2491           TrimmedValue = TrimmedValue.drop_front(1);
2492         else
2493           return false;
2494       } else {
2495         TrimmedValue = TrimmedValue.drop_front(2);
2496         if (TrimmedValue.endswith("__"))
2497           TrimmedValue = TrimmedValue.drop_back(2);
2498       }
2499       return TrimmedValue.equals(MacroText);
2500     } else {
2501       return false;
2502     }
2503   }
2504 
2505   // #define inline
2506   return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
2507                            tok::kw_const) &&
2508          MI->getNumTokens() == 0;
2509 }
2510 
2511 // ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2512 // entire line) of the macro's tokens and adds them to MacroInfo, and while
2513 // doing so performs certain validity checks including (but not limited to):
2514 //   - # (stringization) is followed by a macro parameter
2515 //
2516 //  Returns a nullptr if an invalid sequence of tokens is encountered or returns
2517 //  a pointer to a MacroInfo object.
2518 
2519 MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
2520     const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
2521 
2522   Token LastTok = MacroNameTok;
2523   // Create the new macro.
2524   MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());
2525 
2526   Token Tok;
2527   LexUnexpandedToken(Tok);
2528 
2529   // Ensure we consume the rest of the macro body if errors occur.
2530   auto _ = llvm::make_scope_exit([&]() {
2531     // The flag indicates if we are still waiting for 'eod'.
2532     if (CurLexer->ParsingPreprocessorDirective)
2533       DiscardUntilEndOfDirective();
2534   });
2535 
2536   // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
2537   // within their appropriate context.
2538   VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
2539 
2540   // If this is a function-like macro definition, parse the argument list,
2541   // marking each of the identifiers as being used as macro arguments.  Also,
2542   // check other constraints on the first token of the macro body.
2543   if (Tok.is(tok::eod)) {
2544     if (ImmediatelyAfterHeaderGuard) {
2545       // Save this macro information since it may part of a header guard.
2546       CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
2547                                         MacroNameTok.getLocation());
2548     }
2549     // If there is no body to this macro, we have no special handling here.
2550   } else if (Tok.hasLeadingSpace()) {
2551     // This is a normal token with leading space.  Clear the leading space
2552     // marker on the first token to get proper expansion.
2553     Tok.clearFlag(Token::LeadingSpace);
2554   } else if (Tok.is(tok::l_paren)) {
2555     // This is a function-like macro definition.  Read the argument list.
2556     MI->setIsFunctionLike();
2557     if (ReadMacroParameterList(MI, LastTok))
2558       return nullptr;
2559 
2560     // If this is a definition of an ISO C/C++ variadic function-like macro (not
2561     // using the GNU named varargs extension) inform our variadic scope guard
2562     // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
2563     // allowed only within the definition of a variadic macro.
2564 
2565     if (MI->isC99Varargs()) {
2566       VariadicMacroScopeGuard.enterScope();
2567     }
2568 
2569     // Read the first token after the arg list for down below.
2570     LexUnexpandedToken(Tok);
2571   } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
2572     // C99 requires whitespace between the macro definition and the body.  Emit
2573     // a diagnostic for something like "#define X+".
2574     Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
2575   } else {
2576     // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
2577     // first character of a replacement list is not a character required by
2578     // subclause 5.2.1, then there shall be white-space separation between the
2579     // identifier and the replacement list.".  5.2.1 lists this set:
2580     //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
2581     // is irrelevant here.
2582     bool isInvalid = false;
2583     if (Tok.is(tok::at)) // @ is not in the list above.
2584       isInvalid = true;
2585     else if (Tok.is(tok::unknown)) {
2586       // If we have an unknown token, it is something strange like "`".  Since
2587       // all of valid characters would have lexed into a single character
2588       // token of some sort, we know this is not a valid case.
2589       isInvalid = true;
2590     }
2591     if (isInvalid)
2592       Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
2593     else
2594       Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
2595   }
2596 
2597   if (!Tok.is(tok::eod))
2598     LastTok = Tok;
2599 
2600   // Read the rest of the macro body.
2601   if (MI->isObjectLike()) {
2602     // Object-like macros are very simple, just read their body.
2603     while (Tok.isNot(tok::eod)) {
2604       LastTok = Tok;
2605       MI->AddTokenToBody(Tok);
2606       // Get the next token of the macro.
2607       LexUnexpandedToken(Tok);
2608     }
2609   } else {
2610     // Otherwise, read the body of a function-like macro.  While we are at it,
2611     // check C99 6.10.3.2p1: ensure that # operators are followed by macro
2612     // parameters in function-like macro expansions.
2613 
2614     VAOptDefinitionContext VAOCtx(*this);
2615 
2616     while (Tok.isNot(tok::eod)) {
2617       LastTok = Tok;
2618 
2619       if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
2620         MI->AddTokenToBody(Tok);
2621 
2622         if (VAOCtx.isVAOptToken(Tok)) {
2623           // If we're already within a VAOPT, emit an error.
2624           if (VAOCtx.isInVAOpt()) {
2625             Diag(Tok, diag::err_pp_vaopt_nested_use);
2626             return nullptr;
2627           }
2628           // Ensure VAOPT is followed by a '(' .
2629           LexUnexpandedToken(Tok);
2630           if (Tok.isNot(tok::l_paren)) {
2631             Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
2632             return nullptr;
2633           }
2634           MI->AddTokenToBody(Tok);
2635           VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
2636           LexUnexpandedToken(Tok);
2637           if (Tok.is(tok::hashhash)) {
2638             Diag(Tok, diag::err_vaopt_paste_at_start);
2639             return nullptr;
2640           }
2641           continue;
2642         } else if (VAOCtx.isInVAOpt()) {
2643           if (Tok.is(tok::r_paren)) {
2644             if (VAOCtx.sawClosingParen()) {
2645               const unsigned NumTokens = MI->getNumTokens();
2646               assert(NumTokens >= 3 && "Must have seen at least __VA_OPT__( "
2647                                        "and a subsequent tok::r_paren");
2648               if (MI->getReplacementToken(NumTokens - 2).is(tok::hashhash)) {
2649                 Diag(Tok, diag::err_vaopt_paste_at_end);
2650                 return nullptr;
2651               }
2652             }
2653           } else if (Tok.is(tok::l_paren)) {
2654             VAOCtx.sawOpeningParen(Tok.getLocation());
2655           }
2656         }
2657         // Get the next token of the macro.
2658         LexUnexpandedToken(Tok);
2659         continue;
2660       }
2661 
2662       // If we're in -traditional mode, then we should ignore stringification
2663       // and token pasting. Mark the tokens as unknown so as not to confuse
2664       // things.
2665       if (getLangOpts().TraditionalCPP) {
2666         Tok.setKind(tok::unknown);
2667         MI->AddTokenToBody(Tok);
2668 
2669         // Get the next token of the macro.
2670         LexUnexpandedToken(Tok);
2671         continue;
2672       }
2673 
2674       if (Tok.is(tok::hashhash)) {
2675         // If we see token pasting, check if it looks like the gcc comma
2676         // pasting extension.  We'll use this information to suppress
2677         // diagnostics later on.
2678 
2679         // Get the next token of the macro.
2680         LexUnexpandedToken(Tok);
2681 
2682         if (Tok.is(tok::eod)) {
2683           MI->AddTokenToBody(LastTok);
2684           break;
2685         }
2686 
2687         unsigned NumTokens = MI->getNumTokens();
2688         if (NumTokens && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
2689             MI->getReplacementToken(NumTokens-1).is(tok::comma))
2690           MI->setHasCommaPasting();
2691 
2692         // Things look ok, add the '##' token to the macro.
2693         MI->AddTokenToBody(LastTok);
2694         continue;
2695       }
2696 
2697       // Our Token is a stringization operator.
2698       // Get the next token of the macro.
2699       LexUnexpandedToken(Tok);
2700 
2701       // Check for a valid macro arg identifier or __VA_OPT__.
2702       if (!VAOCtx.isVAOptToken(Tok) &&
2703           (Tok.getIdentifierInfo() == nullptr ||
2704            MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {
2705 
2706         // If this is assembler-with-cpp mode, we accept random gibberish after
2707         // the '#' because '#' is often a comment character.  However, change
2708         // the kind of the token to tok::unknown so that the preprocessor isn't
2709         // confused.
2710         if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
2711           LastTok.setKind(tok::unknown);
2712           MI->AddTokenToBody(LastTok);
2713           continue;
2714         } else {
2715           Diag(Tok, diag::err_pp_stringize_not_parameter)
2716             << LastTok.is(tok::hashat);
2717           return nullptr;
2718         }
2719       }
2720 
2721       // Things look ok, add the '#' and param name tokens to the macro.
2722       MI->AddTokenToBody(LastTok);
2723 
2724       // If the token following '#' is VAOPT, let the next iteration handle it
2725       // and check it for correctness, otherwise add the token and prime the
2726       // loop with the next one.
2727       if (!VAOCtx.isVAOptToken(Tok)) {
2728         MI->AddTokenToBody(Tok);
2729         LastTok = Tok;
2730 
2731         // Get the next token of the macro.
2732         LexUnexpandedToken(Tok);
2733       }
2734     }
2735     if (VAOCtx.isInVAOpt()) {
2736       assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
2737       Diag(Tok, diag::err_pp_expected_after)
2738         << LastTok.getKind() << tok::r_paren;
2739       Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
2740       return nullptr;
2741     }
2742   }
2743   MI->setDefinitionEndLoc(LastTok.getLocation());
2744   return MI;
2745 }
2746 /// HandleDefineDirective - Implements \#define.  This consumes the entire macro
2747 /// line then lets the caller lex the next real token.
2748 void Preprocessor::HandleDefineDirective(
2749     Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
2750   ++NumDefined;
2751 
2752   Token MacroNameTok;
2753   bool MacroShadowsKeyword;
2754   ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
2755 
2756   // Error reading macro name?  If so, diagnostic already issued.
2757   if (MacroNameTok.is(tok::eod))
2758     return;
2759 
2760   // If we are supposed to keep comments in #defines, reenable comment saving
2761   // mode.
2762   if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
2763 
2764   MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
2765       MacroNameTok, ImmediatelyAfterHeaderGuard);
2766 
2767   if (!MI) return;
2768 
2769   if (MacroShadowsKeyword &&
2770       !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
2771     Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
2772   }
2773   // Check that there is no paste (##) operator at the beginning or end of the
2774   // replacement list.
2775   unsigned NumTokens = MI->getNumTokens();
2776   if (NumTokens != 0) {
2777     if (MI->getReplacementToken(0).is(tok::hashhash)) {
2778       Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
2779       return;
2780     }
2781     if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
2782       Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
2783       return;
2784     }
2785   }
2786 
2787   // When skipping just warn about macros that do not match.
2788   if (SkippingUntilPCHThroughHeader) {
2789     const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
2790     if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
2791                              /*Syntactic=*/LangOpts.MicrosoftExt))
2792       Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
2793           << MacroNameTok.getIdentifierInfo();
2794     // Issue the diagnostic but allow the change if msvc extensions are enabled
2795     if (!LangOpts.MicrosoftExt)
2796       return;
2797   }
2798 
2799   // Finally, if this identifier already had a macro defined for it, verify that
2800   // the macro bodies are identical, and issue diagnostics if they are not.
2801   if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
2802     // In Objective-C, ignore attempts to directly redefine the builtin
2803     // definitions of the ownership qualifiers.  It's still possible to
2804     // #undef them.
2805     auto isObjCProtectedMacro = [](const IdentifierInfo *II) -> bool {
2806       return II->isStr("__strong") ||
2807              II->isStr("__weak") ||
2808              II->isStr("__unsafe_unretained") ||
2809              II->isStr("__autoreleasing");
2810     };
2811    if (getLangOpts().ObjC &&
2812         SourceMgr.getFileID(OtherMI->getDefinitionLoc())
2813           == getPredefinesFileID() &&
2814         isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
2815       // Warn if it changes the tokens.
2816       if ((!getDiagnostics().getSuppressSystemWarnings() ||
2817            !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
2818           !MI->isIdenticalTo(*OtherMI, *this,
2819                              /*Syntactic=*/LangOpts.MicrosoftExt)) {
2820         Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
2821       }
2822       assert(!OtherMI->isWarnIfUnused());
2823       return;
2824     }
2825 
2826     // It is very common for system headers to have tons of macro redefinitions
2827     // and for warnings to be disabled in system headers.  If this is the case,
2828     // then don't bother calling MacroInfo::isIdenticalTo.
2829     if (!getDiagnostics().getSuppressSystemWarnings() ||
2830         !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
2831       if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
2832         Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
2833 
2834       // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
2835       // C++ [cpp.predefined]p4, but allow it as an extension.
2836       if (OtherMI->isBuiltinMacro())
2837         Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
2838       // Macros must be identical.  This means all tokens and whitespace
2839       // separation must be the same.  C99 6.10.3p2.
2840       else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
2841                !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
2842         Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
2843           << MacroNameTok.getIdentifierInfo();
2844         Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
2845       }
2846     }
2847     if (OtherMI->isWarnIfUnused())
2848       WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
2849   }
2850 
2851   DefMacroDirective *MD =
2852       appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
2853 
2854   assert(!MI->isUsed());
2855   // If we need warning for not using the macro, add its location in the
2856   // warn-because-unused-macro set. If it gets used it will be removed from set.
2857   if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
2858       !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
2859       !MacroExpansionInDirectivesOverride &&
2860       getSourceManager().getFileID(MI->getDefinitionLoc()) !=
2861           getPredefinesFileID()) {
2862     MI->setIsWarnIfUnused(true);
2863     WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
2864   }
2865 
2866   // If the callbacks want to know, tell them about the macro definition.
2867   if (Callbacks)
2868     Callbacks->MacroDefined(MacroNameTok, MD);
2869 }
2870 
2871 /// HandleUndefDirective - Implements \#undef.
2872 ///
2873 void Preprocessor::HandleUndefDirective() {
2874   ++NumUndefined;
2875 
2876   Token MacroNameTok;
2877   ReadMacroName(MacroNameTok, MU_Undef);
2878 
2879   // Error reading macro name?  If so, diagnostic already issued.
2880   if (MacroNameTok.is(tok::eod))
2881     return;
2882 
2883   // Check to see if this is the last token on the #undef line.
2884   CheckEndOfDirective("undef");
2885 
2886   // Okay, we have a valid identifier to undef.
2887   auto *II = MacroNameTok.getIdentifierInfo();
2888   auto MD = getMacroDefinition(II);
2889   UndefMacroDirective *Undef = nullptr;
2890 
2891   // If the macro is not defined, this is a noop undef.
2892   if (const MacroInfo *MI = MD.getMacroInfo()) {
2893     if (!MI->isUsed() && MI->isWarnIfUnused())
2894       Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
2895 
2896     if (MI->isWarnIfUnused())
2897       WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
2898 
2899     Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
2900   }
2901 
2902   // If the callbacks want to know, tell them about the macro #undef.
2903   // Note: no matter if the macro was defined or not.
2904   if (Callbacks)
2905     Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
2906 
2907   if (Undef)
2908     appendMacroDirective(II, Undef);
2909 }
2910 
2911 //===----------------------------------------------------------------------===//
2912 // Preprocessor Conditional Directive Handling.
2913 //===----------------------------------------------------------------------===//
2914 
2915 /// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive.  isIfndef
2916 /// is true when this is a \#ifndef directive.  ReadAnyTokensBeforeDirective is
2917 /// true if any tokens have been returned or pp-directives activated before this
2918 /// \#ifndef has been lexed.
2919 ///
2920 void Preprocessor::HandleIfdefDirective(Token &Result,
2921                                         const Token &HashToken,
2922                                         bool isIfndef,
2923                                         bool ReadAnyTokensBeforeDirective) {
2924   ++NumIf;
2925   Token DirectiveTok = Result;
2926 
2927   Token MacroNameTok;
2928   ReadMacroName(MacroNameTok);
2929 
2930   // Error reading macro name?  If so, diagnostic already issued.
2931   if (MacroNameTok.is(tok::eod)) {
2932     // Skip code until we get to #endif.  This helps with recovery by not
2933     // emitting an error when the #endif is reached.
2934     SkipExcludedConditionalBlock(HashToken.getLocation(),
2935                                  DirectiveTok.getLocation(),
2936                                  /*Foundnonskip*/ false, /*FoundElse*/ false);
2937     return;
2938   }
2939 
2940   // Check to see if this is the last token on the #if[n]def line.
2941   CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
2942 
2943   IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
2944   auto MD = getMacroDefinition(MII);
2945   MacroInfo *MI = MD.getMacroInfo();
2946 
2947   if (CurPPLexer->getConditionalStackDepth() == 0) {
2948     // If the start of a top-level #ifdef and if the macro is not defined,
2949     // inform MIOpt that this might be the start of a proper include guard.
2950     // Otherwise it is some other form of unknown conditional which we can't
2951     // handle.
2952     if (!ReadAnyTokensBeforeDirective && !MI) {
2953       assert(isIfndef && "#ifdef shouldn't reach here");
2954       CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
2955     } else
2956       CurPPLexer->MIOpt.EnterTopLevelConditional();
2957   }
2958 
2959   // If there is a macro, process it.
2960   if (MI)  // Mark it used.
2961     markMacroAsUsed(MI);
2962 
2963   if (Callbacks) {
2964     if (isIfndef)
2965       Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
2966     else
2967       Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
2968   }
2969 
2970   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
2971     getSourceManager().isInMainFile(DirectiveTok.getLocation());
2972 
2973   // Should we include the stuff contained by this directive?
2974   if (PPOpts->SingleFileParseMode && !MI) {
2975     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
2976     // the directive blocks.
2977     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
2978                                      /*wasskip*/false, /*foundnonskip*/false,
2979                                      /*foundelse*/false);
2980   } else if (!MI == isIfndef || RetainExcludedCB) {
2981     // Yes, remember that we are inside a conditional, then lex the next token.
2982     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
2983                                      /*wasskip*/false, /*foundnonskip*/true,
2984                                      /*foundelse*/false);
2985   } else {
2986     // No, skip the contents of this block.
2987     SkipExcludedConditionalBlock(HashToken.getLocation(),
2988                                  DirectiveTok.getLocation(),
2989                                  /*Foundnonskip*/ false,
2990                                  /*FoundElse*/ false);
2991   }
2992 }
2993 
2994 /// HandleIfDirective - Implements the \#if directive.
2995 ///
2996 void Preprocessor::HandleIfDirective(Token &IfToken,
2997                                      const Token &HashToken,
2998                                      bool ReadAnyTokensBeforeDirective) {
2999   ++NumIf;
3000 
3001   // Parse and evaluate the conditional expression.
3002   IdentifierInfo *IfNDefMacro = nullptr;
3003   const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3004   const bool ConditionalTrue = DER.Conditional;
3005 
3006   // If this condition is equivalent to #ifndef X, and if this is the first
3007   // directive seen, handle it for the multiple-include optimization.
3008   if (CurPPLexer->getConditionalStackDepth() == 0) {
3009     if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3010       // FIXME: Pass in the location of the macro name, not the 'if' token.
3011       CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
3012     else
3013       CurPPLexer->MIOpt.EnterTopLevelConditional();
3014   }
3015 
3016   if (Callbacks)
3017     Callbacks->If(
3018         IfToken.getLocation(), DER.ExprRange,
3019         (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3020 
3021   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3022     getSourceManager().isInMainFile(IfToken.getLocation());
3023 
3024   // Should we include the stuff contained by this directive?
3025   if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
3026     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3027     // the directive blocks.
3028     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3029                                      /*foundnonskip*/false, /*foundelse*/false);
3030   } else if (ConditionalTrue || RetainExcludedCB) {
3031     // Yes, remember that we are inside a conditional, then lex the next token.
3032     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3033                                    /*foundnonskip*/true, /*foundelse*/false);
3034   } else {
3035     // No, skip the contents of this block.
3036     SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
3037                                  /*Foundnonskip*/ false,
3038                                  /*FoundElse*/ false);
3039   }
3040 }
3041 
3042 /// HandleEndifDirective - Implements the \#endif directive.
3043 ///
3044 void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3045   ++NumEndif;
3046 
3047   // Check that this is the whole directive.
3048   CheckEndOfDirective("endif");
3049 
3050   PPConditionalInfo CondInfo;
3051   if (CurPPLexer->popConditionalLevel(CondInfo)) {
3052     // No conditionals on the stack: this is an #endif without an #if.
3053     Diag(EndifToken, diag::err_pp_endif_without_if);
3054     return;
3055   }
3056 
3057   // If this the end of a top-level #endif, inform MIOpt.
3058   if (CurPPLexer->getConditionalStackDepth() == 0)
3059     CurPPLexer->MIOpt.ExitTopLevelConditional();
3060 
3061   assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3062          "This code should only be reachable in the non-skipping case!");
3063 
3064   if (Callbacks)
3065     Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
3066 }
3067 
3068 /// HandleElseDirective - Implements the \#else directive.
3069 ///
3070 void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3071   ++NumElse;
3072 
3073   // #else directive in a non-skipping conditional... start skipping.
3074   CheckEndOfDirective("else");
3075 
3076   PPConditionalInfo CI;
3077   if (CurPPLexer->popConditionalLevel(CI)) {
3078     Diag(Result, diag::pp_err_else_without_if);
3079     return;
3080   }
3081 
3082   // If this is a top-level #else, inform the MIOpt.
3083   if (CurPPLexer->getConditionalStackDepth() == 0)
3084     CurPPLexer->MIOpt.EnterTopLevelConditional();
3085 
3086   // If this is a #else with a #else before it, report the error.
3087   if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
3088 
3089   if (Callbacks)
3090     Callbacks->Else(Result.getLocation(), CI.IfLoc);
3091 
3092   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3093     getSourceManager().isInMainFile(Result.getLocation());
3094 
3095   if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3096     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3097     // the directive blocks.
3098     CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
3099                                      /*foundnonskip*/false, /*foundelse*/true);
3100     return;
3101   }
3102 
3103   // Finally, skip the rest of the contents of this block.
3104   SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
3105                                /*Foundnonskip*/ true,
3106                                /*FoundElse*/ true, Result.getLocation());
3107 }
3108 
3109 /// HandleElifDirective - Implements the \#elif directive.
3110 ///
3111 void Preprocessor::HandleElifDirective(Token &ElifToken,
3112                                        const Token &HashToken) {
3113   ++NumElse;
3114 
3115   // #elif directive in a non-skipping conditional... start skipping.
3116   // We don't care what the condition is, because we will always skip it (since
3117   // the block immediately before it was included).
3118   SourceRange ConditionRange = DiscardUntilEndOfDirective();
3119 
3120   PPConditionalInfo CI;
3121   if (CurPPLexer->popConditionalLevel(CI)) {
3122     Diag(ElifToken, diag::pp_err_elif_without_if);
3123     return;
3124   }
3125 
3126   // If this is a top-level #elif, inform the MIOpt.
3127   if (CurPPLexer->getConditionalStackDepth() == 0)
3128     CurPPLexer->MIOpt.EnterTopLevelConditional();
3129 
3130   // If this is a #elif with a #else before it, report the error.
3131   if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else);
3132 
3133   if (Callbacks)
3134     Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
3135                     PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
3136 
3137   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3138     getSourceManager().isInMainFile(ElifToken.getLocation());
3139 
3140   if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3141     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3142     // the directive blocks.
3143     CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
3144                                      /*foundnonskip*/false, /*foundelse*/false);
3145     return;
3146   }
3147 
3148   // Finally, skip the rest of the contents of this block.
3149   SkipExcludedConditionalBlock(
3150       HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
3151       /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
3152 }
3153