xref: /freebsd/contrib/llvm-project/clang/lib/Lex/PPMacroExpansion.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--- PPMacroExpansion.cpp - Top level Macro Expansion -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the top level handling of macro expansion for the
10 // preprocessor.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/AttributeCommonInfo.h"
15 #include "clang/Basic/Attributes.h"
16 #include "clang/Basic/Builtins.h"
17 #include "clang/Basic/IdentifierTable.h"
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/LangOptions.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Basic/TargetInfo.h"
22 #include "clang/Lex/CodeCompletionHandler.h"
23 #include "clang/Lex/DirectoryLookup.h"
24 #include "clang/Lex/ExternalPreprocessorSource.h"
25 #include "clang/Lex/HeaderSearch.h"
26 #include "clang/Lex/LexDiagnostic.h"
27 #include "clang/Lex/LiteralSupport.h"
28 #include "clang/Lex/MacroArgs.h"
29 #include "clang/Lex/MacroInfo.h"
30 #include "clang/Lex/Preprocessor.h"
31 #include "clang/Lex/PreprocessorLexer.h"
32 #include "clang/Lex/PreprocessorOptions.h"
33 #include "clang/Lex/Token.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/DenseSet.h"
37 #include "llvm/ADT/FoldingSet.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/StringRef.h"
41 #include "llvm/ADT/StringSwitch.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/Format.h"
44 #include "llvm/Support/Path.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include <algorithm>
47 #include <cassert>
48 #include <cstddef>
49 #include <cstring>
50 #include <ctime>
51 #include <iomanip>
52 #include <optional>
53 #include <sstream>
54 #include <string>
55 #include <tuple>
56 #include <utility>
57 
58 using namespace clang;
59 
60 MacroDirective *
getLocalMacroDirectiveHistory(const IdentifierInfo * II) const61 Preprocessor::getLocalMacroDirectiveHistory(const IdentifierInfo *II) const {
62   if (!II->hadMacroDefinition())
63     return nullptr;
64   auto Pos = CurSubmoduleState->Macros.find(II);
65   return Pos == CurSubmoduleState->Macros.end() ? nullptr
66                                                 : Pos->second.getLatest();
67 }
68 
appendMacroDirective(IdentifierInfo * II,MacroDirective * MD)69 void Preprocessor::appendMacroDirective(IdentifierInfo *II, MacroDirective *MD){
70   assert(MD && "MacroDirective should be non-zero!");
71   assert(!MD->getPrevious() && "Already attached to a MacroDirective history.");
72 
73   MacroState &StoredMD = CurSubmoduleState->Macros[II];
74   auto *OldMD = StoredMD.getLatest();
75   MD->setPrevious(OldMD);
76   StoredMD.setLatest(MD);
77   StoredMD.overrideActiveModuleMacros(*this, II);
78 
79   if (needModuleMacros()) {
80     // Track that we created a new macro directive, so we know we should
81     // consider building a ModuleMacro for it when we get to the end of
82     // the module.
83     PendingModuleMacroNames.push_back(II);
84   }
85 
86   // Set up the identifier as having associated macro history.
87   II->setHasMacroDefinition(true);
88   if (!MD->isDefined() && !LeafModuleMacros.contains(II))
89     II->setHasMacroDefinition(false);
90   if (II->isFromAST())
91     II->setChangedSinceDeserialization();
92 }
93 
setLoadedMacroDirective(IdentifierInfo * II,MacroDirective * ED,MacroDirective * MD)94 void Preprocessor::setLoadedMacroDirective(IdentifierInfo *II,
95                                            MacroDirective *ED,
96                                            MacroDirective *MD) {
97   // Normally, when a macro is defined, it goes through appendMacroDirective()
98   // above, which chains a macro to previous defines, undefs, etc.
99   // However, in a pch, the whole macro history up to the end of the pch is
100   // stored, so ASTReader goes through this function instead.
101   // However, built-in macros are already registered in the Preprocessor
102   // ctor, and ASTWriter stops writing the macro chain at built-in macros,
103   // so in that case the chain from the pch needs to be spliced to the existing
104   // built-in.
105 
106   assert(II && MD);
107   MacroState &StoredMD = CurSubmoduleState->Macros[II];
108 
109   if (auto *OldMD = StoredMD.getLatest()) {
110     // shouldIgnoreMacro() in ASTWriter also stops at macros from the
111     // predefines buffer in module builds. However, in module builds, modules
112     // are loaded completely before predefines are processed, so StoredMD
113     // will be nullptr for them when they're loaded. StoredMD should only be
114     // non-nullptr for builtins read from a pch file.
115     assert(OldMD->getMacroInfo()->isBuiltinMacro() &&
116            "only built-ins should have an entry here");
117     assert(!OldMD->getPrevious() && "builtin should only have a single entry");
118     ED->setPrevious(OldMD);
119     StoredMD.setLatest(MD);
120   } else {
121     StoredMD = MD;
122   }
123 
124   // Setup the identifier as having associated macro history.
125   II->setHasMacroDefinition(true);
126   if (!MD->isDefined() && !LeafModuleMacros.contains(II))
127     II->setHasMacroDefinition(false);
128 }
129 
addModuleMacro(Module * Mod,IdentifierInfo * II,MacroInfo * Macro,ArrayRef<ModuleMacro * > Overrides,bool & New)130 ModuleMacro *Preprocessor::addModuleMacro(Module *Mod, IdentifierInfo *II,
131                                           MacroInfo *Macro,
132                                           ArrayRef<ModuleMacro *> Overrides,
133                                           bool &New) {
134   llvm::FoldingSetNodeID ID;
135   ModuleMacro::Profile(ID, Mod, II);
136 
137   void *InsertPos;
138   if (auto *MM = ModuleMacros.FindNodeOrInsertPos(ID, InsertPos)) {
139     New = false;
140     return MM;
141   }
142 
143   auto *MM = ModuleMacro::create(*this, Mod, II, Macro, Overrides);
144   ModuleMacros.InsertNode(MM, InsertPos);
145 
146   // Each overridden macro is now overridden by one more macro.
147   bool HidAny = false;
148   for (auto *O : Overrides) {
149     HidAny |= (O->NumOverriddenBy == 0);
150     ++O->NumOverriddenBy;
151   }
152 
153   // If we were the first overrider for any macro, it's no longer a leaf.
154   auto &LeafMacros = LeafModuleMacros[II];
155   if (HidAny) {
156     llvm::erase_if(LeafMacros,
157                    [](ModuleMacro *MM) { return MM->NumOverriddenBy != 0; });
158   }
159 
160   // The new macro is always a leaf macro.
161   LeafMacros.push_back(MM);
162   // The identifier now has defined macros (that may or may not be visible).
163   II->setHasMacroDefinition(true);
164 
165   New = true;
166   return MM;
167 }
168 
getModuleMacro(Module * Mod,const IdentifierInfo * II)169 ModuleMacro *Preprocessor::getModuleMacro(Module *Mod,
170                                           const IdentifierInfo *II) {
171   llvm::FoldingSetNodeID ID;
172   ModuleMacro::Profile(ID, Mod, II);
173 
174   void *InsertPos;
175   return ModuleMacros.FindNodeOrInsertPos(ID, InsertPos);
176 }
177 
updateModuleMacroInfo(const IdentifierInfo * II,ModuleMacroInfo & Info)178 void Preprocessor::updateModuleMacroInfo(const IdentifierInfo *II,
179                                          ModuleMacroInfo &Info) {
180   assert(Info.ActiveModuleMacrosGeneration !=
181              CurSubmoduleState->VisibleModules.getGeneration() &&
182          "don't need to update this macro name info");
183   Info.ActiveModuleMacrosGeneration =
184       CurSubmoduleState->VisibleModules.getGeneration();
185 
186   auto Leaf = LeafModuleMacros.find(II);
187   if (Leaf == LeafModuleMacros.end()) {
188     // No imported macros at all: nothing to do.
189     return;
190   }
191 
192   Info.ActiveModuleMacros.clear();
193 
194   // Every macro that's locally overridden is overridden by a visible macro.
195   llvm::DenseMap<ModuleMacro *, int> NumHiddenOverrides;
196   for (auto *O : Info.OverriddenMacros)
197     NumHiddenOverrides[O] = -1;
198 
199   // Collect all macros that are not overridden by a visible macro.
200   llvm::SmallVector<ModuleMacro *, 16> Worklist;
201   for (auto *LeafMM : Leaf->second) {
202     assert(LeafMM->getNumOverridingMacros() == 0 && "leaf macro overridden");
203     if (NumHiddenOverrides.lookup(LeafMM) == 0)
204       Worklist.push_back(LeafMM);
205   }
206   while (!Worklist.empty()) {
207     auto *MM = Worklist.pop_back_val();
208     if (CurSubmoduleState->VisibleModules.isVisible(MM->getOwningModule())) {
209       // We only care about collecting definitions; undefinitions only act
210       // to override other definitions.
211       if (MM->getMacroInfo())
212         Info.ActiveModuleMacros.push_back(MM);
213     } else {
214       for (auto *O : MM->overrides())
215         if ((unsigned)++NumHiddenOverrides[O] == O->getNumOverridingMacros())
216           Worklist.push_back(O);
217     }
218   }
219   // Our reverse postorder walk found the macros in reverse order.
220   std::reverse(Info.ActiveModuleMacros.begin(), Info.ActiveModuleMacros.end());
221 
222   // Determine whether the macro name is ambiguous.
223   MacroInfo *MI = nullptr;
224   bool IsSystemMacro = true;
225   bool IsAmbiguous = false;
226   if (auto *MD = Info.MD) {
227     while (isa_and_nonnull<VisibilityMacroDirective>(MD))
228       MD = MD->getPrevious();
229     if (auto *DMD = dyn_cast_or_null<DefMacroDirective>(MD)) {
230       MI = DMD->getInfo();
231       IsSystemMacro &= SourceMgr.isInSystemHeader(DMD->getLocation());
232     }
233   }
234   for (auto *Active : Info.ActiveModuleMacros) {
235     auto *NewMI = Active->getMacroInfo();
236 
237     // Before marking the macro as ambiguous, check if this is a case where
238     // both macros are in system headers. If so, we trust that the system
239     // did not get it wrong. This also handles cases where Clang's own
240     // headers have a different spelling of certain system macros:
241     //   #define LONG_MAX __LONG_MAX__ (clang's limits.h)
242     //   #define LONG_MAX 0x7fffffffffffffffL (system's limits.h)
243     //
244     // FIXME: Remove the defined-in-system-headers check. clang's limits.h
245     // overrides the system limits.h's macros, so there's no conflict here.
246     if (MI && NewMI != MI &&
247         !MI->isIdenticalTo(*NewMI, *this, /*Syntactically=*/true))
248       IsAmbiguous = true;
249     IsSystemMacro &= Active->getOwningModule()->IsSystem ||
250                      SourceMgr.isInSystemHeader(NewMI->getDefinitionLoc());
251     MI = NewMI;
252   }
253   Info.IsAmbiguous = IsAmbiguous && !IsSystemMacro;
254 }
255 
dumpMacroInfo(const IdentifierInfo * II)256 void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) {
257   ArrayRef<ModuleMacro*> Leaf;
258   auto LeafIt = LeafModuleMacros.find(II);
259   if (LeafIt != LeafModuleMacros.end())
260     Leaf = LeafIt->second;
261   const MacroState *State = nullptr;
262   auto Pos = CurSubmoduleState->Macros.find(II);
263   if (Pos != CurSubmoduleState->Macros.end())
264     State = &Pos->second;
265 
266   llvm::errs() << "MacroState " << State << " " << II->getNameStart();
267   if (State && State->isAmbiguous(*this, II))
268     llvm::errs() << " ambiguous";
269   if (State && !State->getOverriddenMacros().empty()) {
270     llvm::errs() << " overrides";
271     for (auto *O : State->getOverriddenMacros())
272       llvm::errs() << " " << O->getOwningModule()->getFullModuleName();
273   }
274   llvm::errs() << "\n";
275 
276   // Dump local macro directives.
277   for (auto *MD = State ? State->getLatest() : nullptr; MD;
278        MD = MD->getPrevious()) {
279     llvm::errs() << " ";
280     MD->dump();
281   }
282 
283   // Dump module macros.
284   llvm::DenseSet<ModuleMacro*> Active;
285   for (auto *MM : State ? State->getActiveModuleMacros(*this, II)
286                         : ArrayRef<ModuleMacro *>())
287     Active.insert(MM);
288   llvm::DenseSet<ModuleMacro*> Visited;
289   llvm::SmallVector<ModuleMacro *, 16> Worklist(Leaf);
290   while (!Worklist.empty()) {
291     auto *MM = Worklist.pop_back_val();
292     llvm::errs() << " ModuleMacro " << MM << " "
293                  << MM->getOwningModule()->getFullModuleName();
294     if (!MM->getMacroInfo())
295       llvm::errs() << " undef";
296 
297     if (Active.count(MM))
298       llvm::errs() << " active";
299     else if (!CurSubmoduleState->VisibleModules.isVisible(
300                  MM->getOwningModule()))
301       llvm::errs() << " hidden";
302     else if (MM->getMacroInfo())
303       llvm::errs() << " overridden";
304 
305     if (!MM->overrides().empty()) {
306       llvm::errs() << " overrides";
307       for (auto *O : MM->overrides()) {
308         llvm::errs() << " " << O->getOwningModule()->getFullModuleName();
309         if (Visited.insert(O).second)
310           Worklist.push_back(O);
311       }
312     }
313     llvm::errs() << "\n";
314     if (auto *MI = MM->getMacroInfo()) {
315       llvm::errs() << "  ";
316       MI->dump();
317       llvm::errs() << "\n";
318     }
319   }
320 }
321 
322 /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
323 /// identifier table.
RegisterBuiltinMacros()324 void Preprocessor::RegisterBuiltinMacros() {
325   Ident__LINE__ = RegisterBuiltinMacro("__LINE__");
326   Ident__FILE__ = RegisterBuiltinMacro("__FILE__");
327   Ident__DATE__ = RegisterBuiltinMacro("__DATE__");
328   Ident__TIME__ = RegisterBuiltinMacro("__TIME__");
329   Ident__COUNTER__ = RegisterBuiltinMacro("__COUNTER__");
330   Ident_Pragma = RegisterBuiltinMacro("_Pragma");
331   Ident__FLT_EVAL_METHOD__ = RegisterBuiltinMacro("__FLT_EVAL_METHOD__");
332 
333   // C++ Standing Document Extensions.
334   if (getLangOpts().CPlusPlus)
335     Ident__has_cpp_attribute = RegisterBuiltinMacro("__has_cpp_attribute");
336   else
337     Ident__has_cpp_attribute = nullptr;
338 
339   // GCC Extensions.
340   Ident__BASE_FILE__ = RegisterBuiltinMacro("__BASE_FILE__");
341   Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro("__INCLUDE_LEVEL__");
342   Ident__TIMESTAMP__ = RegisterBuiltinMacro("__TIMESTAMP__");
343 
344   // Microsoft Extensions.
345   if (getLangOpts().MicrosoftExt) {
346     Ident__identifier = RegisterBuiltinMacro("__identifier");
347     Ident__pragma = RegisterBuiltinMacro("__pragma");
348   } else {
349     Ident__identifier = nullptr;
350     Ident__pragma = nullptr;
351   }
352 
353   // Clang Extensions.
354   Ident__FILE_NAME__ = RegisterBuiltinMacro("__FILE_NAME__");
355   Ident__has_feature = RegisterBuiltinMacro("__has_feature");
356   Ident__has_extension = RegisterBuiltinMacro("__has_extension");
357   Ident__has_builtin = RegisterBuiltinMacro("__has_builtin");
358   Ident__has_constexpr_builtin =
359       RegisterBuiltinMacro("__has_constexpr_builtin");
360   Ident__has_attribute = RegisterBuiltinMacro("__has_attribute");
361   if (!getLangOpts().CPlusPlus)
362     Ident__has_c_attribute = RegisterBuiltinMacro("__has_c_attribute");
363   else
364     Ident__has_c_attribute = nullptr;
365 
366   Ident__has_declspec = RegisterBuiltinMacro("__has_declspec_attribute");
367   Ident__has_embed = RegisterBuiltinMacro("__has_embed");
368   Ident__has_include = RegisterBuiltinMacro("__has_include");
369   Ident__has_include_next = RegisterBuiltinMacro("__has_include_next");
370   Ident__has_warning = RegisterBuiltinMacro("__has_warning");
371   Ident__is_identifier = RegisterBuiltinMacro("__is_identifier");
372   Ident__is_target_arch = RegisterBuiltinMacro("__is_target_arch");
373   Ident__is_target_vendor = RegisterBuiltinMacro("__is_target_vendor");
374   Ident__is_target_os = RegisterBuiltinMacro("__is_target_os");
375   Ident__is_target_environment =
376       RegisterBuiltinMacro("__is_target_environment");
377   Ident__is_target_variant_os = RegisterBuiltinMacro("__is_target_variant_os");
378   Ident__is_target_variant_environment =
379       RegisterBuiltinMacro("__is_target_variant_environment");
380 
381   // Modules.
382   Ident__building_module = RegisterBuiltinMacro("__building_module");
383   if (!getLangOpts().CurrentModule.empty())
384     Ident__MODULE__ = RegisterBuiltinMacro("__MODULE__");
385   else
386     Ident__MODULE__ = nullptr;
387 }
388 
389 /// isTrivialSingleTokenExpansion - Return true if MI, which has a single token
390 /// in its expansion, currently expands to that token literally.
isTrivialSingleTokenExpansion(const MacroInfo * MI,const IdentifierInfo * MacroIdent,Preprocessor & PP)391 static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
392                                           const IdentifierInfo *MacroIdent,
393                                           Preprocessor &PP) {
394   IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo();
395 
396   // If the token isn't an identifier, it's always literally expanded.
397   if (!II) return true;
398 
399   // If the information about this identifier is out of date, update it from
400   // the external source.
401   if (II->isOutOfDate())
402     PP.getExternalSource()->updateOutOfDateIdentifier(*II);
403 
404   // If the identifier is a macro, and if that macro is enabled, it may be
405   // expanded so it's not a trivial expansion.
406   if (auto *ExpansionMI = PP.getMacroInfo(II))
407     if (ExpansionMI->isEnabled() &&
408         // Fast expanding "#define X X" is ok, because X would be disabled.
409         II != MacroIdent)
410       return false;
411 
412   // If this is an object-like macro invocation, it is safe to trivially expand
413   // it.
414   if (MI->isObjectLike()) return true;
415 
416   // If this is a function-like macro invocation, it's safe to trivially expand
417   // as long as the identifier is not a macro argument.
418   return !llvm::is_contained(MI->params(), II);
419 }
420 
421 /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
422 /// expanded as a macro, handle it and return the next token as 'Identifier'.
HandleMacroExpandedIdentifier(Token & Identifier,const MacroDefinition & M)423 bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
424                                                  const MacroDefinition &M) {
425   emitMacroExpansionWarnings(Identifier);
426 
427   MacroInfo *MI = M.getMacroInfo();
428 
429   // If this is a macro expansion in the "#if !defined(x)" line for the file,
430   // then the macro could expand to different things in other contexts, we need
431   // to disable the optimization in this case.
432   if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro();
433 
434   // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
435   if (MI->isBuiltinMacro()) {
436     if (Callbacks)
437       Callbacks->MacroExpands(Identifier, M, Identifier.getLocation(),
438                               /*Args=*/nullptr);
439     ExpandBuiltinMacro(Identifier);
440     return true;
441   }
442 
443   /// Args - If this is a function-like macro expansion, this contains,
444   /// for each macro argument, the list of tokens that were provided to the
445   /// invocation.
446   MacroArgs *Args = nullptr;
447 
448   // Remember where the end of the expansion occurred.  For an object-like
449   // macro, this is the identifier.  For a function-like macro, this is the ')'.
450   SourceLocation ExpansionEnd = Identifier.getLocation();
451 
452   // If this is a function-like macro, read the arguments.
453   if (MI->isFunctionLike()) {
454     // Remember that we are now parsing the arguments to a macro invocation.
455     // Preprocessor directives used inside macro arguments are not portable, and
456     // this enables the warning.
457     InMacroArgs = true;
458     ArgMacro = &Identifier;
459 
460     Args = ReadMacroCallArgumentList(Identifier, MI, ExpansionEnd);
461 
462     // Finished parsing args.
463     InMacroArgs = false;
464     ArgMacro = nullptr;
465 
466     // If there was an error parsing the arguments, bail out.
467     if (!Args) return true;
468 
469     ++NumFnMacroExpanded;
470   } else {
471     ++NumMacroExpanded;
472   }
473 
474   // Notice that this macro has been used.
475   markMacroAsUsed(MI);
476 
477   // Remember where the token is expanded.
478   SourceLocation ExpandLoc = Identifier.getLocation();
479   SourceRange ExpansionRange(ExpandLoc, ExpansionEnd);
480 
481   if (Callbacks) {
482     if (InMacroArgs) {
483       // We can have macro expansion inside a conditional directive while
484       // reading the function macro arguments. To ensure, in that case, that
485       // MacroExpands callbacks still happen in source order, queue this
486       // callback to have it happen after the function macro callback.
487       DelayedMacroExpandsCallbacks.push_back(
488           MacroExpandsInfo(Identifier, M, ExpansionRange));
489     } else {
490       Callbacks->MacroExpands(Identifier, M, ExpansionRange, Args);
491       if (!DelayedMacroExpandsCallbacks.empty()) {
492         for (const MacroExpandsInfo &Info : DelayedMacroExpandsCallbacks) {
493           // FIXME: We lose macro args info with delayed callback.
494           Callbacks->MacroExpands(Info.Tok, Info.MD, Info.Range,
495                                   /*Args=*/nullptr);
496         }
497         DelayedMacroExpandsCallbacks.clear();
498       }
499     }
500   }
501 
502   // If the macro definition is ambiguous, complain.
503   if (M.isAmbiguous()) {
504     Diag(Identifier, diag::warn_pp_ambiguous_macro)
505       << Identifier.getIdentifierInfo();
506     Diag(MI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_chosen)
507       << Identifier.getIdentifierInfo();
508     M.forAllDefinitions([&](const MacroInfo *OtherMI) {
509       if (OtherMI != MI)
510         Diag(OtherMI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_other)
511           << Identifier.getIdentifierInfo();
512     });
513   }
514 
515   // If we started lexing a macro, enter the macro expansion body.
516 
517   // If this macro expands to no tokens, don't bother to push it onto the
518   // expansion stack, only to take it right back off.
519   if (MI->getNumTokens() == 0) {
520     // No need for arg info.
521     if (Args) Args->destroy(*this);
522 
523     // Propagate whitespace info as if we had pushed, then popped,
524     // a macro context.
525     Identifier.setFlag(Token::LeadingEmptyMacro);
526     PropagateLineStartLeadingSpaceInfo(Identifier);
527     ++NumFastMacroExpanded;
528     return false;
529   } else if (MI->getNumTokens() == 1 &&
530              isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
531                                            *this)) {
532     // Otherwise, if this macro expands into a single trivially-expanded
533     // token: expand it now.  This handles common cases like
534     // "#define VAL 42".
535 
536     // No need for arg info.
537     if (Args) Args->destroy(*this);
538 
539     // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
540     // identifier to the expanded token.
541     bool isAtStartOfLine = Identifier.isAtStartOfLine();
542     bool hasLeadingSpace = Identifier.hasLeadingSpace();
543 
544     // Replace the result token.
545     Identifier = MI->getReplacementToken(0);
546 
547     // Restore the StartOfLine/LeadingSpace markers.
548     Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine);
549     Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace);
550 
551     // Update the tokens location to include both its expansion and physical
552     // locations.
553     SourceLocation Loc =
554       SourceMgr.createExpansionLoc(Identifier.getLocation(), ExpandLoc,
555                                    ExpansionEnd,Identifier.getLength());
556     Identifier.setLocation(Loc);
557 
558     // If this is a disabled macro or #define X X, we must mark the result as
559     // unexpandable.
560     if (IdentifierInfo *NewII = Identifier.getIdentifierInfo()) {
561       if (MacroInfo *NewMI = getMacroInfo(NewII))
562         if (!NewMI->isEnabled() || NewMI == MI) {
563           Identifier.setFlag(Token::DisableExpand);
564           // Don't warn for "#define X X" like "#define bool bool" from
565           // stdbool.h.
566           if (NewMI != MI || MI->isFunctionLike())
567             Diag(Identifier, diag::pp_disabled_macro_expansion);
568         }
569     }
570 
571     // Since this is not an identifier token, it can't be macro expanded, so
572     // we're done.
573     ++NumFastMacroExpanded;
574     return true;
575   }
576 
577   // Start expanding the macro.
578   EnterMacro(Identifier, ExpansionEnd, MI, Args);
579   return false;
580 }
581 
/// Kinds of opening bracket tracked while checking bracket nesting.
enum Bracket {
  Brace, ///< A brace bracket.
  Paren  ///< A parenthesis bracket.
};
586 
587 /// CheckMatchedBrackets - Returns true if the braces and parentheses in the
588 /// token vector are properly nested.
CheckMatchedBrackets(const SmallVectorImpl<Token> & Tokens)589 static bool CheckMatchedBrackets(const SmallVectorImpl<Token> &Tokens) {
590   SmallVector<Bracket, 8> Brackets;
591   for (SmallVectorImpl<Token>::const_iterator I = Tokens.begin(),
592                                               E = Tokens.end();
593        I != E; ++I) {
594     if (I->is(tok::l_paren)) {
595       Brackets.push_back(Paren);
596     } else if (I->is(tok::r_paren)) {
597       if (Brackets.empty() || Brackets.back() == Brace)
598         return false;
599       Brackets.pop_back();
600     } else if (I->is(tok::l_brace)) {
601       Brackets.push_back(Brace);
602     } else if (I->is(tok::r_brace)) {
603       if (Brackets.empty() || Brackets.back() == Paren)
604         return false;
605       Brackets.pop_back();
606     }
607   }
608   return Brackets.empty();
609 }
610 
611 /// GenerateNewArgTokens - Returns true if OldTokens can be converted to a new
612 /// vector of tokens in NewTokens.  The new number of arguments will be placed
613 /// in NumArgs and the ranges which need to surrounded in parentheses will be
614 /// in ParenHints.
615 /// Returns false if the token stream cannot be changed.  If this is because
616 /// of an initializer list starting a macro argument, the range of those
617 /// initializer lists will be place in InitLists.
GenerateNewArgTokens(Preprocessor & PP,SmallVectorImpl<Token> & OldTokens,SmallVectorImpl<Token> & NewTokens,unsigned & NumArgs,SmallVectorImpl<SourceRange> & ParenHints,SmallVectorImpl<SourceRange> & InitLists)618 static bool GenerateNewArgTokens(Preprocessor &PP,
619                                  SmallVectorImpl<Token> &OldTokens,
620                                  SmallVectorImpl<Token> &NewTokens,
621                                  unsigned &NumArgs,
622                                  SmallVectorImpl<SourceRange> &ParenHints,
623                                  SmallVectorImpl<SourceRange> &InitLists) {
624   if (!CheckMatchedBrackets(OldTokens))
625     return false;
626 
627   // Once it is known that the brackets are matched, only a simple count of the
628   // braces is needed.
629   unsigned Braces = 0;
630 
631   // First token of a new macro argument.
632   SmallVectorImpl<Token>::iterator ArgStartIterator = OldTokens.begin();
633 
634   // First closing brace in a new macro argument.  Used to generate
635   // SourceRanges for InitLists.
636   SmallVectorImpl<Token>::iterator ClosingBrace = OldTokens.end();
637   NumArgs = 0;
638   Token TempToken;
639   // Set to true when a macro separator token is found inside a braced list.
640   // If true, the fixed argument spans multiple old arguments and ParenHints
641   // will be updated.
642   bool FoundSeparatorToken = false;
643   for (SmallVectorImpl<Token>::iterator I = OldTokens.begin(),
644                                         E = OldTokens.end();
645        I != E; ++I) {
646     if (I->is(tok::l_brace)) {
647       ++Braces;
648     } else if (I->is(tok::r_brace)) {
649       --Braces;
650       if (Braces == 0 && ClosingBrace == E && FoundSeparatorToken)
651         ClosingBrace = I;
652     } else if (I->is(tok::eof)) {
653       // EOF token is used to separate macro arguments
654       if (Braces != 0) {
655         // Assume comma separator is actually braced list separator and change
656         // it back to a comma.
657         FoundSeparatorToken = true;
658         I->setKind(tok::comma);
659         I->setLength(1);
660       } else { // Braces == 0
661         // Separator token still separates arguments.
662         ++NumArgs;
663 
664         // If the argument starts with a brace, it can't be fixed with
665         // parentheses.  A different diagnostic will be given.
666         if (FoundSeparatorToken && ArgStartIterator->is(tok::l_brace)) {
667           InitLists.push_back(
668               SourceRange(ArgStartIterator->getLocation(),
669                           PP.getLocForEndOfToken(ClosingBrace->getLocation())));
670           ClosingBrace = E;
671         }
672 
673         // Add left paren
674         if (FoundSeparatorToken) {
675           TempToken.startToken();
676           TempToken.setKind(tok::l_paren);
677           TempToken.setLocation(ArgStartIterator->getLocation());
678           TempToken.setLength(0);
679           NewTokens.push_back(TempToken);
680         }
681 
682         // Copy over argument tokens
683         NewTokens.insert(NewTokens.end(), ArgStartIterator, I);
684 
685         // Add right paren and store the paren locations in ParenHints
686         if (FoundSeparatorToken) {
687           SourceLocation Loc = PP.getLocForEndOfToken((I - 1)->getLocation());
688           TempToken.startToken();
689           TempToken.setKind(tok::r_paren);
690           TempToken.setLocation(Loc);
691           TempToken.setLength(0);
692           NewTokens.push_back(TempToken);
693           ParenHints.push_back(SourceRange(ArgStartIterator->getLocation(),
694                                            Loc));
695         }
696 
697         // Copy separator token
698         NewTokens.push_back(*I);
699 
700         // Reset values
701         ArgStartIterator = I + 1;
702         FoundSeparatorToken = false;
703       }
704     }
705   }
706 
707   return !ParenHints.empty() && InitLists.empty();
708 }
709 
/// ReadMacroCallArgumentList - After reading "MACRO" and knowing that the next
711 /// token is the '(' of the macro, this method is invoked to read all of the
712 /// actual arguments specified for the macro invocation.  This returns null on
713 /// error.
ReadMacroCallArgumentList(Token & MacroName,MacroInfo * MI,SourceLocation & MacroEnd)714 MacroArgs *Preprocessor::ReadMacroCallArgumentList(Token &MacroName,
715                                                    MacroInfo *MI,
716                                                    SourceLocation &MacroEnd) {
717   // The number of fixed arguments to parse.
718   unsigned NumFixedArgsLeft = MI->getNumParams();
719   bool isVariadic = MI->isVariadic();
720 
721   // Outer loop, while there are more arguments, keep reading them.
722   Token Tok;
723 
724   // Read arguments as unexpanded tokens.  This avoids issues, e.g., where
725   // an argument value in a macro could expand to ',' or '(' or ')'.
726   LexUnexpandedToken(Tok);
727   assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?");
728 
729   // ArgTokens - Build up a list of tokens that make up each argument.  Each
730   // argument is separated by an EOF token.  Use a SmallVector so we can avoid
731   // heap allocations in the common case.
732   SmallVector<Token, 64> ArgTokens;
733   bool ContainsCodeCompletionTok = false;
734   bool FoundElidedComma = false;
735 
736   SourceLocation TooManyArgsLoc;
737 
738   unsigned NumActuals = 0;
739   while (Tok.isNot(tok::r_paren)) {
740     if (ContainsCodeCompletionTok && Tok.isOneOf(tok::eof, tok::eod))
741       break;
742 
743     assert(Tok.isOneOf(tok::l_paren, tok::comma) &&
744            "only expect argument separators here");
745 
746     size_t ArgTokenStart = ArgTokens.size();
747     SourceLocation ArgStartLoc = Tok.getLocation();
748 
749     // C99 6.10.3p11: Keep track of the number of l_parens we have seen.  Note
750     // that we already consumed the first one.
751     unsigned NumParens = 0;
752 
753     while (true) {
754       // Read arguments as unexpanded tokens.  This avoids issues, e.g., where
755       // an argument value in a macro could expand to ',' or '(' or ')'.
756       LexUnexpandedToken(Tok);
757 
758       if (Tok.isOneOf(tok::eof, tok::eod)) { // "#if f(<eof>" & "#if f(\n"
759         if (!ContainsCodeCompletionTok) {
760           Diag(MacroName, diag::err_unterm_macro_invoc);
761           Diag(MI->getDefinitionLoc(), diag::note_macro_here)
762             << MacroName.getIdentifierInfo();
763           // Do not lose the EOF/EOD.  Return it to the client.
764           MacroName = Tok;
765           return nullptr;
766         }
767         // Do not lose the EOF/EOD.
768         auto Toks = std::make_unique<Token[]>(1);
769         Toks[0] = Tok;
770         EnterTokenStream(std::move(Toks), 1, true, /*IsReinject*/ false);
771         break;
772       } else if (Tok.is(tok::r_paren)) {
773         // If we found the ) token, the macro arg list is done.
774         if (NumParens-- == 0) {
775           MacroEnd = Tok.getLocation();
776           if (!ArgTokens.empty() &&
777               ArgTokens.back().commaAfterElided()) {
778             FoundElidedComma = true;
779           }
780           break;
781         }
782       } else if (Tok.is(tok::l_paren)) {
783         ++NumParens;
784       } else if (Tok.is(tok::comma)) {
785         // In Microsoft-compatibility mode, single commas from nested macro
786         // expansions should not be considered as argument separators. We test
787         // for this with the IgnoredComma token flag.
788         if (Tok.getFlags() & Token::IgnoredComma) {
789           // However, in MSVC's preprocessor, subsequent expansions do treat
790           // these commas as argument separators. This leads to a common
791           // workaround used in macros that need to work in both MSVC and
792           // compliant preprocessors. Therefore, the IgnoredComma flag can only
793           // apply once to any given token.
794           Tok.clearFlag(Token::IgnoredComma);
795         } else if (NumParens == 0) {
796           // Comma ends this argument if there are more fixed arguments
797           // expected. However, if this is a variadic macro, and this is part of
798           // the variadic part, then the comma is just an argument token.
799           if (!isVariadic)
800             break;
801           if (NumFixedArgsLeft > 1)
802             break;
803         }
804       } else if (Tok.is(tok::comment) && !KeepMacroComments) {
805         // If this is a comment token in the argument list and we're just in
806         // -C mode (not -CC mode), discard the comment.
807         continue;
808       } else if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
809         // Reading macro arguments can cause macros that we are currently
810         // expanding from to be popped off the expansion stack.  Doing so causes
811         // them to be reenabled for expansion.  Here we record whether any
812         // identifiers we lex as macro arguments correspond to disabled macros.
813         // If so, we mark the token as noexpand.  This is a subtle aspect of
814         // C99 6.10.3.4p2.
815         if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))
816           if (!MI->isEnabled())
817             Tok.setFlag(Token::DisableExpand);
818       } else if (Tok.is(tok::code_completion)) {
819         ContainsCodeCompletionTok = true;
820         if (CodeComplete)
821           CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(),
822                                                   MI, NumActuals);
823         // Don't mark that we reached the code-completion point because the
824         // parser is going to handle the token and there will be another
825         // code-completion callback.
826       }
827 
828       ArgTokens.push_back(Tok);
829     }
830 
831     // If this was an empty argument list foo(), don't add this as an empty
832     // argument.
833     if (ArgTokens.empty() && Tok.getKind() == tok::r_paren)
834       break;
835 
836     // If this is not a variadic macro, and too many args were specified, emit
837     // an error.
838     if (!isVariadic && NumFixedArgsLeft == 0 && TooManyArgsLoc.isInvalid()) {
839       if (ArgTokens.size() != ArgTokenStart)
840         TooManyArgsLoc = ArgTokens[ArgTokenStart].getLocation();
841       else
842         TooManyArgsLoc = ArgStartLoc;
843     }
844 
845     // Empty arguments are standard in C99 and C++0x, and are supported as an
846     // extension in other modes.
847     if (ArgTokens.size() == ArgTokenStart && !getLangOpts().C99)
848       Diag(Tok, getLangOpts().CPlusPlus11
849                     ? diag::warn_cxx98_compat_empty_fnmacro_arg
850                     : diag::ext_empty_fnmacro_arg);
851 
852     // Add a marker EOF token to the end of the token list for this argument.
853     Token EOFTok;
854     EOFTok.startToken();
855     EOFTok.setKind(tok::eof);
856     EOFTok.setLocation(Tok.getLocation());
857     EOFTok.setLength(0);
858     ArgTokens.push_back(EOFTok);
859     ++NumActuals;
860     if (!ContainsCodeCompletionTok && NumFixedArgsLeft != 0)
861       --NumFixedArgsLeft;
862   }
863 
864   // Okay, we either found the r_paren.  Check to see if we parsed too few
865   // arguments.
866   unsigned MinArgsExpected = MI->getNumParams();
867 
868   // If this is not a variadic macro, and too many args were specified, emit
869   // an error.
870   if (!isVariadic && NumActuals > MinArgsExpected &&
871       !ContainsCodeCompletionTok) {
872     // Emit the diagnostic at the macro name in case there is a missing ).
873     // Emitting it at the , could be far away from the macro name.
874     Diag(TooManyArgsLoc, diag::err_too_many_args_in_macro_invoc);
875     Diag(MI->getDefinitionLoc(), diag::note_macro_here)
876       << MacroName.getIdentifierInfo();
877 
878     // Commas from braced initializer lists will be treated as argument
879     // separators inside macros.  Attempt to correct for this with parentheses.
880     // TODO: See if this can be generalized to angle brackets for templates
881     // inside macro arguments.
882 
883     SmallVector<Token, 4> FixedArgTokens;
884     unsigned FixedNumArgs = 0;
885     SmallVector<SourceRange, 4> ParenHints, InitLists;
886     if (!GenerateNewArgTokens(*this, ArgTokens, FixedArgTokens, FixedNumArgs,
887                               ParenHints, InitLists)) {
888       if (!InitLists.empty()) {
889         DiagnosticBuilder DB =
890             Diag(MacroName,
891                  diag::note_init_list_at_beginning_of_macro_argument);
892         for (SourceRange Range : InitLists)
893           DB << Range;
894       }
895       return nullptr;
896     }
897     if (FixedNumArgs != MinArgsExpected)
898       return nullptr;
899 
900     DiagnosticBuilder DB = Diag(MacroName, diag::note_suggest_parens_for_macro);
901     for (SourceRange ParenLocation : ParenHints) {
902       DB << FixItHint::CreateInsertion(ParenLocation.getBegin(), "(");
903       DB << FixItHint::CreateInsertion(ParenLocation.getEnd(), ")");
904     }
905     ArgTokens.swap(FixedArgTokens);
906     NumActuals = FixedNumArgs;
907   }
908 
909   // See MacroArgs instance var for description of this.
910   bool isVarargsElided = false;
911 
912   if (ContainsCodeCompletionTok) {
913     // Recover from not-fully-formed macro invocation during code-completion.
914     Token EOFTok;
915     EOFTok.startToken();
916     EOFTok.setKind(tok::eof);
917     EOFTok.setLocation(Tok.getLocation());
918     EOFTok.setLength(0);
919     for (; NumActuals < MinArgsExpected; ++NumActuals)
920       ArgTokens.push_back(EOFTok);
921   }
922 
923   if (NumActuals < MinArgsExpected) {
924     // There are several cases where too few arguments is ok, handle them now.
925     if (NumActuals == 0 && MinArgsExpected == 1) {
926       // #define A(X)  or  #define A(...)   ---> A()
927 
928       // If there is exactly one argument, and that argument is missing,
929       // then we have an empty "()" argument empty list.  This is fine, even if
930       // the macro expects one argument (the argument is just empty).
931       isVarargsElided = MI->isVariadic();
932     } else if ((FoundElidedComma || MI->isVariadic()) &&
933                (NumActuals+1 == MinArgsExpected ||  // A(x, ...) -> A(X)
934                 (NumActuals == 0 && MinArgsExpected == 2))) {// A(x,...) -> A()
935       // Varargs where the named vararg parameter is missing: OK as extension.
936       //   #define A(x, ...)
937       //   A("blah")
938       //
939       // If the macro contains the comma pasting extension, the diagnostic
940       // is suppressed; we know we'll get another diagnostic later.
941       if (!MI->hasCommaPasting()) {
942         // C++20 [cpp.replace]p15, C23 6.10.5p12
943         //
944         // C++20 and C23 allow this construct, but standards before that
945         // do not (we allow it as an extension).
946         unsigned ID;
947         if (getLangOpts().CPlusPlus20)
948           ID = diag::warn_cxx17_compat_missing_varargs_arg;
949         else if (getLangOpts().CPlusPlus)
950           ID = diag::ext_cxx_missing_varargs_arg;
951         else if (getLangOpts().C23)
952           ID = diag::warn_c17_compat_missing_varargs_arg;
953         else
954           ID = diag::ext_c_missing_varargs_arg;
955         Diag(Tok, ID);
956         Diag(MI->getDefinitionLoc(), diag::note_macro_here)
957           << MacroName.getIdentifierInfo();
958       }
959 
960       // Remember this occurred, allowing us to elide the comma when used for
961       // cases like:
962       //   #define A(x, foo...) blah(a, ## foo)
963       //   #define B(x, ...) blah(a, ## __VA_ARGS__)
964       //   #define C(...) blah(a, ## __VA_ARGS__)
965       //  A(x) B(x) C()
966       isVarargsElided = true;
967     } else if (!ContainsCodeCompletionTok) {
968       // Otherwise, emit the error.
969       Diag(Tok, diag::err_too_few_args_in_macro_invoc);
970       Diag(MI->getDefinitionLoc(), diag::note_macro_here)
971         << MacroName.getIdentifierInfo();
972       return nullptr;
973     }
974 
975     // Add a marker EOF token to the end of the token list for this argument.
976     SourceLocation EndLoc = Tok.getLocation();
977     Tok.startToken();
978     Tok.setKind(tok::eof);
979     Tok.setLocation(EndLoc);
980     Tok.setLength(0);
981     ArgTokens.push_back(Tok);
982 
983     // If we expect two arguments, add both as empty.
984     if (NumActuals == 0 && MinArgsExpected == 2)
985       ArgTokens.push_back(Tok);
986 
987   } else if (NumActuals > MinArgsExpected && !MI->isVariadic() &&
988              !ContainsCodeCompletionTok) {
989     // Emit the diagnostic at the macro name in case there is a missing ).
990     // Emitting it at the , could be far away from the macro name.
991     Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
992     Diag(MI->getDefinitionLoc(), diag::note_macro_here)
993       << MacroName.getIdentifierInfo();
994     return nullptr;
995   }
996 
997   return MacroArgs::create(MI, ArgTokens, isVarargsElided, *this);
998 }
999 
1000 /// Keeps macro expanded tokens for TokenLexers.
1001 //
1002 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is
1003 /// going to lex in the cache and when it finishes the tokens are removed
1004 /// from the end of the cache.
cacheMacroExpandedTokens(TokenLexer * tokLexer,ArrayRef<Token> tokens)1005 Token *Preprocessor::cacheMacroExpandedTokens(TokenLexer *tokLexer,
1006                                               ArrayRef<Token> tokens) {
1007   assert(tokLexer);
1008   if (tokens.empty())
1009     return nullptr;
1010 
1011   size_t newIndex = MacroExpandedTokens.size();
1012   bool cacheNeedsToGrow = tokens.size() >
1013                       MacroExpandedTokens.capacity()-MacroExpandedTokens.size();
1014   MacroExpandedTokens.append(tokens.begin(), tokens.end());
1015 
1016   if (cacheNeedsToGrow) {
1017     // Go through all the TokenLexers whose 'Tokens' pointer points in the
1018     // buffer and update the pointers to the (potential) new buffer array.
1019     for (const auto &Lexer : MacroExpandingLexersStack) {
1020       TokenLexer *prevLexer;
1021       size_t tokIndex;
1022       std::tie(prevLexer, tokIndex) = Lexer;
1023       prevLexer->Tokens = MacroExpandedTokens.data() + tokIndex;
1024     }
1025   }
1026 
1027   MacroExpandingLexersStack.push_back(std::make_pair(tokLexer, newIndex));
1028   return MacroExpandedTokens.data() + newIndex;
1029 }
1030 
removeCachedMacroExpandedTokensOfLastLexer()1031 void Preprocessor::removeCachedMacroExpandedTokensOfLastLexer() {
1032   assert(!MacroExpandingLexersStack.empty());
1033   size_t tokIndex = MacroExpandingLexersStack.back().second;
1034   assert(tokIndex < MacroExpandedTokens.size());
1035   // Pop the cached macro expanded tokens from the end.
1036   MacroExpandedTokens.resize(tokIndex);
1037   MacroExpandingLexersStack.pop_back();
1038 }
1039 
1040 /// ComputeDATE_TIME - Compute the current time, enter it into the specified
1041 /// scratch buffer, then return DATELoc/TIMELoc locations with the position of
1042 /// the identifier tokens inserted.
ComputeDATE_TIME(SourceLocation & DATELoc,SourceLocation & TIMELoc,Preprocessor & PP)1043 static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
1044                              Preprocessor &PP) {
1045   time_t TT;
1046   std::tm *TM;
1047   if (PP.getPreprocessorOpts().SourceDateEpoch) {
1048     TT = *PP.getPreprocessorOpts().SourceDateEpoch;
1049     TM = std::gmtime(&TT);
1050   } else {
1051     TT = std::time(nullptr);
1052     TM = std::localtime(&TT);
1053   }
1054 
1055   static const char * const Months[] = {
1056     "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"
1057   };
1058 
1059   {
1060     SmallString<32> TmpBuffer;
1061     llvm::raw_svector_ostream TmpStream(TmpBuffer);
1062     if (TM)
1063       TmpStream << llvm::format("\"%s %2d %4d\"", Months[TM->tm_mon],
1064                                 TM->tm_mday, TM->tm_year + 1900);
1065     else
1066       TmpStream << "??? ?? ????";
1067     Token TmpTok;
1068     TmpTok.startToken();
1069     PP.CreateString(TmpStream.str(), TmpTok);
1070     DATELoc = TmpTok.getLocation();
1071   }
1072 
1073   {
1074     SmallString<32> TmpBuffer;
1075     llvm::raw_svector_ostream TmpStream(TmpBuffer);
1076     if (TM)
1077       TmpStream << llvm::format("\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min,
1078                                 TM->tm_sec);
1079     else
1080       TmpStream << "??:??:??";
1081     Token TmpTok;
1082     TmpTok.startToken();
1083     PP.CreateString(TmpStream.str(), TmpTok);
1084     TIMELoc = TmpTok.getLocation();
1085   }
1086 }
1087 
1088 /// HasFeature - Return true if we recognize and implement the feature
1089 /// specified by the identifier as a standard language feature.
HasFeature(const Preprocessor & PP,StringRef Feature)1090 static bool HasFeature(const Preprocessor &PP, StringRef Feature) {
1091   const LangOptions &LangOpts = PP.getLangOpts();
1092 
1093   // Normalize the feature name, __foo__ becomes foo.
1094   if (Feature.starts_with("__") && Feature.ends_with("__") &&
1095       Feature.size() >= 4)
1096     Feature = Feature.substr(2, Feature.size() - 4);
1097 
1098 #define FEATURE(Name, Predicate) .Case(#Name, Predicate)
1099   return llvm::StringSwitch<bool>(Feature)
1100 #include "clang/Basic/Features.def"
1101       .Default(false);
1102 #undef FEATURE
1103 }
1104 
1105 /// HasExtension - Return true if we recognize and implement the feature
1106 /// specified by the identifier, either as an extension or a standard language
1107 /// feature.
HasExtension(const Preprocessor & PP,StringRef Extension)1108 static bool HasExtension(const Preprocessor &PP, StringRef Extension) {
1109   if (HasFeature(PP, Extension))
1110     return true;
1111 
1112   // If the use of an extension results in an error diagnostic, extensions are
1113   // effectively unavailable, so just return false here.
1114   if (PP.getDiagnostics().getExtensionHandlingBehavior() >=
1115       diag::Severity::Error)
1116     return false;
1117 
1118   const LangOptions &LangOpts = PP.getLangOpts();
1119 
1120   // Normalize the extension name, __foo__ becomes foo.
1121   if (Extension.starts_with("__") && Extension.ends_with("__") &&
1122       Extension.size() >= 4)
1123     Extension = Extension.substr(2, Extension.size() - 4);
1124 
1125     // Because we inherit the feature list from HasFeature, this string switch
1126     // must be less restrictive than HasFeature's.
1127 #define EXTENSION(Name, Predicate) .Case(#Name, Predicate)
1128   return llvm::StringSwitch<bool>(Extension)
1129 #include "clang/Basic/Features.def"
1130       .Default(false);
1131 #undef EXTENSION
1132 }
1133 
1134 /// EvaluateHasIncludeCommon - Process a '__has_include("path")'
1135 /// or '__has_include_next("path")' expression.
1136 /// Returns true if successful.
EvaluateHasIncludeCommon(Token & Tok,IdentifierInfo * II,Preprocessor & PP,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)1137 static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
1138                                      Preprocessor &PP,
1139                                      ConstSearchDirIterator LookupFrom,
1140                                      const FileEntry *LookupFromFile) {
1141   // Save the location of the current token.  If a '(' is later found, use
1142   // that location.  If not, use the end of this location instead.
1143   SourceLocation LParenLoc = Tok.getLocation();
1144 
1145   // These expressions are only allowed within a preprocessor directive.
1146   if (!PP.isParsingIfOrElifDirective()) {
1147     PP.Diag(LParenLoc, diag::err_pp_directive_required) << II;
1148     // Return a valid identifier token.
1149     assert(Tok.is(tok::identifier));
1150     Tok.setIdentifierInfo(II);
1151     return false;
1152   }
1153 
1154   // Get '('. If we don't have a '(', try to form a header-name token.
1155   do {
1156     if (PP.LexHeaderName(Tok))
1157       return false;
1158   } while (Tok.getKind() == tok::comment);
1159 
1160   // Ensure we have a '('.
1161   if (Tok.isNot(tok::l_paren)) {
1162     // No '(', use end of last token.
1163     LParenLoc = PP.getLocForEndOfToken(LParenLoc);
1164     PP.Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren;
1165     // If the next token looks like a filename or the start of one,
1166     // assume it is and process it as such.
1167     if (Tok.isNot(tok::header_name))
1168       return false;
1169   } else {
1170     // Save '(' location for possible missing ')' message.
1171     LParenLoc = Tok.getLocation();
1172     if (PP.LexHeaderName(Tok))
1173       return false;
1174   }
1175 
1176   if (Tok.isNot(tok::header_name)) {
1177     PP.Diag(Tok.getLocation(), diag::err_pp_expects_filename);
1178     return false;
1179   }
1180 
1181   // Reserve a buffer to get the spelling.
1182   SmallString<128> FilenameBuffer;
1183   bool Invalid = false;
1184   StringRef Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
1185   if (Invalid)
1186     return false;
1187 
1188   SourceLocation FilenameLoc = Tok.getLocation();
1189 
1190   // Get ')'.
1191   PP.LexNonComment(Tok);
1192 
1193   // Ensure we have a trailing ).
1194   if (Tok.isNot(tok::r_paren)) {
1195     PP.Diag(PP.getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after)
1196         << II << tok::r_paren;
1197     PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
1198     return false;
1199   }
1200 
1201   bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
1202   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
1203   // error.
1204   if (Filename.empty())
1205     return false;
1206 
1207   // Passing this to LookupFile forces header search to check whether the found
1208   // file belongs to a module. Skipping that check could incorrectly mark
1209   // modular header as textual, causing issues down the line.
1210   ModuleMap::KnownHeader KH;
1211 
1212   // Search include directories.
1213   OptionalFileEntryRef File =
1214       PP.LookupFile(FilenameLoc, Filename, isAngled, LookupFrom, LookupFromFile,
1215                     nullptr, nullptr, nullptr, &KH, nullptr, nullptr);
1216 
1217   if (PPCallbacks *Callbacks = PP.getPPCallbacks()) {
1218     SrcMgr::CharacteristicKind FileType = SrcMgr::C_User;
1219     if (File)
1220       FileType = PP.getHeaderSearchInfo().getFileDirFlavor(*File);
1221     Callbacks->HasInclude(FilenameLoc, Filename, isAngled, File, FileType);
1222   }
1223 
1224   // Get the result value.  A result of true means the file exists.
1225   return File.has_value();
1226 }
1227 
1228 /// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
1229 /// Returns a filled optional with the value if successful; otherwise, empty.
EvaluateHasEmbed(Token & Tok,IdentifierInfo * II)1230 EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
1231   // These expressions are only allowed within a preprocessor directive.
1232   if (!this->isParsingIfOrElifDirective()) {
1233     Diag(Tok, diag::err_pp_directive_required) << II;
1234     // Return a valid identifier token.
1235     assert(Tok.is(tok::identifier));
1236     Tok.setIdentifierInfo(II);
1237     return EmbedResult::Invalid;
1238   }
1239 
1240   // Ensure we have a '('.
1241   LexUnexpandedToken(Tok);
1242   if (Tok.isNot(tok::l_paren)) {
1243     Diag(Tok, diag::err_pp_expected_after) << II << tok::l_paren;
1244     // If the next token looks like a filename or the start of one,
1245     // assume it is and process it as such.
1246     return EmbedResult::Invalid;
1247   }
1248 
1249   // Save '(' location for possible missing ')' message and then lex the header
1250   // name token for the embed resource.
1251   SourceLocation LParenLoc = Tok.getLocation();
1252   if (this->LexHeaderName(Tok))
1253     return EmbedResult::Invalid;
1254 
1255   if (Tok.isNot(tok::header_name)) {
1256     Diag(Tok.getLocation(), diag::err_pp_expects_filename);
1257     return EmbedResult::Invalid;
1258   }
1259 
1260   SourceLocation FilenameLoc = Tok.getLocation();
1261   Token FilenameTok = Tok;
1262 
1263   std::optional<LexEmbedParametersResult> Params =
1264       this->LexEmbedParameters(Tok, /*ForHasEmbed=*/true);
1265   assert((Params || Tok.is(tok::eod)) &&
1266          "expected success or to be at the end of the directive");
1267 
1268   if (!Params)
1269     return EmbedResult::Invalid;
1270 
1271   if (Params->UnrecognizedParams > 0)
1272     return EmbedResult::NotFound;
1273 
1274   if (!Tok.is(tok::r_paren)) {
1275     Diag(this->getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after)
1276         << II << tok::r_paren;
1277     Diag(LParenLoc, diag::note_matching) << tok::l_paren;
1278     if (Tok.isNot(tok::eod))
1279       DiscardUntilEndOfDirective();
1280     return EmbedResult::Invalid;
1281   }
1282 
1283   SmallString<128> FilenameBuffer;
1284   StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
1285   bool isAngled =
1286       this->GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
1287   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
1288   // error.
1289   assert(!Filename.empty());
1290   const FileEntry *LookupFromFile =
1291       this->getCurrentFileLexer() ? *this->getCurrentFileLexer()->getFileEntry()
1292                                   : static_cast<FileEntry *>(nullptr);
1293   OptionalFileEntryRef MaybeFileEntry =
1294       this->LookupEmbedFile(Filename, isAngled, false, LookupFromFile);
1295   if (Callbacks) {
1296     Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
1297   }
1298   if (!MaybeFileEntry)
1299     return EmbedResult::NotFound;
1300 
1301   size_t FileSize = MaybeFileEntry->getSize();
1302   // First, "offset" into the file (this reduces the amount of data we can read
1303   // from the file).
1304   if (Params->MaybeOffsetParam) {
1305     if (Params->MaybeOffsetParam->Offset > FileSize)
1306       FileSize = 0;
1307     else
1308       FileSize -= Params->MaybeOffsetParam->Offset;
1309   }
1310 
1311   // Second, limit the data from the file (this also reduces the amount of data
1312   // we can read from the file).
1313   if (Params->MaybeLimitParam) {
1314     if (Params->MaybeLimitParam->Limit > FileSize)
1315       FileSize = 0;
1316     else
1317       FileSize = Params->MaybeLimitParam->Limit;
1318   }
1319 
1320   // If we have no data left to read, the file is empty, otherwise we have the
1321   // expected resource.
1322   if (FileSize == 0)
1323     return EmbedResult::Empty;
1324   return EmbedResult::Found;
1325 }
1326 
EvaluateHasInclude(Token & Tok,IdentifierInfo * II)1327 bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) {
1328   return EvaluateHasIncludeCommon(Tok, II, *this, nullptr, nullptr);
1329 }
1330 
EvaluateHasIncludeNext(Token & Tok,IdentifierInfo * II)1331 bool Preprocessor::EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II) {
1332   ConstSearchDirIterator Lookup = nullptr;
1333   const FileEntry *LookupFromFile;
1334   std::tie(Lookup, LookupFromFile) = getIncludeNextStart(Tok);
1335 
1336   return EvaluateHasIncludeCommon(Tok, II, *this, Lookup, LookupFromFile);
1337 }
1338 
1339 /// Process single-argument builtin feature-like macros that return
1340 /// integer values.
EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream & OS,Token & Tok,IdentifierInfo * II,Preprocessor & PP,bool ExpandArgs,llvm::function_ref<int (Token & Tok,bool & HasLexedNextTok)> Op)1341 static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS,
1342                                             Token &Tok, IdentifierInfo *II,
1343                                             Preprocessor &PP, bool ExpandArgs,
1344                                             llvm::function_ref<
1345                                               int(Token &Tok,
1346                                                   bool &HasLexedNextTok)> Op) {
1347   // Parse the initial '('.
1348   PP.LexUnexpandedToken(Tok);
1349   if (Tok.isNot(tok::l_paren)) {
1350     PP.Diag(Tok.getLocation(), diag::err_pp_expected_after) << II
1351                                                             << tok::l_paren;
1352 
1353     // Provide a dummy '0' value on output stream to elide further errors.
1354     if (!Tok.isOneOf(tok::eof, tok::eod)) {
1355       OS << 0;
1356       Tok.setKind(tok::numeric_constant);
1357     }
1358     return;
1359   }
1360 
1361   unsigned ParenDepth = 1;
1362   SourceLocation LParenLoc = Tok.getLocation();
1363   std::optional<int> Result;
1364 
1365   Token ResultTok;
1366   bool SuppressDiagnostic = false;
1367   while (true) {
1368     // Parse next token.
1369     if (ExpandArgs)
1370       PP.Lex(Tok);
1371     else
1372       PP.LexUnexpandedToken(Tok);
1373 
1374 already_lexed:
1375     switch (Tok.getKind()) {
1376       case tok::eof:
1377       case tok::eod:
1378         // Don't provide even a dummy value if the eod or eof marker is
1379         // reached.  Simply provide a diagnostic.
1380         PP.Diag(Tok.getLocation(), diag::err_unterm_macro_invoc);
1381         return;
1382 
1383       case tok::comma:
1384         if (!SuppressDiagnostic) {
1385           PP.Diag(Tok.getLocation(), diag::err_too_many_args_in_macro_invoc);
1386           SuppressDiagnostic = true;
1387         }
1388         continue;
1389 
1390       case tok::l_paren:
1391         ++ParenDepth;
1392         if (Result)
1393           break;
1394         if (!SuppressDiagnostic) {
1395           PP.Diag(Tok.getLocation(), diag::err_pp_nested_paren) << II;
1396           SuppressDiagnostic = true;
1397         }
1398         continue;
1399 
1400       case tok::r_paren:
1401         if (--ParenDepth > 0)
1402           continue;
1403 
1404         // The last ')' has been reached; return the value if one found or
1405         // a diagnostic and a dummy value.
1406         if (Result) {
1407           OS << *Result;
1408           // For strict conformance to __has_cpp_attribute rules, use 'L'
1409           // suffix for dated literals.
1410           if (*Result > 1)
1411             OS << 'L';
1412         } else {
1413           OS << 0;
1414           if (!SuppressDiagnostic)
1415             PP.Diag(Tok.getLocation(), diag::err_too_few_args_in_macro_invoc);
1416         }
1417         Tok.setKind(tok::numeric_constant);
1418         return;
1419 
1420       default: {
1421         // Parse the macro argument, if one not found so far.
1422         if (Result)
1423           break;
1424 
1425         bool HasLexedNextToken = false;
1426         Result = Op(Tok, HasLexedNextToken);
1427         ResultTok = Tok;
1428         if (HasLexedNextToken)
1429           goto already_lexed;
1430         continue;
1431       }
1432     }
1433 
1434     // Diagnose missing ')'.
1435     if (!SuppressDiagnostic) {
1436       if (auto Diag = PP.Diag(Tok.getLocation(), diag::err_pp_expected_after)) {
1437         if (IdentifierInfo *LastII = ResultTok.getIdentifierInfo())
1438           Diag << LastII;
1439         else
1440           Diag << ResultTok.getKind();
1441         Diag << tok::r_paren << ResultTok.getLocation();
1442       }
1443       PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
1444       SuppressDiagnostic = true;
1445     }
1446   }
1447 }
1448 
1449 /// Helper function to return the IdentifierInfo structure of a Token
1450 /// or generate a diagnostic if none available.
ExpectFeatureIdentifierInfo(Token & Tok,Preprocessor & PP,signed DiagID)1451 static IdentifierInfo *ExpectFeatureIdentifierInfo(Token &Tok,
1452                                                    Preprocessor &PP,
1453                                                    signed DiagID) {
1454   IdentifierInfo *II;
1455   if (!Tok.isAnnotation() && (II = Tok.getIdentifierInfo()))
1456     return II;
1457 
1458   PP.Diag(Tok.getLocation(), DiagID);
1459   return nullptr;
1460 }
1461 
1462 /// Implements the __is_target_arch builtin macro.
isTargetArch(const TargetInfo & TI,const IdentifierInfo * II)1463 static bool isTargetArch(const TargetInfo &TI, const IdentifierInfo *II) {
1464   llvm::Triple Arch(II->getName().lower() + "--");
1465   const llvm::Triple &TT = TI.getTriple();
1466   if (TT.isThumb()) {
1467     // arm matches thumb or thumbv7. armv7 matches thumbv7.
1468     if ((Arch.getSubArch() == llvm::Triple::NoSubArch ||
1469          Arch.getSubArch() == TT.getSubArch()) &&
1470         ((TT.getArch() == llvm::Triple::thumb &&
1471           Arch.getArch() == llvm::Triple::arm) ||
1472          (TT.getArch() == llvm::Triple::thumbeb &&
1473           Arch.getArch() == llvm::Triple::armeb)))
1474       return true;
1475   }
1476   // Check the parsed arch when it has no sub arch to allow Clang to
1477   // match thumb to thumbv7 but to prohibit matching thumbv6 to thumbv7.
1478   return (Arch.getSubArch() == llvm::Triple::NoSubArch ||
1479           Arch.getSubArch() == TT.getSubArch()) &&
1480          Arch.getArch() == TT.getArch();
1481 }
1482 
1483 /// Implements the __is_target_vendor builtin macro.
isTargetVendor(const TargetInfo & TI,const IdentifierInfo * II)1484 static bool isTargetVendor(const TargetInfo &TI, const IdentifierInfo *II) {
1485   StringRef VendorName = TI.getTriple().getVendorName();
1486   if (VendorName.empty())
1487     VendorName = "unknown";
1488   return VendorName.equals_insensitive(II->getName());
1489 }
1490 
1491 /// Implements the __is_target_os builtin macro.
isTargetOS(const TargetInfo & TI,const IdentifierInfo * II)1492 static bool isTargetOS(const TargetInfo &TI, const IdentifierInfo *II) {
1493   llvm::Triple OS(llvm::Twine("unknown-unknown-") + II->getName().lower());
1494   if (OS.getOS() == llvm::Triple::Darwin) {
1495     // Darwin matches macos, ios, etc.
1496     return TI.getTriple().isOSDarwin();
1497   }
1498   return TI.getTriple().getOS() == OS.getOS();
1499 }
1500 
1501 /// Implements the __is_target_environment builtin macro.
isTargetEnvironment(const TargetInfo & TI,const IdentifierInfo * II)1502 static bool isTargetEnvironment(const TargetInfo &TI,
1503                                 const IdentifierInfo *II) {
1504   llvm::Triple Env(llvm::Twine("---") + II->getName().lower());
1505   // The unknown environment is matched only if
1506   // '__is_target_environment(unknown)' is used.
1507   if (Env.getEnvironment() == llvm::Triple::UnknownEnvironment &&
1508       Env.getEnvironmentName() != "unknown")
1509     return false;
1510   return TI.getTriple().getEnvironment() == Env.getEnvironment();
1511 }
1512 
1513 /// Implements the __is_target_variant_os builtin macro.
isTargetVariantOS(const TargetInfo & TI,const IdentifierInfo * II)1514 static bool isTargetVariantOS(const TargetInfo &TI, const IdentifierInfo *II) {
1515   if (TI.getTriple().isOSDarwin()) {
1516     const llvm::Triple *VariantTriple = TI.getDarwinTargetVariantTriple();
1517     if (!VariantTriple)
1518       return false;
1519 
1520     llvm::Triple OS(llvm::Twine("unknown-unknown-") + II->getName().lower());
1521     if (OS.getOS() == llvm::Triple::Darwin) {
1522       // Darwin matches macos, ios, etc.
1523       return VariantTriple->isOSDarwin();
1524     }
1525     return VariantTriple->getOS() == OS.getOS();
1526   }
1527   return false;
1528 }
1529 
1530 /// Implements the __is_target_variant_environment builtin macro.
isTargetVariantEnvironment(const TargetInfo & TI,const IdentifierInfo * II)1531 static bool isTargetVariantEnvironment(const TargetInfo &TI,
1532                                 const IdentifierInfo *II) {
1533   if (TI.getTriple().isOSDarwin()) {
1534     const llvm::Triple *VariantTriple = TI.getDarwinTargetVariantTriple();
1535     if (!VariantTriple)
1536       return false;
1537     llvm::Triple Env(llvm::Twine("---") + II->getName().lower());
1538     return VariantTriple->getEnvironment() == Env.getEnvironment();
1539   }
1540   return false;
1541 }
1542 
#if defined(__sun__) && defined(__svr4__) && defined(__clang__) &&             \
    __clang_major__ < 20
// GCC mangles std::tm as tm for binary compatibility on Solaris (Issue
// #33114).  We need to match this to allow the std::put_time calls to link
// (PR #99075).  clang 20 contains a fix, but the workaround is still needed
// with older versions.
//
// The version test must use __clang_major__: __clang__ is only defined to 1
// and carries no version information, so comparing it against 20 would keep
// the workaround enabled for every clang release.
asm("_ZNKSt8time_putIcSt19ostreambuf_iteratorIcSt11char_traitsIcEEE3putES3_"
    "RSt8ios_basecPKSt2tmPKcSB_ = "
    "_ZNKSt8time_putIcSt19ostreambuf_iteratorIcSt11char_traitsIcEEE3putES3_"
    "RSt8ios_basecPK2tmPKcSB_");
#endif
1554 
IsBuiltinTrait(Token & Tok)1555 static bool IsBuiltinTrait(Token &Tok) {
1556 
1557 #define TYPE_TRAIT_1(Spelling, Name, Key)                                      \
1558   case tok::kw_##Spelling:                                                     \
1559     return true;
1560 #define TYPE_TRAIT_2(Spelling, Name, Key)                                      \
1561   case tok::kw_##Spelling:                                                     \
1562     return true;
1563 #define TYPE_TRAIT_N(Spelling, Name, Key)                                      \
1564   case tok::kw_##Spelling:                                                     \
1565     return true;
1566 #define ARRAY_TYPE_TRAIT(Spelling, Name, Key)                                  \
1567   case tok::kw_##Spelling:                                                     \
1568     return true;
1569 #define EXPRESSION_TRAIT(Spelling, Name, Key)                                  \
1570   case tok::kw_##Spelling:                                                     \
1571     return true;
1572 #define TRANSFORM_TYPE_TRAIT_DEF(K, Spelling)                                  \
1573   case tok::kw___##Spelling:                                                   \
1574     return true;
1575 
1576   switch (Tok.getKind()) {
1577   default:
1578     return false;
1579 #include "clang/Basic/TokenKinds.def"
1580   }
1581 }
1582 
1583 /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
1584 /// as a builtin macro, handle it and return the next token as 'Tok'.
ExpandBuiltinMacro(Token & Tok)1585 void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
1586   // Figure out which token this is.
1587   IdentifierInfo *II = Tok.getIdentifierInfo();
1588   assert(II && "Can't be a macro without id info!");
1589 
1590   // If this is an _Pragma or Microsoft __pragma directive, expand it,
1591   // invoke the pragma handler, then lex the token after it.
1592   if (II == Ident_Pragma)
1593     return Handle_Pragma(Tok);
1594   else if (II == Ident__pragma) // in non-MS mode this is null
1595     return HandleMicrosoft__pragma(Tok);
1596 
1597   ++NumBuiltinMacroExpanded;
1598 
1599   SmallString<128> TmpBuffer;
1600   llvm::raw_svector_ostream OS(TmpBuffer);
1601 
1602   // Set up the return result.
1603   Tok.setIdentifierInfo(nullptr);
1604   Tok.clearFlag(Token::NeedsCleaning);
1605   bool IsAtStartOfLine = Tok.isAtStartOfLine();
1606   bool HasLeadingSpace = Tok.hasLeadingSpace();
1607 
1608   if (II == Ident__LINE__) {
1609     // C99 6.10.8: "__LINE__: The presumed line number (within the current
1610     // source file) of the current source line (an integer constant)".  This can
1611     // be affected by #line.
1612     SourceLocation Loc = Tok.getLocation();
1613 
1614     // Advance to the location of the first _, this might not be the first byte
1615     // of the token if it starts with an escaped newline.
1616     Loc = AdvanceToTokenCharacter(Loc, 0);
1617 
1618     // One wrinkle here is that GCC expands __LINE__ to location of the *end* of
1619     // a macro expansion.  This doesn't matter for object-like macros, but
1620     // can matter for a function-like macro that expands to contain __LINE__.
1621     // Skip down through expansion points until we find a file loc for the
1622     // end of the expansion history.
1623     Loc = SourceMgr.getExpansionRange(Loc).getEnd();
1624     PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
1625 
1626     // __LINE__ expands to a simple numeric value.
1627     OS << (PLoc.isValid()? PLoc.getLine() : 1);
1628     Tok.setKind(tok::numeric_constant);
1629   } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__ ||
1630              II == Ident__FILE_NAME__) {
1631     // C99 6.10.8: "__FILE__: The presumed name of the current source file (a
1632     // character string literal)". This can be affected by #line.
1633     PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
1634 
1635     // __BASE_FILE__ is a GNU extension that returns the top of the presumed
1636     // #include stack instead of the current file.
1637     if (II == Ident__BASE_FILE__ && PLoc.isValid()) {
1638       SourceLocation NextLoc = PLoc.getIncludeLoc();
1639       while (NextLoc.isValid()) {
1640         PLoc = SourceMgr.getPresumedLoc(NextLoc);
1641         if (PLoc.isInvalid())
1642           break;
1643 
1644         NextLoc = PLoc.getIncludeLoc();
1645       }
1646     }
1647 
1648     // Escape this filename.  Turn '\' -> '\\' '"' -> '\"'
1649     SmallString<256> FN;
1650     if (PLoc.isValid()) {
1651       // __FILE_NAME__ is a Clang-specific extension that expands to the
1652       // the last part of __FILE__.
1653       if (II == Ident__FILE_NAME__) {
1654         processPathToFileName(FN, PLoc, getLangOpts(), getTargetInfo());
1655       } else {
1656         FN += PLoc.getFilename();
1657         processPathForFileMacro(FN, getLangOpts(), getTargetInfo());
1658       }
1659       Lexer::Stringify(FN);
1660       OS << '"' << FN << '"';
1661     }
1662     Tok.setKind(tok::string_literal);
1663   } else if (II == Ident__DATE__) {
1664     Diag(Tok.getLocation(), diag::warn_pp_date_time);
1665     if (!DATELoc.isValid())
1666       ComputeDATE_TIME(DATELoc, TIMELoc, *this);
1667     Tok.setKind(tok::string_literal);
1668     Tok.setLength(strlen("\"Mmm dd yyyy\""));
1669     Tok.setLocation(SourceMgr.createExpansionLoc(DATELoc, Tok.getLocation(),
1670                                                  Tok.getLocation(),
1671                                                  Tok.getLength()));
1672     return;
1673   } else if (II == Ident__TIME__) {
1674     Diag(Tok.getLocation(), diag::warn_pp_date_time);
1675     if (!TIMELoc.isValid())
1676       ComputeDATE_TIME(DATELoc, TIMELoc, *this);
1677     Tok.setKind(tok::string_literal);
1678     Tok.setLength(strlen("\"hh:mm:ss\""));
1679     Tok.setLocation(SourceMgr.createExpansionLoc(TIMELoc, Tok.getLocation(),
1680                                                  Tok.getLocation(),
1681                                                  Tok.getLength()));
1682     return;
1683   } else if (II == Ident__INCLUDE_LEVEL__) {
1684     // Compute the presumed include depth of this token.  This can be affected
1685     // by GNU line markers.
1686     unsigned Depth = 0;
1687 
1688     PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
1689     if (PLoc.isValid()) {
1690       PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc());
1691       for (; PLoc.isValid(); ++Depth)
1692         PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc());
1693     }
1694 
1695     // __INCLUDE_LEVEL__ expands to a simple numeric value.
1696     OS << Depth;
1697     Tok.setKind(tok::numeric_constant);
1698   } else if (II == Ident__TIMESTAMP__) {
1699     Diag(Tok.getLocation(), diag::warn_pp_date_time);
1700     // MSVC, ICC, GCC, VisualAge C++ extension.  The generated string should be
1701     // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
1702     std::string Result;
1703     std::stringstream TmpStream;
1704     TmpStream.imbue(std::locale("C"));
1705     if (getPreprocessorOpts().SourceDateEpoch) {
1706       time_t TT = *getPreprocessorOpts().SourceDateEpoch;
1707       std::tm *TM = std::gmtime(&TT);
1708       TmpStream << std::put_time(TM, "%a %b %e %T %Y");
1709     } else {
1710       // Get the file that we are lexing out of.  If we're currently lexing from
1711       // a macro, dig into the include stack.
1712       const FileEntry *CurFile = nullptr;
1713       if (PreprocessorLexer *TheLexer = getCurrentFileLexer())
1714         CurFile = SourceMgr.getFileEntryForID(TheLexer->getFileID());
1715       if (CurFile) {
1716         time_t TT = CurFile->getModificationTime();
1717         struct tm *TM = localtime(&TT);
1718         TmpStream << std::put_time(TM, "%a %b %e %T %Y");
1719       }
1720     }
1721     Result = TmpStream.str();
1722     if (Result.empty())
1723       Result = "??? ??? ?? ??:??:?? ????";
1724     OS << '"' << Result << '"';
1725     Tok.setKind(tok::string_literal);
1726   } else if (II == Ident__FLT_EVAL_METHOD__) {
1727     // __FLT_EVAL_METHOD__ is set to the default value.
1728     OS << getTUFPEvalMethod();
1729     // __FLT_EVAL_METHOD__ expands to a simple numeric value.
1730     Tok.setKind(tok::numeric_constant);
1731     if (getLastFPEvalPragmaLocation().isValid()) {
1732       // The program is ill-formed. The value of __FLT_EVAL_METHOD__ is altered
1733       // by the pragma.
1734       Diag(Tok, diag::err_illegal_use_of_flt_eval_macro);
1735       Diag(getLastFPEvalPragmaLocation(), diag::note_pragma_entered_here);
1736     }
1737   } else if (II == Ident__COUNTER__) {
1738     // __COUNTER__ expands to a simple numeric value.
1739     OS << CounterValue++;
1740     Tok.setKind(tok::numeric_constant);
1741   } else if (II == Ident__has_feature) {
1742     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
1743       [this](Token &Tok, bool &HasLexedNextToken) -> int {
1744         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
1745                                            diag::err_feature_check_malformed);
1746         return II && HasFeature(*this, II->getName());
1747       });
1748   } else if (II == Ident__has_extension) {
1749     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
1750       [this](Token &Tok, bool &HasLexedNextToken) -> int {
1751         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
1752                                            diag::err_feature_check_malformed);
1753         return II && HasExtension(*this, II->getName());
1754       });
1755   } else if (II == Ident__has_builtin) {
1756     EvaluateFeatureLikeBuiltinMacro(
1757         OS, Tok, II, *this, false,
1758         [this](Token &Tok, bool &HasLexedNextToken) -> int {
1759           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
1760               Tok, *this, diag::err_feature_check_malformed);
1761           if (!II)
1762             return false;
1763           else if (II->getBuiltinID() != 0) {
1764             switch (II->getBuiltinID()) {
1765             case Builtin::BI__builtin_cpu_is:
1766               return getTargetInfo().supportsCpuIs();
1767             case Builtin::BI__builtin_cpu_init:
1768               return getTargetInfo().supportsCpuInit();
1769             case Builtin::BI__builtin_cpu_supports:
1770               return getTargetInfo().supportsCpuSupports();
1771             case Builtin::BI__builtin_operator_new:
1772             case Builtin::BI__builtin_operator_delete:
1773               // denotes date of behavior change to support calling arbitrary
1774               // usual allocation and deallocation functions. Required by libc++
1775               return 201802;
1776             default:
1777               return Builtin::evaluateRequiredTargetFeatures(
1778                   getBuiltinInfo().getRequiredFeatures(II->getBuiltinID()),
1779                   getTargetInfo().getTargetOpts().FeatureMap);
1780             }
1781             return true;
1782           } else if (IsBuiltinTrait(Tok)) {
1783             return true;
1784           } else if (II->getTokenID() != tok::identifier &&
1785                      II->getName().starts_with("__builtin_")) {
1786             return true;
1787           } else {
1788             return llvm::StringSwitch<bool>(II->getName())
1789         // Report builtin templates as being builtins.
1790 #define BuiltinTemplate(BTName) .Case(#BTName, getLangOpts().CPlusPlus)
1791 #include "clang/Basic/BuiltinTemplates.inc"
1792                 // Likewise for some builtin preprocessor macros.
1793                 // FIXME: This is inconsistent; we usually suggest detecting
1794                 // builtin macros via #ifdef. Don't add more cases here.
1795                 .Case("__is_target_arch", true)
1796                 .Case("__is_target_vendor", true)
1797                 .Case("__is_target_os", true)
1798                 .Case("__is_target_environment", true)
1799                 .Case("__is_target_variant_os", true)
1800                 .Case("__is_target_variant_environment", true)
1801                 .Default(false);
1802           }
1803         });
1804   } else if (II == Ident__has_constexpr_builtin) {
1805     EvaluateFeatureLikeBuiltinMacro(
1806         OS, Tok, II, *this, false,
1807         [this](Token &Tok, bool &HasLexedNextToken) -> int {
1808           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
1809               Tok, *this, diag::err_feature_check_malformed);
1810           if (!II)
1811             return false;
1812           unsigned BuiltinOp = II->getBuiltinID();
1813           return BuiltinOp != 0 &&
1814                  this->getBuiltinInfo().isConstantEvaluated(BuiltinOp);
1815         });
1816   } else if (II == Ident__is_identifier) {
1817     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
1818       [](Token &Tok, bool &HasLexedNextToken) -> int {
1819         return Tok.is(tok::identifier);
1820       });
1821   } else if (II == Ident__has_attribute) {
1822     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true,
1823       [this](Token &Tok, bool &HasLexedNextToken) -> int {
1824         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
1825                                            diag::err_feature_check_malformed);
1826         return II ? hasAttribute(AttributeCommonInfo::Syntax::AS_GNU, nullptr,
1827                                  II, getTargetInfo(), getLangOpts())
1828                   : 0;
1829       });
1830   } else if (II == Ident__has_declspec) {
1831     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true,
1832       [this](Token &Tok, bool &HasLexedNextToken) -> int {
1833         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
1834                                            diag::err_feature_check_malformed);
1835         if (II) {
1836           const LangOptions &LangOpts = getLangOpts();
1837           return LangOpts.DeclSpecKeyword &&
1838                  hasAttribute(AttributeCommonInfo::Syntax::AS_Declspec, nullptr,
1839                               II, getTargetInfo(), LangOpts);
1840         }
1841 
1842         return false;
1843       });
1844   } else if (II == Ident__has_cpp_attribute ||
1845              II == Ident__has_c_attribute) {
1846     bool IsCXX = II == Ident__has_cpp_attribute;
1847     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true,
1848         [&](Token &Tok, bool &HasLexedNextToken) -> int {
1849           IdentifierInfo *ScopeII = nullptr;
1850           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
1851               Tok, *this, diag::err_feature_check_malformed);
1852           if (!II)
1853             return false;
1854 
1855           // It is possible to receive a scope token.  Read the "::", if it is
1856           // available, and the subsequent identifier.
1857           LexUnexpandedToken(Tok);
1858           if (Tok.isNot(tok::coloncolon))
1859             HasLexedNextToken = true;
1860           else {
1861             ScopeII = II;
1862             // Lex an expanded token for the attribute name.
1863             Lex(Tok);
1864             II = ExpectFeatureIdentifierInfo(Tok, *this,
1865                                              diag::err_feature_check_malformed);
1866           }
1867 
1868           AttributeCommonInfo::Syntax Syntax =
1869               IsCXX ? AttributeCommonInfo::Syntax::AS_CXX11
1870                     : AttributeCommonInfo::Syntax::AS_C23;
1871           return II ? hasAttribute(Syntax, ScopeII, II, getTargetInfo(),
1872                                    getLangOpts())
1873                     : 0;
1874         });
1875   } else if (II == Ident__has_include ||
1876              II == Ident__has_include_next) {
1877     // The argument to these two builtins should be a parenthesized
1878     // file name string literal using angle brackets (<>) or
1879     // double-quotes ("").
1880     bool Value;
1881     if (II == Ident__has_include)
1882       Value = EvaluateHasInclude(Tok, II);
1883     else
1884       Value = EvaluateHasIncludeNext(Tok, II);
1885 
1886     if (Tok.isNot(tok::r_paren))
1887       return;
1888     OS << (int)Value;
1889     Tok.setKind(tok::numeric_constant);
1890   } else if (II == Ident__has_embed) {
1891     // The argument to these two builtins should be a parenthesized
1892     // file name string literal using angle brackets (<>) or
1893     // double-quotes (""), optionally followed by a series of
1894     // arguments similar to form like attributes.
1895     EmbedResult Value = EvaluateHasEmbed(Tok, II);
1896     if (Value == EmbedResult::Invalid)
1897       return;
1898 
1899     Tok.setKind(tok::numeric_constant);
1900     OS << static_cast<int>(Value);
1901   } else if (II == Ident__has_warning) {
1902     // The argument should be a parenthesized string literal.
1903     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
1904       [this](Token &Tok, bool &HasLexedNextToken) -> int {
1905         std::string WarningName;
1906         SourceLocation StrStartLoc = Tok.getLocation();
1907 
1908         HasLexedNextToken = Tok.is(tok::string_literal);
1909         if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'",
1910                                     /*AllowMacroExpansion=*/false))
1911           return false;
1912 
1913         // FIXME: Should we accept "-R..." flags here, or should that be
1914         // handled by a separate __has_remark?
1915         if (WarningName.size() < 3 || WarningName[0] != '-' ||
1916             WarningName[1] != 'W') {
1917           Diag(StrStartLoc, diag::warn_has_warning_invalid_option);
1918           return false;
1919         }
1920 
1921         // Finally, check if the warning flags maps to a diagnostic group.
1922         // We construct a SmallVector here to talk to getDiagnosticIDs().
1923         // Although we don't use the result, this isn't a hot path, and not
1924         // worth special casing.
1925         SmallVector<diag::kind, 10> Diags;
1926         return !getDiagnostics().getDiagnosticIDs()->
1927                 getDiagnosticsInGroup(diag::Flavor::WarningOrError,
1928                                       WarningName.substr(2), Diags);
1929       });
1930   } else if (II == Ident__building_module) {
1931     // The argument to this builtin should be an identifier. The
1932     // builtin evaluates to 1 when that identifier names the module we are
1933     // currently building.
1934     EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
1935       [this](Token &Tok, bool &HasLexedNextToken) -> int {
1936         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
1937                                        diag::err_expected_id_building_module);
1938         return getLangOpts().isCompilingModule() && II &&
1939                (II->getName() == getLangOpts().CurrentModule);
1940       });
1941   } else if (II == Ident__MODULE__) {
1942     // The current module as an identifier.
1943     OS << getLangOpts().CurrentModule;
1944     IdentifierInfo *ModuleII = getIdentifierInfo(getLangOpts().CurrentModule);
1945     Tok.setIdentifierInfo(ModuleII);
1946     Tok.setKind(ModuleII->getTokenID());
1947   } else if (II == Ident__identifier) {
1948     SourceLocation Loc = Tok.getLocation();
1949 
1950     // We're expecting '__identifier' '(' identifier ')'. Try to recover
1951     // if the parens are missing.
1952     LexNonComment(Tok);
1953     if (Tok.isNot(tok::l_paren)) {
1954       // No '(', use end of last token.
1955       Diag(getLocForEndOfToken(Loc), diag::err_pp_expected_after)
1956         << II << tok::l_paren;
1957       // If the next token isn't valid as our argument, we can't recover.
1958       if (!Tok.isAnnotation() && Tok.getIdentifierInfo())
1959         Tok.setKind(tok::identifier);
1960       return;
1961     }
1962 
1963     SourceLocation LParenLoc = Tok.getLocation();
1964     LexNonComment(Tok);
1965 
1966     if (!Tok.isAnnotation() && Tok.getIdentifierInfo())
1967       Tok.setKind(tok::identifier);
1968     else if (Tok.is(tok::string_literal) && !Tok.hasUDSuffix()) {
1969       StringLiteralParser Literal(Tok, *this,
1970                                   StringLiteralEvalMethod::Unevaluated);
1971       if (Literal.hadError)
1972         return;
1973 
1974       Tok.setIdentifierInfo(getIdentifierInfo(Literal.GetString()));
1975       Tok.setKind(tok::identifier);
1976     } else {
1977       Diag(Tok.getLocation(), diag::err_pp_identifier_arg_not_identifier)
1978         << Tok.getKind();
1979       // Don't walk past anything that's not a real token.
1980       if (Tok.isOneOf(tok::eof, tok::eod) || Tok.isAnnotation())
1981         return;
1982     }
1983 
1984     // Discard the ')', preserving 'Tok' as our result.
1985     Token RParen;
1986     LexNonComment(RParen);
1987     if (RParen.isNot(tok::r_paren)) {
1988       Diag(getLocForEndOfToken(Tok.getLocation()), diag::err_pp_expected_after)
1989         << Tok.getKind() << tok::r_paren;
1990       Diag(LParenLoc, diag::note_matching) << tok::l_paren;
1991     }
1992     return;
1993   } else if (II == Ident__is_target_arch) {
1994     EvaluateFeatureLikeBuiltinMacro(
1995         OS, Tok, II, *this, false,
1996         [this](Token &Tok, bool &HasLexedNextToken) -> int {
1997           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
1998               Tok, *this, diag::err_feature_check_malformed);
1999           return II && isTargetArch(getTargetInfo(), II);
2000         });
2001   } else if (II == Ident__is_target_vendor) {
2002     EvaluateFeatureLikeBuiltinMacro(
2003         OS, Tok, II, *this, false,
2004         [this](Token &Tok, bool &HasLexedNextToken) -> int {
2005           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
2006               Tok, *this, diag::err_feature_check_malformed);
2007           return II && isTargetVendor(getTargetInfo(), II);
2008         });
2009   } else if (II == Ident__is_target_os) {
2010     EvaluateFeatureLikeBuiltinMacro(
2011         OS, Tok, II, *this, false,
2012         [this](Token &Tok, bool &HasLexedNextToken) -> int {
2013           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
2014               Tok, *this, diag::err_feature_check_malformed);
2015           return II && isTargetOS(getTargetInfo(), II);
2016         });
2017   } else if (II == Ident__is_target_environment) {
2018     EvaluateFeatureLikeBuiltinMacro(
2019         OS, Tok, II, *this, false,
2020         [this](Token &Tok, bool &HasLexedNextToken) -> int {
2021           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
2022               Tok, *this, diag::err_feature_check_malformed);
2023           return II && isTargetEnvironment(getTargetInfo(), II);
2024         });
2025   } else if (II == Ident__is_target_variant_os) {
2026     EvaluateFeatureLikeBuiltinMacro(
2027         OS, Tok, II, *this, false,
2028         [this](Token &Tok, bool &HasLexedNextToken) -> int {
2029           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
2030               Tok, *this, diag::err_feature_check_malformed);
2031           return II && isTargetVariantOS(getTargetInfo(), II);
2032         });
2033   } else if (II == Ident__is_target_variant_environment) {
2034     EvaluateFeatureLikeBuiltinMacro(
2035         OS, Tok, II, *this, false,
2036         [this](Token &Tok, bool &HasLexedNextToken) -> int {
2037           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
2038               Tok, *this, diag::err_feature_check_malformed);
2039           return II && isTargetVariantEnvironment(getTargetInfo(), II);
2040         });
2041   } else {
2042     llvm_unreachable("Unknown identifier!");
2043   }
2044   CreateString(OS.str(), Tok, Tok.getLocation(), Tok.getLocation());
2045   Tok.setFlagValue(Token::StartOfLine, IsAtStartOfLine);
2046   Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
2047   Tok.clearFlag(Token::NeedsCleaning);
2048 }
2049 
markMacroAsUsed(MacroInfo * MI)2050 void Preprocessor::markMacroAsUsed(MacroInfo *MI) {
2051   // If the 'used' status changed, and the macro requires 'unused' warning,
2052   // remove its SourceLocation from the warn-for-unused-macro locations.
2053   if (MI->isWarnIfUnused() && !MI->isUsed())
2054     WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
2055   MI->setIsUsed(true);
2056 }
2057 
processPathForFileMacro(SmallVectorImpl<char> & Path,const LangOptions & LangOpts,const TargetInfo & TI)2058 void Preprocessor::processPathForFileMacro(SmallVectorImpl<char> &Path,
2059                                            const LangOptions &LangOpts,
2060                                            const TargetInfo &TI) {
2061   LangOpts.remapPathPrefix(Path);
2062   if (LangOpts.UseTargetPathSeparator) {
2063     if (TI.getTriple().isOSWindows())
2064       llvm::sys::path::remove_dots(Path, false,
2065                                    llvm::sys::path::Style::windows_backslash);
2066     else
2067       llvm::sys::path::remove_dots(Path, false, llvm::sys::path::Style::posix);
2068   }
2069 }
2070 
processPathToFileName(SmallVectorImpl<char> & FileName,const PresumedLoc & PLoc,const LangOptions & LangOpts,const TargetInfo & TI)2071 void Preprocessor::processPathToFileName(SmallVectorImpl<char> &FileName,
2072                                          const PresumedLoc &PLoc,
2073                                          const LangOptions &LangOpts,
2074                                          const TargetInfo &TI) {
2075   // Try to get the last path component, failing that return the original
2076   // presumed location.
2077   StringRef PLFileName = llvm::sys::path::filename(PLoc.getFilename());
2078   if (PLFileName.empty())
2079     PLFileName = PLoc.getFilename();
2080   FileName.append(PLFileName.begin(), PLFileName.end());
2081   processPathForFileMacro(FileName, LangOpts, TI);
2082 }
2083