xref: /freebsd/contrib/llvm-project/clang/include/clang/Lex/Preprocessor.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
16 
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/DiagnosticIDs.h"
19 #include "clang/Basic/IdentifierTable.h"
20 #include "clang/Basic/LLVM.h"
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/Module.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/HeaderSearch.h"
27 #include "clang/Lex/Lexer.h"
28 #include "clang/Lex/MacroInfo.h"
29 #include "clang/Lex/ModuleLoader.h"
30 #include "clang/Lex/ModuleMap.h"
31 #include "clang/Lex/PPCallbacks.h"
32 #include "clang/Lex/PPEmbedParameters.h"
33 #include "clang/Lex/Token.h"
34 #include "clang/Lex/TokenLexer.h"
35 #include "llvm/ADT/APSInt.h"
36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/FoldingSet.h"
39 #include "llvm/ADT/FunctionExtras.h"
40 #include "llvm/ADT/PointerUnion.h"
41 #include "llvm/ADT/STLExtras.h"
42 #include "llvm/ADT/SmallPtrSet.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/ADT/StringRef.h"
45 #include "llvm/ADT/TinyPtrVector.h"
46 #include "llvm/ADT/iterator_range.h"
47 #include "llvm/Support/Allocator.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/Registry.h"
50 #include <cassert>
51 #include <cstddef>
52 #include <cstdint>
53 #include <map>
54 #include <memory>
55 #include <optional>
56 #include <string>
57 #include <utility>
58 #include <vector>
59 
60 namespace llvm {
61 
62 template<unsigned InternalLen> class SmallString;
63 
64 } // namespace llvm
65 
66 namespace clang {
67 
68 class CodeCompletionHandler;
69 class CommentHandler;
70 class DirectoryEntry;
71 class EmptylineHandler;
72 class ExternalPreprocessorSource;
73 class FileEntry;
74 class FileManager;
75 class HeaderSearch;
76 class MacroArgs;
77 class PragmaHandler;
78 class PragmaNamespace;
79 class PreprocessingRecord;
80 class PreprocessorLexer;
81 class PreprocessorOptions;
82 class ScratchBuffer;
83 class TargetInfo;
84 
85 namespace Builtin {
86 class Context;
87 }
88 
89 /// Stores token information for comparing actual tokens with
90 /// predefined values.  Only handles simple tokens and identifiers.
91 class TokenValue {
92   tok::TokenKind Kind;
93   IdentifierInfo *II;
94 
95 public:
TokenValue(tok::TokenKind Kind)96   TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
97     assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
98     assert(Kind != tok::identifier &&
99            "Identifiers should be created by TokenValue(IdentifierInfo *)");
100     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
101     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
102   }
103 
TokenValue(IdentifierInfo * II)104   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
105 
106   bool operator==(const Token &Tok) const {
107     return Tok.getKind() == Kind &&
108         (!II || II == Tok.getIdentifierInfo());
109   }
110 };
111 
/// Describes the directive context in which a macro name is used.
enum MacroUse {
  /// Any use other than in \#define or \#undef.
  MU_Other = 0,

  /// The macro name is the one specified in a \#define.
  MU_Define = 1,

  /// The macro name is the one specified in an \#undef.
  MU_Undef = 2
};
123 
/// Result codes; each non-error enumerator corresponds to the
/// __STDC_EMBED_* macro named in its comment.
enum class EmbedResult {
  /// Parsing error occurred.
  Invalid = -1,
  /// Corresponds to __STDC_EMBED_NOT_FOUND__.
  NotFound = 0,
  /// Corresponds to __STDC_EMBED_FOUND__.
  Found = 1,
  /// Corresponds to __STDC_EMBED_EMPTY__.
  Empty = 2,
};
130 
131 /// Engages in a tight little dance with the lexer to efficiently
132 /// preprocess tokens.
133 ///
134 /// Lexers know only about tokens within a single source file, and don't
135 /// know anything about preprocessor-level issues like the \#include stack,
136 /// token expansion, etc.
137 class Preprocessor {
138   friend class VAOptDefinitionContext;
139   friend class VariadicMacroScopeGuard;
140 
141   llvm::unique_function<void(const clang::Token &)> OnToken;
142   std::shared_ptr<PreprocessorOptions> PPOpts;
143   DiagnosticsEngine        *Diags;
144   const LangOptions &LangOpts;
145   const TargetInfo *Target = nullptr;
146   const TargetInfo *AuxTarget = nullptr;
147   FileManager       &FileMgr;
148   SourceManager     &SourceMgr;
149   std::unique_ptr<ScratchBuffer> ScratchBuf;
150   HeaderSearch      &HeaderInfo;
151   ModuleLoader      &TheModuleLoader;
152 
153   /// External source of macros.
154   ExternalPreprocessorSource *ExternalSource;
155 
156   /// A BumpPtrAllocator object used to quickly allocate and release
157   /// objects internal to the Preprocessor.
158   llvm::BumpPtrAllocator BP;
159 
160   /// Identifiers for builtin macros and other builtins.
161   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
162   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
163   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
164   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
165   IdentifierInfo *Ident__FILE_NAME__;              // __FILE_NAME__
166   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
167   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
168   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
169   IdentifierInfo *Ident__identifier;               // __identifier
170   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
171   IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
172   IdentifierInfo *Ident__has_feature;              // __has_feature
173   IdentifierInfo *Ident__has_extension;            // __has_extension
174   IdentifierInfo *Ident__has_builtin;              // __has_builtin
175   IdentifierInfo *Ident__has_constexpr_builtin;    // __has_constexpr_builtin
176   IdentifierInfo *Ident__has_attribute;            // __has_attribute
177   IdentifierInfo *Ident__has_embed;                // __has_embed
178   IdentifierInfo *Ident__has_include;              // __has_include
179   IdentifierInfo *Ident__has_include_next;         // __has_include_next
180   IdentifierInfo *Ident__has_warning;              // __has_warning
181   IdentifierInfo *Ident__is_identifier;            // __is_identifier
182   IdentifierInfo *Ident__building_module;          // __building_module
183   IdentifierInfo *Ident__MODULE__;                 // __MODULE__
184   IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
185   IdentifierInfo *Ident__has_c_attribute;          // __has_c_attribute
186   IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
187   IdentifierInfo *Ident__is_target_arch;           // __is_target_arch
188   IdentifierInfo *Ident__is_target_vendor;         // __is_target_vendor
189   IdentifierInfo *Ident__is_target_os;             // __is_target_os
190   IdentifierInfo *Ident__is_target_environment;    // __is_target_environment
191   IdentifierInfo *Ident__is_target_variant_os;
192   IdentifierInfo *Ident__is_target_variant_environment;
193   IdentifierInfo *Ident__FLT_EVAL_METHOD__;        // __FLT_EVAL_METHOD
194 
195   // Weak, only valid (and set) while InMacroArgs is true.
196   Token* ArgMacro;
197 
198   SourceLocation DATELoc, TIMELoc;
199 
200   // FEM_UnsetOnCommandLine means that an explicit evaluation method was
201   // not specified on the command line. The target is queried to set the
202   // default evaluation method.
203   LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
204       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
205 
206   // The most recent pragma location where the floating point evaluation
207   // method was modified. This is used to determine whether the
208   // 'pragma clang fp eval_method' was used within the current scope.
209   SourceLocation LastFPEvalPragmaLocation;
210 
211   LangOptions::FPEvalMethodKind TUFPEvalMethod =
212       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
213 
214   // Next __COUNTER__ value, starts at 0.
215   unsigned CounterValue = 0;
216 
217   enum {
218     /// Maximum depth of \#includes.
219     MaxAllowedIncludeStackDepth = 200
220   };
221 
222   // State that is set before the preprocessor begins.
223   bool KeepComments : 1;
224   bool KeepMacroComments : 1;
225   bool SuppressIncludeNotFoundError : 1;
226 
227   // State that changes while the preprocessor runs:
228   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
229 
230   /// Whether the preprocessor owns the header search object.
231   bool OwnsHeaderSearch : 1;
232 
233   /// True if macro expansion is disabled.
234   bool DisableMacroExpansion : 1;
235 
236   /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
237   /// when parsing preprocessor directives.
238   bool MacroExpansionInDirectivesOverride : 1;
239 
240   class ResetMacroExpansionHelper;
241 
242   /// Whether we have already loaded macros from the external source.
243   mutable bool ReadMacrosFromExternalSource : 1;
244 
245   /// True if pragmas are enabled.
246   bool PragmasEnabled : 1;
247 
248   /// True if the current build action is a preprocessing action.
249   bool PreprocessedOutput : 1;
250 
251   /// True if we are currently preprocessing a #if or #elif directive
252   bool ParsingIfOrElifDirective;
253 
254   /// True if we are pre-expanding macro arguments.
255   bool InMacroArgPreExpansion;
256 
257   /// Mapping/lookup information for all identifiers in
258   /// the program, including program keywords.
259   mutable IdentifierTable Identifiers;
260 
261   /// This table contains all the selectors in the program.
262   ///
263   /// Unlike IdentifierTable above, this table *isn't* populated by the
264   /// preprocessor. It is declared/expanded here because its role/lifetime is
265   /// conceptually similar to the IdentifierTable. In addition, the current
266   /// control flow (in clang::ParseAST()) makes it convenient to put here.
267   ///
268   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
269   /// the lifetime of the preprocessor.
270   SelectorTable Selectors;
271 
272   /// Information about builtins.
273   std::unique_ptr<Builtin::Context> BuiltinInfo;
274 
275   /// Tracks all of the pragmas that the client registered
276   /// with this preprocessor.
277   std::unique_ptr<PragmaNamespace> PragmaHandlers;
278 
279   /// Pragma handlers of the original source are stored here during the
280   /// parsing of a model file.
281   std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
282 
283   /// Tracks all of the comment handlers that the client registered
284   /// with this preprocessor.
285   std::vector<CommentHandler *> CommentHandlers;
286 
287   /// Empty line handler.
288   EmptylineHandler *Emptyline = nullptr;
289 
290   /// True to avoid tearing down the lexer etc on EOF
291   bool IncrementalProcessing = false;
292 
293 public:
294   /// The kind of translation unit we are processing.
295   const TranslationUnitKind TUKind;
296 
297   /// Returns a pointer into the given file's buffer that's guaranteed
298   /// to be between tokens. The returned pointer is always before \p Start.
299   /// The maximum distance between the returned pointer and \p Start is
300   /// limited by a constant value, but also an implementation detail.
301   /// If no such check point exists, \c nullptr is returned.
302   const char *getCheckPoint(FileID FID, const char *Start) const;
303 
304 private:
305   /// The code-completion handler.
306   CodeCompletionHandler *CodeComplete = nullptr;
307 
308   /// The file that we're performing code-completion for, if any.
309   const FileEntry *CodeCompletionFile = nullptr;
310 
311   /// The offset in file for the code-completion point.
312   unsigned CodeCompletionOffset = 0;
313 
314   /// The location for the code-completion point. This gets instantiated
315   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
316   SourceLocation CodeCompletionLoc;
317 
318   /// The start location for the file of the code-completion point.
319   ///
320   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
321   /// for preprocessing.
322   SourceLocation CodeCompletionFileLoc;
323 
324   /// The source location of the \c import contextual keyword we just
325   /// lexed, if any.
326   SourceLocation ModuleImportLoc;
327 
328   /// The import path for named module that we're currently processing.
329   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;
330 
331   llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
332   unsigned CheckPointCounter = 0;
333 
334   /// Whether the import is an `@import` or a standard c++ modules import.
335   bool IsAtImport = false;
336 
337   /// Whether the last token we lexed was an '@'.
338   bool LastTokenWasAt = false;
339 
340   /// A position within a C++20 import-seq.
341   class StdCXXImportSeq {
342   public:
343     enum State : int {
344       // Positive values represent a number of unclosed brackets.
345       AtTopLevel = 0,
346       AfterTopLevelTokenSeq = -1,
347       AfterExport = -2,
348       AfterImportSeq = -3,
349     };
350 
StdCXXImportSeq(State S)351     StdCXXImportSeq(State S) : S(S) {}
352 
353     /// Saw any kind of open bracket.
handleOpenBracket()354     void handleOpenBracket() {
355       S = static_cast<State>(std::max<int>(S, 0) + 1);
356     }
357     /// Saw any kind of close bracket other than '}'.
handleCloseBracket()358     void handleCloseBracket() {
359       S = static_cast<State>(std::max<int>(S, 1) - 1);
360     }
361     /// Saw a close brace.
handleCloseBrace()362     void handleCloseBrace() {
363       handleCloseBracket();
364       if (S == AtTopLevel && !AfterHeaderName)
365         S = AfterTopLevelTokenSeq;
366     }
367     /// Saw a semicolon.
handleSemi()368     void handleSemi() {
369       if (atTopLevel()) {
370         S = AfterTopLevelTokenSeq;
371         AfterHeaderName = false;
372       }
373     }
374 
375     /// Saw an 'export' identifier.
handleExport()376     void handleExport() {
377       if (S == AfterTopLevelTokenSeq)
378         S = AfterExport;
379       else if (S <= 0)
380         S = AtTopLevel;
381     }
382     /// Saw an 'import' identifier.
handleImport()383     void handleImport() {
384       if (S == AfterTopLevelTokenSeq || S == AfterExport)
385         S = AfterImportSeq;
386       else if (S <= 0)
387         S = AtTopLevel;
388     }
389 
390     /// Saw a 'header-name' token; do not recognize any more 'import' tokens
391     /// until we reach a top-level semicolon.
handleHeaderName()392     void handleHeaderName() {
393       if (S == AfterImportSeq)
394         AfterHeaderName = true;
395       handleMisc();
396     }
397 
398     /// Saw any other token.
handleMisc()399     void handleMisc() {
400       if (S <= 0)
401         S = AtTopLevel;
402     }
403 
atTopLevel()404     bool atTopLevel() { return S <= 0; }
afterImportSeq()405     bool afterImportSeq() { return S == AfterImportSeq; }
afterTopLevelSeq()406     bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
407 
408   private:
409     State S;
410     /// Whether we're in the pp-import-suffix following the header-name in a
411     /// pp-import. If so, a close-brace is not sufficient to end the
412     /// top-level-token-seq of an import-seq.
413     bool AfterHeaderName = false;
414   };
415 
416   /// Our current position within a C++20 import-seq.
417   StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq;
418 
419   /// Track whether we are in a Global Module Fragment
420   class TrackGMF {
421   public:
422     enum GMFState : int {
423       GMFActive = 1,
424       MaybeGMF = 0,
425       BeforeGMFIntroducer = -1,
426       GMFAbsentOrEnded = -2,
427     };
428 
TrackGMF(GMFState S)429     TrackGMF(GMFState S) : S(S) {}
430 
431     /// Saw a semicolon.
handleSemi()432     void handleSemi() {
433       // If it is immediately after the first instance of the module keyword,
434       // then that introduces the GMF.
435       if (S == MaybeGMF)
436         S = GMFActive;
437     }
438 
439     /// Saw an 'export' identifier.
handleExport()440     void handleExport() {
441       // The presence of an 'export' keyword always ends or excludes a GMF.
442       S = GMFAbsentOrEnded;
443     }
444 
445     /// Saw an 'import' identifier.
handleImport(bool AfterTopLevelTokenSeq)446     void handleImport(bool AfterTopLevelTokenSeq) {
447       // If we see this before any 'module' kw, then we have no GMF.
448       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
449         S = GMFAbsentOrEnded;
450     }
451 
452     /// Saw a 'module' identifier.
handleModule(bool AfterTopLevelTokenSeq)453     void handleModule(bool AfterTopLevelTokenSeq) {
454       // This was the first module identifier and not preceded by any token
455       // that would exclude a GMF.  It could begin a GMF, but only if directly
456       // followed by a semicolon.
457       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
458         S = MaybeGMF;
459       else
460         S = GMFAbsentOrEnded;
461     }
462 
463     /// Saw any other token.
handleMisc()464     void handleMisc() {
465       // We saw something other than ; after the 'module' kw, so not a GMF.
466       if (S == MaybeGMF)
467         S = GMFAbsentOrEnded;
468     }
469 
inGMF()470     bool inGMF() { return S == GMFActive; }
471 
472   private:
473     /// Track the transitions into and out of a Global Module Fragment,
474     /// if one is present.
475     GMFState S;
476   };
477 
478   TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
479 
480   /// Track the status of the c++20 module decl.
481   ///
482   ///   module-declaration:
483   ///     'export'[opt] 'module' module-name module-partition[opt]
484   ///     attribute-specifier-seq[opt] ';'
485   ///
486   ///   module-name:
487   ///     module-name-qualifier[opt] identifier
488   ///
489   ///   module-partition:
490   ///     ':' module-name-qualifier[opt] identifier
491   ///
492   ///   module-name-qualifier:
493   ///     identifier '.'
494   ///     module-name-qualifier identifier '.'
495   ///
496   /// Transition state:
497   ///
498   ///   NotAModuleDecl --- export ---> FoundExport
499   ///   NotAModuleDecl --- module ---> ImplementationCandidate
500   ///   FoundExport --- module ---> InterfaceCandidate
501   ///   ImplementationCandidate --- Identifier ---> ImplementationCandidate
502   ///   ImplementationCandidate --- period ---> ImplementationCandidate
503   ///   ImplementationCandidate --- colon ---> ImplementationCandidate
504   ///   InterfaceCandidate --- Identifier ---> InterfaceCandidate
505   ///   InterfaceCandidate --- period ---> InterfaceCandidate
506   ///   InterfaceCandidate --- colon ---> InterfaceCandidate
507   ///   ImplementationCandidate --- Semi ---> NamedModuleImplementation
508   ///   InterfaceCandidate --- Semi ---> NamedModuleInterface
509   ///   NamedModuleImplementation --- Anything ---> NamedModuleImplementation
510   ///   NamedModuleInterface --- Anything ---> NamedModuleInterface
511   ///
512   /// FIXME: We don't handle attribute-specifier-seq here yet. That is unlikely
513   /// to matter soon since we don't support any module attributes yet.
514   class ModuleDeclSeq {
515     enum ModuleDeclState : int {
516       NotAModuleDecl,
517       FoundExport,
518       InterfaceCandidate,
519       ImplementationCandidate,
520       NamedModuleInterface,
521       NamedModuleImplementation,
522     };
523 
524   public:
525     ModuleDeclSeq() = default;
526 
handleExport()527     void handleExport() {
528       if (State == NotAModuleDecl)
529         State = FoundExport;
530       else if (!isNamedModule())
531         reset();
532     }
533 
handleModule()534     void handleModule() {
535       if (State == FoundExport)
536         State = InterfaceCandidate;
537       else if (State == NotAModuleDecl)
538         State = ImplementationCandidate;
539       else if (!isNamedModule())
540         reset();
541     }
542 
handleIdentifier(IdentifierInfo * Identifier)543     void handleIdentifier(IdentifierInfo *Identifier) {
544       if (isModuleCandidate() && Identifier)
545         Name += Identifier->getName().str();
546       else if (!isNamedModule())
547         reset();
548     }
549 
handleColon()550     void handleColon() {
551       if (isModuleCandidate())
552         Name += ":";
553       else if (!isNamedModule())
554         reset();
555     }
556 
handlePeriod()557     void handlePeriod() {
558       if (isModuleCandidate())
559         Name += ".";
560       else if (!isNamedModule())
561         reset();
562     }
563 
handleSemi()564     void handleSemi() {
565       if (!Name.empty() && isModuleCandidate()) {
566         if (State == InterfaceCandidate)
567           State = NamedModuleInterface;
568         else if (State == ImplementationCandidate)
569           State = NamedModuleImplementation;
570         else
571           llvm_unreachable("Unimaged ModuleDeclState.");
572       } else if (!isNamedModule())
573         reset();
574     }
575 
handleMisc()576     void handleMisc() {
577       if (!isNamedModule())
578         reset();
579     }
580 
isModuleCandidate()581     bool isModuleCandidate() const {
582       return State == InterfaceCandidate || State == ImplementationCandidate;
583     }
584 
isNamedModule()585     bool isNamedModule() const {
586       return State == NamedModuleInterface ||
587              State == NamedModuleImplementation;
588     }
589 
isNamedInterface()590     bool isNamedInterface() const { return State == NamedModuleInterface; }
591 
isImplementationUnit()592     bool isImplementationUnit() const {
593       return State == NamedModuleImplementation && !getName().contains(':');
594     }
595 
getName()596     StringRef getName() const {
597       assert(isNamedModule() && "Can't get name from a non named module");
598       return Name;
599     }
600 
getPrimaryName()601     StringRef getPrimaryName() const {
602       assert(isNamedModule() && "Can't get name from a non named module");
603       return getName().split(':').first;
604     }
605 
reset()606     void reset() {
607       Name.clear();
608       State = NotAModuleDecl;
609     }
610 
611   private:
612     ModuleDeclState State = NotAModuleDecl;
613     std::string Name;
614   };
615 
616   ModuleDeclSeq ModuleDeclState;
617 
618   /// Whether the module import expects an identifier next. Otherwise,
619   /// it expects a '.' or ';'.
620   bool ModuleImportExpectsIdentifier = false;
621 
622   /// The identifier and source location of the currently-active
623   /// \#pragma clang arc_cf_code_audited begin.
624   std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
625 
626   /// The source location of the currently-active
627   /// \#pragma clang assume_nonnull begin.
628   SourceLocation PragmaAssumeNonNullLoc;
629 
630   /// Set only for preambles which end with an active
631   /// \#pragma clang assume_nonnull begin.
632   ///
633   /// When the preamble is loaded into the main file,
634   /// `PragmaAssumeNonNullLoc` will be set to this to
635   /// replay the unterminated assume_nonnull.
636   SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
637 
638   /// True if we hit the code-completion point.
639   bool CodeCompletionReached = false;
640 
641   /// The code completion token containing the information
642   /// on the stem that is to be code completed.
643   IdentifierInfo *CodeCompletionII = nullptr;
644 
645   /// Range for the code completion token.
646   SourceRange CodeCompletionTokenRange;
647 
648   /// The directory that the main file should be considered to occupy,
649   /// if it does not correspond to a real file (as happens when building a
650   /// module).
651   OptionalDirectoryEntryRef MainFileDir;
652 
653   /// The number of bytes that we will initially skip when entering the
654   /// main file, along with a flag that indicates whether skipping this number
655   /// of bytes will place the lexer at the start of a line.
656   ///
657   /// This is used when loading a precompiled preamble.
658   std::pair<int, bool> SkipMainFilePreamble;
659 
660   /// Whether we hit an error due to reaching max allowed include depth. Allows
661   /// to avoid hitting the same error over and over again.
662   bool HasReachedMaxIncludeDepth = false;
663 
664   /// The number of currently-active calls to Lex.
665   ///
666   /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
667   /// require asking for multiple additional tokens. This counter makes it
668   /// possible for Lex to detect whether it's producing a token for the end
669   /// of phase 4 of translation or for some other situation.
670   unsigned LexLevel = 0;
671 
672   /// The number of (LexLevel 0) preprocessor tokens.
673   unsigned TokenCount = 0;
674 
675   /// Preprocess every token regardless of LexLevel.
676   bool PreprocessToken = false;
677 
678   /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
679   /// warning, or zero for unlimited.
680   unsigned MaxTokens = 0;
681   SourceLocation MaxTokensOverrideLoc;
682 
683 public:
684   struct PreambleSkipInfo {
685     SourceLocation HashTokenLoc;
686     SourceLocation IfTokenLoc;
687     bool FoundNonSkipPortion;
688     bool FoundElse;
689     SourceLocation ElseLoc;
690 
PreambleSkipInfoPreambleSkipInfo691     PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
692                      bool FoundNonSkipPortion, bool FoundElse,
693                      SourceLocation ElseLoc)
694         : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
695           FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
696           ElseLoc(ElseLoc) {}
697   };
698 
699   using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
700 
701 private:
702   friend class ASTReader;
703   friend class MacroArgs;
704 
705   class PreambleConditionalStackStore {
706     enum State {
707       Off = 0,
708       Recording = 1,
709       Replaying = 2,
710     };
711 
712   public:
713     PreambleConditionalStackStore() = default;
714 
startRecording()715     void startRecording() { ConditionalStackState = Recording; }
startReplaying()716     void startReplaying() { ConditionalStackState = Replaying; }
isRecording()717     bool isRecording() const { return ConditionalStackState == Recording; }
isReplaying()718     bool isReplaying() const { return ConditionalStackState == Replaying; }
719 
getStack()720     ArrayRef<PPConditionalInfo> getStack() const {
721       return ConditionalStack;
722     }
723 
doneReplaying()724     void doneReplaying() {
725       ConditionalStack.clear();
726       ConditionalStackState = Off;
727     }
728 
setStack(ArrayRef<PPConditionalInfo> s)729     void setStack(ArrayRef<PPConditionalInfo> s) {
730       if (!isRecording() && !isReplaying())
731         return;
732       ConditionalStack.clear();
733       ConditionalStack.append(s.begin(), s.end());
734     }
735 
hasRecordedPreamble()736     bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
737 
reachedEOFWhileSkipping()738     bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
739 
clearSkipInfo()740     void clearSkipInfo() { SkipInfo.reset(); }
741 
742     std::optional<PreambleSkipInfo> SkipInfo;
743 
744   private:
745     SmallVector<PPConditionalInfo, 4> ConditionalStack;
746     State ConditionalStackState = Off;
747   } PreambleConditionalStack;
748 
749   /// The current top of the stack that we're lexing from if
750   /// not expanding a macro and we are lexing directly from source code.
751   ///
752   /// Only one of CurLexer, or CurTokenLexer will be non-null.
753   std::unique_ptr<Lexer> CurLexer;
754 
755   /// The current top of the stack that we're lexing from
756   /// if not expanding a macro.
757   ///
758   /// This is an alias for CurLexer.
759   PreprocessorLexer *CurPPLexer = nullptr;
760 
761   /// Used to find the current FileEntry, if CurLexer is non-null
762   /// and if applicable.
763   ///
764   /// This allows us to implement \#include_next and find directory-specific
765   /// properties.
766   ConstSearchDirIterator CurDirLookup = nullptr;
767 
768   /// The current macro we are expanding, if we are expanding a macro.
769   ///
770   /// One of CurLexer and CurTokenLexer must be null.
771   std::unique_ptr<TokenLexer> CurTokenLexer;
772 
773   /// The kind of lexer we're currently working with.
774   typedef bool (*LexerCallback)(Preprocessor &, Token &);
775   LexerCallback CurLexerCallback = &CLK_Lexer;
776 
777   /// If the current lexer is for a submodule that is being built, this
778   /// is that submodule.
779   Module *CurLexerSubmodule = nullptr;
780 
781   /// Keeps track of the stack of files currently
782   /// \#included, and macros currently being expanded from, not counting
783   /// CurLexer/CurTokenLexer.
784   struct IncludeStackInfo {
785     LexerCallback               CurLexerCallback;
786     Module                     *TheSubmodule;
787     std::unique_ptr<Lexer>      TheLexer;
788     PreprocessorLexer          *ThePPLexer;
789     std::unique_ptr<TokenLexer> TheTokenLexer;
790     ConstSearchDirIterator      TheDirLookup;
791 
792     // The following constructors are completely useless copies of the default
793     // versions, only needed to pacify MSVC.
IncludeStackInfoIncludeStackInfo794     IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule,
795                      std::unique_ptr<Lexer> &&TheLexer,
796                      PreprocessorLexer *ThePPLexer,
797                      std::unique_ptr<TokenLexer> &&TheTokenLexer,
798                      ConstSearchDirIterator TheDirLookup)
799         : CurLexerCallback(std::move(CurLexerCallback)),
800           TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
801           ThePPLexer(std::move(ThePPLexer)),
802           TheTokenLexer(std::move(TheTokenLexer)),
803           TheDirLookup(std::move(TheDirLookup)) {}
804   };
805   std::vector<IncludeStackInfo> IncludeMacroStack;
806 
807   /// Actions invoked when some preprocessor activity is
808   /// encountered (e.g. a file is \#included, etc).
809   std::unique_ptr<PPCallbacks> Callbacks;
810 
811   struct MacroExpandsInfo {
812     Token Tok;
813     MacroDefinition MD;
814     SourceRange Range;
815 
MacroExpandsInfoMacroExpandsInfo816     MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
817         : Tok(Tok), MD(MD), Range(Range) {}
818   };
819   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
820 
821   /// Information about a name that has been used to define a module macro.
822   struct ModuleMacroInfo {
823     /// The most recent macro directive for this identifier.
824     MacroDirective *MD;
825 
826     /// The active module macros for this identifier.
827     llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
828 
829     /// The generation number at which we last updated ActiveModuleMacros.
830     /// \see Preprocessor::VisibleModules.
831     unsigned ActiveModuleMacrosGeneration = 0;
832 
833     /// Whether this macro name is ambiguous.
834     bool IsAmbiguous = false;
835 
836     /// The module macros that are overridden by this macro.
837     llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
838 
ModuleMacroInfoModuleMacroInfo839     ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
840   };
841 
842   /// The state of a macro for an identifier.
843   class MacroState {
844     mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
845 
getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)846     ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
847                                    const IdentifierInfo *II) const {
848       if (II->isOutOfDate())
849         PP.updateOutOfDateIdentifier(*II);
850       // FIXME: Find a spare bit on IdentifierInfo and store a
851       //        HasModuleMacros flag.
852       if (!II->hasMacroDefinition() ||
853           (!PP.getLangOpts().Modules &&
854            !PP.getLangOpts().ModulesLocalVisibility) ||
855           !PP.CurSubmoduleState->VisibleModules.getGeneration())
856         return nullptr;
857 
858       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
859       if (!Info) {
860         Info = new (PP.getPreprocessorAllocator())
861             ModuleMacroInfo(State.get<MacroDirective *>());
862         State = Info;
863       }
864 
865       if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
866           Info->ActiveModuleMacrosGeneration)
867         PP.updateModuleMacroInfo(II, *Info);
868       return Info;
869     }
870 
871   public:
MacroState()872     MacroState() : MacroState(nullptr) {}
MacroState(MacroDirective * MD)873     MacroState(MacroDirective *MD) : State(MD) {}
874 
MacroState(MacroState && O)875     MacroState(MacroState &&O) noexcept : State(O.State) {
876       O.State = (MacroDirective *)nullptr;
877     }
878 
879     MacroState &operator=(MacroState &&O) noexcept {
880       auto S = O.State;
881       O.State = (MacroDirective *)nullptr;
882       State = S;
883       return *this;
884     }
885 
~MacroState()886     ~MacroState() {
887       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
888         Info->~ModuleMacroInfo();
889     }
890 
getLatest()891     MacroDirective *getLatest() const {
892       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
893         return Info->MD;
894       return State.get<MacroDirective*>();
895     }
896 
setLatest(MacroDirective * MD)897     void setLatest(MacroDirective *MD) {
898       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
899         Info->MD = MD;
900       else
901         State = MD;
902     }
903 
isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)904     bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
905       auto *Info = getModuleInfo(PP, II);
906       return Info ? Info->IsAmbiguous : false;
907     }
908 
909     ArrayRef<ModuleMacro *>
getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)910     getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
911       if (auto *Info = getModuleInfo(PP, II))
912         return Info->ActiveModuleMacros;
913       return std::nullopt;
914     }
915 
findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)916     MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
917                                                SourceManager &SourceMgr) const {
918       // FIXME: Incorporate module macros into the result of this.
919       if (auto *Latest = getLatest())
920         return Latest->findDirectiveAtLoc(Loc, SourceMgr);
921       return {};
922     }
923 
overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)924     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
925       if (auto *Info = getModuleInfo(PP, II)) {
926         Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
927                                       Info->ActiveModuleMacros.begin(),
928                                       Info->ActiveModuleMacros.end());
929         Info->ActiveModuleMacros.clear();
930         Info->IsAmbiguous = false;
931       }
932     }
933 
getOverriddenMacros()934     ArrayRef<ModuleMacro*> getOverriddenMacros() const {
935       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
936         return Info->OverriddenMacros;
937       return std::nullopt;
938     }
939 
setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)940     void setOverriddenMacros(Preprocessor &PP,
941                              ArrayRef<ModuleMacro *> Overrides) {
942       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
943       if (!Info) {
944         if (Overrides.empty())
945           return;
946         Info = new (PP.getPreprocessorAllocator())
947             ModuleMacroInfo(State.get<MacroDirective *>());
948         State = Info;
949       }
950       Info->OverriddenMacros.clear();
951       Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
952                                     Overrides.begin(), Overrides.end());
953       Info->ActiveModuleMacrosGeneration = 0;
954     }
955   };
956 
957   /// For each IdentifierInfo that was associated with a macro, we
958   /// keep a mapping to the history of all macro definitions and #undefs in
959   /// the reverse order (the latest one is in the head of the list).
960   ///
961   /// This mapping lives within the \p CurSubmoduleState.
962   using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
963 
964   struct SubmoduleState;
965 
966   /// Information about a submodule that we're currently building.
967   struct BuildingSubmoduleInfo {
968     /// The module that we are building.
969     Module *M;
970 
971     /// The location at which the module was included.
972     SourceLocation ImportLoc;
973 
974     /// Whether we entered this submodule via a pragma.
975     bool IsPragma;
976 
977     /// The previous SubmoduleState.
978     SubmoduleState *OuterSubmoduleState;
979 
980     /// The number of pending module macro names when we started building this.
981     unsigned OuterPendingModuleMacroNames;
982 
BuildingSubmoduleInfoBuildingSubmoduleInfo983     BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
984                           SubmoduleState *OuterSubmoduleState,
985                           unsigned OuterPendingModuleMacroNames)
986         : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
987           OuterSubmoduleState(OuterSubmoduleState),
988           OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
989   };
990   SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
991 
992   /// Information about a submodule's preprocessor state.
993   struct SubmoduleState {
994     /// The macros for the submodule.
995     MacroMap Macros;
996 
997     /// The set of modules that are visible within the submodule.
998     VisibleModuleSet VisibleModules;
999 
1000     // FIXME: CounterValue?
1001     // FIXME: PragmaPushMacroInfo?
1002   };
1003   std::map<Module *, SubmoduleState> Submodules;
1004 
1005   /// The preprocessor state for preprocessing outside of any submodule.
1006   SubmoduleState NullSubmoduleState;
1007 
1008   /// The current submodule state. Will be \p NullSubmoduleState if we're not
1009   /// in a submodule.
1010   SubmoduleState *CurSubmoduleState;
1011 
1012   /// The files that have been included.
1013   IncludedFilesSet IncludedFiles;
1014 
1015   /// The set of top-level modules that affected preprocessing, but were not
1016   /// imported.
1017   llvm::SmallSetVector<Module *, 2> AffectingClangModules;
1018 
1019   /// The set of known macros exported from modules.
1020   llvm::FoldingSet<ModuleMacro> ModuleMacros;
1021 
1022   /// The names of potential module macros that we've not yet processed.
1023   llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames;
1024 
1025   /// The list of module macros, for each identifier, that are not overridden by
1026   /// any other module macro.
1027   llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
1028       LeafModuleMacros;
1029 
  /// Macros that we want to warn about because they are not used at the end
1031   /// of the translation unit.
1032   ///
1033   /// We store just their SourceLocations instead of
1034   /// something like MacroInfo*. The benefit of this is that when we are
1035   /// deserializing from PCH, we don't need to deserialize identifier & macros
1036   /// just so that we can report that they are unused, we just warn using
1037   /// the SourceLocations of this set (that will be filled by the ASTReader).
1038   using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
1039   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
1040 
1041   /// This is a pair of an optional message and source location used for pragmas
1042   /// that annotate macros like pragma clang restrict_expansion and pragma clang
1043   /// deprecated. This pair stores the optional message and the location of the
1044   /// annotation pragma for use producing diagnostics and notes.
1045   using MsgLocationPair = std::pair<std::string, SourceLocation>;
1046 
1047   struct MacroAnnotationInfo {
1048     SourceLocation Location;
1049     std::string Message;
1050   };
1051 
1052   struct MacroAnnotations {
1053     std::optional<MacroAnnotationInfo> DeprecationInfo;
1054     std::optional<MacroAnnotationInfo> RestrictExpansionInfo;
1055     std::optional<SourceLocation> FinalAnnotationLoc;
1056 
makeDeprecationMacroAnnotations1057     static MacroAnnotations makeDeprecation(SourceLocation Loc,
1058                                             std::string Msg) {
1059       return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)},
1060                               std::nullopt, std::nullopt};
1061     }
1062 
makeRestrictExpansionMacroAnnotations1063     static MacroAnnotations makeRestrictExpansion(SourceLocation Loc,
1064                                                   std::string Msg) {
1065       return MacroAnnotations{
1066           std::nullopt, MacroAnnotationInfo{Loc, std::move(Msg)}, std::nullopt};
1067     }
1068 
makeFinalMacroAnnotations1069     static MacroAnnotations makeFinal(SourceLocation Loc) {
1070       return MacroAnnotations{std::nullopt, std::nullopt, Loc};
1071     }
1072   };
1073 
1074   /// Warning information for macro annotations.
1075   llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
1076 
1077   /// A "freelist" of MacroArg objects that can be
1078   /// reused for quick allocation.
1079   MacroArgs *MacroArgCache = nullptr;
1080 
1081   /// For each IdentifierInfo used in a \#pragma push_macro directive,
1082   /// we keep a MacroInfo stack used to restore the previous macro value.
1083   llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
1084       PragmaPushMacroInfo;
1085 
1086   // Various statistics we track for performance analysis.
1087   unsigned NumDirectives = 0;
1088   unsigned NumDefined = 0;
1089   unsigned NumUndefined = 0;
1090   unsigned NumPragma = 0;
1091   unsigned NumIf = 0;
1092   unsigned NumElse = 0;
1093   unsigned NumEndif = 0;
1094   unsigned NumEnteredSourceFiles = 0;
1095   unsigned MaxIncludeStackDepth = 0;
1096   unsigned NumMacroExpanded = 0;
1097   unsigned NumFnMacroExpanded = 0;
1098   unsigned NumBuiltinMacroExpanded = 0;
1099   unsigned NumFastMacroExpanded = 0;
1100   unsigned NumTokenPaste = 0;
1101   unsigned NumFastTokenPaste = 0;
1102   unsigned NumSkipped = 0;
1103 
1104   /// The predefined macros that preprocessor should use from the
1105   /// command line etc.
1106   std::string Predefines;
1107 
1108   /// The file ID for the preprocessor predefines.
1109   FileID PredefinesFileID;
1110 
1111   /// The file ID for the PCH through header.
1112   FileID PCHThroughHeaderFileID;
1113 
1114   /// Whether tokens are being skipped until a #pragma hdrstop is seen.
1115   bool SkippingUntilPragmaHdrStop = false;
1116 
1117   /// Whether tokens are being skipped until the through header is seen.
1118   bool SkippingUntilPCHThroughHeader = false;
1119 
1120   /// \{
1121   /// Cache of macro expanders to reduce malloc traffic.
1122   enum { TokenLexerCacheSize = 8 };
1123   unsigned NumCachedTokenLexers;
1124   std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
1125   /// \}
1126 
1127   /// Keeps macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
  /// going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
1132   SmallVector<Token, 16> MacroExpandedTokens;
1133   std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
1134 
1135   /// A record of the macro definitions and expansions that
1136   /// occurred during preprocessing.
1137   ///
1138   /// This is an optional side structure that can be enabled with
1139   /// \c createPreprocessingRecord() prior to preprocessing.
1140   PreprocessingRecord *Record = nullptr;
1141 
1142   /// Cached tokens state.
1143   using CachedTokensTy = SmallVector<Token, 1>;
1144 
1145   /// Cached tokens are stored here when we do backtracking or
1146   /// lookahead. They are "lexed" by the CachingLex() method.
1147   CachedTokensTy CachedTokens;
1148 
1149   /// The position of the cached token that CachingLex() should
1150   /// "lex" next.
1151   ///
1152   /// If it points beyond the CachedTokens vector, it means that a normal
1153   /// Lex() should be invoked.
1154   CachedTokensTy::size_type CachedLexPos = 0;
1155 
1156   /// Stack of backtrack positions, allowing nested backtracks.
1157   ///
1158   /// The EnableBacktrackAtThisPos() method pushes a position to
1159   /// indicate where CachedLexPos should be set when the BackTrack() method is
1160   /// invoked (at which point the last position is popped).
1161   std::vector<CachedTokensTy::size_type> BacktrackPositions;
1162 
1163   /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
1164   /// This is used to guard against calling this function recursively.
1165   ///
1166   /// See comments at the use-site for more context about why it is needed.
1167   bool SkippingExcludedConditionalBlock = false;
1168 
1169   /// Keeps track of skipped range mappings that were recorded while skipping
1170   /// excluded conditional directives. It maps the source buffer pointer at
1171   /// the beginning of a skipped block, to the number of bytes that should be
1172   /// skipped.
1173   llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;
1174 
1175   void updateOutOfDateIdentifier(const IdentifierInfo &II) const;
1176 
1177 public:
1178   Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
1179                DiagnosticsEngine &diags, const LangOptions &LangOpts,
1180                SourceManager &SM, HeaderSearch &Headers,
1181                ModuleLoader &TheModuleLoader,
1182                IdentifierInfoLookup *IILookup = nullptr,
1183                bool OwnsHeaderSearch = false,
1184                TranslationUnitKind TUKind = TU_Complete);
1185 
1186   ~Preprocessor();
1187 
1188   /// Initialize the preprocessor using information about the target.
1189   ///
1190   /// \param Target is owned by the caller and must remain valid for the
1191   /// lifetime of the preprocessor.
1192   /// \param AuxTarget is owned by the caller and must remain valid for
1193   /// the lifetime of the preprocessor.
1194   void Initialize(const TargetInfo &Target,
1195                   const TargetInfo *AuxTarget = nullptr);
1196 
1197   /// Initialize the preprocessor to parse a model file
1198   ///
1199   /// To parse model files the preprocessor of the original source is reused to
  /// preserve the identifier table. However to avoid some duplicate
1201   /// information in the preprocessor some cleanup is needed before it is used
1202   /// to parse model files. This method does that cleanup.
1203   void InitializeForModelFile();
1204 
1205   /// Cleanup after model file parsing
1206   void FinalizeForModelFile();
1207 
1208   /// Retrieve the preprocessor options used to initialize this
1209   /// preprocessor.
getPreprocessorOpts()1210   PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
1211 
getDiagnostics()1212   DiagnosticsEngine &getDiagnostics() const { return *Diags; }
setDiagnostics(DiagnosticsEngine & D)1213   void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
1214 
getLangOpts()1215   const LangOptions &getLangOpts() const { return LangOpts; }
getTargetInfo()1216   const TargetInfo &getTargetInfo() const { return *Target; }
getAuxTargetInfo()1217   const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
getFileManager()1218   FileManager &getFileManager() const { return FileMgr; }
getSourceManager()1219   SourceManager &getSourceManager() const { return SourceMgr; }
getHeaderSearchInfo()1220   HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
1221 
getIdentifierTable()1222   IdentifierTable &getIdentifierTable() { return Identifiers; }
getIdentifierTable()1223   const IdentifierTable &getIdentifierTable() const { return Identifiers; }
getSelectorTable()1224   SelectorTable &getSelectorTable() { return Selectors; }
getBuiltinInfo()1225   Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
getPreprocessorAllocator()1226   llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
1227 
setExternalSource(ExternalPreprocessorSource * Source)1228   void setExternalSource(ExternalPreprocessorSource *Source) {
1229     ExternalSource = Source;
1230   }
1231 
getExternalSource()1232   ExternalPreprocessorSource *getExternalSource() const {
1233     return ExternalSource;
1234   }
1235 
1236   /// Retrieve the module loader associated with this preprocessor.
getModuleLoader()1237   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
1238 
hadModuleLoaderFatalFailure()1239   bool hadModuleLoaderFatalFailure() const {
1240     return TheModuleLoader.HadFatalFailure;
1241   }
1242 
1243   /// Retrieve the number of Directives that have been processed by the
1244   /// Preprocessor.
getNumDirectives()1245   unsigned getNumDirectives() const {
1246     return NumDirectives;
1247   }
1248 
1249   /// True if we are currently preprocessing a #if or #elif directive
isParsingIfOrElifDirective()1250   bool isParsingIfOrElifDirective() const {
1251     return ParsingIfOrElifDirective;
1252   }
1253 
1254   /// Control whether the preprocessor retains comments in output.
SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)1255   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
1256     this->KeepComments = KeepComments | KeepMacroComments;
1257     this->KeepMacroComments = KeepMacroComments;
1258   }
1259 
getCommentRetentionState()1260   bool getCommentRetentionState() const { return KeepComments; }
1261 
setPragmasEnabled(bool Enabled)1262   void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
getPragmasEnabled()1263   bool getPragmasEnabled() const { return PragmasEnabled; }
1264 
SetSuppressIncludeNotFoundError(bool Suppress)1265   void SetSuppressIncludeNotFoundError(bool Suppress) {
1266     SuppressIncludeNotFoundError = Suppress;
1267   }
1268 
GetSuppressIncludeNotFoundError()1269   bool GetSuppressIncludeNotFoundError() {
1270     return SuppressIncludeNotFoundError;
1271   }
1272 
1273   /// Sets whether the preprocessor is responsible for producing output or if
1274   /// it is producing tokens to be consumed by Parse and Sema.
setPreprocessedOutput(bool IsPreprocessedOutput)1275   void setPreprocessedOutput(bool IsPreprocessedOutput) {
1276     PreprocessedOutput = IsPreprocessedOutput;
1277   }
1278 
1279   /// Returns true if the preprocessor is responsible for generating output,
1280   /// false if it is producing tokens to be consumed by Parse and Sema.
isPreprocessedOutput()1281   bool isPreprocessedOutput() const { return PreprocessedOutput; }
1282 
1283   /// Return true if we are lexing directly from the specified lexer.
isCurrentLexer(const PreprocessorLexer * L)1284   bool isCurrentLexer(const PreprocessorLexer *L) const {
1285     return CurPPLexer == L;
1286   }
1287 
1288   /// Return the current lexer being lexed from.
1289   ///
1290   /// Note that this ignores any potentially active macro expansions and _Pragma
1291   /// expansions going on at the time.
getCurrentLexer()1292   PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1293 
1294   /// Return the current file lexer being lexed from.
1295   ///
1296   /// Note that this ignores any potentially active macro expansions and _Pragma
1297   /// expansions going on at the time.
1298   PreprocessorLexer *getCurrentFileLexer() const;
1299 
1300   /// Return the submodule owning the file being lexed. This may not be
1301   /// the current module if we have changed modules since entering the file.
getCurrentLexerSubmodule()1302   Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1303 
1304   /// Returns the FileID for the preprocessor predefines.
getPredefinesFileID()1305   FileID getPredefinesFileID() const { return PredefinesFileID; }
1306 
1307   /// \{
1308   /// Accessors for preprocessor callbacks.
1309   ///
1310   /// Note that this class takes ownership of any PPCallbacks object given to
1311   /// it.
getPPCallbacks()1312   PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
addPPCallbacks(std::unique_ptr<PPCallbacks> C)1313   void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1314     if (Callbacks)
1315       C = std::make_unique<PPChainedCallbacks>(std::move(C),
1316                                                 std::move(Callbacks));
1317     Callbacks = std::move(C);
1318   }
1319   /// \}
1320 
1321   /// Get the number of tokens processed so far.
getTokenCount()1322   unsigned getTokenCount() const { return TokenCount; }
1323 
1324   /// Get the max number of tokens before issuing a -Wmax-tokens warning.
getMaxTokens()1325   unsigned getMaxTokens() const { return MaxTokens; }
1326 
overrideMaxTokens(unsigned Value,SourceLocation Loc)1327   void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1328     MaxTokens = Value;
1329     MaxTokensOverrideLoc = Loc;
1330   };
1331 
getMaxTokensOverrideLoc()1332   SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1333 
1334   /// Register a function that would be called on each token in the final
1335   /// expanded token stream.
1336   /// This also reports annotation tokens produced by the parser.
setTokenWatcher(llvm::unique_function<void (const clang::Token &)> F)1337   void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1338     OnToken = std::move(F);
1339   }
1340 
setPreprocessToken(bool Preprocess)1341   void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1342 
isMacroDefined(StringRef Id)1343   bool isMacroDefined(StringRef Id) {
1344     return isMacroDefined(&Identifiers.get(Id));
1345   }
isMacroDefined(const IdentifierInfo * II)1346   bool isMacroDefined(const IdentifierInfo *II) {
1347     return II->hasMacroDefinition() &&
1348            (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1349   }
1350 
1351   /// Determine whether II is defined as a macro within the module M,
1352   /// if that is a module that we've already preprocessed. Does not check for
1353   /// macros imported into M.
isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)1354   bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1355     if (!II->hasMacroDefinition())
1356       return false;
1357     auto I = Submodules.find(M);
1358     if (I == Submodules.end())
1359       return false;
1360     auto J = I->second.Macros.find(II);
1361     if (J == I->second.Macros.end())
1362       return false;
1363     auto *MD = J->second.getLatest();
1364     return MD && MD->isDefined();
1365   }
1366 
getMacroDefinition(const IdentifierInfo * II)1367   MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1368     if (!II->hasMacroDefinition())
1369       return {};
1370 
1371     MacroState &S = CurSubmoduleState->Macros[II];
1372     auto *MD = S.getLatest();
1373     while (isa_and_nonnull<VisibilityMacroDirective>(MD))
1374       MD = MD->getPrevious();
1375     return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1376                            S.getActiveModuleMacros(*this, II),
1377                            S.isAmbiguous(*this, II));
1378   }
1379 
getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)1380   MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1381                                           SourceLocation Loc) {
1382     if (!II->hadMacroDefinition())
1383       return {};
1384 
1385     MacroState &S = CurSubmoduleState->Macros[II];
1386     MacroDirective::DefInfo DI;
1387     if (auto *MD = S.getLatest())
1388       DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1389     // FIXME: Compute the set of active module macros at the specified location.
1390     return MacroDefinition(DI.getDirective(),
1391                            S.getActiveModuleMacros(*this, II),
1392                            S.isAmbiguous(*this, II));
1393   }
1394 
1395   /// Given an identifier, return its latest non-imported MacroDirective
1396   /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
getLocalMacroDirective(const IdentifierInfo * II)1397   MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1398     if (!II->hasMacroDefinition())
1399       return nullptr;
1400 
1401     auto *MD = getLocalMacroDirectiveHistory(II);
1402     if (!MD || MD->getDefinition().isUndefined())
1403       return nullptr;
1404 
1405     return MD;
1406   }
1407 
getMacroInfo(const IdentifierInfo * II)1408   const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1409     return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1410   }
1411 
getMacroInfo(const IdentifierInfo * II)1412   MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1413     if (!II->hasMacroDefinition())
1414       return nullptr;
1415     if (auto MD = getMacroDefinition(II))
1416       return MD.getMacroInfo();
1417     return nullptr;
1418   }
1419 
1420   /// Given an identifier, return the latest non-imported macro
1421   /// directive for that identifier.
1422   ///
1423   /// One can iterate over all previous macro directives from the most recent
1424   /// one.
1425   MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1426 
1427   /// Add a directive to the macro directive history for this identifier.
1428   void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)1429   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1430                                              SourceLocation Loc) {
1431     DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1432     appendMacroDirective(II, MD);
1433     return MD;
1434   }
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)1435   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1436                                              MacroInfo *MI) {
1437     return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1438   }
1439 
1440   /// Set a MacroDirective that was loaded from a PCH file.
1441   void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1442                                MacroDirective *MD);
1443 
1444   /// Register an exported macro for a module and identifier.
1445   ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II,
1446                               MacroInfo *Macro,
1447                               ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1448   ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II);
1449 
1450   /// Get the list of leaf (non-overridden) module macros for a name.
getLeafModuleMacros(const IdentifierInfo * II)1451   ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1452     if (II->isOutOfDate())
1453       updateOutOfDateIdentifier(*II);
1454     auto I = LeafModuleMacros.find(II);
1455     if (I != LeafModuleMacros.end())
1456       return I->second;
1457     return std::nullopt;
1458   }
1459 
1460   /// Get the list of submodules that we're currently building.
getBuildingSubmodules()1461   ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const {
1462     return BuildingSubmoduleStack;
1463   }
1464 
1465   /// \{
1466   /// Iterators for the macro history table. Currently defined macros have
1467   /// IdentifierInfo::hasMacroDefinition() set and an empty
1468   /// MacroInfo::getUndefLoc() at the head of the list.
1469   using macro_iterator = MacroMap::const_iterator;
1470 
1471   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1472   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1473 
1474   llvm::iterator_range<macro_iterator>
1475   macros(bool IncludeExternalMacros = true) const {
1476     macro_iterator begin = macro_begin(IncludeExternalMacros);
1477     macro_iterator end = macro_end(IncludeExternalMacros);
1478     return llvm::make_range(begin, end);
1479   }
1480 
1481   /// \}
1482 
1483   /// Mark the given clang module as affecting the current clang module or translation unit.
markClangModuleAsAffecting(Module * M)1484   void markClangModuleAsAffecting(Module *M) {
1485     assert(M->isModuleMapModule());
1486     if (!BuildingSubmoduleStack.empty()) {
1487       if (M != BuildingSubmoduleStack.back().M)
1488         BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M);
1489     } else {
1490       AffectingClangModules.insert(M);
1491     }
1492   }
1493 
1494   /// Get the set of top-level clang modules that affected preprocessing, but were not
1495   /// imported.
getAffectingClangModules()1496   const llvm::SmallSetVector<Module *, 2> &getAffectingClangModules() const {
1497     return AffectingClangModules;
1498   }
1499 
1500   /// Mark the file as included.
1501   /// Returns true if this is the first time the file was included.
markIncluded(FileEntryRef File)1502   bool markIncluded(FileEntryRef File) {
1503     HeaderInfo.getFileInfo(File);
1504     return IncludedFiles.insert(File).second;
1505   }
1506 
1507   /// Return true if this header has already been included.
alreadyIncluded(FileEntryRef File)1508   bool alreadyIncluded(FileEntryRef File) const {
1509     HeaderInfo.getFileInfo(File);
1510     return IncludedFiles.count(File);
1511   }
1512 
1513   /// Get the set of included files.
getIncludedFiles()1514   IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
getIncludedFiles()1515   const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
1516 
1517   /// Return the name of the macro defined before \p Loc that has
1518   /// spelling \p Tokens.  If there are multiple macros with same spelling,
1519   /// return the last one defined.
1520   StringRef getLastMacroWithSpelling(SourceLocation Loc,
1521                                      ArrayRef<TokenValue> Tokens) const;
1522 
1523   /// Get the predefines for this processor.
1524   /// Used by some third-party tools to inspect and add predefines (see
1525   /// https://github.com/llvm/llvm-project/issues/57483).
getPredefines()1526   const std::string &getPredefines() const { return Predefines; }
1527 
1528   /// Set the predefines for this Preprocessor.
1529   ///
1530   /// These predefines are automatically injected when parsing the main file.
setPredefines(std::string P)1531   void setPredefines(std::string P) { Predefines = std::move(P); }
1532 
1533   /// Return information about the specified preprocessor
1534   /// identifier token.
getIdentifierInfo(StringRef Name)1535   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1536     return &Identifiers.get(Name);
1537   }
1538 
1539   /// Add the specified pragma handler to this preprocessor.
1540   ///
1541   /// If \p Namespace is non-null, then it is a token required to exist on the
1542   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1543   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
AddPragmaHandler(PragmaHandler * Handler)1544   void AddPragmaHandler(PragmaHandler *Handler) {
1545     AddPragmaHandler(StringRef(), Handler);
1546   }
1547 
1548   /// Remove the specific pragma handler from this preprocessor.
1549   ///
1550   /// If \p Namespace is non-null, then it should be the namespace that
1551   /// \p Handler was added to. It is an error to remove a handler that
1552   /// has not been registered.
1553   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
RemovePragmaHandler(PragmaHandler * Handler)1554   void RemovePragmaHandler(PragmaHandler *Handler) {
1555     RemovePragmaHandler(StringRef(), Handler);
1556   }
1557 
1558   /// Install empty handlers for all pragmas (making them ignored).
1559   void IgnorePragmas();
1560 
1561   /// Set empty line handler.
setEmptylineHandler(EmptylineHandler * Handler)1562   void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1563 
getEmptylineHandler()1564   EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1565 
1566   /// Add the specified comment handler to the preprocessor.
1567   void addCommentHandler(CommentHandler *Handler);
1568 
1569   /// Remove the specified comment handler.
1570   ///
1571   /// It is an error to remove a handler that has not been registered.
1572   void removeCommentHandler(CommentHandler *Handler);
1573 
1574   /// Set the code completion handler to the given object.
setCodeCompletionHandler(CodeCompletionHandler & Handler)1575   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1576     CodeComplete = &Handler;
1577   }
1578 
1579   /// Retrieve the current code-completion handler.
getCodeCompletionHandler()1580   CodeCompletionHandler *getCodeCompletionHandler() const {
1581     return CodeComplete;
1582   }
1583 
1584   /// Clear out the code completion handler.
clearCodeCompletionHandler()1585   void clearCodeCompletionHandler() {
1586     CodeComplete = nullptr;
1587   }
1588 
1589   /// Hook used by the lexer to invoke the "included file" code
1590   /// completion point.
1591   void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1592 
1593   /// Hook used by the lexer to invoke the "natural language" code
1594   /// completion point.
1595   void CodeCompleteNaturalLanguage();
1596 
1597   /// Set the code completion token for filtering purposes.
setCodeCompletionIdentifierInfo(IdentifierInfo * Filter)1598   void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1599     CodeCompletionII = Filter;
1600   }
1601 
1602   /// Set the code completion token range for detecting replacement range later
1603   /// on.
setCodeCompletionTokenRange(const SourceLocation Start,const SourceLocation End)1604   void setCodeCompletionTokenRange(const SourceLocation Start,
1605                                    const SourceLocation End) {
1606     CodeCompletionTokenRange = {Start, End};
1607   }
getCodeCompletionTokenRange()1608   SourceRange getCodeCompletionTokenRange() const {
1609     return CodeCompletionTokenRange;
1610   }
1611 
1612   /// Get the code completion token for filtering purposes.
getCodeCompletionFilter()1613   StringRef getCodeCompletionFilter() {
1614     if (CodeCompletionII)
1615       return CodeCompletionII->getName();
1616     return {};
1617   }
1618 
1619   /// Retrieve the preprocessing record, or NULL if there is no
1620   /// preprocessing record.
getPreprocessingRecord()1621   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1622 
1623   /// Create a new preprocessing record, which will keep track of
1624   /// all macro expansions, macro definitions, etc.
1625   void createPreprocessingRecord();
1626 
1627   /// Returns true if the FileEntry is the PCH through header.
1628   bool isPCHThroughHeader(const FileEntry *FE);
1629 
1630   /// True if creating a PCH with a through header.
1631   bool creatingPCHWithThroughHeader();
1632 
1633   /// True if using a PCH with a through header.
1634   bool usingPCHWithThroughHeader();
1635 
1636   /// True if creating a PCH with a #pragma hdrstop.
1637   bool creatingPCHWithPragmaHdrStop();
1638 
1639   /// True if using a PCH with a #pragma hdrstop.
1640   bool usingPCHWithPragmaHdrStop();
1641 
1642   /// Skip tokens until after the #include of the through header or
1643   /// until after a #pragma hdrstop.
1644   void SkipTokensWhileUsingPCH();
1645 
1646   /// Process directives while skipping until the through header or
1647   /// #pragma hdrstop is found.
1648   void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1649                                            SourceLocation HashLoc);
1650 
1651   /// Enter the specified FileID as the main source file,
1652   /// which implicitly adds the builtin defines etc.
1653   void EnterMainSourceFile();
1654 
1655   /// Inform the preprocessor callbacks that processing is complete.
1656   void EndSourceFile();
1657 
1658   /// Add a source file to the top of the include stack and
1659   /// start lexing tokens from it instead of the current buffer.
1660   ///
1661   /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1662   bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir,
1663                        SourceLocation Loc, bool IsFirstIncludeOfFile = true);
1664 
1665   /// Add a Macro to the top of the include stack and start lexing
1666   /// tokens from it instead of the current buffer.
1667   ///
1668   /// \param Args specifies the tokens input to a function-like macro.
1669   /// \param ILEnd specifies the location of the ')' for a function-like macro
1670   /// or the identifier for an object-like macro.
1671   void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1672                   MacroArgs *Args);
1673 
1674 private:
1675   /// Add a "macro" context to the top of the include stack,
1676   /// which will cause the lexer to start returning the specified tokens.
1677   ///
1678   /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1679   /// will not be subject to further macro expansion. Otherwise, these tokens
1680   /// will be re-macro-expanded when/if expansion is enabled.
1681   ///
1682   /// If \p OwnsTokens is false, this method assumes that the specified stream
1683   /// of tokens has a permanent owner somewhere, so they do not need to be
1684   /// copied. If it is true, it assumes the array of tokens is allocated with
1685   /// \c new[] and the Preprocessor will delete[] it.
1686   ///
1687   /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1688   /// set, see the flag documentation for details.
1689   void EnterTokenStream(const Token *Toks, unsigned NumToks,
1690                         bool DisableMacroExpansion, bool OwnsTokens,
1691                         bool IsReinject);
1692 
1693 public:
EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion,bool IsReinject)1694   void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1695                         bool DisableMacroExpansion, bool IsReinject) {
1696     EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1697                      IsReinject);
1698   }
1699 
EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion,bool IsReinject)1700   void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1701                         bool IsReinject) {
1702     EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1703                      IsReinject);
1704   }
1705 
1706   /// Pop the current lexer/macro exp off the top of the lexer stack.
1707   ///
1708   /// This should only be used in situations where the current state of the
1709   /// top-of-stack lexer is known.
1710   void RemoveTopOfLexerStack();
1711 
1712   /// From the point that this method is called, and until
1713   /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1714   /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1715   /// make the Preprocessor re-lex the same tokens.
1716   ///
1717   /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1718   /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1719   /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1720   ///
1721   /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1722   /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1723   /// tokens will continue indefinitely.
1724   ///
1725   void EnableBacktrackAtThisPos();
1726 
1727   /// Disable the last EnableBacktrackAtThisPos call.
1728   void CommitBacktrackedTokens();
1729 
1730   /// Make Preprocessor re-lex the tokens that were lexed since
1731   /// EnableBacktrackAtThisPos() was previously called.
1732   void Backtrack();
1733 
1734   /// True if EnableBacktrackAtThisPos() was called and
1735   /// caching of tokens is on.
isBacktrackEnabled()1736   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1737 
1738   /// Lex the next token for this preprocessor.
1739   void Lex(Token &Result);
1740 
1741   /// Lex all tokens for this preprocessor until (and excluding) end of file.
1742   void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr);
1743 
1744   /// Lex a token, forming a header-name token if possible.
1745   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1746 
1747   /// Lex the parameters for an #embed directive, returns nullopt on error.
1748   std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
1749                                                              bool ForHasEmbed);
1750 
1751   bool LexAfterModuleImport(Token &Result);
1752   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
1753 
1754   void makeModuleVisible(Module *M, SourceLocation Loc);
1755 
getModuleImportLoc(Module * M)1756   SourceLocation getModuleImportLoc(Module *M) const {
1757     return CurSubmoduleState->VisibleModules.getImportLoc(M);
1758   }
1759 
1760   /// Lex a string literal, which may be the concatenation of multiple
1761   /// string literals and may even come from macro expansion.
  /// \returns true on success, false if an error diagnostic has been generated.
LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1763   bool LexStringLiteral(Token &Result, std::string &String,
1764                         const char *DiagnosticTag, bool AllowMacroExpansion) {
1765     if (AllowMacroExpansion)
1766       Lex(Result);
1767     else
1768       LexUnexpandedToken(Result);
1769     return FinishLexStringLiteral(Result, String, DiagnosticTag,
1770                                   AllowMacroExpansion);
1771   }
1772 
1773   /// Complete the lexing of a string literal where the first token has
1774   /// already been lexed (see LexStringLiteral).
1775   bool FinishLexStringLiteral(Token &Result, std::string &String,
1776                               const char *DiagnosticTag,
1777                               bool AllowMacroExpansion);
1778 
1779   /// Lex a token.  If it's a comment, keep lexing until we get
1780   /// something not a comment.
1781   ///
1782   /// This is useful in -E -C mode where comments would foul up preprocessor
1783   /// directive handling.
LexNonComment(Token & Result)1784   void LexNonComment(Token &Result) {
1785     do
1786       Lex(Result);
1787     while (Result.getKind() == tok::comment);
1788   }
1789 
1790   /// Just like Lex, but disables macro expansion of identifier tokens.
LexUnexpandedToken(Token & Result)1791   void LexUnexpandedToken(Token &Result) {
1792     // Disable macro expansion.
1793     bool OldVal = DisableMacroExpansion;
1794     DisableMacroExpansion = true;
1795     // Lex the token.
1796     Lex(Result);
1797 
1798     // Reenable it.
1799     DisableMacroExpansion = OldVal;
1800   }
1801 
1802   /// Like LexNonComment, but this disables macro expansion of
1803   /// identifier tokens.
LexUnexpandedNonComment(Token & Result)1804   void LexUnexpandedNonComment(Token &Result) {
1805     do
1806       LexUnexpandedToken(Result);
1807     while (Result.getKind() == tok::comment);
1808   }
1809 
1810   /// Parses a simple integer literal to get its numeric value.  Floating
1811   /// point literals and user defined literals are rejected.  Used primarily to
1812   /// handle pragmas that accept integer arguments.
1813   bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1814 
1815   /// Disables macro expansion everywhere except for preprocessor directives.
SetMacroExpansionOnlyInDirectives()1816   void SetMacroExpansionOnlyInDirectives() {
1817     DisableMacroExpansion = true;
1818     MacroExpansionInDirectivesOverride = true;
1819   }
1820 
1821   /// Peeks ahead N tokens and returns that token without consuming any
1822   /// tokens.
1823   ///
1824   /// LookAhead(0) returns the next token that would be returned by Lex(),
1825   /// LookAhead(1) returns the token after it, etc.  This returns normal
1826   /// tokens after phase 5.  As such, it is equivalent to using
1827   /// 'Lex', not 'LexUnexpandedToken'.
LookAhead(unsigned N)1828   const Token &LookAhead(unsigned N) {
1829     assert(LexLevel == 0 && "cannot use lookahead while lexing");
1830     if (CachedLexPos + N < CachedTokens.size())
1831       return CachedTokens[CachedLexPos+N];
1832     else
1833       return PeekAhead(N+1);
1834   }
1835 
  /// When backtracking is enabled and tokens are cached,
  /// this allows reverting a specific number of tokens.
1838   ///
1839   /// Note that the number of tokens being reverted should be up to the last
1840   /// backtrack position, not more.
RevertCachedTokens(unsigned N)1841   void RevertCachedTokens(unsigned N) {
1842     assert(isBacktrackEnabled() &&
1843            "Should only be called when tokens are cached for backtracking");
1844     assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1845          && "Should revert tokens up to the last backtrack position, not more");
1846     assert(signed(CachedLexPos) - signed(N) >= 0 &&
1847            "Corrupted backtrack positions ?");
1848     CachedLexPos -= N;
1849   }
1850 
1851   /// Enters a token in the token stream to be lexed next.
1852   ///
  /// If Backtrack() is called afterwards, the token will remain at the
1854   /// insertion point.
1855   /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1856   /// flag set. See the flag documentation for details.
EnterToken(const Token & Tok,bool IsReinject)1857   void EnterToken(const Token &Tok, bool IsReinject) {
1858     if (LexLevel) {
1859       // It's not correct in general to enter caching lex mode while in the
1860       // middle of a nested lexing action.
1861       auto TokCopy = std::make_unique<Token[]>(1);
1862       TokCopy[0] = Tok;
1863       EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1864     } else {
1865       EnterCachingLexMode();
1866       assert(IsReinject && "new tokens in the middle of cached stream");
1867       CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1868     }
1869   }
1870 
1871   /// We notify the Preprocessor that if it is caching tokens (because
1872   /// backtrack is enabled) it should replace the most recent cached tokens
1873   /// with the given annotation token. This function has no effect if
1874   /// backtracking is not enabled.
1875   ///
  /// Note that the use of this function is just for optimization, so that the
  /// cached tokens don't get re-parsed and re-resolved after a backtrack is
  /// invoked.
AnnotateCachedTokens(const Token & Tok)1879   void AnnotateCachedTokens(const Token &Tok) {
1880     assert(Tok.isAnnotation() && "Expected annotation token");
1881     if (CachedLexPos != 0 && isBacktrackEnabled())
1882       AnnotatePreviousCachedTokens(Tok);
1883   }
1884 
1885   /// Get the location of the last cached token, suitable for setting the end
1886   /// location of an annotation token.
getLastCachedTokenLocation()1887   SourceLocation getLastCachedTokenLocation() const {
1888     assert(CachedLexPos != 0);
1889     return CachedTokens[CachedLexPos-1].getLastLoc();
1890   }
1891 
1892   /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1893   /// CachedTokens.
1894   bool IsPreviousCachedToken(const Token &Tok) const;
1895 
1896   /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
1897   /// in \p NewToks.
1898   ///
  /// Useful when a token needs to be split into smaller ones and the most
  /// recent token in CachedTokens must be updated to reflect that.
1901   void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1902 
1903   /// Replace the last token with an annotation token.
1904   ///
1905   /// Like AnnotateCachedTokens(), this routine replaces an
1906   /// already-parsed (and resolved) token with an annotation
1907   /// token. However, this routine only replaces the last token with
1908   /// the annotation token; it does not affect any other cached
1909   /// tokens. This function has no effect if backtracking is not
1910   /// enabled.
ReplaceLastTokenWithAnnotation(const Token & Tok)1911   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1912     assert(Tok.isAnnotation() && "Expected annotation token");
1913     if (CachedLexPos != 0 && isBacktrackEnabled())
1914       CachedTokens[CachedLexPos-1] = Tok;
1915   }
1916 
1917   /// Enter an annotation token into the token stream.
1918   void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1919                             void *AnnotationVal);
1920 
1921   /// Determine whether it's possible for a future call to Lex to produce an
1922   /// annotation token created by a previous call to EnterAnnotationToken.
mightHavePendingAnnotationTokens()1923   bool mightHavePendingAnnotationTokens() {
1924     return CurLexerCallback != CLK_Lexer;
1925   }
1926 
1927   /// Update the current token to represent the provided
1928   /// identifier, in order to cache an action performed by typo correction.
TypoCorrectToken(const Token & Tok)1929   void TypoCorrectToken(const Token &Tok) {
1930     assert(Tok.getIdentifierInfo() && "Expected identifier token");
1931     if (CachedLexPos != 0 && isBacktrackEnabled())
1932       CachedTokens[CachedLexPos-1] = Tok;
1933   }
1934 
1935   /// Recompute the current lexer kind based on the CurLexer/
1936   /// CurTokenLexer pointers.
1937   void recomputeCurLexerKind();
1938 
1939   /// Returns true if incremental processing is enabled
isIncrementalProcessingEnabled()1940   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1941 
1942   /// Enables the incremental processing
1943   void enableIncrementalProcessing(bool value = true) {
1944     IncrementalProcessing = value;
1945   }
1946 
1947   /// Specify the point at which code-completion will be performed.
1948   ///
1949   /// \param File the file in which code completion should occur. If
1950   /// this file is included multiple times, code-completion will
1951   /// perform completion the first time it is included. If NULL, this
1952   /// function clears out the code-completion point.
1953   ///
1954   /// \param Line the line at which code completion should occur
1955   /// (1-based).
1956   ///
1957   /// \param Column the column at which code completion should occur
1958   /// (1-based).
1959   ///
1960   /// \returns true if an error occurred, false otherwise.
1961   bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line,
1962                               unsigned Column);
1963 
1964   /// Determine if we are performing code completion.
isCodeCompletionEnabled()1965   bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1966 
1967   /// Returns the location of the code-completion point.
1968   ///
1969   /// Returns an invalid location if code-completion is not enabled or the file
1970   /// containing the code-completion point has not been lexed yet.
getCodeCompletionLoc()1971   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1972 
1973   /// Returns the start location of the file of code-completion point.
1974   ///
1975   /// Returns an invalid location if code-completion is not enabled or the file
1976   /// containing the code-completion point has not been lexed yet.
getCodeCompletionFileLoc()1977   SourceLocation getCodeCompletionFileLoc() const {
1978     return CodeCompletionFileLoc;
1979   }
1980 
1981   /// Returns true if code-completion is enabled and we have hit the
1982   /// code-completion point.
isCodeCompletionReached()1983   bool isCodeCompletionReached() const { return CodeCompletionReached; }
1984 
1985   /// Note that we hit the code-completion point.
setCodeCompletionReached()1986   void setCodeCompletionReached() {
1987     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1988     CodeCompletionReached = true;
1989     // Silence any diagnostics that occur after we hit the code-completion.
1990     getDiagnostics().setSuppressAllDiagnostics(true);
1991   }
1992 
1993   /// The location of the currently-active \#pragma clang
1994   /// arc_cf_code_audited begin.
1995   ///
1996   /// Returns an invalid location if there is no such pragma active.
1997   std::pair<IdentifierInfo *, SourceLocation>
getPragmaARCCFCodeAuditedInfo()1998   getPragmaARCCFCodeAuditedInfo() const {
1999     return PragmaARCCFCodeAuditedInfo;
2000   }
2001 
2002   /// Set the location of the currently-active \#pragma clang
2003   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
setPragmaARCCFCodeAuditedInfo(IdentifierInfo * Ident,SourceLocation Loc)2004   void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
2005                                      SourceLocation Loc) {
2006     PragmaARCCFCodeAuditedInfo = {Ident, Loc};
2007   }
2008 
2009   /// The location of the currently-active \#pragma clang
2010   /// assume_nonnull begin.
2011   ///
2012   /// Returns an invalid location if there is no such pragma active.
getPragmaAssumeNonNullLoc()2013   SourceLocation getPragmaAssumeNonNullLoc() const {
2014     return PragmaAssumeNonNullLoc;
2015   }
2016 
2017   /// Set the location of the currently-active \#pragma clang
2018   /// assume_nonnull begin.  An invalid location ends the pragma.
setPragmaAssumeNonNullLoc(SourceLocation Loc)2019   void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
2020     PragmaAssumeNonNullLoc = Loc;
2021   }
2022 
2023   /// Get the location of the recorded unterminated \#pragma clang
2024   /// assume_nonnull begin in the preamble, if one exists.
2025   ///
  /// Returns an invalid location if the preamble did not end with
2027   /// such a pragma active or if there is no recorded preamble.
getPreambleRecordedPragmaAssumeNonNullLoc()2028   SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const {
2029     return PreambleRecordedPragmaAssumeNonNullLoc;
2030   }
2031 
2032   /// Record the location of the unterminated \#pragma clang
2033   /// assume_nonnull begin in the preamble.
setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc)2034   void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) {
2035     PreambleRecordedPragmaAssumeNonNullLoc = Loc;
2036   }
2037 
2038   /// Set the directory in which the main file should be considered
2039   /// to have been found, if it is not a real file.
setMainFileDir(DirectoryEntryRef Dir)2040   void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; }
2041 
2042   /// Instruct the preprocessor to skip part of the main source file.
2043   ///
2044   /// \param Bytes The number of bytes in the preamble to skip.
2045   ///
2046   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
2047   /// start of a line.
setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)2048   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
2049     SkipMainFilePreamble.first = Bytes;
2050     SkipMainFilePreamble.second = StartOfLine;
2051   }
2052 
2053   /// Forwarding function for diagnostics.  This emits a diagnostic at
2054   /// the specified Token's location, translating the token's start
2055   /// position in the current buffer into a SourcePosition object for rendering.
Diag(SourceLocation Loc,unsigned DiagID)2056   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
2057     return Diags->Report(Loc, DiagID);
2058   }
2059 
Diag(const Token & Tok,unsigned DiagID)2060   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
2061     return Diags->Report(Tok.getLocation(), DiagID);
2062   }
2063 
2064   /// Return the 'spelling' of the token at the given
2065   /// location; does not go up to the spelling location or down to the
2066   /// expansion location.
2067   ///
2068   /// \param buffer A buffer which will be used only if the token requires
2069   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
2070   /// \param invalid If non-null, will be set \c true if an error occurs.
2071   StringRef getSpelling(SourceLocation loc,
2072                         SmallVectorImpl<char> &buffer,
2073                         bool *invalid = nullptr) const {
2074     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
2075   }
2076 
2077   /// Return the 'spelling' of the Tok token.
2078   ///
2079   /// The spelling of a token is the characters used to represent the token in
2080   /// the source file after trigraph expansion and escaped-newline folding.  In
2081   /// particular, this wants to get the true, uncanonicalized, spelling of
2082   /// things like digraphs, UCNs, etc.
2083   ///
2084   /// \param Invalid If non-null, will be set \c true if an error occurs.
2085   std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
2086     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
2087   }
2088 
2089   /// Get the spelling of a token into a preallocated buffer, instead
2090   /// of as an std::string.
2091   ///
2092   /// The caller is required to allocate enough space for the token, which is
2093   /// guaranteed to be at least Tok.getLength() bytes long. The length of the
2094   /// actual result is returned.
2095   ///
2096   /// Note that this method may do two possible things: it may either fill in
2097   /// the buffer specified with characters, or it may *change the input pointer*
2098   /// to point to a constant buffer with the data already in it (avoiding a
2099   /// copy).  The caller is not allowed to modify the returned buffer pointer
2100   /// if an internal buffer is returned.
2101   unsigned getSpelling(const Token &Tok, const char *&Buffer,
2102                        bool *Invalid = nullptr) const {
2103     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
2104   }
2105 
2106   /// Get the spelling of a token into a SmallVector.
2107   ///
2108   /// Note that the returned StringRef may not point to the
2109   /// supplied buffer if a copy can be avoided.
2110   StringRef getSpelling(const Token &Tok,
2111                         SmallVectorImpl<char> &Buffer,
2112                         bool *Invalid = nullptr) const;
2113 
2114   /// Relex the token at the specified location.
2115   /// \returns true if there was a failure, false on success.
2116   bool getRawToken(SourceLocation Loc, Token &Result,
2117                    bool IgnoreWhiteSpace = false) {
2118     return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
2119   }
2120 
2121   /// Given a Token \p Tok that is a numeric constant with length 1,
2122   /// return the character.
2123   char
2124   getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
2125                                               bool *Invalid = nullptr) const {
2126     assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) &&
2127            Tok.getLength() == 1 && "Called on unsupported token");
2128     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
2129 
2130     // If the token is carrying a literal data pointer, just use it.
2131     if (const char *D = Tok.getLiteralData())
2132       return (Tok.getKind() == tok::binary_data) ? *D : *D - '0';
2133 
2134     assert(Tok.is(tok::numeric_constant) && "binary data with no data");
2135     // Otherwise, fall back on getCharacterData, which is slower, but always
2136     // works.
2137     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0';
2138   }
2139 
2140   /// Retrieve the name of the immediate macro expansion.
2141   ///
2142   /// This routine starts from a source location, and finds the name of the
2143   /// macro responsible for its immediate expansion. It looks through any
2144   /// intervening macro argument expansions to compute this. It returns a
2145   /// StringRef that refers to the SourceManager-owned buffer of the source
2146   /// where that macro name is spelled. Thus, the result shouldn't out-live
2147   /// the SourceManager.
getImmediateMacroName(SourceLocation Loc)2148   StringRef getImmediateMacroName(SourceLocation Loc) {
2149     return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
2150   }
2151 
2152   /// Plop the specified string into a scratch buffer and set the
2153   /// specified token's location and length to it.
2154   ///
2155   /// If specified, the source location provides a location of the expansion
2156   /// point of the token.
2157   void CreateString(StringRef Str, Token &Tok,
2158                     SourceLocation ExpansionLocStart = SourceLocation(),
2159                     SourceLocation ExpansionLocEnd = SourceLocation());
2160 
2161   /// Split the first Length characters out of the token starting at TokLoc
2162   /// and return a location pointing to the split token. Re-lexing from the
2163   /// split token will return the split token rather than the original.
2164   SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
2165 
2166   /// Computes the source location just past the end of the
2167   /// token at this source location.
2168   ///
2169   /// This routine can be used to produce a source location that
2170   /// points just past the end of the token referenced by \p Loc, and
2171   /// is generally used when a diagnostic needs to point just after a
  /// token where it expected something different than it received. If
2173   /// the returned source location would not be meaningful (e.g., if
2174   /// it points into a macro), this routine returns an invalid
2175   /// source location.
2176   ///
2177   /// \param Offset an offset from the end of the token, where the source
2178   /// location should refer to. The default offset (0) produces a source
2179   /// location pointing just past the end of the token; an offset of 1 produces
2180   /// a source location pointing to the last character in the token, etc.
2181   SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
2182     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
2183   }
2184 
2185   /// Returns true if the given MacroID location points at the first
2186   /// token of the macro expansion.
2187   ///
2188   /// \param MacroBegin If non-null and function returns true, it is set to
2189   /// begin location of the macro.
2190   bool isAtStartOfMacroExpansion(SourceLocation loc,
2191                                  SourceLocation *MacroBegin = nullptr) const {
2192     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
2193                                             MacroBegin);
2194   }
2195 
2196   /// Returns true if the given MacroID location points at the last
2197   /// token of the macro expansion.
2198   ///
2199   /// \param MacroEnd If non-null and function returns true, it is set to
2200   /// end location of the macro.
2201   bool isAtEndOfMacroExpansion(SourceLocation loc,
2202                                SourceLocation *MacroEnd = nullptr) const {
2203     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
2204   }
2205 
2206   /// Print the token to stderr, used for debugging.
2207   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
2208   void DumpLocation(SourceLocation Loc) const;
2209   void DumpMacro(const MacroInfo &MI) const;
2210   void dumpMacroInfo(const IdentifierInfo *II);
2211 
2212   /// Given a location that specifies the start of a
2213   /// token, return a new location that specifies a character within the token.
AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)2214   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
2215                                          unsigned Char) const {
2216     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
2217   }
2218 
2219   /// Increment the counters for the number of token paste operations
2220   /// performed.
2221   ///
2222   /// If fast was specified, this is a 'fast paste' case we handled.
IncrementPasteCounter(bool isFast)2223   void IncrementPasteCounter(bool isFast) {
2224     if (isFast)
2225       ++NumFastTokenPaste;
2226     else
2227       ++NumTokenPaste;
2228   }
2229 
2230   void PrintStats();
2231 
2232   size_t getTotalMemory() const;
2233 
2234   /// When the macro expander pastes together a comment (/##/) in Microsoft
2235   /// mode, this method handles updating the current state, returning the
2236   /// token on the next source line.
2237   void HandleMicrosoftCommentPaste(Token &Tok);
2238 
2239   //===--------------------------------------------------------------------===//
2240   // Preprocessor callback methods.  These are invoked by a lexer as various
2241   // directives and events are found.
2242 
2243   /// Given a tok::raw_identifier token, look up the
2244   /// identifier information for the token and install it into the token,
2245   /// updating the token kind accordingly.
2246   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
2247 
2248 private:
2249   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
2250 
2251 public:
2252   /// Specifies the reason for poisoning an identifier.
2253   ///
2254   /// If that identifier is accessed while poisoned, then this reason will be
2255   /// used instead of the default "poisoned" diagnostic.
2256   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
2257 
2258   /// Display reason for poisoned identifier.
2259   void HandlePoisonedIdentifier(Token & Identifier);
2260 
MaybeHandlePoisonedIdentifier(Token & Identifier)2261   void MaybeHandlePoisonedIdentifier(Token & Identifier) {
2262     if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
2263       if(II->isPoisoned()) {
2264         HandlePoisonedIdentifier(Identifier);
2265       }
2266     }
2267   }
2268 
2269 private:
2270   /// Identifiers used for SEH handling in Borland. These are only
2271   /// allowed in particular circumstances
2272   // __except block
2273   IdentifierInfo *Ident__exception_code,
2274                  *Ident___exception_code,
2275                  *Ident_GetExceptionCode;
2276   // __except filter expression
2277   IdentifierInfo *Ident__exception_info,
2278                  *Ident___exception_info,
2279                  *Ident_GetExceptionInfo;
2280   // __finally
2281   IdentifierInfo *Ident__abnormal_termination,
2282                  *Ident___abnormal_termination,
2283                  *Ident_AbnormalTermination;
2284 
2285   const char *getCurLexerEndPos();
2286   void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
2287 
2288 public:
2289   void PoisonSEHIdentifiers(bool Poison = true); // Borland
2290 
2291   /// Callback invoked when the lexer reads an identifier and has
2292   /// filled in the tokens IdentifierInfo member.
2293   ///
2294   /// This callback potentially macro expands it or turns it into a named
2295   /// token (like 'for').
2296   ///
2297   /// \returns true if we actually computed a token, false if we need to
2298   /// lex again.
2299   bool HandleIdentifier(Token &Identifier);
2300 
2301   /// Callback invoked when the lexer hits the end of the current file.
2302   ///
2303   /// This either returns the EOF token and returns true, or
2304   /// pops a level off the include stack and returns false, at which point the
2305   /// client should call lex again.
2306   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
2307 
2308   /// Callback invoked when the current TokenLexer hits the end of its
2309   /// token stream.
2310   bool HandleEndOfTokenLexer(Token &Result);
2311 
2312   /// Callback invoked when the lexer sees a # token at the start of a
2313   /// line.
2314   ///
2315   /// This consumes the directive, modifies the lexer/preprocessor state, and
2316   /// advances the lexer(s) so that the next token read is the correct one.
2317   void HandleDirective(Token &Result);
2318 
2319   /// Ensure that the next token is a tok::eod token.
2320   ///
2321   /// If not, emit a diagnostic and consume up until the eod.
2322   /// If \p EnableMacros is true, then we consider macros that expand to zero
2323   /// tokens as being ok.
2324   ///
2325   /// \return The location of the end of the directive (the terminating
2326   /// newline).
2327   SourceLocation CheckEndOfDirective(const char *DirType,
2328                                      bool EnableMacros = false);
2329 
2330   /// Read and discard all tokens remaining on the current line until
2331   /// the tok::eod token is found. Returns the range of the skipped tokens.
DiscardUntilEndOfDirective()2332   SourceRange DiscardUntilEndOfDirective() {
2333     Token Tmp;
2334     return DiscardUntilEndOfDirective(Tmp);
2335   }
2336 
2337   /// Same as above except retains the token that was found.
2338   SourceRange DiscardUntilEndOfDirective(Token &Tok);
2339 
2340   /// Returns true if the preprocessor has seen a use of
2341   /// __DATE__ or __TIME__ in the file so far.
SawDateOrTime()2342   bool SawDateOrTime() const {
2343     return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
2344   }
getCounterValue()2345   unsigned getCounterValue() const { return CounterValue; }
setCounterValue(unsigned V)2346   void setCounterValue(unsigned V) { CounterValue = V; }
2347 
getCurrentFPEvalMethod()2348   LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const {
2349     assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine &&
2350            "FPEvalMethod should be set either from command line or from the "
2351            "target info");
2352     return CurrentFPEvalMethod;
2353   }
2354 
getTUFPEvalMethod()2355   LangOptions::FPEvalMethodKind getTUFPEvalMethod() const {
2356     return TUFPEvalMethod;
2357   }
2358 
getLastFPEvalPragmaLocation()2359   SourceLocation getLastFPEvalPragmaLocation() const {
2360     return LastFPEvalPragmaLocation;
2361   }
2362 
setCurrentFPEvalMethod(SourceLocation PragmaLoc,LangOptions::FPEvalMethodKind Val)2363   void setCurrentFPEvalMethod(SourceLocation PragmaLoc,
2364                               LangOptions::FPEvalMethodKind Val) {
2365     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2366            "FPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2367     // This is the location of the '#pragma float_control" where the
2368     // execution state is modifed.
2369     LastFPEvalPragmaLocation = PragmaLoc;
2370     CurrentFPEvalMethod = Val;
2371     TUFPEvalMethod = Val;
2372   }
2373 
setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)2374   void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) {
2375     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2376            "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2377     TUFPEvalMethod = Val;
2378   }
2379 
2380   /// Retrieves the module that we're currently building, if any.
2381   Module *getCurrentModule();
2382 
/// Retrieves the module whose implementation we're currently compiling, if any.
2384   Module *getCurrentModuleImplementation();
2385 
2386   /// If we are preprocessing a named module.
isInNamedModule()2387   bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }
2388 
2389   /// If we are proprocessing a named interface unit.
2390   /// Note that a module implementation partition is not considered as an
2391   /// named interface unit here although it is importable
2392   /// to ease the parsing.
isInNamedInterfaceUnit()2393   bool isInNamedInterfaceUnit() const {
2394     return ModuleDeclState.isNamedInterface();
2395   }
2396 
2397   /// Get the named module name we're preprocessing.
2398   /// Requires we're preprocessing a named module.
getNamedModuleName()2399   StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
2400 
2401   /// If we are implementing an implementation module unit.
2402   /// Note that the module implementation partition is not considered as an
2403   /// implementation unit.
isInImplementationUnit()2404   bool isInImplementationUnit() const {
2405     return ModuleDeclState.isImplementationUnit();
2406   }
2407 
2408   /// If we're importing a standard C++20 Named Modules.
isInImportingCXXNamedModules()2409   bool isInImportingCXXNamedModules() const {
2410     // NamedModuleImportPath will be non-empty only if we're importing
2411     // Standard C++ named modules.
2412     return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules &&
2413            !IsAtImport;
2414   }
2415 
2416   /// Allocate a new MacroInfo object with the provided SourceLocation.
2417   MacroInfo *AllocateMacroInfo(SourceLocation L);
2418 
2419   /// Turn the specified lexer token into a fully checked and spelled
2420   /// filename, e.g. as an operand of \#include.
2421   ///
2422   /// The caller is expected to provide a buffer that is large enough to hold
2423   /// the spelling of the filename, but is also expected to handle the case
2424   /// when this method decides to use a different buffer.
2425   ///
2426   /// \returns true if the input filename was in <>'s or false if it was
2427   /// in ""'s.
2428   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2429 
2430   /// Given a "foo" or \<foo> reference, look up the indicated file.
2431   ///
2432   /// Returns std::nullopt on failure.  \p isAngled indicates whether the file
2433   /// reference is for system \#include's or not (i.e. using <> instead of "").
2434   OptionalFileEntryRef
2435   LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
2436              ConstSearchDirIterator FromDir, const FileEntry *FromFile,
2437              ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath,
2438              SmallVectorImpl<char> *RelativePath,
2439              ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
2440              bool *IsFrameworkFound, bool SkipCache = false,
2441              bool OpenFile = true, bool CacheFailures = true);
2442 
2443   /// Given a "Filename" or \<Filename> reference, look up the indicated embed
2444   /// resource. \p isAngled indicates whether the file reference is for
2445   /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile
2446   /// is true, the file looked up is opened for reading, otherwise it only
2447   /// validates that the file exists. Quoted filenames are looked up relative
2448   /// to \p LookupFromFile if it is nonnull.
2449   ///
2450   /// Returns std::nullopt on failure.
2451   OptionalFileEntryRef
2452   LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
2453                   const FileEntry *LookupFromFile = nullptr);
2454 
2455   /// Return true if we're in the top-level file, not in a \#include.
2456   bool isInPrimaryFile() const;
2457 
2458   /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2459   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
2460   bool LexOnOffSwitch(tok::OnOffSwitch &Result);
2461 
2462   bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2463                       bool *ShadowFlag = nullptr);
2464 
2465   void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2466   Module *LeaveSubmodule(bool ForPragma);
2467 
2468 private:
2469   friend void TokenLexer::ExpandFunctionArguments();
2470 
/// Save the current lexer state as a new entry on IncludeMacroStack.
///
/// Asserts that the current lexer callback is not CLK_CachingLexer.
void PushIncludeMacroStack() {
  assert(CurLexerCallback != CLK_CachingLexer &&
         "cannot push a caching lexer");
  // CurLexer and CurTokenLexer are moved into the new entry; the remaining
  // fields are passed as they are.
  IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule,
                                 std::move(CurLexer), CurPPLexer,
                                 std::move(CurTokenLexer), CurDirLookup);
  // The saved entry now holds the previous CurPPLexer value; clear ours.
  CurPPLexer = nullptr;
}
2479 
PopIncludeMacroStack()2480   void PopIncludeMacroStack() {
2481     CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2482     CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2483     CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2484     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
2485     CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2486     CurLexerCallback = IncludeMacroStack.back().CurLexerCallback;
2487     IncludeMacroStack.pop_back();
2488   }
2489 
2490   void PropagateLineStartLeadingSpaceInfo(Token &Result);
2491 
2492   /// Determine whether we need to create module macros for #defines in the
2493   /// current context.
2494   bool needModuleMacros() const;
2495 
2496   /// Update the set of active module macros and ambiguity flag for a module
2497   /// macro name.
2498   void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2499 
2500   DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2501                                                SourceLocation Loc);
2502   UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2503   VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2504                                                              bool isPublic);
2505 
2506   /// Lex and validate a macro name, which occurs after a
2507   /// \#define or \#undef.
2508   ///
2509   /// \param MacroNameTok Token that represents the name defined or undefined.
/// \param IsDefineUndef Kind of preprocessor directive.
2511   /// \param ShadowFlag Points to flag that is set if macro name shadows
2512   ///                   a keyword.
2513   ///
2514   /// This emits a diagnostic, sets the token kind to eod,
2515   /// and discards the rest of the macro line if the macro name is invalid.
2516   void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2517                      bool *ShadowFlag = nullptr);
2518 
2519   /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2520   /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2521   /// doing so performs certain validity checks including (but not limited to):
2522   ///   - # (stringization) is followed by a macro parameter
2523   /// \param MacroNameTok - Token that represents the macro name
2524   /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2525   ///
2526   ///  Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2527   ///  returns a nullptr if an invalid sequence of tokens is encountered.
2528   MacroInfo *ReadOptionalMacroParameterListAndBody(
2529       const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2530 
2531   /// The ( starting an argument list of a macro definition has just been read.
2532   /// Lex the rest of the parameters and the closing ), updating \p MI with
2533   /// what we learn and saving in \p LastTok the last token read.
2534   /// Return true if an error occurs parsing the arg list.
2535   bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2536 
2537   /// Provide a suggestion for a typoed directive. If there is no typo, then
2538   /// just skip suggesting.
2539   ///
2540   /// \param Tok - Token that represents the directive
2541   /// \param Directive - String reference for the directive name
2542   void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;
2543 
2544   /// We just read a \#if or related directive and decided that the
2545   /// subsequent tokens are in the \#if'd out portion of the
2546   /// file.  Lex the rest of the file, until we see an \#endif.  If \p
2547   /// FoundNonSkipPortion is true, then we have already emitted code for part of
2548   /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2549   /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2550   /// already seen one so a \#else directive is a duplicate.  When this returns,
2551   /// the caller can lex the first valid token.
2552   void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2553                                     SourceLocation IfTokenLoc,
2554                                     bool FoundNonSkipPortion, bool FoundElse,
2555                                     SourceLocation ElseLoc = SourceLocation());
2556 
2557   /// Information about the result for evaluating an expression for a
2558   /// preprocessor directive.
2559   struct DirectiveEvalResult {
2560     /// The integral value of the expression.
2561     std::optional<llvm::APSInt> Value;
2562 
2563     /// Whether the expression was evaluated as true or not.
2564     bool Conditional;
2565 
2566     /// True if the expression contained identifiers that were undefined.
2567     bool IncludedUndefinedIds;
2568 
2569     /// The source range for the expression.
2570     SourceRange ExprRange;
2571   };
2572 
2573   /// Evaluate an integer constant expression that may occur after a
2574   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2575   ///
2576   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2577   DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2578                                                   bool CheckForEoD = true);
2579 
2580   /// Evaluate an integer constant expression that may occur after a
2581   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2582   ///
2583   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2584   /// \p EvaluatedDefined will contain the result of whether "defined" appeared
2585   /// in the evaluated expression or not.
2586   DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2587                                                   Token &Tok,
2588                                                   bool &EvaluatedDefined,
2589                                                   bool CheckForEoD = true);
2590 
2591   /// Process a '__has_embed("path" [, ...])' expression.
2592   ///
2593   /// Returns predefined `__STDC_EMBED_*` macro values if
2594   /// successful.
2595   EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
2596 
2597   /// Process a '__has_include("path")' expression.
2598   ///
2599   /// Returns true if successful.
2600   bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);
2601 
2602   /// Process '__has_include_next("path")' expression.
2603   ///
2604   /// Returns true if successful.
2605   bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);
2606 
2607   /// Get the directory and file from which to start \#include_next lookup.
2608   std::pair<ConstSearchDirIterator, const FileEntry *>
2609   getIncludeNextStart(const Token &IncludeNextTok) const;
2610 
2611   /// Install the standard preprocessor pragmas:
2612   /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2613   void RegisterBuiltinPragmas();
2614 
2615   /// Register builtin macros such as __LINE__ with the identifier table.
2616   void RegisterBuiltinMacros();
2617 
2618   /// If an identifier token is read that is to be expanded as a macro, handle
2619   /// it and return the next token as 'Tok'.  If we lexed a token, return true;
2620   /// otherwise the caller should lex again.
2621   bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2622 
2623   /// Cache macro expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
/// going to lex to the cache and when it finishes the tokens are removed
/// from the end of the cache.
2628   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2629                                   ArrayRef<Token> tokens);
2630 
2631   void removeCachedMacroExpandedTokensOfLastLexer();
2632 
2633   /// Determine whether the next preprocessor token to be
2634   /// lexed is a '('.  If so, consume the token and return true, if not, this
2635   /// method should have no observable side-effect on the lexed tokens.
2636   bool isNextPPTokenLParen();
2637 
2638   /// After reading "MACRO(", this method is invoked to read all of the formal
2639   /// arguments specified for the macro invocation.  Returns null on error.
2640   MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2641                                        SourceLocation &MacroEnd);
2642 
2643   /// If an identifier token is read that is to be expanded
2644   /// as a builtin macro, handle it and return the next token as 'Tok'.
2645   void ExpandBuiltinMacro(Token &Tok);
2646 
2647   /// Read a \c _Pragma directive, slice it up, process it, then
2648   /// return the first token after the directive.
2649   /// This assumes that the \c _Pragma token has just been read into \p Tok.
2650   void Handle_Pragma(Token &Tok);
2651 
2652   /// Like Handle_Pragma except the pragma text is not enclosed within
2653   /// a string literal.
2654   void HandleMicrosoft__pragma(Token &Tok);
2655 
2656   /// Add a lexer to the top of the include stack and
2657   /// start lexing tokens from it instead of the current buffer.
2658   void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir);
2659 
/// Set the FileID for the preprocessor predefines.
///
/// Asserts that PredefinesFileID is still invalid, i.e. that it has not
/// been set before.
void setPredefinesFileID(FileID FID) {
  assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
  PredefinesFileID = FID;
}
2665 
2666   /// Set the FileID for the PCH through header.
2667   void setPCHThroughHeaderFileID(FileID FID);
2668 
2669   /// Returns true if we are lexing from a file and not a
2670   /// pragma or a macro.
IsFileLexer(const Lexer * L,const PreprocessorLexer * P)2671   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2672     return L ? !L->isPragmaLexer() : P != nullptr;
2673   }
2674 
IsFileLexer(const IncludeStackInfo & I)2675   static bool IsFileLexer(const IncludeStackInfo& I) {
2676     return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2677   }
2678 
IsFileLexer()2679   bool IsFileLexer() const {
2680     return IsFileLexer(CurLexer.get(), CurPPLexer);
2681   }
2682 
2683   //===--------------------------------------------------------------------===//
2684   // Caching stuff.
2685   void CachingLex(Token &Result);
2686 
InCachingLexMode()2687   bool InCachingLexMode() const {
2688     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2689     // that we are past EOF, not that we are in CachingLex mode.
2690     return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2691   }
2692 
2693   void EnterCachingLexMode();
2694   void EnterCachingLexModeUnchecked();
2695 
ExitCachingLexMode()2696   void ExitCachingLexMode() {
2697     if (InCachingLexMode())
2698       RemoveTopOfLexerStack();
2699   }
2700 
2701   const Token &PeekAhead(unsigned N);
2702   void AnnotatePreviousCachedTokens(const Token &Tok);
2703 
2704   //===--------------------------------------------------------------------===//
2705   /// Handle*Directive - implement the various preprocessor directives.  These
2706   /// should side-effect the current preprocessor object so that the next call
2707   /// to Lex() will return the appropriate token next.
2708   void HandleLineDirective();
2709   void HandleDigitDirective(Token &Tok);
2710   void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2711   void HandleIdentSCCSDirective(Token &Tok);
2712   void HandleMacroPublicDirective(Token &Tok);
2713   void HandleMacroPrivateDirective();
2714 
2715   /// An additional notification that can be produced by a header inclusion or
2716   /// import to tell the parser what happened.
2717   struct ImportAction {
2718     enum ActionKind {
2719       None,
2720       ModuleBegin,
2721       ModuleImport,
2722       HeaderUnitImport,
2723       SkippedModuleImport,
2724       Failure,
2725     } Kind;
2726     Module *ModuleForHeader = nullptr;
2727 
2728     ImportAction(ActionKind AK, Module *Mod = nullptr)
KindImportAction2729         : Kind(AK), ModuleForHeader(Mod) {
2730       assert((AK == None || Mod || AK == Failure) &&
2731              "no module for module action");
2732     }
2733   };
2734 
2735   OptionalFileEntryRef LookupHeaderIncludeOrImport(
2736       ConstSearchDirIterator *CurDir, StringRef &Filename,
2737       SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2738       const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2739       bool &IsMapped, ConstSearchDirIterator LookupFrom,
2740       const FileEntry *LookupFromFile, StringRef &LookupFilename,
2741       SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2742       ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2743   // Binary data inclusion
2744   void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
2745                             const FileEntry *LookupFromFile = nullptr);
2746   void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
2747                                 const LexEmbedParametersResult &Params,
2748                                 StringRef BinaryContents);
2749 
2750   // File inclusion.
2751   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2752                               ConstSearchDirIterator LookupFrom = nullptr,
2753                               const FileEntry *LookupFromFile = nullptr);
2754   ImportAction
2755   HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2756                               Token &FilenameTok, SourceLocation EndLoc,
2757                               ConstSearchDirIterator LookupFrom = nullptr,
2758                               const FileEntry *LookupFromFile = nullptr);
2759   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2760   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2761   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2762   void HandleMicrosoftImportDirective(Token &Tok);
2763 
2764 public:
2765   /// Check that the given module is available, producing a diagnostic if not.
2766   /// \return \c true if the check failed (because the module is not available).
2767   ///         \c false if the module appears to be usable.
2768   static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2769                                      const TargetInfo &TargetInfo,
2770                                      const Module &M, DiagnosticsEngine &Diags);
2771 
2772   // Module inclusion testing.
2773   /// Find the module that owns the source or header file that
2774   /// \p Loc points to. If the location is in a file that was included
2775   /// into a module, or is outside any module, returns nullptr.
2776   Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual);
2777 
2778   /// We want to produce a diagnostic at location IncLoc concerning an
2779   /// unreachable effect at location MLoc (eg, where a desired entity was
2780   /// declared or defined). Determine whether the right way to make MLoc
2781   /// reachable is by #include, and if so, what header should be included.
2782   ///
2783   /// This is not necessarily fast, and might load unexpected module maps, so
2784   /// should only be called by code that intends to produce an error.
2785   ///
2786   /// \param IncLoc The location at which the missing effect was detected.
2787   /// \param MLoc A location within an unimported module at which the desired
2788   ///        effect occurred.
2789   /// \return A file that can be #included to provide the desired effect. Null
2790   ///         if no such file could be determined or if a #include is not
2791   ///         appropriate (eg, if a module should be imported instead).
2792   OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2793                                                         SourceLocation MLoc);
2794 
isRecordingPreamble()2795   bool isRecordingPreamble() const {
2796     return PreambleConditionalStack.isRecording();
2797   }
2798 
hasRecordedPreamble()2799   bool hasRecordedPreamble() const {
2800     return PreambleConditionalStack.hasRecordedPreamble();
2801   }
2802 
getPreambleConditionalStack()2803   ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2804       return PreambleConditionalStack.getStack();
2805   }
2806 
/// Store \p s as the stack held by PreambleConditionalStack.
void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
  PreambleConditionalStack.setStack(s);
}
2810 
/// Put PreambleConditionalStack into replaying state, then store \p s as
/// its stack and \p SkipInfo as its skip info.
void setReplayablePreambleConditionalStack(
    ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) {
  PreambleConditionalStack.startReplaying();
  PreambleConditionalStack.setStack(s);
  PreambleConditionalStack.SkipInfo = SkipInfo;
}
2817 
getPreambleSkipInfo()2818   std::optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2819     return PreambleConditionalStack.SkipInfo;
2820   }
2821 
private:
  /// After processing predefined file, initialize the conditional stack from
  /// the preamble.
  void replayPreambleConditionalStack();

  // Macro handling.
  // NOTE(review): these are declarations only; the definitions are not
  // visible in this header chunk. Judging by the names they handle the
  // #define and #undef directives -- confirm against the implementation.
  void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
  void HandleUndefDirective();

  // Conditional Inclusion.
  // Each handler below takes the token(s) of the directive it processes;
  // HashToken is passed by const reference and is not modified here.
  // NOTE(review): presumably one handler per #ifdef/#ifndef, #if, #endif,
  // #else and the #elif family (selected by Kind) -- confirm in the
  // out-of-line definitions.
  void HandleIfdefDirective(Token &Result, const Token &HashToken,
                            bool isIfndef, bool ReadAnyTokensBeforeDirective);
  void HandleIfDirective(Token &IfToken, const Token &HashToken,
                         bool ReadAnyTokensBeforeDirective);
  void HandleEndifDirective(Token &EndifToken);
  void HandleElseDirective(Token &Result, const Token &HashToken);
  void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
                                 tok::PPKeywordKind Kind);

  // Pragmas.
  // Private entry point for pragma processing; the individual pragma
  // handlers are declared in the public section.
  void HandlePragmaDirective(PragmaIntroducer Introducer);
2843 
public:
  // Handlers for individual pragmas. Declarations only; the definitions are
  // not visible in this header chunk.
  // NOTE(review): the pragma each one serves is inferred from its name
  // (once, mark, poison, system_header, dependency, push_macro, pop_macro,
  // include_alias, module build, hdrstop) -- confirm against the definitions.
  void HandlePragmaOnce(Token &OnceTok);
  void HandlePragmaMark(Token &MarkTok);
  void HandlePragmaPoison();
  void HandlePragmaSystemHeader(Token &SysHeaderTok);
  void HandlePragmaDependency(Token &DependencyTok);
  void HandlePragmaPushMacro(Token &Tok);
  void HandlePragmaPopMacro(Token &Tok);
  void HandlePragmaIncludeAlias(Token &Tok);
  void HandlePragmaModuleBuild(Token &Tok);
  void HandlePragmaHdrstop(Token &Tok);
  // NOTE(review): presumably the argument parser shared by the push_macro
  // and pop_macro handlers; returns an IdentifierInfo pointer -- confirm
  // whether it can be null.
  IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);

  // Return true and store the first token only if any CommentHandler
  // has inserted some tokens and getCommentRetentionState() is false.
  bool HandleComment(Token &result, SourceRange Comment);

  /// A macro is used, update information about macros that need unused
  /// warnings.
  void markMacroAsUsed(MacroInfo *MI);
2864 
addMacroDeprecationMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2865   void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
2866                               SourceLocation AnnotationLoc) {
2867     auto Annotations = AnnotationInfos.find(II);
2868     if (Annotations == AnnotationInfos.end())
2869       AnnotationInfos.insert(std::make_pair(
2870           II,
2871           MacroAnnotations::makeDeprecation(AnnotationLoc, std::move(Msg))));
2872     else
2873       Annotations->second.DeprecationInfo =
2874           MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
2875   }
2876 
addRestrictExpansionMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2877   void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
2878                                SourceLocation AnnotationLoc) {
2879     auto Annotations = AnnotationInfos.find(II);
2880     if (Annotations == AnnotationInfos.end())
2881       AnnotationInfos.insert(
2882           std::make_pair(II, MacroAnnotations::makeRestrictExpansion(
2883                                  AnnotationLoc, std::move(Msg))));
2884     else
2885       Annotations->second.RestrictExpansionInfo =
2886           MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
2887   }
2888 
addFinalLoc(const IdentifierInfo * II,SourceLocation AnnotationLoc)2889   void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
2890     auto Annotations = AnnotationInfos.find(II);
2891     if (Annotations == AnnotationInfos.end())
2892       AnnotationInfos.insert(
2893           std::make_pair(II, MacroAnnotations::makeFinal(AnnotationLoc)));
2894     else
2895       Annotations->second.FinalAnnotationLoc = AnnotationLoc;
2896   }
2897 
getMacroAnnotations(const IdentifierInfo * II)2898   const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
2899     return AnnotationInfos.find(II)->second;
2900   }
2901 
2902   void emitMacroExpansionWarnings(const Token &Identifier,
2903                                   bool IsIfnDef = false) const {
2904     IdentifierInfo *Info = Identifier.getIdentifierInfo();
2905     if (Info->isDeprecatedMacro())
2906       emitMacroDeprecationWarning(Identifier);
2907 
2908     if (Info->isRestrictExpansion() &&
2909         !SourceMgr.isInMainFile(Identifier.getLocation()))
2910       emitRestrictExpansionWarning(Identifier);
2911 
2912     if (!IsIfnDef) {
2913       if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs)
2914         emitRestrictInfNaNWarning(Identifier, 0);
2915       if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs)
2916         emitRestrictInfNaNWarning(Identifier, 1);
2917     }
2918   }
2919 
  // Static path helpers; declarations only, definitions are not visible in
  // this header chunk. Both take the buffer to work on as a mutable
  // SmallVectorImpl<char> reference, plus the language options and target.
  // NOTE(review): presumably Path is rewritten in place for use by the
  // file-name macros -- confirm in the definition.
  static void processPathForFileMacro(SmallVectorImpl<char> &Path,
                                      const LangOptions &LangOpts,
                                      const TargetInfo &TI);

  // NOTE(review): presumably fills FileName from the presumed location
  // PLoc -- confirm in the definition.
  static void processPathToFileName(SmallVectorImpl<char> &FileName,
                                    const PresumedLoc &PLoc,
                                    const LangOptions &LangOpts,
                                    const TargetInfo &TI);
2928 
private:
  // Out-of-line warning emitters; declarations only. All are const member
  // functions taking the macro-name token.
  // emitMacroDeprecationWarning, emitRestrictExpansionWarning and
  // emitRestrictInfNaNWarning are called from emitMacroExpansionWarnings().
  void emitMacroDeprecationWarning(const Token &Identifier) const;
  void emitRestrictExpansionWarning(const Token &Identifier) const;
  void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
  // DiagSelection: emitMacroExpansionWarnings() passes 0 for "INFINITY" and
  // 1 for "NAN".
  void emitRestrictInfNaNWarning(const Token &Identifier,
                                 unsigned DiagSelection) const;
2935 
  /// This boolean state keeps track of whether the token currently being
  /// scanned (by this PP) is in a "-Wunsafe-buffer-usage" opt-out region,
  /// assuming PP scans a translation unit in a linear order.
  bool InSafeBufferOptOutRegion = false;

  /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out
  /// region if PP is currently in such a region.  Hold undefined value
  /// otherwise.
  ///
  /// It is used to report the start location of a never-closed region.
  SourceLocation CurrentSafeBufferOptOutStart;

  // Container type for opt-out regions: a SmallVector of
  // (start location, end location) pairs with inline storage for 16 pairs.
  using SafeBufferOptOutRegionsTy =
      SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
  // translation unit. Each region is represented by a pair of start and
  // end locations.
  SafeBufferOptOutRegionsTy SafeBufferOptOutMap;
2952 
2953   // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs.  We use the
2954   // following structure to manage them by their ASTs.
2955   struct {
2956     // A map from unique IDs to region maps of loaded ASTs.  The ID identifies a
2957     // loaded AST. See `SourceManager::getUniqueLoadedASTID`.
2958     llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;
2959 
2960     // Returns a reference to the safe buffer opt-out regions of the loaded
2961     // AST where `Loc` belongs to. (Construct if absent)
2962     SafeBufferOptOutRegionsTy &
findAndConsLoadedOptOutMap__anon4672ff5f03082963     findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
2964       return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)];
2965     }
2966 
2967     // Returns a reference to the safe buffer opt-out regions of the loaded
2968     // AST where `Loc` belongs to. (This const function returns nullptr if
2969     // absent.)
2970     const SafeBufferOptOutRegionsTy *
lookupLoadedOptOutMap__anon4672ff5f03082971     lookupLoadedOptOutMap(SourceLocation Loc,
2972                           const SourceManager &SrcMgr) const {
2973       FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc);
2974       auto Iter = LoadedRegions.find(FID);
2975 
2976       if (Iter == LoadedRegions.end())
2977         return nullptr;
2978       return &Iter->getSecond();
2979     }
2980   } LoadedSafeBufferOptOutMap;
2981 
public:
  /// \param SourceMgr the source manager used for the query.
  /// \param Loc the location to test.
  /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
  /// region.  This `Loc` must be a source location that has been pre-processed.
  bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const;

  /// Alter the state of whether this PP currently is in a
  /// "-Wunsafe-buffer-usage" opt-out region.
  ///
  /// \param isEnter true if this PP is entering a region; otherwise, this PP
  /// is exiting a region
  /// \param Loc the location of the entry or exit of a
  /// region
  /// \return true iff it is INVALID to enter or exit a region, i.e.,
  /// attempt to enter a region before exiting a previous region, or exiting a
  /// region that PP is not currently in.
  bool enterOrExitSafeBufferOptOutRegion(bool isEnter,
                                         const SourceLocation &Loc);

  /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
  ///          opt-out region
  bool isPPInSafeBufferOptOutRegion();

  /// \param StartLoc output argument. It will be set to the start location of
  /// the current "-Wunsafe-buffer-usage" opt-out region iff this function
  /// returns true.
  /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
  ///          opt-out region
  bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);

  /// \return a sequence of SourceLocations representing ordered opt-out regions
  /// specified by
  /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
  SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;

  /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
  /// record of code `PP_UNSAFE_BUFFER_USAGE`.
  /// \return true iff the `Preprocessor` has been updated; false if the
  /// `Preprocessor` is the same as it was before the call.
  bool setDeserializedSafeBufferOptOutMap(
      const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
3022 
3023 private:
3024   /// Helper functions to forward lexing to the actual lexer. They all share the
3025   /// same signature.
CLK_Lexer(Preprocessor & P,Token & Result)3026   static bool CLK_Lexer(Preprocessor &P, Token &Result) {
3027     return P.CurLexer->Lex(Result);
3028   }
CLK_TokenLexer(Preprocessor & P,Token & Result)3029   static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
3030     return P.CurTokenLexer->Lex(Result);
3031   }
CLK_CachingLexer(Preprocessor & P,Token & Result)3032   static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
3033     P.CachingLex(Result);
3034     return true;
3035   }
CLK_DependencyDirectivesLexer(Preprocessor & P,Token & Result)3036   static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
3037     return P.CurLexer->LexDependencyDirectiveToken(Result);
3038   }
CLK_LexAfterModuleImport(Preprocessor & P,Token & Result)3039   static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
3040     return P.LexAfterModuleImport(Result);
3041   }
3042 };
3043 
/// Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
///
/// Subclasses must implement the pure virtual HandleComment().
class CommentHandler {
public:
  // Virtual destructor; declared here, defined out of line.
  virtual ~CommentHandler();

  // Receives the preprocessor and the source range of a comment.
  // The handler shall return true if it has pushed any tokens
  // to be read using e.g. EnterToken or EnterTokenStream.
  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
3054 
/// Abstract base class that describes a handler that will receive
/// source ranges for empty lines encountered in the source file.
///
/// Subclasses must implement the pure virtual HandleEmptyline().
class EmptylineHandler {
public:
  // Virtual destructor; declared here, defined out of line.
  virtual ~EmptylineHandler();

  // The handler handles empty lines. Range is the source range handed to
  // the handler; nothing is returned.
  virtual void HandleEmptyline(SourceRange Range) = 0;
};
3064 
/// Helper class to shuttle information about #embed directives from the
/// preprocessor to the parser through an annotation token.
struct EmbedAnnotationData {
  // StringRef is a non-owning view, so the bytes it refers to must be kept
  // alive elsewhere.
  // NOTE(review): presumably the contents of the embedded resource --
  // confirm against the code that fills this in.
  StringRef BinaryData;
};
3070 
/// Registry of pragma handlers added by plugins; an alias for
/// llvm::Registry instantiated with PragmaHandler.
using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
3073 
3074 } // namespace clang
3075 
3076 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
3077