xref: /freebsd/contrib/llvm-project/clang/include/clang/Lex/Preprocessor.h (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
16 
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/DiagnosticIDs.h"
19 #include "clang/Basic/IdentifierTable.h"
20 #include "clang/Basic/LLVM.h"
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/Module.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/HeaderSearch.h"
27 #include "clang/Lex/Lexer.h"
28 #include "clang/Lex/MacroInfo.h"
29 #include "clang/Lex/ModuleLoader.h"
30 #include "clang/Lex/ModuleMap.h"
31 #include "clang/Lex/PPCallbacks.h"
32 #include "clang/Lex/PPEmbedParameters.h"
33 #include "clang/Lex/Token.h"
34 #include "clang/Lex/TokenLexer.h"
35 #include "clang/Support/Compiler.h"
36 #include "llvm/ADT/APSInt.h"
37 #include "llvm/ADT/ArrayRef.h"
38 #include "llvm/ADT/DenseMap.h"
39 #include "llvm/ADT/FoldingSet.h"
40 #include "llvm/ADT/FunctionExtras.h"
41 #include "llvm/ADT/PointerUnion.h"
42 #include "llvm/ADT/STLExtras.h"
43 #include "llvm/ADT/SmallPtrSet.h"
44 #include "llvm/ADT/SmallVector.h"
45 #include "llvm/ADT/StringRef.h"
46 #include "llvm/ADT/TinyPtrVector.h"
47 #include "llvm/ADT/iterator_range.h"
48 #include "llvm/Support/Allocator.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/Registry.h"
51 #include <cassert>
52 #include <cstddef>
53 #include <cstdint>
54 #include <map>
55 #include <memory>
56 #include <optional>
57 #include <string>
58 #include <utility>
59 #include <vector>
60 
61 namespace llvm {
62 
63 template<unsigned InternalLen> class SmallString;
64 
65 } // namespace llvm
66 
67 namespace clang {
68 
69 class CodeCompletionHandler;
70 class CommentHandler;
71 class DirectoryEntry;
72 class EmptylineHandler;
73 class ExternalPreprocessorSource;
74 class FileEntry;
75 class FileManager;
76 class HeaderSearch;
77 class MacroArgs;
78 class PragmaHandler;
79 class PragmaNamespace;
80 class PreprocessingRecord;
81 class PreprocessorLexer;
82 class PreprocessorOptions;
83 class ScratchBuffer;
84 class TargetInfo;
85 class NoTrivialPPDirectiveTracer;
86 
87 namespace Builtin {
88 class Context;
89 }
90 
91 /// Stores token information for comparing actual tokens with
92 /// predefined values.  Only handles simple tokens and identifiers.
93 class TokenValue {
94   tok::TokenKind Kind;
95   IdentifierInfo *II;
96 
97 public:
TokenValue(tok::TokenKind Kind)98   TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
99     assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
100     assert(Kind != tok::identifier &&
101            "Identifiers should be created by TokenValue(IdentifierInfo *)");
102     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
103     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
104   }
105 
TokenValue(IdentifierInfo * II)106   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
107 
108   bool operator==(const Token &Tok) const {
109     return Tok.getKind() == Kind &&
110         (!II || II == Tok.getIdentifierInfo());
111   }
112 };
113 
/// The context in which a macro name is being used.
enum MacroUse {
  MU_Other = 0,  ///< Any use other than in \#define or \#undef.
  MU_Define = 1, ///< The macro name specified in \#define.
  MU_Undef = 2   ///< The macro name specified in \#undef.
};
125 
/// Possible results of an embed lookup. The non-negative values correspond
/// to the __STDC_EMBED_*__ macros named on each enumerator.
enum class EmbedResult {
  /// Parsing error occurred.
  Invalid = -1,
  /// Corresponds to __STDC_EMBED_NOT_FOUND__.
  NotFound = 0,
  /// Corresponds to __STDC_EMBED_FOUND__.
  Found = 1,
  /// Corresponds to __STDC_EMBED_EMPTY__.
  Empty = 2,
};
132 
/// Pairs a C++ standard library implementation with a version number.
struct CXXStandardLibraryVersionInfo {
  /// The standard library implementations that can be named here.
  enum Library {
    Unknown,
    LibStdCXX
  };

  /// Which library this record describes.
  Library Lib;

  /// Version number of that library.
  /// NOTE(review): the encoding of this value is not visible in this header.
  std::uint64_t Version;
};
138 
139 /// Engages in a tight little dance with the lexer to efficiently
140 /// preprocess tokens.
141 ///
142 /// Lexers know only about tokens within a single source file, and don't
143 /// know anything about preprocessor-level issues like the \#include stack,
144 /// token expansion, etc.
145 class Preprocessor {
146   friend class VAOptDefinitionContext;
147   friend class VariadicMacroScopeGuard;
148 
149   llvm::unique_function<void(const clang::Token &)> OnToken;
150   /// Functor for getting the dependency preprocessor directives of a file.
151   ///
152   /// These are directives derived from a special form of lexing where the
153   /// source input is scanned for the preprocessor directives that might have an
154   /// effect on the dependencies for a compilation unit.
155   DependencyDirectivesGetter *GetDependencyDirectives = nullptr;
156   const PreprocessorOptions &PPOpts;
157   DiagnosticsEngine        *Diags;
158   const LangOptions &LangOpts;
159   const TargetInfo *Target = nullptr;
160   const TargetInfo *AuxTarget = nullptr;
161   FileManager       &FileMgr;
162   SourceManager     &SourceMgr;
163   std::unique_ptr<ScratchBuffer> ScratchBuf;
164   HeaderSearch      &HeaderInfo;
165   ModuleLoader      &TheModuleLoader;
166 
167   /// External source of macros.
168   ExternalPreprocessorSource *ExternalSource;
169 
170   /// A BumpPtrAllocator object used to quickly allocate and release
171   /// objects internal to the Preprocessor.
172   llvm::BumpPtrAllocator BP;
173 
174   /// Identifiers for builtin macros and other builtins.
175   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
176   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
177   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
178   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
179   IdentifierInfo *Ident__FILE_NAME__;              // __FILE_NAME__
180   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
181   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
182   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
183   IdentifierInfo *Ident__identifier;               // __identifier
184   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
185   IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
186   IdentifierInfo *Ident__has_feature;              // __has_feature
187   IdentifierInfo *Ident__has_extension;            // __has_extension
188   IdentifierInfo *Ident__has_builtin;              // __has_builtin
189   IdentifierInfo *Ident__has_constexpr_builtin;    // __has_constexpr_builtin
190   IdentifierInfo *Ident__has_attribute;            // __has_attribute
191   IdentifierInfo *Ident__has_embed;                // __has_embed
192   IdentifierInfo *Ident__has_include;              // __has_include
193   IdentifierInfo *Ident__has_include_next;         // __has_include_next
194   IdentifierInfo *Ident__has_warning;              // __has_warning
195   IdentifierInfo *Ident__is_identifier;            // __is_identifier
196   IdentifierInfo *Ident__building_module;          // __building_module
197   IdentifierInfo *Ident__MODULE__;                 // __MODULE__
198   IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
199   IdentifierInfo *Ident__has_c_attribute;          // __has_c_attribute
200   IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
201   IdentifierInfo *Ident__is_target_arch;           // __is_target_arch
202   IdentifierInfo *Ident__is_target_vendor;         // __is_target_vendor
203   IdentifierInfo *Ident__is_target_os;             // __is_target_os
204   IdentifierInfo *Ident__is_target_environment;    // __is_target_environment
205   IdentifierInfo *Ident__is_target_variant_os;
206   IdentifierInfo *Ident__is_target_variant_environment;
207   IdentifierInfo *Ident__FLT_EVAL_METHOD__;        // __FLT_EVAL_METHOD
208 
209   // Weak, only valid (and set) while InMacroArgs is true.
210   Token* ArgMacro;
211 
212   SourceLocation DATELoc, TIMELoc;
213 
214   // FEM_UnsetOnCommandLine means that an explicit evaluation method was
215   // not specified on the command line. The target is queried to set the
216   // default evaluation method.
217   LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
218       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
219 
220   // The most recent pragma location where the floating point evaluation
221   // method was modified. This is used to determine whether the
222   // 'pragma clang fp eval_method' was used within the current scope.
223   SourceLocation LastFPEvalPragmaLocation;
224 
225   LangOptions::FPEvalMethodKind TUFPEvalMethod =
226       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
227 
228   // Next __COUNTER__ value, starts at 0.
229   unsigned CounterValue = 0;
230 
231   enum {
232     /// Maximum depth of \#includes.
233     MaxAllowedIncludeStackDepth = 200
234   };
235 
236   // State that is set before the preprocessor begins.
237   bool KeepComments : 1;
238   bool KeepMacroComments : 1;
239   bool SuppressIncludeNotFoundError : 1;
240 
241   // State that changes while the preprocessor runs:
242   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
243 
244   /// Whether the preprocessor owns the header search object.
245   bool OwnsHeaderSearch : 1;
246 
247   /// True if macro expansion is disabled.
248   bool DisableMacroExpansion : 1;
249 
250   /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
251   /// when parsing preprocessor directives.
252   bool MacroExpansionInDirectivesOverride : 1;
253 
254   class ResetMacroExpansionHelper;
255 
256   /// Whether we have already loaded macros from the external source.
257   mutable bool ReadMacrosFromExternalSource : 1;
258 
259   /// True if pragmas are enabled.
260   bool PragmasEnabled : 1;
261 
262   /// True if the current build action is a preprocessing action.
263   bool PreprocessedOutput : 1;
264 
265   /// True if we are currently preprocessing a #if or #elif directive
266   bool ParsingIfOrElifDirective;
267 
268   /// True if we are pre-expanding macro arguments.
269   bool InMacroArgPreExpansion;
270 
271   /// Mapping/lookup information for all identifiers in
272   /// the program, including program keywords.
273   mutable IdentifierTable Identifiers;
274 
275   /// This table contains all the selectors in the program.
276   ///
277   /// Unlike IdentifierTable above, this table *isn't* populated by the
278   /// preprocessor. It is declared/expanded here because its role/lifetime is
279   /// conceptually similar to the IdentifierTable. In addition, the current
280   /// control flow (in clang::ParseAST()), make it convenient to put here.
281   ///
282   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
283   /// the lifetime of the preprocessor.
284   SelectorTable Selectors;
285 
286   /// Information about builtins.
287   std::unique_ptr<Builtin::Context> BuiltinInfo;
288 
289   /// Tracks all of the pragmas that the client registered
290   /// with this preprocessor.
291   std::unique_ptr<PragmaNamespace> PragmaHandlers;
292 
293   /// Pragma handlers of the original source are stored here during the
294   /// parsing of a model file.
295   std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
296 
297   /// Tracks all of the comment handlers that the client registered
298   /// with this preprocessor.
299   std::vector<CommentHandler *> CommentHandlers;
300 
301   /// Empty line handler.
302   EmptylineHandler *Emptyline = nullptr;
303 
304   /// True to avoid tearing down the lexer etc on EOF
305   bool IncrementalProcessing = false;
306 
307 public:
308   /// The kind of translation unit we are processing.
309   const TranslationUnitKind TUKind;
310 
311   /// Returns a pointer into the given file's buffer that's guaranteed
312   /// to be between tokens. The returned pointer is always before \p Start.
313   /// The maximum distance between the returned pointer and \p Start is
314   /// limited by a constant value, but also an implementation detail.
315   /// If no such check point exists, \c nullptr is returned.
316   const char *getCheckPoint(FileID FID, const char *Start) const;
317 
318 private:
319   /// The code-completion handler.
320   CodeCompletionHandler *CodeComplete = nullptr;
321 
322   /// The file that we're performing code-completion for, if any.
323   const FileEntry *CodeCompletionFile = nullptr;
324 
325   /// The offset in file for the code-completion point.
326   unsigned CodeCompletionOffset = 0;
327 
328   /// The location for the code-completion point. This gets instantiated
329   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
330   SourceLocation CodeCompletionLoc;
331 
332   /// The start location for the file of the code-completion point.
333   ///
334   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
335   /// for preprocessing.
336   SourceLocation CodeCompletionFileLoc;
337 
338   /// The source location of the \c import contextual keyword we just
339   /// lexed, if any.
340   SourceLocation ModuleImportLoc;
341 
342   /// The import path for named module that we're currently processing.
343   SmallVector<IdentifierLoc, 2> NamedModuleImportPath;
344 
345   llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
346   unsigned CheckPointCounter = 0;
347 
348   /// Whether the import is an `@import` or a standard c++ modules import.
349   bool IsAtImport = false;
350 
351   /// Whether the last token we lexed was an '@'.
352   bool LastTokenWasAt = false;
353 
354   /// First pp-token source location in current translation unit.
355   SourceLocation FirstPPTokenLoc;
356 
357   /// A preprocessor directive tracer to trace whether the preprocessing
358   /// state changed. These changes would mean most semantically observable
359   /// preprocessor state, particularly anything that is order dependent.
360   NoTrivialPPDirectiveTracer *DirTracer = nullptr;
361 
362   /// A position within a C++20 import-seq.
363   class StdCXXImportSeq {
364   public:
365     enum State : int {
366       // Positive values represent a number of unclosed brackets.
367       AtTopLevel = 0,
368       AfterTopLevelTokenSeq = -1,
369       AfterExport = -2,
370       AfterImportSeq = -3,
371     };
372 
StdCXXImportSeq(State S)373     StdCXXImportSeq(State S) : S(S) {}
374 
375     /// Saw any kind of open bracket.
handleOpenBracket()376     void handleOpenBracket() {
377       S = static_cast<State>(std::max<int>(S, 0) + 1);
378     }
379     /// Saw any kind of close bracket other than '}'.
handleCloseBracket()380     void handleCloseBracket() {
381       S = static_cast<State>(std::max<int>(S, 1) - 1);
382     }
383     /// Saw a close brace.
handleCloseBrace()384     void handleCloseBrace() {
385       handleCloseBracket();
386       if (S == AtTopLevel && !AfterHeaderName)
387         S = AfterTopLevelTokenSeq;
388     }
389     /// Saw a semicolon.
handleSemi()390     void handleSemi() {
391       if (atTopLevel()) {
392         S = AfterTopLevelTokenSeq;
393         AfterHeaderName = false;
394       }
395     }
396 
397     /// Saw an 'export' identifier.
handleExport()398     void handleExport() {
399       if (S == AfterTopLevelTokenSeq)
400         S = AfterExport;
401       else if (S <= 0)
402         S = AtTopLevel;
403     }
404     /// Saw an 'import' identifier.
handleImport()405     void handleImport() {
406       if (S == AfterTopLevelTokenSeq || S == AfterExport)
407         S = AfterImportSeq;
408       else if (S <= 0)
409         S = AtTopLevel;
410     }
411 
412     /// Saw a 'header-name' token; do not recognize any more 'import' tokens
413     /// until we reach a top-level semicolon.
handleHeaderName()414     void handleHeaderName() {
415       if (S == AfterImportSeq)
416         AfterHeaderName = true;
417       handleMisc();
418     }
419 
420     /// Saw any other token.
handleMisc()421     void handleMisc() {
422       if (S <= 0)
423         S = AtTopLevel;
424     }
425 
atTopLevel()426     bool atTopLevel() { return S <= 0; }
afterImportSeq()427     bool afterImportSeq() { return S == AfterImportSeq; }
afterTopLevelSeq()428     bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
429 
430   private:
431     State S;
432     /// Whether we're in the pp-import-suffix following the header-name in a
433     /// pp-import. If so, a close-brace is not sufficient to end the
434     /// top-level-token-seq of an import-seq.
435     bool AfterHeaderName = false;
436   };
437 
438   /// Our current position within a C++20 import-seq.
439   StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq;
440 
441   /// Track whether we are in a Global Module Fragment
442   class TrackGMF {
443   public:
444     enum GMFState : int {
445       GMFActive = 1,
446       MaybeGMF = 0,
447       BeforeGMFIntroducer = -1,
448       GMFAbsentOrEnded = -2,
449     };
450 
TrackGMF(GMFState S)451     TrackGMF(GMFState S) : S(S) {}
452 
453     /// Saw a semicolon.
handleSemi()454     void handleSemi() {
455       // If it is immediately after the first instance of the module keyword,
456       // then that introduces the GMF.
457       if (S == MaybeGMF)
458         S = GMFActive;
459     }
460 
461     /// Saw an 'export' identifier.
handleExport()462     void handleExport() {
463       // The presence of an 'export' keyword always ends or excludes a GMF.
464       S = GMFAbsentOrEnded;
465     }
466 
467     /// Saw an 'import' identifier.
handleImport(bool AfterTopLevelTokenSeq)468     void handleImport(bool AfterTopLevelTokenSeq) {
469       // If we see this before any 'module' kw, then we have no GMF.
470       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
471         S = GMFAbsentOrEnded;
472     }
473 
474     /// Saw a 'module' identifier.
handleModule(bool AfterTopLevelTokenSeq)475     void handleModule(bool AfterTopLevelTokenSeq) {
476       // This was the first module identifier and not preceded by any token
477       // that would exclude a GMF.  It could begin a GMF, but only if directly
478       // followed by a semicolon.
479       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
480         S = MaybeGMF;
481       else
482         S = GMFAbsentOrEnded;
483     }
484 
485     /// Saw any other token.
handleMisc()486     void handleMisc() {
487       // We saw something other than ; after the 'module' kw, so not a GMF.
488       if (S == MaybeGMF)
489         S = GMFAbsentOrEnded;
490     }
491 
inGMF()492     bool inGMF() { return S == GMFActive; }
493 
494   private:
495     /// Track the transitions into and out of a Global Module Fragment,
496     /// if one is present.
497     GMFState S;
498   };
499 
500   TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
501 
502   /// Track the status of the c++20 module decl.
503   ///
504   ///   module-declaration:
505   ///     'export'[opt] 'module' module-name module-partition[opt]
506   ///     attribute-specifier-seq[opt] ';'
507   ///
508   ///   module-name:
509   ///     module-name-qualifier[opt] identifier
510   ///
511   ///   module-partition:
512   ///     ':' module-name-qualifier[opt] identifier
513   ///
514   ///   module-name-qualifier:
515   ///     identifier '.'
516   ///     module-name-qualifier identifier '.'
517   ///
518   /// Transition state:
519   ///
520   ///   NotAModuleDecl --- export ---> FoundExport
521   ///   NotAModuleDecl --- module ---> ImplementationCandidate
522   ///   FoundExport --- module ---> InterfaceCandidate
523   ///   ImplementationCandidate --- Identifier ---> ImplementationCandidate
524   ///   ImplementationCandidate --- period ---> ImplementationCandidate
525   ///   ImplementationCandidate --- colon ---> ImplementationCandidate
526   ///   InterfaceCandidate --- Identifier ---> InterfaceCandidate
527   ///   InterfaceCandidate --- period ---> InterfaceCandidate
528   ///   InterfaceCandidate --- colon ---> InterfaceCandidate
529   ///   ImplementationCandidate --- Semi ---> NamedModuleImplementation
530   ///   InterfaceCandidate --- Semi ---> NamedModuleInterface
531   ///   NamedModuleImplementation --- Anything ---> NamedModuleImplementation
532   ///   NamedModuleInterface --- Anything ---> NamedModuleInterface
533   ///
534   /// FIXME: We don't handle attribute-specifier-seq here yet. That should not
535   /// be a problem for now, since we don't support any module attributes yet.
536   class ModuleDeclSeq {
537     enum ModuleDeclState : int {
538       NotAModuleDecl,
539       FoundExport,
540       InterfaceCandidate,
541       ImplementationCandidate,
542       NamedModuleInterface,
543       NamedModuleImplementation,
544     };
545 
546   public:
547     ModuleDeclSeq() = default;
548 
handleExport()549     void handleExport() {
550       if (State == NotAModuleDecl)
551         State = FoundExport;
552       else if (!isNamedModule())
553         reset();
554     }
555 
handleModule()556     void handleModule() {
557       if (State == FoundExport)
558         State = InterfaceCandidate;
559       else if (State == NotAModuleDecl)
560         State = ImplementationCandidate;
561       else if (!isNamedModule())
562         reset();
563     }
564 
handleIdentifier(IdentifierInfo * Identifier)565     void handleIdentifier(IdentifierInfo *Identifier) {
566       if (isModuleCandidate() && Identifier)
567         Name += Identifier->getName().str();
568       else if (!isNamedModule())
569         reset();
570     }
571 
handleColon()572     void handleColon() {
573       if (isModuleCandidate())
574         Name += ":";
575       else if (!isNamedModule())
576         reset();
577     }
578 
handlePeriod()579     void handlePeriod() {
580       if (isModuleCandidate())
581         Name += ".";
582       else if (!isNamedModule())
583         reset();
584     }
585 
handleSemi()586     void handleSemi() {
587       if (!Name.empty() && isModuleCandidate()) {
588         if (State == InterfaceCandidate)
589           State = NamedModuleInterface;
590         else if (State == ImplementationCandidate)
591           State = NamedModuleImplementation;
592         else
593           llvm_unreachable("Unimaged ModuleDeclState.");
594       } else if (!isNamedModule())
595         reset();
596     }
597 
handleMisc()598     void handleMisc() {
599       if (!isNamedModule())
600         reset();
601     }
602 
isModuleCandidate()603     bool isModuleCandidate() const {
604       return State == InterfaceCandidate || State == ImplementationCandidate;
605     }
606 
isNamedModule()607     bool isNamedModule() const {
608       return State == NamedModuleInterface ||
609              State == NamedModuleImplementation;
610     }
611 
isNamedInterface()612     bool isNamedInterface() const { return State == NamedModuleInterface; }
613 
isImplementationUnit()614     bool isImplementationUnit() const {
615       return State == NamedModuleImplementation && !getName().contains(':');
616     }
617 
isNotAModuleDecl()618     bool isNotAModuleDecl() const { return State == NotAModuleDecl; }
619 
getName()620     StringRef getName() const {
621       assert(isNamedModule() && "Can't get name from a non named module");
622       return Name;
623     }
624 
getPrimaryName()625     StringRef getPrimaryName() const {
626       assert(isNamedModule() && "Can't get name from a non named module");
627       return getName().split(':').first;
628     }
629 
reset()630     void reset() {
631       Name.clear();
632       State = NotAModuleDecl;
633     }
634 
635   private:
636     ModuleDeclState State = NotAModuleDecl;
637     std::string Name;
638   };
639 
640   ModuleDeclSeq ModuleDeclState;
641 
642   /// Whether the module import expects an identifier next. Otherwise,
643   /// it expects a '.' or ';'.
644   bool ModuleImportExpectsIdentifier = false;
645 
646   /// The identifier and source location of the currently-active
647   /// \#pragma clang arc_cf_code_audited begin.
648   IdentifierLoc PragmaARCCFCodeAuditedInfo;
649 
650   /// The source location of the currently-active
651   /// \#pragma clang assume_nonnull begin.
652   SourceLocation PragmaAssumeNonNullLoc;
653 
654   /// Set only for preambles which end with an active
655   /// \#pragma clang assume_nonnull begin.
656   ///
657   /// When the preamble is loaded into the main file,
658   /// `PragmaAssumeNonNullLoc` will be set to this to
659   /// replay the unterminated assume_nonnull.
660   SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
661 
662   /// True if we hit the code-completion point.
663   bool CodeCompletionReached = false;
664 
665   /// The code completion token containing the information
666   /// on the stem that is to be code completed.
667   IdentifierInfo *CodeCompletionII = nullptr;
668 
669   /// Range for the code completion token.
670   SourceRange CodeCompletionTokenRange;
671 
672   /// The directory that the main file should be considered to occupy,
673   /// if it does not correspond to a real file (as happens when building a
674   /// module).
675   OptionalDirectoryEntryRef MainFileDir;
676 
677   /// The number of bytes that we will initially skip when entering the
678   /// main file, along with a flag that indicates whether skipping this number
679   /// of bytes will place the lexer at the start of a line.
680   ///
681   /// This is used when loading a precompiled preamble.
682   std::pair<int, bool> SkipMainFilePreamble;
683 
684   /// Whether we hit an error due to reaching max allowed include depth. Allows
685   /// to avoid hitting the same error over and over again.
686   bool HasReachedMaxIncludeDepth = false;
687 
688   /// The number of currently-active calls to Lex.
689   ///
690   /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
691   /// require asking for multiple additional tokens. This counter makes it
692   /// possible for Lex to detect whether it's producing a token for the end
693   /// of phase 4 of translation or for some other situation.
694   unsigned LexLevel = 0;
695 
696   /// The number of (LexLevel 0) preprocessor tokens.
697   unsigned TokenCount = 0;
698 
699   /// Preprocess every token regardless of LexLevel.
700   bool PreprocessToken = false;
701 
702   /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
703   /// warning, or zero for unlimited.
704   unsigned MaxTokens = 0;
705   SourceLocation MaxTokensOverrideLoc;
706 
707 public:
708   struct PreambleSkipInfo {
709     SourceLocation HashTokenLoc;
710     SourceLocation IfTokenLoc;
711     bool FoundNonSkipPortion;
712     bool FoundElse;
713     SourceLocation ElseLoc;
714 
PreambleSkipInfoPreambleSkipInfo715     PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
716                      bool FoundNonSkipPortion, bool FoundElse,
717                      SourceLocation ElseLoc)
718         : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
719           FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
720           ElseLoc(ElseLoc) {}
721   };
722 
723   using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
724 
725 private:
726   friend class ASTReader;
727   friend class MacroArgs;
728 
729   class PreambleConditionalStackStore {
730     enum State {
731       Off = 0,
732       Recording = 1,
733       Replaying = 2,
734     };
735 
736   public:
737     PreambleConditionalStackStore() = default;
738 
startRecording()739     void startRecording() { ConditionalStackState = Recording; }
startReplaying()740     void startReplaying() { ConditionalStackState = Replaying; }
isRecording()741     bool isRecording() const { return ConditionalStackState == Recording; }
isReplaying()742     bool isReplaying() const { return ConditionalStackState == Replaying; }
743 
getStack()744     ArrayRef<PPConditionalInfo> getStack() const {
745       return ConditionalStack;
746     }
747 
doneReplaying()748     void doneReplaying() {
749       ConditionalStack.clear();
750       ConditionalStackState = Off;
751     }
752 
setStack(ArrayRef<PPConditionalInfo> s)753     void setStack(ArrayRef<PPConditionalInfo> s) {
754       if (!isRecording() && !isReplaying())
755         return;
756       ConditionalStack.clear();
757       ConditionalStack.append(s.begin(), s.end());
758     }
759 
hasRecordedPreamble()760     bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
761 
reachedEOFWhileSkipping()762     bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
763 
clearSkipInfo()764     void clearSkipInfo() { SkipInfo.reset(); }
765 
766     std::optional<PreambleSkipInfo> SkipInfo;
767 
768   private:
769     SmallVector<PPConditionalInfo, 4> ConditionalStack;
770     State ConditionalStackState = Off;
771   } PreambleConditionalStack;
772 
773   /// The current top of the stack that we're lexing from if
774   /// not expanding a macro and we are lexing directly from source code.
775   ///
776   /// Only one of CurLexer, or CurTokenLexer will be non-null.
777   std::unique_ptr<Lexer> CurLexer;
778 
779   /// The current top of the stack that we're lexing from
780   /// if not expanding a macro.
781   ///
782   /// This is an alias for CurLexer.
783   PreprocessorLexer *CurPPLexer = nullptr;
784 
785   /// Used to find the current FileEntry, if CurLexer is non-null
786   /// and if applicable.
787   ///
788   /// This allows us to implement \#include_next and find directory-specific
789   /// properties.
790   ConstSearchDirIterator CurDirLookup = nullptr;
791 
792   /// The current macro we are expanding, if we are expanding a macro.
793   ///
794   /// One of CurLexer and CurTokenLexer must be null.
795   std::unique_ptr<TokenLexer> CurTokenLexer;
796 
797   /// The kind of lexer we're currently working with.
798   typedef bool (*LexerCallback)(Preprocessor &, Token &);
799   LexerCallback CurLexerCallback = &CLK_Lexer;
800 
801   /// If the current lexer is for a submodule that is being built, this
802   /// is that submodule.
803   Module *CurLexerSubmodule = nullptr;
804 
805   /// Keeps track of the stack of files currently
806   /// \#included, and macros currently being expanded from, not counting
807   /// CurLexer/CurTokenLexer.
808   struct IncludeStackInfo {
809     LexerCallback               CurLexerCallback;
810     Module                     *TheSubmodule;
811     std::unique_ptr<Lexer>      TheLexer;
812     PreprocessorLexer          *ThePPLexer;
813     std::unique_ptr<TokenLexer> TheTokenLexer;
814     ConstSearchDirIterator      TheDirLookup;
815 
816     // The following constructors are completely useless copies of the default
817     // versions, only needed to pacify MSVC.
IncludeStackInfoIncludeStackInfo818     IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule,
819                      std::unique_ptr<Lexer> &&TheLexer,
820                      PreprocessorLexer *ThePPLexer,
821                      std::unique_ptr<TokenLexer> &&TheTokenLexer,
822                      ConstSearchDirIterator TheDirLookup)
823         : CurLexerCallback(std::move(CurLexerCallback)),
824           TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
825           ThePPLexer(std::move(ThePPLexer)),
826           TheTokenLexer(std::move(TheTokenLexer)),
827           TheDirLookup(std::move(TheDirLookup)) {}
828   };
829   std::vector<IncludeStackInfo> IncludeMacroStack;
830 
831   /// Actions invoked when some preprocessor activity is
832   /// encountered (e.g. a file is \#included, etc).
833   std::unique_ptr<PPCallbacks> Callbacks;
834 
  /// Bundles a token, a macro definition and a source range. Instances are
  /// queued in DelayedMacroExpandsCallbacks.
  struct MacroExpandsInfo {
    /// The token stored with this record.
    Token Tok;
    /// The macro definition stored with this record.
    MacroDefinition MD;
    /// The source range stored with this record.
    SourceRange Range;

    /// Member-wise constructor; each argument is copied into the field of the
    /// same name.
    MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
        : Tok(Tok), MD(MD), Range(Range) {}
  };
843   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
844 
845   /// Information about a name that has been used to define a module macro.
  struct ModuleMacroInfo {
    /// The most recent macro directive for this identifier.
    MacroDirective *MD;

    /// The active module macros for this identifier.
    llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;

    /// The generation number at which we last updated ActiveModuleMacros.
    /// MacroState compares this against the current VisibleModules generation
    /// and refreshes this record when they differ.
    /// \see Preprocessor::VisibleModules.
    unsigned ActiveModuleMacrosGeneration = 0;

    /// Whether this macro name is ambiguous.
    bool IsAmbiguous = false;

    /// The module macros that are overridden by this macro.
    llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;

    /// Wrap the directive \p MD; all module-macro bookkeeping starts empty.
    ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
  };
865 
866   /// The state of a macro for an identifier.
867   class MacroState {
868     mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
869 
getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)870     ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
871                                    const IdentifierInfo *II) const {
872       if (II->isOutOfDate())
873         PP.updateOutOfDateIdentifier(*II);
874       // FIXME: Find a spare bit on IdentifierInfo and store a
875       //        HasModuleMacros flag.
876       if (!II->hasMacroDefinition() ||
877           (!PP.getLangOpts().Modules &&
878            !PP.getLangOpts().ModulesLocalVisibility) ||
879           !PP.CurSubmoduleState->VisibleModules.getGeneration())
880         return nullptr;
881 
882       auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State);
883       if (!Info) {
884         Info = new (PP.getPreprocessorAllocator())
885             ModuleMacroInfo(cast<MacroDirective *>(State));
886         State = Info;
887       }
888 
889       if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
890           Info->ActiveModuleMacrosGeneration)
891         PP.updateModuleMacroInfo(II, *Info);
892       return Info;
893     }
894 
895   public:
MacroState()896     MacroState() : MacroState(nullptr) {}
MacroState(MacroDirective * MD)897     MacroState(MacroDirective *MD) : State(MD) {}
898 
MacroState(MacroState && O)899     MacroState(MacroState &&O) noexcept : State(O.State) {
900       O.State = (MacroDirective *)nullptr;
901     }
902 
903     MacroState &operator=(MacroState &&O) noexcept {
904       auto S = O.State;
905       O.State = (MacroDirective *)nullptr;
906       State = S;
907       return *this;
908     }
909 
~MacroState()910     ~MacroState() {
911       if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
912         Info->~ModuleMacroInfo();
913     }
914 
getLatest()915     MacroDirective *getLatest() const {
916       if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
917         return Info->MD;
918       return cast<MacroDirective *>(State);
919     }
920 
setLatest(MacroDirective * MD)921     void setLatest(MacroDirective *MD) {
922       if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
923         Info->MD = MD;
924       else
925         State = MD;
926     }
927 
isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)928     bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
929       auto *Info = getModuleInfo(PP, II);
930       return Info ? Info->IsAmbiguous : false;
931     }
932 
933     ArrayRef<ModuleMacro *>
getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)934     getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
935       if (auto *Info = getModuleInfo(PP, II))
936         return Info->ActiveModuleMacros;
937       return {};
938     }
939 
findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)940     MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
941                                                SourceManager &SourceMgr) const {
942       // FIXME: Incorporate module macros into the result of this.
943       if (auto *Latest = getLatest())
944         return Latest->findDirectiveAtLoc(Loc, SourceMgr);
945       return {};
946     }
947 
overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)948     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
949       if (auto *Info = getModuleInfo(PP, II)) {
950         Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
951                                       Info->ActiveModuleMacros.begin(),
952                                       Info->ActiveModuleMacros.end());
953         Info->ActiveModuleMacros.clear();
954         Info->IsAmbiguous = false;
955       }
956     }
957 
getOverriddenMacros()958     ArrayRef<ModuleMacro*> getOverriddenMacros() const {
959       if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
960         return Info->OverriddenMacros;
961       return {};
962     }
963 
setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)964     void setOverriddenMacros(Preprocessor &PP,
965                              ArrayRef<ModuleMacro *> Overrides) {
966       auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State);
967       if (!Info) {
968         if (Overrides.empty())
969           return;
970         Info = new (PP.getPreprocessorAllocator())
971             ModuleMacroInfo(cast<MacroDirective *>(State));
972         State = Info;
973       }
974       Info->OverriddenMacros.clear();
975       Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
976                                     Overrides.begin(), Overrides.end());
977       Info->ActiveModuleMacrosGeneration = 0;
978     }
979   };
980 
981   /// For each IdentifierInfo that was associated with a macro, we
982   /// keep a mapping to the history of all macro definitions and #undefs in
983   /// the reverse order (the latest one is in the head of the list).
984   ///
985   /// This mapping lives within the \p CurSubmoduleState.
986   using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
987 
988   struct SubmoduleState;
989 
990   /// Information about a submodule that we're currently building.
  struct BuildingSubmoduleInfo {
    /// The module that we are building.
    Module *M;

    /// The location at which the module was included.
    SourceLocation ImportLoc;

    /// Whether we entered this submodule via a pragma.
    bool IsPragma;

    /// The previous SubmoduleState.
    SubmoduleState *OuterSubmoduleState;

    /// The number of pending module macro names when we started building this.
    unsigned OuterPendingModuleMacroNames;

    /// Member-wise constructor; each argument initializes the field of the
    /// same name.
    BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
                          SubmoduleState *OuterSubmoduleState,
                          unsigned OuterPendingModuleMacroNames)
        : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
          OuterSubmoduleState(OuterSubmoduleState),
          OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
  };
1014   SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
1015 
1016   /// Information about a submodule's preprocessor state.
  struct SubmoduleState {
    /// The macros for the submodule: one MacroState per identifier.
    MacroMap Macros;

    /// The set of modules that are visible within the submodule.
    VisibleModuleSet VisibleModules;

    // FIXME: CounterValue?
    // FIXME: PragmaPushMacroInfo?
  };
1027   std::map<Module *, SubmoduleState> Submodules;
1028 
1029   /// The preprocessor state for preprocessing outside of any submodule.
1030   SubmoduleState NullSubmoduleState;
1031 
1032   /// The current submodule state. Will be \p NullSubmoduleState if we're not
1033   /// in a submodule.
1034   SubmoduleState *CurSubmoduleState;
1035 
1036   /// The files that have been included.
1037   IncludedFilesSet IncludedFiles;
1038 
1039   /// The set of top-level modules that affected preprocessing, but were not
1040   /// imported.
1041   llvm::SmallSetVector<Module *, 2> AffectingClangModules;
1042 
1043   /// The set of known macros exported from modules.
1044   llvm::FoldingSet<ModuleMacro> ModuleMacros;
1045 
1046   /// The names of potential module macros that we've not yet processed.
1047   llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames;
1048 
1049   /// The list of module macros, for each identifier, that are not overridden by
1050   /// any other module macro.
1051   llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
1052       LeafModuleMacros;
1053 
  /// Macros that we want to warn about because they are not used at the end
  /// of the translation unit.
1056   ///
1057   /// We store just their SourceLocations instead of
1058   /// something like MacroInfo*. The benefit of this is that when we are
1059   /// deserializing from PCH, we don't need to deserialize identifier & macros
1060   /// just so that we can report that they are unused, we just warn using
1061   /// the SourceLocations of this set (that will be filled by the ASTReader).
1062   using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
1063   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
1064 
1065   /// This is a pair of an optional message and source location used for pragmas
1066   /// that annotate macros like pragma clang restrict_expansion and pragma clang
1067   /// deprecated. This pair stores the optional message and the location of the
1068   /// annotation pragma for use producing diagnostics and notes.
1069   using MsgLocationPair = std::pair<std::string, SourceLocation>;
1070 
  /// A single macro annotation: where it was written and its message text.
  struct MacroAnnotationInfo {
    /// Source location recorded for the annotation.
    SourceLocation Location;
    /// Message recorded for the annotation (may be empty).
    std::string Message;
  };
1075 
  /// The set of annotations recorded for one macro name; each member is
  /// disengaged until the corresponding annotation has been recorded.
  struct MacroAnnotations {
    /// Deprecation annotation, if any.
    std::optional<MacroAnnotationInfo> DeprecationInfo;
    /// Restrict-expansion annotation, if any.
    std::optional<MacroAnnotationInfo> RestrictExpansionInfo;
    /// Location of the "final" annotation, if any.
    /// NOTE(review): presumably set by '#pragma clang final' -- confirm.
    std::optional<SourceLocation> FinalAnnotationLoc;
  };
1081 
1082   /// Warning information for macro annotations.
1083   llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
1084 
  /// A "freelist" of MacroArgs objects that can be
  /// reused for quick allocation.
1087   MacroArgs *MacroArgCache = nullptr;
1088 
1089   /// For each IdentifierInfo used in a \#pragma push_macro directive,
1090   /// we keep a MacroInfo stack used to restore the previous macro value.
1091   llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
1092       PragmaPushMacroInfo;
1093 
1094   // Various statistics we track for performance analysis.
1095   unsigned NumDirectives = 0;
1096   unsigned NumDefined = 0;
1097   unsigned NumUndefined = 0;
1098   unsigned NumPragma = 0;
1099   unsigned NumIf = 0;
1100   unsigned NumElse = 0;
1101   unsigned NumEndif = 0;
1102   unsigned NumEnteredSourceFiles = 0;
1103   unsigned MaxIncludeStackDepth = 0;
1104   unsigned NumMacroExpanded = 0;
1105   unsigned NumFnMacroExpanded = 0;
1106   unsigned NumBuiltinMacroExpanded = 0;
1107   unsigned NumFastMacroExpanded = 0;
1108   unsigned NumTokenPaste = 0;
1109   unsigned NumFastTokenPaste = 0;
1110   unsigned NumSkipped = 0;
1111 
1112   /// The predefined macros that preprocessor should use from the
1113   /// command line etc.
1114   std::string Predefines;
1115 
1116   /// The file ID for the preprocessor predefines.
1117   FileID PredefinesFileID;
1118 
1119   /// The file ID for the PCH through header.
1120   FileID PCHThroughHeaderFileID;
1121 
1122   /// Whether tokens are being skipped until a #pragma hdrstop is seen.
1123   bool SkippingUntilPragmaHdrStop = false;
1124 
1125   /// Whether tokens are being skipped until the through header is seen.
1126   bool SkippingUntilPCHThroughHeader = false;
1127 
1128   /// \{
1129   /// Cache of macro expanders to reduce malloc traffic.
1130   enum { TokenLexerCacheSize = 8 };
1131   unsigned NumCachedTokenLexers;
1132   std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
1133   /// \}
1134 
  /// Keeps macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
1140   SmallVector<Token, 16> MacroExpandedTokens;
1141   std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
1142 
1143   /// A record of the macro definitions and expansions that
1144   /// occurred during preprocessing.
1145   ///
1146   /// This is an optional side structure that can be enabled with
1147   /// \c createPreprocessingRecord() prior to preprocessing.
1148   PreprocessingRecord *Record = nullptr;
1149 
1150   /// Cached tokens state.
1151   using CachedTokensTy = SmallVector<Token, 1>;
1152 
1153   /// Cached tokens are stored here when we do backtracking or
1154   /// lookahead. They are "lexed" by the CachingLex() method.
1155   CachedTokensTy CachedTokens;
1156 
1157   /// The position of the cached token that CachingLex() should
1158   /// "lex" next.
1159   ///
1160   /// If it points beyond the CachedTokens vector, it means that a normal
1161   /// Lex() should be invoked.
1162   CachedTokensTy::size_type CachedLexPos = 0;
1163 
1164   /// Stack of backtrack positions, allowing nested backtracks.
1165   ///
1166   /// The EnableBacktrackAtThisPos() method pushes a position to
1167   /// indicate where CachedLexPos should be set when the BackTrack() method is
1168   /// invoked (at which point the last position is popped).
1169   std::vector<CachedTokensTy::size_type> BacktrackPositions;
1170 
1171   /// Stack of cached tokens/initial number of cached tokens pairs, allowing
1172   /// nested unannotated backtracks.
1173   std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>>
1174       UnannotatedBacktrackTokens;
1175 
1176   /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
1177   /// This is used to guard against calling this function recursively.
1178   ///
1179   /// See comments at the use-site for more context about why it is needed.
1180   bool SkippingExcludedConditionalBlock = false;
1181 
1182   /// Keeps track of skipped range mappings that were recorded while skipping
1183   /// excluded conditional directives. It maps the source buffer pointer at
1184   /// the beginning of a skipped block, to the number of bytes that should be
1185   /// skipped.
1186   llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;
1187 
1188   void updateOutOfDateIdentifier(const IdentifierInfo &II) const;
1189 
1190 public:
1191   Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags,
1192                const LangOptions &LangOpts, SourceManager &SM,
1193                HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
1194                IdentifierInfoLookup *IILookup = nullptr,
1195                bool OwnsHeaderSearch = false,
1196                TranslationUnitKind TUKind = TU_Complete);
1197 
1198   ~Preprocessor();
1199 
1200   /// Initialize the preprocessor using information about the target.
1201   ///
1202   /// \param Target is owned by the caller and must remain valid for the
1203   /// lifetime of the preprocessor.
1204   /// \param AuxTarget is owned by the caller and must remain valid for
1205   /// the lifetime of the preprocessor.
1206   void Initialize(const TargetInfo &Target,
1207                   const TargetInfo *AuxTarget = nullptr);
1208 
1209   /// Initialize the preprocessor to parse a model file
1210   ///
  /// To parse model files the preprocessor of the original source is reused to
  /// preserve the identifier table. However, to avoid some duplicate
  /// information in the preprocessor, some cleanup is needed before it is used
  /// to parse model files. This method does that cleanup.
1215   void InitializeForModelFile();
1216 
  /// Clean up after model file parsing.
1218   void FinalizeForModelFile();
1219 
  /// Retrieve the preprocessor options used to initialize this preprocessor.
  ///
  /// \returns a read-only reference to the stored PreprocessorOptions.
  const PreprocessorOptions &getPreprocessorOpts() const { return PPOpts; }
1222 
  /// Retrieve the diagnostics engine this preprocessor currently reports to.
  DiagnosticsEngine &getDiagnostics() const { return *Diags; }
  /// Make this preprocessor report to \p D. Only the address of \p D is
  /// stored, so \p D must stay alive for as long as it is used here.
  void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
1225 
  /// Retrieve the language options in effect for this preprocessor.
  const LangOptions &getLangOpts() const { return LangOpts; }
  /// Retrieve the target description. Target is dereferenced without a null
  /// check, so a target must already have been set (see Initialize()).
  const TargetInfo &getTargetInfo() const { return *Target; }
  /// Retrieve the auxiliary target description; may be null (Initialize()
  /// defaults its AuxTarget argument to nullptr).
  const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
  /// Retrieve the file manager used by this preprocessor.
  FileManager &getFileManager() const { return FileMgr; }
  /// Retrieve the source manager used by this preprocessor.
  SourceManager &getSourceManager() const { return SourceMgr; }
  /// Retrieve the header search object used by this preprocessor.
  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
1232 
  /// Retrieve the identifier table (mutable access).
  IdentifierTable &getIdentifierTable() { return Identifiers; }
  /// Retrieve the identifier table (read-only access).
  const IdentifierTable &getIdentifierTable() const { return Identifiers; }
  /// Retrieve the selector table.
  SelectorTable &getSelectorTable() { return Selectors; }
  /// Retrieve the builtin context. BuiltinInfo is dereferenced without a null
  /// check.
  Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
  /// Retrieve the bump-pointer allocator owned by this preprocessor.
  llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
1238 
  /// Set the external preprocessor source. Only the pointer is stored; a null
  /// \p Source is accepted.
  void setExternalSource(ExternalPreprocessorSource *Source) {
    ExternalSource = Source;
  }
1242 
  /// Retrieve the external preprocessor source last passed to
  /// setExternalSource(); may be null.
  ExternalPreprocessorSource *getExternalSource() const {
    return ExternalSource;
  }
1246 
  /// Retrieve the module loader associated with this preprocessor.
  ///
  /// \returns a reference to the loader; no ownership is transferred.
  ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
1249 
  /// Report the module loader's HadFatalFailure flag.
  bool hadModuleLoaderFatalFailure() const {
    return TheModuleLoader.HadFatalFailure;
  }
1253 
  /// Retrieve the number of directives that have been processed by the
  /// Preprocessor so far (the NumDirectives statistic).
  unsigned getNumDirectives() const {
    return NumDirectives;
  }
1259 
  /// True if we are currently preprocessing a \#if or \#elif directive.
  bool isParsingIfOrElifDirective() const {
    return ParsingIfOrElifDirective;
  }
1264 
1265   /// Control whether the preprocessor retains comments in output.
SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)1266   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
1267     this->KeepComments = KeepComments | KeepMacroComments;
1268     this->KeepMacroComments = KeepMacroComments;
1269   }
1270 
  /// Whether comments are retained. This is also true when only macro
  /// comments were requested (see SetCommentRetentionState()).
  bool getCommentRetentionState() const { return KeepComments; }
1272 
  /// Set the PragmasEnabled flag.
  void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
  /// Retrieve the PragmasEnabled flag.
  bool getPragmasEnabled() const { return PragmasEnabled; }
1275 
  /// Set the SuppressIncludeNotFoundError flag.
  void SetSuppressIncludeNotFoundError(bool Suppress) {
    SuppressIncludeNotFoundError = Suppress;
  }
1279 
GetSuppressIncludeNotFoundError()1280   bool GetSuppressIncludeNotFoundError() {
1281     return SuppressIncludeNotFoundError;
1282   }
1283 
  /// Sets whether the preprocessor is responsible for producing output or if
  /// it is producing tokens to be consumed by Parse and Sema.
  ///
  /// \param IsPreprocessedOutput true when the preprocessor itself produces
  /// the output.
  void setPreprocessedOutput(bool IsPreprocessedOutput) {
    PreprocessedOutput = IsPreprocessedOutput;
  }
1289 
  /// Returns true if the preprocessor is responsible for generating output,
  /// false if it is producing tokens to be consumed by Parse and Sema.
  /// \see setPreprocessedOutput
  bool isPreprocessedOutput() const { return PreprocessedOutput; }
1293 
  /// Return true if we are lexing directly from the specified lexer, i.e.
  /// \p L is the same object as the current preprocessor lexer.
  bool isCurrentLexer(const PreprocessorLexer *L) const {
    return CurPPLexer == L;
  }
1298 
  /// Return the current lexer being lexed from; may be null.
  ///
  /// Note that this ignores any potentially active macro expansions and _Pragma
  /// expansions going on at the time.
  PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1304 
1305   /// Return the current file lexer being lexed from.
1306   ///
1307   /// Note that this ignores any potentially active macro expansions and _Pragma
1308   /// expansions going on at the time.
1309   PreprocessorLexer *getCurrentFileLexer() const;
1310 
  /// Return the submodule owning the file being lexed; may be null. This may
  /// not be the current module if we have changed modules since entering the
  /// file.
  Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1314 
  /// Returns the FileID recorded for the preprocessor predefines.
  FileID getPredefinesFileID() const { return PredefinesFileID; }
1317 
1318   /// \{
1319   /// Accessors for preprocessor callbacks.
1320   ///
1321   /// Note that this class takes ownership of any PPCallbacks object given to
1322   /// it.
  /// Non-owning pointer to the installed callbacks; null when none were added.
  PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
addPPCallbacks(std::unique_ptr<PPCallbacks> C)1324   void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1325     if (Callbacks)
1326       C = std::make_unique<PPChainedCallbacks>(std::move(C),
1327                                                 std::move(Callbacks));
1328     Callbacks = std::move(C);
1329   }
1330   /// \}
1331 
  /// Get the number of tokens processed so far (the TokenCount counter).
  unsigned getTokenCount() const { return TokenCount; }
1334 
  /// Get the max number of tokens before issuing a -Wmax-tokens warning.
  /// \see overrideMaxTokens
  unsigned getMaxTokens() const { return MaxTokens; }
1337 
overrideMaxTokens(unsigned Value,SourceLocation Loc)1338   void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1339     MaxTokens = Value;
1340     MaxTokensOverrideLoc = Loc;
1341   };
1342 
  /// Location recorded by the most recent overrideMaxTokens() call.
  SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1344 
  /// Register a function that would be called on each token in the final
  /// expanded token stream.
  /// This also reports annotation tokens produced by the parser.
  ///
  /// \param F the callback; it is moved into OnToken, replacing whatever was
  /// registered before.
  void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
    OnToken = std::move(F);
  }
1351 
  /// Set the dependency-directives getter. Only the address of \p Get is
  /// stored, so \p Get must stay alive for as long as it is used here.
  void setDependencyDirectivesGetter(DependencyDirectivesGetter &Get) {
    GetDependencyDirectives = &Get;
  }
1355 
  /// Set the PreprocessToken flag.
  void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1357 
  /// Whether the identifier spelled \p Id is currently defined as a macro.
  /// Looks \p Id up in the identifier table and defers to the
  /// IdentifierInfo overload.
  bool isMacroDefined(StringRef Id) {
    return isMacroDefined(&Identifiers.get(Id));
  }
isMacroDefined(const IdentifierInfo * II)1361   bool isMacroDefined(const IdentifierInfo *II) {
1362     return II->hasMacroDefinition() &&
1363            (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1364   }
1365 
1366   /// Determine whether II is defined as a macro within the module M,
1367   /// if that is a module that we've already preprocessed. Does not check for
1368   /// macros imported into M.
isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)1369   bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1370     if (!II->hasMacroDefinition())
1371       return false;
1372     auto I = Submodules.find(M);
1373     if (I == Submodules.end())
1374       return false;
1375     auto J = I->second.Macros.find(II);
1376     if (J == I->second.Macros.end())
1377       return false;
1378     auto *MD = J->second.getLatest();
1379     return MD && MD->isDefined();
1380   }
1381 
getMacroDefinition(const IdentifierInfo * II)1382   MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1383     if (!II->hasMacroDefinition())
1384       return {};
1385 
1386     MacroState &S = CurSubmoduleState->Macros[II];
1387     auto *MD = S.getLatest();
1388     while (isa_and_nonnull<VisibilityMacroDirective>(MD))
1389       MD = MD->getPrevious();
1390     return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1391                            S.getActiveModuleMacros(*this, II),
1392                            S.isAmbiguous(*this, II));
1393   }
1394 
getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)1395   MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1396                                           SourceLocation Loc) {
1397     if (!II->hadMacroDefinition())
1398       return {};
1399 
1400     MacroState &S = CurSubmoduleState->Macros[II];
1401     MacroDirective::DefInfo DI;
1402     if (auto *MD = S.getLatest())
1403       DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1404     // FIXME: Compute the set of active module macros at the specified location.
1405     return MacroDefinition(DI.getDirective(),
1406                            S.getActiveModuleMacros(*this, II),
1407                            S.isAmbiguous(*this, II));
1408   }
1409 
1410   /// Given an identifier, return its latest non-imported MacroDirective
1411   /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
getLocalMacroDirective(const IdentifierInfo * II)1412   MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1413     if (!II->hasMacroDefinition())
1414       return nullptr;
1415 
1416     auto *MD = getLocalMacroDirectiveHistory(II);
1417     if (!MD || MD->getDefinition().isUndefined())
1418       return nullptr;
1419 
1420     return MD;
1421   }
1422 
  /// Const overload: forwards to the non-const getMacroInfo() through a
  /// const_cast and hands the result back as a pointer-to-const.
  const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
    return const_cast<Preprocessor*>(this)->getMacroInfo(II);
  }
1426 
getMacroInfo(const IdentifierInfo * II)1427   MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1428     if (!II->hasMacroDefinition())
1429       return nullptr;
1430     if (auto MD = getMacroDefinition(II))
1431       return MD.getMacroInfo();
1432     return nullptr;
1433   }
1434 
1435   /// Given an identifier, return the latest non-imported macro
1436   /// directive for that identifier.
1437   ///
1438   /// One can iterate over all previous macro directives from the most recent
1439   /// one.
1440   MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1441 
1442   /// Add a directive to the macro directive history for this identifier.
1443   void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)1444   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1445                                              SourceLocation Loc) {
1446     DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1447     appendMacroDirective(II, MD);
1448     return MD;
1449   }
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)1450   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1451                                              MacroInfo *MI) {
1452     return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1453   }
1454 
1455   /// Set a MacroDirective that was loaded from a PCH file.
1456   void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1457                                MacroDirective *MD);
1458 
1459   /// Register an exported macro for a module and identifier.
1460   ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II,
1461                               MacroInfo *Macro,
1462                               ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1463   ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II);
1464 
1465   /// Get the list of leaf (non-overridden) module macros for a name.
getLeafModuleMacros(const IdentifierInfo * II)1466   ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1467     if (II->isOutOfDate())
1468       updateOutOfDateIdentifier(*II);
1469     auto I = LeafModuleMacros.find(II);
1470     if (I != LeafModuleMacros.end())
1471       return I->second;
1472     return {};
1473   }
1474 
1475   /// Get the list of submodules that we're currently building.
  /// \returns a view of BuildingSubmoduleStack. The view does not own the
  /// elements and refers to the stack's current storage.
  ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const {
    return BuildingSubmoduleStack;
  }
1479 
1480   /// \{
1481   /// Iterators for the macro history table. Currently defined macros have
1482   /// IdentifierInfo::hasMacroDefinition() set and an empty
1483   /// MacroInfo::getUndefLoc() at the head of the list.
1484   using macro_iterator = MacroMap::const_iterator;
1485 
1486   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1487   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1488 
1489   llvm::iterator_range<macro_iterator>
1490   macros(bool IncludeExternalMacros = true) const {
1491     macro_iterator begin = macro_begin(IncludeExternalMacros);
1492     macro_iterator end = macro_end(IncludeExternalMacros);
1493     return llvm::make_range(begin, end);
1494   }
1495 
1496   /// \}
1497 
1498   /// Mark the given clang module as affecting the current clang module or translation unit.
markClangModuleAsAffecting(Module * M)1499   void markClangModuleAsAffecting(Module *M) {
1500     assert(M->isModuleMapModule());
1501     if (!BuildingSubmoduleStack.empty()) {
1502       if (M != BuildingSubmoduleStack.back().M)
1503         BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M);
1504     } else {
1505       AffectingClangModules.insert(M);
1506     }
1507   }
1508 
1509   /// Get the set of top-level clang modules that affected preprocessing, but were not
1510   /// imported.
  /// \returns a read-only reference to the preprocessor-level set; modules
  /// recorded on a submodule under construction are not part of it (see
  /// markClangModuleAsAffecting()).
  const llvm::SmallSetVector<Module *, 2> &getAffectingClangModules() const {
    return AffectingClangModules;
  }
1514 
1515   /// Mark the file as included.
1516   /// Returns true if this is the first time the file was included.
  bool markIncluded(FileEntryRef File) {
    // Flag the header's file info as locally included, then record the file.
    HeaderInfo.getFileInfo(File).IsLocallyIncluded = true;
    // insert() reports through .second whether the file was newly added.
    return IncludedFiles.insert(File).second;
  }
1521 
1522   /// Return true if this header has already been included.
  bool alreadyIncluded(FileEntryRef File) const {
    // The result of this call is discarded.
    // NOTE(review): presumably it is made for a side effect of getFileInfo()
    // that must happen before IncludedFiles is consulted -- confirm against
    // HeaderSearch::getFileInfo.
    HeaderInfo.getFileInfo(File);
    return IncludedFiles.count(File);
  }
1527 
1528   /// Get the set of included files.
  /// Mutable access to the set of included files.
  IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
  /// Read-only access to the set of included files.
  const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
1531 
1532   /// Return the name of the macro defined before \p Loc that has
1533   /// spelling \p Tokens.  If there are multiple macros with same spelling,
1534   /// return the last one defined.
1535   StringRef getLastMacroWithSpelling(SourceLocation Loc,
1536                                      ArrayRef<TokenValue> Tokens) const;
1537 
  /// Get the predefines for this Preprocessor.
1539   /// Used by some third-party tools to inspect and add predefines (see
1540   /// https://github.com/llvm/llvm-project/issues/57483).
getPredefines()1541   const std::string &getPredefines() const { return Predefines; }
1542 
1543   /// Set the predefines for this Preprocessor.
1544   ///
1545   /// These predefines are automatically injected when parsing the main file.
setPredefines(std::string P)1546   void setPredefines(std::string P) { Predefines = std::move(P); }
1547 
1548   /// Return information about the specified preprocessor
1549   /// identifier token.
getIdentifierInfo(StringRef Name)1550   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1551     return &Identifiers.get(Name);
1552   }
1553 
1554   /// Add the specified pragma handler to this preprocessor.
1555   ///
1556   /// If \p Namespace is non-null, then it is a token required to exist on the
1557   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1558   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
AddPragmaHandler(PragmaHandler * Handler)1559   void AddPragmaHandler(PragmaHandler *Handler) {
1560     AddPragmaHandler(StringRef(), Handler);
1561   }
1562 
1563   /// Remove the specific pragma handler from this preprocessor.
1564   ///
1565   /// If \p Namespace is non-null, then it should be the namespace that
1566   /// \p Handler was added to. It is an error to remove a handler that
1567   /// has not been registered.
1568   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
RemovePragmaHandler(PragmaHandler * Handler)1569   void RemovePragmaHandler(PragmaHandler *Handler) {
1570     RemovePragmaHandler(StringRef(), Handler);
1571   }
1572 
1573   /// Install empty handlers for all pragmas (making them ignored).
1574   void IgnorePragmas();
1575 
1576   /// Set empty line handler.
setEmptylineHandler(EmptylineHandler * Handler)1577   void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1578 
getEmptylineHandler()1579   EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1580 
1581   /// Add the specified comment handler to the preprocessor.
1582   void addCommentHandler(CommentHandler *Handler);
1583 
1584   /// Remove the specified comment handler.
1585   ///
1586   /// It is an error to remove a handler that has not been registered.
1587   void removeCommentHandler(CommentHandler *Handler);
1588 
1589   /// Set the code completion handler to the given object.
setCodeCompletionHandler(CodeCompletionHandler & Handler)1590   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1591     CodeComplete = &Handler;
1592   }
1593 
1594   /// Retrieve the current code-completion handler.
getCodeCompletionHandler()1595   CodeCompletionHandler *getCodeCompletionHandler() const {
1596     return CodeComplete;
1597   }
1598 
1599   /// Clear out the code completion handler.
clearCodeCompletionHandler()1600   void clearCodeCompletionHandler() {
1601     CodeComplete = nullptr;
1602   }
1603 
1604   /// Hook used by the lexer to invoke the "included file" code
1605   /// completion point.
1606   void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1607 
1608   /// Hook used by the lexer to invoke the "natural language" code
1609   /// completion point.
1610   void CodeCompleteNaturalLanguage();
1611 
1612   /// Set the code completion token for filtering purposes.
setCodeCompletionIdentifierInfo(IdentifierInfo * Filter)1613   void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1614     CodeCompletionII = Filter;
1615   }
1616 
1617   /// Set the code completion token range for detecting replacement range later
1618   /// on.
setCodeCompletionTokenRange(const SourceLocation Start,const SourceLocation End)1619   void setCodeCompletionTokenRange(const SourceLocation Start,
1620                                    const SourceLocation End) {
1621     CodeCompletionTokenRange = {Start, End};
1622   }
getCodeCompletionTokenRange()1623   SourceRange getCodeCompletionTokenRange() const {
1624     return CodeCompletionTokenRange;
1625   }
1626 
1627   /// Get the code completion token for filtering purposes.
getCodeCompletionFilter()1628   StringRef getCodeCompletionFilter() {
1629     if (CodeCompletionII)
1630       return CodeCompletionII->getName();
1631     return {};
1632   }
1633 
1634   /// Retrieve the preprocessing record, or NULL if there is no
1635   /// preprocessing record.
getPreprocessingRecord()1636   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1637 
1638   /// Create a new preprocessing record, which will keep track of
1639   /// all macro expansions, macro definitions, etc.
1640   void createPreprocessingRecord();
1641 
1642   /// Returns true if the FileEntry is the PCH through header.
1643   bool isPCHThroughHeader(const FileEntry *FE);
1644 
1645   /// True if creating a PCH with a through header.
1646   bool creatingPCHWithThroughHeader();
1647 
1648   /// True if using a PCH with a through header.
1649   bool usingPCHWithThroughHeader();
1650 
1651   /// True if creating a PCH with a #pragma hdrstop.
1652   bool creatingPCHWithPragmaHdrStop();
1653 
1654   /// True if using a PCH with a #pragma hdrstop.
1655   bool usingPCHWithPragmaHdrStop();
1656 
1657   /// Skip tokens until after the #include of the through header or
1658   /// until after a #pragma hdrstop.
1659   void SkipTokensWhileUsingPCH();
1660 
1661   /// Process directives while skipping until the through header or
1662   /// #pragma hdrstop is found.
1663   void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1664                                            SourceLocation HashLoc);
1665 
1666   /// Enter the specified FileID as the main source file,
1667   /// which implicitly adds the builtin defines etc.
1668   void EnterMainSourceFile();
1669 
1670   /// Inform the preprocessor callbacks that processing is complete.
1671   void EndSourceFile();
1672 
1673   /// Add a source file to the top of the include stack and
1674   /// start lexing tokens from it instead of the current buffer.
1675   ///
1676   /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1677   bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir,
1678                        SourceLocation Loc, bool IsFirstIncludeOfFile = true);
1679 
1680   /// Add a Macro to the top of the include stack and start lexing
1681   /// tokens from it instead of the current buffer.
1682   ///
1683   /// \param Args specifies the tokens input to a function-like macro.
1684   /// \param ILEnd specifies the location of the ')' for a function-like macro
1685   /// or the identifier for an object-like macro.
1686   void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1687                   MacroArgs *Args);
1688 
1689 private:
1690   /// Add a "macro" context to the top of the include stack,
1691   /// which will cause the lexer to start returning the specified tokens.
1692   ///
1693   /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1694   /// will not be subject to further macro expansion. Otherwise, these tokens
1695   /// will be re-macro-expanded when/if expansion is enabled.
1696   ///
1697   /// If \p OwnsTokens is false, this method assumes that the specified stream
1698   /// of tokens has a permanent owner somewhere, so they do not need to be
1699   /// copied. If it is true, it assumes the array of tokens is allocated with
1700   /// \c new[] and the Preprocessor will delete[] it.
1701   ///
1702   /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1703   /// set, see the flag documentation for details.
1704   void EnterTokenStream(const Token *Toks, unsigned NumToks,
1705                         bool DisableMacroExpansion, bool OwnsTokens,
1706                         bool IsReinject);
1707 
1708 public:
EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion,bool IsReinject)1709   void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1710                         bool DisableMacroExpansion, bool IsReinject) {
1711     EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1712                      IsReinject);
1713   }
1714 
EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion,bool IsReinject)1715   void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1716                         bool IsReinject) {
1717     EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1718                      IsReinject);
1719   }
1720 
1721   /// Pop the current lexer/macro exp off the top of the lexer stack.
1722   ///
1723   /// This should only be used in situations where the current state of the
1724   /// top-of-stack lexer is known.
1725   void RemoveTopOfLexerStack();
1726 
1727   /// From the point that this method is called, and until
1728   /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1729   /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1730   /// make the Preprocessor re-lex the same tokens.
1731   ///
1732   /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1733   /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1734   /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1735   ///
1736   /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1737   /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1738   /// tokens will continue indefinitely.
1739   ///
1740   /// \param Unannotated Whether token annotations are reverted upon calling
1741   /// Backtrack().
1742   void EnableBacktrackAtThisPos(bool Unannotated = false);
1743 
1744 private:
1745   std::pair<CachedTokensTy::size_type, bool> LastBacktrackPos();
1746 
1747   CachedTokensTy PopUnannotatedBacktrackTokens();
1748 
1749 public:
1750   /// Disable the last EnableBacktrackAtThisPos call.
1751   void CommitBacktrackedTokens();
1752 
1753   /// Make Preprocessor re-lex the tokens that were lexed since
1754   /// EnableBacktrackAtThisPos() was previously called.
1755   void Backtrack();
1756 
1757   /// True if EnableBacktrackAtThisPos() was called and
1758   /// caching of tokens is on.
isBacktrackEnabled()1759   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1760 
1761   /// True if EnableBacktrackAtThisPos() was called and
1762   /// caching of unannotated tokens is on.
isUnannotatedBacktrackEnabled()1763   bool isUnannotatedBacktrackEnabled() const {
1764     return !UnannotatedBacktrackTokens.empty();
1765   }
1766 
1767   /// Lex the next token for this preprocessor.
1768   void Lex(Token &Result);
1769 
1770   /// Lex all tokens for this preprocessor until (and excluding) end of file.
1771   void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr);
1772 
1773   /// Lex a token, forming a header-name token if possible.
1774   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1775 
1776   /// Lex the parameters for an #embed directive, returns nullopt on error.
1777   std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
1778                                                              bool ForHasEmbed);
1779 
1780   /// Get the start location of the first pp-token in main file.
getMainFileFirstPPTokenLoc()1781   SourceLocation getMainFileFirstPPTokenLoc() const {
1782     assert(FirstPPTokenLoc.isValid() &&
1783            "Did not see the first pp-token in the main file");
1784     return FirstPPTokenLoc;
1785   }
1786 
1787   bool LexAfterModuleImport(Token &Result);
1788   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
1789 
1790   void makeModuleVisible(Module *M, SourceLocation Loc,
1791                          bool IncludeExports = true);
1792 
getModuleImportLoc(Module * M)1793   SourceLocation getModuleImportLoc(Module *M) const {
1794     return CurSubmoduleState->VisibleModules.getImportLoc(M);
1795   }
1796 
1797   /// Lex a string literal, which may be the concatenation of multiple
1798   /// string literals and may even come from macro expansion.
  /// \returns true on success, false if an error diagnostic has been generated.
LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1800   bool LexStringLiteral(Token &Result, std::string &String,
1801                         const char *DiagnosticTag, bool AllowMacroExpansion) {
1802     if (AllowMacroExpansion)
1803       Lex(Result);
1804     else
1805       LexUnexpandedToken(Result);
1806     return FinishLexStringLiteral(Result, String, DiagnosticTag,
1807                                   AllowMacroExpansion);
1808   }
1809 
1810   /// Complete the lexing of a string literal where the first token has
1811   /// already been lexed (see LexStringLiteral).
1812   bool FinishLexStringLiteral(Token &Result, std::string &String,
1813                               const char *DiagnosticTag,
1814                               bool AllowMacroExpansion);
1815 
1816   /// Lex a token.  If it's a comment, keep lexing until we get
1817   /// something not a comment.
1818   ///
1819   /// This is useful in -E -C mode where comments would foul up preprocessor
1820   /// directive handling.
LexNonComment(Token & Result)1821   void LexNonComment(Token &Result) {
1822     do
1823       Lex(Result);
1824     while (Result.getKind() == tok::comment);
1825   }
1826 
1827   /// Just like Lex, but disables macro expansion of identifier tokens.
LexUnexpandedToken(Token & Result)1828   void LexUnexpandedToken(Token &Result) {
1829     // Disable macro expansion.
1830     bool OldVal = DisableMacroExpansion;
1831     DisableMacroExpansion = true;
1832     // Lex the token.
1833     Lex(Result);
1834 
1835     // Reenable it.
1836     DisableMacroExpansion = OldVal;
1837   }
1838 
1839   /// Like LexNonComment, but this disables macro expansion of
1840   /// identifier tokens.
LexUnexpandedNonComment(Token & Result)1841   void LexUnexpandedNonComment(Token &Result) {
1842     do
1843       LexUnexpandedToken(Result);
1844     while (Result.getKind() == tok::comment);
1845   }
1846 
1847   /// Parses a simple integer literal to get its numeric value.  Floating
1848   /// point literals and user defined literals are rejected.  Used primarily to
1849   /// handle pragmas that accept integer arguments.
1850   bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1851 
1852   /// Disables macro expansion everywhere except for preprocessor directives.
SetMacroExpansionOnlyInDirectives()1853   void SetMacroExpansionOnlyInDirectives() {
1854     DisableMacroExpansion = true;
1855     MacroExpansionInDirectivesOverride = true;
1856   }
1857 
1858   /// Peeks ahead N tokens and returns that token without consuming any
1859   /// tokens.
1860   ///
1861   /// LookAhead(0) returns the next token that would be returned by Lex(),
1862   /// LookAhead(1) returns the token after it, etc.  This returns normal
1863   /// tokens after phase 5.  As such, it is equivalent to using
1864   /// 'Lex', not 'LexUnexpandedToken'.
LookAhead(unsigned N)1865   const Token &LookAhead(unsigned N) {
1866     assert(LexLevel == 0 && "cannot use lookahead while lexing");
1867     if (CachedLexPos + N < CachedTokens.size())
1868       return CachedTokens[CachedLexPos+N];
1869     else
1870       return PeekAhead(N+1);
1871   }
1872 
1873   /// When backtracking is enabled and tokens are cached,
  /// this allows reverting a specific number of tokens.
1875   ///
1876   /// Note that the number of tokens being reverted should be up to the last
1877   /// backtrack position, not more.
RevertCachedTokens(unsigned N)1878   void RevertCachedTokens(unsigned N) {
1879     assert(isBacktrackEnabled() &&
1880            "Should only be called when tokens are cached for backtracking");
1881     assert(signed(CachedLexPos) - signed(N) >=
1882                signed(LastBacktrackPos().first) &&
1883            "Should revert tokens up to the last backtrack position, not more");
1884     assert(signed(CachedLexPos) - signed(N) >= 0 &&
1885            "Corrupted backtrack positions ?");
1886     CachedLexPos -= N;
1887   }
1888 
1889   /// Enters a token in the token stream to be lexed next.
1890   ///
  /// If Backtrack() is called afterwards, the token will remain at the
1892   /// insertion point.
1893   /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1894   /// flag set. See the flag documentation for details.
EnterToken(const Token & Tok,bool IsReinject)1895   void EnterToken(const Token &Tok, bool IsReinject) {
1896     if (LexLevel) {
1897       // It's not correct in general to enter caching lex mode while in the
1898       // middle of a nested lexing action.
1899       auto TokCopy = std::make_unique<Token[]>(1);
1900       TokCopy[0] = Tok;
1901       EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1902     } else {
1903       EnterCachingLexMode();
1904       assert(IsReinject && "new tokens in the middle of cached stream");
1905       CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1906     }
1907   }
1908 
1909   /// We notify the Preprocessor that if it is caching tokens (because
1910   /// backtrack is enabled) it should replace the most recent cached tokens
1911   /// with the given annotation token. This function has no effect if
1912   /// backtracking is not enabled.
1913   ///
1914   /// Note that the use of this function is just for optimization, so that the
  /// cached tokens don't get re-parsed and re-resolved after a backtrack is
1916   /// invoked.
AnnotateCachedTokens(const Token & Tok)1917   void AnnotateCachedTokens(const Token &Tok) {
1918     assert(Tok.isAnnotation() && "Expected annotation token");
1919     if (CachedLexPos != 0 && isBacktrackEnabled())
1920       AnnotatePreviousCachedTokens(Tok);
1921   }
1922 
1923   /// Get the location of the last cached token, suitable for setting the end
1924   /// location of an annotation token.
getLastCachedTokenLocation()1925   SourceLocation getLastCachedTokenLocation() const {
1926     assert(CachedLexPos != 0);
1927     return CachedTokens[CachedLexPos-1].getLastLoc();
1928   }
1929 
1930   /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1931   /// CachedTokens.
1932   bool IsPreviousCachedToken(const Token &Tok) const;
1933 
1934   /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
1935   /// in \p NewToks.
1936   ///
  /// Useful when a token needs to be split into smaller ones and the most
  /// recent token in CachedTokens must be updated to reflect that.
1939   void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1940 
1941   /// Replace the last token with an annotation token.
1942   ///
1943   /// Like AnnotateCachedTokens(), this routine replaces an
1944   /// already-parsed (and resolved) token with an annotation
1945   /// token. However, this routine only replaces the last token with
1946   /// the annotation token; it does not affect any other cached
1947   /// tokens. This function has no effect if backtracking is not
1948   /// enabled.
ReplaceLastTokenWithAnnotation(const Token & Tok)1949   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1950     assert(Tok.isAnnotation() && "Expected annotation token");
1951     if (CachedLexPos != 0 && isBacktrackEnabled())
1952       CachedTokens[CachedLexPos-1] = Tok;
1953   }
1954 
1955   /// Enter an annotation token into the token stream.
1956   void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1957                             void *AnnotationVal);
1958 
1959   /// Determine whether it's possible for a future call to Lex to produce an
1960   /// annotation token created by a previous call to EnterAnnotationToken.
mightHavePendingAnnotationTokens()1961   bool mightHavePendingAnnotationTokens() {
1962     return CurLexerCallback != CLK_Lexer;
1963   }
1964 
1965   /// Update the current token to represent the provided
1966   /// identifier, in order to cache an action performed by typo correction.
TypoCorrectToken(const Token & Tok)1967   void TypoCorrectToken(const Token &Tok) {
1968     assert(Tok.getIdentifierInfo() && "Expected identifier token");
1969     if (CachedLexPos != 0 && isBacktrackEnabled())
1970       CachedTokens[CachedLexPos-1] = Tok;
1971   }
1972 
1973   /// Recompute the current lexer kind based on the CurLexer/
1974   /// CurTokenLexer pointers.
1975   void recomputeCurLexerKind();
1976 
1977   /// Returns true if incremental processing is enabled
isIncrementalProcessingEnabled()1978   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1979 
1980   /// Enables the incremental processing
1981   void enableIncrementalProcessing(bool value = true) {
1982     IncrementalProcessing = value;
1983   }
1984 
1985   /// Specify the point at which code-completion will be performed.
1986   ///
1987   /// \param File the file in which code completion should occur. If
1988   /// this file is included multiple times, code-completion will
1989   /// perform completion the first time it is included. If NULL, this
1990   /// function clears out the code-completion point.
1991   ///
1992   /// \param Line the line at which code completion should occur
1993   /// (1-based).
1994   ///
1995   /// \param Column the column at which code completion should occur
1996   /// (1-based).
1997   ///
1998   /// \returns true if an error occurred, false otherwise.
1999   bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line,
2000                               unsigned Column);
2001 
2002   /// Determine if we are performing code completion.
isCodeCompletionEnabled()2003   bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
2004 
2005   /// Returns the location of the code-completion point.
2006   ///
2007   /// Returns an invalid location if code-completion is not enabled or the file
2008   /// containing the code-completion point has not been lexed yet.
getCodeCompletionLoc()2009   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
2010 
2011   /// Returns the start location of the file of code-completion point.
2012   ///
2013   /// Returns an invalid location if code-completion is not enabled or the file
2014   /// containing the code-completion point has not been lexed yet.
getCodeCompletionFileLoc()2015   SourceLocation getCodeCompletionFileLoc() const {
2016     return CodeCompletionFileLoc;
2017   }
2018 
2019   /// Returns true if code-completion is enabled and we have hit the
2020   /// code-completion point.
isCodeCompletionReached()2021   bool isCodeCompletionReached() const { return CodeCompletionReached; }
2022 
2023   /// Note that we hit the code-completion point.
setCodeCompletionReached()2024   void setCodeCompletionReached() {
2025     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
2026     CodeCompletionReached = true;
2027     // Silence any diagnostics that occur after we hit the code-completion.
2028     getDiagnostics().setSuppressAllDiagnostics(true);
2029   }
2030 
2031   /// The location of the currently-active \#pragma clang
2032   /// arc_cf_code_audited begin.
2033   ///
2034   /// Returns an invalid location if there is no such pragma active.
getPragmaARCCFCodeAuditedInfo()2035   IdentifierLoc getPragmaARCCFCodeAuditedInfo() const {
2036     return PragmaARCCFCodeAuditedInfo;
2037   }
2038 
2039   /// Set the location of the currently-active \#pragma clang
2040   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
setPragmaARCCFCodeAuditedInfo(IdentifierInfo * Ident,SourceLocation Loc)2041   void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
2042                                      SourceLocation Loc) {
2043     PragmaARCCFCodeAuditedInfo = IdentifierLoc(Loc, Ident);
2044   }
2045 
2046   /// The location of the currently-active \#pragma clang
2047   /// assume_nonnull begin.
2048   ///
2049   /// Returns an invalid location if there is no such pragma active.
getPragmaAssumeNonNullLoc()2050   SourceLocation getPragmaAssumeNonNullLoc() const {
2051     return PragmaAssumeNonNullLoc;
2052   }
2053 
2054   /// Set the location of the currently-active \#pragma clang
2055   /// assume_nonnull begin.  An invalid location ends the pragma.
setPragmaAssumeNonNullLoc(SourceLocation Loc)2056   void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
2057     PragmaAssumeNonNullLoc = Loc;
2058   }
2059 
2060   /// Get the location of the recorded unterminated \#pragma clang
2061   /// assume_nonnull begin in the preamble, if one exists.
2062   ///
  /// Returns an invalid location if the preamble did not end with
2064   /// such a pragma active or if there is no recorded preamble.
getPreambleRecordedPragmaAssumeNonNullLoc()2065   SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const {
2066     return PreambleRecordedPragmaAssumeNonNullLoc;
2067   }
2068 
2069   /// Record the location of the unterminated \#pragma clang
2070   /// assume_nonnull begin in the preamble.
setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc)2071   void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) {
2072     PreambleRecordedPragmaAssumeNonNullLoc = Loc;
2073   }
2074 
2075   /// Set the directory in which the main file should be considered
2076   /// to have been found, if it is not a real file.
setMainFileDir(DirectoryEntryRef Dir)2077   void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; }
2078 
2079   /// Instruct the preprocessor to skip part of the main source file.
2080   ///
2081   /// \param Bytes The number of bytes in the preamble to skip.
2082   ///
2083   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
2084   /// start of a line.
setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)2085   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
2086     SkipMainFilePreamble.first = Bytes;
2087     SkipMainFilePreamble.second = StartOfLine;
2088   }
2089 
2090   /// Forwarding function for diagnostics.  This emits a diagnostic at
2091   /// the specified Token's location, translating the token's start
2092   /// position in the current buffer into a SourcePosition object for rendering.
Diag(SourceLocation Loc,unsigned DiagID)2093   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
2094     return Diags->Report(Loc, DiagID);
2095   }
2096 
Diag(const Token & Tok,unsigned DiagID)2097   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
2098     return Diags->Report(Tok.getLocation(), DiagID);
2099   }
2100 
2101   /// Return the 'spelling' of the token at the given
2102   /// location; does not go up to the spelling location or down to the
2103   /// expansion location.
2104   ///
2105   /// \param buffer A buffer which will be used only if the token requires
2106   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
2107   /// \param invalid If non-null, will be set \c true if an error occurs.
2108   StringRef getSpelling(SourceLocation loc,
2109                         SmallVectorImpl<char> &buffer,
2110                         bool *invalid = nullptr) const {
2111     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
2112   }
2113 
2114   /// Return the 'spelling' of the Tok token.
2115   ///
2116   /// The spelling of a token is the characters used to represent the token in
2117   /// the source file after trigraph expansion and escaped-newline folding.  In
2118   /// particular, this wants to get the true, uncanonicalized, spelling of
2119   /// things like digraphs, UCNs, etc.
2120   ///
2121   /// \param Invalid If non-null, will be set \c true if an error occurs.
2122   std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
2123     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
2124   }
2125 
2126   /// Get the spelling of a token into a preallocated buffer, instead
2127   /// of as an std::string.
2128   ///
2129   /// The caller is required to allocate enough space for the token, which is
2130   /// guaranteed to be at least Tok.getLength() bytes long. The length of the
2131   /// actual result is returned.
2132   ///
2133   /// Note that this method may do two possible things: it may either fill in
2134   /// the buffer specified with characters, or it may *change the input pointer*
2135   /// to point to a constant buffer with the data already in it (avoiding a
2136   /// copy).  The caller is not allowed to modify the returned buffer pointer
2137   /// if an internal buffer is returned.
2138   unsigned getSpelling(const Token &Tok, const char *&Buffer,
2139                        bool *Invalid = nullptr) const {
2140     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
2141   }
2142 
2143   /// Get the spelling of a token into a SmallVector.
2144   ///
2145   /// Note that the returned StringRef may not point to the
2146   /// supplied buffer if a copy can be avoided.
2147   StringRef getSpelling(const Token &Tok,
2148                         SmallVectorImpl<char> &Buffer,
2149                         bool *Invalid = nullptr) const;
2150 
2151   /// Relex the token at the specified location.
2152   /// \returns true if there was a failure, false on success.
2153   bool getRawToken(SourceLocation Loc, Token &Result,
2154                    bool IgnoreWhiteSpace = false) {
2155     return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
2156   }
2157 
2158   /// Given a Token \p Tok that is a numeric constant with length 1,
2159   /// return the value of constant as an unsigned 8-bit integer.
2160   uint8_t
2161   getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
2162                                               bool *Invalid = nullptr) const {
2163     assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) &&
2164            Tok.getLength() == 1 && "Called on unsupported token");
2165     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
2166 
2167     // If the token is carrying a literal data pointer, just use it.
2168     if (const char *D = Tok.getLiteralData())
2169       return (Tok.getKind() == tok::binary_data) ? *D : *D - '0';
2170 
2171     assert(Tok.is(tok::numeric_constant) && "binary data with no data");
2172     // Otherwise, fall back on getCharacterData, which is slower, but always
2173     // works.
2174     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0';
2175   }
2176 
2177   /// Retrieve the name of the immediate macro expansion.
2178   ///
2179   /// This routine starts from a source location, and finds the name of the
2180   /// macro responsible for its immediate expansion. It looks through any
2181   /// intervening macro argument expansions to compute this. It returns a
2182   /// StringRef that refers to the SourceManager-owned buffer of the source
2183   /// where that macro name is spelled. Thus, the result shouldn't out-live
2184   /// the SourceManager.
getImmediateMacroName(SourceLocation Loc)2185   StringRef getImmediateMacroName(SourceLocation Loc) {
2186     return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
2187   }
2188 
2189   /// Plop the specified string into a scratch buffer and set the
2190   /// specified token's location and length to it.
2191   ///
2192   /// If specified, the source location provides a location of the expansion
2193   /// point of the token.
2194   void CreateString(StringRef Str, Token &Tok,
2195                     SourceLocation ExpansionLocStart = SourceLocation(),
2196                     SourceLocation ExpansionLocEnd = SourceLocation());
2197 
2198   /// Split the first Length characters out of the token starting at TokLoc
2199   /// and return a location pointing to the split token. Re-lexing from the
2200   /// split token will return the split token rather than the original.
2201   SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
2202 
2203   /// Computes the source location just past the end of the
2204   /// token at this source location.
2205   ///
2206   /// This routine can be used to produce a source location that
2207   /// points just past the end of the token referenced by \p Loc, and
2208   /// is generally used when a diagnostic needs to point just after a
2209   /// token where it expected something different that it received. If
2210   /// the returned source location would not be meaningful (e.g., if
2211   /// it points into a macro), this routine returns an invalid
2212   /// source location.
2213   ///
2214   /// \param Offset an offset from the end of the token, where the source
2215   /// location should refer to. The default offset (0) produces a source
2216   /// location pointing just past the end of the token; an offset of 1 produces
2217   /// a source location pointing to the last character in the token, etc.
2218   SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
2219     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
2220   }
2221 
2222   /// Returns true if the given MacroID location points at the first
2223   /// token of the macro expansion.
2224   ///
2225   /// \param MacroBegin If non-null and function returns true, it is set to
2226   /// begin location of the macro.
2227   bool isAtStartOfMacroExpansion(SourceLocation loc,
2228                                  SourceLocation *MacroBegin = nullptr) const {
2229     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
2230                                             MacroBegin);
2231   }
2232 
2233   /// Returns true if the given MacroID location points at the last
2234   /// token of the macro expansion.
2235   ///
2236   /// \param MacroEnd If non-null and function returns true, it is set to
2237   /// end location of the macro.
2238   bool isAtEndOfMacroExpansion(SourceLocation loc,
2239                                SourceLocation *MacroEnd = nullptr) const {
2240     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
2241   }
2242 
2243   /// Print the token to stderr, used for debugging.
2244   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
2245   void DumpLocation(SourceLocation Loc) const;
2246   void DumpMacro(const MacroInfo &MI) const;
2247   void dumpMacroInfo(const IdentifierInfo *II);
2248 
2249   /// Given a location that specifies the start of a
2250   /// token, return a new location that specifies a character within the token.
AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)2251   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
2252                                          unsigned Char) const {
2253     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
2254   }
2255 
2256   /// Increment the counters for the number of token paste operations
2257   /// performed.
2258   ///
2259   /// If fast was specified, this is a 'fast paste' case we handled.
IncrementPasteCounter(bool isFast)2260   void IncrementPasteCounter(bool isFast) {
2261     if (isFast)
2262       ++NumFastTokenPaste;
2263     else
2264       ++NumTokenPaste;
2265   }
2266 
2267   void PrintStats();
2268 
2269   size_t getTotalMemory() const;
2270 
2271   /// When the macro expander pastes together a comment (/##/) in Microsoft
2272   /// mode, this method handles updating the current state, returning the
2273   /// token on the next source line.
2274   void HandleMicrosoftCommentPaste(Token &Tok);
2275 
2276   //===--------------------------------------------------------------------===//
2277   // Preprocessor callback methods.  These are invoked by a lexer as various
2278   // directives and events are found.
2279 
2280   /// Given a tok::raw_identifier token, look up the
2281   /// identifier information for the token and install it into the token,
2282   /// updating the token kind accordingly.
2283   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
2284 
2285 private:
2286   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
2287 
2288 public:
2289   /// Specifies the reason for poisoning an identifier.
2290   ///
2291   /// If that identifier is accessed while poisoned, then this reason will be
2292   /// used instead of the default "poisoned" diagnostic.
2293   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
2294 
2295   /// Display reason for poisoned identifier.
2296   void HandlePoisonedIdentifier(Token & Identifier);
2297 
MaybeHandlePoisonedIdentifier(Token & Identifier)2298   void MaybeHandlePoisonedIdentifier(Token & Identifier) {
2299     if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
2300       if(II->isPoisoned()) {
2301         HandlePoisonedIdentifier(Identifier);
2302       }
2303     }
2304   }
2305 
2306   /// Check whether the next pp-token is one of the specificed token kind. this
2307   /// method should have no observable side-effect on the lexed tokens.
isNextPPTokenOneOf(Ts...Ks)2308   template <typename... Ts> bool isNextPPTokenOneOf(Ts... Ks) {
2309     static_assert(sizeof...(Ts) > 0,
2310                   "requires at least one tok::TokenKind specified");
2311     // Do some quick tests for rejection cases.
2312     std::optional<Token> Val;
2313     if (CurLexer)
2314       Val = CurLexer->peekNextPPToken();
2315     else
2316       Val = CurTokenLexer->peekNextPPToken();
2317 
2318     if (!Val) {
2319       // We have run off the end.  If it's a source file we don't
2320       // examine enclosing ones (C99 5.1.1.2p4).  Otherwise walk up the
2321       // macro stack.
2322       if (CurPPLexer)
2323         return false;
2324       for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
2325         if (Entry.TheLexer)
2326           Val = Entry.TheLexer->peekNextPPToken();
2327         else
2328           Val = Entry.TheTokenLexer->peekNextPPToken();
2329 
2330         if (Val)
2331           break;
2332 
2333         // Ran off the end of a source file?
2334         if (Entry.ThePPLexer)
2335           return false;
2336       }
2337     }
2338 
2339     // Okay, we found the token and return.  Otherwise we found the end of the
2340     // translation unit.
2341     return Val->isOneOf(Ks...);
2342   }
2343 
2344 private:
2345   /// Identifiers used for SEH handling in Borland. These are only
2346   /// allowed in particular circumstances
2347   // __except block
2348   IdentifierInfo *Ident__exception_code,
2349                  *Ident___exception_code,
2350                  *Ident_GetExceptionCode;
2351   // __except filter expression
2352   IdentifierInfo *Ident__exception_info,
2353                  *Ident___exception_info,
2354                  *Ident_GetExceptionInfo;
2355   // __finally
2356   IdentifierInfo *Ident__abnormal_termination,
2357                  *Ident___abnormal_termination,
2358                  *Ident_AbnormalTermination;
2359 
2360   const char *getCurLexerEndPos();
2361   void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
2362 
2363 public:
2364   void PoisonSEHIdentifiers(bool Poison = true); // Borland
2365 
2366   /// Callback invoked when the lexer reads an identifier and has
2367   /// filled in the tokens IdentifierInfo member.
2368   ///
2369   /// This callback potentially macro expands it or turns it into a named
2370   /// token (like 'for').
2371   ///
2372   /// \returns true if we actually computed a token, false if we need to
2373   /// lex again.
2374   bool HandleIdentifier(Token &Identifier);
2375 
2376   /// Callback invoked when the lexer hits the end of the current file.
2377   ///
2378   /// This either returns the EOF token and returns true, or
2379   /// pops a level off the include stack and returns false, at which point the
2380   /// client should call lex again.
2381   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
2382 
2383   /// Callback invoked when the current TokenLexer hits the end of its
2384   /// token stream.
2385   bool HandleEndOfTokenLexer(Token &Result);
2386 
2387   /// Callback invoked when the lexer sees a # token at the start of a
2388   /// line.
2389   ///
2390   /// This consumes the directive, modifies the lexer/preprocessor state, and
2391   /// advances the lexer(s) so that the next token read is the correct one.
2392   void HandleDirective(Token &Result);
2393 
2394   /// Ensure that the next token is a tok::eod token.
2395   ///
2396   /// If not, emit a diagnostic and consume up until the eod.
2397   /// If \p EnableMacros is true, then we consider macros that expand to zero
2398   /// tokens as being ok.
2399   ///
2400   /// \return The location of the end of the directive (the terminating
2401   /// newline).
2402   SourceLocation CheckEndOfDirective(const char *DirType,
2403                                      bool EnableMacros = false);
2404 
2405   /// Read and discard all tokens remaining on the current line until
2406   /// the tok::eod token is found. Returns the range of the skipped tokens.
DiscardUntilEndOfDirective()2407   SourceRange DiscardUntilEndOfDirective() {
2408     Token Tmp;
2409     return DiscardUntilEndOfDirective(Tmp);
2410   }
2411 
2412   /// Same as above except retains the token that was found.
2413   SourceRange DiscardUntilEndOfDirective(Token &Tok);
2414 
2415   /// Returns true if the preprocessor has seen a use of
2416   /// __DATE__ or __TIME__ in the file so far.
SawDateOrTime()2417   bool SawDateOrTime() const {
2418     return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
2419   }
  /// Return the current value of the CounterValue member.
  /// NOTE(review): presumably the running value behind __COUNTER__ — confirm.
  unsigned getCounterValue() const { return CounterValue; }
  /// Overwrite the CounterValue member with \p V.
  void setCounterValue(unsigned V) { CounterValue = V; }
2422 
getCurrentFPEvalMethod()2423   LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const {
2424     assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine &&
2425            "FPEvalMethod should be set either from command line or from the "
2426            "target info");
2427     return CurrentFPEvalMethod;
2428   }
2429 
  /// Return the value of TUFPEvalMethod.
  /// NOTE(review): presumably the translation-unit-wide FP evaluation method,
  /// as opposed to the current one — confirm.
  LangOptions::FPEvalMethodKind getTUFPEvalMethod() const {
    return TUFPEvalMethod;
  }
2433 
  /// Return the pragma location most recently recorded by
  /// setCurrentFPEvalMethod.
  SourceLocation getLastFPEvalPragmaLocation() const {
    return LastFPEvalPragmaLocation;
  }
2437 
setCurrentFPEvalMethod(SourceLocation PragmaLoc,LangOptions::FPEvalMethodKind Val)2438   void setCurrentFPEvalMethod(SourceLocation PragmaLoc,
2439                               LangOptions::FPEvalMethodKind Val) {
2440     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2441            "FPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2442     // This is the location of the '#pragma float_control" where the
2443     // execution state is modifed.
2444     LastFPEvalPragmaLocation = PragmaLoc;
2445     CurrentFPEvalMethod = Val;
2446     TUFPEvalMethod = Val;
2447   }
2448 
setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)2449   void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) {
2450     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2451            "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2452     TUFPEvalMethod = Val;
2453   }
2454 
2455   /// Retrieves the module that we're currently building, if any.
2456   Module *getCurrentModule();
2457 
  /// Retrieves the module whose implementation we're currently compiling, if any.
2459   Module *getCurrentModuleImplementation();
2460 
  /// Whether we are preprocessing a named module, as reported by
  /// ModuleDeclState.
  bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }
2463 
  /// Whether we are preprocessing a named interface unit, as reported by
  /// ModuleDeclState.
  ///
  /// Note that a module implementation partition is not considered a named
  /// interface unit here, although it is importable; this eases the parsing.
  bool isInNamedInterfaceUnit() const {
    return ModuleDeclState.isNamedInterface();
  }
2471 
  /// Get the name of the named module we're preprocessing, as held by
  /// ModuleDeclState.
  ///
  /// Requires that we are preprocessing a named module.
  StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
2475 
  /// Whether we are in a module implementation unit, as reported by
  /// ModuleDeclState.
  ///
  /// Note that a module implementation partition is not considered an
  /// implementation unit.
  bool isInImplementationUnit() const {
    return ModuleDeclState.isImplementationUnit();
  }
2482 
2483   /// If we're importing a standard C++20 Named Modules.
isInImportingCXXNamedModules()2484   bool isInImportingCXXNamedModules() const {
2485     // NamedModuleImportPath will be non-empty only if we're importing
2486     // Standard C++ named modules.
2487     return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules &&
2488            !IsAtImport;
2489   }
2490 
2491   /// Allocate a new MacroInfo object with the provided SourceLocation.
2492   MacroInfo *AllocateMacroInfo(SourceLocation L);
2493 
2494   /// Turn the specified lexer token into a fully checked and spelled
2495   /// filename, e.g. as an operand of \#include.
2496   ///
2497   /// The caller is expected to provide a buffer that is large enough to hold
2498   /// the spelling of the filename, but is also expected to handle the case
2499   /// when this method decides to use a different buffer.
2500   ///
2501   /// \returns true if the input filename was in <>'s or false if it was
2502   /// in ""'s.
2503   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2504 
2505   /// Given a "foo" or \<foo> reference, look up the indicated file.
2506   ///
2507   /// Returns std::nullopt on failure.  \p isAngled indicates whether the file
2508   /// reference is for system \#include's or not (i.e. using <> instead of "").
2509   OptionalFileEntryRef
2510   LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
2511              ConstSearchDirIterator FromDir, const FileEntry *FromFile,
2512              ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath,
2513              SmallVectorImpl<char> *RelativePath,
2514              ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
2515              bool *IsFrameworkFound, bool SkipCache = false,
2516              bool OpenFile = true, bool CacheFailures = true);
2517 
2518   /// Given a "Filename" or \<Filename> reference, look up the indicated embed
2519   /// resource. \p isAngled indicates whether the file reference is for
2520   /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile
2521   /// is true, the file looked up is opened for reading, otherwise it only
2522   /// validates that the file exists. Quoted filenames are looked up relative
2523   /// to \p LookupFromFile if it is nonnull.
2524   ///
2525   /// Returns std::nullopt on failure.
2526   OptionalFileEntryRef
2527   LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
2528                   const FileEntry *LookupFromFile = nullptr);
2529 
2530   /// Return true if we're in the top-level file, not in a \#include.
2531   bool isInPrimaryFile() const;
2532 
2533   /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2534   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
2535   bool LexOnOffSwitch(tok::OnOffSwitch &Result);
2536 
2537   bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2538                       bool *ShadowFlag = nullptr);
2539 
2540   void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2541   Module *LeaveSubmodule(bool ForPragma);
2542 
2543 private:
2544   friend void TokenLexer::ExpandFunctionArguments();
2545 
  /// Save the current lexer state as a new entry at the back of
  /// IncludeMacroStack.
  ///
  /// Must not be called while the current lexer callback is CLK_CachingLexer
  /// (asserted).
  void PushIncludeMacroStack() {
    assert(CurLexerCallback != CLK_CachingLexer &&
           "cannot push a caching lexer");
    // CurLexer and CurTokenLexer are moved into the new entry; the remaining
    // fields are copied.
    IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule,
                                   std::move(CurLexer), CurPPLexer,
                                   std::move(CurTokenLexer), CurDirLookup);
    // The saved entry now holds the lexer; clear the current raw pointer.
    CurPPLexer = nullptr;
  }
2554 
PopIncludeMacroStack()2555   void PopIncludeMacroStack() {
2556     CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2557     CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2558     CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2559     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
2560     CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2561     CurLexerCallback = IncludeMacroStack.back().CurLexerCallback;
2562     IncludeMacroStack.pop_back();
2563   }
2564 
2565   void PropagateLineStartLeadingSpaceInfo(Token &Result);
2566 
2567   /// Determine whether we need to create module macros for #defines in the
2568   /// current context.
2569   bool needModuleMacros() const;
2570 
2571   /// Update the set of active module macros and ambiguity flag for a module
2572   /// macro name.
2573   void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2574 
2575   DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2576                                                SourceLocation Loc);
2577   UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2578   VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2579                                                              bool isPublic);
2580 
2581   /// Lex and validate a macro name, which occurs after a
2582   /// \#define or \#undef.
2583   ///
2584   /// \param MacroNameTok Token that represents the name defined or undefined.
  /// \param IsDefineUndef Kind of preprocessor directive.
2586   /// \param ShadowFlag Points to flag that is set if macro name shadows
2587   ///                   a keyword.
2588   ///
2589   /// This emits a diagnostic, sets the token kind to eod,
2590   /// and discards the rest of the macro line if the macro name is invalid.
2591   void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2592                      bool *ShadowFlag = nullptr);
2593 
2594   /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2595   /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2596   /// doing so performs certain validity checks including (but not limited to):
2597   ///   - # (stringization) is followed by a macro parameter
2598   /// \param MacroNameTok - Token that represents the macro name
2599   /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2600   ///
2601   ///  Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2602   ///  returns a nullptr if an invalid sequence of tokens is encountered.
2603   MacroInfo *ReadOptionalMacroParameterListAndBody(
2604       const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2605 
2606   /// The ( starting an argument list of a macro definition has just been read.
2607   /// Lex the rest of the parameters and the closing ), updating \p MI with
2608   /// what we learn and saving in \p LastTok the last token read.
2609   /// Return true if an error occurs parsing the arg list.
2610   bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2611 
2612   /// Provide a suggestion for a typoed directive. If there is no typo, then
2613   /// just skip suggesting.
2614   ///
2615   /// \param Tok - Token that represents the directive
2616   /// \param Directive - String reference for the directive name
2617   void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;
2618 
2619   /// We just read a \#if or related directive and decided that the
2620   /// subsequent tokens are in the \#if'd out portion of the
2621   /// file.  Lex the rest of the file, until we see an \#endif.  If \p
2622   /// FoundNonSkipPortion is true, then we have already emitted code for part of
2623   /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2624   /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2625   /// already seen one so a \#else directive is a duplicate.  When this returns,
2626   /// the caller can lex the first valid token.
2627   void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2628                                     SourceLocation IfTokenLoc,
2629                                     bool FoundNonSkipPortion, bool FoundElse,
2630                                     SourceLocation ElseLoc = SourceLocation());
2631 
  /// Information about the result of evaluating an expression for a
  /// preprocessor directive. Returned by EvaluateDirectiveExpression.
  struct DirectiveEvalResult {
    /// The integral value of the expression. Being an optional, it may hold
    /// no value.
    std::optional<llvm::APSInt> Value;

    /// Whether the expression was evaluated as true or not.
    bool Conditional;

    /// True if the expression contained identifiers that were undefined.
    bool IncludedUndefinedIds;

    /// The source range for the expression.
    SourceRange ExprRange;
  };
2647 
2648   /// Evaluate an integer constant expression that may occur after a
2649   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2650   ///
2651   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2652   DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2653                                                   bool CheckForEoD = true);
2654 
2655   /// Evaluate an integer constant expression that may occur after a
2656   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2657   ///
2658   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2659   /// \p EvaluatedDefined will contain the result of whether "defined" appeared
2660   /// in the evaluated expression or not.
2661   DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2662                                                   Token &Tok,
2663                                                   bool &EvaluatedDefined,
2664                                                   bool CheckForEoD = true);
2665 
2666   /// Process a '__has_embed("path" [, ...])' expression.
2667   ///
2668   /// Returns predefined `__STDC_EMBED_*` macro values if
2669   /// successful.
2670   EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
2671 
2672   /// Process a '__has_include("path")' expression.
2673   ///
2674   /// Returns true if successful.
2675   bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);
2676 
2677   /// Process '__has_include_next("path")' expression.
2678   ///
2679   /// Returns true if successful.
2680   bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);
2681 
2682   /// Get the directory and file from which to start \#include_next lookup.
2683   std::pair<ConstSearchDirIterator, const FileEntry *>
2684   getIncludeNextStart(const Token &IncludeNextTok) const;
2685 
2686   /// Install the standard preprocessor pragmas:
2687   /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2688   void RegisterBuiltinPragmas();
2689 
2690   /// RegisterBuiltinMacro - Register the specified identifier in the identifier
2691   /// table and mark it as a builtin macro to be expanded.
RegisterBuiltinMacro(const char * Name)2692   IdentifierInfo *RegisterBuiltinMacro(const char *Name) {
2693     // Get the identifier.
2694     IdentifierInfo *Id = getIdentifierInfo(Name);
2695 
2696     // Mark it as being a macro that is builtin.
2697     MacroInfo *MI = AllocateMacroInfo(SourceLocation());
2698     MI->setIsBuiltinMacro();
2699     appendDefMacroDirective(Id, MI);
2700     return Id;
2701   }
2702 
2703   /// Register builtin macros such as __LINE__ with the identifier table.
2704   void RegisterBuiltinMacros();
2705 
2706   /// If an identifier token is read that is to be expanded as a macro, handle
2707   /// it and return the next token as 'Tok'.  If we lexed a token, return true;
2708   /// otherwise the caller should lex again.
2709   bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2710 
2711   /// Cache macro expanded tokens for TokenLexers.
  ///
2713   /// Works like a stack; a TokenLexer adds the macro expanded tokens that is
2714   /// going to lex in the cache and when it finishes the tokens are removed
2715   /// from the end of the cache.
2716   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2717                                   ArrayRef<Token> tokens);
2718 
2719   void removeCachedMacroExpandedTokensOfLastLexer();
2720 
2721   /// After reading "MACRO(", this method is invoked to read all of the formal
2722   /// arguments specified for the macro invocation.  Returns null on error.
2723   MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2724                                        SourceLocation &MacroEnd);
2725 
2726   /// If an identifier token is read that is to be expanded
2727   /// as a builtin macro, handle it and return the next token as 'Tok'.
2728   void ExpandBuiltinMacro(Token &Tok);
2729 
2730   /// Read a \c _Pragma directive, slice it up, process it, then
2731   /// return the first token after the directive.
2732   /// This assumes that the \c _Pragma token has just been read into \p Tok.
2733   void Handle_Pragma(Token &Tok);
2734 
2735   /// Like Handle_Pragma except the pragma text is not enclosed within
2736   /// a string literal.
2737   void HandleMicrosoft__pragma(Token &Tok);
2738 
2739   /// Add a lexer to the top of the include stack and
2740   /// start lexing tokens from it instead of the current buffer.
2741   void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir);
2742 
  /// Set the FileID for the preprocessor predefines.
  ///
  /// May only be called while PredefinesFileID is still invalid (asserted).
  void setPredefinesFileID(FileID FID) {
    assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
    PredefinesFileID = FID;
  }
2748 
2749   /// Set the FileID for the PCH through header.
2750   void setPCHThroughHeaderFileID(FileID FID);
2751 
2752   /// Returns true if we are lexing from a file and not a
2753   /// pragma or a macro.
IsFileLexer(const Lexer * L,const PreprocessorLexer * P)2754   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2755     return L ? !L->isPragmaLexer() : P != nullptr;
2756   }
2757 
IsFileLexer(const IncludeStackInfo & I)2758   static bool IsFileLexer(const IncludeStackInfo& I) {
2759     return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2760   }
2761 
IsFileLexer()2762   bool IsFileLexer() const {
2763     return IsFileLexer(CurLexer.get(), CurPPLexer);
2764   }
2765 
2766   //===--------------------------------------------------------------------===//
2767   // Standard Library Identification
2768   std::optional<CXXStandardLibraryVersionInfo> CXXStandardLibraryVersion;
2769 
2770 public:
2771   std::optional<std::uint64_t> getStdLibCxxVersion();
2772   bool NeedsStdLibCxxWorkaroundBefore(std::uint64_t FixedVersion);
2773 
2774 private:
2775   //===--------------------------------------------------------------------===//
2776   // Caching stuff.
2777   void CachingLex(Token &Result);
2778 
InCachingLexMode()2779   bool InCachingLexMode() const {
2780     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2781     // that we are past EOF, not that we are in CachingLex mode.
2782     return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2783   }
2784 
2785   void EnterCachingLexMode();
2786   void EnterCachingLexModeUnchecked();
2787 
ExitCachingLexMode()2788   void ExitCachingLexMode() {
2789     if (InCachingLexMode())
2790       RemoveTopOfLexerStack();
2791   }
2792 
2793   const Token &PeekAhead(unsigned N);
2794   void AnnotatePreviousCachedTokens(const Token &Tok);
2795 
2796   //===--------------------------------------------------------------------===//
2797   /// Handle*Directive - implement the various preprocessor directives.  These
2798   /// should side-effect the current preprocessor object so that the next call
2799   /// to Lex() will return the appropriate token next.
2800   void HandleLineDirective();
2801   void HandleDigitDirective(Token &Tok);
2802   void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2803   void HandleIdentSCCSDirective(Token &Tok);
2804   void HandleMacroPublicDirective(Token &Tok);
2805   void HandleMacroPrivateDirective();
2806 
  /// An additional notification that can be produced by a header inclusion or
  /// import to tell the parser what happened.
  struct ImportAction {
    /// The kind of event being reported.
    enum ActionKind {
      None,
      ModuleBegin,
      ModuleImport,
      HeaderUnitImport,
      SkippedModuleImport,
      Failure,
    } Kind;
    /// The module associated with the action; null unless one is supplied to
    /// the constructor.
    Module *ModuleForHeader = nullptr;

    /// Construct an action of kind \p AK, optionally tied to module \p Mod.
    ///
    /// Every kind other than None and Failure requires a non-null \p Mod
    /// (asserted).
    ImportAction(ActionKind AK, Module *Mod = nullptr)
        : Kind(AK), ModuleForHeader(Mod) {
      assert((AK == None || Mod || AK == Failure) &&
             "no module for module action");
    }
  };
2826 
2827   OptionalFileEntryRef LookupHeaderIncludeOrImport(
2828       ConstSearchDirIterator *CurDir, StringRef &Filename,
2829       SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2830       const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2831       bool &IsMapped, ConstSearchDirIterator LookupFrom,
2832       const FileEntry *LookupFromFile, StringRef &LookupFilename,
2833       SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2834       ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2835   // Binary data inclusion
2836   void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
2837                             const FileEntry *LookupFromFile = nullptr);
2838   void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
2839                                 const LexEmbedParametersResult &Params,
2840                                 StringRef BinaryContents, StringRef FileName);
2841 
2842   // File inclusion.
2843   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2844                               ConstSearchDirIterator LookupFrom = nullptr,
2845                               const FileEntry *LookupFromFile = nullptr);
2846   ImportAction
2847   HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2848                               Token &FilenameTok, SourceLocation EndLoc,
2849                               ConstSearchDirIterator LookupFrom = nullptr,
2850                               const FileEntry *LookupFromFile = nullptr);
2851   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2852   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2853   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2854   void HandleMicrosoftImportDirective(Token &Tok);
2855 
2856 public:
2857   /// Check that the given module is available, producing a diagnostic if not.
2858   /// \return \c true if the check failed (because the module is not available).
2859   ///         \c false if the module appears to be usable.
2860   static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2861                                      const TargetInfo &TargetInfo,
2862                                      const Module &M, DiagnosticsEngine &Diags);
2863 
2864   // Module inclusion testing.
2865   /// Find the module that owns the source or header file that
2866   /// \p Loc points to. If the location is in a file that was included
2867   /// into a module, or is outside any module, returns nullptr.
2868   Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual);
2869 
2870   /// We want to produce a diagnostic at location IncLoc concerning an
2871   /// unreachable effect at location MLoc (eg, where a desired entity was
2872   /// declared or defined). Determine whether the right way to make MLoc
2873   /// reachable is by #include, and if so, what header should be included.
2874   ///
2875   /// This is not necessarily fast, and might load unexpected module maps, so
2876   /// should only be called by code that intends to produce an error.
2877   ///
2878   /// \param IncLoc The location at which the missing effect was detected.
2879   /// \param MLoc A location within an unimported module at which the desired
2880   ///        effect occurred.
2881   /// \return A file that can be #included to provide the desired effect. Null
2882   ///         if no such file could be determined or if a #include is not
2883   ///         appropriate (eg, if a module should be imported instead).
2884   OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2885                                                         SourceLocation MLoc);
2886 
isRecordingPreamble()2887   bool isRecordingPreamble() const {
2888     return PreambleConditionalStack.isRecording();
2889   }
2890 
hasRecordedPreamble()2891   bool hasRecordedPreamble() const {
2892     return PreambleConditionalStack.hasRecordedPreamble();
2893   }
2894 
getPreambleConditionalStack()2895   ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2896       return PreambleConditionalStack.getStack();
2897   }
2898 
setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s)2899   void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
2900     PreambleConditionalStack.setStack(s);
2901   }
2902 
setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,std::optional<PreambleSkipInfo> SkipInfo)2903   void setReplayablePreambleConditionalStack(
2904       ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) {
2905     PreambleConditionalStack.startReplaying();
2906     PreambleConditionalStack.setStack(s);
2907     PreambleConditionalStack.SkipInfo = SkipInfo;
2908   }
2909 
getPreambleSkipInfo()2910   std::optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2911     return PreambleConditionalStack.SkipInfo;
2912   }
2913 
2914 private:
2915   /// After processing predefined file, initialize the conditional stack from
2916   /// the preamble.
2917   void replayPreambleConditionalStack();
2918 
2919   // Macro handling.
2920   void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2921   void HandleUndefDirective();
2922 
2923   // Conditional Inclusion.
2924   void HandleIfdefDirective(Token &Result, const Token &HashToken,
2925                             bool isIfndef, bool ReadAnyTokensBeforeDirective);
2926   void HandleIfDirective(Token &IfToken, const Token &HashToken,
2927                          bool ReadAnyTokensBeforeDirective);
2928   void HandleEndifDirective(Token &EndifToken);
2929   void HandleElseDirective(Token &Result, const Token &HashToken);
2930   void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
2931                                  tok::PPKeywordKind Kind);
2932 
2933   // Pragmas.
2934   void HandlePragmaDirective(PragmaIntroducer Introducer);
2935 
2936 public:
2937   void HandlePragmaOnce(Token &OnceTok);
2938   void HandlePragmaMark(Token &MarkTok);
2939   void HandlePragmaPoison();
2940   void HandlePragmaSystemHeader(Token &SysHeaderTok);
2941   void HandlePragmaDependency(Token &DependencyTok);
2942   void HandlePragmaPushMacro(Token &Tok);
2943   void HandlePragmaPopMacro(Token &Tok);
2944   void HandlePragmaIncludeAlias(Token &Tok);
2945   void HandlePragmaModuleBuild(Token &Tok);
2946   void HandlePragmaHdrstop(Token &Tok);
2947   IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2948 
2949   // Return true and store the first token only if any CommentHandler
2950   // has inserted some tokens and getCommentRetentionState() is false.
2951   bool HandleComment(Token &result, SourceRange Comment);
2952 
2953   /// A macro is used, update information about macros that need unused
2954   /// warnings.
2955   void markMacroAsUsed(MacroInfo *MI);
2956 
addMacroDeprecationMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2957   void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
2958                               SourceLocation AnnotationLoc) {
2959     AnnotationInfos[II].DeprecationInfo =
2960         MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
2961   }
2962 
addRestrictExpansionMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2963   void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
2964                                SourceLocation AnnotationLoc) {
2965     AnnotationInfos[II].RestrictExpansionInfo =
2966         MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
2967   }
2968 
addFinalLoc(const IdentifierInfo * II,SourceLocation AnnotationLoc)2969   void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
2970     AnnotationInfos[II].FinalAnnotationLoc = AnnotationLoc;
2971   }
2972 
getMacroAnnotations(const IdentifierInfo * II)2973   const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
2974     return AnnotationInfos.find(II)->second;
2975   }
2976 
2977   void emitMacroExpansionWarnings(const Token &Identifier,
2978                                   bool IsIfnDef = false) const {
2979     IdentifierInfo *Info = Identifier.getIdentifierInfo();
2980     if (Info->isDeprecatedMacro())
2981       emitMacroDeprecationWarning(Identifier);
2982 
2983     if (Info->isRestrictExpansion() &&
2984         !SourceMgr.isInMainFile(Identifier.getLocation()))
2985       emitRestrictExpansionWarning(Identifier);
2986 
2987     if (!IsIfnDef) {
2988       if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs)
2989         emitRestrictInfNaNWarning(Identifier, 0);
2990       if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs)
2991         emitRestrictInfNaNWarning(Identifier, 1);
2992     }
2993   }
2994 
2995   static void processPathForFileMacro(SmallVectorImpl<char> &Path,
2996                                       const LangOptions &LangOpts,
2997                                       const TargetInfo &TI);
2998 
2999   static void processPathToFileName(SmallVectorImpl<char> &FileName,
3000                                     const PresumedLoc &PLoc,
3001                                     const LangOptions &LangOpts,
3002                                     const TargetInfo &TI);
3003 
3004 private:
3005   void emitMacroDeprecationWarning(const Token &Identifier) const;
3006   void emitRestrictExpansionWarning(const Token &Identifier) const;
3007   void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
3008   void emitRestrictInfNaNWarning(const Token &Identifier,
3009                                  unsigned DiagSelection) const;
3010 
  /// Tracks whether the token currently being scanned by this PP is inside a
  /// "-Wunsafe-buffer-usage" opt-out region. This assumes that the PP scans a
  /// translation unit in linear order.
3014   bool InSafeBufferOptOutRegion = false;
3015 
  /// Holds the start location of the current "-Wunsafe-buffer-usage" opt-out
  /// region if PP is currently in such a region; holds an undefined value
  /// otherwise. It is used to report the start location of a never-closed
  /// region.
  SourceLocation CurrentSafeBufferOptOutStart;
3020 
3021   using SafeBufferOptOutRegionsTy =
3022       SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
3023   // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
3024   // translation unit. Each region is represented by a pair of start and
3025   // end locations.
3026   SafeBufferOptOutRegionsTy SafeBufferOptOutMap;
3027 
3028   // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs.  We use the
3029   // following structure to manage them by their ASTs.
3030   struct {
3031     // A map from unique IDs to region maps of loaded ASTs.  The ID identifies a
3032     // loaded AST. See `SourceManager::getUniqueLoadedASTID`.
3033     llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;
3034 
3035     // Returns a reference to the safe buffer opt-out regions of the loaded
3036     // AST where `Loc` belongs to. (Construct if absent)
3037     SafeBufferOptOutRegionsTy &
findAndConsLoadedOptOutMap__anon4672ff5f03083038     findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
3039       return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)];
3040     }
3041 
3042     // Returns a reference to the safe buffer opt-out regions of the loaded
3043     // AST where `Loc` belongs to. (This const function returns nullptr if
3044     // absent.)
3045     const SafeBufferOptOutRegionsTy *
lookupLoadedOptOutMap__anon4672ff5f03083046     lookupLoadedOptOutMap(SourceLocation Loc,
3047                           const SourceManager &SrcMgr) const {
3048       FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc);
3049       auto Iter = LoadedRegions.find(FID);
3050 
3051       if (Iter == LoadedRegions.end())
3052         return nullptr;
3053       return &Iter->getSecond();
3054     }
3055   } LoadedSafeBufferOptOutMap;
3056 
3057 public:
3058   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
3059   /// region.  This `Loc` must be a source location that has been pre-processed.
3060   bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const;
3061 
3062   /// Alter the state of whether this PP currently is in a
3063   /// "-Wunsafe-buffer-usage" opt-out region.
3064   ///
3065   /// \param isEnter true if this PP is entering a region; otherwise, this PP
3066   /// is exiting a region
3067   /// \param Loc the location of the entry or exit of a
3068   /// region
3069   /// \return true iff it is INVALID to enter or exit a region, i.e.,
3070   /// attempt to enter a region before exiting a previous region, or exiting a
3071   /// region that PP is not currently in.
3072   bool enterOrExitSafeBufferOptOutRegion(bool isEnter,
3073                                          const SourceLocation &Loc);
3074 
3075   /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
3076   ///          opt-out region
3077   bool isPPInSafeBufferOptOutRegion();
3078 
3079   /// \param StartLoc output argument. It will be set to the start location of
3080   /// the current "-Wunsafe-buffer-usage" opt-out region iff this function
3081   /// returns true.
3082   /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
3083   ///          opt-out region
3084   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
3085 
3086   /// \return a sequence of SourceLocations representing ordered opt-out regions
3087   /// specified by
3088   /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
3089   SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;
3090 
3091   /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
3092   /// record of code `PP_UNSAFE_BUFFER_USAGE`.
  /// \return true iff the `Preprocessor` has been updated; false if the
  /// `Preprocessor` is unchanged by the call.
3095   bool setDeserializedSafeBufferOptOutMap(
3096       const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
3097 
3098   /// Whether we've seen pp-directives which may have changed the preprocessing
3099   /// state.
3100   bool hasSeenNoTrivialPPDirective() const;
3101 
3102 private:
3103   /// Helper functions to forward lexing to the actual lexer. They all share the
3104   /// same signature.
CLK_Lexer(Preprocessor & P,Token & Result)3105   static bool CLK_Lexer(Preprocessor &P, Token &Result) {
3106     return P.CurLexer->Lex(Result);
3107   }
CLK_TokenLexer(Preprocessor & P,Token & Result)3108   static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
3109     return P.CurTokenLexer->Lex(Result);
3110   }
CLK_CachingLexer(Preprocessor & P,Token & Result)3111   static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
3112     P.CachingLex(Result);
3113     return true;
3114   }
CLK_DependencyDirectivesLexer(Preprocessor & P,Token & Result)3115   static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
3116     return P.CurLexer->LexDependencyDirectiveToken(Result);
3117   }
CLK_LexAfterModuleImport(Preprocessor & P,Token & Result)3118   static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
3119     return P.LexAfterModuleImport(Result);
3120   }
3121 };
3122 
/// Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
///
/// Implementations override HandleComment(); the destructor is virtual and is
/// only declared here.
class CommentHandler {
public:
  virtual ~CommentHandler();

  /// Called with the source range \p Comment of a comment and the
  /// preprocessor \p PP.
  ///
  /// \returns true if the handler has pushed any tokens to be read using
  /// e.g. EnterToken or EnterTokenStream.
  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
3133 
/// Abstract base class that describes a handler that will receive
/// source ranges for empty lines encountered in the source file.
///
/// Implementations override HandleEmptyline(); the destructor is virtual and
/// is only declared here.
class EmptylineHandler {
public:
  virtual ~EmptylineHandler();

  /// Called with the source range \p Range of the empty line(s) to handle.
  virtual void HandleEmptyline(SourceRange Range) = 0;
};
3143 
/// Helper class to shuttle information about #embed directives from the
/// preprocessor to the parser through an annotation token.
///
/// Both members are non-owning StringRef views, so the referenced storage
/// must outlive any use of this struct.
struct EmbedAnnotationData {
  /// The embedded binary data (presumably the contents of the embedded
  /// resource -- confirm against the producer of this struct).
  StringRef BinaryData;
  /// The file name associated with the #embed directive.
  StringRef FileName;
};
3150 
3151 /// Registry of pragma handlers added by plugins
3152 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
3153 
3154 } // namespace clang
3155 
3156 namespace llvm {
3157 extern template class CLANG_TEMPLATE_ABI Registry<clang::PragmaHandler>;
3158 } // namespace llvm
3159 
3160 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
3161