//===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines the clang::Preprocessor interface.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
#define LLVM_CLANG_LEX_PREPROCESSOR_H

#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/PPEmbedParameters.h"
#include "clang/Lex/Token.h"
#include "clang/Lex/TokenLexer.h"
#include "clang/Support/Compiler.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Registry.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory> 56 #include <optional> 57 #include <string> 58 #include <utility> 59 #include <vector> 60 61 namespace llvm { 62 63 template<unsigned InternalLen> class SmallString; 64 65 } // namespace llvm 66 67 namespace clang { 68 69 class CodeCompletionHandler; 70 class CommentHandler; 71 class DirectoryEntry; 72 class EmptylineHandler; 73 class ExternalPreprocessorSource; 74 class FileEntry; 75 class FileManager; 76 class HeaderSearch; 77 class MacroArgs; 78 class PragmaHandler; 79 class PragmaNamespace; 80 class PreprocessingRecord; 81 class PreprocessorLexer; 82 class PreprocessorOptions; 83 class ScratchBuffer; 84 class TargetInfo; 85 class NoTrivialPPDirectiveTracer; 86 87 namespace Builtin { 88 class Context; 89 } 90 91 /// Stores token information for comparing actual tokens with 92 /// predefined values. Only handles simple tokens and identifiers. 93 class TokenValue { 94 tok::TokenKind Kind; 95 IdentifierInfo *II; 96 97 public: TokenValue(tok::TokenKind Kind)98 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 99 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 100 assert(Kind != tok::identifier && 101 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 102 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 103 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 104 } 105 TokenValue(IdentifierInfo * II)106 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 107 108 bool operator==(const Token &Tok) const { 109 return Tok.getKind() == Kind && 110 (!II || II == Tok.getIdentifierInfo()); 111 } 112 }; 113 114 /// Context in which macro name is used. 115 enum MacroUse { 116 // other than #define or #undef 117 MU_Other = 0, 118 119 // macro name specified in #define 120 MU_Define = 1, 121 122 // macro name specified in #undef 123 MU_Undef = 2 124 }; 125 126 enum class EmbedResult { 127 Invalid = -1, // Parsing error occurred. 
128 NotFound = 0, // Corresponds to __STDC_EMBED_NOT_FOUND__ 129 Found = 1, // Corresponds to __STDC_EMBED_FOUND__ 130 Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__ 131 }; 132 133 struct CXXStandardLibraryVersionInfo { 134 enum Library { Unknown, LibStdCXX }; 135 Library Lib; 136 std::uint64_t Version; 137 }; 138 139 /// Engages in a tight little dance with the lexer to efficiently 140 /// preprocess tokens. 141 /// 142 /// Lexers know only about tokens within a single source file, and don't 143 /// know anything about preprocessor-level issues like the \#include stack, 144 /// token expansion, etc. 145 class Preprocessor { 146 friend class VAOptDefinitionContext; 147 friend class VariadicMacroScopeGuard; 148 149 llvm::unique_function<void(const clang::Token &)> OnToken; 150 /// Functor for getting the dependency preprocessor directives of a file. 151 /// 152 /// These are directives derived from a special form of lexing where the 153 /// source input is scanned for the preprocessor directives that might have an 154 /// effect on the dependencies for a compilation unit. 155 DependencyDirectivesGetter *GetDependencyDirectives = nullptr; 156 const PreprocessorOptions &PPOpts; 157 DiagnosticsEngine *Diags; 158 const LangOptions &LangOpts; 159 const TargetInfo *Target = nullptr; 160 const TargetInfo *AuxTarget = nullptr; 161 FileManager &FileMgr; 162 SourceManager &SourceMgr; 163 std::unique_ptr<ScratchBuffer> ScratchBuf; 164 HeaderSearch &HeaderInfo; 165 ModuleLoader &TheModuleLoader; 166 167 /// External source of macros. 168 ExternalPreprocessorSource *ExternalSource; 169 170 /// A BumpPtrAllocator object used to quickly allocate and release 171 /// objects internal to the Preprocessor. 172 llvm::BumpPtrAllocator BP; 173 174 /// Identifiers for builtin macros and other builtins. 
175 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ 176 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ 177 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ 178 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ 179 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__ 180 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ 181 IdentifierInfo *Ident__COUNTER__; // __COUNTER__ 182 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma 183 IdentifierInfo *Ident__identifier; // __identifier 184 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__ 185 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__ 186 IdentifierInfo *Ident__has_feature; // __has_feature 187 IdentifierInfo *Ident__has_extension; // __has_extension 188 IdentifierInfo *Ident__has_builtin; // __has_builtin 189 IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin 190 IdentifierInfo *Ident__has_attribute; // __has_attribute 191 IdentifierInfo *Ident__has_embed; // __has_embed 192 IdentifierInfo *Ident__has_include; // __has_include 193 IdentifierInfo *Ident__has_include_next; // __has_include_next 194 IdentifierInfo *Ident__has_warning; // __has_warning 195 IdentifierInfo *Ident__is_identifier; // __is_identifier 196 IdentifierInfo *Ident__building_module; // __building_module 197 IdentifierInfo *Ident__MODULE__; // __MODULE__ 198 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute 199 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute 200 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute 201 IdentifierInfo *Ident__is_target_arch; // __is_target_arch 202 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor 203 IdentifierInfo *Ident__is_target_os; // __is_target_os 204 IdentifierInfo *Ident__is_target_environment; // __is_target_environment 205 IdentifierInfo *Ident__is_target_variant_os; 206 IdentifierInfo *Ident__is_target_variant_environment; 207 IdentifierInfo 
*Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD 208 209 // Weak, only valid (and set) while InMacroArgs is true. 210 Token* ArgMacro; 211 212 SourceLocation DATELoc, TIMELoc; 213 214 // FEM_UnsetOnCommandLine means that an explicit evaluation method was 215 // not specified on the command line. The target is queried to set the 216 // default evaluation method. 217 LangOptions::FPEvalMethodKind CurrentFPEvalMethod = 218 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 219 220 // The most recent pragma location where the floating point evaluation 221 // method was modified. This is used to determine whether the 222 // 'pragma clang fp eval_method' was used whithin the current scope. 223 SourceLocation LastFPEvalPragmaLocation; 224 225 LangOptions::FPEvalMethodKind TUFPEvalMethod = 226 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 227 228 // Next __COUNTER__ value, starts at 0. 229 unsigned CounterValue = 0; 230 231 enum { 232 /// Maximum depth of \#includes. 233 MaxAllowedIncludeStackDepth = 200 234 }; 235 236 // State that is set before the preprocessor begins. 237 bool KeepComments : 1; 238 bool KeepMacroComments : 1; 239 bool SuppressIncludeNotFoundError : 1; 240 241 // State that changes while the preprocessor runs: 242 bool InMacroArgs : 1; // True if parsing fn macro invocation args. 243 244 /// Whether the preprocessor owns the header search object. 245 bool OwnsHeaderSearch : 1; 246 247 /// True if macro expansion is disabled. 248 bool DisableMacroExpansion : 1; 249 250 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion) 251 /// when parsing preprocessor directives. 252 bool MacroExpansionInDirectivesOverride : 1; 253 254 class ResetMacroExpansionHelper; 255 256 /// Whether we have already loaded macros from the external source. 257 mutable bool ReadMacrosFromExternalSource : 1; 258 259 /// True if pragmas are enabled. 260 bool PragmasEnabled : 1; 261 262 /// True if the current build action is a preprocessing action. 
263 bool PreprocessedOutput : 1; 264 265 /// True if we are currently preprocessing a #if or #elif directive 266 bool ParsingIfOrElifDirective; 267 268 /// True if we are pre-expanding macro arguments. 269 bool InMacroArgPreExpansion; 270 271 /// Mapping/lookup information for all identifiers in 272 /// the program, including program keywords. 273 mutable IdentifierTable Identifiers; 274 275 /// This table contains all the selectors in the program. 276 /// 277 /// Unlike IdentifierTable above, this table *isn't* populated by the 278 /// preprocessor. It is declared/expanded here because its role/lifetime is 279 /// conceptually similar to the IdentifierTable. In addition, the current 280 /// control flow (in clang::ParseAST()), make it convenient to put here. 281 /// 282 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to 283 /// the lifetime of the preprocessor. 284 SelectorTable Selectors; 285 286 /// Information about builtins. 287 std::unique_ptr<Builtin::Context> BuiltinInfo; 288 289 /// Tracks all of the pragmas that the client registered 290 /// with this preprocessor. 291 std::unique_ptr<PragmaNamespace> PragmaHandlers; 292 293 /// Pragma handlers of the original source is stored here during the 294 /// parsing of a model file. 295 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup; 296 297 /// Tracks all of the comment handlers that the client registered 298 /// with this preprocessor. 299 std::vector<CommentHandler *> CommentHandlers; 300 301 /// Empty line handler. 302 EmptylineHandler *Emptyline = nullptr; 303 304 /// True to avoid tearing down the lexer etc on EOF 305 bool IncrementalProcessing = false; 306 307 public: 308 /// The kind of translation unit we are processing. 309 const TranslationUnitKind TUKind; 310 311 /// Returns a pointer into the given file's buffer that's guaranteed 312 /// to be between tokens. The returned pointer is always before \p Start. 
313 /// The maximum distance betweenthe returned pointer and \p Start is 314 /// limited by a constant value, but also an implementation detail. 315 /// If no such check point exists, \c nullptr is returned. 316 const char *getCheckPoint(FileID FID, const char *Start) const; 317 318 private: 319 /// The code-completion handler. 320 CodeCompletionHandler *CodeComplete = nullptr; 321 322 /// The file that we're performing code-completion for, if any. 323 const FileEntry *CodeCompletionFile = nullptr; 324 325 /// The offset in file for the code-completion point. 326 unsigned CodeCompletionOffset = 0; 327 328 /// The location for the code-completion point. This gets instantiated 329 /// when the CodeCompletionFile gets \#include'ed for preprocessing. 330 SourceLocation CodeCompletionLoc; 331 332 /// The start location for the file of the code-completion point. 333 /// 334 /// This gets instantiated when the CodeCompletionFile gets \#include'ed 335 /// for preprocessing. 336 SourceLocation CodeCompletionFileLoc; 337 338 /// The source location of the \c import contextual keyword we just 339 /// lexed, if any. 340 SourceLocation ModuleImportLoc; 341 342 /// The import path for named module that we're currently processing. 343 SmallVector<IdentifierLoc, 2> NamedModuleImportPath; 344 345 llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints; 346 unsigned CheckPointCounter = 0; 347 348 /// Whether the import is an `@import` or a standard c++ modules import. 349 bool IsAtImport = false; 350 351 /// Whether the last token we lexed was an '@'. 352 bool LastTokenWasAt = false; 353 354 /// First pp-token source location in current translation unit. 355 SourceLocation FirstPPTokenLoc; 356 357 /// A preprocessor directive tracer to trace whether the preprocessing 358 /// state changed. These changes would mean most semantically observable 359 /// preprocessor state, particularly anything that is order dependent. 
360 NoTrivialPPDirectiveTracer *DirTracer = nullptr; 361 362 /// A position within a C++20 import-seq. 363 class StdCXXImportSeq { 364 public: 365 enum State : int { 366 // Positive values represent a number of unclosed brackets. 367 AtTopLevel = 0, 368 AfterTopLevelTokenSeq = -1, 369 AfterExport = -2, 370 AfterImportSeq = -3, 371 }; 372 StdCXXImportSeq(State S)373 StdCXXImportSeq(State S) : S(S) {} 374 375 /// Saw any kind of open bracket. handleOpenBracket()376 void handleOpenBracket() { 377 S = static_cast<State>(std::max<int>(S, 0) + 1); 378 } 379 /// Saw any kind of close bracket other than '}'. handleCloseBracket()380 void handleCloseBracket() { 381 S = static_cast<State>(std::max<int>(S, 1) - 1); 382 } 383 /// Saw a close brace. handleCloseBrace()384 void handleCloseBrace() { 385 handleCloseBracket(); 386 if (S == AtTopLevel && !AfterHeaderName) 387 S = AfterTopLevelTokenSeq; 388 } 389 /// Saw a semicolon. handleSemi()390 void handleSemi() { 391 if (atTopLevel()) { 392 S = AfterTopLevelTokenSeq; 393 AfterHeaderName = false; 394 } 395 } 396 397 /// Saw an 'export' identifier. handleExport()398 void handleExport() { 399 if (S == AfterTopLevelTokenSeq) 400 S = AfterExport; 401 else if (S <= 0) 402 S = AtTopLevel; 403 } 404 /// Saw an 'import' identifier. handleImport()405 void handleImport() { 406 if (S == AfterTopLevelTokenSeq || S == AfterExport) 407 S = AfterImportSeq; 408 else if (S <= 0) 409 S = AtTopLevel; 410 } 411 412 /// Saw a 'header-name' token; do not recognize any more 'import' tokens 413 /// until we reach a top-level semicolon. handleHeaderName()414 void handleHeaderName() { 415 if (S == AfterImportSeq) 416 AfterHeaderName = true; 417 handleMisc(); 418 } 419 420 /// Saw any other token. 
handleMisc()421 void handleMisc() { 422 if (S <= 0) 423 S = AtTopLevel; 424 } 425 atTopLevel()426 bool atTopLevel() { return S <= 0; } afterImportSeq()427 bool afterImportSeq() { return S == AfterImportSeq; } afterTopLevelSeq()428 bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; } 429 430 private: 431 State S; 432 /// Whether we're in the pp-import-suffix following the header-name in a 433 /// pp-import. If so, a close-brace is not sufficient to end the 434 /// top-level-token-seq of an import-seq. 435 bool AfterHeaderName = false; 436 }; 437 438 /// Our current position within a C++20 import-seq. 439 StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq; 440 441 /// Track whether we are in a Global Module Fragment 442 class TrackGMF { 443 public: 444 enum GMFState : int { 445 GMFActive = 1, 446 MaybeGMF = 0, 447 BeforeGMFIntroducer = -1, 448 GMFAbsentOrEnded = -2, 449 }; 450 TrackGMF(GMFState S)451 TrackGMF(GMFState S) : S(S) {} 452 453 /// Saw a semicolon. handleSemi()454 void handleSemi() { 455 // If it is immediately after the first instance of the module keyword, 456 // then that introduces the GMF. 457 if (S == MaybeGMF) 458 S = GMFActive; 459 } 460 461 /// Saw an 'export' identifier. handleExport()462 void handleExport() { 463 // The presence of an 'export' keyword always ends or excludes a GMF. 464 S = GMFAbsentOrEnded; 465 } 466 467 /// Saw an 'import' identifier. handleImport(bool AfterTopLevelTokenSeq)468 void handleImport(bool AfterTopLevelTokenSeq) { 469 // If we see this before any 'module' kw, then we have no GMF. 470 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 471 S = GMFAbsentOrEnded; 472 } 473 474 /// Saw a 'module' identifier. handleModule(bool AfterTopLevelTokenSeq)475 void handleModule(bool AfterTopLevelTokenSeq) { 476 // This was the first module identifier and not preceded by any token 477 // that would exclude a GMF. It could begin a GMF, but only if directly 478 // followed by a semicolon. 
479 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 480 S = MaybeGMF; 481 else 482 S = GMFAbsentOrEnded; 483 } 484 485 /// Saw any other token. handleMisc()486 void handleMisc() { 487 // We saw something other than ; after the 'module' kw, so not a GMF. 488 if (S == MaybeGMF) 489 S = GMFAbsentOrEnded; 490 } 491 inGMF()492 bool inGMF() { return S == GMFActive; } 493 494 private: 495 /// Track the transitions into and out of a Global Module Fragment, 496 /// if one is present. 497 GMFState S; 498 }; 499 500 TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer; 501 502 /// Track the status of the c++20 module decl. 503 /// 504 /// module-declaration: 505 /// 'export'[opt] 'module' module-name module-partition[opt] 506 /// attribute-specifier-seq[opt] ';' 507 /// 508 /// module-name: 509 /// module-name-qualifier[opt] identifier 510 /// 511 /// module-partition: 512 /// ':' module-name-qualifier[opt] identifier 513 /// 514 /// module-name-qualifier: 515 /// identifier '.' 516 /// module-name-qualifier identifier '.' 
517 /// 518 /// Transition state: 519 /// 520 /// NotAModuleDecl --- export ---> FoundExport 521 /// NotAModuleDecl --- module ---> ImplementationCandidate 522 /// FoundExport --- module ---> InterfaceCandidate 523 /// ImplementationCandidate --- Identifier ---> ImplementationCandidate 524 /// ImplementationCandidate --- period ---> ImplementationCandidate 525 /// ImplementationCandidate --- colon ---> ImplementationCandidate 526 /// InterfaceCandidate --- Identifier ---> InterfaceCandidate 527 /// InterfaceCandidate --- period ---> InterfaceCandidate 528 /// InterfaceCandidate --- colon ---> InterfaceCandidate 529 /// ImplementationCandidate --- Semi ---> NamedModuleImplementation 530 /// NamedModuleInterface --- Semi ---> NamedModuleInterface 531 /// NamedModuleImplementation --- Anything ---> NamedModuleImplementation 532 /// NamedModuleInterface --- Anything ---> NamedModuleInterface 533 /// 534 /// FIXME: We haven't handle attribute-specifier-seq here. It may not be bad 535 /// soon since we don't support any module attributes yet. 
536 class ModuleDeclSeq { 537 enum ModuleDeclState : int { 538 NotAModuleDecl, 539 FoundExport, 540 InterfaceCandidate, 541 ImplementationCandidate, 542 NamedModuleInterface, 543 NamedModuleImplementation, 544 }; 545 546 public: 547 ModuleDeclSeq() = default; 548 handleExport()549 void handleExport() { 550 if (State == NotAModuleDecl) 551 State = FoundExport; 552 else if (!isNamedModule()) 553 reset(); 554 } 555 handleModule()556 void handleModule() { 557 if (State == FoundExport) 558 State = InterfaceCandidate; 559 else if (State == NotAModuleDecl) 560 State = ImplementationCandidate; 561 else if (!isNamedModule()) 562 reset(); 563 } 564 handleIdentifier(IdentifierInfo * Identifier)565 void handleIdentifier(IdentifierInfo *Identifier) { 566 if (isModuleCandidate() && Identifier) 567 Name += Identifier->getName().str(); 568 else if (!isNamedModule()) 569 reset(); 570 } 571 handleColon()572 void handleColon() { 573 if (isModuleCandidate()) 574 Name += ":"; 575 else if (!isNamedModule()) 576 reset(); 577 } 578 handlePeriod()579 void handlePeriod() { 580 if (isModuleCandidate()) 581 Name += "."; 582 else if (!isNamedModule()) 583 reset(); 584 } 585 handleSemi()586 void handleSemi() { 587 if (!Name.empty() && isModuleCandidate()) { 588 if (State == InterfaceCandidate) 589 State = NamedModuleInterface; 590 else if (State == ImplementationCandidate) 591 State = NamedModuleImplementation; 592 else 593 llvm_unreachable("Unimaged ModuleDeclState."); 594 } else if (!isNamedModule()) 595 reset(); 596 } 597 handleMisc()598 void handleMisc() { 599 if (!isNamedModule()) 600 reset(); 601 } 602 isModuleCandidate()603 bool isModuleCandidate() const { 604 return State == InterfaceCandidate || State == ImplementationCandidate; 605 } 606 isNamedModule()607 bool isNamedModule() const { 608 return State == NamedModuleInterface || 609 State == NamedModuleImplementation; 610 } 611 isNamedInterface()612 bool isNamedInterface() const { return State == NamedModuleInterface; } 613 
isImplementationUnit()614 bool isImplementationUnit() const { 615 return State == NamedModuleImplementation && !getName().contains(':'); 616 } 617 isNotAModuleDecl()618 bool isNotAModuleDecl() const { return State == NotAModuleDecl; } 619 getName()620 StringRef getName() const { 621 assert(isNamedModule() && "Can't get name from a non named module"); 622 return Name; 623 } 624 getPrimaryName()625 StringRef getPrimaryName() const { 626 assert(isNamedModule() && "Can't get name from a non named module"); 627 return getName().split(':').first; 628 } 629 reset()630 void reset() { 631 Name.clear(); 632 State = NotAModuleDecl; 633 } 634 635 private: 636 ModuleDeclState State = NotAModuleDecl; 637 std::string Name; 638 }; 639 640 ModuleDeclSeq ModuleDeclState; 641 642 /// Whether the module import expects an identifier next. Otherwise, 643 /// it expects a '.' or ';'. 644 bool ModuleImportExpectsIdentifier = false; 645 646 /// The identifier and source location of the currently-active 647 /// \#pragma clang arc_cf_code_audited begin. 648 IdentifierLoc PragmaARCCFCodeAuditedInfo; 649 650 /// The source location of the currently-active 651 /// \#pragma clang assume_nonnull begin. 652 SourceLocation PragmaAssumeNonNullLoc; 653 654 /// Set only for preambles which end with an active 655 /// \#pragma clang assume_nonnull begin. 656 /// 657 /// When the preamble is loaded into the main file, 658 /// `PragmaAssumeNonNullLoc` will be set to this to 659 /// replay the unterminated assume_nonnull. 660 SourceLocation PreambleRecordedPragmaAssumeNonNullLoc; 661 662 /// True if we hit the code-completion point. 663 bool CodeCompletionReached = false; 664 665 /// The code completion token containing the information 666 /// on the stem that is to be code completed. 667 IdentifierInfo *CodeCompletionII = nullptr; 668 669 /// Range for the code completion token. 
670 SourceRange CodeCompletionTokenRange; 671 672 /// The directory that the main file should be considered to occupy, 673 /// if it does not correspond to a real file (as happens when building a 674 /// module). 675 OptionalDirectoryEntryRef MainFileDir; 676 677 /// The number of bytes that we will initially skip when entering the 678 /// main file, along with a flag that indicates whether skipping this number 679 /// of bytes will place the lexer at the start of a line. 680 /// 681 /// This is used when loading a precompiled preamble. 682 std::pair<int, bool> SkipMainFilePreamble; 683 684 /// Whether we hit an error due to reaching max allowed include depth. Allows 685 /// to avoid hitting the same error over and over again. 686 bool HasReachedMaxIncludeDepth = false; 687 688 /// The number of currently-active calls to Lex. 689 /// 690 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often 691 /// require asking for multiple additional tokens. This counter makes it 692 /// possible for Lex to detect whether it's producing a token for the end 693 /// of phase 4 of translation or for some other situation. 694 unsigned LexLevel = 0; 695 696 /// The number of (LexLevel 0) preprocessor tokens. 697 unsigned TokenCount = 0; 698 699 /// Preprocess every token regardless of LexLevel. 700 bool PreprocessToken = false; 701 702 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens 703 /// warning, or zero for unlimited. 
704 unsigned MaxTokens = 0; 705 SourceLocation MaxTokensOverrideLoc; 706 707 public: 708 struct PreambleSkipInfo { 709 SourceLocation HashTokenLoc; 710 SourceLocation IfTokenLoc; 711 bool FoundNonSkipPortion; 712 bool FoundElse; 713 SourceLocation ElseLoc; 714 PreambleSkipInfoPreambleSkipInfo715 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, 716 bool FoundNonSkipPortion, bool FoundElse, 717 SourceLocation ElseLoc) 718 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc), 719 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse), 720 ElseLoc(ElseLoc) {} 721 }; 722 723 using IncludedFilesSet = llvm::DenseSet<const FileEntry *>; 724 725 private: 726 friend class ASTReader; 727 friend class MacroArgs; 728 729 class PreambleConditionalStackStore { 730 enum State { 731 Off = 0, 732 Recording = 1, 733 Replaying = 2, 734 }; 735 736 public: 737 PreambleConditionalStackStore() = default; 738 startRecording()739 void startRecording() { ConditionalStackState = Recording; } startReplaying()740 void startReplaying() { ConditionalStackState = Replaying; } isRecording()741 bool isRecording() const { return ConditionalStackState == Recording; } isReplaying()742 bool isReplaying() const { return ConditionalStackState == Replaying; } 743 getStack()744 ArrayRef<PPConditionalInfo> getStack() const { 745 return ConditionalStack; 746 } 747 doneReplaying()748 void doneReplaying() { 749 ConditionalStack.clear(); 750 ConditionalStackState = Off; 751 } 752 setStack(ArrayRef<PPConditionalInfo> s)753 void setStack(ArrayRef<PPConditionalInfo> s) { 754 if (!isRecording() && !isReplaying()) 755 return; 756 ConditionalStack.clear(); 757 ConditionalStack.append(s.begin(), s.end()); 758 } 759 hasRecordedPreamble()760 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); } 761 reachedEOFWhileSkipping()762 bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); } 763 clearSkipInfo()764 void clearSkipInfo() { SkipInfo.reset(); } 765 766 
std::optional<PreambleSkipInfo> SkipInfo; 767 768 private: 769 SmallVector<PPConditionalInfo, 4> ConditionalStack; 770 State ConditionalStackState = Off; 771 } PreambleConditionalStack; 772 773 /// The current top of the stack that we're lexing from if 774 /// not expanding a macro and we are lexing directly from source code. 775 /// 776 /// Only one of CurLexer, or CurTokenLexer will be non-null. 777 std::unique_ptr<Lexer> CurLexer; 778 779 /// The current top of the stack that we're lexing from 780 /// if not expanding a macro. 781 /// 782 /// This is an alias for CurLexer. 783 PreprocessorLexer *CurPPLexer = nullptr; 784 785 /// Used to find the current FileEntry, if CurLexer is non-null 786 /// and if applicable. 787 /// 788 /// This allows us to implement \#include_next and find directory-specific 789 /// properties. 790 ConstSearchDirIterator CurDirLookup = nullptr; 791 792 /// The current macro we are expanding, if we are expanding a macro. 793 /// 794 /// One of CurLexer and CurTokenLexer must be null. 795 std::unique_ptr<TokenLexer> CurTokenLexer; 796 797 /// The kind of lexer we're currently working with. 798 typedef bool (*LexerCallback)(Preprocessor &, Token &); 799 LexerCallback CurLexerCallback = &CLK_Lexer; 800 801 /// If the current lexer is for a submodule that is being built, this 802 /// is that submodule. 803 Module *CurLexerSubmodule = nullptr; 804 805 /// Keeps track of the stack of files currently 806 /// \#included, and macros currently being expanded from, not counting 807 /// CurLexer/CurTokenLexer. 808 struct IncludeStackInfo { 809 LexerCallback CurLexerCallback; 810 Module *TheSubmodule; 811 std::unique_ptr<Lexer> TheLexer; 812 PreprocessorLexer *ThePPLexer; 813 std::unique_ptr<TokenLexer> TheTokenLexer; 814 ConstSearchDirIterator TheDirLookup; 815 816 // The following constructors are completely useless copies of the default 817 // versions, only needed to pacify MSVC. 
IncludeStackInfoIncludeStackInfo818 IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule, 819 std::unique_ptr<Lexer> &&TheLexer, 820 PreprocessorLexer *ThePPLexer, 821 std::unique_ptr<TokenLexer> &&TheTokenLexer, 822 ConstSearchDirIterator TheDirLookup) 823 : CurLexerCallback(std::move(CurLexerCallback)), 824 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 825 ThePPLexer(std::move(ThePPLexer)), 826 TheTokenLexer(std::move(TheTokenLexer)), 827 TheDirLookup(std::move(TheDirLookup)) {} 828 }; 829 std::vector<IncludeStackInfo> IncludeMacroStack; 830 831 /// Actions invoked when some preprocessor activity is 832 /// encountered (e.g. a file is \#included, etc). 833 std::unique_ptr<PPCallbacks> Callbacks; 834 835 struct MacroExpandsInfo { 836 Token Tok; 837 MacroDefinition MD; 838 SourceRange Range; 839 MacroExpandsInfoMacroExpandsInfo840 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 841 : Tok(Tok), MD(MD), Range(Range) {} 842 }; 843 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 844 845 /// Information about a name that has been used to define a module macro. 846 struct ModuleMacroInfo { 847 /// The most recent macro directive for this identifier. 848 MacroDirective *MD; 849 850 /// The active module macros for this identifier. 851 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros; 852 853 /// The generation number at which we last updated ActiveModuleMacros. 854 /// \see Preprocessor::VisibleModules. 855 unsigned ActiveModuleMacrosGeneration = 0; 856 857 /// Whether this macro name is ambiguous. 858 bool IsAmbiguous = false; 859 860 /// The module macros that are overridden by this macro. 861 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros; 862 ModuleMacroInfoModuleMacroInfo863 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {} 864 }; 865 866 /// The state of a macro for an identifier. 
867 class MacroState { 868 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 869 getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)870 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 871 const IdentifierInfo *II) const { 872 if (II->isOutOfDate()) 873 PP.updateOutOfDateIdentifier(*II); 874 // FIXME: Find a spare bit on IdentifierInfo and store a 875 // HasModuleMacros flag. 876 if (!II->hasMacroDefinition() || 877 (!PP.getLangOpts().Modules && 878 !PP.getLangOpts().ModulesLocalVisibility) || 879 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 880 return nullptr; 881 882 auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State); 883 if (!Info) { 884 Info = new (PP.getPreprocessorAllocator()) 885 ModuleMacroInfo(cast<MacroDirective *>(State)); 886 State = Info; 887 } 888 889 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 890 Info->ActiveModuleMacrosGeneration) 891 PP.updateModuleMacroInfo(II, *Info); 892 return Info; 893 } 894 895 public: MacroState()896 MacroState() : MacroState(nullptr) {} MacroState(MacroDirective * MD)897 MacroState(MacroDirective *MD) : State(MD) {} 898 MacroState(MacroState && O)899 MacroState(MacroState &&O) noexcept : State(O.State) { 900 O.State = (MacroDirective *)nullptr; 901 } 902 903 MacroState &operator=(MacroState &&O) noexcept { 904 auto S = O.State; 905 O.State = (MacroDirective *)nullptr; 906 State = S; 907 return *this; 908 } 909 ~MacroState()910 ~MacroState() { 911 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State)) 912 Info->~ModuleMacroInfo(); 913 } 914 getLatest()915 MacroDirective *getLatest() const { 916 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State)) 917 return Info->MD; 918 return cast<MacroDirective *>(State); 919 } 920 setLatest(MacroDirective * MD)921 void setLatest(MacroDirective *MD) { 922 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State)) 923 Info->MD = MD; 924 else 925 State = MD; 926 } 927 isAmbiguous(Preprocessor & PP,const 
IdentifierInfo * II)928 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 929 auto *Info = getModuleInfo(PP, II); 930 return Info ? Info->IsAmbiguous : false; 931 } 932 933 ArrayRef<ModuleMacro *> getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)934 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 935 if (auto *Info = getModuleInfo(PP, II)) 936 return Info->ActiveModuleMacros; 937 return {}; 938 } 939 findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)940 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 941 SourceManager &SourceMgr) const { 942 // FIXME: Incorporate module macros into the result of this. 943 if (auto *Latest = getLatest()) 944 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 945 return {}; 946 } 947 overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)948 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 949 if (auto *Info = getModuleInfo(PP, II)) { 950 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 951 Info->ActiveModuleMacros.begin(), 952 Info->ActiveModuleMacros.end()); 953 Info->ActiveModuleMacros.clear(); 954 Info->IsAmbiguous = false; 955 } 956 } 957 getOverriddenMacros()958 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 959 if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State)) 960 return Info->OverriddenMacros; 961 return {}; 962 } 963 setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)964 void setOverriddenMacros(Preprocessor &PP, 965 ArrayRef<ModuleMacro *> Overrides) { 966 auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State); 967 if (!Info) { 968 if (Overrides.empty()) 969 return; 970 Info = new (PP.getPreprocessorAllocator()) 971 ModuleMacroInfo(cast<MacroDirective *>(State)); 972 State = Info; 973 } 974 Info->OverriddenMacros.clear(); 975 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 976 Overrides.begin(), Overrides.end()); 977 
Info->ActiveModuleMacrosGeneration = 0; 978 } 979 }; 980 981 /// For each IdentifierInfo that was associated with a macro, we 982 /// keep a mapping to the history of all macro definitions and #undefs in 983 /// the reverse order (the latest one is in the head of the list). 984 /// 985 /// This mapping lives within the \p CurSubmoduleState. 986 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>; 987 988 struct SubmoduleState; 989 990 /// Information about a submodule that we're currently building. 991 struct BuildingSubmoduleInfo { 992 /// The module that we are building. 993 Module *M; 994 995 /// The location at which the module was included. 996 SourceLocation ImportLoc; 997 998 /// Whether we entered this submodule via a pragma. 999 bool IsPragma; 1000 1001 /// The previous SubmoduleState. 1002 SubmoduleState *OuterSubmoduleState; 1003 1004 /// The number of pending module macro names when we started building this. 1005 unsigned OuterPendingModuleMacroNames; 1006 BuildingSubmoduleInfoBuildingSubmoduleInfo1007 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma, 1008 SubmoduleState *OuterSubmoduleState, 1009 unsigned OuterPendingModuleMacroNames) 1010 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma), 1011 OuterSubmoduleState(OuterSubmoduleState), 1012 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {} 1013 }; 1014 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack; 1015 1016 /// Information about a submodule's preprocessor state. 1017 struct SubmoduleState { 1018 /// The macros for the submodule. 1019 MacroMap Macros; 1020 1021 /// The set of modules that are visible within the submodule. 1022 VisibleModuleSet VisibleModules; 1023 1024 // FIXME: CounterValue? 1025 // FIXME: PragmaPushMacroInfo? 1026 }; 1027 std::map<Module *, SubmoduleState> Submodules; 1028 1029 /// The preprocessor state for preprocessing outside of any submodule. 
1030 SubmoduleState NullSubmoduleState; 1031 1032 /// The current submodule state. Will be \p NullSubmoduleState if we're not 1033 /// in a submodule. 1034 SubmoduleState *CurSubmoduleState; 1035 1036 /// The files that have been included. 1037 IncludedFilesSet IncludedFiles; 1038 1039 /// The set of top-level modules that affected preprocessing, but were not 1040 /// imported. 1041 llvm::SmallSetVector<Module *, 2> AffectingClangModules; 1042 1043 /// The set of known macros exported from modules. 1044 llvm::FoldingSet<ModuleMacro> ModuleMacros; 1045 1046 /// The names of potential module macros that we've not yet processed. 1047 llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames; 1048 1049 /// The list of module macros, for each identifier, that are not overridden by 1050 /// any other module macro. 1051 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>> 1052 LeafModuleMacros; 1053 1054 /// Macros that we want to warn because they are not used at the end 1055 /// of the translation unit. 1056 /// 1057 /// We store just their SourceLocations instead of 1058 /// something like MacroInfo*. The benefit of this is that when we are 1059 /// deserializing from PCH, we don't need to deserialize identifier & macros 1060 /// just so that we can report that they are unused, we just warn using 1061 /// the SourceLocations of this set (that will be filled by the ASTReader). 1062 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>; 1063 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 1064 1065 /// This is a pair of an optional message and source location used for pragmas 1066 /// that annotate macros like pragma clang restrict_expansion and pragma clang 1067 /// deprecated. This pair stores the optional message and the location of the 1068 /// annotation pragma for use producing diagnostics and notes. 
1069 using MsgLocationPair = std::pair<std::string, SourceLocation>; 1070 1071 struct MacroAnnotationInfo { 1072 SourceLocation Location; 1073 std::string Message; 1074 }; 1075 1076 struct MacroAnnotations { 1077 std::optional<MacroAnnotationInfo> DeprecationInfo; 1078 std::optional<MacroAnnotationInfo> RestrictExpansionInfo; 1079 std::optional<SourceLocation> FinalAnnotationLoc; 1080 }; 1081 1082 /// Warning information for macro annotations. 1083 llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos; 1084 1085 /// A "freelist" of MacroArg objects that can be 1086 /// reused for quick allocation. 1087 MacroArgs *MacroArgCache = nullptr; 1088 1089 /// For each IdentifierInfo used in a \#pragma push_macro directive, 1090 /// we keep a MacroInfo stack used to restore the previous macro value. 1091 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>> 1092 PragmaPushMacroInfo; 1093 1094 // Various statistics we track for performance analysis. 1095 unsigned NumDirectives = 0; 1096 unsigned NumDefined = 0; 1097 unsigned NumUndefined = 0; 1098 unsigned NumPragma = 0; 1099 unsigned NumIf = 0; 1100 unsigned NumElse = 0; 1101 unsigned NumEndif = 0; 1102 unsigned NumEnteredSourceFiles = 0; 1103 unsigned MaxIncludeStackDepth = 0; 1104 unsigned NumMacroExpanded = 0; 1105 unsigned NumFnMacroExpanded = 0; 1106 unsigned NumBuiltinMacroExpanded = 0; 1107 unsigned NumFastMacroExpanded = 0; 1108 unsigned NumTokenPaste = 0; 1109 unsigned NumFastTokenPaste = 0; 1110 unsigned NumSkipped = 0; 1111 1112 /// The predefined macros that preprocessor should use from the 1113 /// command line etc. 1114 std::string Predefines; 1115 1116 /// The file ID for the preprocessor predefines. 1117 FileID PredefinesFileID; 1118 1119 /// The file ID for the PCH through header. 1120 FileID PCHThroughHeaderFileID; 1121 1122 /// Whether tokens are being skipped until a #pragma hdrstop is seen. 
1123 bool SkippingUntilPragmaHdrStop = false; 1124 1125 /// Whether tokens are being skipped until the through header is seen. 1126 bool SkippingUntilPCHThroughHeader = false; 1127 1128 /// \{ 1129 /// Cache of macro expanders to reduce malloc traffic. 1130 enum { TokenLexerCacheSize = 8 }; 1131 unsigned NumCachedTokenLexers; 1132 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize]; 1133 /// \} 1134 1135 /// Keeps macro expanded tokens for TokenLexers. 1136 // 1137 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 1138 /// going to lex in the cache and when it finishes the tokens are removed 1139 /// from the end of the cache. 1140 SmallVector<Token, 16> MacroExpandedTokens; 1141 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack; 1142 1143 /// A record of the macro definitions and expansions that 1144 /// occurred during preprocessing. 1145 /// 1146 /// This is an optional side structure that can be enabled with 1147 /// \c createPreprocessingRecord() prior to preprocessing. 1148 PreprocessingRecord *Record = nullptr; 1149 1150 /// Cached tokens state. 1151 using CachedTokensTy = SmallVector<Token, 1>; 1152 1153 /// Cached tokens are stored here when we do backtracking or 1154 /// lookahead. They are "lexed" by the CachingLex() method. 1155 CachedTokensTy CachedTokens; 1156 1157 /// The position of the cached token that CachingLex() should 1158 /// "lex" next. 1159 /// 1160 /// If it points beyond the CachedTokens vector, it means that a normal 1161 /// Lex() should be invoked. 1162 CachedTokensTy::size_type CachedLexPos = 0; 1163 1164 /// Stack of backtrack positions, allowing nested backtracks. 1165 /// 1166 /// The EnableBacktrackAtThisPos() method pushes a position to 1167 /// indicate where CachedLexPos should be set when the BackTrack() method is 1168 /// invoked (at which point the last position is popped). 
1169 std::vector<CachedTokensTy::size_type> BacktrackPositions; 1170 1171 /// Stack of cached tokens/initial number of cached tokens pairs, allowing 1172 /// nested unannotated backtracks. 1173 std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>> 1174 UnannotatedBacktrackTokens; 1175 1176 /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running. 1177 /// This is used to guard against calling this function recursively. 1178 /// 1179 /// See comments at the use-site for more context about why it is needed. 1180 bool SkippingExcludedConditionalBlock = false; 1181 1182 /// Keeps track of skipped range mappings that were recorded while skipping 1183 /// excluded conditional directives. It maps the source buffer pointer at 1184 /// the beginning of a skipped block, to the number of bytes that should be 1185 /// skipped. 1186 llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges; 1187 1188 void updateOutOfDateIdentifier(const IdentifierInfo &II) const; 1189 1190 public: 1191 Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags, 1192 const LangOptions &LangOpts, SourceManager &SM, 1193 HeaderSearch &Headers, ModuleLoader &TheModuleLoader, 1194 IdentifierInfoLookup *IILookup = nullptr, 1195 bool OwnsHeaderSearch = false, 1196 TranslationUnitKind TUKind = TU_Complete); 1197 1198 ~Preprocessor(); 1199 1200 /// Initialize the preprocessor using information about the target. 1201 /// 1202 /// \param Target is owned by the caller and must remain valid for the 1203 /// lifetime of the preprocessor. 1204 /// \param AuxTarget is owned by the caller and must remain valid for 1205 /// the lifetime of the preprocessor. 1206 void Initialize(const TargetInfo &Target, 1207 const TargetInfo *AuxTarget = nullptr); 1208 1209 /// Initialize the preprocessor to parse a model file 1210 /// 1211 /// To parse model files the preprocessor of the original source is reused to 1212 /// preserver the identifier table. 
However to avoid some duplicate 1213 /// information in the preprocessor some cleanup is needed before it is used 1214 /// to parse model files. This method does that cleanup. 1215 void InitializeForModelFile(); 1216 1217 /// Cleanup after model file parsing 1218 void FinalizeForModelFile(); 1219 1220 /// Retrieve the preprocessor options used to initialize this preprocessor. getPreprocessorOpts()1221 const PreprocessorOptions &getPreprocessorOpts() const { return PPOpts; } 1222 getDiagnostics()1223 DiagnosticsEngine &getDiagnostics() const { return *Diags; } setDiagnostics(DiagnosticsEngine & D)1224 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 1225 getLangOpts()1226 const LangOptions &getLangOpts() const { return LangOpts; } getTargetInfo()1227 const TargetInfo &getTargetInfo() const { return *Target; } getAuxTargetInfo()1228 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } getFileManager()1229 FileManager &getFileManager() const { return FileMgr; } getSourceManager()1230 SourceManager &getSourceManager() const { return SourceMgr; } getHeaderSearchInfo()1231 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 1232 getIdentifierTable()1233 IdentifierTable &getIdentifierTable() { return Identifiers; } getIdentifierTable()1234 const IdentifierTable &getIdentifierTable() const { return Identifiers; } getSelectorTable()1235 SelectorTable &getSelectorTable() { return Selectors; } getBuiltinInfo()1236 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; } getPreprocessorAllocator()1237 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 1238 setExternalSource(ExternalPreprocessorSource * Source)1239 void setExternalSource(ExternalPreprocessorSource *Source) { 1240 ExternalSource = Source; 1241 } 1242 getExternalSource()1243 ExternalPreprocessorSource *getExternalSource() const { 1244 return ExternalSource; 1245 } 1246 1247 /// Retrieve the module loader associated with this preprocessor. 
getModuleLoader()1248 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 1249 hadModuleLoaderFatalFailure()1250 bool hadModuleLoaderFatalFailure() const { 1251 return TheModuleLoader.HadFatalFailure; 1252 } 1253 1254 /// Retrieve the number of Directives that have been processed by the 1255 /// Preprocessor. getNumDirectives()1256 unsigned getNumDirectives() const { 1257 return NumDirectives; 1258 } 1259 1260 /// True if we are currently preprocessing a #if or #elif directive isParsingIfOrElifDirective()1261 bool isParsingIfOrElifDirective() const { 1262 return ParsingIfOrElifDirective; 1263 } 1264 1265 /// Control whether the preprocessor retains comments in output. SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)1266 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 1267 this->KeepComments = KeepComments | KeepMacroComments; 1268 this->KeepMacroComments = KeepMacroComments; 1269 } 1270 getCommentRetentionState()1271 bool getCommentRetentionState() const { return KeepComments; } 1272 setPragmasEnabled(bool Enabled)1273 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } getPragmasEnabled()1274 bool getPragmasEnabled() const { return PragmasEnabled; } 1275 SetSuppressIncludeNotFoundError(bool Suppress)1276 void SetSuppressIncludeNotFoundError(bool Suppress) { 1277 SuppressIncludeNotFoundError = Suppress; 1278 } 1279 GetSuppressIncludeNotFoundError()1280 bool GetSuppressIncludeNotFoundError() { 1281 return SuppressIncludeNotFoundError; 1282 } 1283 1284 /// Sets whether the preprocessor is responsible for producing output or if 1285 /// it is producing tokens to be consumed by Parse and Sema. 
setPreprocessedOutput(bool IsPreprocessedOutput)1286 void setPreprocessedOutput(bool IsPreprocessedOutput) { 1287 PreprocessedOutput = IsPreprocessedOutput; 1288 } 1289 1290 /// Returns true if the preprocessor is responsible for generating output, 1291 /// false if it is producing tokens to be consumed by Parse and Sema. isPreprocessedOutput()1292 bool isPreprocessedOutput() const { return PreprocessedOutput; } 1293 1294 /// Return true if we are lexing directly from the specified lexer. isCurrentLexer(const PreprocessorLexer * L)1295 bool isCurrentLexer(const PreprocessorLexer *L) const { 1296 return CurPPLexer == L; 1297 } 1298 1299 /// Return the current lexer being lexed from. 1300 /// 1301 /// Note that this ignores any potentially active macro expansions and _Pragma 1302 /// expansions going on at the time. getCurrentLexer()1303 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 1304 1305 /// Return the current file lexer being lexed from. 1306 /// 1307 /// Note that this ignores any potentially active macro expansions and _Pragma 1308 /// expansions going on at the time. 1309 PreprocessorLexer *getCurrentFileLexer() const; 1310 1311 /// Return the submodule owning the file being lexed. This may not be 1312 /// the current module if we have changed modules since entering the file. getCurrentLexerSubmodule()1313 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; } 1314 1315 /// Returns the FileID for the preprocessor predefines. getPredefinesFileID()1316 FileID getPredefinesFileID() const { return PredefinesFileID; } 1317 1318 /// \{ 1319 /// Accessors for preprocessor callbacks. 1320 /// 1321 /// Note that this class takes ownership of any PPCallbacks object given to 1322 /// it. 
getPPCallbacks()1323 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } addPPCallbacks(std::unique_ptr<PPCallbacks> C)1324 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 1325 if (Callbacks) 1326 C = std::make_unique<PPChainedCallbacks>(std::move(C), 1327 std::move(Callbacks)); 1328 Callbacks = std::move(C); 1329 } 1330 /// \} 1331 1332 /// Get the number of tokens processed so far. getTokenCount()1333 unsigned getTokenCount() const { return TokenCount; } 1334 1335 /// Get the max number of tokens before issuing a -Wmax-tokens warning. getMaxTokens()1336 unsigned getMaxTokens() const { return MaxTokens; } 1337 overrideMaxTokens(unsigned Value,SourceLocation Loc)1338 void overrideMaxTokens(unsigned Value, SourceLocation Loc) { 1339 MaxTokens = Value; 1340 MaxTokensOverrideLoc = Loc; 1341 }; 1342 getMaxTokensOverrideLoc()1343 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; } 1344 1345 /// Register a function that would be called on each token in the final 1346 /// expanded token stream. 1347 /// This also reports annotation tokens produced by the parser. 
setTokenWatcher(llvm::unique_function<void (const clang::Token &)> F)1348 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) { 1349 OnToken = std::move(F); 1350 } 1351 setDependencyDirectivesGetter(DependencyDirectivesGetter & Get)1352 void setDependencyDirectivesGetter(DependencyDirectivesGetter &Get) { 1353 GetDependencyDirectives = &Get; 1354 } 1355 setPreprocessToken(bool Preprocess)1356 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; } 1357 isMacroDefined(StringRef Id)1358 bool isMacroDefined(StringRef Id) { 1359 return isMacroDefined(&Identifiers.get(Id)); 1360 } isMacroDefined(const IdentifierInfo * II)1361 bool isMacroDefined(const IdentifierInfo *II) { 1362 return II->hasMacroDefinition() && 1363 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 1364 } 1365 1366 /// Determine whether II is defined as a macro within the module M, 1367 /// if that is a module that we've already preprocessed. Does not check for 1368 /// macros imported into M. 
isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)1369 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 1370 if (!II->hasMacroDefinition()) 1371 return false; 1372 auto I = Submodules.find(M); 1373 if (I == Submodules.end()) 1374 return false; 1375 auto J = I->second.Macros.find(II); 1376 if (J == I->second.Macros.end()) 1377 return false; 1378 auto *MD = J->second.getLatest(); 1379 return MD && MD->isDefined(); 1380 } 1381 getMacroDefinition(const IdentifierInfo * II)1382 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 1383 if (!II->hasMacroDefinition()) 1384 return {}; 1385 1386 MacroState &S = CurSubmoduleState->Macros[II]; 1387 auto *MD = S.getLatest(); 1388 while (isa_and_nonnull<VisibilityMacroDirective>(MD)) 1389 MD = MD->getPrevious(); 1390 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 1391 S.getActiveModuleMacros(*this, II), 1392 S.isAmbiguous(*this, II)); 1393 } 1394 getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)1395 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 1396 SourceLocation Loc) { 1397 if (!II->hadMacroDefinition()) 1398 return {}; 1399 1400 MacroState &S = CurSubmoduleState->Macros[II]; 1401 MacroDirective::DefInfo DI; 1402 if (auto *MD = S.getLatest()) 1403 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 1404 // FIXME: Compute the set of active module macros at the specified location. 1405 return MacroDefinition(DI.getDirective(), 1406 S.getActiveModuleMacros(*this, II), 1407 S.isAmbiguous(*this, II)); 1408 } 1409 1410 /// Given an identifier, return its latest non-imported MacroDirective 1411 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. 
getLocalMacroDirective(const IdentifierInfo * II)1412 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 1413 if (!II->hasMacroDefinition()) 1414 return nullptr; 1415 1416 auto *MD = getLocalMacroDirectiveHistory(II); 1417 if (!MD || MD->getDefinition().isUndefined()) 1418 return nullptr; 1419 1420 return MD; 1421 } 1422 getMacroInfo(const IdentifierInfo * II)1423 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 1424 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 1425 } 1426 getMacroInfo(const IdentifierInfo * II)1427 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 1428 if (!II->hasMacroDefinition()) 1429 return nullptr; 1430 if (auto MD = getMacroDefinition(II)) 1431 return MD.getMacroInfo(); 1432 return nullptr; 1433 } 1434 1435 /// Given an identifier, return the latest non-imported macro 1436 /// directive for that identifier. 1437 /// 1438 /// One can iterate over all previous macro directives from the most recent 1439 /// one. 1440 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 1441 1442 /// Add a directive to the macro directive history for this identifier. 1443 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)1444 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 1445 SourceLocation Loc) { 1446 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 1447 appendMacroDirective(II, MD); 1448 return MD; 1449 } appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)1450 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 1451 MacroInfo *MI) { 1452 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 1453 } 1454 1455 /// Set a MacroDirective that was loaded from a PCH file. 
1456 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, 1457 MacroDirective *MD); 1458 1459 /// Register an exported macro for a module and identifier. 1460 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, 1461 MacroInfo *Macro, 1462 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 1463 ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II); 1464 1465 /// Get the list of leaf (non-overridden) module macros for a name. getLeafModuleMacros(const IdentifierInfo * II)1466 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 1467 if (II->isOutOfDate()) 1468 updateOutOfDateIdentifier(*II); 1469 auto I = LeafModuleMacros.find(II); 1470 if (I != LeafModuleMacros.end()) 1471 return I->second; 1472 return {}; 1473 } 1474 1475 /// Get the list of submodules that we're currently building. getBuildingSubmodules()1476 ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const { 1477 return BuildingSubmoduleStack; 1478 } 1479 1480 /// \{ 1481 /// Iterators for the macro history table. Currently defined macros have 1482 /// IdentifierInfo::hasMacroDefinition() set and an empty 1483 /// MacroInfo::getUndefLoc() at the head of the list. 1484 using macro_iterator = MacroMap::const_iterator; 1485 1486 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 1487 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 1488 1489 llvm::iterator_range<macro_iterator> 1490 macros(bool IncludeExternalMacros = true) const { 1491 macro_iterator begin = macro_begin(IncludeExternalMacros); 1492 macro_iterator end = macro_end(IncludeExternalMacros); 1493 return llvm::make_range(begin, end); 1494 } 1495 1496 /// \} 1497 1498 /// Mark the given clang module as affecting the current clang module or translation unit. 
markClangModuleAsAffecting(Module * M)1499 void markClangModuleAsAffecting(Module *M) { 1500 assert(M->isModuleMapModule()); 1501 if (!BuildingSubmoduleStack.empty()) { 1502 if (M != BuildingSubmoduleStack.back().M) 1503 BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M); 1504 } else { 1505 AffectingClangModules.insert(M); 1506 } 1507 } 1508 1509 /// Get the set of top-level clang modules that affected preprocessing, but were not 1510 /// imported. getAffectingClangModules()1511 const llvm::SmallSetVector<Module *, 2> &getAffectingClangModules() const { 1512 return AffectingClangModules; 1513 } 1514 1515 /// Mark the file as included. 1516 /// Returns true if this is the first time the file was included. markIncluded(FileEntryRef File)1517 bool markIncluded(FileEntryRef File) { 1518 HeaderInfo.getFileInfo(File).IsLocallyIncluded = true; 1519 return IncludedFiles.insert(File).second; 1520 } 1521 1522 /// Return true if this header has already been included. alreadyIncluded(FileEntryRef File)1523 bool alreadyIncluded(FileEntryRef File) const { 1524 HeaderInfo.getFileInfo(File); 1525 return IncludedFiles.count(File); 1526 } 1527 1528 /// Get the set of included files. getIncludedFiles()1529 IncludedFilesSet &getIncludedFiles() { return IncludedFiles; } getIncludedFiles()1530 const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; } 1531 1532 /// Return the name of the macro defined before \p Loc that has 1533 /// spelling \p Tokens. If there are multiple macros with same spelling, 1534 /// return the last one defined. 1535 StringRef getLastMacroWithSpelling(SourceLocation Loc, 1536 ArrayRef<TokenValue> Tokens) const; 1537 1538 /// Get the predefines for this processor. 1539 /// Used by some third-party tools to inspect and add predefines (see 1540 /// https://github.com/llvm/llvm-project/issues/57483). 
getPredefines()1541 const std::string &getPredefines() const { return Predefines; } 1542 1543 /// Set the predefines for this Preprocessor. 1544 /// 1545 /// These predefines are automatically injected when parsing the main file. setPredefines(std::string P)1546 void setPredefines(std::string P) { Predefines = std::move(P); } 1547 1548 /// Return information about the specified preprocessor 1549 /// identifier token. getIdentifierInfo(StringRef Name)1550 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 1551 return &Identifiers.get(Name); 1552 } 1553 1554 /// Add the specified pragma handler to this preprocessor. 1555 /// 1556 /// If \p Namespace is non-null, then it is a token required to exist on the 1557 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 1558 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); AddPragmaHandler(PragmaHandler * Handler)1559 void AddPragmaHandler(PragmaHandler *Handler) { 1560 AddPragmaHandler(StringRef(), Handler); 1561 } 1562 1563 /// Remove the specific pragma handler from this preprocessor. 1564 /// 1565 /// If \p Namespace is non-null, then it should be the namespace that 1566 /// \p Handler was added to. It is an error to remove a handler that 1567 /// has not been registered. 1568 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); RemovePragmaHandler(PragmaHandler * Handler)1569 void RemovePragmaHandler(PragmaHandler *Handler) { 1570 RemovePragmaHandler(StringRef(), Handler); 1571 } 1572 1573 /// Install empty handlers for all pragmas (making them ignored). 1574 void IgnorePragmas(); 1575 1576 /// Set empty line handler. setEmptylineHandler(EmptylineHandler * Handler)1577 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; } 1578 getEmptylineHandler()1579 EmptylineHandler *getEmptylineHandler() const { return Emptyline; } 1580 1581 /// Add the specified comment handler to the preprocessor. 
1582 void addCommentHandler(CommentHandler *Handler); 1583 1584 /// Remove the specified comment handler. 1585 /// 1586 /// It is an error to remove a handler that has not been registered. 1587 void removeCommentHandler(CommentHandler *Handler); 1588 1589 /// Set the code completion handler to the given object. setCodeCompletionHandler(CodeCompletionHandler & Handler)1590 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 1591 CodeComplete = &Handler; 1592 } 1593 1594 /// Retrieve the current code-completion handler. getCodeCompletionHandler()1595 CodeCompletionHandler *getCodeCompletionHandler() const { 1596 return CodeComplete; 1597 } 1598 1599 /// Clear out the code completion handler. clearCodeCompletionHandler()1600 void clearCodeCompletionHandler() { 1601 CodeComplete = nullptr; 1602 } 1603 1604 /// Hook used by the lexer to invoke the "included file" code 1605 /// completion point. 1606 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled); 1607 1608 /// Hook used by the lexer to invoke the "natural language" code 1609 /// completion point. 1610 void CodeCompleteNaturalLanguage(); 1611 1612 /// Set the code completion token for filtering purposes. setCodeCompletionIdentifierInfo(IdentifierInfo * Filter)1613 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) { 1614 CodeCompletionII = Filter; 1615 } 1616 1617 /// Set the code completion token range for detecting replacement range later 1618 /// on. setCodeCompletionTokenRange(const SourceLocation Start,const SourceLocation End)1619 void setCodeCompletionTokenRange(const SourceLocation Start, 1620 const SourceLocation End) { 1621 CodeCompletionTokenRange = {Start, End}; 1622 } getCodeCompletionTokenRange()1623 SourceRange getCodeCompletionTokenRange() const { 1624 return CodeCompletionTokenRange; 1625 } 1626 1627 /// Get the code completion token for filtering purposes. 
getCodeCompletionFilter()1628 StringRef getCodeCompletionFilter() { 1629 if (CodeCompletionII) 1630 return CodeCompletionII->getName(); 1631 return {}; 1632 } 1633 1634 /// Retrieve the preprocessing record, or NULL if there is no 1635 /// preprocessing record. getPreprocessingRecord()1636 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 1637 1638 /// Create a new preprocessing record, which will keep track of 1639 /// all macro expansions, macro definitions, etc. 1640 void createPreprocessingRecord(); 1641 1642 /// Returns true if the FileEntry is the PCH through header. 1643 bool isPCHThroughHeader(const FileEntry *FE); 1644 1645 /// True if creating a PCH with a through header. 1646 bool creatingPCHWithThroughHeader(); 1647 1648 /// True if using a PCH with a through header. 1649 bool usingPCHWithThroughHeader(); 1650 1651 /// True if creating a PCH with a #pragma hdrstop. 1652 bool creatingPCHWithPragmaHdrStop(); 1653 1654 /// True if using a PCH with a #pragma hdrstop. 1655 bool usingPCHWithPragmaHdrStop(); 1656 1657 /// Skip tokens until after the #include of the through header or 1658 /// until after a #pragma hdrstop. 1659 void SkipTokensWhileUsingPCH(); 1660 1661 /// Process directives while skipping until the through header or 1662 /// #pragma hdrstop is found. 1663 void HandleSkippedDirectiveWhileUsingPCH(Token &Result, 1664 SourceLocation HashLoc); 1665 1666 /// Enter the specified FileID as the main source file, 1667 /// which implicitly adds the builtin defines etc. 1668 void EnterMainSourceFile(); 1669 1670 /// Inform the preprocessor callbacks that processing is complete. 1671 void EndSourceFile(); 1672 1673 /// Add a source file to the top of the include stack and 1674 /// start lexing tokens from it instead of the current buffer. 1675 /// 1676 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 
1677 bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, 1678 SourceLocation Loc, bool IsFirstIncludeOfFile = true); 1679 1680 /// Add a Macro to the top of the include stack and start lexing 1681 /// tokens from it instead of the current buffer. 1682 /// 1683 /// \param Args specifies the tokens input to a function-like macro. 1684 /// \param ILEnd specifies the location of the ')' for a function-like macro 1685 /// or the identifier for an object-like macro. 1686 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, 1687 MacroArgs *Args); 1688 1689 private: 1690 /// Add a "macro" context to the top of the include stack, 1691 /// which will cause the lexer to start returning the specified tokens. 1692 /// 1693 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1694 /// will not be subject to further macro expansion. Otherwise, these tokens 1695 /// will be re-macro-expanded when/if expansion is enabled. 1696 /// 1697 /// If \p OwnsTokens is false, this method assumes that the specified stream 1698 /// of tokens has a permanent owner somewhere, so they do not need to be 1699 /// copied. If it is true, it assumes the array of tokens is allocated with 1700 /// \c new[] and the Preprocessor will delete[] it. 1701 /// 1702 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag 1703 /// set, see the flag documentation for details. 
1704 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1705 bool DisableMacroExpansion, bool OwnsTokens, 1706 bool IsReinject); 1707 1708 public: EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion,bool IsReinject)1709 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1710 bool DisableMacroExpansion, bool IsReinject) { 1711 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true, 1712 IsReinject); 1713 } 1714 EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion,bool IsReinject)1715 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion, 1716 bool IsReinject) { 1717 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false, 1718 IsReinject); 1719 } 1720 1721 /// Pop the current lexer/macro exp off the top of the lexer stack. 1722 /// 1723 /// This should only be used in situations where the current state of the 1724 /// top-of-stack lexer is known. 1725 void RemoveTopOfLexerStack(); 1726 1727 /// From the point that this method is called, and until 1728 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1729 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1730 /// make the Preprocessor re-lex the same tokens. 1731 /// 1732 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1733 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1734 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1735 /// 1736 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1737 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1738 /// tokens will continue indefinitely. 1739 /// 1740 /// \param Unannotated Whether token annotations are reverted upon calling 1741 /// Backtrack(). 
1742 void EnableBacktrackAtThisPos(bool Unannotated = false); 1743 1744 private: 1745 std::pair<CachedTokensTy::size_type, bool> LastBacktrackPos(); 1746 1747 CachedTokensTy PopUnannotatedBacktrackTokens(); 1748 1749 public: 1750 /// Disable the last EnableBacktrackAtThisPos call. 1751 void CommitBacktrackedTokens(); 1752 1753 /// Make Preprocessor re-lex the tokens that were lexed since 1754 /// EnableBacktrackAtThisPos() was previously called. 1755 void Backtrack(); 1756 1757 /// True if EnableBacktrackAtThisPos() was called and 1758 /// caching of tokens is on. isBacktrackEnabled()1759 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1760 1761 /// True if EnableBacktrackAtThisPos() was called and 1762 /// caching of unannotated tokens is on. isUnannotatedBacktrackEnabled()1763 bool isUnannotatedBacktrackEnabled() const { 1764 return !UnannotatedBacktrackTokens.empty(); 1765 } 1766 1767 /// Lex the next token for this preprocessor. 1768 void Lex(Token &Result); 1769 1770 /// Lex all tokens for this preprocessor until (and excluding) end of file. 1771 void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr); 1772 1773 /// Lex a token, forming a header-name token if possible. 1774 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); 1775 1776 /// Lex the parameters for an #embed directive, returns nullopt on error. 1777 std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current, 1778 bool ForHasEmbed); 1779 1780 /// Get the start location of the first pp-token in main file. 
getMainFileFirstPPTokenLoc()1781 SourceLocation getMainFileFirstPPTokenLoc() const { 1782 assert(FirstPPTokenLoc.isValid() && 1783 "Did not see the first pp-token in the main file"); 1784 return FirstPPTokenLoc; 1785 } 1786 1787 bool LexAfterModuleImport(Token &Result); 1788 void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); 1789 1790 void makeModuleVisible(Module *M, SourceLocation Loc, 1791 bool IncludeExports = true); 1792 getModuleImportLoc(Module * M)1793 SourceLocation getModuleImportLoc(Module *M) const { 1794 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1795 } 1796 1797 /// Lex a string literal, which may be the concatenation of multiple 1798 /// string literals and may even come from macro expansion. 1799 /// \returns true on success, false if a error diagnostic has been generated. LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1800 bool LexStringLiteral(Token &Result, std::string &String, 1801 const char *DiagnosticTag, bool AllowMacroExpansion) { 1802 if (AllowMacroExpansion) 1803 Lex(Result); 1804 else 1805 LexUnexpandedToken(Result); 1806 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1807 AllowMacroExpansion); 1808 } 1809 1810 /// Complete the lexing of a string literal where the first token has 1811 /// already been lexed (see LexStringLiteral). 1812 bool FinishLexStringLiteral(Token &Result, std::string &String, 1813 const char *DiagnosticTag, 1814 bool AllowMacroExpansion); 1815 1816 /// Lex a token. If it's a comment, keep lexing until we get 1817 /// something not a comment. 1818 /// 1819 /// This is useful in -E -C mode where comments would foul up preprocessor 1820 /// directive handling. LexNonComment(Token & Result)1821 void LexNonComment(Token &Result) { 1822 do 1823 Lex(Result); 1824 while (Result.getKind() == tok::comment); 1825 } 1826 1827 /// Just like Lex, but disables macro expansion of identifier tokens. 
LexUnexpandedToken(Token & Result)1828 void LexUnexpandedToken(Token &Result) { 1829 // Disable macro expansion. 1830 bool OldVal = DisableMacroExpansion; 1831 DisableMacroExpansion = true; 1832 // Lex the token. 1833 Lex(Result); 1834 1835 // Reenable it. 1836 DisableMacroExpansion = OldVal; 1837 } 1838 1839 /// Like LexNonComment, but this disables macro expansion of 1840 /// identifier tokens. LexUnexpandedNonComment(Token & Result)1841 void LexUnexpandedNonComment(Token &Result) { 1842 do 1843 LexUnexpandedToken(Result); 1844 while (Result.getKind() == tok::comment); 1845 } 1846 1847 /// Parses a simple integer literal to get its numeric value. Floating 1848 /// point literals and user defined literals are rejected. Used primarily to 1849 /// handle pragmas that accept integer arguments. 1850 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1851 1852 /// Disables macro expansion everywhere except for preprocessor directives. SetMacroExpansionOnlyInDirectives()1853 void SetMacroExpansionOnlyInDirectives() { 1854 DisableMacroExpansion = true; 1855 MacroExpansionInDirectivesOverride = true; 1856 } 1857 1858 /// Peeks ahead N tokens and returns that token without consuming any 1859 /// tokens. 1860 /// 1861 /// LookAhead(0) returns the next token that would be returned by Lex(), 1862 /// LookAhead(1) returns the token after it, etc. This returns normal 1863 /// tokens after phase 5. As such, it is equivalent to using 1864 /// 'Lex', not 'LexUnexpandedToken'. LookAhead(unsigned N)1865 const Token &LookAhead(unsigned N) { 1866 assert(LexLevel == 0 && "cannot use lookahead while lexing"); 1867 if (CachedLexPos + N < CachedTokens.size()) 1868 return CachedTokens[CachedLexPos+N]; 1869 else 1870 return PeekAhead(N+1); 1871 } 1872 1873 /// When backtracking is enabled and tokens are cached, 1874 /// this allows to revert a specific number of tokens. 
1875 /// 1876 /// Note that the number of tokens being reverted should be up to the last 1877 /// backtrack position, not more. RevertCachedTokens(unsigned N)1878 void RevertCachedTokens(unsigned N) { 1879 assert(isBacktrackEnabled() && 1880 "Should only be called when tokens are cached for backtracking"); 1881 assert(signed(CachedLexPos) - signed(N) >= 1882 signed(LastBacktrackPos().first) && 1883 "Should revert tokens up to the last backtrack position, not more"); 1884 assert(signed(CachedLexPos) - signed(N) >= 0 && 1885 "Corrupted backtrack positions ?"); 1886 CachedLexPos -= N; 1887 } 1888 1889 /// Enters a token in the token stream to be lexed next. 1890 /// 1891 /// If BackTrack() is called afterwards, the token will remain at the 1892 /// insertion point. 1893 /// If \p IsReinject is true, resulting token will have Token::IsReinjected 1894 /// flag set. See the flag documentation for details. EnterToken(const Token & Tok,bool IsReinject)1895 void EnterToken(const Token &Tok, bool IsReinject) { 1896 if (LexLevel) { 1897 // It's not correct in general to enter caching lex mode while in the 1898 // middle of a nested lexing action. 1899 auto TokCopy = std::make_unique<Token[]>(1); 1900 TokCopy[0] = Tok; 1901 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject); 1902 } else { 1903 EnterCachingLexMode(); 1904 assert(IsReinject && "new tokens in the middle of cached stream"); 1905 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1906 } 1907 } 1908 1909 /// We notify the Preprocessor that if it is caching tokens (because 1910 /// backtrack is enabled) it should replace the most recent cached tokens 1911 /// with the given annotation token. This function has no effect if 1912 /// backtracking is not enabled. 1913 /// 1914 /// Note that the use of this function is just for optimization, so that the 1915 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1916 /// invoked. 
AnnotateCachedTokens(const Token & Tok)1917 void AnnotateCachedTokens(const Token &Tok) { 1918 assert(Tok.isAnnotation() && "Expected annotation token"); 1919 if (CachedLexPos != 0 && isBacktrackEnabled()) 1920 AnnotatePreviousCachedTokens(Tok); 1921 } 1922 1923 /// Get the location of the last cached token, suitable for setting the end 1924 /// location of an annotation token. getLastCachedTokenLocation()1925 SourceLocation getLastCachedTokenLocation() const { 1926 assert(CachedLexPos != 0); 1927 return CachedTokens[CachedLexPos-1].getLastLoc(); 1928 } 1929 1930 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1931 /// CachedTokens. 1932 bool IsPreviousCachedToken(const Token &Tok) const; 1933 1934 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1935 /// in \p NewToks. 1936 /// 1937 /// Useful when a token needs to be split in smaller ones and CachedTokens 1938 /// most recent token must to be updated to reflect that. 1939 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1940 1941 /// Replace the last token with an annotation token. 1942 /// 1943 /// Like AnnotateCachedTokens(), this routine replaces an 1944 /// already-parsed (and resolved) token with an annotation 1945 /// token. However, this routine only replaces the last token with 1946 /// the annotation token; it does not affect any other cached 1947 /// tokens. This function has no effect if backtracking is not 1948 /// enabled. ReplaceLastTokenWithAnnotation(const Token & Tok)1949 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1950 assert(Tok.isAnnotation() && "Expected annotation token"); 1951 if (CachedLexPos != 0 && isBacktrackEnabled()) 1952 CachedTokens[CachedLexPos-1] = Tok; 1953 } 1954 1955 /// Enter an annotation token into the token stream. 
1956 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, 1957 void *AnnotationVal); 1958 1959 /// Determine whether it's possible for a future call to Lex to produce an 1960 /// annotation token created by a previous call to EnterAnnotationToken. mightHavePendingAnnotationTokens()1961 bool mightHavePendingAnnotationTokens() { 1962 return CurLexerCallback != CLK_Lexer; 1963 } 1964 1965 /// Update the current token to represent the provided 1966 /// identifier, in order to cache an action performed by typo correction. TypoCorrectToken(const Token & Tok)1967 void TypoCorrectToken(const Token &Tok) { 1968 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1969 if (CachedLexPos != 0 && isBacktrackEnabled()) 1970 CachedTokens[CachedLexPos-1] = Tok; 1971 } 1972 1973 /// Recompute the current lexer kind based on the CurLexer/ 1974 /// CurTokenLexer pointers. 1975 void recomputeCurLexerKind(); 1976 1977 /// Returns true if incremental processing is enabled isIncrementalProcessingEnabled()1978 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1979 1980 /// Enables the incremental processing 1981 void enableIncrementalProcessing(bool value = true) { 1982 IncrementalProcessing = value; 1983 } 1984 1985 /// Specify the point at which code-completion will be performed. 1986 /// 1987 /// \param File the file in which code completion should occur. If 1988 /// this file is included multiple times, code-completion will 1989 /// perform completion the first time it is included. If NULL, this 1990 /// function clears out the code-completion point. 1991 /// 1992 /// \param Line the line at which code completion should occur 1993 /// (1-based). 1994 /// 1995 /// \param Column the column at which code completion should occur 1996 /// (1-based). 1997 /// 1998 /// \returns true if an error occurred, false otherwise. 
1999 bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, 2000 unsigned Column); 2001 2002 /// Determine if we are performing code completion. isCodeCompletionEnabled()2003 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 2004 2005 /// Returns the location of the code-completion point. 2006 /// 2007 /// Returns an invalid location if code-completion is not enabled or the file 2008 /// containing the code-completion point has not been lexed yet. getCodeCompletionLoc()2009 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 2010 2011 /// Returns the start location of the file of code-completion point. 2012 /// 2013 /// Returns an invalid location if code-completion is not enabled or the file 2014 /// containing the code-completion point has not been lexed yet. getCodeCompletionFileLoc()2015 SourceLocation getCodeCompletionFileLoc() const { 2016 return CodeCompletionFileLoc; 2017 } 2018 2019 /// Returns true if code-completion is enabled and we have hit the 2020 /// code-completion point. isCodeCompletionReached()2021 bool isCodeCompletionReached() const { return CodeCompletionReached; } 2022 2023 /// Note that we hit the code-completion point. setCodeCompletionReached()2024 void setCodeCompletionReached() { 2025 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 2026 CodeCompletionReached = true; 2027 // Silence any diagnostics that occur after we hit the code-completion. 2028 getDiagnostics().setSuppressAllDiagnostics(true); 2029 } 2030 2031 /// The location of the currently-active \#pragma clang 2032 /// arc_cf_code_audited begin. 2033 /// 2034 /// Returns an invalid location if there is no such pragma active. getPragmaARCCFCodeAuditedInfo()2035 IdentifierLoc getPragmaARCCFCodeAuditedInfo() const { 2036 return PragmaARCCFCodeAuditedInfo; 2037 } 2038 2039 /// Set the location of the currently-active \#pragma clang 2040 /// arc_cf_code_audited begin. An invalid location ends the pragma. 
setPragmaARCCFCodeAuditedInfo(IdentifierInfo * Ident,SourceLocation Loc)2041 void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident, 2042 SourceLocation Loc) { 2043 PragmaARCCFCodeAuditedInfo = IdentifierLoc(Loc, Ident); 2044 } 2045 2046 /// The location of the currently-active \#pragma clang 2047 /// assume_nonnull begin. 2048 /// 2049 /// Returns an invalid location if there is no such pragma active. getPragmaAssumeNonNullLoc()2050 SourceLocation getPragmaAssumeNonNullLoc() const { 2051 return PragmaAssumeNonNullLoc; 2052 } 2053 2054 /// Set the location of the currently-active \#pragma clang 2055 /// assume_nonnull begin. An invalid location ends the pragma. setPragmaAssumeNonNullLoc(SourceLocation Loc)2056 void setPragmaAssumeNonNullLoc(SourceLocation Loc) { 2057 PragmaAssumeNonNullLoc = Loc; 2058 } 2059 2060 /// Get the location of the recorded unterminated \#pragma clang 2061 /// assume_nonnull begin in the preamble, if one exists. 2062 /// 2063 /// Returns an invalid location if the premable did not end with 2064 /// such a pragma active or if there is no recorded preamble. getPreambleRecordedPragmaAssumeNonNullLoc()2065 SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const { 2066 return PreambleRecordedPragmaAssumeNonNullLoc; 2067 } 2068 2069 /// Record the location of the unterminated \#pragma clang 2070 /// assume_nonnull begin in the preamble. setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc)2071 void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) { 2072 PreambleRecordedPragmaAssumeNonNullLoc = Loc; 2073 } 2074 2075 /// Set the directory in which the main file should be considered 2076 /// to have been found, if it is not a real file. setMainFileDir(DirectoryEntryRef Dir)2077 void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; } 2078 2079 /// Instruct the preprocessor to skip part of the main source file. 2080 /// 2081 /// \param Bytes The number of bytes in the preamble to skip. 
2082 /// 2083 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 2084 /// start of a line. setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)2085 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 2086 SkipMainFilePreamble.first = Bytes; 2087 SkipMainFilePreamble.second = StartOfLine; 2088 } 2089 2090 /// Forwarding function for diagnostics. This emits a diagnostic at 2091 /// the specified Token's location, translating the token's start 2092 /// position in the current buffer into a SourcePosition object for rendering. Diag(SourceLocation Loc,unsigned DiagID)2093 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 2094 return Diags->Report(Loc, DiagID); 2095 } 2096 Diag(const Token & Tok,unsigned DiagID)2097 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 2098 return Diags->Report(Tok.getLocation(), DiagID); 2099 } 2100 2101 /// Return the 'spelling' of the token at the given 2102 /// location; does not go up to the spelling location or down to the 2103 /// expansion location. 2104 /// 2105 /// \param buffer A buffer which will be used only if the token requires 2106 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 2107 /// \param invalid If non-null, will be set \c true if an error occurs. 2108 StringRef getSpelling(SourceLocation loc, 2109 SmallVectorImpl<char> &buffer, 2110 bool *invalid = nullptr) const { 2111 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 2112 } 2113 2114 /// Return the 'spelling' of the Tok token. 2115 /// 2116 /// The spelling of a token is the characters used to represent the token in 2117 /// the source file after trigraph expansion and escaped-newline folding. In 2118 /// particular, this wants to get the true, uncanonicalized, spelling of 2119 /// things like digraphs, UCNs, etc. 2120 /// 2121 /// \param Invalid If non-null, will be set \c true if an error occurs. 
2122 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 2123 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 2124 } 2125 2126 /// Get the spelling of a token into a preallocated buffer, instead 2127 /// of as an std::string. 2128 /// 2129 /// The caller is required to allocate enough space for the token, which is 2130 /// guaranteed to be at least Tok.getLength() bytes long. The length of the 2131 /// actual result is returned. 2132 /// 2133 /// Note that this method may do two possible things: it may either fill in 2134 /// the buffer specified with characters, or it may *change the input pointer* 2135 /// to point to a constant buffer with the data already in it (avoiding a 2136 /// copy). The caller is not allowed to modify the returned buffer pointer 2137 /// if an internal buffer is returned. 2138 unsigned getSpelling(const Token &Tok, const char *&Buffer, 2139 bool *Invalid = nullptr) const { 2140 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 2141 } 2142 2143 /// Get the spelling of a token into a SmallVector. 2144 /// 2145 /// Note that the returned StringRef may not point to the 2146 /// supplied buffer if a copy can be avoided. 2147 StringRef getSpelling(const Token &Tok, 2148 SmallVectorImpl<char> &Buffer, 2149 bool *Invalid = nullptr) const; 2150 2151 /// Relex the token at the specified location. 2152 /// \returns true if there was a failure, false on success. 2153 bool getRawToken(SourceLocation Loc, Token &Result, 2154 bool IgnoreWhiteSpace = false) { 2155 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 2156 } 2157 2158 /// Given a Token \p Tok that is a numeric constant with length 1, 2159 /// return the value of constant as an unsigned 8-bit integer. 
2160 uint8_t 2161 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 2162 bool *Invalid = nullptr) const { 2163 assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) && 2164 Tok.getLength() == 1 && "Called on unsupported token"); 2165 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 2166 2167 // If the token is carrying a literal data pointer, just use it. 2168 if (const char *D = Tok.getLiteralData()) 2169 return (Tok.getKind() == tok::binary_data) ? *D : *D - '0'; 2170 2171 assert(Tok.is(tok::numeric_constant) && "binary data with no data"); 2172 // Otherwise, fall back on getCharacterData, which is slower, but always 2173 // works. 2174 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0'; 2175 } 2176 2177 /// Retrieve the name of the immediate macro expansion. 2178 /// 2179 /// This routine starts from a source location, and finds the name of the 2180 /// macro responsible for its immediate expansion. It looks through any 2181 /// intervening macro argument expansions to compute this. It returns a 2182 /// StringRef that refers to the SourceManager-owned buffer of the source 2183 /// where that macro name is spelled. Thus, the result shouldn't out-live 2184 /// the SourceManager. getImmediateMacroName(SourceLocation Loc)2185 StringRef getImmediateMacroName(SourceLocation Loc) { 2186 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 2187 } 2188 2189 /// Plop the specified string into a scratch buffer and set the 2190 /// specified token's location and length to it. 2191 /// 2192 /// If specified, the source location provides a location of the expansion 2193 /// point of the token. 
2194 void CreateString(StringRef Str, Token &Tok, 2195 SourceLocation ExpansionLocStart = SourceLocation(), 2196 SourceLocation ExpansionLocEnd = SourceLocation()); 2197 2198 /// Split the first Length characters out of the token starting at TokLoc 2199 /// and return a location pointing to the split token. Re-lexing from the 2200 /// split token will return the split token rather than the original. 2201 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length); 2202 2203 /// Computes the source location just past the end of the 2204 /// token at this source location. 2205 /// 2206 /// This routine can be used to produce a source location that 2207 /// points just past the end of the token referenced by \p Loc, and 2208 /// is generally used when a diagnostic needs to point just after a 2209 /// token where it expected something different that it received. If 2210 /// the returned source location would not be meaningful (e.g., if 2211 /// it points into a macro), this routine returns an invalid 2212 /// source location. 2213 /// 2214 /// \param Offset an offset from the end of the token, where the source 2215 /// location should refer to. The default offset (0) produces a source 2216 /// location pointing just past the end of the token; an offset of 1 produces 2217 /// a source location pointing to the last character in the token, etc. 2218 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 2219 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 2220 } 2221 2222 /// Returns true if the given MacroID location points at the first 2223 /// token of the macro expansion. 2224 /// 2225 /// \param MacroBegin If non-null and function returns true, it is set to 2226 /// begin location of the macro. 
2227 bool isAtStartOfMacroExpansion(SourceLocation loc, 2228 SourceLocation *MacroBegin = nullptr) const { 2229 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 2230 MacroBegin); 2231 } 2232 2233 /// Returns true if the given MacroID location points at the last 2234 /// token of the macro expansion. 2235 /// 2236 /// \param MacroEnd If non-null and function returns true, it is set to 2237 /// end location of the macro. 2238 bool isAtEndOfMacroExpansion(SourceLocation loc, 2239 SourceLocation *MacroEnd = nullptr) const { 2240 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 2241 } 2242 2243 /// Print the token to stderr, used for debugging. 2244 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 2245 void DumpLocation(SourceLocation Loc) const; 2246 void DumpMacro(const MacroInfo &MI) const; 2247 void dumpMacroInfo(const IdentifierInfo *II); 2248 2249 /// Given a location that specifies the start of a 2250 /// token, return a new location that specifies a character within the token. AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)2251 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 2252 unsigned Char) const { 2253 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 2254 } 2255 2256 /// Increment the counters for the number of token paste operations 2257 /// performed. 2258 /// 2259 /// If fast was specified, this is a 'fast paste' case we handled. IncrementPasteCounter(bool isFast)2260 void IncrementPasteCounter(bool isFast) { 2261 if (isFast) 2262 ++NumFastTokenPaste; 2263 else 2264 ++NumTokenPaste; 2265 } 2266 2267 void PrintStats(); 2268 2269 size_t getTotalMemory() const; 2270 2271 /// When the macro expander pastes together a comment (/##/) in Microsoft 2272 /// mode, this method handles updating the current state, returning the 2273 /// token on the next source line. 
2274 void HandleMicrosoftCommentPaste(Token &Tok); 2275 2276 //===--------------------------------------------------------------------===// 2277 // Preprocessor callback methods. These are invoked by a lexer as various 2278 // directives and events are found. 2279 2280 /// Given a tok::raw_identifier token, look up the 2281 /// identifier information for the token and install it into the token, 2282 /// updating the token kind accordingly. 2283 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 2284 2285 private: 2286 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 2287 2288 public: 2289 /// Specifies the reason for poisoning an identifier. 2290 /// 2291 /// If that identifier is accessed while poisoned, then this reason will be 2292 /// used instead of the default "poisoned" diagnostic. 2293 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 2294 2295 /// Display reason for poisoned identifier. 2296 void HandlePoisonedIdentifier(Token & Identifier); 2297 MaybeHandlePoisonedIdentifier(Token & Identifier)2298 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 2299 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 2300 if(II->isPoisoned()) { 2301 HandlePoisonedIdentifier(Identifier); 2302 } 2303 } 2304 } 2305 2306 /// Check whether the next pp-token is one of the specificed token kind. this 2307 /// method should have no observable side-effect on the lexed tokens. isNextPPTokenOneOf(Ts...Ks)2308 template <typename... Ts> bool isNextPPTokenOneOf(Ts... Ks) { 2309 static_assert(sizeof...(Ts) > 0, 2310 "requires at least one tok::TokenKind specified"); 2311 // Do some quick tests for rejection cases. 2312 std::optional<Token> Val; 2313 if (CurLexer) 2314 Val = CurLexer->peekNextPPToken(); 2315 else 2316 Val = CurTokenLexer->peekNextPPToken(); 2317 2318 if (!Val) { 2319 // We have run off the end. If it's a source file we don't 2320 // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the 2321 // macro stack. 
2322 if (CurPPLexer) 2323 return false; 2324 for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) { 2325 if (Entry.TheLexer) 2326 Val = Entry.TheLexer->peekNextPPToken(); 2327 else 2328 Val = Entry.TheTokenLexer->peekNextPPToken(); 2329 2330 if (Val) 2331 break; 2332 2333 // Ran off the end of a source file? 2334 if (Entry.ThePPLexer) 2335 return false; 2336 } 2337 } 2338 2339 // Okay, we found the token and return. Otherwise we found the end of the 2340 // translation unit. 2341 return Val->isOneOf(Ks...); 2342 } 2343 2344 private: 2345 /// Identifiers used for SEH handling in Borland. These are only 2346 /// allowed in particular circumstances 2347 // __except block 2348 IdentifierInfo *Ident__exception_code, 2349 *Ident___exception_code, 2350 *Ident_GetExceptionCode; 2351 // __except filter expression 2352 IdentifierInfo *Ident__exception_info, 2353 *Ident___exception_info, 2354 *Ident_GetExceptionInfo; 2355 // __finally 2356 IdentifierInfo *Ident__abnormal_termination, 2357 *Ident___abnormal_termination, 2358 *Ident_AbnormalTermination; 2359 2360 const char *getCurLexerEndPos(); 2361 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod); 2362 2363 public: 2364 void PoisonSEHIdentifiers(bool Poison = true); // Borland 2365 2366 /// Callback invoked when the lexer reads an identifier and has 2367 /// filled in the tokens IdentifierInfo member. 2368 /// 2369 /// This callback potentially macro expands it or turns it into a named 2370 /// token (like 'for'). 2371 /// 2372 /// \returns true if we actually computed a token, false if we need to 2373 /// lex again. 2374 bool HandleIdentifier(Token &Identifier); 2375 2376 /// Callback invoked when the lexer hits the end of the current file. 2377 /// 2378 /// This either returns the EOF token and returns true, or 2379 /// pops a level off the include stack and returns false, at which point the 2380 /// client should call lex again. 
2381 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 2382 2383 /// Callback invoked when the current TokenLexer hits the end of its 2384 /// token stream. 2385 bool HandleEndOfTokenLexer(Token &Result); 2386 2387 /// Callback invoked when the lexer sees a # token at the start of a 2388 /// line. 2389 /// 2390 /// This consumes the directive, modifies the lexer/preprocessor state, and 2391 /// advances the lexer(s) so that the next token read is the correct one. 2392 void HandleDirective(Token &Result); 2393 2394 /// Ensure that the next token is a tok::eod token. 2395 /// 2396 /// If not, emit a diagnostic and consume up until the eod. 2397 /// If \p EnableMacros is true, then we consider macros that expand to zero 2398 /// tokens as being ok. 2399 /// 2400 /// \return The location of the end of the directive (the terminating 2401 /// newline). 2402 SourceLocation CheckEndOfDirective(const char *DirType, 2403 bool EnableMacros = false); 2404 2405 /// Read and discard all tokens remaining on the current line until 2406 /// the tok::eod token is found. Returns the range of the skipped tokens. DiscardUntilEndOfDirective()2407 SourceRange DiscardUntilEndOfDirective() { 2408 Token Tmp; 2409 return DiscardUntilEndOfDirective(Tmp); 2410 } 2411 2412 /// Same as above except retains the token that was found. 2413 SourceRange DiscardUntilEndOfDirective(Token &Tok); 2414 2415 /// Returns true if the preprocessor has seen a use of 2416 /// __DATE__ or __TIME__ in the file so far. 
SawDateOrTime()2417 bool SawDateOrTime() const { 2418 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 2419 } getCounterValue()2420 unsigned getCounterValue() const { return CounterValue; } setCounterValue(unsigned V)2421 void setCounterValue(unsigned V) { CounterValue = V; } 2422 getCurrentFPEvalMethod()2423 LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const { 2424 assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine && 2425 "FPEvalMethod should be set either from command line or from the " 2426 "target info"); 2427 return CurrentFPEvalMethod; 2428 } 2429 getTUFPEvalMethod()2430 LangOptions::FPEvalMethodKind getTUFPEvalMethod() const { 2431 return TUFPEvalMethod; 2432 } 2433 getLastFPEvalPragmaLocation()2434 SourceLocation getLastFPEvalPragmaLocation() const { 2435 return LastFPEvalPragmaLocation; 2436 } 2437 setCurrentFPEvalMethod(SourceLocation PragmaLoc,LangOptions::FPEvalMethodKind Val)2438 void setCurrentFPEvalMethod(SourceLocation PragmaLoc, 2439 LangOptions::FPEvalMethodKind Val) { 2440 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2441 "FPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2442 // This is the location of the '#pragma float_control" where the 2443 // execution state is modifed. 2444 LastFPEvalPragmaLocation = PragmaLoc; 2445 CurrentFPEvalMethod = Val; 2446 TUFPEvalMethod = Val; 2447 } 2448 setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)2449 void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) { 2450 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2451 "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2452 TUFPEvalMethod = Val; 2453 } 2454 2455 /// Retrieves the module that we're currently building, if any. 2456 Module *getCurrentModule(); 2457 2458 /// Retrieves the module whose implementation we're current compiling, if any. 2459 Module *getCurrentModuleImplementation(); 2460 2461 /// If we are preprocessing a named module. 
isInNamedModule()2462 bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); } 2463 2464 /// If we are proprocessing a named interface unit. 2465 /// Note that a module implementation partition is not considered as an 2466 /// named interface unit here although it is importable 2467 /// to ease the parsing. isInNamedInterfaceUnit()2468 bool isInNamedInterfaceUnit() const { 2469 return ModuleDeclState.isNamedInterface(); 2470 } 2471 2472 /// Get the named module name we're preprocessing. 2473 /// Requires we're preprocessing a named module. getNamedModuleName()2474 StringRef getNamedModuleName() const { return ModuleDeclState.getName(); } 2475 2476 /// If we are implementing an implementation module unit. 2477 /// Note that the module implementation partition is not considered as an 2478 /// implementation unit. isInImplementationUnit()2479 bool isInImplementationUnit() const { 2480 return ModuleDeclState.isImplementationUnit(); 2481 } 2482 2483 /// If we're importing a standard C++20 Named Modules. isInImportingCXXNamedModules()2484 bool isInImportingCXXNamedModules() const { 2485 // NamedModuleImportPath will be non-empty only if we're importing 2486 // Standard C++ named modules. 2487 return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules && 2488 !IsAtImport; 2489 } 2490 2491 /// Allocate a new MacroInfo object with the provided SourceLocation. 2492 MacroInfo *AllocateMacroInfo(SourceLocation L); 2493 2494 /// Turn the specified lexer token into a fully checked and spelled 2495 /// filename, e.g. as an operand of \#include. 2496 /// 2497 /// The caller is expected to provide a buffer that is large enough to hold 2498 /// the spelling of the filename, but is also expected to handle the case 2499 /// when this method decides to use a different buffer. 2500 /// 2501 /// \returns true if the input filename was in <>'s or false if it was 2502 /// in ""'s. 
2503 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer); 2504 2505 /// Given a "foo" or \<foo> reference, look up the indicated file. 2506 /// 2507 /// Returns std::nullopt on failure. \p isAngled indicates whether the file 2508 /// reference is for system \#include's or not (i.e. using <> instead of ""). 2509 OptionalFileEntryRef 2510 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, 2511 ConstSearchDirIterator FromDir, const FileEntry *FromFile, 2512 ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath, 2513 SmallVectorImpl<char> *RelativePath, 2514 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, 2515 bool *IsFrameworkFound, bool SkipCache = false, 2516 bool OpenFile = true, bool CacheFailures = true); 2517 2518 /// Given a "Filename" or \<Filename> reference, look up the indicated embed 2519 /// resource. \p isAngled indicates whether the file reference is for 2520 /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile 2521 /// is true, the file looked up is opened for reading, otherwise it only 2522 /// validates that the file exists. Quoted filenames are looked up relative 2523 /// to \p LookupFromFile if it is nonnull. 2524 /// 2525 /// Returns std::nullopt on failure. 2526 OptionalFileEntryRef 2527 LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, 2528 const FileEntry *LookupFromFile = nullptr); 2529 2530 /// Return true if we're in the top-level file, not in a \#include. 2531 bool isInPrimaryFile() const; 2532 2533 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is 2534 /// followed by EOD. Return true if the token is not a valid on-off-switch. 
2535 bool LexOnOffSwitch(tok::OnOffSwitch &Result); 2536 2537 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 2538 bool *ShadowFlag = nullptr); 2539 2540 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma); 2541 Module *LeaveSubmodule(bool ForPragma); 2542 2543 private: 2544 friend void TokenLexer::ExpandFunctionArguments(); 2545 PushIncludeMacroStack()2546 void PushIncludeMacroStack() { 2547 assert(CurLexerCallback != CLK_CachingLexer && 2548 "cannot push a caching lexer"); 2549 IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule, 2550 std::move(CurLexer), CurPPLexer, 2551 std::move(CurTokenLexer), CurDirLookup); 2552 CurPPLexer = nullptr; 2553 } 2554 PopIncludeMacroStack()2555 void PopIncludeMacroStack() { 2556 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 2557 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 2558 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 2559 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 2560 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; 2561 CurLexerCallback = IncludeMacroStack.back().CurLexerCallback; 2562 IncludeMacroStack.pop_back(); 2563 } 2564 2565 void PropagateLineStartLeadingSpaceInfo(Token &Result); 2566 2567 /// Determine whether we need to create module macros for #defines in the 2568 /// current context. 2569 bool needModuleMacros() const; 2570 2571 /// Update the set of active module macros and ambiguity flag for a module 2572 /// macro name. 2573 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 2574 2575 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 2576 SourceLocation Loc); 2577 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 2578 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 2579 bool isPublic); 2580 2581 /// Lex and validate a macro name, which occurs after a 2582 /// \#define or \#undef. 
2583 /// 2584 /// \param MacroNameTok Token that represents the name defined or undefined. 2585 /// \param IsDefineUndef Kind if preprocessor directive. 2586 /// \param ShadowFlag Points to flag that is set if macro name shadows 2587 /// a keyword. 2588 /// 2589 /// This emits a diagnostic, sets the token kind to eod, 2590 /// and discards the rest of the macro line if the macro name is invalid. 2591 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other, 2592 bool *ShadowFlag = nullptr); 2593 2594 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the 2595 /// entire line) of the macro's tokens and adds them to MacroInfo, and while 2596 /// doing so performs certain validity checks including (but not limited to): 2597 /// - # (stringization) is followed by a macro parameter 2598 /// \param MacroNameTok - Token that represents the macro name 2599 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard 2600 /// 2601 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and 2602 /// returns a nullptr if an invalid sequence of tokens is encountered. 2603 MacroInfo *ReadOptionalMacroParameterListAndBody( 2604 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard); 2605 2606 /// The ( starting an argument list of a macro definition has just been read. 2607 /// Lex the rest of the parameters and the closing ), updating \p MI with 2608 /// what we learn and saving in \p LastTok the last token read. 2609 /// Return true if an error occurs parsing the arg list. 2610 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok); 2611 2612 /// Provide a suggestion for a typoed directive. If there is no typo, then 2613 /// just skip suggesting. 
2614 /// 2615 /// \param Tok - Token that represents the directive 2616 /// \param Directive - String reference for the directive name 2617 void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const; 2618 2619 /// We just read a \#if or related directive and decided that the 2620 /// subsequent tokens are in the \#if'd out portion of the 2621 /// file. Lex the rest of the file, until we see an \#endif. If \p 2622 /// FoundNonSkipPortion is true, then we have already emitted code for part of 2623 /// this \#if directive, so \#else/\#elif blocks should never be entered. If 2624 /// \p FoundElse is false, then \#else directives are ok, if not, then we have 2625 /// already seen one so a \#else directive is a duplicate. When this returns, 2626 /// the caller can lex the first valid token. 2627 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, 2628 SourceLocation IfTokenLoc, 2629 bool FoundNonSkipPortion, bool FoundElse, 2630 SourceLocation ElseLoc = SourceLocation()); 2631 2632 /// Information about the result for evaluating an expression for a 2633 /// preprocessor directive. 2634 struct DirectiveEvalResult { 2635 /// The integral value of the expression. 2636 std::optional<llvm::APSInt> Value; 2637 2638 /// Whether the expression was evaluated as true or not. 2639 bool Conditional; 2640 2641 /// True if the expression contained identifiers that were undefined. 2642 bool IncludedUndefinedIds; 2643 2644 /// The source range for the expression. 2645 SourceRange ExprRange; 2646 }; 2647 2648 /// Evaluate an integer constant expression that may occur after a 2649 /// \#if or \#elif directive and return a \p DirectiveEvalResult object. 2650 /// 2651 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 
2652 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, 2653 bool CheckForEoD = true); 2654 2655 /// Evaluate an integer constant expression that may occur after a 2656 /// \#if or \#elif directive and return a \p DirectiveEvalResult object. 2657 /// 2658 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 2659 /// \p EvaluatedDefined will contain the result of whether "defined" appeared 2660 /// in the evaluated expression or not. 2661 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, 2662 Token &Tok, 2663 bool &EvaluatedDefined, 2664 bool CheckForEoD = true); 2665 2666 /// Process a '__has_embed("path" [, ...])' expression. 2667 /// 2668 /// Returns predefined `__STDC_EMBED_*` macro values if 2669 /// successful. 2670 EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II); 2671 2672 /// Process a '__has_include("path")' expression. 2673 /// 2674 /// Returns true if successful. 2675 bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II); 2676 2677 /// Process '__has_include_next("path")' expression. 2678 /// 2679 /// Returns true if successful. 2680 bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II); 2681 2682 /// Get the directory and file from which to start \#include_next lookup. 2683 std::pair<ConstSearchDirIterator, const FileEntry *> 2684 getIncludeNextStart(const Token &IncludeNextTok) const; 2685 2686 /// Install the standard preprocessor pragmas: 2687 /// \#pragma GCC poison/system_header/dependency and \#pragma once. 2688 void RegisterBuiltinPragmas(); 2689 2690 /// RegisterBuiltinMacro - Register the specified identifier in the identifier 2691 /// table and mark it as a builtin macro to be expanded. RegisterBuiltinMacro(const char * Name)2692 IdentifierInfo *RegisterBuiltinMacro(const char *Name) { 2693 // Get the identifier. 2694 IdentifierInfo *Id = getIdentifierInfo(Name); 2695 2696 // Mark it as being a macro that is builtin. 
2697 MacroInfo *MI = AllocateMacroInfo(SourceLocation()); 2698 MI->setIsBuiltinMacro(); 2699 appendDefMacroDirective(Id, MI); 2700 return Id; 2701 } 2702 2703 /// Register builtin macros such as __LINE__ with the identifier table. 2704 void RegisterBuiltinMacros(); 2705 2706 /// If an identifier token is read that is to be expanded as a macro, handle 2707 /// it and return the next token as 'Tok'. If we lexed a token, return true; 2708 /// otherwise the caller should lex again. 2709 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD); 2710 2711 /// Cache macro expanded tokens for TokenLexers. 2712 // 2713 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 2714 /// going to lex in the cache and when it finishes the tokens are removed 2715 /// from the end of the cache. 2716 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer, 2717 ArrayRef<Token> tokens); 2718 2719 void removeCachedMacroExpandedTokensOfLastLexer(); 2720 2721 /// After reading "MACRO(", this method is invoked to read all of the formal 2722 /// arguments specified for the macro invocation. Returns null on error. 2723 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, 2724 SourceLocation &MacroEnd); 2725 2726 /// If an identifier token is read that is to be expanded 2727 /// as a builtin macro, handle it and return the next token as 'Tok'. 2728 void ExpandBuiltinMacro(Token &Tok); 2729 2730 /// Read a \c _Pragma directive, slice it up, process it, then 2731 /// return the first token after the directive. 2732 /// This assumes that the \c _Pragma token has just been read into \p Tok. 2733 void Handle_Pragma(Token &Tok); 2734 2735 /// Like Handle_Pragma except the pragma text is not enclosed within 2736 /// a string literal. 2737 void HandleMicrosoft__pragma(Token &Tok); 2738 2739 /// Add a lexer to the top of the include stack and 2740 /// start lexing tokens from it instead of the current buffer. 
2741 void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir); 2742 2743 /// Set the FileID for the preprocessor predefines. setPredefinesFileID(FileID FID)2744 void setPredefinesFileID(FileID FID) { 2745 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 2746 PredefinesFileID = FID; 2747 } 2748 2749 /// Set the FileID for the PCH through header. 2750 void setPCHThroughHeaderFileID(FileID FID); 2751 2752 /// Returns true if we are lexing from a file and not a 2753 /// pragma or a macro. IsFileLexer(const Lexer * L,const PreprocessorLexer * P)2754 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 2755 return L ? !L->isPragmaLexer() : P != nullptr; 2756 } 2757 IsFileLexer(const IncludeStackInfo & I)2758 static bool IsFileLexer(const IncludeStackInfo& I) { 2759 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 2760 } 2761 IsFileLexer()2762 bool IsFileLexer() const { 2763 return IsFileLexer(CurLexer.get(), CurPPLexer); 2764 } 2765 2766 //===--------------------------------------------------------------------===// 2767 // Standard Library Identification 2768 std::optional<CXXStandardLibraryVersionInfo> CXXStandardLibraryVersion; 2769 2770 public: 2771 std::optional<std::uint64_t> getStdLibCxxVersion(); 2772 bool NeedsStdLibCxxWorkaroundBefore(std::uint64_t FixedVersion); 2773 2774 private: 2775 //===--------------------------------------------------------------------===// 2776 // Caching stuff. 2777 void CachingLex(Token &Result); 2778 InCachingLexMode()2779 bool InCachingLexMode() const { 2780 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 2781 // that we are past EOF, not that we are in CachingLex mode. 
2782 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty(); 2783 } 2784 2785 void EnterCachingLexMode(); 2786 void EnterCachingLexModeUnchecked(); 2787 ExitCachingLexMode()2788 void ExitCachingLexMode() { 2789 if (InCachingLexMode()) 2790 RemoveTopOfLexerStack(); 2791 } 2792 2793 const Token &PeekAhead(unsigned N); 2794 void AnnotatePreviousCachedTokens(const Token &Tok); 2795 2796 //===--------------------------------------------------------------------===// 2797 /// Handle*Directive - implement the various preprocessor directives. These 2798 /// should side-effect the current preprocessor object so that the next call 2799 /// to Lex() will return the appropriate token next. 2800 void HandleLineDirective(); 2801 void HandleDigitDirective(Token &Tok); 2802 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 2803 void HandleIdentSCCSDirective(Token &Tok); 2804 void HandleMacroPublicDirective(Token &Tok); 2805 void HandleMacroPrivateDirective(); 2806 2807 /// An additional notification that can be produced by a header inclusion or 2808 /// import to tell the parser what happened. 
2809 struct ImportAction { 2810 enum ActionKind { 2811 None, 2812 ModuleBegin, 2813 ModuleImport, 2814 HeaderUnitImport, 2815 SkippedModuleImport, 2816 Failure, 2817 } Kind; 2818 Module *ModuleForHeader = nullptr; 2819 2820 ImportAction(ActionKind AK, Module *Mod = nullptr) KindImportAction2821 : Kind(AK), ModuleForHeader(Mod) { 2822 assert((AK == None || Mod || AK == Failure) && 2823 "no module for module action"); 2824 } 2825 }; 2826 2827 OptionalFileEntryRef LookupHeaderIncludeOrImport( 2828 ConstSearchDirIterator *CurDir, StringRef &Filename, 2829 SourceLocation FilenameLoc, CharSourceRange FilenameRange, 2830 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl, 2831 bool &IsMapped, ConstSearchDirIterator LookupFrom, 2832 const FileEntry *LookupFromFile, StringRef &LookupFilename, 2833 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath, 2834 ModuleMap::KnownHeader &SuggestedModule, bool isAngled); 2835 // Binary data inclusion 2836 void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok, 2837 const FileEntry *LookupFromFile = nullptr); 2838 void HandleEmbedDirectiveImpl(SourceLocation HashLoc, 2839 const LexEmbedParametersResult &Params, 2840 StringRef BinaryContents, StringRef FileName); 2841 2842 // File inclusion. 
2843 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, 2844 ConstSearchDirIterator LookupFrom = nullptr, 2845 const FileEntry *LookupFromFile = nullptr); 2846 ImportAction 2847 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok, 2848 Token &FilenameTok, SourceLocation EndLoc, 2849 ConstSearchDirIterator LookupFrom = nullptr, 2850 const FileEntry *LookupFromFile = nullptr); 2851 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); 2852 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); 2853 void HandleImportDirective(SourceLocation HashLoc, Token &Tok); 2854 void HandleMicrosoftImportDirective(Token &Tok); 2855 2856 public: 2857 /// Check that the given module is available, producing a diagnostic if not. 2858 /// \return \c true if the check failed (because the module is not available). 2859 /// \c false if the module appears to be usable. 2860 static bool checkModuleIsAvailable(const LangOptions &LangOpts, 2861 const TargetInfo &TargetInfo, 2862 const Module &M, DiagnosticsEngine &Diags); 2863 2864 // Module inclusion testing. 2865 /// Find the module that owns the source or header file that 2866 /// \p Loc points to. If the location is in a file that was included 2867 /// into a module, or is outside any module, returns nullptr. 2868 Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual); 2869 2870 /// We want to produce a diagnostic at location IncLoc concerning an 2871 /// unreachable effect at location MLoc (eg, where a desired entity was 2872 /// declared or defined). Determine whether the right way to make MLoc 2873 /// reachable is by #include, and if so, what header should be included. 2874 /// 2875 /// This is not necessarily fast, and might load unexpected module maps, so 2876 /// should only be called by code that intends to produce an error. 2877 /// 2878 /// \param IncLoc The location at which the missing effect was detected. 
2879 /// \param MLoc A location within an unimported module at which the desired 2880 /// effect occurred. 2881 /// \return A file that can be #included to provide the desired effect. Null 2882 /// if no such file could be determined or if a #include is not 2883 /// appropriate (eg, if a module should be imported instead). 2884 OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, 2885 SourceLocation MLoc); 2886 isRecordingPreamble()2887 bool isRecordingPreamble() const { 2888 return PreambleConditionalStack.isRecording(); 2889 } 2890 hasRecordedPreamble()2891 bool hasRecordedPreamble() const { 2892 return PreambleConditionalStack.hasRecordedPreamble(); 2893 } 2894 getPreambleConditionalStack()2895 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const { 2896 return PreambleConditionalStack.getStack(); 2897 } 2898 setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s)2899 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) { 2900 PreambleConditionalStack.setStack(s); 2901 } 2902 setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,std::optional<PreambleSkipInfo> SkipInfo)2903 void setReplayablePreambleConditionalStack( 2904 ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) { 2905 PreambleConditionalStack.startReplaying(); 2906 PreambleConditionalStack.setStack(s); 2907 PreambleConditionalStack.SkipInfo = SkipInfo; 2908 } 2909 getPreambleSkipInfo()2910 std::optional<PreambleSkipInfo> getPreambleSkipInfo() const { 2911 return PreambleConditionalStack.SkipInfo; 2912 } 2913 2914 private: 2915 /// After processing predefined file, initialize the conditional stack from 2916 /// the preamble. 2917 void replayPreambleConditionalStack(); 2918 2919 // Macro handling. 2920 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard); 2921 void HandleUndefDirective(); 2922 2923 // Conditional Inclusion. 
2924 void HandleIfdefDirective(Token &Result, const Token &HashToken, 2925 bool isIfndef, bool ReadAnyTokensBeforeDirective); 2926 void HandleIfDirective(Token &IfToken, const Token &HashToken, 2927 bool ReadAnyTokensBeforeDirective); 2928 void HandleEndifDirective(Token &EndifToken); 2929 void HandleElseDirective(Token &Result, const Token &HashToken); 2930 void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken, 2931 tok::PPKeywordKind Kind); 2932 2933 // Pragmas. 2934 void HandlePragmaDirective(PragmaIntroducer Introducer); 2935 2936 public: 2937 void HandlePragmaOnce(Token &OnceTok); 2938 void HandlePragmaMark(Token &MarkTok); 2939 void HandlePragmaPoison(); 2940 void HandlePragmaSystemHeader(Token &SysHeaderTok); 2941 void HandlePragmaDependency(Token &DependencyTok); 2942 void HandlePragmaPushMacro(Token &Tok); 2943 void HandlePragmaPopMacro(Token &Tok); 2944 void HandlePragmaIncludeAlias(Token &Tok); 2945 void HandlePragmaModuleBuild(Token &Tok); 2946 void HandlePragmaHdrstop(Token &Tok); 2947 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 2948 2949 // Return true and store the first token only if any CommentHandler 2950 // has inserted some tokens and getCommentRetentionState() is false. 2951 bool HandleComment(Token &result, SourceRange Comment); 2952 2953 /// A macro is used, update information about macros that need unused 2954 /// warnings. 
2955 void markMacroAsUsed(MacroInfo *MI); 2956 addMacroDeprecationMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2957 void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg, 2958 SourceLocation AnnotationLoc) { 2959 AnnotationInfos[II].DeprecationInfo = 2960 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2961 } 2962 addRestrictExpansionMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2963 void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg, 2964 SourceLocation AnnotationLoc) { 2965 AnnotationInfos[II].RestrictExpansionInfo = 2966 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2967 } 2968 addFinalLoc(const IdentifierInfo * II,SourceLocation AnnotationLoc)2969 void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) { 2970 AnnotationInfos[II].FinalAnnotationLoc = AnnotationLoc; 2971 } 2972 getMacroAnnotations(const IdentifierInfo * II)2973 const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const { 2974 return AnnotationInfos.find(II)->second; 2975 } 2976 2977 void emitMacroExpansionWarnings(const Token &Identifier, 2978 bool IsIfnDef = false) const { 2979 IdentifierInfo *Info = Identifier.getIdentifierInfo(); 2980 if (Info->isDeprecatedMacro()) 2981 emitMacroDeprecationWarning(Identifier); 2982 2983 if (Info->isRestrictExpansion() && 2984 !SourceMgr.isInMainFile(Identifier.getLocation())) 2985 emitRestrictExpansionWarning(Identifier); 2986 2987 if (!IsIfnDef) { 2988 if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs) 2989 emitRestrictInfNaNWarning(Identifier, 0); 2990 if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs) 2991 emitRestrictInfNaNWarning(Identifier, 1); 2992 } 2993 } 2994 2995 static void processPathForFileMacro(SmallVectorImpl<char> &Path, 2996 const LangOptions &LangOpts, 2997 const TargetInfo &TI); 2998 2999 static void processPathToFileName(SmallVectorImpl<char> &FileName, 3000 const PresumedLoc &PLoc, 
3001 const LangOptions &LangOpts, 3002 const TargetInfo &TI); 3003 3004 private: 3005 void emitMacroDeprecationWarning(const Token &Identifier) const; 3006 void emitRestrictExpansionWarning(const Token &Identifier) const; 3007 void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const; 3008 void emitRestrictInfNaNWarning(const Token &Identifier, 3009 unsigned DiagSelection) const; 3010 3011 /// This boolean state keeps track if the current scanned token (by this PP) 3012 /// is in an "-Wunsafe-buffer-usage" opt-out region. Assuming PP scans a 3013 /// translation unit in a linear order. 3014 bool InSafeBufferOptOutRegion = false; 3015 3016 /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out 3017 /// region if PP is currently in such a region. Hold undefined value 3018 /// otherwise. 3019 SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. 3020 3021 using SafeBufferOptOutRegionsTy = 3022 SmallVector<std::pair<SourceLocation, SourceLocation>, 16>; 3023 // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this 3024 // translation unit. Each region is represented by a pair of start and 3025 // end locations. 3026 SafeBufferOptOutRegionsTy SafeBufferOptOutMap; 3027 3028 // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the 3029 // following structure to manage them by their ASTs. 3030 struct { 3031 // A map from unique IDs to region maps of loaded ASTs. The ID identifies a 3032 // loaded AST. See `SourceManager::getUniqueLoadedASTID`. 3033 llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions; 3034 3035 // Returns a reference to the safe buffer opt-out regions of the loaded 3036 // AST where `Loc` belongs to. 
(Construct if absent) 3037 SafeBufferOptOutRegionsTy & findAndConsLoadedOptOutMap__anon4672ff5f03083038 findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) { 3039 return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)]; 3040 } 3041 3042 // Returns a reference to the safe buffer opt-out regions of the loaded 3043 // AST where `Loc` belongs to. (This const function returns nullptr if 3044 // absent.) 3045 const SafeBufferOptOutRegionsTy * lookupLoadedOptOutMap__anon4672ff5f03083046 lookupLoadedOptOutMap(SourceLocation Loc, 3047 const SourceManager &SrcMgr) const { 3048 FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc); 3049 auto Iter = LoadedRegions.find(FID); 3050 3051 if (Iter == LoadedRegions.end()) 3052 return nullptr; 3053 return &Iter->getSecond(); 3054 } 3055 } LoadedSafeBufferOptOutMap; 3056 3057 public: 3058 /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out 3059 /// region. This `Loc` must be a source location that has been pre-processed. 3060 bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const; 3061 3062 /// Alter the state of whether this PP currently is in a 3063 /// "-Wunsafe-buffer-usage" opt-out region. 3064 /// 3065 /// \param isEnter true if this PP is entering a region; otherwise, this PP 3066 /// is exiting a region 3067 /// \param Loc the location of the entry or exit of a 3068 /// region 3069 /// \return true iff it is INVALID to enter or exit a region, i.e., 3070 /// attempt to enter a region before exiting a previous region, or exiting a 3071 /// region that PP is not currently in. 3072 bool enterOrExitSafeBufferOptOutRegion(bool isEnter, 3073 const SourceLocation &Loc); 3074 3075 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage" 3076 /// opt-out region 3077 bool isPPInSafeBufferOptOutRegion(); 3078 3079 /// \param StartLoc output argument. 
It will be set to the start location of 3080 /// the current "-Wunsafe-buffer-usage" opt-out region iff this function 3081 /// returns true. 3082 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage" 3083 /// opt-out region 3084 bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); 3085 3086 /// \return a sequence of SourceLocations representing ordered opt-out regions 3087 /// specified by 3088 /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. 3089 SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const; 3090 3091 /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a 3092 /// record of code `PP_UNSAFE_BUFFER_USAGE`. 3093 /// \return true iff the `Preprocessor` has been updated; false `Preprocessor` 3094 /// is same as itself before the call. 3095 bool setDeserializedSafeBufferOptOutMap( 3096 const SmallVectorImpl<SourceLocation> &SrcLocSeqs); 3097 3098 /// Whether we've seen pp-directives which may have changed the preprocessing 3099 /// state. 3100 bool hasSeenNoTrivialPPDirective() const; 3101 3102 private: 3103 /// Helper functions to forward lexing to the actual lexer. They all share the 3104 /// same signature. 
CLK_Lexer(Preprocessor & P,Token & Result)3105 static bool CLK_Lexer(Preprocessor &P, Token &Result) { 3106 return P.CurLexer->Lex(Result); 3107 } CLK_TokenLexer(Preprocessor & P,Token & Result)3108 static bool CLK_TokenLexer(Preprocessor &P, Token &Result) { 3109 return P.CurTokenLexer->Lex(Result); 3110 } CLK_CachingLexer(Preprocessor & P,Token & Result)3111 static bool CLK_CachingLexer(Preprocessor &P, Token &Result) { 3112 P.CachingLex(Result); 3113 return true; 3114 } CLK_DependencyDirectivesLexer(Preprocessor & P,Token & Result)3115 static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) { 3116 return P.CurLexer->LexDependencyDirectiveToken(Result); 3117 } CLK_LexAfterModuleImport(Preprocessor & P,Token & Result)3118 static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) { 3119 return P.LexAfterModuleImport(Result); 3120 } 3121 }; 3122 3123 /// Abstract base class that describes a handler that will receive 3124 /// source ranges for each of the comments encountered in the source file. 3125 class CommentHandler { 3126 public: 3127 virtual ~CommentHandler(); 3128 3129 // The handler shall return true if it has pushed any tokens 3130 // to be read using e.g. EnterToken or EnterTokenStream. 3131 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 3132 }; 3133 3134 /// Abstract base class that describes a handler that will receive 3135 /// source ranges for empty lines encountered in the source file. 3136 class EmptylineHandler { 3137 public: 3138 virtual ~EmptylineHandler(); 3139 3140 // The handler handles empty lines. 3141 virtual void HandleEmptyline(SourceRange Range) = 0; 3142 }; 3143 3144 /// Helper class to shuttle information about #embed directives from the 3145 /// preprocessor to the parser through an annotation token. 
3146 struct EmbedAnnotationData { 3147 StringRef BinaryData; 3148 StringRef FileName; 3149 }; 3150 3151 /// Registry of pragma handlers added by plugins 3152 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; 3153 3154 } // namespace clang 3155 3156 namespace llvm { 3157 extern template class CLANG_TEMPLATE_ABI Registry<clang::PragmaHandler>; 3158 } // namespace llvm 3159 3160 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H 3161