1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the clang::Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 15 #define LLVM_CLANG_LEX_PREPROCESSOR_H 16 17 #include "clang/Basic/Diagnostic.h" 18 #include "clang/Basic/DiagnosticIDs.h" 19 #include "clang/Basic/IdentifierTable.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/LangOptions.h" 22 #include "clang/Basic/Module.h" 23 #include "clang/Basic/SourceLocation.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Basic/TokenKinds.h" 26 #include "clang/Lex/HeaderSearch.h" 27 #include "clang/Lex/Lexer.h" 28 #include "clang/Lex/MacroInfo.h" 29 #include "clang/Lex/ModuleLoader.h" 30 #include "clang/Lex/ModuleMap.h" 31 #include "clang/Lex/PPCallbacks.h" 32 #include "clang/Lex/PPEmbedParameters.h" 33 #include "clang/Lex/Token.h" 34 #include "clang/Lex/TokenLexer.h" 35 #include "llvm/ADT/APSInt.h" 36 #include "llvm/ADT/ArrayRef.h" 37 #include "llvm/ADT/DenseMap.h" 38 #include "llvm/ADT/FoldingSet.h" 39 #include "llvm/ADT/FunctionExtras.h" 40 #include "llvm/ADT/PointerUnion.h" 41 #include "llvm/ADT/STLExtras.h" 42 #include "llvm/ADT/SmallPtrSet.h" 43 #include "llvm/ADT/SmallVector.h" 44 #include "llvm/ADT/StringRef.h" 45 #include "llvm/ADT/TinyPtrVector.h" 46 #include "llvm/ADT/iterator_range.h" 47 #include "llvm/Support/Allocator.h" 48 #include "llvm/Support/Casting.h" 49 #include "llvm/Support/Registry.h" 50 #include <cassert> 51 #include <cstddef> 52 #include <cstdint> 53 #include <map> 54 #include <memory> 55 #include <optional> 56 #include <string> 57 #include <utility> 58 #include <vector> 59 60 namespace llvm { 61 62 template<unsigned InternalLen> class SmallString; 63 64 } // namespace llvm 65 66 namespace clang { 67 68 class CodeCompletionHandler; 69 class CommentHandler; 70 class DirectoryEntry; 71 class EmptylineHandler; 72 class ExternalPreprocessorSource; 73 class FileEntry; 74 class FileManager; 75 class HeaderSearch; 76 class MacroArgs; 77 class PragmaHandler; 78 class PragmaNamespace; 79 class PreprocessingRecord; 80 class PreprocessorLexer; 81 class PreprocessorOptions; 82 class ScratchBuffer; 83 class TargetInfo; 84 85 namespace Builtin { 86 class Context; 87 } 88 89 /// Stores token information for comparing actual tokens with 90 /// predefined values. Only handles simple tokens and identifiers. 91 class TokenValue { 92 tok::TokenKind Kind; 93 IdentifierInfo *II; 94 95 public: TokenValue(tok::TokenKind Kind)96 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 97 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 98 assert(Kind != tok::identifier && 99 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 100 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 101 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 102 } 103 TokenValue(IdentifierInfo * II)104 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 105 106 bool operator==(const Token &Tok) const { 107 return Tok.getKind() == Kind && 108 (!II || II == Tok.getIdentifierInfo()); 109 } 110 }; 111 112 /// Context in which macro name is used. 113 enum MacroUse { 114 // other than #define or #undef 115 MU_Other = 0, 116 117 // macro name specified in #define 118 MU_Define = 1, 119 120 // macro name specified in #undef 121 MU_Undef = 2 122 }; 123 124 enum class EmbedResult { 125 Invalid = -1, // Parsing error occurred. 126 NotFound = 0, // Corresponds to __STDC_EMBED_NOT_FOUND__ 127 Found = 1, // Corresponds to __STDC_EMBED_FOUND__ 128 Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__ 129 }; 130 131 /// Engages in a tight little dance with the lexer to efficiently 132 /// preprocess tokens. 133 /// 134 /// Lexers know only about tokens within a single source file, and don't 135 /// know anything about preprocessor-level issues like the \#include stack, 136 /// token expansion, etc. 137 class Preprocessor { 138 friend class VAOptDefinitionContext; 139 friend class VariadicMacroScopeGuard; 140 141 llvm::unique_function<void(const clang::Token &)> OnToken; 142 std::shared_ptr<PreprocessorOptions> PPOpts; 143 DiagnosticsEngine *Diags; 144 const LangOptions &LangOpts; 145 const TargetInfo *Target = nullptr; 146 const TargetInfo *AuxTarget = nullptr; 147 FileManager &FileMgr; 148 SourceManager &SourceMgr; 149 std::unique_ptr<ScratchBuffer> ScratchBuf; 150 HeaderSearch &HeaderInfo; 151 ModuleLoader &TheModuleLoader; 152 153 /// External source of macros. 154 ExternalPreprocessorSource *ExternalSource; 155 156 /// A BumpPtrAllocator object used to quickly allocate and release 157 /// objects internal to the Preprocessor. 158 llvm::BumpPtrAllocator BP; 159 160 /// Identifiers for builtin macros and other builtins. 161 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ 162 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ 163 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ 164 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ 165 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__ 166 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ 167 IdentifierInfo *Ident__COUNTER__; // __COUNTER__ 168 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma 169 IdentifierInfo *Ident__identifier; // __identifier 170 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__ 171 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__ 172 IdentifierInfo *Ident__has_feature; // __has_feature 173 IdentifierInfo *Ident__has_extension; // __has_extension 174 IdentifierInfo *Ident__has_builtin; // __has_builtin 175 IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin 176 IdentifierInfo *Ident__has_attribute; // __has_attribute 177 IdentifierInfo *Ident__has_embed; // __has_embed 178 IdentifierInfo *Ident__has_include; // __has_include 179 IdentifierInfo *Ident__has_include_next; // __has_include_next 180 IdentifierInfo *Ident__has_warning; // __has_warning 181 IdentifierInfo *Ident__is_identifier; // __is_identifier 182 IdentifierInfo *Ident__building_module; // __building_module 183 IdentifierInfo *Ident__MODULE__; // __MODULE__ 184 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute 185 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute 186 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute 187 IdentifierInfo *Ident__is_target_arch; // __is_target_arch 188 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor 189 IdentifierInfo *Ident__is_target_os; // __is_target_os 190 IdentifierInfo *Ident__is_target_environment; // __is_target_environment 191 IdentifierInfo *Ident__is_target_variant_os; 192 IdentifierInfo *Ident__is_target_variant_environment; 193 IdentifierInfo *Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD 194 195 // Weak, only valid (and set) while InMacroArgs is true. 196 Token* ArgMacro; 197 198 SourceLocation DATELoc, TIMELoc; 199 200 // FEM_UnsetOnCommandLine means that an explicit evaluation method was 201 // not specified on the command line. The target is queried to set the 202 // default evaluation method. 203 LangOptions::FPEvalMethodKind CurrentFPEvalMethod = 204 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 205 206 // The most recent pragma location where the floating point evaluation 207 // method was modified. This is used to determine whether the 208 // 'pragma clang fp eval_method' was used whithin the current scope. 209 SourceLocation LastFPEvalPragmaLocation; 210 211 LangOptions::FPEvalMethodKind TUFPEvalMethod = 212 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 213 214 // Next __COUNTER__ value, starts at 0. 215 unsigned CounterValue = 0; 216 217 enum { 218 /// Maximum depth of \#includes. 219 MaxAllowedIncludeStackDepth = 200 220 }; 221 222 // State that is set before the preprocessor begins. 223 bool KeepComments : 1; 224 bool KeepMacroComments : 1; 225 bool SuppressIncludeNotFoundError : 1; 226 227 // State that changes while the preprocessor runs: 228 bool InMacroArgs : 1; // True if parsing fn macro invocation args. 229 230 /// Whether the preprocessor owns the header search object. 231 bool OwnsHeaderSearch : 1; 232 233 /// True if macro expansion is disabled. 234 bool DisableMacroExpansion : 1; 235 236 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion) 237 /// when parsing preprocessor directives. 238 bool MacroExpansionInDirectivesOverride : 1; 239 240 class ResetMacroExpansionHelper; 241 242 /// Whether we have already loaded macros from the external source. 243 mutable bool ReadMacrosFromExternalSource : 1; 244 245 /// True if pragmas are enabled. 246 bool PragmasEnabled : 1; 247 248 /// True if the current build action is a preprocessing action. 249 bool PreprocessedOutput : 1; 250 251 /// True if we are currently preprocessing a #if or #elif directive 252 bool ParsingIfOrElifDirective; 253 254 /// True if we are pre-expanding macro arguments. 255 bool InMacroArgPreExpansion; 256 257 /// Mapping/lookup information for all identifiers in 258 /// the program, including program keywords. 259 mutable IdentifierTable Identifiers; 260 261 /// This table contains all the selectors in the program. 262 /// 263 /// Unlike IdentifierTable above, this table *isn't* populated by the 264 /// preprocessor. It is declared/expanded here because its role/lifetime is 265 /// conceptually similar to the IdentifierTable. In addition, the current 266 /// control flow (in clang::ParseAST()), make it convenient to put here. 267 /// 268 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to 269 /// the lifetime of the preprocessor. 270 SelectorTable Selectors; 271 272 /// Information about builtins. 273 std::unique_ptr<Builtin::Context> BuiltinInfo; 274 275 /// Tracks all of the pragmas that the client registered 276 /// with this preprocessor. 277 std::unique_ptr<PragmaNamespace> PragmaHandlers; 278 279 /// Pragma handlers of the original source is stored here during the 280 /// parsing of a model file. 281 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup; 282 283 /// Tracks all of the comment handlers that the client registered 284 /// with this preprocessor. 285 std::vector<CommentHandler *> CommentHandlers; 286 287 /// Empty line handler. 288 EmptylineHandler *Emptyline = nullptr; 289 290 /// True to avoid tearing down the lexer etc on EOF 291 bool IncrementalProcessing = false; 292 293 public: 294 /// The kind of translation unit we are processing. 295 const TranslationUnitKind TUKind; 296 297 /// Returns a pointer into the given file's buffer that's guaranteed 298 /// to be between tokens. The returned pointer is always before \p Start. 299 /// The maximum distance betweenthe returned pointer and \p Start is 300 /// limited by a constant value, but also an implementation detail. 301 /// If no such check point exists, \c nullptr is returned. 302 const char *getCheckPoint(FileID FID, const char *Start) const; 303 304 private: 305 /// The code-completion handler. 306 CodeCompletionHandler *CodeComplete = nullptr; 307 308 /// The file that we're performing code-completion for, if any. 309 const FileEntry *CodeCompletionFile = nullptr; 310 311 /// The offset in file for the code-completion point. 312 unsigned CodeCompletionOffset = 0; 313 314 /// The location for the code-completion point. This gets instantiated 315 /// when the CodeCompletionFile gets \#include'ed for preprocessing. 316 SourceLocation CodeCompletionLoc; 317 318 /// The start location for the file of the code-completion point. 319 /// 320 /// This gets instantiated when the CodeCompletionFile gets \#include'ed 321 /// for preprocessing. 322 SourceLocation CodeCompletionFileLoc; 323 324 /// The source location of the \c import contextual keyword we just 325 /// lexed, if any. 326 SourceLocation ModuleImportLoc; 327 328 /// The import path for named module that we're currently processing. 329 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath; 330 331 llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints; 332 unsigned CheckPointCounter = 0; 333 334 /// Whether the import is an `@import` or a standard c++ modules import. 335 bool IsAtImport = false; 336 337 /// Whether the last token we lexed was an '@'. 338 bool LastTokenWasAt = false; 339 340 /// A position within a C++20 import-seq. 341 class StdCXXImportSeq { 342 public: 343 enum State : int { 344 // Positive values represent a number of unclosed brackets. 345 AtTopLevel = 0, 346 AfterTopLevelTokenSeq = -1, 347 AfterExport = -2, 348 AfterImportSeq = -3, 349 }; 350 StdCXXImportSeq(State S)351 StdCXXImportSeq(State S) : S(S) {} 352 353 /// Saw any kind of open bracket. handleOpenBracket()354 void handleOpenBracket() { 355 S = static_cast<State>(std::max<int>(S, 0) + 1); 356 } 357 /// Saw any kind of close bracket other than '}'. handleCloseBracket()358 void handleCloseBracket() { 359 S = static_cast<State>(std::max<int>(S, 1) - 1); 360 } 361 /// Saw a close brace. handleCloseBrace()362 void handleCloseBrace() { 363 handleCloseBracket(); 364 if (S == AtTopLevel && !AfterHeaderName) 365 S = AfterTopLevelTokenSeq; 366 } 367 /// Saw a semicolon. handleSemi()368 void handleSemi() { 369 if (atTopLevel()) { 370 S = AfterTopLevelTokenSeq; 371 AfterHeaderName = false; 372 } 373 } 374 375 /// Saw an 'export' identifier. handleExport()376 void handleExport() { 377 if (S == AfterTopLevelTokenSeq) 378 S = AfterExport; 379 else if (S <= 0) 380 S = AtTopLevel; 381 } 382 /// Saw an 'import' identifier. handleImport()383 void handleImport() { 384 if (S == AfterTopLevelTokenSeq || S == AfterExport) 385 S = AfterImportSeq; 386 else if (S <= 0) 387 S = AtTopLevel; 388 } 389 390 /// Saw a 'header-name' token; do not recognize any more 'import' tokens 391 /// until we reach a top-level semicolon. handleHeaderName()392 void handleHeaderName() { 393 if (S == AfterImportSeq) 394 AfterHeaderName = true; 395 handleMisc(); 396 } 397 398 /// Saw any other token. handleMisc()399 void handleMisc() { 400 if (S <= 0) 401 S = AtTopLevel; 402 } 403 atTopLevel()404 bool atTopLevel() { return S <= 0; } afterImportSeq()405 bool afterImportSeq() { return S == AfterImportSeq; } afterTopLevelSeq()406 bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; } 407 408 private: 409 State S; 410 /// Whether we're in the pp-import-suffix following the header-name in a 411 /// pp-import. If so, a close-brace is not sufficient to end the 412 /// top-level-token-seq of an import-seq. 413 bool AfterHeaderName = false; 414 }; 415 416 /// Our current position within a C++20 import-seq. 417 StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq; 418 419 /// Track whether we are in a Global Module Fragment 420 class TrackGMF { 421 public: 422 enum GMFState : int { 423 GMFActive = 1, 424 MaybeGMF = 0, 425 BeforeGMFIntroducer = -1, 426 GMFAbsentOrEnded = -2, 427 }; 428 TrackGMF(GMFState S)429 TrackGMF(GMFState S) : S(S) {} 430 431 /// Saw a semicolon. handleSemi()432 void handleSemi() { 433 // If it is immediately after the first instance of the module keyword, 434 // then that introduces the GMF. 435 if (S == MaybeGMF) 436 S = GMFActive; 437 } 438 439 /// Saw an 'export' identifier. handleExport()440 void handleExport() { 441 // The presence of an 'export' keyword always ends or excludes a GMF. 442 S = GMFAbsentOrEnded; 443 } 444 445 /// Saw an 'import' identifier. handleImport(bool AfterTopLevelTokenSeq)446 void handleImport(bool AfterTopLevelTokenSeq) { 447 // If we see this before any 'module' kw, then we have no GMF. 448 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 449 S = GMFAbsentOrEnded; 450 } 451 452 /// Saw a 'module' identifier. handleModule(bool AfterTopLevelTokenSeq)453 void handleModule(bool AfterTopLevelTokenSeq) { 454 // This was the first module identifier and not preceded by any token 455 // that would exclude a GMF. It could begin a GMF, but only if directly 456 // followed by a semicolon. 457 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 458 S = MaybeGMF; 459 else 460 S = GMFAbsentOrEnded; 461 } 462 463 /// Saw any other token. handleMisc()464 void handleMisc() { 465 // We saw something other than ; after the 'module' kw, so not a GMF. 466 if (S == MaybeGMF) 467 S = GMFAbsentOrEnded; 468 } 469 inGMF()470 bool inGMF() { return S == GMFActive; } 471 472 private: 473 /// Track the transitions into and out of a Global Module Fragment, 474 /// if one is present. 475 GMFState S; 476 }; 477 478 TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer; 479 480 /// Track the status of the c++20 module decl. 481 /// 482 /// module-declaration: 483 /// 'export'[opt] 'module' module-name module-partition[opt] 484 /// attribute-specifier-seq[opt] ';' 485 /// 486 /// module-name: 487 /// module-name-qualifier[opt] identifier 488 /// 489 /// module-partition: 490 /// ':' module-name-qualifier[opt] identifier 491 /// 492 /// module-name-qualifier: 493 /// identifier '.' 494 /// module-name-qualifier identifier '.' 495 /// 496 /// Transition state: 497 /// 498 /// NotAModuleDecl --- export ---> FoundExport 499 /// NotAModuleDecl --- module ---> ImplementationCandidate 500 /// FoundExport --- module ---> InterfaceCandidate 501 /// ImplementationCandidate --- Identifier ---> ImplementationCandidate 502 /// ImplementationCandidate --- period ---> ImplementationCandidate 503 /// ImplementationCandidate --- colon ---> ImplementationCandidate 504 /// InterfaceCandidate --- Identifier ---> InterfaceCandidate 505 /// InterfaceCandidate --- period ---> InterfaceCandidate 506 /// InterfaceCandidate --- colon ---> InterfaceCandidate 507 /// ImplementationCandidate --- Semi ---> NamedModuleImplementation 508 /// NamedModuleInterface --- Semi ---> NamedModuleInterface 509 /// NamedModuleImplementation --- Anything ---> NamedModuleImplementation 510 /// NamedModuleInterface --- Anything ---> NamedModuleInterface 511 /// 512 /// FIXME: We haven't handle attribute-specifier-seq here. It may not be bad 513 /// soon since we don't support any module attributes yet. 514 class ModuleDeclSeq { 515 enum ModuleDeclState : int { 516 NotAModuleDecl, 517 FoundExport, 518 InterfaceCandidate, 519 ImplementationCandidate, 520 NamedModuleInterface, 521 NamedModuleImplementation, 522 }; 523 524 public: 525 ModuleDeclSeq() = default; 526 handleExport()527 void handleExport() { 528 if (State == NotAModuleDecl) 529 State = FoundExport; 530 else if (!isNamedModule()) 531 reset(); 532 } 533 handleModule()534 void handleModule() { 535 if (State == FoundExport) 536 State = InterfaceCandidate; 537 else if (State == NotAModuleDecl) 538 State = ImplementationCandidate; 539 else if (!isNamedModule()) 540 reset(); 541 } 542 handleIdentifier(IdentifierInfo * Identifier)543 void handleIdentifier(IdentifierInfo *Identifier) { 544 if (isModuleCandidate() && Identifier) 545 Name += Identifier->getName().str(); 546 else if (!isNamedModule()) 547 reset(); 548 } 549 handleColon()550 void handleColon() { 551 if (isModuleCandidate()) 552 Name += ":"; 553 else if (!isNamedModule()) 554 reset(); 555 } 556 handlePeriod()557 void handlePeriod() { 558 if (isModuleCandidate()) 559 Name += "."; 560 else if (!isNamedModule()) 561 reset(); 562 } 563 handleSemi()564 void handleSemi() { 565 if (!Name.empty() && isModuleCandidate()) { 566 if (State == InterfaceCandidate) 567 State = NamedModuleInterface; 568 else if (State == ImplementationCandidate) 569 State = NamedModuleImplementation; 570 else 571 llvm_unreachable("Unimaged ModuleDeclState."); 572 } else if (!isNamedModule()) 573 reset(); 574 } 575 handleMisc()576 void handleMisc() { 577 if (!isNamedModule()) 578 reset(); 579 } 580 isModuleCandidate()581 bool isModuleCandidate() const { 582 return State == InterfaceCandidate || State == ImplementationCandidate; 583 } 584 isNamedModule()585 bool isNamedModule() const { 586 return State == NamedModuleInterface || 587 State == NamedModuleImplementation; 588 } 589 isNamedInterface()590 bool isNamedInterface() const { return State == NamedModuleInterface; } 591 isImplementationUnit()592 bool isImplementationUnit() const { 593 return State == NamedModuleImplementation && !getName().contains(':'); 594 } 595 getName()596 StringRef getName() const { 597 assert(isNamedModule() && "Can't get name from a non named module"); 598 return Name; 599 } 600 getPrimaryName()601 StringRef getPrimaryName() const { 602 assert(isNamedModule() && "Can't get name from a non named module"); 603 return getName().split(':').first; 604 } 605 reset()606 void reset() { 607 Name.clear(); 608 State = NotAModuleDecl; 609 } 610 611 private: 612 ModuleDeclState State = NotAModuleDecl; 613 std::string Name; 614 }; 615 616 ModuleDeclSeq ModuleDeclState; 617 618 /// Whether the module import expects an identifier next. Otherwise, 619 /// it expects a '.' or ';'. 620 bool ModuleImportExpectsIdentifier = false; 621 622 /// The identifier and source location of the currently-active 623 /// \#pragma clang arc_cf_code_audited begin. 624 std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo; 625 626 /// The source location of the currently-active 627 /// \#pragma clang assume_nonnull begin. 628 SourceLocation PragmaAssumeNonNullLoc; 629 630 /// Set only for preambles which end with an active 631 /// \#pragma clang assume_nonnull begin. 632 /// 633 /// When the preamble is loaded into the main file, 634 /// `PragmaAssumeNonNullLoc` will be set to this to 635 /// replay the unterminated assume_nonnull. 636 SourceLocation PreambleRecordedPragmaAssumeNonNullLoc; 637 638 /// True if we hit the code-completion point. 639 bool CodeCompletionReached = false; 640 641 /// The code completion token containing the information 642 /// on the stem that is to be code completed. 643 IdentifierInfo *CodeCompletionII = nullptr; 644 645 /// Range for the code completion token. 646 SourceRange CodeCompletionTokenRange; 647 648 /// The directory that the main file should be considered to occupy, 649 /// if it does not correspond to a real file (as happens when building a 650 /// module). 651 OptionalDirectoryEntryRef MainFileDir; 652 653 /// The number of bytes that we will initially skip when entering the 654 /// main file, along with a flag that indicates whether skipping this number 655 /// of bytes will place the lexer at the start of a line. 656 /// 657 /// This is used when loading a precompiled preamble. 658 std::pair<int, bool> SkipMainFilePreamble; 659 660 /// Whether we hit an error due to reaching max allowed include depth. Allows 661 /// to avoid hitting the same error over and over again. 662 bool HasReachedMaxIncludeDepth = false; 663 664 /// The number of currently-active calls to Lex. 665 /// 666 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often 667 /// require asking for multiple additional tokens. This counter makes it 668 /// possible for Lex to detect whether it's producing a token for the end 669 /// of phase 4 of translation or for some other situation. 670 unsigned LexLevel = 0; 671 672 /// The number of (LexLevel 0) preprocessor tokens. 673 unsigned TokenCount = 0; 674 675 /// Preprocess every token regardless of LexLevel. 676 bool PreprocessToken = false; 677 678 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens 679 /// warning, or zero for unlimited. 680 unsigned MaxTokens = 0; 681 SourceLocation MaxTokensOverrideLoc; 682 683 public: 684 struct PreambleSkipInfo { 685 SourceLocation HashTokenLoc; 686 SourceLocation IfTokenLoc; 687 bool FoundNonSkipPortion; 688 bool FoundElse; 689 SourceLocation ElseLoc; 690 PreambleSkipInfoPreambleSkipInfo691 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, 692 bool FoundNonSkipPortion, bool FoundElse, 693 SourceLocation ElseLoc) 694 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc), 695 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse), 696 ElseLoc(ElseLoc) {} 697 }; 698 699 using IncludedFilesSet = llvm::DenseSet<const FileEntry *>; 700 701 private: 702 friend class ASTReader; 703 friend class MacroArgs; 704 705 class PreambleConditionalStackStore { 706 enum State { 707 Off = 0, 708 Recording = 1, 709 Replaying = 2, 710 }; 711 712 public: 713 PreambleConditionalStackStore() = default; 714 startRecording()715 void startRecording() { ConditionalStackState = Recording; } startReplaying()716 void startReplaying() { ConditionalStackState = Replaying; } isRecording()717 bool isRecording() const { return ConditionalStackState == Recording; } isReplaying()718 bool isReplaying() const { return ConditionalStackState == Replaying; } 719 getStack()720 ArrayRef<PPConditionalInfo> getStack() const { 721 return ConditionalStack; 722 } 723 doneReplaying()724 void doneReplaying() { 725 ConditionalStack.clear(); 726 ConditionalStackState = Off; 727 } 728 setStack(ArrayRef<PPConditionalInfo> s)729 void setStack(ArrayRef<PPConditionalInfo> s) { 730 if (!isRecording() && !isReplaying()) 731 return; 732 ConditionalStack.clear(); 733 ConditionalStack.append(s.begin(), s.end()); 734 } 735 hasRecordedPreamble()736 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); } 737 reachedEOFWhileSkipping()738 bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); } 739 clearSkipInfo()740 void clearSkipInfo() { SkipInfo.reset(); } 741 742 std::optional<PreambleSkipInfo> SkipInfo; 743 744 private: 745 SmallVector<PPConditionalInfo, 4> ConditionalStack; 746 State ConditionalStackState = Off; 747 } PreambleConditionalStack; 748 749 /// The current top of the stack that we're lexing from if 750 /// not expanding a macro and we are lexing directly from source code. 751 /// 752 /// Only one of CurLexer, or CurTokenLexer will be non-null. 753 std::unique_ptr<Lexer> CurLexer; 754 755 /// The current top of the stack that we're lexing from 756 /// if not expanding a macro. 757 /// 758 /// This is an alias for CurLexer. 759 PreprocessorLexer *CurPPLexer = nullptr; 760 761 /// Used to find the current FileEntry, if CurLexer is non-null 762 /// and if applicable. 763 /// 764 /// This allows us to implement \#include_next and find directory-specific 765 /// properties. 766 ConstSearchDirIterator CurDirLookup = nullptr; 767 768 /// The current macro we are expanding, if we are expanding a macro. 769 /// 770 /// One of CurLexer and CurTokenLexer must be null. 771 std::unique_ptr<TokenLexer> CurTokenLexer; 772 773 /// The kind of lexer we're currently working with. 774 typedef bool (*LexerCallback)(Preprocessor &, Token &); 775 LexerCallback CurLexerCallback = &CLK_Lexer; 776 777 /// If the current lexer is for a submodule that is being built, this 778 /// is that submodule. 779 Module *CurLexerSubmodule = nullptr; 780 781 /// Keeps track of the stack of files currently 782 /// \#included, and macros currently being expanded from, not counting 783 /// CurLexer/CurTokenLexer. 784 struct IncludeStackInfo { 785 LexerCallback CurLexerCallback; 786 Module *TheSubmodule; 787 std::unique_ptr<Lexer> TheLexer; 788 PreprocessorLexer *ThePPLexer; 789 std::unique_ptr<TokenLexer> TheTokenLexer; 790 ConstSearchDirIterator TheDirLookup; 791 792 // The following constructors are completely useless copies of the default 793 // versions, only needed to pacify MSVC. IncludeStackInfoIncludeStackInfo794 IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule, 795 std::unique_ptr<Lexer> &&TheLexer, 796 PreprocessorLexer *ThePPLexer, 797 std::unique_ptr<TokenLexer> &&TheTokenLexer, 798 ConstSearchDirIterator TheDirLookup) 799 : CurLexerCallback(std::move(CurLexerCallback)), 800 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 801 ThePPLexer(std::move(ThePPLexer)), 802 TheTokenLexer(std::move(TheTokenLexer)), 803 TheDirLookup(std::move(TheDirLookup)) {} 804 }; 805 std::vector<IncludeStackInfo> IncludeMacroStack; 806 807 /// Actions invoked when some preprocessor activity is 808 /// encountered (e.g. a file is \#included, etc). 809 std::unique_ptr<PPCallbacks> Callbacks; 810 811 struct MacroExpandsInfo { 812 Token Tok; 813 MacroDefinition MD; 814 SourceRange Range; 815 MacroExpandsInfoMacroExpandsInfo816 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 817 : Tok(Tok), MD(MD), Range(Range) {} 818 }; 819 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 820 821 /// Information about a name that has been used to define a module macro. 822 struct ModuleMacroInfo { 823 /// The most recent macro directive for this identifier. 824 MacroDirective *MD; 825 826 /// The active module macros for this identifier. 827 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros; 828 829 /// The generation number at which we last updated ActiveModuleMacros. 830 /// \see Preprocessor::VisibleModules. 831 unsigned ActiveModuleMacrosGeneration = 0; 832 833 /// Whether this macro name is ambiguous. 834 bool IsAmbiguous = false; 835 836 /// The module macros that are overridden by this macro. 837 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros; 838 ModuleMacroInfoModuleMacroInfo839 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {} 840 }; 841 842 /// The state of a macro for an identifier. 843 class MacroState { 844 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 845 getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)846 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 847 const IdentifierInfo *II) const { 848 if (II->isOutOfDate()) 849 PP.updateOutOfDateIdentifier(*II); 850 // FIXME: Find a spare bit on IdentifierInfo and store a 851 // HasModuleMacros flag. 852 if (!II->hasMacroDefinition() || 853 (!PP.getLangOpts().Modules && 854 !PP.getLangOpts().ModulesLocalVisibility) || 855 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 856 return nullptr; 857 858 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 859 if (!Info) { 860 Info = new (PP.getPreprocessorAllocator()) 861 ModuleMacroInfo(State.get<MacroDirective *>()); 862 State = Info; 863 } 864 865 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 866 Info->ActiveModuleMacrosGeneration) 867 PP.updateModuleMacroInfo(II, *Info); 868 return Info; 869 } 870 871 public: MacroState()872 MacroState() : MacroState(nullptr) {} MacroState(MacroDirective * MD)873 MacroState(MacroDirective *MD) : State(MD) {} 874 MacroState(MacroState && O)875 MacroState(MacroState &&O) noexcept : State(O.State) { 876 O.State = (MacroDirective *)nullptr; 877 } 878 879 MacroState &operator=(MacroState &&O) noexcept { 880 auto S = O.State; 881 O.State = (MacroDirective *)nullptr; 882 State = S; 883 return *this; 884 } 885 ~MacroState()886 ~MacroState() { 887 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 888 Info->~ModuleMacroInfo(); 889 } 890 getLatest()891 MacroDirective *getLatest() const { 892 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 893 return Info->MD; 894 return State.get<MacroDirective*>(); 895 } 896 setLatest(MacroDirective * MD)897 void setLatest(MacroDirective *MD) { 898 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 899 Info->MD = MD; 900 else 901 State = MD; 902 } 903 isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)904 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 905 auto *Info = getModuleInfo(PP, II); 906 return Info ? Info->IsAmbiguous : false; 907 } 908 909 ArrayRef<ModuleMacro *> getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)910 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 911 if (auto *Info = getModuleInfo(PP, II)) 912 return Info->ActiveModuleMacros; 913 return std::nullopt; 914 } 915 findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)916 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 917 SourceManager &SourceMgr) const { 918 // FIXME: Incorporate module macros into the result of this. 919 if (auto *Latest = getLatest()) 920 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 921 return {}; 922 } 923 overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)924 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 925 if (auto *Info = getModuleInfo(PP, II)) { 926 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 927 Info->ActiveModuleMacros.begin(), 928 Info->ActiveModuleMacros.end()); 929 Info->ActiveModuleMacros.clear(); 930 Info->IsAmbiguous = false; 931 } 932 } 933 getOverriddenMacros()934 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 935 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 936 return Info->OverriddenMacros; 937 return std::nullopt; 938 } 939 setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)940 void setOverriddenMacros(Preprocessor &PP, 941 ArrayRef<ModuleMacro *> Overrides) { 942 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 943 if (!Info) { 944 if (Overrides.empty()) 945 return; 946 Info = new (PP.getPreprocessorAllocator()) 947 ModuleMacroInfo(State.get<MacroDirective *>()); 948 State = Info; 949 } 950 Info->OverriddenMacros.clear(); 951 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 952 Overrides.begin(), Overrides.end()); 953 Info->ActiveModuleMacrosGeneration = 0; 954 } 955 }; 956 957 /// For each IdentifierInfo that was associated with a macro, we 958 /// keep a mapping to the history of all macro definitions and #undefs in 959 /// the reverse order (the latest one is in the head of the list). 960 /// 961 /// This mapping lives within the \p CurSubmoduleState. 962 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>; 963 964 struct SubmoduleState; 965 966 /// Information about a submodule that we're currently building. 967 struct BuildingSubmoduleInfo { 968 /// The module that we are building. 969 Module *M; 970 971 /// The location at which the module was included. 972 SourceLocation ImportLoc; 973 974 /// Whether we entered this submodule via a pragma. 975 bool IsPragma; 976 977 /// The previous SubmoduleState. 978 SubmoduleState *OuterSubmoduleState; 979 980 /// The number of pending module macro names when we started building this. 981 unsigned OuterPendingModuleMacroNames; 982 BuildingSubmoduleInfoBuildingSubmoduleInfo983 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma, 984 SubmoduleState *OuterSubmoduleState, 985 unsigned OuterPendingModuleMacroNames) 986 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma), 987 OuterSubmoduleState(OuterSubmoduleState), 988 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {} 989 }; 990 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack; 991 992 /// Information about a submodule's preprocessor state. 993 struct SubmoduleState { 994 /// The macros for the submodule. 995 MacroMap Macros; 996 997 /// The set of modules that are visible within the submodule. 998 VisibleModuleSet VisibleModules; 999 1000 // FIXME: CounterValue? 1001 // FIXME: PragmaPushMacroInfo? 1002 }; 1003 std::map<Module *, SubmoduleState> Submodules; 1004 1005 /// The preprocessor state for preprocessing outside of any submodule. 1006 SubmoduleState NullSubmoduleState; 1007 1008 /// The current submodule state. Will be \p NullSubmoduleState if we're not 1009 /// in a submodule. 1010 SubmoduleState *CurSubmoduleState; 1011 1012 /// The files that have been included. 1013 IncludedFilesSet IncludedFiles; 1014 1015 /// The set of top-level modules that affected preprocessing, but were not 1016 /// imported. 1017 llvm::SmallSetVector<Module *, 2> AffectingClangModules; 1018 1019 /// The set of known macros exported from modules. 1020 llvm::FoldingSet<ModuleMacro> ModuleMacros; 1021 1022 /// The names of potential module macros that we've not yet processed. 1023 llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames; 1024 1025 /// The list of module macros, for each identifier, that are not overridden by 1026 /// any other module macro. 1027 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>> 1028 LeafModuleMacros; 1029 1030 /// Macros that we want to warn because they are not used at the end 1031 /// of the translation unit. 1032 /// 1033 /// We store just their SourceLocations instead of 1034 /// something like MacroInfo*. The benefit of this is that when we are 1035 /// deserializing from PCH, we don't need to deserialize identifier & macros 1036 /// just so that we can report that they are unused, we just warn using 1037 /// the SourceLocations of this set (that will be filled by the ASTReader). 1038 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>; 1039 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 1040 1041 /// This is a pair of an optional message and source location used for pragmas 1042 /// that annotate macros like pragma clang restrict_expansion and pragma clang 1043 /// deprecated. This pair stores the optional message and the location of the 1044 /// annotation pragma for use producing diagnostics and notes. 1045 using MsgLocationPair = std::pair<std::string, SourceLocation>; 1046 1047 struct MacroAnnotationInfo { 1048 SourceLocation Location; 1049 std::string Message; 1050 }; 1051 1052 struct MacroAnnotations { 1053 std::optional<MacroAnnotationInfo> DeprecationInfo; 1054 std::optional<MacroAnnotationInfo> RestrictExpansionInfo; 1055 std::optional<SourceLocation> FinalAnnotationLoc; 1056 makeDeprecationMacroAnnotations1057 static MacroAnnotations makeDeprecation(SourceLocation Loc, 1058 std::string Msg) { 1059 return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)}, 1060 std::nullopt, std::nullopt}; 1061 } 1062 makeRestrictExpansionMacroAnnotations1063 static MacroAnnotations makeRestrictExpansion(SourceLocation Loc, 1064 std::string Msg) { 1065 return MacroAnnotations{ 1066 std::nullopt, MacroAnnotationInfo{Loc, std::move(Msg)}, std::nullopt}; 1067 } 1068 makeFinalMacroAnnotations1069 static MacroAnnotations makeFinal(SourceLocation Loc) { 1070 return MacroAnnotations{std::nullopt, std::nullopt, Loc}; 1071 } 1072 }; 1073 1074 /// Warning information for macro annotations. 1075 llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos; 1076 1077 /// A "freelist" of MacroArg objects that can be 1078 /// reused for quick allocation. 1079 MacroArgs *MacroArgCache = nullptr; 1080 1081 /// For each IdentifierInfo used in a \#pragma push_macro directive, 1082 /// we keep a MacroInfo stack used to restore the previous macro value. 1083 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>> 1084 PragmaPushMacroInfo; 1085 1086 // Various statistics we track for performance analysis. 1087 unsigned NumDirectives = 0; 1088 unsigned NumDefined = 0; 1089 unsigned NumUndefined = 0; 1090 unsigned NumPragma = 0; 1091 unsigned NumIf = 0; 1092 unsigned NumElse = 0; 1093 unsigned NumEndif = 0; 1094 unsigned NumEnteredSourceFiles = 0; 1095 unsigned MaxIncludeStackDepth = 0; 1096 unsigned NumMacroExpanded = 0; 1097 unsigned NumFnMacroExpanded = 0; 1098 unsigned NumBuiltinMacroExpanded = 0; 1099 unsigned NumFastMacroExpanded = 0; 1100 unsigned NumTokenPaste = 0; 1101 unsigned NumFastTokenPaste = 0; 1102 unsigned NumSkipped = 0; 1103 1104 /// The predefined macros that preprocessor should use from the 1105 /// command line etc. 1106 std::string Predefines; 1107 1108 /// The file ID for the preprocessor predefines. 1109 FileID PredefinesFileID; 1110 1111 /// The file ID for the PCH through header. 1112 FileID PCHThroughHeaderFileID; 1113 1114 /// Whether tokens are being skipped until a #pragma hdrstop is seen. 1115 bool SkippingUntilPragmaHdrStop = false; 1116 1117 /// Whether tokens are being skipped until the through header is seen. 1118 bool SkippingUntilPCHThroughHeader = false; 1119 1120 /// \{ 1121 /// Cache of macro expanders to reduce malloc traffic. 1122 enum { TokenLexerCacheSize = 8 }; 1123 unsigned NumCachedTokenLexers; 1124 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize]; 1125 /// \} 1126 1127 /// Keeps macro expanded tokens for TokenLexers. 1128 // 1129 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 1130 /// going to lex in the cache and when it finishes the tokens are removed 1131 /// from the end of the cache. 1132 SmallVector<Token, 16> MacroExpandedTokens; 1133 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack; 1134 1135 /// A record of the macro definitions and expansions that 1136 /// occurred during preprocessing. 1137 /// 1138 /// This is an optional side structure that can be enabled with 1139 /// \c createPreprocessingRecord() prior to preprocessing. 1140 PreprocessingRecord *Record = nullptr; 1141 1142 /// Cached tokens state. 1143 using CachedTokensTy = SmallVector<Token, 1>; 1144 1145 /// Cached tokens are stored here when we do backtracking or 1146 /// lookahead. They are "lexed" by the CachingLex() method. 1147 CachedTokensTy CachedTokens; 1148 1149 /// The position of the cached token that CachingLex() should 1150 /// "lex" next. 1151 /// 1152 /// If it points beyond the CachedTokens vector, it means that a normal 1153 /// Lex() should be invoked. 1154 CachedTokensTy::size_type CachedLexPos = 0; 1155 1156 /// Stack of backtrack positions, allowing nested backtracks. 1157 /// 1158 /// The EnableBacktrackAtThisPos() method pushes a position to 1159 /// indicate where CachedLexPos should be set when the BackTrack() method is 1160 /// invoked (at which point the last position is popped). 1161 std::vector<CachedTokensTy::size_type> BacktrackPositions; 1162 1163 /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running. 1164 /// This is used to guard against calling this function recursively. 1165 /// 1166 /// See comments at the use-site for more context about why it is needed. 1167 bool SkippingExcludedConditionalBlock = false; 1168 1169 /// Keeps track of skipped range mappings that were recorded while skipping 1170 /// excluded conditional directives. It maps the source buffer pointer at 1171 /// the beginning of a skipped block, to the number of bytes that should be 1172 /// skipped. 1173 llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges; 1174 1175 void updateOutOfDateIdentifier(const IdentifierInfo &II) const; 1176 1177 public: 1178 Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, 1179 DiagnosticsEngine &diags, const LangOptions &LangOpts, 1180 SourceManager &SM, HeaderSearch &Headers, 1181 ModuleLoader &TheModuleLoader, 1182 IdentifierInfoLookup *IILookup = nullptr, 1183 bool OwnsHeaderSearch = false, 1184 TranslationUnitKind TUKind = TU_Complete); 1185 1186 ~Preprocessor(); 1187 1188 /// Initialize the preprocessor using information about the target. 1189 /// 1190 /// \param Target is owned by the caller and must remain valid for the 1191 /// lifetime of the preprocessor. 1192 /// \param AuxTarget is owned by the caller and must remain valid for 1193 /// the lifetime of the preprocessor. 1194 void Initialize(const TargetInfo &Target, 1195 const TargetInfo *AuxTarget = nullptr); 1196 1197 /// Initialize the preprocessor to parse a model file 1198 /// 1199 /// To parse model files the preprocessor of the original source is reused to 1200 /// preserver the identifier table. However to avoid some duplicate 1201 /// information in the preprocessor some cleanup is needed before it is used 1202 /// to parse model files. This method does that cleanup. 1203 void InitializeForModelFile(); 1204 1205 /// Cleanup after model file parsing 1206 void FinalizeForModelFile(); 1207 1208 /// Retrieve the preprocessor options used to initialize this 1209 /// preprocessor. getPreprocessorOpts()1210 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; } 1211 getDiagnostics()1212 DiagnosticsEngine &getDiagnostics() const { return *Diags; } setDiagnostics(DiagnosticsEngine & D)1213 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 1214 getLangOpts()1215 const LangOptions &getLangOpts() const { return LangOpts; } getTargetInfo()1216 const TargetInfo &getTargetInfo() const { return *Target; } getAuxTargetInfo()1217 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } getFileManager()1218 FileManager &getFileManager() const { return FileMgr; } getSourceManager()1219 SourceManager &getSourceManager() const { return SourceMgr; } getHeaderSearchInfo()1220 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 1221 getIdentifierTable()1222 IdentifierTable &getIdentifierTable() { return Identifiers; } getIdentifierTable()1223 const IdentifierTable &getIdentifierTable() const { return Identifiers; } getSelectorTable()1224 SelectorTable &getSelectorTable() { return Selectors; } getBuiltinInfo()1225 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; } getPreprocessorAllocator()1226 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 1227 setExternalSource(ExternalPreprocessorSource * Source)1228 void setExternalSource(ExternalPreprocessorSource *Source) { 1229 ExternalSource = Source; 1230 } 1231 getExternalSource()1232 ExternalPreprocessorSource *getExternalSource() const { 1233 return ExternalSource; 1234 } 1235 1236 /// Retrieve the module loader associated with this preprocessor. getModuleLoader()1237 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 1238 hadModuleLoaderFatalFailure()1239 bool hadModuleLoaderFatalFailure() const { 1240 return TheModuleLoader.HadFatalFailure; 1241 } 1242 1243 /// Retrieve the number of Directives that have been processed by the 1244 /// Preprocessor. getNumDirectives()1245 unsigned getNumDirectives() const { 1246 return NumDirectives; 1247 } 1248 1249 /// True if we are currently preprocessing a #if or #elif directive isParsingIfOrElifDirective()1250 bool isParsingIfOrElifDirective() const { 1251 return ParsingIfOrElifDirective; 1252 } 1253 1254 /// Control whether the preprocessor retains comments in output. SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)1255 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 1256 this->KeepComments = KeepComments | KeepMacroComments; 1257 this->KeepMacroComments = KeepMacroComments; 1258 } 1259 getCommentRetentionState()1260 bool getCommentRetentionState() const { return KeepComments; } 1261 setPragmasEnabled(bool Enabled)1262 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } getPragmasEnabled()1263 bool getPragmasEnabled() const { return PragmasEnabled; } 1264 SetSuppressIncludeNotFoundError(bool Suppress)1265 void SetSuppressIncludeNotFoundError(bool Suppress) { 1266 SuppressIncludeNotFoundError = Suppress; 1267 } 1268 GetSuppressIncludeNotFoundError()1269 bool GetSuppressIncludeNotFoundError() { 1270 return SuppressIncludeNotFoundError; 1271 } 1272 1273 /// Sets whether the preprocessor is responsible for producing output or if 1274 /// it is producing tokens to be consumed by Parse and Sema. setPreprocessedOutput(bool IsPreprocessedOutput)1275 void setPreprocessedOutput(bool IsPreprocessedOutput) { 1276 PreprocessedOutput = IsPreprocessedOutput; 1277 } 1278 1279 /// Returns true if the preprocessor is responsible for generating output, 1280 /// false if it is producing tokens to be consumed by Parse and Sema. isPreprocessedOutput()1281 bool isPreprocessedOutput() const { return PreprocessedOutput; } 1282 1283 /// Return true if we are lexing directly from the specified lexer. isCurrentLexer(const PreprocessorLexer * L)1284 bool isCurrentLexer(const PreprocessorLexer *L) const { 1285 return CurPPLexer == L; 1286 } 1287 1288 /// Return the current lexer being lexed from. 1289 /// 1290 /// Note that this ignores any potentially active macro expansions and _Pragma 1291 /// expansions going on at the time. getCurrentLexer()1292 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 1293 1294 /// Return the current file lexer being lexed from. 1295 /// 1296 /// Note that this ignores any potentially active macro expansions and _Pragma 1297 /// expansions going on at the time. 1298 PreprocessorLexer *getCurrentFileLexer() const; 1299 1300 /// Return the submodule owning the file being lexed. This may not be 1301 /// the current module if we have changed modules since entering the file. getCurrentLexerSubmodule()1302 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; } 1303 1304 /// Returns the FileID for the preprocessor predefines. getPredefinesFileID()1305 FileID getPredefinesFileID() const { return PredefinesFileID; } 1306 1307 /// \{ 1308 /// Accessors for preprocessor callbacks. 1309 /// 1310 /// Note that this class takes ownership of any PPCallbacks object given to 1311 /// it. getPPCallbacks()1312 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } addPPCallbacks(std::unique_ptr<PPCallbacks> C)1313 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 1314 if (Callbacks) 1315 C = std::make_unique<PPChainedCallbacks>(std::move(C), 1316 std::move(Callbacks)); 1317 Callbacks = std::move(C); 1318 } 1319 /// \} 1320 1321 /// Get the number of tokens processed so far. getTokenCount()1322 unsigned getTokenCount() const { return TokenCount; } 1323 1324 /// Get the max number of tokens before issuing a -Wmax-tokens warning. getMaxTokens()1325 unsigned getMaxTokens() const { return MaxTokens; } 1326 overrideMaxTokens(unsigned Value,SourceLocation Loc)1327 void overrideMaxTokens(unsigned Value, SourceLocation Loc) { 1328 MaxTokens = Value; 1329 MaxTokensOverrideLoc = Loc; 1330 }; 1331 getMaxTokensOverrideLoc()1332 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; } 1333 1334 /// Register a function that would be called on each token in the final 1335 /// expanded token stream. 1336 /// This also reports annotation tokens produced by the parser. setTokenWatcher(llvm::unique_function<void (const clang::Token &)> F)1337 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) { 1338 OnToken = std::move(F); 1339 } 1340 setPreprocessToken(bool Preprocess)1341 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; } 1342 isMacroDefined(StringRef Id)1343 bool isMacroDefined(StringRef Id) { 1344 return isMacroDefined(&Identifiers.get(Id)); 1345 } isMacroDefined(const IdentifierInfo * II)1346 bool isMacroDefined(const IdentifierInfo *II) { 1347 return II->hasMacroDefinition() && 1348 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 1349 } 1350 1351 /// Determine whether II is defined as a macro within the module M, 1352 /// if that is a module that we've already preprocessed. Does not check for 1353 /// macros imported into M. isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)1354 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 1355 if (!II->hasMacroDefinition()) 1356 return false; 1357 auto I = Submodules.find(M); 1358 if (I == Submodules.end()) 1359 return false; 1360 auto J = I->second.Macros.find(II); 1361 if (J == I->second.Macros.end()) 1362 return false; 1363 auto *MD = J->second.getLatest(); 1364 return MD && MD->isDefined(); 1365 } 1366 getMacroDefinition(const IdentifierInfo * II)1367 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 1368 if (!II->hasMacroDefinition()) 1369 return {}; 1370 1371 MacroState &S = CurSubmoduleState->Macros[II]; 1372 auto *MD = S.getLatest(); 1373 while (isa_and_nonnull<VisibilityMacroDirective>(MD)) 1374 MD = MD->getPrevious(); 1375 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 1376 S.getActiveModuleMacros(*this, II), 1377 S.isAmbiguous(*this, II)); 1378 } 1379 getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)1380 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 1381 SourceLocation Loc) { 1382 if (!II->hadMacroDefinition()) 1383 return {}; 1384 1385 MacroState &S = CurSubmoduleState->Macros[II]; 1386 MacroDirective::DefInfo DI; 1387 if (auto *MD = S.getLatest()) 1388 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 1389 // FIXME: Compute the set of active module macros at the specified location. 1390 return MacroDefinition(DI.getDirective(), 1391 S.getActiveModuleMacros(*this, II), 1392 S.isAmbiguous(*this, II)); 1393 } 1394 1395 /// Given an identifier, return its latest non-imported MacroDirective 1396 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. getLocalMacroDirective(const IdentifierInfo * II)1397 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 1398 if (!II->hasMacroDefinition()) 1399 return nullptr; 1400 1401 auto *MD = getLocalMacroDirectiveHistory(II); 1402 if (!MD || MD->getDefinition().isUndefined()) 1403 return nullptr; 1404 1405 return MD; 1406 } 1407 getMacroInfo(const IdentifierInfo * II)1408 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 1409 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 1410 } 1411 getMacroInfo(const IdentifierInfo * II)1412 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 1413 if (!II->hasMacroDefinition()) 1414 return nullptr; 1415 if (auto MD = getMacroDefinition(II)) 1416 return MD.getMacroInfo(); 1417 return nullptr; 1418 } 1419 1420 /// Given an identifier, return the latest non-imported macro 1421 /// directive for that identifier. 1422 /// 1423 /// One can iterate over all previous macro directives from the most recent 1424 /// one. 1425 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 1426 1427 /// Add a directive to the macro directive history for this identifier. 1428 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)1429 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 1430 SourceLocation Loc) { 1431 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 1432 appendMacroDirective(II, MD); 1433 return MD; 1434 } appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)1435 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 1436 MacroInfo *MI) { 1437 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 1438 } 1439 1440 /// Set a MacroDirective that was loaded from a PCH file. 1441 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, 1442 MacroDirective *MD); 1443 1444 /// Register an exported macro for a module and identifier. 1445 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, 1446 MacroInfo *Macro, 1447 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 1448 ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II); 1449 1450 /// Get the list of leaf (non-overridden) module macros for a name. getLeafModuleMacros(const IdentifierInfo * II)1451 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 1452 if (II->isOutOfDate()) 1453 updateOutOfDateIdentifier(*II); 1454 auto I = LeafModuleMacros.find(II); 1455 if (I != LeafModuleMacros.end()) 1456 return I->second; 1457 return std::nullopt; 1458 } 1459 1460 /// Get the list of submodules that we're currently building. getBuildingSubmodules()1461 ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const { 1462 return BuildingSubmoduleStack; 1463 } 1464 1465 /// \{ 1466 /// Iterators for the macro history table. Currently defined macros have 1467 /// IdentifierInfo::hasMacroDefinition() set and an empty 1468 /// MacroInfo::getUndefLoc() at the head of the list. 1469 using macro_iterator = MacroMap::const_iterator; 1470 1471 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 1472 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 1473 1474 llvm::iterator_range<macro_iterator> 1475 macros(bool IncludeExternalMacros = true) const { 1476 macro_iterator begin = macro_begin(IncludeExternalMacros); 1477 macro_iterator end = macro_end(IncludeExternalMacros); 1478 return llvm::make_range(begin, end); 1479 } 1480 1481 /// \} 1482 1483 /// Mark the given clang module as affecting the current clang module or translation unit. markClangModuleAsAffecting(Module * M)1484 void markClangModuleAsAffecting(Module *M) { 1485 assert(M->isModuleMapModule()); 1486 if (!BuildingSubmoduleStack.empty()) { 1487 if (M != BuildingSubmoduleStack.back().M) 1488 BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M); 1489 } else { 1490 AffectingClangModules.insert(M); 1491 } 1492 } 1493 1494 /// Get the set of top-level clang modules that affected preprocessing, but were not 1495 /// imported. getAffectingClangModules()1496 const llvm::SmallSetVector<Module *, 2> &getAffectingClangModules() const { 1497 return AffectingClangModules; 1498 } 1499 1500 /// Mark the file as included. 1501 /// Returns true if this is the first time the file was included. markIncluded(FileEntryRef File)1502 bool markIncluded(FileEntryRef File) { 1503 HeaderInfo.getFileInfo(File); 1504 return IncludedFiles.insert(File).second; 1505 } 1506 1507 /// Return true if this header has already been included. alreadyIncluded(FileEntryRef File)1508 bool alreadyIncluded(FileEntryRef File) const { 1509 HeaderInfo.getFileInfo(File); 1510 return IncludedFiles.count(File); 1511 } 1512 1513 /// Get the set of included files. getIncludedFiles()1514 IncludedFilesSet &getIncludedFiles() { return IncludedFiles; } getIncludedFiles()1515 const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; } 1516 1517 /// Return the name of the macro defined before \p Loc that has 1518 /// spelling \p Tokens. If there are multiple macros with same spelling, 1519 /// return the last one defined. 1520 StringRef getLastMacroWithSpelling(SourceLocation Loc, 1521 ArrayRef<TokenValue> Tokens) const; 1522 1523 /// Get the predefines for this processor. 1524 /// Used by some third-party tools to inspect and add predefines (see 1525 /// https://github.com/llvm/llvm-project/issues/57483). getPredefines()1526 const std::string &getPredefines() const { return Predefines; } 1527 1528 /// Set the predefines for this Preprocessor. 1529 /// 1530 /// These predefines are automatically injected when parsing the main file. setPredefines(std::string P)1531 void setPredefines(std::string P) { Predefines = std::move(P); } 1532 1533 /// Return information about the specified preprocessor 1534 /// identifier token. getIdentifierInfo(StringRef Name)1535 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 1536 return &Identifiers.get(Name); 1537 } 1538 1539 /// Add the specified pragma handler to this preprocessor. 1540 /// 1541 /// If \p Namespace is non-null, then it is a token required to exist on the 1542 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 1543 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); AddPragmaHandler(PragmaHandler * Handler)1544 void AddPragmaHandler(PragmaHandler *Handler) { 1545 AddPragmaHandler(StringRef(), Handler); 1546 } 1547 1548 /// Remove the specific pragma handler from this preprocessor. 1549 /// 1550 /// If \p Namespace is non-null, then it should be the namespace that 1551 /// \p Handler was added to. It is an error to remove a handler that 1552 /// has not been registered. 1553 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); RemovePragmaHandler(PragmaHandler * Handler)1554 void RemovePragmaHandler(PragmaHandler *Handler) { 1555 RemovePragmaHandler(StringRef(), Handler); 1556 } 1557 1558 /// Install empty handlers for all pragmas (making them ignored). 1559 void IgnorePragmas(); 1560 1561 /// Set empty line handler. setEmptylineHandler(EmptylineHandler * Handler)1562 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; } 1563 getEmptylineHandler()1564 EmptylineHandler *getEmptylineHandler() const { return Emptyline; } 1565 1566 /// Add the specified comment handler to the preprocessor. 1567 void addCommentHandler(CommentHandler *Handler); 1568 1569 /// Remove the specified comment handler. 1570 /// 1571 /// It is an error to remove a handler that has not been registered. 1572 void removeCommentHandler(CommentHandler *Handler); 1573 1574 /// Set the code completion handler to the given object. setCodeCompletionHandler(CodeCompletionHandler & Handler)1575 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 1576 CodeComplete = &Handler; 1577 } 1578 1579 /// Retrieve the current code-completion handler. getCodeCompletionHandler()1580 CodeCompletionHandler *getCodeCompletionHandler() const { 1581 return CodeComplete; 1582 } 1583 1584 /// Clear out the code completion handler. clearCodeCompletionHandler()1585 void clearCodeCompletionHandler() { 1586 CodeComplete = nullptr; 1587 } 1588 1589 /// Hook used by the lexer to invoke the "included file" code 1590 /// completion point. 1591 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled); 1592 1593 /// Hook used by the lexer to invoke the "natural language" code 1594 /// completion point. 1595 void CodeCompleteNaturalLanguage(); 1596 1597 /// Set the code completion token for filtering purposes. setCodeCompletionIdentifierInfo(IdentifierInfo * Filter)1598 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) { 1599 CodeCompletionII = Filter; 1600 } 1601 1602 /// Set the code completion token range for detecting replacement range later 1603 /// on. setCodeCompletionTokenRange(const SourceLocation Start,const SourceLocation End)1604 void setCodeCompletionTokenRange(const SourceLocation Start, 1605 const SourceLocation End) { 1606 CodeCompletionTokenRange = {Start, End}; 1607 } getCodeCompletionTokenRange()1608 SourceRange getCodeCompletionTokenRange() const { 1609 return CodeCompletionTokenRange; 1610 } 1611 1612 /// Get the code completion token for filtering purposes. getCodeCompletionFilter()1613 StringRef getCodeCompletionFilter() { 1614 if (CodeCompletionII) 1615 return CodeCompletionII->getName(); 1616 return {}; 1617 } 1618 1619 /// Retrieve the preprocessing record, or NULL if there is no 1620 /// preprocessing record. getPreprocessingRecord()1621 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 1622 1623 /// Create a new preprocessing record, which will keep track of 1624 /// all macro expansions, macro definitions, etc. 1625 void createPreprocessingRecord(); 1626 1627 /// Returns true if the FileEntry is the PCH through header. 1628 bool isPCHThroughHeader(const FileEntry *FE); 1629 1630 /// True if creating a PCH with a through header. 1631 bool creatingPCHWithThroughHeader(); 1632 1633 /// True if using a PCH with a through header. 1634 bool usingPCHWithThroughHeader(); 1635 1636 /// True if creating a PCH with a #pragma hdrstop. 1637 bool creatingPCHWithPragmaHdrStop(); 1638 1639 /// True if using a PCH with a #pragma hdrstop. 1640 bool usingPCHWithPragmaHdrStop(); 1641 1642 /// Skip tokens until after the #include of the through header or 1643 /// until after a #pragma hdrstop. 1644 void SkipTokensWhileUsingPCH(); 1645 1646 /// Process directives while skipping until the through header or 1647 /// #pragma hdrstop is found. 1648 void HandleSkippedDirectiveWhileUsingPCH(Token &Result, 1649 SourceLocation HashLoc); 1650 1651 /// Enter the specified FileID as the main source file, 1652 /// which implicitly adds the builtin defines etc. 1653 void EnterMainSourceFile(); 1654 1655 /// Inform the preprocessor callbacks that processing is complete. 1656 void EndSourceFile(); 1657 1658 /// Add a source file to the top of the include stack and 1659 /// start lexing tokens from it instead of the current buffer. 1660 /// 1661 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 1662 bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, 1663 SourceLocation Loc, bool IsFirstIncludeOfFile = true); 1664 1665 /// Add a Macro to the top of the include stack and start lexing 1666 /// tokens from it instead of the current buffer. 1667 /// 1668 /// \param Args specifies the tokens input to a function-like macro. 1669 /// \param ILEnd specifies the location of the ')' for a function-like macro 1670 /// or the identifier for an object-like macro. 1671 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, 1672 MacroArgs *Args); 1673 1674 private: 1675 /// Add a "macro" context to the top of the include stack, 1676 /// which will cause the lexer to start returning the specified tokens. 1677 /// 1678 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1679 /// will not be subject to further macro expansion. Otherwise, these tokens 1680 /// will be re-macro-expanded when/if expansion is enabled. 1681 /// 1682 /// If \p OwnsTokens is false, this method assumes that the specified stream 1683 /// of tokens has a permanent owner somewhere, so they do not need to be 1684 /// copied. If it is true, it assumes the array of tokens is allocated with 1685 /// \c new[] and the Preprocessor will delete[] it. 1686 /// 1687 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag 1688 /// set, see the flag documentation for details. 1689 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1690 bool DisableMacroExpansion, bool OwnsTokens, 1691 bool IsReinject); 1692 1693 public: EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion,bool IsReinject)1694 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1695 bool DisableMacroExpansion, bool IsReinject) { 1696 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true, 1697 IsReinject); 1698 } 1699 EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion,bool IsReinject)1700 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion, 1701 bool IsReinject) { 1702 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false, 1703 IsReinject); 1704 } 1705 1706 /// Pop the current lexer/macro exp off the top of the lexer stack. 1707 /// 1708 /// This should only be used in situations where the current state of the 1709 /// top-of-stack lexer is known. 1710 void RemoveTopOfLexerStack(); 1711 1712 /// From the point that this method is called, and until 1713 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1714 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1715 /// make the Preprocessor re-lex the same tokens. 1716 /// 1717 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1718 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1719 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1720 /// 1721 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1722 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1723 /// tokens will continue indefinitely. 1724 /// 1725 void EnableBacktrackAtThisPos(); 1726 1727 /// Disable the last EnableBacktrackAtThisPos call. 1728 void CommitBacktrackedTokens(); 1729 1730 /// Make Preprocessor re-lex the tokens that were lexed since 1731 /// EnableBacktrackAtThisPos() was previously called. 1732 void Backtrack(); 1733 1734 /// True if EnableBacktrackAtThisPos() was called and 1735 /// caching of tokens is on. isBacktrackEnabled()1736 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1737 1738 /// Lex the next token for this preprocessor. 1739 void Lex(Token &Result); 1740 1741 /// Lex all tokens for this preprocessor until (and excluding) end of file. 1742 void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr); 1743 1744 /// Lex a token, forming a header-name token if possible. 1745 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); 1746 1747 /// Lex the parameters for an #embed directive, returns nullopt on error. 1748 std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current, 1749 bool ForHasEmbed); 1750 1751 bool LexAfterModuleImport(Token &Result); 1752 void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); 1753 1754 void makeModuleVisible(Module *M, SourceLocation Loc); 1755 getModuleImportLoc(Module * M)1756 SourceLocation getModuleImportLoc(Module *M) const { 1757 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1758 } 1759 1760 /// Lex a string literal, which may be the concatenation of multiple 1761 /// string literals and may even come from macro expansion. 1762 /// \returns true on success, false if a error diagnostic has been generated. LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1763 bool LexStringLiteral(Token &Result, std::string &String, 1764 const char *DiagnosticTag, bool AllowMacroExpansion) { 1765 if (AllowMacroExpansion) 1766 Lex(Result); 1767 else 1768 LexUnexpandedToken(Result); 1769 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1770 AllowMacroExpansion); 1771 } 1772 1773 /// Complete the lexing of a string literal where the first token has 1774 /// already been lexed (see LexStringLiteral). 1775 bool FinishLexStringLiteral(Token &Result, std::string &String, 1776 const char *DiagnosticTag, 1777 bool AllowMacroExpansion); 1778 1779 /// Lex a token. If it's a comment, keep lexing until we get 1780 /// something not a comment. 1781 /// 1782 /// This is useful in -E -C mode where comments would foul up preprocessor 1783 /// directive handling. LexNonComment(Token & Result)1784 void LexNonComment(Token &Result) { 1785 do 1786 Lex(Result); 1787 while (Result.getKind() == tok::comment); 1788 } 1789 1790 /// Just like Lex, but disables macro expansion of identifier tokens. LexUnexpandedToken(Token & Result)1791 void LexUnexpandedToken(Token &Result) { 1792 // Disable macro expansion. 1793 bool OldVal = DisableMacroExpansion; 1794 DisableMacroExpansion = true; 1795 // Lex the token. 1796 Lex(Result); 1797 1798 // Reenable it. 1799 DisableMacroExpansion = OldVal; 1800 } 1801 1802 /// Like LexNonComment, but this disables macro expansion of 1803 /// identifier tokens. LexUnexpandedNonComment(Token & Result)1804 void LexUnexpandedNonComment(Token &Result) { 1805 do 1806 LexUnexpandedToken(Result); 1807 while (Result.getKind() == tok::comment); 1808 } 1809 1810 /// Parses a simple integer literal to get its numeric value. Floating 1811 /// point literals and user defined literals are rejected. Used primarily to 1812 /// handle pragmas that accept integer arguments. 1813 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1814 1815 /// Disables macro expansion everywhere except for preprocessor directives. SetMacroExpansionOnlyInDirectives()1816 void SetMacroExpansionOnlyInDirectives() { 1817 DisableMacroExpansion = true; 1818 MacroExpansionInDirectivesOverride = true; 1819 } 1820 1821 /// Peeks ahead N tokens and returns that token without consuming any 1822 /// tokens. 1823 /// 1824 /// LookAhead(0) returns the next token that would be returned by Lex(), 1825 /// LookAhead(1) returns the token after it, etc. This returns normal 1826 /// tokens after phase 5. As such, it is equivalent to using 1827 /// 'Lex', not 'LexUnexpandedToken'. LookAhead(unsigned N)1828 const Token &LookAhead(unsigned N) { 1829 assert(LexLevel == 0 && "cannot use lookahead while lexing"); 1830 if (CachedLexPos + N < CachedTokens.size()) 1831 return CachedTokens[CachedLexPos+N]; 1832 else 1833 return PeekAhead(N+1); 1834 } 1835 1836 /// When backtracking is enabled and tokens are cached, 1837 /// this allows to revert a specific number of tokens. 1838 /// 1839 /// Note that the number of tokens being reverted should be up to the last 1840 /// backtrack position, not more. RevertCachedTokens(unsigned N)1841 void RevertCachedTokens(unsigned N) { 1842 assert(isBacktrackEnabled() && 1843 "Should only be called when tokens are cached for backtracking"); 1844 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 1845 && "Should revert tokens up to the last backtrack position, not more"); 1846 assert(signed(CachedLexPos) - signed(N) >= 0 && 1847 "Corrupted backtrack positions ?"); 1848 CachedLexPos -= N; 1849 } 1850 1851 /// Enters a token in the token stream to be lexed next. 1852 /// 1853 /// If BackTrack() is called afterwards, the token will remain at the 1854 /// insertion point. 1855 /// If \p IsReinject is true, resulting token will have Token::IsReinjected 1856 /// flag set. See the flag documentation for details. EnterToken(const Token & Tok,bool IsReinject)1857 void EnterToken(const Token &Tok, bool IsReinject) { 1858 if (LexLevel) { 1859 // It's not correct in general to enter caching lex mode while in the 1860 // middle of a nested lexing action. 1861 auto TokCopy = std::make_unique<Token[]>(1); 1862 TokCopy[0] = Tok; 1863 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject); 1864 } else { 1865 EnterCachingLexMode(); 1866 assert(IsReinject && "new tokens in the middle of cached stream"); 1867 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1868 } 1869 } 1870 1871 /// We notify the Preprocessor that if it is caching tokens (because 1872 /// backtrack is enabled) it should replace the most recent cached tokens 1873 /// with the given annotation token. This function has no effect if 1874 /// backtracking is not enabled. 1875 /// 1876 /// Note that the use of this function is just for optimization, so that the 1877 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1878 /// invoked. AnnotateCachedTokens(const Token & Tok)1879 void AnnotateCachedTokens(const Token &Tok) { 1880 assert(Tok.isAnnotation() && "Expected annotation token"); 1881 if (CachedLexPos != 0 && isBacktrackEnabled()) 1882 AnnotatePreviousCachedTokens(Tok); 1883 } 1884 1885 /// Get the location of the last cached token, suitable for setting the end 1886 /// location of an annotation token. getLastCachedTokenLocation()1887 SourceLocation getLastCachedTokenLocation() const { 1888 assert(CachedLexPos != 0); 1889 return CachedTokens[CachedLexPos-1].getLastLoc(); 1890 } 1891 1892 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1893 /// CachedTokens. 1894 bool IsPreviousCachedToken(const Token &Tok) const; 1895 1896 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1897 /// in \p NewToks. 1898 /// 1899 /// Useful when a token needs to be split in smaller ones and CachedTokens 1900 /// most recent token must to be updated to reflect that. 1901 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1902 1903 /// Replace the last token with an annotation token. 1904 /// 1905 /// Like AnnotateCachedTokens(), this routine replaces an 1906 /// already-parsed (and resolved) token with an annotation 1907 /// token. However, this routine only replaces the last token with 1908 /// the annotation token; it does not affect any other cached 1909 /// tokens. This function has no effect if backtracking is not 1910 /// enabled. ReplaceLastTokenWithAnnotation(const Token & Tok)1911 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1912 assert(Tok.isAnnotation() && "Expected annotation token"); 1913 if (CachedLexPos != 0 && isBacktrackEnabled()) 1914 CachedTokens[CachedLexPos-1] = Tok; 1915 } 1916 1917 /// Enter an annotation token into the token stream. 1918 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, 1919 void *AnnotationVal); 1920 1921 /// Determine whether it's possible for a future call to Lex to produce an 1922 /// annotation token created by a previous call to EnterAnnotationToken. mightHavePendingAnnotationTokens()1923 bool mightHavePendingAnnotationTokens() { 1924 return CurLexerCallback != CLK_Lexer; 1925 } 1926 1927 /// Update the current token to represent the provided 1928 /// identifier, in order to cache an action performed by typo correction. TypoCorrectToken(const Token & Tok)1929 void TypoCorrectToken(const Token &Tok) { 1930 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1931 if (CachedLexPos != 0 && isBacktrackEnabled()) 1932 CachedTokens[CachedLexPos-1] = Tok; 1933 } 1934 1935 /// Recompute the current lexer kind based on the CurLexer/ 1936 /// CurTokenLexer pointers. 1937 void recomputeCurLexerKind(); 1938 1939 /// Returns true if incremental processing is enabled isIncrementalProcessingEnabled()1940 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1941 1942 /// Enables the incremental processing 1943 void enableIncrementalProcessing(bool value = true) { 1944 IncrementalProcessing = value; 1945 } 1946 1947 /// Specify the point at which code-completion will be performed. 1948 /// 1949 /// \param File the file in which code completion should occur. If 1950 /// this file is included multiple times, code-completion will 1951 /// perform completion the first time it is included. If NULL, this 1952 /// function clears out the code-completion point. 1953 /// 1954 /// \param Line the line at which code completion should occur 1955 /// (1-based). 1956 /// 1957 /// \param Column the column at which code completion should occur 1958 /// (1-based). 1959 /// 1960 /// \returns true if an error occurred, false otherwise. 1961 bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, 1962 unsigned Column); 1963 1964 /// Determine if we are performing code completion. isCodeCompletionEnabled()1965 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1966 1967 /// Returns the location of the code-completion point. 1968 /// 1969 /// Returns an invalid location if code-completion is not enabled or the file 1970 /// containing the code-completion point has not been lexed yet. getCodeCompletionLoc()1971 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 1972 1973 /// Returns the start location of the file of code-completion point. 1974 /// 1975 /// Returns an invalid location if code-completion is not enabled or the file 1976 /// containing the code-completion point has not been lexed yet. getCodeCompletionFileLoc()1977 SourceLocation getCodeCompletionFileLoc() const { 1978 return CodeCompletionFileLoc; 1979 } 1980 1981 /// Returns true if code-completion is enabled and we have hit the 1982 /// code-completion point. isCodeCompletionReached()1983 bool isCodeCompletionReached() const { return CodeCompletionReached; } 1984 1985 /// Note that we hit the code-completion point. setCodeCompletionReached()1986 void setCodeCompletionReached() { 1987 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 1988 CodeCompletionReached = true; 1989 // Silence any diagnostics that occur after we hit the code-completion. 1990 getDiagnostics().setSuppressAllDiagnostics(true); 1991 } 1992 1993 /// The location of the currently-active \#pragma clang 1994 /// arc_cf_code_audited begin. 1995 /// 1996 /// Returns an invalid location if there is no such pragma active. 1997 std::pair<IdentifierInfo *, SourceLocation> getPragmaARCCFCodeAuditedInfo()1998 getPragmaARCCFCodeAuditedInfo() const { 1999 return PragmaARCCFCodeAuditedInfo; 2000 } 2001 2002 /// Set the location of the currently-active \#pragma clang 2003 /// arc_cf_code_audited begin. An invalid location ends the pragma. setPragmaARCCFCodeAuditedInfo(IdentifierInfo * Ident,SourceLocation Loc)2004 void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident, 2005 SourceLocation Loc) { 2006 PragmaARCCFCodeAuditedInfo = {Ident, Loc}; 2007 } 2008 2009 /// The location of the currently-active \#pragma clang 2010 /// assume_nonnull begin. 2011 /// 2012 /// Returns an invalid location if there is no such pragma active. getPragmaAssumeNonNullLoc()2013 SourceLocation getPragmaAssumeNonNullLoc() const { 2014 return PragmaAssumeNonNullLoc; 2015 } 2016 2017 /// Set the location of the currently-active \#pragma clang 2018 /// assume_nonnull begin. An invalid location ends the pragma. setPragmaAssumeNonNullLoc(SourceLocation Loc)2019 void setPragmaAssumeNonNullLoc(SourceLocation Loc) { 2020 PragmaAssumeNonNullLoc = Loc; 2021 } 2022 2023 /// Get the location of the recorded unterminated \#pragma clang 2024 /// assume_nonnull begin in the preamble, if one exists. 2025 /// 2026 /// Returns an invalid location if the premable did not end with 2027 /// such a pragma active or if there is no recorded preamble. getPreambleRecordedPragmaAssumeNonNullLoc()2028 SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const { 2029 return PreambleRecordedPragmaAssumeNonNullLoc; 2030 } 2031 2032 /// Record the location of the unterminated \#pragma clang 2033 /// assume_nonnull begin in the preamble. setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc)2034 void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) { 2035 PreambleRecordedPragmaAssumeNonNullLoc = Loc; 2036 } 2037 2038 /// Set the directory in which the main file should be considered 2039 /// to have been found, if it is not a real file. setMainFileDir(DirectoryEntryRef Dir)2040 void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; } 2041 2042 /// Instruct the preprocessor to skip part of the main source file. 2043 /// 2044 /// \param Bytes The number of bytes in the preamble to skip. 2045 /// 2046 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 2047 /// start of a line. setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)2048 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 2049 SkipMainFilePreamble.first = Bytes; 2050 SkipMainFilePreamble.second = StartOfLine; 2051 } 2052 2053 /// Forwarding function for diagnostics. This emits a diagnostic at 2054 /// the specified Token's location, translating the token's start 2055 /// position in the current buffer into a SourcePosition object for rendering. Diag(SourceLocation Loc,unsigned DiagID)2056 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 2057 return Diags->Report(Loc, DiagID); 2058 } 2059 Diag(const Token & Tok,unsigned DiagID)2060 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 2061 return Diags->Report(Tok.getLocation(), DiagID); 2062 } 2063 2064 /// Return the 'spelling' of the token at the given 2065 /// location; does not go up to the spelling location or down to the 2066 /// expansion location. 2067 /// 2068 /// \param buffer A buffer which will be used only if the token requires 2069 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 2070 /// \param invalid If non-null, will be set \c true if an error occurs. 2071 StringRef getSpelling(SourceLocation loc, 2072 SmallVectorImpl<char> &buffer, 2073 bool *invalid = nullptr) const { 2074 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 2075 } 2076 2077 /// Return the 'spelling' of the Tok token. 2078 /// 2079 /// The spelling of a token is the characters used to represent the token in 2080 /// the source file after trigraph expansion and escaped-newline folding. In 2081 /// particular, this wants to get the true, uncanonicalized, spelling of 2082 /// things like digraphs, UCNs, etc. 2083 /// 2084 /// \param Invalid If non-null, will be set \c true if an error occurs. 2085 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 2086 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 2087 } 2088 2089 /// Get the spelling of a token into a preallocated buffer, instead 2090 /// of as an std::string. 2091 /// 2092 /// The caller is required to allocate enough space for the token, which is 2093 /// guaranteed to be at least Tok.getLength() bytes long. The length of the 2094 /// actual result is returned. 2095 /// 2096 /// Note that this method may do two possible things: it may either fill in 2097 /// the buffer specified with characters, or it may *change the input pointer* 2098 /// to point to a constant buffer with the data already in it (avoiding a 2099 /// copy). The caller is not allowed to modify the returned buffer pointer 2100 /// if an internal buffer is returned. 2101 unsigned getSpelling(const Token &Tok, const char *&Buffer, 2102 bool *Invalid = nullptr) const { 2103 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 2104 } 2105 2106 /// Get the spelling of a token into a SmallVector. 2107 /// 2108 /// Note that the returned StringRef may not point to the 2109 /// supplied buffer if a copy can be avoided. 2110 StringRef getSpelling(const Token &Tok, 2111 SmallVectorImpl<char> &Buffer, 2112 bool *Invalid = nullptr) const; 2113 2114 /// Relex the token at the specified location. 2115 /// \returns true if there was a failure, false on success. 2116 bool getRawToken(SourceLocation Loc, Token &Result, 2117 bool IgnoreWhiteSpace = false) { 2118 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 2119 } 2120 2121 /// Given a Token \p Tok that is a numeric constant with length 1, 2122 /// return the character. 2123 char 2124 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 2125 bool *Invalid = nullptr) const { 2126 assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) && 2127 Tok.getLength() == 1 && "Called on unsupported token"); 2128 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 2129 2130 // If the token is carrying a literal data pointer, just use it. 2131 if (const char *D = Tok.getLiteralData()) 2132 return (Tok.getKind() == tok::binary_data) ? *D : *D - '0'; 2133 2134 assert(Tok.is(tok::numeric_constant) && "binary data with no data"); 2135 // Otherwise, fall back on getCharacterData, which is slower, but always 2136 // works. 2137 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0'; 2138 } 2139 2140 /// Retrieve the name of the immediate macro expansion. 2141 /// 2142 /// This routine starts from a source location, and finds the name of the 2143 /// macro responsible for its immediate expansion. It looks through any 2144 /// intervening macro argument expansions to compute this. It returns a 2145 /// StringRef that refers to the SourceManager-owned buffer of the source 2146 /// where that macro name is spelled. Thus, the result shouldn't out-live 2147 /// the SourceManager. getImmediateMacroName(SourceLocation Loc)2148 StringRef getImmediateMacroName(SourceLocation Loc) { 2149 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 2150 } 2151 2152 /// Plop the specified string into a scratch buffer and set the 2153 /// specified token's location and length to it. 2154 /// 2155 /// If specified, the source location provides a location of the expansion 2156 /// point of the token. 2157 void CreateString(StringRef Str, Token &Tok, 2158 SourceLocation ExpansionLocStart = SourceLocation(), 2159 SourceLocation ExpansionLocEnd = SourceLocation()); 2160 2161 /// Split the first Length characters out of the token starting at TokLoc 2162 /// and return a location pointing to the split token. Re-lexing from the 2163 /// split token will return the split token rather than the original. 2164 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length); 2165 2166 /// Computes the source location just past the end of the 2167 /// token at this source location. 2168 /// 2169 /// This routine can be used to produce a source location that 2170 /// points just past the end of the token referenced by \p Loc, and 2171 /// is generally used when a diagnostic needs to point just after a 2172 /// token where it expected something different that it received. If 2173 /// the returned source location would not be meaningful (e.g., if 2174 /// it points into a macro), this routine returns an invalid 2175 /// source location. 2176 /// 2177 /// \param Offset an offset from the end of the token, where the source 2178 /// location should refer to. The default offset (0) produces a source 2179 /// location pointing just past the end of the token; an offset of 1 produces 2180 /// a source location pointing to the last character in the token, etc. 2181 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 2182 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 2183 } 2184 2185 /// Returns true if the given MacroID location points at the first 2186 /// token of the macro expansion. 2187 /// 2188 /// \param MacroBegin If non-null and function returns true, it is set to 2189 /// begin location of the macro. 2190 bool isAtStartOfMacroExpansion(SourceLocation loc, 2191 SourceLocation *MacroBegin = nullptr) const { 2192 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 2193 MacroBegin); 2194 } 2195 2196 /// Returns true if the given MacroID location points at the last 2197 /// token of the macro expansion. 2198 /// 2199 /// \param MacroEnd If non-null and function returns true, it is set to 2200 /// end location of the macro. 2201 bool isAtEndOfMacroExpansion(SourceLocation loc, 2202 SourceLocation *MacroEnd = nullptr) const { 2203 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 2204 } 2205 2206 /// Print the token to stderr, used for debugging. 2207 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 2208 void DumpLocation(SourceLocation Loc) const; 2209 void DumpMacro(const MacroInfo &MI) const; 2210 void dumpMacroInfo(const IdentifierInfo *II); 2211 2212 /// Given a location that specifies the start of a 2213 /// token, return a new location that specifies a character within the token. AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)2214 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 2215 unsigned Char) const { 2216 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 2217 } 2218 2219 /// Increment the counters for the number of token paste operations 2220 /// performed. 2221 /// 2222 /// If fast was specified, this is a 'fast paste' case we handled. IncrementPasteCounter(bool isFast)2223 void IncrementPasteCounter(bool isFast) { 2224 if (isFast) 2225 ++NumFastTokenPaste; 2226 else 2227 ++NumTokenPaste; 2228 } 2229 2230 void PrintStats(); 2231 2232 size_t getTotalMemory() const; 2233 2234 /// When the macro expander pastes together a comment (/##/) in Microsoft 2235 /// mode, this method handles updating the current state, returning the 2236 /// token on the next source line. 2237 void HandleMicrosoftCommentPaste(Token &Tok); 2238 2239 //===--------------------------------------------------------------------===// 2240 // Preprocessor callback methods. These are invoked by a lexer as various 2241 // directives and events are found. 2242 2243 /// Given a tok::raw_identifier token, look up the 2244 /// identifier information for the token and install it into the token, 2245 /// updating the token kind accordingly. 2246 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 2247 2248 private: 2249 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 2250 2251 public: 2252 /// Specifies the reason for poisoning an identifier. 2253 /// 2254 /// If that identifier is accessed while poisoned, then this reason will be 2255 /// used instead of the default "poisoned" diagnostic. 2256 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 2257 2258 /// Display reason for poisoned identifier. 2259 void HandlePoisonedIdentifier(Token & Identifier); 2260 MaybeHandlePoisonedIdentifier(Token & Identifier)2261 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 2262 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 2263 if(II->isPoisoned()) { 2264 HandlePoisonedIdentifier(Identifier); 2265 } 2266 } 2267 } 2268 2269 private: 2270 /// Identifiers used for SEH handling in Borland. These are only 2271 /// allowed in particular circumstances 2272 // __except block 2273 IdentifierInfo *Ident__exception_code, 2274 *Ident___exception_code, 2275 *Ident_GetExceptionCode; 2276 // __except filter expression 2277 IdentifierInfo *Ident__exception_info, 2278 *Ident___exception_info, 2279 *Ident_GetExceptionInfo; 2280 // __finally 2281 IdentifierInfo *Ident__abnormal_termination, 2282 *Ident___abnormal_termination, 2283 *Ident_AbnormalTermination; 2284 2285 const char *getCurLexerEndPos(); 2286 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod); 2287 2288 public: 2289 void PoisonSEHIdentifiers(bool Poison = true); // Borland 2290 2291 /// Callback invoked when the lexer reads an identifier and has 2292 /// filled in the tokens IdentifierInfo member. 2293 /// 2294 /// This callback potentially macro expands it or turns it into a named 2295 /// token (like 'for'). 2296 /// 2297 /// \returns true if we actually computed a token, false if we need to 2298 /// lex again. 2299 bool HandleIdentifier(Token &Identifier); 2300 2301 /// Callback invoked when the lexer hits the end of the current file. 2302 /// 2303 /// This either returns the EOF token and returns true, or 2304 /// pops a level off the include stack and returns false, at which point the 2305 /// client should call lex again. 2306 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 2307 2308 /// Callback invoked when the current TokenLexer hits the end of its 2309 /// token stream. 2310 bool HandleEndOfTokenLexer(Token &Result); 2311 2312 /// Callback invoked when the lexer sees a # token at the start of a 2313 /// line. 2314 /// 2315 /// This consumes the directive, modifies the lexer/preprocessor state, and 2316 /// advances the lexer(s) so that the next token read is the correct one. 2317 void HandleDirective(Token &Result); 2318 2319 /// Ensure that the next token is a tok::eod token. 2320 /// 2321 /// If not, emit a diagnostic and consume up until the eod. 2322 /// If \p EnableMacros is true, then we consider macros that expand to zero 2323 /// tokens as being ok. 2324 /// 2325 /// \return The location of the end of the directive (the terminating 2326 /// newline). 2327 SourceLocation CheckEndOfDirective(const char *DirType, 2328 bool EnableMacros = false); 2329 2330 /// Read and discard all tokens remaining on the current line until 2331 /// the tok::eod token is found. Returns the range of the skipped tokens. DiscardUntilEndOfDirective()2332 SourceRange DiscardUntilEndOfDirective() { 2333 Token Tmp; 2334 return DiscardUntilEndOfDirective(Tmp); 2335 } 2336 2337 /// Same as above except retains the token that was found. 2338 SourceRange DiscardUntilEndOfDirective(Token &Tok); 2339 2340 /// Returns true if the preprocessor has seen a use of 2341 /// __DATE__ or __TIME__ in the file so far. SawDateOrTime()2342 bool SawDateOrTime() const { 2343 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 2344 } getCounterValue()2345 unsigned getCounterValue() const { return CounterValue; } setCounterValue(unsigned V)2346 void setCounterValue(unsigned V) { CounterValue = V; } 2347 getCurrentFPEvalMethod()2348 LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const { 2349 assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine && 2350 "FPEvalMethod should be set either from command line or from the " 2351 "target info"); 2352 return CurrentFPEvalMethod; 2353 } 2354 getTUFPEvalMethod()2355 LangOptions::FPEvalMethodKind getTUFPEvalMethod() const { 2356 return TUFPEvalMethod; 2357 } 2358 getLastFPEvalPragmaLocation()2359 SourceLocation getLastFPEvalPragmaLocation() const { 2360 return LastFPEvalPragmaLocation; 2361 } 2362 setCurrentFPEvalMethod(SourceLocation PragmaLoc,LangOptions::FPEvalMethodKind Val)2363 void setCurrentFPEvalMethod(SourceLocation PragmaLoc, 2364 LangOptions::FPEvalMethodKind Val) { 2365 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2366 "FPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2367 // This is the location of the '#pragma float_control" where the 2368 // execution state is modifed. 2369 LastFPEvalPragmaLocation = PragmaLoc; 2370 CurrentFPEvalMethod = Val; 2371 TUFPEvalMethod = Val; 2372 } 2373 setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)2374 void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) { 2375 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2376 "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2377 TUFPEvalMethod = Val; 2378 } 2379 2380 /// Retrieves the module that we're currently building, if any. 2381 Module *getCurrentModule(); 2382 2383 /// Retrieves the module whose implementation we're current compiling, if any. 2384 Module *getCurrentModuleImplementation(); 2385 2386 /// If we are preprocessing a named module. isInNamedModule()2387 bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); } 2388 2389 /// If we are proprocessing a named interface unit. 2390 /// Note that a module implementation partition is not considered as an 2391 /// named interface unit here although it is importable 2392 /// to ease the parsing. isInNamedInterfaceUnit()2393 bool isInNamedInterfaceUnit() const { 2394 return ModuleDeclState.isNamedInterface(); 2395 } 2396 2397 /// Get the named module name we're preprocessing. 2398 /// Requires we're preprocessing a named module. getNamedModuleName()2399 StringRef getNamedModuleName() const { return ModuleDeclState.getName(); } 2400 2401 /// If we are implementing an implementation module unit. 2402 /// Note that the module implementation partition is not considered as an 2403 /// implementation unit. isInImplementationUnit()2404 bool isInImplementationUnit() const { 2405 return ModuleDeclState.isImplementationUnit(); 2406 } 2407 2408 /// If we're importing a standard C++20 Named Modules. isInImportingCXXNamedModules()2409 bool isInImportingCXXNamedModules() const { 2410 // NamedModuleImportPath will be non-empty only if we're importing 2411 // Standard C++ named modules. 2412 return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules && 2413 !IsAtImport; 2414 } 2415 2416 /// Allocate a new MacroInfo object with the provided SourceLocation. 2417 MacroInfo *AllocateMacroInfo(SourceLocation L); 2418 2419 /// Turn the specified lexer token into a fully checked and spelled 2420 /// filename, e.g. as an operand of \#include. 2421 /// 2422 /// The caller is expected to provide a buffer that is large enough to hold 2423 /// the spelling of the filename, but is also expected to handle the case 2424 /// when this method decides to use a different buffer. 2425 /// 2426 /// \returns true if the input filename was in <>'s or false if it was 2427 /// in ""'s. 2428 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer); 2429 2430 /// Given a "foo" or \<foo> reference, look up the indicated file. 2431 /// 2432 /// Returns std::nullopt on failure. \p isAngled indicates whether the file 2433 /// reference is for system \#include's or not (i.e. using <> instead of ""). 2434 OptionalFileEntryRef 2435 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, 2436 ConstSearchDirIterator FromDir, const FileEntry *FromFile, 2437 ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath, 2438 SmallVectorImpl<char> *RelativePath, 2439 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, 2440 bool *IsFrameworkFound, bool SkipCache = false, 2441 bool OpenFile = true, bool CacheFailures = true); 2442 2443 /// Given a "Filename" or \<Filename> reference, look up the indicated embed 2444 /// resource. \p isAngled indicates whether the file reference is for 2445 /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile 2446 /// is true, the file looked up is opened for reading, otherwise it only 2447 /// validates that the file exists. Quoted filenames are looked up relative 2448 /// to \p LookupFromFile if it is nonnull. 2449 /// 2450 /// Returns std::nullopt on failure. 2451 OptionalFileEntryRef 2452 LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, 2453 const FileEntry *LookupFromFile = nullptr); 2454 2455 /// Return true if we're in the top-level file, not in a \#include. 2456 bool isInPrimaryFile() const; 2457 2458 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is 2459 /// followed by EOD. Return true if the token is not a valid on-off-switch. 2460 bool LexOnOffSwitch(tok::OnOffSwitch &Result); 2461 2462 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 2463 bool *ShadowFlag = nullptr); 2464 2465 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma); 2466 Module *LeaveSubmodule(bool ForPragma); 2467 2468 private: 2469 friend void TokenLexer::ExpandFunctionArguments(); 2470 PushIncludeMacroStack()2471 void PushIncludeMacroStack() { 2472 assert(CurLexerCallback != CLK_CachingLexer && 2473 "cannot push a caching lexer"); 2474 IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule, 2475 std::move(CurLexer), CurPPLexer, 2476 std::move(CurTokenLexer), CurDirLookup); 2477 CurPPLexer = nullptr; 2478 } 2479 PopIncludeMacroStack()2480 void PopIncludeMacroStack() { 2481 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 2482 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 2483 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 2484 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 2485 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; 2486 CurLexerCallback = IncludeMacroStack.back().CurLexerCallback; 2487 IncludeMacroStack.pop_back(); 2488 } 2489 2490 void PropagateLineStartLeadingSpaceInfo(Token &Result); 2491 2492 /// Determine whether we need to create module macros for #defines in the 2493 /// current context. 2494 bool needModuleMacros() const; 2495 2496 /// Update the set of active module macros and ambiguity flag for a module 2497 /// macro name. 2498 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 2499 2500 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 2501 SourceLocation Loc); 2502 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 2503 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 2504 bool isPublic); 2505 2506 /// Lex and validate a macro name, which occurs after a 2507 /// \#define or \#undef. 2508 /// 2509 /// \param MacroNameTok Token that represents the name defined or undefined. 2510 /// \param IsDefineUndef Kind if preprocessor directive. 2511 /// \param ShadowFlag Points to flag that is set if macro name shadows 2512 /// a keyword. 2513 /// 2514 /// This emits a diagnostic, sets the token kind to eod, 2515 /// and discards the rest of the macro line if the macro name is invalid. 2516 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other, 2517 bool *ShadowFlag = nullptr); 2518 2519 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the 2520 /// entire line) of the macro's tokens and adds them to MacroInfo, and while 2521 /// doing so performs certain validity checks including (but not limited to): 2522 /// - # (stringization) is followed by a macro parameter 2523 /// \param MacroNameTok - Token that represents the macro name 2524 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard 2525 /// 2526 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and 2527 /// returns a nullptr if an invalid sequence of tokens is encountered. 2528 MacroInfo *ReadOptionalMacroParameterListAndBody( 2529 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard); 2530 2531 /// The ( starting an argument list of a macro definition has just been read. 2532 /// Lex the rest of the parameters and the closing ), updating \p MI with 2533 /// what we learn and saving in \p LastTok the last token read. 2534 /// Return true if an error occurs parsing the arg list. 2535 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok); 2536 2537 /// Provide a suggestion for a typoed directive. If there is no typo, then 2538 /// just skip suggesting. 2539 /// 2540 /// \param Tok - Token that represents the directive 2541 /// \param Directive - String reference for the directive name 2542 void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const; 2543 2544 /// We just read a \#if or related directive and decided that the 2545 /// subsequent tokens are in the \#if'd out portion of the 2546 /// file. Lex the rest of the file, until we see an \#endif. If \p 2547 /// FoundNonSkipPortion is true, then we have already emitted code for part of 2548 /// this \#if directive, so \#else/\#elif blocks should never be entered. If 2549 /// \p FoundElse is false, then \#else directives are ok, if not, then we have 2550 /// already seen one so a \#else directive is a duplicate. When this returns, 2551 /// the caller can lex the first valid token. 2552 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, 2553 SourceLocation IfTokenLoc, 2554 bool FoundNonSkipPortion, bool FoundElse, 2555 SourceLocation ElseLoc = SourceLocation()); 2556 2557 /// Information about the result for evaluating an expression for a 2558 /// preprocessor directive. 2559 struct DirectiveEvalResult { 2560 /// The integral value of the expression. 2561 std::optional<llvm::APSInt> Value; 2562 2563 /// Whether the expression was evaluated as true or not. 2564 bool Conditional; 2565 2566 /// True if the expression contained identifiers that were undefined. 2567 bool IncludedUndefinedIds; 2568 2569 /// The source range for the expression. 2570 SourceRange ExprRange; 2571 }; 2572 2573 /// Evaluate an integer constant expression that may occur after a 2574 /// \#if or \#elif directive and return a \p DirectiveEvalResult object. 2575 /// 2576 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 2577 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, 2578 bool CheckForEoD = true); 2579 2580 /// Evaluate an integer constant expression that may occur after a 2581 /// \#if or \#elif directive and return a \p DirectiveEvalResult object. 2582 /// 2583 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 2584 /// \p EvaluatedDefined will contain the result of whether "defined" appeared 2585 /// in the evaluated expression or not. 2586 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, 2587 Token &Tok, 2588 bool &EvaluatedDefined, 2589 bool CheckForEoD = true); 2590 2591 /// Process a '__has_embed("path" [, ...])' expression. 2592 /// 2593 /// Returns predefined `__STDC_EMBED_*` macro values if 2594 /// successful. 2595 EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II); 2596 2597 /// Process a '__has_include("path")' expression. 2598 /// 2599 /// Returns true if successful. 2600 bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II); 2601 2602 /// Process '__has_include_next("path")' expression. 2603 /// 2604 /// Returns true if successful. 2605 bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II); 2606 2607 /// Get the directory and file from which to start \#include_next lookup. 2608 std::pair<ConstSearchDirIterator, const FileEntry *> 2609 getIncludeNextStart(const Token &IncludeNextTok) const; 2610 2611 /// Install the standard preprocessor pragmas: 2612 /// \#pragma GCC poison/system_header/dependency and \#pragma once. 2613 void RegisterBuiltinPragmas(); 2614 2615 /// Register builtin macros such as __LINE__ with the identifier table. 2616 void RegisterBuiltinMacros(); 2617 2618 /// If an identifier token is read that is to be expanded as a macro, handle 2619 /// it and return the next token as 'Tok'. If we lexed a token, return true; 2620 /// otherwise the caller should lex again. 2621 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD); 2622 2623 /// Cache macro expanded tokens for TokenLexers. 2624 // 2625 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 2626 /// going to lex in the cache and when it finishes the tokens are removed 2627 /// from the end of the cache. 2628 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer, 2629 ArrayRef<Token> tokens); 2630 2631 void removeCachedMacroExpandedTokensOfLastLexer(); 2632 2633 /// Determine whether the next preprocessor token to be 2634 /// lexed is a '('. If so, consume the token and return true, if not, this 2635 /// method should have no observable side-effect on the lexed tokens. 2636 bool isNextPPTokenLParen(); 2637 2638 /// After reading "MACRO(", this method is invoked to read all of the formal 2639 /// arguments specified for the macro invocation. Returns null on error. 2640 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, 2641 SourceLocation &MacroEnd); 2642 2643 /// If an identifier token is read that is to be expanded 2644 /// as a builtin macro, handle it and return the next token as 'Tok'. 2645 void ExpandBuiltinMacro(Token &Tok); 2646 2647 /// Read a \c _Pragma directive, slice it up, process it, then 2648 /// return the first token after the directive. 2649 /// This assumes that the \c _Pragma token has just been read into \p Tok. 2650 void Handle_Pragma(Token &Tok); 2651 2652 /// Like Handle_Pragma except the pragma text is not enclosed within 2653 /// a string literal. 2654 void HandleMicrosoft__pragma(Token &Tok); 2655 2656 /// Add a lexer to the top of the include stack and 2657 /// start lexing tokens from it instead of the current buffer. 2658 void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir); 2659 2660 /// Set the FileID for the preprocessor predefines. setPredefinesFileID(FileID FID)2661 void setPredefinesFileID(FileID FID) { 2662 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 2663 PredefinesFileID = FID; 2664 } 2665 2666 /// Set the FileID for the PCH through header. 2667 void setPCHThroughHeaderFileID(FileID FID); 2668 2669 /// Returns true if we are lexing from a file and not a 2670 /// pragma or a macro. IsFileLexer(const Lexer * L,const PreprocessorLexer * P)2671 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 2672 return L ? !L->isPragmaLexer() : P != nullptr; 2673 } 2674 IsFileLexer(const IncludeStackInfo & I)2675 static bool IsFileLexer(const IncludeStackInfo& I) { 2676 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 2677 } 2678 IsFileLexer()2679 bool IsFileLexer() const { 2680 return IsFileLexer(CurLexer.get(), CurPPLexer); 2681 } 2682 2683 //===--------------------------------------------------------------------===// 2684 // Caching stuff. 2685 void CachingLex(Token &Result); 2686 InCachingLexMode()2687 bool InCachingLexMode() const { 2688 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 2689 // that we are past EOF, not that we are in CachingLex mode. 2690 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty(); 2691 } 2692 2693 void EnterCachingLexMode(); 2694 void EnterCachingLexModeUnchecked(); 2695 ExitCachingLexMode()2696 void ExitCachingLexMode() { 2697 if (InCachingLexMode()) 2698 RemoveTopOfLexerStack(); 2699 } 2700 2701 const Token &PeekAhead(unsigned N); 2702 void AnnotatePreviousCachedTokens(const Token &Tok); 2703 2704 //===--------------------------------------------------------------------===// 2705 /// Handle*Directive - implement the various preprocessor directives. These 2706 /// should side-effect the current preprocessor object so that the next call 2707 /// to Lex() will return the appropriate token next. 2708 void HandleLineDirective(); 2709 void HandleDigitDirective(Token &Tok); 2710 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 2711 void HandleIdentSCCSDirective(Token &Tok); 2712 void HandleMacroPublicDirective(Token &Tok); 2713 void HandleMacroPrivateDirective(); 2714 2715 /// An additional notification that can be produced by a header inclusion or 2716 /// import to tell the parser what happened. 2717 struct ImportAction { 2718 enum ActionKind { 2719 None, 2720 ModuleBegin, 2721 ModuleImport, 2722 HeaderUnitImport, 2723 SkippedModuleImport, 2724 Failure, 2725 } Kind; 2726 Module *ModuleForHeader = nullptr; 2727 2728 ImportAction(ActionKind AK, Module *Mod = nullptr) KindImportAction2729 : Kind(AK), ModuleForHeader(Mod) { 2730 assert((AK == None || Mod || AK == Failure) && 2731 "no module for module action"); 2732 } 2733 }; 2734 2735 OptionalFileEntryRef LookupHeaderIncludeOrImport( 2736 ConstSearchDirIterator *CurDir, StringRef &Filename, 2737 SourceLocation FilenameLoc, CharSourceRange FilenameRange, 2738 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl, 2739 bool &IsMapped, ConstSearchDirIterator LookupFrom, 2740 const FileEntry *LookupFromFile, StringRef &LookupFilename, 2741 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath, 2742 ModuleMap::KnownHeader &SuggestedModule, bool isAngled); 2743 // Binary data inclusion 2744 void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok, 2745 const FileEntry *LookupFromFile = nullptr); 2746 void HandleEmbedDirectiveImpl(SourceLocation HashLoc, 2747 const LexEmbedParametersResult &Params, 2748 StringRef BinaryContents); 2749 2750 // File inclusion. 2751 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, 2752 ConstSearchDirIterator LookupFrom = nullptr, 2753 const FileEntry *LookupFromFile = nullptr); 2754 ImportAction 2755 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok, 2756 Token &FilenameTok, SourceLocation EndLoc, 2757 ConstSearchDirIterator LookupFrom = nullptr, 2758 const FileEntry *LookupFromFile = nullptr); 2759 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); 2760 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); 2761 void HandleImportDirective(SourceLocation HashLoc, Token &Tok); 2762 void HandleMicrosoftImportDirective(Token &Tok); 2763 2764 public: 2765 /// Check that the given module is available, producing a diagnostic if not. 2766 /// \return \c true if the check failed (because the module is not available). 2767 /// \c false if the module appears to be usable. 2768 static bool checkModuleIsAvailable(const LangOptions &LangOpts, 2769 const TargetInfo &TargetInfo, 2770 const Module &M, DiagnosticsEngine &Diags); 2771 2772 // Module inclusion testing. 2773 /// Find the module that owns the source or header file that 2774 /// \p Loc points to. If the location is in a file that was included 2775 /// into a module, or is outside any module, returns nullptr. 2776 Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual); 2777 2778 /// We want to produce a diagnostic at location IncLoc concerning an 2779 /// unreachable effect at location MLoc (eg, where a desired entity was 2780 /// declared or defined). Determine whether the right way to make MLoc 2781 /// reachable is by #include, and if so, what header should be included. 2782 /// 2783 /// This is not necessarily fast, and might load unexpected module maps, so 2784 /// should only be called by code that intends to produce an error. 2785 /// 2786 /// \param IncLoc The location at which the missing effect was detected. 2787 /// \param MLoc A location within an unimported module at which the desired 2788 /// effect occurred. 2789 /// \return A file that can be #included to provide the desired effect. Null 2790 /// if no such file could be determined or if a #include is not 2791 /// appropriate (eg, if a module should be imported instead). 2792 OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, 2793 SourceLocation MLoc); 2794 isRecordingPreamble()2795 bool isRecordingPreamble() const { 2796 return PreambleConditionalStack.isRecording(); 2797 } 2798 hasRecordedPreamble()2799 bool hasRecordedPreamble() const { 2800 return PreambleConditionalStack.hasRecordedPreamble(); 2801 } 2802 getPreambleConditionalStack()2803 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const { 2804 return PreambleConditionalStack.getStack(); 2805 } 2806 setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s)2807 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) { 2808 PreambleConditionalStack.setStack(s); 2809 } 2810 setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,std::optional<PreambleSkipInfo> SkipInfo)2811 void setReplayablePreambleConditionalStack( 2812 ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) { 2813 PreambleConditionalStack.startReplaying(); 2814 PreambleConditionalStack.setStack(s); 2815 PreambleConditionalStack.SkipInfo = SkipInfo; 2816 } 2817 getPreambleSkipInfo()2818 std::optional<PreambleSkipInfo> getPreambleSkipInfo() const { 2819 return PreambleConditionalStack.SkipInfo; 2820 } 2821 2822 private: 2823 /// After processing predefined file, initialize the conditional stack from 2824 /// the preamble. 2825 void replayPreambleConditionalStack(); 2826 2827 // Macro handling. 2828 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard); 2829 void HandleUndefDirective(); 2830 2831 // Conditional Inclusion. 2832 void HandleIfdefDirective(Token &Result, const Token &HashToken, 2833 bool isIfndef, bool ReadAnyTokensBeforeDirective); 2834 void HandleIfDirective(Token &IfToken, const Token &HashToken, 2835 bool ReadAnyTokensBeforeDirective); 2836 void HandleEndifDirective(Token &EndifToken); 2837 void HandleElseDirective(Token &Result, const Token &HashToken); 2838 void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken, 2839 tok::PPKeywordKind Kind); 2840 2841 // Pragmas. 2842 void HandlePragmaDirective(PragmaIntroducer Introducer); 2843 2844 public: 2845 void HandlePragmaOnce(Token &OnceTok); 2846 void HandlePragmaMark(Token &MarkTok); 2847 void HandlePragmaPoison(); 2848 void HandlePragmaSystemHeader(Token &SysHeaderTok); 2849 void HandlePragmaDependency(Token &DependencyTok); 2850 void HandlePragmaPushMacro(Token &Tok); 2851 void HandlePragmaPopMacro(Token &Tok); 2852 void HandlePragmaIncludeAlias(Token &Tok); 2853 void HandlePragmaModuleBuild(Token &Tok); 2854 void HandlePragmaHdrstop(Token &Tok); 2855 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 2856 2857 // Return true and store the first token only if any CommentHandler 2858 // has inserted some tokens and getCommentRetentionState() is false. 2859 bool HandleComment(Token &result, SourceRange Comment); 2860 2861 /// A macro is used, update information about macros that need unused 2862 /// warnings. 2863 void markMacroAsUsed(MacroInfo *MI); 2864 addMacroDeprecationMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2865 void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg, 2866 SourceLocation AnnotationLoc) { 2867 auto Annotations = AnnotationInfos.find(II); 2868 if (Annotations == AnnotationInfos.end()) 2869 AnnotationInfos.insert(std::make_pair( 2870 II, 2871 MacroAnnotations::makeDeprecation(AnnotationLoc, std::move(Msg)))); 2872 else 2873 Annotations->second.DeprecationInfo = 2874 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2875 } 2876 addRestrictExpansionMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2877 void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg, 2878 SourceLocation AnnotationLoc) { 2879 auto Annotations = AnnotationInfos.find(II); 2880 if (Annotations == AnnotationInfos.end()) 2881 AnnotationInfos.insert( 2882 std::make_pair(II, MacroAnnotations::makeRestrictExpansion( 2883 AnnotationLoc, std::move(Msg)))); 2884 else 2885 Annotations->second.RestrictExpansionInfo = 2886 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2887 } 2888 addFinalLoc(const IdentifierInfo * II,SourceLocation AnnotationLoc)2889 void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) { 2890 auto Annotations = AnnotationInfos.find(II); 2891 if (Annotations == AnnotationInfos.end()) 2892 AnnotationInfos.insert( 2893 std::make_pair(II, MacroAnnotations::makeFinal(AnnotationLoc))); 2894 else 2895 Annotations->second.FinalAnnotationLoc = AnnotationLoc; 2896 } 2897 getMacroAnnotations(const IdentifierInfo * II)2898 const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const { 2899 return AnnotationInfos.find(II)->second; 2900 } 2901 2902 void emitMacroExpansionWarnings(const Token &Identifier, 2903 bool IsIfnDef = false) const { 2904 IdentifierInfo *Info = Identifier.getIdentifierInfo(); 2905 if (Info->isDeprecatedMacro()) 2906 emitMacroDeprecationWarning(Identifier); 2907 2908 if (Info->isRestrictExpansion() && 2909 !SourceMgr.isInMainFile(Identifier.getLocation())) 2910 emitRestrictExpansionWarning(Identifier); 2911 2912 if (!IsIfnDef) { 2913 if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs) 2914 emitRestrictInfNaNWarning(Identifier, 0); 2915 if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs) 2916 emitRestrictInfNaNWarning(Identifier, 1); 2917 } 2918 } 2919 2920 static void processPathForFileMacro(SmallVectorImpl<char> &Path, 2921 const LangOptions &LangOpts, 2922 const TargetInfo &TI); 2923 2924 static void processPathToFileName(SmallVectorImpl<char> &FileName, 2925 const PresumedLoc &PLoc, 2926 const LangOptions &LangOpts, 2927 const TargetInfo &TI); 2928 2929 private: 2930 void emitMacroDeprecationWarning(const Token &Identifier) const; 2931 void emitRestrictExpansionWarning(const Token &Identifier) const; 2932 void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const; 2933 void emitRestrictInfNaNWarning(const Token &Identifier, 2934 unsigned DiagSelection) const; 2935 2936 /// This boolean state keeps track if the current scanned token (by this PP) 2937 /// is in an "-Wunsafe-buffer-usage" opt-out region. Assuming PP scans a 2938 /// translation unit in a linear order. 2939 bool InSafeBufferOptOutRegion = false; 2940 2941 /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out 2942 /// region if PP is currently in such a region. Hold undefined value 2943 /// otherwise. 2944 SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. 2945 2946 using SafeBufferOptOutRegionsTy = 2947 SmallVector<std::pair<SourceLocation, SourceLocation>, 16>; 2948 // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this 2949 // translation unit. Each region is represented by a pair of start and 2950 // end locations. 2951 SafeBufferOptOutRegionsTy SafeBufferOptOutMap; 2952 2953 // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the 2954 // following structure to manage them by their ASTs. 2955 struct { 2956 // A map from unique IDs to region maps of loaded ASTs. The ID identifies a 2957 // loaded AST. See `SourceManager::getUniqueLoadedASTID`. 2958 llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions; 2959 2960 // Returns a reference to the safe buffer opt-out regions of the loaded 2961 // AST where `Loc` belongs to. (Construct if absent) 2962 SafeBufferOptOutRegionsTy & findAndConsLoadedOptOutMap__anon4672ff5f03082963 findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) { 2964 return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)]; 2965 } 2966 2967 // Returns a reference to the safe buffer opt-out regions of the loaded 2968 // AST where `Loc` belongs to. (This const function returns nullptr if 2969 // absent.) 2970 const SafeBufferOptOutRegionsTy * lookupLoadedOptOutMap__anon4672ff5f03082971 lookupLoadedOptOutMap(SourceLocation Loc, 2972 const SourceManager &SrcMgr) const { 2973 FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc); 2974 auto Iter = LoadedRegions.find(FID); 2975 2976 if (Iter == LoadedRegions.end()) 2977 return nullptr; 2978 return &Iter->getSecond(); 2979 } 2980 } LoadedSafeBufferOptOutMap; 2981 2982 public: 2983 /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out 2984 /// region. This `Loc` must be a source location that has been pre-processed. 2985 bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const; 2986 2987 /// Alter the state of whether this PP currently is in a 2988 /// "-Wunsafe-buffer-usage" opt-out region. 2989 /// 2990 /// \param isEnter true if this PP is entering a region; otherwise, this PP 2991 /// is exiting a region 2992 /// \param Loc the location of the entry or exit of a 2993 /// region 2994 /// \return true iff it is INVALID to enter or exit a region, i.e., 2995 /// attempt to enter a region before exiting a previous region, or exiting a 2996 /// region that PP is not currently in. 2997 bool enterOrExitSafeBufferOptOutRegion(bool isEnter, 2998 const SourceLocation &Loc); 2999 3000 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage" 3001 /// opt-out region 3002 bool isPPInSafeBufferOptOutRegion(); 3003 3004 /// \param StartLoc output argument. It will be set to the start location of 3005 /// the current "-Wunsafe-buffer-usage" opt-out region iff this function 3006 /// returns true. 3007 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage" 3008 /// opt-out region 3009 bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); 3010 3011 /// \return a sequence of SourceLocations representing ordered opt-out regions 3012 /// specified by 3013 /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. 3014 SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const; 3015 3016 /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a 3017 /// record of code `PP_UNSAFE_BUFFER_USAGE`. 3018 /// \return true iff the `Preprocessor` has been updated; false `Preprocessor` 3019 /// is same as itself before the call. 3020 bool setDeserializedSafeBufferOptOutMap( 3021 const SmallVectorImpl<SourceLocation> &SrcLocSeqs); 3022 3023 private: 3024 /// Helper functions to forward lexing to the actual lexer. They all share the 3025 /// same signature. CLK_Lexer(Preprocessor & P,Token & Result)3026 static bool CLK_Lexer(Preprocessor &P, Token &Result) { 3027 return P.CurLexer->Lex(Result); 3028 } CLK_TokenLexer(Preprocessor & P,Token & Result)3029 static bool CLK_TokenLexer(Preprocessor &P, Token &Result) { 3030 return P.CurTokenLexer->Lex(Result); 3031 } CLK_CachingLexer(Preprocessor & P,Token & Result)3032 static bool CLK_CachingLexer(Preprocessor &P, Token &Result) { 3033 P.CachingLex(Result); 3034 return true; 3035 } CLK_DependencyDirectivesLexer(Preprocessor & P,Token & Result)3036 static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) { 3037 return P.CurLexer->LexDependencyDirectiveToken(Result); 3038 } CLK_LexAfterModuleImport(Preprocessor & P,Token & Result)3039 static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) { 3040 return P.LexAfterModuleImport(Result); 3041 } 3042 }; 3043 3044 /// Abstract base class that describes a handler that will receive 3045 /// source ranges for each of the comments encountered in the source file. 3046 class CommentHandler { 3047 public: 3048 virtual ~CommentHandler(); 3049 3050 // The handler shall return true if it has pushed any tokens 3051 // to be read using e.g. EnterToken or EnterTokenStream. 3052 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 3053 }; 3054 3055 /// Abstract base class that describes a handler that will receive 3056 /// source ranges for empty lines encountered in the source file. 3057 class EmptylineHandler { 3058 public: 3059 virtual ~EmptylineHandler(); 3060 3061 // The handler handles empty lines. 3062 virtual void HandleEmptyline(SourceRange Range) = 0; 3063 }; 3064 3065 /// Helper class to shuttle information about #embed directives from the 3066 /// preprocessor to the parser through an annotation token. 3067 struct EmbedAnnotationData { 3068 StringRef BinaryData; 3069 }; 3070 3071 /// Registry of pragma handlers added by plugins 3072 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; 3073 3074 } // namespace clang 3075 3076 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H 3077