1 //===- ASTWriter.h - AST File Writer ----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the ASTWriter class, which writes an AST file 10 // containing a serialized representation of a translation unit. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_SERIALIZATION_ASTWRITER_H 15 #define LLVM_CLANG_SERIALIZATION_ASTWRITER_H 16 17 #include "clang/AST/ASTMutationListener.h" 18 #include "clang/AST/Decl.h" 19 #include "clang/AST/Type.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Sema/Sema.h" 23 #include "clang/Sema/SemaConsumer.h" 24 #include "clang/Serialization/ASTBitCodes.h" 25 #include "clang/Serialization/ASTDeserializationListener.h" 26 #include "clang/Serialization/PCHContainerOperations.h" 27 #include "clang/Serialization/SourceLocationEncoding.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/DenseMap.h" 30 #include "llvm/ADT/DenseSet.h" 31 #include "llvm/ADT/MapVector.h" 32 #include "llvm/ADT/STLExtras.h" 33 #include "llvm/ADT/SetVector.h" 34 #include "llvm/ADT/SmallVector.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Bitstream/BitstreamWriter.h" 37 #include <cassert> 38 #include <cstddef> 39 #include <cstdint> 40 #include <ctime> 41 #include <memory> 42 #include <queue> 43 #include <string> 44 #include <utility> 45 #include <vector> 46 47 namespace clang { 48 49 class ASTContext; 50 class ASTReader; 51 class Attr; 52 class CXXRecordDecl; 53 class FileEntry; 54 class FPOptionsOverride; 55 class FunctionDecl; 56 class HeaderSearch; 57 class HeaderSearchOptions; 58 class IdentifierResolver; 59 class 
LangOptions; 60 class MacroDefinitionRecord; 61 class MacroInfo; 62 class Module; 63 class InMemoryModuleCache; 64 class ModuleFileExtension; 65 class ModuleFileExtensionWriter; 66 class NamedDecl; 67 class ObjCInterfaceDecl; 68 class PreprocessingRecord; 69 class Preprocessor; 70 class RecordDecl; 71 class Sema; 72 class SourceManager; 73 class Stmt; 74 class StoredDeclsList; 75 class SwitchCase; 76 class Token; 77 78 namespace SrcMgr { 79 class FileInfo; 80 } // namespace SrcMgr 81 82 /// Writes an AST file containing the contents of a translation unit. 83 /// 84 /// The ASTWriter class produces a bitstream containing the serialized 85 /// representation of a given abstract syntax tree and its supporting 86 /// data structures. This bitstream can be de-serialized via an 87 /// instance of the ASTReader class. 88 class ASTWriter : public ASTDeserializationListener, 89 public ASTMutationListener { 90 public: 91 friend class ASTDeclWriter; 92 friend class ASTRecordWriter; 93 94 using RecordData = SmallVector<uint64_t, 64>; 95 using RecordDataImpl = SmallVectorImpl<uint64_t>; 96 using RecordDataRef = ArrayRef<uint64_t>; 97 98 private: 99 /// Map that provides the ID numbers of each type within the 100 /// output stream, plus those deserialized from a chained PCH. 101 /// 102 /// The ID numbers of types are consecutive (in order of discovery) 103 /// and start at 1. 0 is reserved for NULL. When types are actually 104 /// stored in the stream, the ID number is shifted by 2 bits to 105 /// allow for the const/volatile qualifiers. 106 /// 107 /// Keys in the map never have const/volatile qualifiers. 108 using TypeIdxMap = llvm::DenseMap<QualType, serialization::TypeIdx, 109 serialization::UnsafeQualTypeDenseMapInfo>; 110 111 using LocSeq = SourceLocationSequence; 112 113 /// The bitstream writer used to emit this precompiled header. 114 llvm::BitstreamWriter &Stream; 115 116 /// The buffer associated with the bitstream. 
const SmallVectorImpl<char> &Buffer;

/// The PCM manager which manages memory buffers for pcm files.
InMemoryModuleCache &ModuleCache;

/// The ASTContext we're writing.
///
/// Null by default; getASTContext() asserts that it is non-null before
/// dereferencing it.
ASTContext *Context = nullptr;

/// The preprocessor we're writing.
Preprocessor *PP = nullptr;

/// The reader of existing AST files, if we're chaining.
ASTReader *Chain = nullptr;

/// The module we're currently writing, if any.
Module *WritingModule = nullptr;

/// The byte range representing all the UNHASHED_CONTROL_BLOCK.
std::pair<uint64_t, uint64_t> UnhashedControlBlockRange;
/// The bit offset of the AST block hash blob.
uint64_t ASTBlockHashOffset = 0;
/// The bit offset of the signature blob.
uint64_t SignatureOffset = 0;

/// The bit offset of the first bit inside the AST_BLOCK.
uint64_t ASTBlockStartOffset = 0;

/// The byte range representing all the AST_BLOCK.
std::pair<uint64_t, uint64_t> ASTBlockRange;

/// The base directory for any relative paths we emit.
std::string BaseDirectory;

/// Indicates whether timestamps should be written to the produced
/// module file. This is the case for files implicitly written to the
/// module cache, where we need the timestamps to determine if the module
/// file is up to date, but not otherwise.
///
/// NOTE(review): this member has no in-class initializer; presumably it is
/// set from the constructor parameter of the same name -- confirm in the
/// constructor definition.
bool IncludeTimestamps;

/// Indicates whether the AST file being written is an implicit module.
/// If that's the case, we may be able to skip writing some information that
/// is guaranteed to be the same in the importer by the context hash.
bool BuildingImplicitModule = false;

/// Indicates when the AST writing is actively performing
/// serialization, rather than just queueing updates.
bool WritingAST = false;

/// Indicates that we are done serializing the collection of decls
/// and types to emit.
167 bool DoneWritingDeclsAndTypes = false; 168 169 /// Indicates that the AST contained compiler errors. 170 bool ASTHasCompilerErrors = false; 171 172 /// Indicates that we're going to generate the reduced BMI for C++20 173 /// named modules. 174 bool GeneratingReducedBMI = false; 175 176 /// Mapping from input file entries to the index into the 177 /// offset table where information about that input file is stored. 178 llvm::DenseMap<const FileEntry *, uint32_t> InputFileIDs; 179 180 /// Stores a declaration or a type to be written to the AST file. 181 class DeclOrType { 182 public: DeclOrType(Decl * D)183 DeclOrType(Decl *D) : Stored(D), IsType(false) {} DeclOrType(QualType T)184 DeclOrType(QualType T) : Stored(T.getAsOpaquePtr()), IsType(true) {} 185 isType()186 bool isType() const { return IsType; } isDecl()187 bool isDecl() const { return !IsType; } 188 getType()189 QualType getType() const { 190 assert(isType() && "Not a type!"); 191 return QualType::getFromOpaquePtr(Stored); 192 } 193 getDecl()194 Decl *getDecl() const { 195 assert(isDecl() && "Not a decl!"); 196 return static_cast<Decl *>(Stored); 197 } 198 199 private: 200 void *Stored; 201 bool IsType; 202 }; 203 204 /// The declarations and types to emit. 205 std::queue<DeclOrType> DeclTypesToEmit; 206 207 /// The delayed namespace to emit. Only meaningful for reduced BMI. 208 /// 209 /// In reduced BMI, we want to elide the unreachable declarations in 210 /// the global module fragment. However, in ASTWriterDecl, when we see 211 /// a namespace, all the declarations in the namespace would be emitted. 212 /// So the optimization become meaningless. To solve the issue, we 213 /// delay recording all the declarations until we emit all the declarations. 214 /// Then we can safely record the reached declarations only. 215 llvm::SmallVector<NamespaceDecl *, 16> DelayedNamespace; 216 217 /// The first ID number we can use for our own declarations. 
LocalDeclID FirstDeclID = LocalDeclID(clang::NUM_PREDEF_DECL_IDS);

/// The decl ID that will be assigned to the next new decl.
LocalDeclID NextDeclID = FirstDeclID;

/// Map that provides the ID numbers of each declaration within
/// the output stream, as well as those deserialized from a chained PCH.
///
/// The ID numbers of declarations are consecutive (in order of
/// discovery) and start at 2. 1 is reserved for the translation
/// unit, while 0 is reserved for NULL.
///
/// NOTE(review): FirstDeclID above is initialized from
/// clang::NUM_PREDEF_DECL_IDS, so "start at 2" may be out of date --
/// confirm against the current set of predefined decl IDs.
llvm::DenseMap<const Decl *, LocalDeclID> DeclIDs;

/// Set of predefined decls. This is helper data to determine if a decl
/// is predefined. It should be clearer and safer to query the set
/// instead of comparing the result of `getDeclID()` or `GetDeclRef()`.
llvm::SmallPtrSet<const Decl *, 32> PredefinedDecls;

/// Offset of each declaration in the bitstream, indexed by
/// the declaration's ID.
std::vector<serialization::DeclOffset> DeclOffsets;

/// The offset of the DECLTYPES_BLOCK. The offsets in DeclOffsets
/// are relative to this value.
uint64_t DeclTypesBlockStartOffset = 0;

/// Sorted (by file offset) vector of pairs of file offset/LocalDeclID.
using LocDeclIDsTy = SmallVector<std::pair<unsigned, LocalDeclID>, 64>;

/// Per-file record of the file-level declarations and where they sit in
/// the joined, global list.
struct DeclIDInFileInfo {
  LocDeclIDsTy DeclIDs;

  /// Set when the DeclIDs vectors from all files are joined, this
  /// indicates the index that this particular vector has in the global one.
  ///
  /// No default member initializer here; the value is only meaningful once
  /// the join described above has happened.
  unsigned FirstDeclIndex;
};
using FileDeclIDsTy =
    llvm::DenseMap<FileID, std::unique_ptr<DeclIDInFileInfo>>;

/// Map from file SLocEntries to info about the file-level declarations
/// that it contains.
FileDeclIDsTy FileDeclIDs;

/// NOTE(review): presumably records \p D (with the given local ID) in
/// FileDeclIDs for the file that contains it -- confirm in the definition.
void associateDeclWithFile(const Decl *D, LocalDeclID);

/// The first ID number we can use for our own types.
serialization::TypeID FirstTypeID = serialization::NUM_PREDEF_TYPE_IDS;

/// The type ID that will be assigned to the next new type.
serialization::TypeID NextTypeID = FirstTypeID;

/// Map that provides the ID numbers of each type within the
/// output stream, plus those deserialized from a chained PCH.
///
/// The ID numbers of types are consecutive (in order of discovery)
/// and start at 1. 0 is reserved for NULL. When types are actually
/// stored in the stream, the ID number is shifted by 2 bits to
/// allow for the const/volatile qualifiers.
///
/// Keys in the map never have const/volatile qualifiers.
TypeIdxMap TypeIdxs;

/// Offset of each type in the bitstream, indexed by
/// the type's ID.
std::vector<serialization::UnalignedUInt64> TypeOffsets;

/// The first ID number we can use for our own identifiers.
serialization::IdentifierID FirstIdentID = serialization::NUM_PREDEF_IDENT_IDS;

/// The identifier ID that will be assigned to the next new identifier.
serialization::IdentifierID NextIdentID = FirstIdentID;

/// Map that provides the ID numbers of each identifier in
/// the output stream.
///
/// The ID numbers for identifiers are consecutive (in order of
/// discovery), starting at 1. An ID of zero refers to a NULL
/// IdentifierInfo.
llvm::MapVector<const IdentifierInfo *, serialization::IdentifierID> IdentifierIDs;

/// The first ID number we can use for our own macros.
serialization::MacroID FirstMacroID = serialization::NUM_PREDEF_MACRO_IDS;

/// The macro ID that will be assigned to the next new macro.
serialization::MacroID NextMacroID = FirstMacroID;

/// Map that provides the ID numbers of each macro.
llvm::DenseMap<MacroInfo *, serialization::MacroID> MacroIDs;

/// One pending macro: the identifier naming it, its MacroInfo, and the
/// macro ID associated with it.
struct MacroInfoToEmitData {
  const IdentifierInfo *Name;
  MacroInfo *MI;
  serialization::MacroID ID;
};

/// The macro infos to emit.
std::vector<MacroInfoToEmitData> MacroInfosToEmit;

/// A 32-bit offset per identifier.
///
/// NOTE(review): presumably the value returned by
/// getMacroDirectivesOffset() for that identifier -- confirm in the
/// implementation file.
llvm::DenseMap<const IdentifierInfo *, uint32_t>
    IdentMacroDirectivesOffsetMap;

/// @name FlushStmt Caches
/// @{

/// Set of parent Stmts for the currently serializing sub-stmt.
llvm::DenseSet<Stmt *> ParentStmts;

/// Offsets of sub-stmts already serialized. The offset points
/// just after the stmt record.
llvm::DenseMap<Stmt *, uint64_t> SubStmtEntries;

/// @}

/// Offsets of each of the identifier IDs into the identifier
/// table.
std::vector<uint32_t> IdentifierOffsets;

/// The first ID number we can use for our own submodules.
serialization::SubmoduleID FirstSubmoduleID =
    serialization::NUM_PREDEF_SUBMODULE_IDS;

/// The submodule ID that will be assigned to the next new submodule.
serialization::SubmoduleID NextSubmoduleID = FirstSubmoduleID;

/// The first ID number we can use for our own selectors.
serialization::SelectorID FirstSelectorID =
    serialization::NUM_PREDEF_SELECTOR_IDS;

/// The selector ID that will be assigned to the next new selector.
serialization::SelectorID NextSelectorID = FirstSelectorID;

/// Map that provides the ID numbers of each Selector.
llvm::MapVector<Selector, serialization::SelectorID> SelectorIDs;

/// Offset of each selector within the method pool/selector
/// table, indexed by the Selector ID (-1).
std::vector<uint32_t> SelectorOffsets;

/// Mapping from macro definitions (as they occur in the preprocessing
/// record) to the macro IDs.
llvm::DenseMap<const MacroDefinitionRecord *,
               serialization::PreprocessedEntityID> MacroDefinitions;

/// Cache of indices of anonymous declarations within their lexical
/// contexts.
llvm::DenseMap<const Decl *, unsigned> AnonymousDeclarationNumbers;

/// The external top level module during the writing process. Used to
/// generate signature for the module file being written.
///
/// Only meaningful for standard C++ named modules. See the comments in
/// createSignatureForNamedModule() for details.
llvm::DenseSet<Module *> TouchedTopLevelModules;

/// An update to a Decl.
///
/// Holds a kind tag plus exactly one payload, stored in an anonymous union.
/// Each constructor initializes the single union member matching its
/// argument type, and each accessor reads one member without consulting
/// Kind, so callers must use the accessor that corresponds to the payload
/// that was stored.
class DeclUpdate {
  /// A DeclUpdateKind.
  unsigned Kind;
  union {
    const Decl *Dcl;
    void *Type; // Opaque pointer form of a QualType (see getType()).
    SourceLocation::UIntTy Loc; // Raw encoding of a SourceLocation.
    unsigned Val;
    Module *Mod;
    const Attr *Attribute;
  };

public:
  /// Kind-only update; the payload is a null Decl pointer.
  DeclUpdate(unsigned Kind) : Kind(Kind), Dcl(nullptr) {}
  DeclUpdate(unsigned Kind, const Decl *Dcl) : Kind(Kind), Dcl(Dcl) {}
  DeclUpdate(unsigned Kind, QualType Type)
      : Kind(Kind), Type(Type.getAsOpaquePtr()) {}
  DeclUpdate(unsigned Kind, SourceLocation Loc)
      : Kind(Kind), Loc(Loc.getRawEncoding()) {}
  DeclUpdate(unsigned Kind, unsigned Val) : Kind(Kind), Val(Val) {}
  DeclUpdate(unsigned Kind, Module *M) : Kind(Kind), Mod(M) {}
  DeclUpdate(unsigned Kind, const Attr *Attribute)
      : Kind(Kind), Attribute(Attribute) {}

  unsigned getKind() const { return Kind; }
  const Decl *getDecl() const { return Dcl; }
  QualType getType() const { return QualType::getFromOpaquePtr(Type); }

  SourceLocation getLoc() const {
    return SourceLocation::getFromRawEncoding(Loc);
  }

  unsigned getNumber() const { return Val; }
  Module *getModule() const { return Mod; }
  const Attr *getAttr() const { return Attribute; }
};

using UpdateRecord = SmallVector<DeclUpdate, 1>;
using DeclUpdateMap = llvm::MapVector<const Decl *, UpdateRecord>;

/// Mapping from declarations that came from a chained PCH to the
/// record containing modifications to them.
DeclUpdateMap DeclUpdates;

/// DeclUpdates added during parsing the GMF. We split these from
/// DeclUpdates since we want to add these updates in GMF on need.
/// Only meaningful for reduced BMI.
DeclUpdateMap DeclUpdatesFromGMF;

using FirstLatestDeclMap = llvm::DenseMap<Decl *, Decl *>;

/// Map of first declarations from a chained PCH that point to the
/// most recent declarations in another PCH.
FirstLatestDeclMap FirstLatestDecls;

/// Declarations encountered that might be external
/// definitions.
///
/// We keep track of external definitions and other 'interesting' declarations
/// as we are emitting declarations to the AST file. The AST file contains a
/// separate record for these declarations, which are provided to the AST
/// consumer by the AST reader. This behavior is required to properly cope
/// with, e.g., tentative variable definitions that occur within
/// headers. The declarations themselves are stored as declaration
/// IDs, since they will be written out to an EAGERLY_DESERIALIZED_DECLS
/// record.
RecordData EagerlyDeserializedDecls;
RecordData ModularCodegenDecls;

/// DeclContexts that have received extensions since their serialized
/// form.
///
/// For namespaces, when we're chaining and encountering a namespace, we check
/// if its primary namespace comes from the chain. If it does, we add the
/// primary to this set, so that we can write out lexical content updates for
/// it.
llvm::SmallSetVector<const DeclContext *, 16> UpdatedDeclContexts;

/// Keeps track of declarations that we must emit, even though we're
/// not guaranteed to be able to find them by walking the AST starting at the
/// translation unit.
SmallVector<const Decl *, 16> DeclsToEmitEvenIfUnreferenced;

/// The set of Objective-C classes that have categories we
/// should serialize.
llvm::SetVector<ObjCInterfaceDecl *> ObjCClassesWithCategories;

/// The set of declarations that may have redeclaration chains that
/// need to be serialized.
llvm::SmallVector<const Decl *, 16> Redeclarations;

/// A cache of the first local declaration for "interesting"
/// redeclaration chains.
llvm::DenseMap<const Decl *, const Decl *> FirstLocalDeclCache;

/// Mapping from SwitchCase statements to IDs.
llvm::DenseMap<SwitchCase *, unsigned> SwitchCaseIDs;

/// The number of statements written to the AST file.
unsigned NumStatements = 0;

/// The number of macros written to the AST file.
unsigned NumMacros = 0;

/// The number of lexical declcontexts written to the AST
/// file.
unsigned NumLexicalDeclContexts = 0;

/// The number of visible declcontexts written to the AST
/// file.
unsigned NumVisibleDeclContexts = 0;

/// A mapping from each known submodule to its ID number, which will
/// be a positive integer.
llvm::DenseMap<const Module *, unsigned> SubmoduleIDs;

/// A list of the module file extension writers.
std::vector<std::unique_ptr<ModuleFileExtensionWriter>>
    ModuleFileExtensionWriters;

/// Mapping from a source location entry to whether it is affecting or not.
llvm::BitVector IsSLocAffecting;

/// Mapping from \c FileID to an index into the FileID adjustment table.
std::vector<FileID> NonAffectingFileIDs;
std::vector<unsigned> NonAffectingFileIDAdjustments;

/// Mapping from an offset to an index into the offset adjustment table.
std::vector<SourceRange> NonAffectingRanges;
std::vector<SourceLocation::UIntTy> NonAffectingOffsetAdjustments;

/// A list of classes in named modules which need to emit the VTable in
/// the corresponding object file.
llvm::SmallVector<CXXRecordDecl *> PendingEmittingVTables;

/// Computes input files that didn't affect compilation of the current module,
/// and initializes data structures necessary for leaving those files out
/// during \c SourceManager serialization.
void computeNonAffectingInputFiles();

/// Some affecting files can be included from files that are not affecting.
/// This function erases source locations pointing into such files.
SourceLocation getAffectingIncludeLoc(const SourceManager &SourceMgr,
                                      const SrcMgr::FileInfo &File);

/// Returns an adjusted \c FileID, accounting for any non-affecting input
/// files.
FileID getAdjustedFileID(FileID FID) const;
/// Returns an adjusted number of \c FileIDs created within the specified \c
/// FileID, accounting for any non-affecting input files.
unsigned getAdjustedNumCreatedFIDs(FileID FID) const;
/// Returns an adjusted \c SourceLocation, accounting for any non-affecting
/// input files.
SourceLocation getAdjustedLocation(SourceLocation Loc) const;
/// Returns an adjusted \c SourceRange, accounting for any non-affecting input
/// files.
SourceRange getAdjustedRange(SourceRange Range) const;
/// Returns an adjusted \c SourceLocation offset, accounting for any
/// non-affecting input files.
SourceLocation::UIntTy getAdjustedOffset(SourceLocation::UIntTy Offset) const;
/// Returns an adjustment for offset into SourceManager, accounting for any
/// non-affecting input files.
SourceLocation::UIntTy getAdjustment(SourceLocation::UIntTy Offset) const;

/// Retrieve or create a submodule ID for this module.
unsigned getSubmoduleID(Module *Mod);

/// Write the given subexpression to the bitstream.
void WriteSubStmt(Stmt *S);

void WriteBlockInfoBlock();
void WriteControlBlock(Preprocessor &PP, ASTContext &Context,
                       StringRef isysroot);

/// Write out the signature and diagnostic options.
///
/// NOTE(review): the previous wording said this also returns the signature,
/// but the declared return type is void; backpatchSignature() below is the
/// declaration that returns an ASTFileSignature.
void writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context);
ASTFileSignature backpatchSignature();

/// Calculate hash of the pcm content.
std::pair<ASTFileSignature, ASTFileSignature> createSignature() const;
ASTFileSignature createSignatureForNamedModule() const;

void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
void WriteSourceManagerBlock(SourceManager &SourceMgr,
                             const Preprocessor &PP);
void WritePreprocessor(const Preprocessor &PP, bool IsModule);
void WriteHeaderSearch(const HeaderSearch &HS);
void WritePreprocessorDetail(PreprocessingRecord &PPRec,
                             uint64_t MacroOffsetsBase);
void WriteSubmodules(Module *WritingModule);

void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
                                   bool isModule);

/// Abbreviation ID exposed through getTypeExtQualAbbrev().
unsigned TypeExtQualAbbrev = 0;
void WriteTypeAbbrevs();
void WriteType(QualType T);

bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC);

void GenerateNameLookupTable(const DeclContext *DC,
                             llvm::SmallVectorImpl<char> &LookupTable);
uint64_t WriteDeclContextLexicalBlock(ASTContext &Context,
                                      const DeclContext *DC);
uint64_t WriteDeclContextVisibleBlock(ASTContext &Context, DeclContext *DC);
void WriteTypeDeclOffsets();
void WriteFileDeclIDsMap();
void WriteComments();
void WriteSelectors(Sema &SemaRef);
void WriteReferencedSelectorsPool(Sema &SemaRef);
void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver &IdResolver,
                          bool IsModule);
void WriteDeclAndTypes(ASTContext &Context);
void PrepareWritingSpecialDecls(Sema &SemaRef);
void WriteSpecialDeclRecords(Sema &SemaRef);
void WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord);
void WriteDeclContextVisibleUpdate(const DeclContext *DC);
void WriteFPPragmaOptions(const FPOptionsOverride &Opts);
void WriteOpenCLExtensions(Sema &SemaRef);
void WriteCUDAPragmas(Sema &SemaRef);
void WriteObjCCategories();
void WriteLateParsedTemplates(Sema &SemaRef);
void WriteOptimizePragmaOptions(Sema &SemaRef);
void WriteMSStructPragmaOptions(Sema &SemaRef);
void WriteMSPointersToMembersPragmaOptions(Sema &SemaRef);
void WritePackPragmaOptions(Sema &SemaRef);
void WriteFloatControlPragmaOptions(Sema &SemaRef);
void WriteModuleFileExtension(Sema &SemaRef,
                              ModuleFileExtensionWriter &Writer);

// Abbreviation IDs for declaration records. All default to 0.
unsigned DeclParmVarAbbrev = 0;
unsigned DeclContextLexicalAbbrev = 0;
unsigned DeclContextVisibleLookupAbbrev = 0;
unsigned UpdateVisibleAbbrev = 0;
unsigned DeclRecordAbbrev = 0;
unsigned DeclTypedefAbbrev = 0;
unsigned DeclVarAbbrev = 0;
unsigned DeclFieldAbbrev = 0;
unsigned DeclEnumAbbrev = 0;
unsigned DeclObjCIvarAbbrev = 0;
unsigned DeclCXXMethodAbbrev = 0;
unsigned DeclDependentNonTemplateCXXMethodAbbrev = 0;
unsigned DeclTemplateCXXMethodAbbrev = 0;
unsigned DeclMemberSpecializedCXXMethodAbbrev = 0;
unsigned DeclTemplateSpecializedCXXMethodAbbrev = 0;
unsigned DeclDependentSpecializationCXXMethodAbbrev = 0;
unsigned DeclTemplateTypeParmAbbrev = 0;
unsigned DeclUsingShadowAbbrev = 0;

// Abbreviation IDs for expression records. All default to 0.
unsigned DeclRefExprAbbrev = 0;
unsigned CharacterLiteralAbbrev = 0;
unsigned IntegerLiteralAbbrev = 0;
unsigned ExprImplicitCastAbbrev = 0;
unsigned BinaryOperatorAbbrev = 0;
unsigned CompoundAssignOperatorAbbrev = 0;
unsigned CallExprAbbrev = 0;
unsigned CXXOperatorCallExprAbbrev = 0;
unsigned CXXMemberCallExprAbbrev = 0;

// Abbreviation ID for compound statement records.
unsigned CompoundStmtAbbrev = 0;

void WriteDeclAbbrevs();
void WriteDecl(ASTContext &Context, Decl *D);

ASTFileSignature WriteASTCore(Sema &SemaRef, StringRef isysroot,
                              Module *WritingModule);

public:
/// Create a new precompiled header writer that outputs to
/// the given bitstream.
ASTWriter(llvm::BitstreamWriter &Stream, SmallVectorImpl<char> &Buffer,
          InMemoryModuleCache &ModuleCache,
          ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
          bool IncludeTimestamps = true, bool BuildingImplicitModule = false,
          bool GeneratingReducedBMI = false);
~ASTWriter() override;

/// Return the ASTContext currently being written.
///
/// Asserts that Context is non-null, i.e. this must only be called while an
/// AST is being written.
ASTContext &getASTContext() const {
  assert(Context && "requested AST context when not writing AST");
  return *Context;
}

const LangOptions &getLangOpts() const;

/// Get a timestamp for output into the AST file. The actual timestamp
/// of the specified file may be ignored if we have been instructed to not
/// include timestamps in the output file.
time_t getTimestampForOutput(const FileEntry *E) const;

/// Write a precompiled header for the given semantic analysis.
///
/// \param SemaRef a reference to the semantic analysis object that processed
/// the AST to be written into the precompiled header.
///
/// \param WritingModule The module that we are writing. If null, we are
/// writing a precompiled header.
///
/// \param isysroot if non-empty, write a relocatable file whose headers
/// are relative to the given system root. If we're writing a module, its
/// build directory will be used in preference to this if both are available.
///
/// \return the module signature, which eventually will be a hash of
/// the module but currently is merely a random 32-bit number.
ASTFileSignature WriteAST(Sema &SemaRef, StringRef OutputFile,
                          Module *WritingModule, StringRef isysroot,
                          bool ShouldCacheASTInMemory = false);

/// Emit a token.
void AddToken(const Token &Tok, RecordDataImpl &Record);

/// Emit an AlignPackInfo.
void AddAlignPackInfo(const Sema::AlignPackInfo &Info,
                      RecordDataImpl &Record);

/// Emit a FileID.
void AddFileID(FileID FID, RecordDataImpl &Record);

/// Emit a source location.
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
                       LocSeq *Seq = nullptr);

/// Return the raw encodings for source locations.
SourceLocationEncoding::RawLocEncoding
getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);

/// Emit a source range.
void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
                    LocSeq *Seq = nullptr);

/// Emit a reference to an identifier.
void AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record);

/// Get the unique number used to refer to the given selector.
serialization::SelectorID getSelectorRef(Selector Sel);

/// Get the unique number used to refer to the given identifier.
serialization::IdentifierID getIdentifierRef(const IdentifierInfo *II);

/// Get the unique number used to refer to the given macro.
serialization::MacroID getMacroRef(MacroInfo *MI, const IdentifierInfo *Name);

/// Determine the ID of an already-emitted macro.
serialization::MacroID getMacroID(MacroInfo *MI);

/// NOTE(review): presumably looks \p Name up in
/// IdentMacroDirectivesOffsetMap -- confirm in the definition.
uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name);

/// Emit a reference to a type.
void AddTypeRef(QualType T, RecordDataImpl &Record);

/// Force a type to be emitted and get its ID.
serialization::TypeID GetOrCreateTypeID(QualType T);

/// Find the first local declaration of a given local redeclarable
/// decl.
726 const Decl *getFirstLocalDecl(const Decl *D); 727 728 /// Is this a local declaration (that is, one that will be written to 729 /// our AST file)? This is the case for declarations that are neither imported 730 /// from another AST file nor predefined. IsLocalDecl(const Decl * D)731 bool IsLocalDecl(const Decl *D) { 732 if (D->isFromASTFile()) 733 return false; 734 auto I = DeclIDs.find(D); 735 return (I == DeclIDs.end() || I->second >= clang::NUM_PREDEF_DECL_IDS); 736 }; 737 738 /// Emit a reference to a declaration. 739 void AddDeclRef(const Decl *D, RecordDataImpl &Record); 740 // Emit a reference to a declaration if the declaration was emitted. 741 void AddEmittedDeclRef(const Decl *D, RecordDataImpl &Record); 742 743 /// Force a declaration to be emitted and get its local ID to the module file 744 /// been writing. 745 LocalDeclID GetDeclRef(const Decl *D); 746 747 /// Determine the local declaration ID of an already-emitted 748 /// declaration. 749 LocalDeclID getDeclID(const Decl *D); 750 751 /// Whether or not the declaration got emitted. If not, it wouldn't be 752 /// emitted. 753 /// 754 /// This may only be called after we've done the job to write the 755 /// declarations (marked by DoneWritingDeclsAndTypes). 756 /// 757 /// A declaration may only be omitted in reduced BMI. 758 bool wasDeclEmitted(const Decl *D) const; 759 760 unsigned getAnonymousDeclarationNumber(const NamedDecl *D); 761 762 /// Add a string to the given record. 763 void AddString(StringRef Str, RecordDataImpl &Record); 764 765 /// Convert a path from this build process into one that is appropriate 766 /// for emission in the module file. 767 bool PreparePathForOutput(SmallVectorImpl<char> &Path); 768 769 /// Add a path to the given record. 770 void AddPath(StringRef Path, RecordDataImpl &Record); 771 772 /// Emit the current record with the given path as a blob. 
void EmitRecordWithPath(unsigned Abbrev, RecordDataRef Record,
                        StringRef Path);

/// Add a version tuple to the given record.
void AddVersionTuple(const VersionTuple &Version, RecordDataImpl &Record);

/// Retrieve or create a submodule ID for this module, or return 0 if
/// the submodule is neither local (a submodule of the currently-written
/// module) nor from an imported module.
unsigned getLocalOrImportedSubmoduleID(const Module *Mod);

/// Note that the identifier II occurs at the given offset
/// within the identifier table.
void SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset);

/// Note that the selector Sel occurs at the given offset
/// within the method pool/selector table.
void SetSelectorOffset(Selector Sel, uint32_t Offset);

/// Record an ID for the given switch-case statement.
unsigned RecordSwitchCaseID(SwitchCase *S);

/// Retrieve the ID for the given switch-case statement.
796 unsigned getSwitchCaseID(SwitchCase *S); 797 798 void ClearSwitchCaseIDs(); 799 getTypeExtQualAbbrev()800 unsigned getTypeExtQualAbbrev() const { 801 return TypeExtQualAbbrev; 802 } 803 getDeclParmVarAbbrev()804 unsigned getDeclParmVarAbbrev() const { return DeclParmVarAbbrev; } getDeclRecordAbbrev()805 unsigned getDeclRecordAbbrev() const { return DeclRecordAbbrev; } getDeclTypedefAbbrev()806 unsigned getDeclTypedefAbbrev() const { return DeclTypedefAbbrev; } getDeclVarAbbrev()807 unsigned getDeclVarAbbrev() const { return DeclVarAbbrev; } getDeclFieldAbbrev()808 unsigned getDeclFieldAbbrev() const { return DeclFieldAbbrev; } getDeclEnumAbbrev()809 unsigned getDeclEnumAbbrev() const { return DeclEnumAbbrev; } getDeclObjCIvarAbbrev()810 unsigned getDeclObjCIvarAbbrev() const { return DeclObjCIvarAbbrev; } getDeclCXXMethodAbbrev(FunctionDecl::TemplatedKind Kind)811 unsigned getDeclCXXMethodAbbrev(FunctionDecl::TemplatedKind Kind) const { 812 switch (Kind) { 813 case FunctionDecl::TK_NonTemplate: 814 return DeclCXXMethodAbbrev; 815 case FunctionDecl::TK_FunctionTemplate: 816 return DeclTemplateCXXMethodAbbrev; 817 case FunctionDecl::TK_MemberSpecialization: 818 return DeclMemberSpecializedCXXMethodAbbrev; 819 case FunctionDecl::TK_FunctionTemplateSpecialization: 820 return DeclTemplateSpecializedCXXMethodAbbrev; 821 case FunctionDecl::TK_DependentNonTemplate: 822 return DeclDependentNonTemplateCXXMethodAbbrev; 823 case FunctionDecl::TK_DependentFunctionTemplateSpecialization: 824 return DeclDependentSpecializationCXXMethodAbbrev; 825 } 826 llvm_unreachable("Unknwon Template Kind!"); 827 } getDeclTemplateTypeParmAbbrev()828 unsigned getDeclTemplateTypeParmAbbrev() const { 829 return DeclTemplateTypeParmAbbrev; 830 } getDeclUsingShadowAbbrev()831 unsigned getDeclUsingShadowAbbrev() const { return DeclUsingShadowAbbrev; } 832 getDeclRefExprAbbrev()833 unsigned getDeclRefExprAbbrev() const { return DeclRefExprAbbrev; } getCharacterLiteralAbbrev()834 unsigned 
getCharacterLiteralAbbrev() const { return CharacterLiteralAbbrev; } getIntegerLiteralAbbrev()835 unsigned getIntegerLiteralAbbrev() const { return IntegerLiteralAbbrev; } getExprImplicitCastAbbrev()836 unsigned getExprImplicitCastAbbrev() const { return ExprImplicitCastAbbrev; } getBinaryOperatorAbbrev()837 unsigned getBinaryOperatorAbbrev() const { return BinaryOperatorAbbrev; } getCompoundAssignOperatorAbbrev()838 unsigned getCompoundAssignOperatorAbbrev() const { 839 return CompoundAssignOperatorAbbrev; 840 } getCallExprAbbrev()841 unsigned getCallExprAbbrev() const { return CallExprAbbrev; } getCXXOperatorCallExprAbbrev()842 unsigned getCXXOperatorCallExprAbbrev() { return CXXOperatorCallExprAbbrev; } getCXXMemberCallExprAbbrev()843 unsigned getCXXMemberCallExprAbbrev() { return CXXMemberCallExprAbbrev; } 844 getCompoundStmtAbbrev()845 unsigned getCompoundStmtAbbrev() const { return CompoundStmtAbbrev; } 846 hasChain()847 bool hasChain() const { return Chain; } getChain()848 ASTReader *getChain() const { return Chain; } 849 isWritingModule()850 bool isWritingModule() const { return WritingModule; } 851 isWritingStdCXXNamedModules()852 bool isWritingStdCXXNamedModules() const { 853 return WritingModule && WritingModule->isNamedModule(); 854 } 855 isGeneratingReducedBMI()856 bool isGeneratingReducedBMI() const { return GeneratingReducedBMI; } 857 getDoneWritingDeclsAndTypes()858 bool getDoneWritingDeclsAndTypes() const { return DoneWritingDeclsAndTypes; } 859 isDeclPredefined(const Decl * D)860 bool isDeclPredefined(const Decl *D) const { 861 return PredefinedDecls.count(D); 862 } 863 864 void handleVTable(CXXRecordDecl *RD); 865 866 private: 867 // ASTDeserializationListener implementation 868 void ReaderInitialized(ASTReader *Reader) override; 869 void IdentifierRead(serialization::IdentifierID ID, IdentifierInfo *II) override; 870 void MacroRead(serialization::MacroID ID, MacroInfo *MI) override; 871 void TypeRead(serialization::TypeIdx Idx, QualType T) 
override; 872 void SelectorRead(serialization::SelectorID ID, Selector Sel) override; 873 void MacroDefinitionRead(serialization::PreprocessedEntityID ID, 874 MacroDefinitionRecord *MD) override; 875 void ModuleRead(serialization::SubmoduleID ID, Module *Mod) override; 876 877 // ASTMutationListener implementation. 878 void CompletedTagDefinition(const TagDecl *D) override; 879 void AddedVisibleDecl(const DeclContext *DC, const Decl *D) override; 880 void AddedCXXImplicitMember(const CXXRecordDecl *RD, const Decl *D) override; 881 void AddedCXXTemplateSpecialization( 882 const ClassTemplateDecl *TD, 883 const ClassTemplateSpecializationDecl *D) override; 884 void AddedCXXTemplateSpecialization( 885 const VarTemplateDecl *TD, 886 const VarTemplateSpecializationDecl *D) override; 887 void AddedCXXTemplateSpecialization(const FunctionTemplateDecl *TD, 888 const FunctionDecl *D) override; 889 void ResolvedExceptionSpec(const FunctionDecl *FD) override; 890 void DeducedReturnType(const FunctionDecl *FD, QualType ReturnType) override; 891 void ResolvedOperatorDelete(const CXXDestructorDecl *DD, 892 const FunctionDecl *Delete, 893 Expr *ThisArg) override; 894 void CompletedImplicitDefinition(const FunctionDecl *D) override; 895 void InstantiationRequested(const ValueDecl *D) override; 896 void VariableDefinitionInstantiated(const VarDecl *D) override; 897 void FunctionDefinitionInstantiated(const FunctionDecl *D) override; 898 void DefaultArgumentInstantiated(const ParmVarDecl *D) override; 899 void DefaultMemberInitializerInstantiated(const FieldDecl *D) override; 900 void AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD, 901 const ObjCInterfaceDecl *IFD) override; 902 void DeclarationMarkedUsed(const Decl *D) override; 903 void DeclarationMarkedOpenMPThreadPrivate(const Decl *D) override; 904 void DeclarationMarkedOpenMPDeclareTarget(const Decl *D, 905 const Attr *Attr) override; 906 void DeclarationMarkedOpenMPAllocate(const Decl *D, const Attr *A) override; 
907 void RedefinedHiddenDefinition(const NamedDecl *D, Module *M) override; 908 void AddedAttributeToRecord(const Attr *Attr, 909 const RecordDecl *Record) override; 910 void EnteringModulePurview() override; 911 void AddedManglingNumber(const Decl *D, unsigned) override; 912 void AddedStaticLocalNumbers(const Decl *D, unsigned) override; 913 void AddedAnonymousNamespace(const TranslationUnitDecl *, 914 NamespaceDecl *AnonNamespace) override; 915 }; 916 917 /// AST and semantic-analysis consumer that generates a 918 /// precompiled header from the parsed source code. 919 class PCHGenerator : public SemaConsumer { 920 void anchor() override; 921 922 Preprocessor &PP; 923 std::string OutputFile; 924 std::string isysroot; 925 Sema *SemaPtr; 926 std::shared_ptr<PCHBuffer> Buffer; 927 llvm::BitstreamWriter Stream; 928 ASTWriter Writer; 929 bool AllowASTWithErrors; 930 bool ShouldCacheASTInMemory; 931 932 protected: getWriter()933 ASTWriter &getWriter() { return Writer; } getWriter()934 const ASTWriter &getWriter() const { return Writer; } getPCH()935 SmallVectorImpl<char> &getPCH() const { return Buffer->Data; } 936 isComplete()937 bool isComplete() const { return Buffer->IsComplete; } getBufferPtr()938 PCHBuffer *getBufferPtr() { return Buffer.get(); } getOutputFile()939 StringRef getOutputFile() const { return OutputFile; } getDiagnostics()940 DiagnosticsEngine &getDiagnostics() const { 941 return SemaPtr->getDiagnostics(); 942 } getPreprocessor()943 Preprocessor &getPreprocessor() { return PP; } 944 945 virtual Module *getEmittingModule(ASTContext &Ctx); 946 947 public: 948 PCHGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache, 949 StringRef OutputFile, StringRef isysroot, 950 std::shared_ptr<PCHBuffer> Buffer, 951 ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions, 952 bool AllowASTWithErrors = false, bool IncludeTimestamps = true, 953 bool BuildingImplicitModule = false, 954 bool ShouldCacheASTInMemory = false, 955 bool GeneratingReducedBMI = 
false); 956 ~PCHGenerator() override; 957 InitializeSema(Sema & S)958 void InitializeSema(Sema &S) override { SemaPtr = &S; } 959 void HandleTranslationUnit(ASTContext &Ctx) override; HandleVTable(CXXRecordDecl * RD)960 void HandleVTable(CXXRecordDecl *RD) override { Writer.handleVTable(RD); } 961 ASTMutationListener *GetASTMutationListener() override; 962 ASTDeserializationListener *GetASTDeserializationListener() override; hasEmittedPCH()963 bool hasEmittedPCH() const { return Buffer->IsComplete; } 964 }; 965 966 class CXX20ModulesGenerator : public PCHGenerator { 967 void anchor() override; 968 969 protected: 970 virtual Module *getEmittingModule(ASTContext &Ctx) override; 971 972 CXX20ModulesGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache, 973 StringRef OutputFile, bool GeneratingReducedBMI); 974 975 public: CXX20ModulesGenerator(Preprocessor & PP,InMemoryModuleCache & ModuleCache,StringRef OutputFile)976 CXX20ModulesGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache, 977 StringRef OutputFile) 978 : CXX20ModulesGenerator(PP, ModuleCache, OutputFile, 979 /*GeneratingReducedBMI=*/false) {} 980 981 void HandleTranslationUnit(ASTContext &Ctx) override; 982 }; 983 984 class ReducedBMIGenerator : public CXX20ModulesGenerator { 985 void anchor() override; 986 987 public: ReducedBMIGenerator(Preprocessor & PP,InMemoryModuleCache & ModuleCache,StringRef OutputFile)988 ReducedBMIGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache, 989 StringRef OutputFile) 990 : CXX20ModulesGenerator(PP, ModuleCache, OutputFile, 991 /*GeneratingReducedBMI=*/true) {} 992 }; 993 994 /// If we can elide the definition of \param D in reduced BMI. 995 /// 996 /// Generally, we can elide the definition of a declaration if it won't affect 997 /// the ABI. e.g., the non-inline function bodies. 998 bool CanElideDeclDef(const Decl *D); 999 1000 /// A simple helper class to pack several bits in order into (a) 32 bit 1001 /// integer(s). 
class BitsPacker {
  constexpr static uint32_t BitIndexUpbound = 32u;

public:
  BitsPacker() = default;
  // Non-copyable and non-movable. The deleted assignment operators use the
  // canonical reference return type.
  BitsPacker(const BitsPacker &) = delete;
  BitsPacker(BitsPacker &&) = delete;
  BitsPacker &operator=(const BitsPacker &) = delete;
  BitsPacker &operator=(BitsPacker &&) = delete;
  ~BitsPacker() = default;

  /// Whether \p BitsWidth more bits may be appended to the current value.
  ///
  /// NOTE(review): the comparison is strict, so the running bit index can
  /// never reach BitIndexUpbound and at most 31 bits are ever occupied. This
  /// is presumably mirrored by whatever unpacks the value; confirm before
  /// relaxing it.
  bool canWriteNextNBits(uint32_t BitsWidth) const {
    return CurrentBitIndex + BitsWidth < BitIndexUpbound;
  }

  /// Replace the packed value with \p Value and restart at bit index 0.
  void reset(uint32_t Value) {
    UnderlyingValue = Value;
    CurrentBitIndex = 0;
  }

  /// Append a single bit.
  void addBit(bool Value) { addBits(Value, 1); }

  /// Append the low \p BitsWidth bits of \p Value at the current bit index.
  ///
  /// \p Value must fit in \p BitsWidth bits and the bits must still fit in the
  /// current value (see canWriteNextNBits); both are checked by assertions.
  void addBits(uint32_t Value, uint32_t BitsWidth) {
    assert(BitsWidth < BitIndexUpbound);
    assert((Value < (1u << BitsWidth)) && "Passing narrower bit width!");
    assert(canWriteNextNBits(BitsWidth) &&
           "Inserting too much bits into a value!");

    UnderlyingValue |= Value << CurrentBitIndex;
    CurrentBitIndex += BitsWidth;
  }

  /// The value packed so far. Const because it only reads state.
  operator uint32_t() const { return UnderlyingValue; }

private:
  uint32_t UnderlyingValue = 0;
  uint32_t CurrentBitIndex = 0;
};

} // namespace clang

#endif // LLVM_CLANG_SERIALIZATION_ASTWRITER_H