xref: /freebsd/contrib/llvm-project/clang/lib/Serialization/GlobalModuleIndex.cpp (revision 8311bc5f17dec348749f763b82dfe2737bc53cd7)
1 //===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the GlobalModuleIndex class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Serialization/GlobalModuleIndex.h"
14 #include "ASTReaderInternals.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Lex/HeaderSearch.h"
17 #include "clang/Serialization/ASTBitCodes.h"
18 #include "clang/Serialization/ModuleFile.h"
19 #include "clang/Serialization/PCHContainerOperations.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/MapVector.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Bitstream/BitstreamReader.h"
25 #include "llvm/Bitstream/BitstreamWriter.h"
26 #include "llvm/Support/DJB.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/LockFileManager.h"
29 #include "llvm/Support/MemoryBuffer.h"
30 #include "llvm/Support/OnDiskHashTable.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/TimeProfiler.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <cstdio>
35 using namespace clang;
36 using namespace serialization;
37 
38 //----------------------------------------------------------------------------//
39 // Shared constants
40 //----------------------------------------------------------------------------//
41 namespace {
42   enum {
43     /// The block containing the index.
44     GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
45   };
46 
47   /// Describes the record types in the index.
48   enum IndexRecordTypes {
49     /// Contains version information and potentially other metadata,
50     /// used to determine if we can read this global index file.
51     INDEX_METADATA,
52     /// Describes a module, including its file name and dependencies.
53     MODULE,
54     /// The index for identifiers.
55     IDENTIFIER_INDEX
56   };
57 }
58 
/// File name of the global index; it is appended to the directory path handed
/// to readIndex() and writeIndex().
static constexpr const char *IndexFileName = "modules.idx";

/// Version stamp written into, and required from, the INDEX_METADATA record.
static constexpr unsigned CurrentVersion = 1;
64 
65 //----------------------------------------------------------------------------//
66 // Global module index reader.
67 //----------------------------------------------------------------------------//
68 
69 namespace {
70 
71 /// Trait used to read the identifier index from the on-disk hash
72 /// table.
73 class IdentifierIndexReaderTrait {
74 public:
75   typedef StringRef external_key_type;
76   typedef StringRef internal_key_type;
77   typedef SmallVector<unsigned, 2> data_type;
78   typedef unsigned hash_value_type;
79   typedef unsigned offset_type;
80 
81   static bool EqualKey(const internal_key_type& a, const internal_key_type& b) {
82     return a == b;
83   }
84 
85   static hash_value_type ComputeHash(const internal_key_type& a) {
86     return llvm::djbHash(a);
87   }
88 
89   static std::pair<unsigned, unsigned>
90   ReadKeyDataLength(const unsigned char*& d) {
91     using namespace llvm::support;
92     unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d);
93     unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d);
94     return std::make_pair(KeyLen, DataLen);
95   }
96 
97   static const internal_key_type&
98   GetInternalKey(const external_key_type& x) { return x; }
99 
100   static const external_key_type&
101   GetExternalKey(const internal_key_type& x) { return x; }
102 
103   static internal_key_type ReadKey(const unsigned char* d, unsigned n) {
104     return StringRef((const char *)d, n);
105   }
106 
107   static data_type ReadData(const internal_key_type& k,
108                             const unsigned char* d,
109                             unsigned DataLen) {
110     using namespace llvm::support;
111 
112     data_type Result;
113     while (DataLen > 0) {
114       unsigned ID = endian::readNext<uint32_t, little, unaligned>(d);
115       Result.push_back(ID);
116       DataLen -= 4;
117     }
118 
119     return Result;
120   }
121 };
122 
123 typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
124     IdentifierIndexTable;
125 
126 }
127 
128 GlobalModuleIndex::GlobalModuleIndex(
129     std::unique_ptr<llvm::MemoryBuffer> IndexBuffer,
130     llvm::BitstreamCursor Cursor)
131     : Buffer(std::move(IndexBuffer)), IdentifierIndex(), NumIdentifierLookups(),
132       NumIdentifierLookupHits() {
133   auto Fail = [&](llvm::Error &&Err) {
134     report_fatal_error("Module index '" + Buffer->getBufferIdentifier() +
135                        "' failed: " + toString(std::move(Err)));
136   };
137 
138   llvm::TimeTraceScope TimeScope("Module LoadIndex");
139   // Read the global index.
140   bool InGlobalIndexBlock = false;
141   bool Done = false;
142   while (!Done) {
143     llvm::BitstreamEntry Entry;
144     if (Expected<llvm::BitstreamEntry> Res = Cursor.advance())
145       Entry = Res.get();
146     else
147       Fail(Res.takeError());
148 
149     switch (Entry.Kind) {
150     case llvm::BitstreamEntry::Error:
151       return;
152 
153     case llvm::BitstreamEntry::EndBlock:
154       if (InGlobalIndexBlock) {
155         InGlobalIndexBlock = false;
156         Done = true;
157         continue;
158       }
159       return;
160 
161 
162     case llvm::BitstreamEntry::Record:
163       // Entries in the global index block are handled below.
164       if (InGlobalIndexBlock)
165         break;
166 
167       return;
168 
169     case llvm::BitstreamEntry::SubBlock:
170       if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
171         if (llvm::Error Err = Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
172           Fail(std::move(Err));
173         InGlobalIndexBlock = true;
174       } else if (llvm::Error Err = Cursor.SkipBlock())
175         Fail(std::move(Err));
176       continue;
177     }
178 
179     SmallVector<uint64_t, 64> Record;
180     StringRef Blob;
181     Expected<unsigned> MaybeIndexRecord =
182         Cursor.readRecord(Entry.ID, Record, &Blob);
183     if (!MaybeIndexRecord)
184       Fail(MaybeIndexRecord.takeError());
185     IndexRecordTypes IndexRecord =
186         static_cast<IndexRecordTypes>(MaybeIndexRecord.get());
187     switch (IndexRecord) {
188     case INDEX_METADATA:
189       // Make sure that the version matches.
190       if (Record.size() < 1 || Record[0] != CurrentVersion)
191         return;
192       break;
193 
194     case MODULE: {
195       unsigned Idx = 0;
196       unsigned ID = Record[Idx++];
197 
198       // Make room for this module's information.
199       if (ID == Modules.size())
200         Modules.push_back(ModuleInfo());
201       else
202         Modules.resize(ID + 1);
203 
204       // Size/modification time for this module file at the time the
205       // global index was built.
206       Modules[ID].Size = Record[Idx++];
207       Modules[ID].ModTime = Record[Idx++];
208 
209       // File name.
210       unsigned NameLen = Record[Idx++];
211       Modules[ID].FileName.assign(Record.begin() + Idx,
212                                   Record.begin() + Idx + NameLen);
213       Idx += NameLen;
214 
215       // Dependencies
216       unsigned NumDeps = Record[Idx++];
217       Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(),
218                                       Record.begin() + Idx,
219                                       Record.begin() + Idx + NumDeps);
220       Idx += NumDeps;
221 
222       // Make sure we're at the end of the record.
223       assert(Idx == Record.size() && "More module info?");
224 
225       // Record this module as an unresolved module.
226       // FIXME: this doesn't work correctly for module names containing path
227       // separators.
228       StringRef ModuleName = llvm::sys::path::stem(Modules[ID].FileName);
229       // Remove the -<hash of ModuleMapPath>
230       ModuleName = ModuleName.rsplit('-').first;
231       UnresolvedModules[ModuleName] = ID;
232       break;
233     }
234 
235     case IDENTIFIER_INDEX:
236       // Wire up the identifier index.
237       if (Record[0]) {
238         IdentifierIndex = IdentifierIndexTable::Create(
239             (const unsigned char *)Blob.data() + Record[0],
240             (const unsigned char *)Blob.data() + sizeof(uint32_t),
241             (const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
242       }
243       break;
244     }
245   }
246 }
247 
248 GlobalModuleIndex::~GlobalModuleIndex() {
249   delete static_cast<IdentifierIndexTable *>(IdentifierIndex);
250 }
251 
252 std::pair<GlobalModuleIndex *, llvm::Error>
253 GlobalModuleIndex::readIndex(StringRef Path) {
254   // Load the index file, if it's there.
255   llvm::SmallString<128> IndexPath;
256   IndexPath += Path;
257   llvm::sys::path::append(IndexPath, IndexFileName);
258 
259   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
260       llvm::MemoryBuffer::getFile(IndexPath.c_str());
261   if (!BufferOrErr)
262     return std::make_pair(nullptr,
263                           llvm::errorCodeToError(BufferOrErr.getError()));
264   std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
265 
266   /// The main bitstream cursor for the main block.
267   llvm::BitstreamCursor Cursor(*Buffer);
268 
269   // Sniff for the signature.
270   for (unsigned char C : {'B', 'C', 'G', 'I'}) {
271     if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = Cursor.Read(8)) {
272       if (Res.get() != C)
273         return std::make_pair(
274             nullptr, llvm::createStringError(std::errc::illegal_byte_sequence,
275                                              "expected signature BCGI"));
276     } else
277       return std::make_pair(nullptr, Res.takeError());
278   }
279 
280   return std::make_pair(new GlobalModuleIndex(std::move(Buffer), std::move(Cursor)),
281                         llvm::Error::success());
282 }
283 
284 void GlobalModuleIndex::getModuleDependencies(
285        ModuleFile *File,
286        SmallVectorImpl<ModuleFile *> &Dependencies) {
287   // Look for information about this module file.
288   llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
289     = ModulesByFile.find(File);
290   if (Known == ModulesByFile.end())
291     return;
292 
293   // Record dependencies.
294   Dependencies.clear();
295   ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies;
296   for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
297     if (ModuleFile *MF = Modules[I].File)
298       Dependencies.push_back(MF);
299   }
300 }
301 
302 bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) {
303   Hits.clear();
304 
305   // If there's no identifier index, there is nothing we can do.
306   if (!IdentifierIndex)
307     return false;
308 
309   // Look into the identifier index.
310   ++NumIdentifierLookups;
311   IdentifierIndexTable &Table
312     = *static_cast<IdentifierIndexTable *>(IdentifierIndex);
313   IdentifierIndexTable::iterator Known = Table.find(Name);
314   if (Known == Table.end()) {
315     return false;
316   }
317 
318   SmallVector<unsigned, 2> ModuleIDs = *Known;
319   for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
320     if (ModuleFile *MF = Modules[ModuleIDs[I]].File)
321       Hits.insert(MF);
322   }
323 
324   ++NumIdentifierLookupHits;
325   return true;
326 }
327 
328 bool GlobalModuleIndex::loadedModuleFile(ModuleFile *File) {
329   // Look for the module in the global module index based on the module name.
330   StringRef Name = File->ModuleName;
331   llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
332   if (Known == UnresolvedModules.end()) {
333     return true;
334   }
335 
336   // Rectify this module with the global module index.
337   ModuleInfo &Info = Modules[Known->second];
338 
339   //  If the size and modification time match what we expected, record this
340   // module file.
341   bool Failed = true;
342   if (File->File->getSize() == Info.Size &&
343       File->File->getModificationTime() == Info.ModTime) {
344     Info.File = File;
345     ModulesByFile[File] = Known->second;
346 
347     Failed = false;
348   }
349 
350   // One way or another, we have resolved this module file.
351   UnresolvedModules.erase(Known);
352   return Failed;
353 }
354 
355 void GlobalModuleIndex::printStats() {
356   std::fprintf(stderr, "*** Global Module Index Statistics:\n");
357   if (NumIdentifierLookups) {
358     fprintf(stderr, "  %u / %u identifier lookups succeeded (%f%%)\n",
359             NumIdentifierLookupHits, NumIdentifierLookups,
360             (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
361   }
362   std::fprintf(stderr, "\n");
363 }
364 
365 LLVM_DUMP_METHOD void GlobalModuleIndex::dump() {
366   llvm::errs() << "*** Global Module Index Dump:\n";
367   llvm::errs() << "Module files:\n";
368   for (auto &MI : Modules) {
369     llvm::errs() << "** " << MI.FileName << "\n";
370     if (MI.File)
371       MI.File->dump();
372     else
373       llvm::errs() << "\n";
374   }
375   llvm::errs() << "\n";
376 }
377 
378 //----------------------------------------------------------------------------//
379 // Global module index writer.
380 //----------------------------------------------------------------------------//
381 
382 namespace {
383   /// Provides information about a specific module file.
384   struct ModuleFileInfo {
385     /// The numberic ID for this module file.
386     unsigned ID;
387 
388     /// The set of modules on which this module depends. Each entry is
389     /// a module ID.
390     SmallVector<unsigned, 4> Dependencies;
391     ASTFileSignature Signature;
392   };
393 
394   struct ImportedModuleFileInfo {
395     off_t StoredSize;
396     time_t StoredModTime;
397     ASTFileSignature StoredSignature;
398     ImportedModuleFileInfo(off_t Size, time_t ModTime, ASTFileSignature Sig)
399         : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
400   };
401 
402   /// Builder that generates the global module index file.
403   class GlobalModuleIndexBuilder {
404     FileManager &FileMgr;
405     const PCHContainerReader &PCHContainerRdr;
406 
407     /// Mapping from files to module file information.
408     typedef llvm::MapVector<const FileEntry *, ModuleFileInfo> ModuleFilesMap;
409 
410     /// Information about each of the known module files.
411     ModuleFilesMap ModuleFiles;
412 
413     /// Mapping from the imported module file to the imported
414     /// information.
415     typedef std::multimap<const FileEntry *, ImportedModuleFileInfo>
416         ImportedModuleFilesMap;
417 
418     /// Information about each importing of a module file.
419     ImportedModuleFilesMap ImportedModuleFiles;
420 
421     /// Mapping from identifiers to the list of module file IDs that
422     /// consider this identifier to be interesting.
423     typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
424 
425     /// A mapping from all interesting identifiers to the set of module
426     /// files in which those identifiers are considered interesting.
427     InterestingIdentifierMap InterestingIdentifiers;
428 
429     /// Write the block-info block for the global module index file.
430     void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
431 
432     /// Retrieve the module file information for the given file.
433     ModuleFileInfo &getModuleFileInfo(const FileEntry *File) {
434       llvm::MapVector<const FileEntry *, ModuleFileInfo>::iterator Known
435         = ModuleFiles.find(File);
436       if (Known != ModuleFiles.end())
437         return Known->second;
438 
439       unsigned NewID = ModuleFiles.size();
440       ModuleFileInfo &Info = ModuleFiles[File];
441       Info.ID = NewID;
442       return Info;
443     }
444 
445   public:
446     explicit GlobalModuleIndexBuilder(
447         FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr)
448         : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
449 
450     /// Load the contents of the given module file into the builder.
451     llvm::Error loadModuleFile(const FileEntry *File);
452 
453     /// Write the index to the given bitstream.
454     /// \returns true if an error occurred, false otherwise.
455     bool writeIndex(llvm::BitstreamWriter &Stream);
456   };
457 }
458 
459 static void emitBlockID(unsigned ID, const char *Name,
460                         llvm::BitstreamWriter &Stream,
461                         SmallVectorImpl<uint64_t> &Record) {
462   Record.clear();
463   Record.push_back(ID);
464   Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
465 
466   // Emit the block name if present.
467   if (!Name || Name[0] == 0) return;
468   Record.clear();
469   while (*Name)
470     Record.push_back(*Name++);
471   Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
472 }
473 
474 static void emitRecordID(unsigned ID, const char *Name,
475                          llvm::BitstreamWriter &Stream,
476                          SmallVectorImpl<uint64_t> &Record) {
477   Record.clear();
478   Record.push_back(ID);
479   while (*Name)
480     Record.push_back(*Name++);
481   Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
482 }
483 
484 void
485 GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
486   SmallVector<uint64_t, 64> Record;
487   Stream.EnterBlockInfoBlock();
488 
489 #define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
490 #define RECORD(X) emitRecordID(X, #X, Stream, Record)
491   BLOCK(GLOBAL_INDEX_BLOCK);
492   RECORD(INDEX_METADATA);
493   RECORD(MODULE);
494   RECORD(IDENTIFIER_INDEX);
495 #undef RECORD
496 #undef BLOCK
497 
498   Stream.ExitBlock();
499 }
500 
501 namespace {
502   class InterestingASTIdentifierLookupTrait
503     : public serialization::reader::ASTIdentifierLookupTraitBase {
504 
505   public:
506     /// The identifier and whether it is "interesting".
507     typedef std::pair<StringRef, bool> data_type;
508 
509     data_type ReadData(const internal_key_type& k,
510                        const unsigned char* d,
511                        unsigned DataLen) {
512       // The first bit indicates whether this identifier is interesting.
513       // That's all we care about.
514       using namespace llvm::support;
515       unsigned RawID = endian::readNext<uint32_t, little, unaligned>(d);
516       bool IsInteresting = RawID & 0x01;
517       return std::make_pair(k, IsInteresting);
518     }
519   };
520 }
521 
522 llvm::Error GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
523   // Open the module file.
524 
525   auto Buffer = FileMgr.getBufferForFile(File, /*isVolatile=*/true);
526   if (!Buffer)
527     return llvm::createStringError(Buffer.getError(),
528                                    "failed getting buffer for module file");
529 
530   // Initialize the input stream
531   llvm::BitstreamCursor InStream(PCHContainerRdr.ExtractPCH(**Buffer));
532 
533   // Sniff for the signature.
534   for (unsigned char C : {'C', 'P', 'C', 'H'})
535     if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = InStream.Read(8)) {
536       if (Res.get() != C)
537         return llvm::createStringError(std::errc::illegal_byte_sequence,
538                                        "expected signature CPCH");
539     } else
540       return Res.takeError();
541 
542   // Record this module file and assign it a unique ID (if it doesn't have
543   // one already).
544   unsigned ID = getModuleFileInfo(File).ID;
545 
546   // Search for the blocks and records we care about.
547   enum { Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State = Other;
548   bool Done = false;
549   while (!Done) {
550     Expected<llvm::BitstreamEntry> MaybeEntry = InStream.advance();
551     if (!MaybeEntry)
552       return MaybeEntry.takeError();
553     llvm::BitstreamEntry Entry = MaybeEntry.get();
554 
555     switch (Entry.Kind) {
556     case llvm::BitstreamEntry::Error:
557       Done = true;
558       continue;
559 
560     case llvm::BitstreamEntry::Record:
561       // In the 'other' state, just skip the record. We don't care.
562       if (State == Other) {
563         if (llvm::Expected<unsigned> Skipped = InStream.skipRecord(Entry.ID))
564           continue;
565         else
566           return Skipped.takeError();
567       }
568 
569       // Handle potentially-interesting records below.
570       break;
571 
572     case llvm::BitstreamEntry::SubBlock:
573       if (Entry.ID == CONTROL_BLOCK_ID) {
574         if (llvm::Error Err = InStream.EnterSubBlock(CONTROL_BLOCK_ID))
575           return Err;
576 
577         // Found the control block.
578         State = ControlBlock;
579         continue;
580       }
581 
582       if (Entry.ID == AST_BLOCK_ID) {
583         if (llvm::Error Err = InStream.EnterSubBlock(AST_BLOCK_ID))
584           return Err;
585 
586         // Found the AST block.
587         State = ASTBlock;
588         continue;
589       }
590 
591       if (Entry.ID == UNHASHED_CONTROL_BLOCK_ID) {
592         if (llvm::Error Err = InStream.EnterSubBlock(UNHASHED_CONTROL_BLOCK_ID))
593           return Err;
594 
595         // Found the Diagnostic Options block.
596         State = DiagnosticOptionsBlock;
597         continue;
598       }
599 
600       if (llvm::Error Err = InStream.SkipBlock())
601         return Err;
602 
603       continue;
604 
605     case llvm::BitstreamEntry::EndBlock:
606       State = Other;
607       continue;
608     }
609 
610     // Read the given record.
611     SmallVector<uint64_t, 64> Record;
612     StringRef Blob;
613     Expected<unsigned> MaybeCode = InStream.readRecord(Entry.ID, Record, &Blob);
614     if (!MaybeCode)
615       return MaybeCode.takeError();
616     unsigned Code = MaybeCode.get();
617 
618     // Handle module dependencies.
619     if (State == ControlBlock && Code == IMPORTS) {
620       // Load each of the imported PCH files.
621       unsigned Idx = 0, N = Record.size();
622       while (Idx < N) {
623         // Read information about the AST file.
624 
625         // Skip the imported kind
626         ++Idx;
627 
628         // Skip if it is standard C++ module
629         ++Idx;
630 
631         // Skip the import location
632         ++Idx;
633 
634         // Load stored size/modification time.
635         off_t StoredSize = (off_t)Record[Idx++];
636         time_t StoredModTime = (time_t)Record[Idx++];
637 
638         // Skip the stored signature.
639         // FIXME: we could read the signature out of the import and validate it.
640         auto FirstSignatureByte = Record.begin() + Idx;
641         ASTFileSignature StoredSignature = ASTFileSignature::create(
642             FirstSignatureByte, FirstSignatureByte + ASTFileSignature::size);
643         Idx += ASTFileSignature::size;
644 
645         // Skip the module name (currently this is only used for prebuilt
646         // modules while here we are only dealing with cached).
647         Idx += Record[Idx] + 1;
648 
649         // Retrieve the imported file name.
650         unsigned Length = Record[Idx++];
651         SmallString<128> ImportedFile(Record.begin() + Idx,
652                                       Record.begin() + Idx + Length);
653         Idx += Length;
654 
655         // Find the imported module file.
656         auto DependsOnFile
657           = FileMgr.getFile(ImportedFile, /*OpenFile=*/false,
658                             /*CacheFailure=*/false);
659 
660         if (!DependsOnFile)
661           return llvm::createStringError(std::errc::bad_file_descriptor,
662                                          "imported file \"%s\" not found",
663                                          ImportedFile.c_str());
664 
665         // Save the information in ImportedModuleFileInfo so we can verify after
666         // loading all pcms.
667         ImportedModuleFiles.insert(std::make_pair(
668             *DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
669                                                    StoredSignature)));
670 
671         // Record the dependency.
672         unsigned DependsOnID = getModuleFileInfo(*DependsOnFile).ID;
673         getModuleFileInfo(File).Dependencies.push_back(DependsOnID);
674       }
675 
676       continue;
677     }
678 
679     // Handle the identifier table
680     if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) {
681       typedef llvm::OnDiskIterableChainedHashTable<
682           InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
683       std::unique_ptr<InterestingIdentifierTable> Table(
684           InterestingIdentifierTable::Create(
685               (const unsigned char *)Blob.data() + Record[0],
686               (const unsigned char *)Blob.data() + sizeof(uint32_t),
687               (const unsigned char *)Blob.data()));
688       for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
689                                                      DEnd = Table->data_end();
690            D != DEnd; ++D) {
691         std::pair<StringRef, bool> Ident = *D;
692         if (Ident.second)
693           InterestingIdentifiers[Ident.first].push_back(ID);
694         else
695           (void)InterestingIdentifiers[Ident.first];
696       }
697     }
698 
699     // Get Signature.
700     if (State == DiagnosticOptionsBlock && Code == SIGNATURE)
701       getModuleFileInfo(File).Signature = ASTFileSignature::create(
702           Record.begin(), Record.begin() + ASTFileSignature::size);
703 
704     // We don't care about this record.
705   }
706 
707   return llvm::Error::success();
708 }
709 
710 namespace {
711 
712 /// Trait used to generate the identifier index as an on-disk hash
713 /// table.
714 class IdentifierIndexWriterTrait {
715 public:
716   typedef StringRef key_type;
717   typedef StringRef key_type_ref;
718   typedef SmallVector<unsigned, 2> data_type;
719   typedef const SmallVector<unsigned, 2> &data_type_ref;
720   typedef unsigned hash_value_type;
721   typedef unsigned offset_type;
722 
723   static hash_value_type ComputeHash(key_type_ref Key) {
724     return llvm::djbHash(Key);
725   }
726 
727   std::pair<unsigned,unsigned>
728   EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) {
729     using namespace llvm::support;
730     endian::Writer LE(Out, little);
731     unsigned KeyLen = Key.size();
732     unsigned DataLen = Data.size() * 4;
733     LE.write<uint16_t>(KeyLen);
734     LE.write<uint16_t>(DataLen);
735     return std::make_pair(KeyLen, DataLen);
736   }
737 
738   void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
739     Out.write(Key.data(), KeyLen);
740   }
741 
742   void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
743                 unsigned DataLen) {
744     using namespace llvm::support;
745     for (unsigned I = 0, N = Data.size(); I != N; ++I)
746       endian::write<uint32_t>(Out, Data[I], little);
747   }
748 };
749 
750 }
751 
752 bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
753   for (auto MapEntry : ImportedModuleFiles) {
754     auto *File = MapEntry.first;
755     ImportedModuleFileInfo &Info = MapEntry.second;
756     if (getModuleFileInfo(File).Signature) {
757       if (getModuleFileInfo(File).Signature != Info.StoredSignature)
758         // Verify Signature.
759         return true;
760     } else if (Info.StoredSize != File->getSize() ||
761                Info.StoredModTime != File->getModificationTime())
762       // Verify Size and ModTime.
763       return true;
764   }
765 
766   using namespace llvm;
767   llvm::TimeTraceScope TimeScope("Module WriteIndex");
768 
769   // Emit the file header.
770   Stream.Emit((unsigned)'B', 8);
771   Stream.Emit((unsigned)'C', 8);
772   Stream.Emit((unsigned)'G', 8);
773   Stream.Emit((unsigned)'I', 8);
774 
775   // Write the block-info block, which describes the records in this bitcode
776   // file.
777   emitBlockInfoBlock(Stream);
778 
779   Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
780 
781   // Write the metadata.
782   SmallVector<uint64_t, 2> Record;
783   Record.push_back(CurrentVersion);
784   Stream.EmitRecord(INDEX_METADATA, Record);
785 
786   // Write the set of known module files.
787   for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
788                                 MEnd = ModuleFiles.end();
789        M != MEnd; ++M) {
790     Record.clear();
791     Record.push_back(M->second.ID);
792     Record.push_back(M->first->getSize());
793     Record.push_back(M->first->getModificationTime());
794 
795     // File name
796     StringRef Name(M->first->getName());
797     Record.push_back(Name.size());
798     Record.append(Name.begin(), Name.end());
799 
800     // Dependencies
801     Record.push_back(M->second.Dependencies.size());
802     Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
803     Stream.EmitRecord(MODULE, Record);
804   }
805 
806   // Write the identifier -> module file mapping.
807   {
808     llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator;
809     IdentifierIndexWriterTrait Trait;
810 
811     // Populate the hash table.
812     for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
813                                             IEnd = InterestingIdentifiers.end();
814          I != IEnd; ++I) {
815       Generator.insert(I->first(), I->second, Trait);
816     }
817 
818     // Create the on-disk hash table in a buffer.
819     SmallString<4096> IdentifierTable;
820     uint32_t BucketOffset;
821     {
822       using namespace llvm::support;
823       llvm::raw_svector_ostream Out(IdentifierTable);
824       // Make sure that no bucket is at offset 0
825       endian::write<uint32_t>(Out, 0, little);
826       BucketOffset = Generator.Emit(Out, Trait);
827     }
828 
829     // Create a blob abbreviation
830     auto Abbrev = std::make_shared<BitCodeAbbrev>();
831     Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
832     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
833     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
834     unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
835 
836     // Write the identifier table
837     uint64_t Record[] = {IDENTIFIER_INDEX, BucketOffset};
838     Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable);
839   }
840 
841   Stream.ExitBlock();
842   return false;
843 }
844 
845 llvm::Error
846 GlobalModuleIndex::writeIndex(FileManager &FileMgr,
847                               const PCHContainerReader &PCHContainerRdr,
848                               StringRef Path) {
849   llvm::SmallString<128> IndexPath;
850   IndexPath += Path;
851   llvm::sys::path::append(IndexPath, IndexFileName);
852 
853   // Coordinate building the global index file with other processes that might
854   // try to do the same.
855   llvm::LockFileManager Locked(IndexPath);
856   switch (Locked) {
857   case llvm::LockFileManager::LFS_Error:
858     return llvm::createStringError(std::errc::io_error, "LFS error");
859 
860   case llvm::LockFileManager::LFS_Owned:
861     // We're responsible for building the index ourselves. Do so below.
862     break;
863 
864   case llvm::LockFileManager::LFS_Shared:
865     // Someone else is responsible for building the index. We don't care
866     // when they finish, so we're done.
867     return llvm::createStringError(std::errc::device_or_resource_busy,
868                                    "someone else is building the index");
869   }
870 
871   // The module index builder.
872   GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr);
873 
874   // Load each of the module files.
875   std::error_code EC;
876   for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
877        D != DEnd && !EC;
878        D.increment(EC)) {
879     // If this isn't a module file, we don't care.
880     if (llvm::sys::path::extension(D->path()) != ".pcm") {
881       // ... unless it's a .pcm.lock file, which indicates that someone is
882       // in the process of rebuilding a module. They'll rebuild the index
883       // at the end of that translation unit, so we don't have to.
884       if (llvm::sys::path::extension(D->path()) == ".pcm.lock")
885         return llvm::createStringError(std::errc::device_or_resource_busy,
886                                        "someone else is building the index");
887 
888       continue;
889     }
890 
891     // If we can't find the module file, skip it.
892     auto ModuleFile = FileMgr.getFile(D->path());
893     if (!ModuleFile)
894       continue;
895 
896     // Load this module file.
897     if (llvm::Error Err = Builder.loadModuleFile(*ModuleFile))
898       return Err;
899   }
900 
901   // The output buffer, into which the global index will be written.
902   SmallString<16> OutputBuffer;
903   {
904     llvm::BitstreamWriter OutputStream(OutputBuffer);
905     if (Builder.writeIndex(OutputStream))
906       return llvm::createStringError(std::errc::io_error,
907                                      "failed writing index");
908   }
909 
910   return llvm::writeToOutput(IndexPath, [&OutputBuffer](llvm::raw_ostream &OS) {
911     OS << OutputBuffer;
912     return llvm::Error::success();
913   });
914 }
915 
916 namespace {
917   class GlobalIndexIdentifierIterator : public IdentifierIterator {
918     /// The current position within the identifier lookup table.
919     IdentifierIndexTable::key_iterator Current;
920 
921     /// The end position within the identifier lookup table.
922     IdentifierIndexTable::key_iterator End;
923 
924   public:
925     explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
926       Current = Idx.key_begin();
927       End = Idx.key_end();
928     }
929 
930     StringRef Next() override {
931       if (Current == End)
932         return StringRef();
933 
934       StringRef Result = *Current;
935       ++Current;
936       return Result;
937     }
938   };
939 }
940 
941 IdentifierIterator *GlobalModuleIndex::createIdentifierIterator() const {
942   IdentifierIndexTable &Table =
943     *static_cast<IdentifierIndexTable *>(IdentifierIndex);
944   return new GlobalIndexIdentifierIterator(Table);
945 }
946