xref: /freebsd/contrib/llvm-project/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
10 
11 #include "clang/Basic/MakeSupport.h"
12 #include "clang/Frontend/CompilerInstance.h"
13 #include "clang/Lex/Preprocessor.h"
14 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/Support/BLAKE3.h"
17 #include "llvm/Support/StringSaver.h"
18 #include <optional>
19 
20 using namespace clang;
21 using namespace tooling;
22 using namespace dependencies;
23 
24 const std::vector<std::string> &ModuleDeps::getBuildArguments() {
25   assert(!std::holds_alternative<std::monostate>(BuildInfo) &&
26          "Using uninitialized ModuleDeps");
27   if (const auto *CI = std::get_if<CowCompilerInvocation>(&BuildInfo))
28     BuildInfo = CI->getCC1CommandLine();
29   return std::get<std::vector<std::string>>(BuildInfo);
30 }
31 
32 static void
33 optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, ASTReader &Reader,
34                          const serialization::ModuleFile &MF,
35                          const PrebuiltModuleVFSMapT &PrebuiltModuleVFSMap,
36                          ScanningOptimizations OptimizeArgs) {
37   if (any(OptimizeArgs & ScanningOptimizations::HeaderSearch)) {
38     // Only preserve search paths that were used during the dependency scan.
39     std::vector<HeaderSearchOptions::Entry> Entries;
40     std::swap(Opts.UserEntries, Entries);
41 
42     llvm::BitVector SearchPathUsage(Entries.size());
43     llvm::DenseSet<const serialization::ModuleFile *> Visited;
44     std::function<void(const serialization::ModuleFile *)> VisitMF =
45         [&](const serialization::ModuleFile *MF) {
46           SearchPathUsage |= MF->SearchPathUsage;
47           Visited.insert(MF);
48           for (const serialization::ModuleFile *Import : MF->Imports)
49             if (!Visited.contains(Import))
50               VisitMF(Import);
51         };
52     VisitMF(&MF);
53 
54     if (SearchPathUsage.size() != Entries.size())
55       llvm::report_fatal_error(
56           "Inconsistent search path options between modules detected");
57 
58     for (auto Idx : SearchPathUsage.set_bits())
59       Opts.UserEntries.push_back(std::move(Entries[Idx]));
60   }
61   if (any(OptimizeArgs & ScanningOptimizations::VFS)) {
62     std::vector<std::string> VFSOverlayFiles;
63     std::swap(Opts.VFSOverlayFiles, VFSOverlayFiles);
64 
65     llvm::BitVector VFSUsage(VFSOverlayFiles.size());
66     llvm::DenseSet<const serialization::ModuleFile *> Visited;
67     std::function<void(const serialization::ModuleFile *)> VisitMF =
68         [&](const serialization::ModuleFile *MF) {
69           Visited.insert(MF);
70           if (MF->Kind == serialization::MK_ImplicitModule) {
71             VFSUsage |= MF->VFSUsage;
72             // We only need to recurse into implicit modules. Other module types
73             // will have the correct set of VFSs for anything they depend on.
74             for (const serialization::ModuleFile *Import : MF->Imports)
75               if (!Visited.contains(Import))
76                 VisitMF(Import);
77           } else {
78             // This is not an implicitly built module, so it may have different
79             // VFS options. Fall back to a string comparison instead.
80             auto VFSMap = PrebuiltModuleVFSMap.find(MF->FileName);
81             if (VFSMap == PrebuiltModuleVFSMap.end())
82               return;
83             for (std::size_t I = 0, E = VFSOverlayFiles.size(); I != E; ++I) {
84               if (VFSMap->second.contains(VFSOverlayFiles[I]))
85                 VFSUsage[I] = true;
86             }
87           }
88         };
89     VisitMF(&MF);
90 
91     if (VFSUsage.size() != VFSOverlayFiles.size())
92       llvm::report_fatal_error(
93           "Inconsistent -ivfsoverlay options between modules detected");
94 
95     for (auto Idx : VFSUsage.set_bits())
96       Opts.VFSOverlayFiles.push_back(std::move(VFSOverlayFiles[Idx]));
97   }
98 }
99 
100 static void optimizeDiagnosticOpts(DiagnosticOptions &Opts,
101                                    bool IsSystemModule) {
102   // If this is not a system module or -Wsystem-headers was passed, don't
103   // optimize.
104   if (!IsSystemModule)
105     return;
106   bool Wsystem_headers = false;
107   for (StringRef Opt : Opts.Warnings) {
108     bool isPositive = !Opt.consume_front("no-");
109     if (Opt == "system-headers")
110       Wsystem_headers = isPositive;
111   }
112   if (Wsystem_headers)
113     return;
114 
115   // Remove all warning flags. System modules suppress most, but not all,
116   // warnings.
117   Opts.Warnings.clear();
118   Opts.UndefPrefixes.clear();
119   Opts.Remarks.clear();
120 }
121 
122 static std::vector<std::string> splitString(std::string S, char Separator) {
123   SmallVector<StringRef> Segments;
124   StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false);
125   std::vector<std::string> Result;
126   Result.reserve(Segments.size());
127   for (StringRef Segment : Segments)
128     Result.push_back(Segment.str());
129   return Result;
130 }
131 
132 void ModuleDepCollector::addOutputPaths(CowCompilerInvocation &CI,
133                                         ModuleDeps &Deps) {
134   CI.getMutFrontendOpts().OutputFile =
135       Controller.lookupModuleOutput(Deps.ID, ModuleOutputKind::ModuleFile);
136   if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty())
137     CI.getMutDiagnosticOpts().DiagnosticSerializationFile =
138         Controller.lookupModuleOutput(
139             Deps.ID, ModuleOutputKind::DiagnosticSerializationFile);
140   if (!CI.getDependencyOutputOpts().OutputFile.empty()) {
141     CI.getMutDependencyOutputOpts().OutputFile = Controller.lookupModuleOutput(
142         Deps.ID, ModuleOutputKind::DependencyFile);
143     CI.getMutDependencyOutputOpts().Targets =
144         splitString(Controller.lookupModuleOutput(
145                         Deps.ID, ModuleOutputKind::DependencyTargets),
146                     '\0');
147     if (!CI.getDependencyOutputOpts().OutputFile.empty() &&
148         CI.getDependencyOutputOpts().Targets.empty()) {
149       // Fallback to -o as dependency target, as in the driver.
150       SmallString<128> Target;
151       quoteMakeTarget(CI.getFrontendOpts().OutputFile, Target);
152       CI.getMutDependencyOutputOpts().Targets.push_back(std::string(Target));
153     }
154   }
155 }
156 
157 void dependencies::resetBenignCodeGenOptions(frontend::ActionKind ProgramAction,
158                                              const LangOptions &LangOpts,
159                                              CodeGenOptions &CGOpts) {
160   // TODO: Figure out better way to set options to their default value.
161   if (ProgramAction == frontend::GenerateModule) {
162     CGOpts.MainFileName.clear();
163     CGOpts.DwarfDebugFlags.clear();
164   }
165   if (ProgramAction == frontend::GeneratePCH ||
166       (ProgramAction == frontend::GenerateModule && !LangOpts.ModulesCodegen)) {
167     CGOpts.DebugCompilationDir.clear();
168     CGOpts.CoverageCompilationDir.clear();
169     CGOpts.CoverageDataFile.clear();
170     CGOpts.CoverageNotesFile.clear();
171     CGOpts.ProfileInstrumentUsePath.clear();
172     CGOpts.SampleProfileFile.clear();
173     CGOpts.ProfileRemappingFile.clear();
174   }
175 }
176 
177 static CowCompilerInvocation
178 makeCommonInvocationForModuleBuild(CompilerInvocation CI) {
179   CI.resetNonModularOptions();
180   CI.clearImplicitModuleBuildOptions();
181 
182   // The scanner takes care to avoid passing non-affecting module maps to the
183   // explicit compiles. No need to do extra work just to find out there are no
184   // module map files to prune.
185   CI.getHeaderSearchOpts().ModulesPruneNonAffectingModuleMaps = false;
186 
187   // Remove options incompatible with explicit module build or are likely to
188   // differ between identical modules discovered from different translation
189   // units.
190   CI.getFrontendOpts().Inputs.clear();
191   CI.getFrontendOpts().OutputFile.clear();
192   // LLVM options are not going to affect the AST
193   CI.getFrontendOpts().LLVMArgs.clear();
194 
195   resetBenignCodeGenOptions(frontend::GenerateModule, CI.getLangOpts(),
196                             CI.getCodeGenOpts());
197 
198   // Map output paths that affect behaviour to "-" so their existence is in the
199   // context hash. The final path will be computed in addOutputPaths.
200   if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty())
201     CI.getDiagnosticOpts().DiagnosticSerializationFile = "-";
202   if (!CI.getDependencyOutputOpts().OutputFile.empty())
203     CI.getDependencyOutputOpts().OutputFile = "-";
204   CI.getDependencyOutputOpts().Targets.clear();
205 
206   CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
207   CI.getFrontendOpts().ARCMTAction = FrontendOptions::ARCMT_None;
208   CI.getFrontendOpts().ObjCMTAction = FrontendOptions::ObjCMT_None;
209   CI.getFrontendOpts().MTMigrateDir.clear();
210   CI.getLangOpts().ModuleName.clear();
211 
212   // Remove any macro definitions that are explicitly ignored.
213   if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) {
214     llvm::erase_if(
215         CI.getPreprocessorOpts().Macros,
216         [&CI](const std::pair<std::string, bool> &Def) {
217           StringRef MacroDef = Def.first;
218           return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains(
219               llvm::CachedHashString(MacroDef.split('=').first));
220         });
221     // Remove the now unused option.
222     CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear();
223   }
224 
225   return CI;
226 }
227 
228 CowCompilerInvocation
229 ModuleDepCollector::getInvocationAdjustedForModuleBuildWithoutOutputs(
230     const ModuleDeps &Deps,
231     llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const {
232   CowCompilerInvocation CI = CommonInvocation;
233 
234   CI.getMutLangOpts().ModuleName = Deps.ID.ModuleName;
235   CI.getMutFrontendOpts().IsSystemModule = Deps.IsSystem;
236 
237   // Inputs
238   InputKind ModuleMapInputKind(CI.getFrontendOpts().DashX.getLanguage(),
239                                InputKind::Format::ModuleMap);
240   CI.getMutFrontendOpts().Inputs.emplace_back(Deps.ClangModuleMapFile,
241                                               ModuleMapInputKind);
242 
243   auto CurrentModuleMapEntry =
244       ScanInstance.getFileManager().getFile(Deps.ClangModuleMapFile);
245   assert(CurrentModuleMapEntry && "module map file entry not found");
246 
247   // Remove directly passed modulemap files. They will get added back if they
248   // were actually used.
249   CI.getMutFrontendOpts().ModuleMapFiles.clear();
250 
251   auto DepModuleMapFiles = collectModuleMapFiles(Deps.ClangModuleDeps);
252   for (StringRef ModuleMapFile : Deps.ModuleMapFileDeps) {
253     // TODO: Track these as `FileEntryRef` to simplify the equality check below.
254     auto ModuleMapEntry = ScanInstance.getFileManager().getFile(ModuleMapFile);
255     assert(ModuleMapEntry && "module map file entry not found");
256 
257     // Don't report module maps describing eagerly-loaded dependency. This
258     // information will be deserialized from the PCM.
259     // TODO: Verify this works fine when modulemap for module A is eagerly
260     // loaded from A.pcm, and module map passed on the command line contains
261     // definition of a submodule: "explicit module A.Private { ... }".
262     if (EagerLoadModules && DepModuleMapFiles.contains(*ModuleMapEntry))
263       continue;
264 
265     // Don't report module map file of the current module unless it also
266     // describes a dependency (for symmetry).
267     if (*ModuleMapEntry == *CurrentModuleMapEntry &&
268         !DepModuleMapFiles.contains(*ModuleMapEntry))
269       continue;
270 
271     CI.getMutFrontendOpts().ModuleMapFiles.emplace_back(ModuleMapFile);
272   }
273 
274   // Report the prebuilt modules this module uses.
275   for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
276     CI.getMutFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
277 
278   // Add module file inputs from dependencies.
279   addModuleFiles(CI, Deps.ClangModuleDeps);
280 
281   if (!CI.getDiagnosticOpts().SystemHeaderWarningsModules.empty()) {
282     // Apply -Wsystem-headers-in-module for the current module.
283     if (llvm::is_contained(CI.getDiagnosticOpts().SystemHeaderWarningsModules,
284                            Deps.ID.ModuleName))
285       CI.getMutDiagnosticOpts().Warnings.push_back("system-headers");
286     // Remove the now unused option(s).
287     CI.getMutDiagnosticOpts().SystemHeaderWarningsModules.clear();
288   }
289 
290   Optimize(CI);
291 
292   return CI;
293 }
294 
295 llvm::DenseSet<const FileEntry *> ModuleDepCollector::collectModuleMapFiles(
296     ArrayRef<ModuleID> ClangModuleDeps) const {
297   llvm::DenseSet<const FileEntry *> ModuleMapFiles;
298   for (const ModuleID &MID : ClangModuleDeps) {
299     ModuleDeps *MD = ModuleDepsByID.lookup(MID);
300     assert(MD && "Inconsistent dependency info");
301     // TODO: Track ClangModuleMapFile as `FileEntryRef`.
302     auto FE = ScanInstance.getFileManager().getFile(MD->ClangModuleMapFile);
303     assert(FE && "Missing module map file that was previously found");
304     ModuleMapFiles.insert(*FE);
305   }
306   return ModuleMapFiles;
307 }
308 
309 void ModuleDepCollector::addModuleMapFiles(
310     CompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
311   if (EagerLoadModules)
312     return; // Only pcm is needed for eager load.
313 
314   for (const ModuleID &MID : ClangModuleDeps) {
315     ModuleDeps *MD = ModuleDepsByID.lookup(MID);
316     assert(MD && "Inconsistent dependency info");
317     CI.getFrontendOpts().ModuleMapFiles.push_back(MD->ClangModuleMapFile);
318   }
319 }
320 
321 void ModuleDepCollector::addModuleFiles(
322     CompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
323   for (const ModuleID &MID : ClangModuleDeps) {
324     std::string PCMPath =
325         Controller.lookupModuleOutput(MID, ModuleOutputKind::ModuleFile);
326     if (EagerLoadModules)
327       CI.getFrontendOpts().ModuleFiles.push_back(std::move(PCMPath));
328     else
329       CI.getHeaderSearchOpts().PrebuiltModuleFiles.insert(
330           {MID.ModuleName, std::move(PCMPath)});
331   }
332 }
333 
334 void ModuleDepCollector::addModuleFiles(
335     CowCompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
336   for (const ModuleID &MID : ClangModuleDeps) {
337     std::string PCMPath =
338         Controller.lookupModuleOutput(MID, ModuleOutputKind::ModuleFile);
339     if (EagerLoadModules)
340       CI.getMutFrontendOpts().ModuleFiles.push_back(std::move(PCMPath));
341     else
342       CI.getMutHeaderSearchOpts().PrebuiltModuleFiles.insert(
343           {MID.ModuleName, std::move(PCMPath)});
344   }
345 }
346 
347 static bool needsModules(FrontendInputFile FIF) {
348   switch (FIF.getKind().getLanguage()) {
349   case Language::Unknown:
350   case Language::Asm:
351   case Language::LLVM_IR:
352     return false;
353   default:
354     return true;
355   }
356 }
357 
358 void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
359   CI.clearImplicitModuleBuildOptions();
360   resetBenignCodeGenOptions(CI.getFrontendOpts().ProgramAction,
361                             CI.getLangOpts(), CI.getCodeGenOpts());
362 
363   if (llvm::any_of(CI.getFrontendOpts().Inputs, needsModules)) {
364     Preprocessor &PP = ScanInstance.getPreprocessor();
365     if (Module *CurrentModule = PP.getCurrentModuleImplementation())
366       if (OptionalFileEntryRef CurrentModuleMap =
367               PP.getHeaderSearchInfo()
368                   .getModuleMap()
369                   .getModuleMapFileForUniquing(CurrentModule))
370         CI.getFrontendOpts().ModuleMapFiles.emplace_back(
371             CurrentModuleMap->getNameAsRequested());
372 
373     SmallVector<ModuleID> DirectDeps;
374     for (const auto &KV : ModularDeps)
375       if (DirectModularDeps.contains(KV.first))
376         DirectDeps.push_back(KV.second->ID);
377 
378     // TODO: Report module maps the same way it's done for modular dependencies.
379     addModuleMapFiles(CI, DirectDeps);
380 
381     addModuleFiles(CI, DirectDeps);
382 
383     for (const auto &KV : DirectPrebuiltModularDeps)
384       CI.getFrontendOpts().ModuleFiles.push_back(KV.second.PCMFile);
385   }
386 }
387 
388 static std::string getModuleContextHash(const ModuleDeps &MD,
389                                         const CowCompilerInvocation &CI,
390                                         bool EagerLoadModules,
391                                         llvm::vfs::FileSystem &VFS) {
392   llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
393       HashBuilder;
394   SmallString<32> Scratch;
395 
396   // Hash the compiler version and serialization version to ensure the module
397   // will be readable.
398   HashBuilder.add(getClangFullRepositoryVersion());
399   HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
400   llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
401   if (CWD)
402     HashBuilder.add(*CWD);
403 
404   // Hash the BuildInvocation without any input files.
405   SmallString<0> ArgVec;
406   ArgVec.reserve(4096);
407   CI.generateCC1CommandLine([&](const Twine &Arg) {
408     Arg.toVector(ArgVec);
409     ArgVec.push_back('\0');
410   });
411   HashBuilder.add(ArgVec);
412 
413   // Hash the module dependencies. These paths may differ even if the invocation
414   // is identical if they depend on the contents of the files in the TU -- for
415   // example, case-insensitive paths to modulemap files. Usually such a case
416   // would indicate a missed optimization to canonicalize, but it may be
417   // difficult to canonicalize all cases when there is a VFS.
418   for (const auto &ID : MD.ClangModuleDeps) {
419     HashBuilder.add(ID.ModuleName);
420     HashBuilder.add(ID.ContextHash);
421   }
422 
423   HashBuilder.add(EagerLoadModules);
424 
425   llvm::BLAKE3Result<16> Hash = HashBuilder.final();
426   std::array<uint64_t, 2> Words;
427   static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words");
428   std::memcpy(Words.data(), Hash.data(), sizeof(Hash));
429   return toString(llvm::APInt(sizeof(Words) * 8, Words), 36, /*Signed=*/false);
430 }
431 
432 void ModuleDepCollector::associateWithContextHash(
433     const CowCompilerInvocation &CI, ModuleDeps &Deps) {
434   Deps.ID.ContextHash = getModuleContextHash(
435       Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
436   bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
437   (void)Inserted;
438   assert(Inserted && "duplicate module mapping");
439 }
440 
441 void ModuleDepCollectorPP::LexedFileChanged(FileID FID,
442                                             LexedFileChangeReason Reason,
443                                             SrcMgr::CharacteristicKind FileType,
444                                             FileID PrevFID,
445                                             SourceLocation Loc) {
446   if (Reason != LexedFileChangeReason::EnterFile)
447     return;
448 
449   // This has to be delayed as the context hash can change at the start of
450   // `CompilerInstance::ExecuteAction`.
451   if (MDC.ContextHash.empty()) {
452     MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
453     MDC.Consumer.handleContextHash(MDC.ContextHash);
454   }
455 
456   SourceManager &SM = MDC.ScanInstance.getSourceManager();
457 
458   // Dependency generation really does want to go all the way to the
459   // file entry for a source location to find out what is depended on.
460   // We do not want #line markers to affect dependency generation!
461   if (std::optional<StringRef> Filename = SM.getNonBuiltinFilenameForID(FID))
462     MDC.addFileDep(llvm::sys::path::remove_leading_dotslash(*Filename));
463 }
464 
465 void ModuleDepCollectorPP::InclusionDirective(
466     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
467     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
468     StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule,
469     bool ModuleImported, SrcMgr::CharacteristicKind FileType) {
470   if (!File && !ModuleImported) {
471     // This is a non-modular include that HeaderSearch failed to find. Add it
472     // here as `FileChanged` will never see it.
473     MDC.addFileDep(FileName);
474   }
475   handleImport(SuggestedModule);
476 }
477 
478 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
479                                         ModuleIdPath Path,
480                                         const Module *Imported) {
481   if (MDC.ScanInstance.getPreprocessor().isInImportingCXXNamedModules()) {
482     P1689ModuleInfo RequiredModule;
483     RequiredModule.ModuleName = Path[0].first->getName().str();
484     RequiredModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule;
485     MDC.RequiredStdCXXModules.push_back(RequiredModule);
486     return;
487   }
488 
489   handleImport(Imported);
490 }
491 
492 void ModuleDepCollectorPP::handleImport(const Module *Imported) {
493   if (!Imported)
494     return;
495 
496   const Module *TopLevelModule = Imported->getTopLevelModule();
497 
498   if (MDC.isPrebuiltModule(TopLevelModule))
499     MDC.DirectPrebuiltModularDeps.insert(
500         {TopLevelModule, PrebuiltModuleDep{TopLevelModule}});
501   else
502     MDC.DirectModularDeps.insert(TopLevelModule);
503 }
504 
505 void ModuleDepCollectorPP::EndOfMainFile() {
506   FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
507   MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
508                                  .getFileEntryRefForID(MainFileID)
509                                  ->getName());
510 
511   auto &PP = MDC.ScanInstance.getPreprocessor();
512   if (PP.isInNamedModule()) {
513     P1689ModuleInfo ProvidedModule;
514     ProvidedModule.ModuleName = PP.getNamedModuleName();
515     ProvidedModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule;
516     ProvidedModule.IsStdCXXModuleInterface = PP.isInNamedInterfaceUnit();
517     // Don't put implementation (non partition) unit as Provide.
518     // Put the module as required instead. Since the implementation
519     // unit will import the primary module implicitly.
520     if (PP.isInImplementationUnit())
521       MDC.RequiredStdCXXModules.push_back(ProvidedModule);
522     else
523       MDC.ProvidedStdCXXModule = ProvidedModule;
524   }
525 
526   if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
527     MDC.addFileDep(MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
528 
529   for (const Module *M :
530        MDC.ScanInstance.getPreprocessor().getAffectingClangModules())
531     if (!MDC.isPrebuiltModule(M))
532       MDC.DirectModularDeps.insert(M);
533 
534   for (const Module *M : MDC.DirectModularDeps)
535     handleTopLevelModule(M);
536 
537   MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
538 
539   if (MDC.IsStdModuleP1689Format)
540     MDC.Consumer.handleProvidedAndRequiredStdCXXModules(
541         MDC.ProvidedStdCXXModule, MDC.RequiredStdCXXModules);
542 
543   for (auto &&I : MDC.ModularDeps)
544     MDC.Consumer.handleModuleDependency(*I.second);
545 
546   for (const Module *M : MDC.DirectModularDeps) {
547     auto It = MDC.ModularDeps.find(M);
548     // Only report direct dependencies that were successfully handled.
549     if (It != MDC.ModularDeps.end())
550       MDC.Consumer.handleDirectModuleDependency(MDC.ModularDeps[M]->ID);
551   }
552 
553   for (auto &&I : MDC.FileDeps)
554     MDC.Consumer.handleFileDependency(I);
555 
556   for (auto &&I : MDC.DirectPrebuiltModularDeps)
557     MDC.Consumer.handlePrebuiltModuleDependency(I.second);
558 }
559 
560 std::optional<ModuleID>
561 ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
562   assert(M == M->getTopLevelModule() && "Expected top level module!");
563 
564   // A top-level module might not be actually imported as a module when
565   // -fmodule-name is used to compile a translation unit that imports this
566   // module. In that case it can be skipped. The appropriate header
567   // dependencies will still be reported as expected.
568   if (!M->getASTFile())
569     return {};
570 
571   // If this module has been handled already, just return its ID.
572   auto ModI = MDC.ModularDeps.insert({M, nullptr});
573   if (!ModI.second)
574     return ModI.first->second->ID;
575 
576   ModI.first->second = std::make_unique<ModuleDeps>();
577   ModuleDeps &MD = *ModI.first->second;
578 
579   MD.ID.ModuleName = M->getFullModuleName();
580   MD.IsSystem = M->IsSystem;
581   // For modules which use export_as link name, the linked product that of the
582   // corresponding export_as-named module.
583   if (!M->UseExportAsModuleLinkName)
584     MD.LinkLibraries = M->LinkLibraries;
585 
586   ModuleMap &ModMapInfo =
587       MDC.ScanInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap();
588 
589   OptionalFileEntryRef ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M);
590 
591   if (ModuleMap) {
592     SmallString<128> Path = ModuleMap->getNameAsRequested();
593     ModMapInfo.canonicalizeModuleMapPath(Path);
594     MD.ClangModuleMapFile = std::string(Path);
595   }
596 
597   serialization::ModuleFile *MF =
598       MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
599           *M->getASTFile());
600   MDC.ScanInstance.getASTReader()->visitInputFileInfos(
601       *MF, /*IncludeSystem=*/true,
602       [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
603         // __inferred_module.map is the result of the way in which an implicit
604         // module build handles inferred modules. It adds an overlay VFS with
605         // this file in the proper directory and relies on the rest of Clang to
606         // handle it like normal. With explicitly built modules we don't need
607         // to play VFS tricks, so replace it with the correct module map.
608         if (StringRef(IFI.Filename).ends_with("__inferred_module.map")) {
609           MDC.addFileDep(MD, ModuleMap->getName());
610           return;
611         }
612         MDC.addFileDep(MD, IFI.Filename);
613       });
614 
615   llvm::DenseSet<const Module *> SeenDeps;
616   addAllSubmodulePrebuiltDeps(M, MD, SeenDeps);
617   addAllSubmoduleDeps(M, MD, SeenDeps);
618   addAllAffectingClangModules(M, MD, SeenDeps);
619 
620   MDC.ScanInstance.getASTReader()->visitInputFileInfos(
621       *MF, /*IncludeSystem=*/true,
622       [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
623         if (!(IFI.TopLevel && IFI.ModuleMap))
624           return;
625         if (StringRef(IFI.FilenameAsRequested)
626                 .ends_with("__inferred_module.map"))
627           return;
628         MD.ModuleMapFileDeps.emplace_back(IFI.FilenameAsRequested);
629       });
630 
631   CowCompilerInvocation CI =
632       MDC.getInvocationAdjustedForModuleBuildWithoutOutputs(
633           MD, [&](CowCompilerInvocation &BuildInvocation) {
634             if (any(MDC.OptimizeArgs & (ScanningOptimizations::HeaderSearch |
635                                         ScanningOptimizations::VFS)))
636               optimizeHeaderSearchOpts(BuildInvocation.getMutHeaderSearchOpts(),
637                                        *MDC.ScanInstance.getASTReader(), *MF,
638                                        MDC.PrebuiltModuleVFSMap,
639                                        MDC.OptimizeArgs);
640             if (any(MDC.OptimizeArgs & ScanningOptimizations::SystemWarnings))
641               optimizeDiagnosticOpts(
642                   BuildInvocation.getMutDiagnosticOpts(),
643                   BuildInvocation.getFrontendOpts().IsSystemModule);
644           });
645 
646   MDC.associateWithContextHash(CI, MD);
647 
648   // Finish the compiler invocation. Requires dependencies and the context hash.
649   MDC.addOutputPaths(CI, MD);
650 
651   MD.BuildInfo = std::move(CI);
652 
653   return MD.ID;
654 }
655 
656 static void forEachSubmoduleSorted(const Module *M,
657                                    llvm::function_ref<void(const Module *)> F) {
658   // Submodule order depends on order of header includes for inferred submodules
659   // we don't care about the exact order, so sort so that it's consistent across
660   // TUs to improve sharing.
661   SmallVector<const Module *> Submodules(M->submodules());
662   llvm::stable_sort(Submodules, [](const Module *A, const Module *B) {
663     return A->Name < B->Name;
664   });
665   for (const Module *SubM : Submodules)
666     F(SubM);
667 }
668 
669 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
670     const Module *M, ModuleDeps &MD,
671     llvm::DenseSet<const Module *> &SeenSubmodules) {
672   addModulePrebuiltDeps(M, MD, SeenSubmodules);
673 
674   forEachSubmoduleSorted(M, [&](const Module *SubM) {
675     addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
676   });
677 }
678 
679 void ModuleDepCollectorPP::addModulePrebuiltDeps(
680     const Module *M, ModuleDeps &MD,
681     llvm::DenseSet<const Module *> &SeenSubmodules) {
682   for (const Module *Import : M->Imports)
683     if (Import->getTopLevelModule() != M->getTopLevelModule())
684       if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
685         if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
686           MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
687 }
688 
689 void ModuleDepCollectorPP::addAllSubmoduleDeps(
690     const Module *M, ModuleDeps &MD,
691     llvm::DenseSet<const Module *> &AddedModules) {
692   addModuleDep(M, MD, AddedModules);
693 
694   forEachSubmoduleSorted(M, [&](const Module *SubM) {
695     addAllSubmoduleDeps(SubM, MD, AddedModules);
696   });
697 }
698 
699 void ModuleDepCollectorPP::addModuleDep(
700     const Module *M, ModuleDeps &MD,
701     llvm::DenseSet<const Module *> &AddedModules) {
702   for (const Module *Import : M->Imports) {
703     if (Import->getTopLevelModule() != M->getTopLevelModule() &&
704         !MDC.isPrebuiltModule(Import)) {
705       if (auto ImportID = handleTopLevelModule(Import->getTopLevelModule()))
706         if (AddedModules.insert(Import->getTopLevelModule()).second)
707           MD.ClangModuleDeps.push_back(*ImportID);
708     }
709   }
710 }
711 
712 void ModuleDepCollectorPP::addAllAffectingClangModules(
713     const Module *M, ModuleDeps &MD,
714     llvm::DenseSet<const Module *> &AddedModules) {
715   addAffectingClangModule(M, MD, AddedModules);
716 
717   for (const Module *SubM : M->submodules())
718     addAllAffectingClangModules(SubM, MD, AddedModules);
719 }
720 
721 void ModuleDepCollectorPP::addAffectingClangModule(
722     const Module *M, ModuleDeps &MD,
723     llvm::DenseSet<const Module *> &AddedModules) {
724   for (const Module *Affecting : M->AffectingClangModules) {
725     assert(Affecting == Affecting->getTopLevelModule() &&
726            "Not quite import not top-level module");
727     if (Affecting != M->getTopLevelModule() &&
728         !MDC.isPrebuiltModule(Affecting)) {
729       if (auto ImportID = handleTopLevelModule(Affecting))
730         if (AddedModules.insert(Affecting).second)
731           MD.ClangModuleDeps.push_back(*ImportID);
732     }
733   }
734 }
735 
/// Constructs a collector for \p ScanInstance that reports to consumer \p C
/// and queries \p Controller. The by-value arguments are moved into the
/// corresponding members. \p OriginalCI is not stored as is: it is passed
/// through makeCommonInvocationForModuleBuild() and the result becomes
/// CommonInvocation.
ModuleDepCollector::ModuleDepCollector(
    std::unique_ptr<DependencyOutputOptions> Opts,
    CompilerInstance &ScanInstance, DependencyConsumer &C,
    DependencyActionController &Controller, CompilerInvocation OriginalCI,
    PrebuiltModuleVFSMapT PrebuiltModuleVFSMap,
    ScanningOptimizations OptimizeArgs, bool EagerLoadModules,
    bool IsStdModuleP1689Format)
    : ScanInstance(ScanInstance), Consumer(C), Controller(Controller),
      PrebuiltModuleVFSMap(std::move(PrebuiltModuleVFSMap)),
      Opts(std::move(Opts)),
      CommonInvocation(
          makeCommonInvocationForModuleBuild(std::move(OriginalCI))),
      OptimizeArgs(OptimizeArgs), EagerLoadModules(EagerLoadModules),
      IsStdModuleP1689Format(IsStdModuleP1689Format) {}
750 
751 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
752   PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
753 }
754 
/// No-op: the body is empty, so nothing is registered on \p R.
void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
756 
757 bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
758   std::string Name(M->getTopLevelModuleName());
759   const auto &PrebuiltModuleFiles =
760       ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
761   auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
762   if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
763     return false;
764   assert("Prebuilt module came from the expected AST file" &&
765          PrebuiltModuleFileIt->second == M->getASTFile()->getName());
766   return true;
767 }
768 
769 static StringRef makeAbsoluteAndPreferred(CompilerInstance &CI, StringRef Path,
770                                           SmallVectorImpl<char> &Storage) {
771   if (llvm::sys::path::is_absolute(Path) &&
772       !llvm::sys::path::is_style_windows(llvm::sys::path::Style::native))
773     return Path;
774   Storage.assign(Path.begin(), Path.end());
775   CI.getFileManager().makeAbsolutePath(Storage);
776   llvm::sys::path::make_preferred(Storage);
777   return StringRef(Storage.data(), Storage.size());
778 }
779 
780 void ModuleDepCollector::addFileDep(StringRef Path) {
781   if (IsStdModuleP1689Format) {
782     // Within P1689 format, we don't want all the paths to be absolute path
783     // since it may violate the tranditional make style dependencies info.
784     FileDeps.push_back(std::string(Path));
785     return;
786   }
787 
788   llvm::SmallString<256> Storage;
789   Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
790   FileDeps.push_back(std::string(Path));
791 }
792 
793 void ModuleDepCollector::addFileDep(ModuleDeps &MD, StringRef Path) {
794   if (IsStdModuleP1689Format) {
795     MD.FileDeps.insert(Path);
796     return;
797   }
798 
799   llvm::SmallString<256> Storage;
800   Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
801   MD.FileDeps.insert(Path);
802 }
803