//===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" #include "clang/Basic/MakeSupport.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" #include "llvm/Support/StringSaver.h" using namespace clang; using namespace tooling; using namespace dependencies; static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, ASTReader &Reader, const serialization::ModuleFile &MF) { // Only preserve search paths that were used during the dependency scan. std::vector Entries = Opts.UserEntries; Opts.UserEntries.clear(); llvm::BitVector SearchPathUsage(Entries.size()); llvm::DenseSet Visited; std::function VisitMF = [&](const serialization::ModuleFile *MF) { SearchPathUsage |= MF->SearchPathUsage; Visited.insert(MF); for (const serialization::ModuleFile *Import : MF->Imports) if (!Visited.contains(Import)) VisitMF(Import); }; VisitMF(&MF); for (auto Idx : SearchPathUsage.set_bits()) Opts.UserEntries.push_back(Entries[Idx]); } CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths( const ModuleDeps &Deps, llvm::function_ref Optimize) const { // Make a deep copy of the original Clang invocation. CompilerInvocation CI(OriginalInvocation); CI.getLangOpts()->resetNonModularOptions(); CI.getPreprocessorOpts().resetNonModularOptions(); // Remove options incompatible with explicit module build or are likely to // differ between identical modules discovered from different translation // units. CI.getFrontendOpts().Inputs.clear(); CI.getFrontendOpts().OutputFile.clear(); CI.getCodeGenOpts().MainFileName.clear(); CI.getCodeGenOpts().DwarfDebugFlags.clear(); CI.getDiagnosticOpts().DiagnosticSerializationFile.clear(); CI.getDependencyOutputOpts().OutputFile.clear(); CI.getDependencyOutputOpts().Targets.clear(); CI.getFrontendOpts().ProgramAction = frontend::GenerateModule; CI.getLangOpts()->ModuleName = Deps.ID.ModuleName; CI.getFrontendOpts().IsSystemModule = Deps.IsSystem; // Disable implicit modules and canonicalize options that are only used by // implicit modules. CI.getLangOpts()->ImplicitModules = false; CI.getHeaderSearchOpts().ImplicitModuleMaps = false; CI.getHeaderSearchOpts().ModuleCachePath.clear(); CI.getHeaderSearchOpts().ModulesValidateOncePerBuildSession = false; CI.getHeaderSearchOpts().BuildSessionTimestamp = 0; // The specific values we canonicalize to for pruning don't affect behaviour, /// so use the default values so they will be dropped from the command-line. CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60; CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60; // Report the prebuilt modules this module uses. for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile); CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps; Optimize(CI); // The original invocation probably didn't have strict context hash enabled. // We will use the context hash of this invocation to distinguish between // multiple incompatible versions of the same module and will use it when // reporting dependencies to the clients. Let's make sure we're using // **strict** context hash in order to prevent accidental sharing of // incompatible modules (e.g. with differences in search paths). CI.getHeaderSearchOpts().ModulesStrictContextHash = true; return CI; } static std::vector serializeCompilerInvocation(const CompilerInvocation &CI) { // Set up string allocator. llvm::BumpPtrAllocator Alloc; llvm::StringSaver Strings(Alloc); auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); }; // Synthesize full command line from the CompilerInvocation, including "-cc1". SmallVector Args{"-cc1"}; CI.generateCC1CommandLine(Args, SA); // Convert arguments to the return type. return std::vector{Args.begin(), Args.end()}; } static std::vector splitString(std::string S, char Separator) { SmallVector Segments; StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false); std::vector Result; Result.reserve(Segments.size()); for (StringRef Segment : Segments) Result.push_back(Segment.str()); return Result; } std::vector ModuleDeps::getCanonicalCommandLine( llvm::function_ref LookupModuleOutput) const { CompilerInvocation CI(BuildInvocation); FrontendOptions &FrontendOpts = CI.getFrontendOpts(); InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(), InputKind::Format::ModuleMap); FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind); FrontendOpts.OutputFile = LookupModuleOutput(ID, ModuleOutputKind::ModuleFile); if (HadSerializedDiagnostics) CI.getDiagnosticOpts().DiagnosticSerializationFile = LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile); if (HadDependencyFile) { DependencyOutputOptions &DepOpts = CI.getDependencyOutputOpts(); DepOpts.OutputFile = LookupModuleOutput(ID, ModuleOutputKind::DependencyFile); DepOpts.Targets = splitString( LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0'); if (!DepOpts.OutputFile.empty() && DepOpts.Targets.empty()) { // Fallback to -o as dependency target, as in the driver. SmallString<128> Target; quoteMakeTarget(FrontendOpts.OutputFile, Target); DepOpts.Targets.push_back(std::string(Target)); } } for (ModuleID MID : ClangModuleDeps) FrontendOpts.ModuleFiles.push_back( LookupModuleOutput(MID, ModuleOutputKind::ModuleFile)); return serializeCompilerInvocation(CI); } std::vector ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const { return serializeCompilerInvocation(BuildInvocation); } void ModuleDepCollectorPP::FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID) { if (Reason != PPCallbacks::EnterFile) return; // This has to be delayed as the context hash can change at the start of // `CompilerInstance::ExecuteAction`. if (MDC.ContextHash.empty()) { MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash(); MDC.Consumer.handleContextHash(MDC.ContextHash); } SourceManager &SM = MDC.ScanInstance.getSourceManager(); // Dependency generation really does want to go all the way to the // file entry for a source location to find out what is depended on. // We do not want #line markers to affect dependency generation! if (Optional Filename = SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc)))) MDC.FileDeps.push_back( std::string(llvm::sys::path::remove_leading_dotslash(*Filename))); } void ModuleDepCollectorPP::InclusionDirective( SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, Optional File, StringRef SearchPath, StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) { if (!File && !Imported) { // This is a non-modular include that HeaderSearch failed to find. Add it // here as `FileChanged` will never see it. MDC.FileDeps.push_back(std::string(FileName)); } handleImport(Imported); } void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) { handleImport(Imported); } void ModuleDepCollectorPP::handleImport(const Module *Imported) { if (!Imported) return; const Module *TopLevelModule = Imported->getTopLevelModule(); if (MDC.isPrebuiltModule(TopLevelModule)) DirectPrebuiltModularDeps.insert(TopLevelModule); else DirectModularDeps.insert(TopLevelModule); } void ModuleDepCollectorPP::EndOfMainFile() { FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID(); MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager() .getFileEntryForID(MainFileID) ->getName()); if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty()) MDC.FileDeps.push_back( MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude); for (const Module *M : DirectModularDeps) { // A top-level module might not be actually imported as a module when // -fmodule-name is used to compile a translation unit that imports this // module. In that case it can be skipped. The appropriate header // dependencies will still be reported as expected. if (!M->getASTFile()) continue; handleTopLevelModule(M); } MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts); for (auto &&I : MDC.ModularDeps) MDC.Consumer.handleModuleDependency(*I.second); for (auto &&I : MDC.FileDeps) MDC.Consumer.handleFileDependency(I); for (auto &&I : DirectPrebuiltModularDeps) MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I}); } ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { assert(M == M->getTopLevelModule() && "Expected top level module!"); // If this module has been handled already, just return its ID. auto ModI = MDC.ModularDeps.insert({M, nullptr}); if (!ModI.second) return ModI.first->second->ID; ModI.first->second = std::make_unique(); ModuleDeps &MD = *ModI.first->second; MD.ID.ModuleName = M->getFullModuleName(); MD.ImportedByMainFile = DirectModularDeps.contains(M); MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName()); MD.IsSystem = M->IsSystem; const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor() .getHeaderSearchInfo() .getModuleMap() .getModuleMapFileForUniquing(M); if (ModuleMap) { StringRef Path = ModuleMap->tryGetRealPathName(); if (Path.empty()) Path = ModuleMap->getName(); MD.ClangModuleMapFile = std::string(Path); } serialization::ModuleFile *MF = MDC.ScanInstance.getASTReader()->getModuleManager().lookup( M->getASTFile()); MDC.ScanInstance.getASTReader()->visitInputFiles( *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) { // __inferred_module.map is the result of the way in which an implicit // module build handles inferred modules. It adds an overlay VFS with // this file in the proper directory and relies on the rest of Clang to // handle it like normal. With explicitly built modules we don't need // to play VFS tricks, so replace it with the correct module map. if (IF.getFile()->getName().endswith("__inferred_module.map")) { MD.FileDeps.insert(ModuleMap->getName()); return; } MD.FileDeps.insert(IF.getFile()->getName()); }); // We usually don't need to list the module map files of our dependencies when // building a module explicitly: their semantics will be deserialized from PCM // files. // // However, some module maps loaded implicitly during the dependency scan can // describe anti-dependencies. That happens when this module, let's call it // M1, is marked as '[no_undeclared_includes]' and tries to access a header // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;' // declaration. The explicit build needs the module map for M2 so that it // knows that textually including "M2/M2.h" is not allowed. // E.g., '__has_include("M2/M2.h")' should return false, but without M2's // module map the explicit build would return true. // // An alternative approach would be to tell the explicit build what its // textual dependencies are, instead of having it re-discover its // anti-dependencies. For example, we could create and use an `-ivfs-overlay` // with `fall-through: false` that explicitly listed the dependencies. // However, that's more complicated to implement and harder to reason about. if (M->NoUndeclaredIncludes) { // We don't have a good way to determine which module map described the // anti-dependency (let alone what's the corresponding top-level module // map). We simply specify all the module maps in the order they were loaded // during the implicit build during scan. // TODO: Resolve this by serializing and only using Module::UndeclaredUses. MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps( *MF, [&](const FileEntry *FE) { if (FE->getName().endswith("__inferred_module.map")) return; // The top-level modulemap of this module will be the input file. We // don't need to specify it as a module map. if (FE == ModuleMap) return; MD.ModuleMapFileDeps.push_back(FE->getName().str()); }); } // Add direct prebuilt module dependencies now, so that we can use them when // creating a CompilerInvocation and computing context hash for this // ModuleDeps instance. llvm::DenseSet SeenModules; addAllSubmodulePrebuiltDeps(M, MD, SeenModules); MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths( MD, [&](CompilerInvocation &BuildInvocation) { if (MDC.OptimizeArgs) optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(), *MDC.ScanInstance.getASTReader(), *MF); }); MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts() .DiagnosticSerializationFile.empty(); MD.HadDependencyFile = !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty(); // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in // the context hash since it can affect the command-line. MD.ID.ContextHash = MD.BuildInvocation.getModuleHash(); llvm::DenseSet AddedModules; addAllSubmoduleDeps(M, MD, AddedModules); return MD.ID; } static void forEachSubmoduleSorted(const Module *M, llvm::function_ref F) { // Submodule order depends on order of header includes for inferred submodules // we don't care about the exact order, so sort so that it's consistent across // TUs to improve sharing. SmallVector Submodules(M->submodule_begin(), M->submodule_end()); llvm::stable_sort(Submodules, [](const Module *A, const Module *B) { return A->Name < B->Name; }); for (const Module *SubM : Submodules) F(SubM); } void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps( const Module *M, ModuleDeps &MD, llvm::DenseSet &SeenSubmodules) { addModulePrebuiltDeps(M, MD, SeenSubmodules); forEachSubmoduleSorted(M, [&](const Module *SubM) { addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules); }); } void ModuleDepCollectorPP::addModulePrebuiltDeps( const Module *M, ModuleDeps &MD, llvm::DenseSet &SeenSubmodules) { for (const Module *Import : M->Imports) if (Import->getTopLevelModule() != M->getTopLevelModule()) if (MDC.isPrebuiltModule(Import->getTopLevelModule())) if (SeenSubmodules.insert(Import->getTopLevelModule()).second) MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule()); } void ModuleDepCollectorPP::addAllSubmoduleDeps( const Module *M, ModuleDeps &MD, llvm::DenseSet &AddedModules) { addModuleDep(M, MD, AddedModules); forEachSubmoduleSorted(M, [&](const Module *SubM) { addAllSubmoduleDeps(SubM, MD, AddedModules); }); } void ModuleDepCollectorPP::addModuleDep( const Module *M, ModuleDeps &MD, llvm::DenseSet &AddedModules) { for (const Module *Import : M->Imports) { if (Import->getTopLevelModule() != M->getTopLevelModule() && !MDC.isPrebuiltModule(Import)) { ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule()); if (AddedModules.insert(Import->getTopLevelModule()).second) MD.ClangModuleDeps.push_back(ImportID); } } } ModuleDepCollector::ModuleDepCollector( std::unique_ptr Opts, CompilerInstance &ScanInstance, DependencyConsumer &C, CompilerInvocation &&OriginalCI, bool OptimizeArgs) : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)), OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {} void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) { PP.addPPCallbacks(std::make_unique(*this)); } void ModuleDepCollector::attachToASTReader(ASTReader &R) {} bool ModuleDepCollector::isPrebuiltModule(const Module *M) { std::string Name(M->getTopLevelModuleName()); const auto &PrebuiltModuleFiles = ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles; auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name); if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end()) return false; assert("Prebuilt module came from the expected AST file" && PrebuiltModuleFileIt->second == M->getASTFile()->getName()); return true; }