1 //===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h" 10 11 #include "clang/Basic/MakeSupport.h" 12 #include "clang/Frontend/CompilerInstance.h" 13 #include "clang/Lex/Preprocessor.h" 14 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" 15 #include "llvm/Support/StringSaver.h" 16 17 using namespace clang; 18 using namespace tooling; 19 using namespace dependencies; 20 21 static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, 22 ASTReader &Reader, 23 const serialization::ModuleFile &MF) { 24 // Only preserve search paths that were used during the dependency scan. 25 std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries; 26 Opts.UserEntries.clear(); 27 28 llvm::BitVector SearchPathUsage(Entries.size()); 29 llvm::DenseSet<const serialization::ModuleFile *> Visited; 30 std::function<void(const serialization::ModuleFile *)> VisitMF = 31 [&](const serialization::ModuleFile *MF) { 32 SearchPathUsage |= MF->SearchPathUsage; 33 Visited.insert(MF); 34 for (const serialization::ModuleFile *Import : MF->Imports) 35 if (!Visited.contains(Import)) 36 VisitMF(Import); 37 }; 38 VisitMF(&MF); 39 40 for (auto Idx : SearchPathUsage.set_bits()) 41 Opts.UserEntries.push_back(Entries[Idx]); 42 } 43 44 CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths( 45 const ModuleDeps &Deps, 46 llvm::function_ref<void(CompilerInvocation &)> Optimize) const { 47 // Make a deep copy of the original Clang invocation. 48 CompilerInvocation CI(OriginalInvocation); 49 50 CI.getLangOpts()->resetNonModularOptions(); 51 CI.getPreprocessorOpts().resetNonModularOptions(); 52 53 // Remove options incompatible with explicit module build or are likely to 54 // differ between identical modules discovered from different translation 55 // units. 56 CI.getFrontendOpts().Inputs.clear(); 57 CI.getFrontendOpts().OutputFile.clear(); 58 CI.getCodeGenOpts().MainFileName.clear(); 59 CI.getCodeGenOpts().DwarfDebugFlags.clear(); 60 CI.getDiagnosticOpts().DiagnosticSerializationFile.clear(); 61 CI.getDependencyOutputOpts().OutputFile.clear(); 62 CI.getDependencyOutputOpts().Targets.clear(); 63 64 CI.getFrontendOpts().ProgramAction = frontend::GenerateModule; 65 CI.getLangOpts()->ModuleName = Deps.ID.ModuleName; 66 CI.getFrontendOpts().IsSystemModule = Deps.IsSystem; 67 68 // Disable implicit modules and canonicalize options that are only used by 69 // implicit modules. 70 CI.getLangOpts()->ImplicitModules = false; 71 CI.getHeaderSearchOpts().ImplicitModuleMaps = false; 72 CI.getHeaderSearchOpts().ModuleCachePath.clear(); 73 CI.getHeaderSearchOpts().ModulesValidateOncePerBuildSession = false; 74 CI.getHeaderSearchOpts().BuildSessionTimestamp = 0; 75 // The specific values we canonicalize to for pruning don't affect behaviour, 76 /// so use the default values so they will be dropped from the command-line. 77 CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60; 78 CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60; 79 80 // Report the prebuilt modules this module uses. 81 for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) 82 CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile); 83 84 CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps; 85 86 Optimize(CI); 87 88 // The original invocation probably didn't have strict context hash enabled. 89 // We will use the context hash of this invocation to distinguish between 90 // multiple incompatible versions of the same module and will use it when 91 // reporting dependencies to the clients. Let's make sure we're using 92 // **strict** context hash in order to prevent accidental sharing of 93 // incompatible modules (e.g. with differences in search paths). 94 CI.getHeaderSearchOpts().ModulesStrictContextHash = true; 95 96 return CI; 97 } 98 99 static std::vector<std::string> 100 serializeCompilerInvocation(const CompilerInvocation &CI) { 101 // Set up string allocator. 102 llvm::BumpPtrAllocator Alloc; 103 llvm::StringSaver Strings(Alloc); 104 auto SA = [&Strings](const Twine &Arg) { return Strings.save(Arg).data(); }; 105 106 // Synthesize full command line from the CompilerInvocation, including "-cc1". 107 SmallVector<const char *, 32> Args{"-cc1"}; 108 CI.generateCC1CommandLine(Args, SA); 109 110 // Convert arguments to the return type. 111 return std::vector<std::string>{Args.begin(), Args.end()}; 112 } 113 114 static std::vector<std::string> splitString(std::string S, char Separator) { 115 SmallVector<StringRef> Segments; 116 StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false); 117 std::vector<std::string> Result; 118 Result.reserve(Segments.size()); 119 for (StringRef Segment : Segments) 120 Result.push_back(Segment.str()); 121 return Result; 122 } 123 124 std::vector<std::string> ModuleDeps::getCanonicalCommandLine( 125 llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)> 126 LookupModuleOutput) const { 127 CompilerInvocation CI(BuildInvocation); 128 FrontendOptions &FrontendOpts = CI.getFrontendOpts(); 129 130 InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(), 131 InputKind::Format::ModuleMap); 132 FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind); 133 FrontendOpts.OutputFile = 134 LookupModuleOutput(ID, ModuleOutputKind::ModuleFile); 135 if (HadSerializedDiagnostics) 136 CI.getDiagnosticOpts().DiagnosticSerializationFile = 137 LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile); 138 if (HadDependencyFile) { 139 DependencyOutputOptions &DepOpts = CI.getDependencyOutputOpts(); 140 DepOpts.OutputFile = 141 LookupModuleOutput(ID, ModuleOutputKind::DependencyFile); 142 DepOpts.Targets = splitString( 143 LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0'); 144 if (!DepOpts.OutputFile.empty() && DepOpts.Targets.empty()) { 145 // Fallback to -o as dependency target, as in the driver. 146 SmallString<128> Target; 147 quoteMakeTarget(FrontendOpts.OutputFile, Target); 148 DepOpts.Targets.push_back(std::string(Target)); 149 } 150 } 151 152 for (ModuleID MID : ClangModuleDeps) 153 FrontendOpts.ModuleFiles.push_back( 154 LookupModuleOutput(MID, ModuleOutputKind::ModuleFile)); 155 156 return serializeCompilerInvocation(CI); 157 } 158 159 std::vector<std::string> 160 ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const { 161 return serializeCompilerInvocation(BuildInvocation); 162 } 163 164 void ModuleDepCollectorPP::FileChanged(SourceLocation Loc, 165 FileChangeReason Reason, 166 SrcMgr::CharacteristicKind FileType, 167 FileID PrevFID) { 168 if (Reason != PPCallbacks::EnterFile) 169 return; 170 171 // This has to be delayed as the context hash can change at the start of 172 // `CompilerInstance::ExecuteAction`. 173 if (MDC.ContextHash.empty()) { 174 MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash(); 175 MDC.Consumer.handleContextHash(MDC.ContextHash); 176 } 177 178 SourceManager &SM = MDC.ScanInstance.getSourceManager(); 179 180 // Dependency generation really does want to go all the way to the 181 // file entry for a source location to find out what is depended on. 182 // We do not want #line markers to affect dependency generation! 183 if (Optional<StringRef> Filename = 184 SM.getNonBuiltinFilenameForID(SM.getFileID(SM.getExpansionLoc(Loc)))) 185 MDC.FileDeps.push_back( 186 std::string(llvm::sys::path::remove_leading_dotslash(*Filename))); 187 } 188 189 void ModuleDepCollectorPP::InclusionDirective( 190 SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, 191 bool IsAngled, CharSourceRange FilenameRange, Optional<FileEntryRef> File, 192 StringRef SearchPath, StringRef RelativePath, const Module *Imported, 193 SrcMgr::CharacteristicKind FileType) { 194 if (!File && !Imported) { 195 // This is a non-modular include that HeaderSearch failed to find. Add it 196 // here as `FileChanged` will never see it. 197 MDC.FileDeps.push_back(std::string(FileName)); 198 } 199 handleImport(Imported); 200 } 201 202 void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc, 203 ModuleIdPath Path, 204 const Module *Imported) { 205 handleImport(Imported); 206 } 207 208 void ModuleDepCollectorPP::handleImport(const Module *Imported) { 209 if (!Imported) 210 return; 211 212 const Module *TopLevelModule = Imported->getTopLevelModule(); 213 214 if (MDC.isPrebuiltModule(TopLevelModule)) 215 DirectPrebuiltModularDeps.insert(TopLevelModule); 216 else 217 DirectModularDeps.insert(TopLevelModule); 218 } 219 220 void ModuleDepCollectorPP::EndOfMainFile() { 221 FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID(); 222 MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager() 223 .getFileEntryForID(MainFileID) 224 ->getName()); 225 226 if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty()) 227 MDC.FileDeps.push_back( 228 MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude); 229 230 for (const Module *M : DirectModularDeps) { 231 // A top-level module might not be actually imported as a module when 232 // -fmodule-name is used to compile a translation unit that imports this 233 // module. In that case it can be skipped. The appropriate header 234 // dependencies will still be reported as expected. 235 if (!M->getASTFile()) 236 continue; 237 handleTopLevelModule(M); 238 } 239 240 MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts); 241 242 for (auto &&I : MDC.ModularDeps) 243 MDC.Consumer.handleModuleDependency(*I.second); 244 245 for (auto &&I : MDC.FileDeps) 246 MDC.Consumer.handleFileDependency(I); 247 248 for (auto &&I : DirectPrebuiltModularDeps) 249 MDC.Consumer.handlePrebuiltModuleDependency(PrebuiltModuleDep{I}); 250 } 251 252 ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { 253 assert(M == M->getTopLevelModule() && "Expected top level module!"); 254 255 // If this module has been handled already, just return its ID. 256 auto ModI = MDC.ModularDeps.insert({M, nullptr}); 257 if (!ModI.second) 258 return ModI.first->second->ID; 259 260 ModI.first->second = std::make_unique<ModuleDeps>(); 261 ModuleDeps &MD = *ModI.first->second; 262 263 MD.ID.ModuleName = M->getFullModuleName(); 264 MD.ImportedByMainFile = DirectModularDeps.contains(M); 265 MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName()); 266 MD.IsSystem = M->IsSystem; 267 268 const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor() 269 .getHeaderSearchInfo() 270 .getModuleMap() 271 .getModuleMapFileForUniquing(M); 272 273 if (ModuleMap) { 274 StringRef Path = ModuleMap->tryGetRealPathName(); 275 if (Path.empty()) 276 Path = ModuleMap->getName(); 277 MD.ClangModuleMapFile = std::string(Path); 278 } 279 280 serialization::ModuleFile *MF = 281 MDC.ScanInstance.getASTReader()->getModuleManager().lookup( 282 M->getASTFile()); 283 MDC.ScanInstance.getASTReader()->visitInputFiles( 284 *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) { 285 // __inferred_module.map is the result of the way in which an implicit 286 // module build handles inferred modules. It adds an overlay VFS with 287 // this file in the proper directory and relies on the rest of Clang to 288 // handle it like normal. With explicitly built modules we don't need 289 // to play VFS tricks, so replace it with the correct module map. 290 if (IF.getFile()->getName().endswith("__inferred_module.map")) { 291 MD.FileDeps.insert(ModuleMap->getName()); 292 return; 293 } 294 MD.FileDeps.insert(IF.getFile()->getName()); 295 }); 296 297 // We usually don't need to list the module map files of our dependencies when 298 // building a module explicitly: their semantics will be deserialized from PCM 299 // files. 300 // 301 // However, some module maps loaded implicitly during the dependency scan can 302 // describe anti-dependencies. That happens when this module, let's call it 303 // M1, is marked as '[no_undeclared_includes]' and tries to access a header 304 // "M2/M2.h" from another module, M2, but doesn't have a 'use M2;' 305 // declaration. The explicit build needs the module map for M2 so that it 306 // knows that textually including "M2/M2.h" is not allowed. 307 // E.g., '__has_include("M2/M2.h")' should return false, but without M2's 308 // module map the explicit build would return true. 309 // 310 // An alternative approach would be to tell the explicit build what its 311 // textual dependencies are, instead of having it re-discover its 312 // anti-dependencies. For example, we could create and use an `-ivfs-overlay` 313 // with `fall-through: false` that explicitly listed the dependencies. 314 // However, that's more complicated to implement and harder to reason about. 315 if (M->NoUndeclaredIncludes) { 316 // We don't have a good way to determine which module map described the 317 // anti-dependency (let alone what's the corresponding top-level module 318 // map). We simply specify all the module maps in the order they were loaded 319 // during the implicit build during scan. 320 // TODO: Resolve this by serializing and only using Module::UndeclaredUses. 321 MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps( 322 *MF, [&](const FileEntry *FE) { 323 if (FE->getName().endswith("__inferred_module.map")) 324 return; 325 // The top-level modulemap of this module will be the input file. We 326 // don't need to specify it as a module map. 327 if (FE == ModuleMap) 328 return; 329 MD.ModuleMapFileDeps.push_back(FE->getName().str()); 330 }); 331 } 332 333 // Add direct prebuilt module dependencies now, so that we can use them when 334 // creating a CompilerInvocation and computing context hash for this 335 // ModuleDeps instance. 336 llvm::DenseSet<const Module *> SeenModules; 337 addAllSubmodulePrebuiltDeps(M, MD, SeenModules); 338 339 MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths( 340 MD, [&](CompilerInvocation &BuildInvocation) { 341 if (MDC.OptimizeArgs) 342 optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(), 343 *MDC.ScanInstance.getASTReader(), *MF); 344 }); 345 MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts() 346 .DiagnosticSerializationFile.empty(); 347 MD.HadDependencyFile = 348 !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty(); 349 // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in 350 // the context hash since it can affect the command-line. 351 MD.ID.ContextHash = MD.BuildInvocation.getModuleHash(); 352 353 llvm::DenseSet<const Module *> AddedModules; 354 addAllSubmoduleDeps(M, MD, AddedModules); 355 356 return MD.ID; 357 } 358 359 static void forEachSubmoduleSorted(const Module *M, 360 llvm::function_ref<void(const Module *)> F) { 361 // Submodule order depends on order of header includes for inferred submodules 362 // we don't care about the exact order, so sort so that it's consistent across 363 // TUs to improve sharing. 364 SmallVector<const Module *> Submodules(M->submodule_begin(), 365 M->submodule_end()); 366 llvm::stable_sort(Submodules, [](const Module *A, const Module *B) { 367 return A->Name < B->Name; 368 }); 369 for (const Module *SubM : Submodules) 370 F(SubM); 371 } 372 373 void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps( 374 const Module *M, ModuleDeps &MD, 375 llvm::DenseSet<const Module *> &SeenSubmodules) { 376 addModulePrebuiltDeps(M, MD, SeenSubmodules); 377 378 forEachSubmoduleSorted(M, [&](const Module *SubM) { 379 addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules); 380 }); 381 } 382 383 void ModuleDepCollectorPP::addModulePrebuiltDeps( 384 const Module *M, ModuleDeps &MD, 385 llvm::DenseSet<const Module *> &SeenSubmodules) { 386 for (const Module *Import : M->Imports) 387 if (Import->getTopLevelModule() != M->getTopLevelModule()) 388 if (MDC.isPrebuiltModule(Import->getTopLevelModule())) 389 if (SeenSubmodules.insert(Import->getTopLevelModule()).second) 390 MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule()); 391 } 392 393 void ModuleDepCollectorPP::addAllSubmoduleDeps( 394 const Module *M, ModuleDeps &MD, 395 llvm::DenseSet<const Module *> &AddedModules) { 396 addModuleDep(M, MD, AddedModules); 397 398 forEachSubmoduleSorted(M, [&](const Module *SubM) { 399 addAllSubmoduleDeps(SubM, MD, AddedModules); 400 }); 401 } 402 403 void ModuleDepCollectorPP::addModuleDep( 404 const Module *M, ModuleDeps &MD, 405 llvm::DenseSet<const Module *> &AddedModules) { 406 for (const Module *Import : M->Imports) { 407 if (Import->getTopLevelModule() != M->getTopLevelModule() && 408 !MDC.isPrebuiltModule(Import)) { 409 ModuleID ImportID = handleTopLevelModule(Import->getTopLevelModule()); 410 if (AddedModules.insert(Import->getTopLevelModule()).second) 411 MD.ClangModuleDeps.push_back(ImportID); 412 } 413 } 414 } 415 416 ModuleDepCollector::ModuleDepCollector( 417 std::unique_ptr<DependencyOutputOptions> Opts, 418 CompilerInstance &ScanInstance, DependencyConsumer &C, 419 CompilerInvocation &&OriginalCI, bool OptimizeArgs) 420 : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)), 421 OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {} 422 423 void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) { 424 PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this)); 425 } 426 427 void ModuleDepCollector::attachToASTReader(ASTReader &R) {} 428 429 bool ModuleDepCollector::isPrebuiltModule(const Module *M) { 430 std::string Name(M->getTopLevelModuleName()); 431 const auto &PrebuiltModuleFiles = 432 ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles; 433 auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name); 434 if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end()) 435 return false; 436 assert("Prebuilt module came from the expected AST file" && 437 PrebuiltModuleFileIt->second == M->getASTFile()->getName()); 438 return true; 439 } 440