xref: /freebsd/contrib/llvm-project/clang/tools/clang-scan-deps/ClangScanDeps.cpp (revision 7ed1628066eaf55b86f35af86efe804508201cc8)
1 //===- ClangScanDeps.cpp - Implementation of clang-scan-deps --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Driver/Compilation.h"
10 #include "clang/Driver/Driver.h"
11 #include "clang/Frontend/CompilerInstance.h"
12 #include "clang/Frontend/TextDiagnosticPrinter.h"
13 #include "clang/Tooling/CommonOptionsParser.h"
14 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
15 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
16 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
17 #include "clang/Tooling/JSONCompilationDatabase.h"
18 #include "clang/Tooling/Tooling.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/Format.h"
24 #include "llvm/Support/JSON.h"
25 #include "llvm/Support/LLVMDriver.h"
26 #include "llvm/Support/Program.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/TargetSelect.h"
29 #include "llvm/Support/ThreadPool.h"
30 #include "llvm/Support/Threading.h"
31 #include "llvm/Support/Timer.h"
32 #include "llvm/TargetParser/Host.h"
33 #include <mutex>
34 #include <optional>
35 #include <thread>
36 
37 #include "Opts.inc"
38 
39 using namespace clang;
40 using namespace tooling::dependencies;
41 
42 namespace {
43 
44 using namespace llvm::opt;
45 enum ID {
46   OPT_INVALID = 0, // This is not an option ID.
47 #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
48 #include "Opts.inc"
49 #undef OPTION
50 };
51 
52 #define PREFIX(NAME, VALUE)                                                    \
53   constexpr llvm::StringLiteral NAME##_init[] = VALUE;                         \
54   constexpr llvm::ArrayRef<llvm::StringLiteral> NAME(                          \
55       NAME##_init, std::size(NAME##_init) - 1);
56 #include "Opts.inc"
57 #undef PREFIX
58 
59 const llvm::opt::OptTable::Info InfoTable[] = {
60 #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
61 #include "Opts.inc"
62 #undef OPTION
63 };
64 
65 class ScanDepsOptTable : public llvm::opt::GenericOptTable {
66 public:
ScanDepsOptTable()67   ScanDepsOptTable() : GenericOptTable(InfoTable) {
68     setGroupedShortOptions(true);
69   }
70 };
71 
/// Strategies selectable through the resource-dir-recipe option.
enum ResourceDirRecipeKind {
  /// Chosen by the value "modify-compiler-path".
  RDRK_ModifyCompilerPath,
  /// Chosen by the value "invoke-compiler".
  RDRK_InvokeCompiler,
};
76 
77 static std::string OutputFileName = "-";
78 static ScanningMode ScanMode = ScanningMode::DependencyDirectivesScan;
79 static ScanningOutputFormat Format = ScanningOutputFormat::Make;
80 static ScanningOptimizations OptimizeArgs;
81 static std::string ModuleFilesDir;
82 static bool EagerLoadModules;
83 static unsigned NumThreads = 0;
84 static std::string CompilationDB;
85 static std::string ModuleName;
86 static std::vector<std::string> ModuleDepTargets;
87 static bool DeprecatedDriverCommand;
88 static ResourceDirRecipeKind ResourceDirRecipe;
89 static bool Verbose;
90 static bool PrintTiming;
91 static llvm::BumpPtrAllocator Alloc;
92 static llvm::StringSaver Saver{Alloc};
93 static std::vector<const char *> CommandLine;
94 
// Round-tripping of command lines defaults to on when assertions are enabled
// (NDEBUG not defined) and to off otherwise.
static constexpr bool DoRoundTripDefault =
#ifndef NDEBUG
    true;
#else
    false;
#endif

static bool RoundTripArgs = DoRoundTripDefault;
102 
ParseArgs(int argc,char ** argv)103 static void ParseArgs(int argc, char **argv) {
104   ScanDepsOptTable Tbl;
105   llvm::StringRef ToolName = argv[0];
106   llvm::opt::InputArgList Args =
107       Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
108         llvm::errs() << Msg << '\n';
109         std::exit(1);
110       });
111 
112   if (Args.hasArg(OPT_help)) {
113     Tbl.printHelp(llvm::outs(), "clang-scan-deps [options]", "clang-scan-deps");
114     std::exit(0);
115   }
116   if (Args.hasArg(OPT_version)) {
117     llvm::outs() << ToolName << '\n';
118     llvm::cl::PrintVersionMessage();
119     std::exit(0);
120   }
121   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_mode_EQ)) {
122     auto ModeType =
123         llvm::StringSwitch<std::optional<ScanningMode>>(A->getValue())
124             .Case("preprocess-dependency-directives",
125                   ScanningMode::DependencyDirectivesScan)
126             .Case("preprocess", ScanningMode::CanonicalPreprocessing)
127             .Default(std::nullopt);
128     if (!ModeType) {
129       llvm::errs() << ToolName
130                    << ": for the --mode option: Cannot find option named '"
131                    << A->getValue() << "'\n";
132       std::exit(1);
133     }
134     ScanMode = *ModeType;
135   }
136 
137   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_format_EQ)) {
138     auto FormatType =
139         llvm::StringSwitch<std::optional<ScanningOutputFormat>>(A->getValue())
140             .Case("make", ScanningOutputFormat::Make)
141             .Case("p1689", ScanningOutputFormat::P1689)
142             .Case("experimental-full", ScanningOutputFormat::Full)
143             .Default(std::nullopt);
144     if (!FormatType) {
145       llvm::errs() << ToolName
146                    << ": for the --format option: Cannot find option named '"
147                    << A->getValue() << "'\n";
148       std::exit(1);
149     }
150     Format = *FormatType;
151   }
152 
153   std::vector<std::string> OptimizationFlags =
154       Args.getAllArgValues(OPT_optimize_args_EQ);
155   OptimizeArgs = ScanningOptimizations::None;
156   for (const auto &Arg : OptimizationFlags) {
157     auto Optimization =
158         llvm::StringSwitch<std::optional<ScanningOptimizations>>(Arg)
159             .Case("none", ScanningOptimizations::None)
160             .Case("header-search", ScanningOptimizations::HeaderSearch)
161             .Case("system-warnings", ScanningOptimizations::SystemWarnings)
162             .Case("vfs", ScanningOptimizations::VFS)
163             .Case("canonicalize-macros", ScanningOptimizations::Macros)
164             .Case("all", ScanningOptimizations::All)
165             .Default(std::nullopt);
166     if (!Optimization) {
167       llvm::errs()
168           << ToolName
169           << ": for the --optimize-args option: Cannot find option named '"
170           << Arg << "'\n";
171       std::exit(1);
172     }
173     OptimizeArgs |= *Optimization;
174   }
175   if (OptimizationFlags.empty())
176     OptimizeArgs = ScanningOptimizations::Default;
177 
178   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_module_files_dir_EQ))
179     ModuleFilesDir = A->getValue();
180 
181   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_o))
182     OutputFileName = A->getValue();
183 
184   EagerLoadModules = Args.hasArg(OPT_eager_load_pcm);
185 
186   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_j)) {
187     StringRef S{A->getValue()};
188     if (!llvm::to_integer(S, NumThreads, 0)) {
189       llvm::errs() << ToolName << ": for the -j option: '" << S
190                    << "' value invalid for uint argument!\n";
191       std::exit(1);
192     }
193   }
194 
195   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_compilation_database_EQ))
196     CompilationDB = A->getValue();
197 
198   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_module_name_EQ))
199     ModuleName = A->getValue();
200 
201   for (const llvm::opt::Arg *A : Args.filtered(OPT_dependency_target_EQ))
202     ModuleDepTargets.emplace_back(A->getValue());
203 
204   DeprecatedDriverCommand = Args.hasArg(OPT_deprecated_driver_command);
205 
206   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_resource_dir_recipe_EQ)) {
207     auto Kind =
208         llvm::StringSwitch<std::optional<ResourceDirRecipeKind>>(A->getValue())
209             .Case("modify-compiler-path", RDRK_ModifyCompilerPath)
210             .Case("invoke-compiler", RDRK_InvokeCompiler)
211             .Default(std::nullopt);
212     if (!Kind) {
213       llvm::errs() << ToolName
214                    << ": for the --resource-dir-recipe option: Cannot find "
215                       "option named '"
216                    << A->getValue() << "'\n";
217       std::exit(1);
218     }
219     ResourceDirRecipe = *Kind;
220   }
221 
222   PrintTiming = Args.hasArg(OPT_print_timing);
223 
224   Verbose = Args.hasArg(OPT_verbose);
225 
226   RoundTripArgs = Args.hasArg(OPT_round_trip_args);
227 
228   if (const llvm::opt::Arg *A = Args.getLastArgNoClaim(OPT_DASH_DASH))
229     CommandLine.assign(A->getValues().begin(), A->getValues().end());
230 }
231 
232 class SharedStream {
233 public:
SharedStream(raw_ostream & OS)234   SharedStream(raw_ostream &OS) : OS(OS) {}
applyLocked(llvm::function_ref<void (raw_ostream & OS)> Fn)235   void applyLocked(llvm::function_ref<void(raw_ostream &OS)> Fn) {
236     std::unique_lock<std::mutex> LockGuard(Lock);
237     Fn(OS);
238     OS.flush();
239   }
240 
241 private:
242   std::mutex Lock;
243   raw_ostream &OS;
244 };
245 
246 class ResourceDirectoryCache {
247 public:
248   /// findResourceDir finds the resource directory relative to the clang
249   /// compiler being used in Args, by running it with "-print-resource-dir"
250   /// option and cache the results for reuse. \returns resource directory path
251   /// associated with the given invocation command or empty string if the
252   /// compiler path is NOT an absolute path.
findResourceDir(const tooling::CommandLineArguments & Args,bool ClangCLMode)253   StringRef findResourceDir(const tooling::CommandLineArguments &Args,
254                             bool ClangCLMode) {
255     if (Args.size() < 1)
256       return "";
257 
258     const std::string &ClangBinaryPath = Args[0];
259     if (!llvm::sys::path::is_absolute(ClangBinaryPath))
260       return "";
261 
262     const std::string &ClangBinaryName =
263         std::string(llvm::sys::path::filename(ClangBinaryPath));
264 
265     std::unique_lock<std::mutex> LockGuard(CacheLock);
266     const auto &CachedResourceDir = Cache.find(ClangBinaryPath);
267     if (CachedResourceDir != Cache.end())
268       return CachedResourceDir->second;
269 
270     std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName};
271     if (ClangCLMode)
272       PrintResourceDirArgs.push_back("/clang:-print-resource-dir");
273     else
274       PrintResourceDirArgs.push_back("-print-resource-dir");
275 
276     llvm::SmallString<64> OutputFile, ErrorFile;
277     llvm::sys::fs::createTemporaryFile("print-resource-dir-output",
278                                        "" /*no-suffix*/, OutputFile);
279     llvm::sys::fs::createTemporaryFile("print-resource-dir-error",
280                                        "" /*no-suffix*/, ErrorFile);
281     llvm::FileRemover OutputRemover(OutputFile.c_str());
282     llvm::FileRemover ErrorRemover(ErrorFile.c_str());
283     std::optional<StringRef> Redirects[] = {
284         {""}, // Stdin
285         OutputFile.str(),
286         ErrorFile.str(),
287     };
288     if (llvm::sys::ExecuteAndWait(ClangBinaryPath, PrintResourceDirArgs, {},
289                                   Redirects)) {
290       auto ErrorBuf = llvm::MemoryBuffer::getFile(ErrorFile.c_str());
291       llvm::errs() << ErrorBuf.get()->getBuffer();
292       return "";
293     }
294 
295     auto OutputBuf = llvm::MemoryBuffer::getFile(OutputFile.c_str());
296     if (!OutputBuf)
297       return "";
298     StringRef Output = OutputBuf.get()->getBuffer().rtrim('\n');
299 
300     Cache[ClangBinaryPath] = Output.str();
301     return Cache[ClangBinaryPath];
302   }
303 
304 private:
305   std::map<std::string, std::string> Cache;
306   std::mutex CacheLock;
307 };
308 
309 } // end anonymous namespace
310 
311 /// Takes the result of a dependency scan and prints error / dependency files
312 /// based on the result.
313 ///
314 /// \returns True on error.
315 static bool
handleMakeDependencyToolResult(const std::string & Input,llvm::Expected<std::string> & MaybeFile,SharedStream & OS,SharedStream & Errs)316 handleMakeDependencyToolResult(const std::string &Input,
317                                llvm::Expected<std::string> &MaybeFile,
318                                SharedStream &OS, SharedStream &Errs) {
319   if (!MaybeFile) {
320     llvm::handleAllErrors(
321         MaybeFile.takeError(), [&Input, &Errs](llvm::StringError &Err) {
322           Errs.applyLocked([&](raw_ostream &OS) {
323             OS << "Error while scanning dependencies for " << Input << ":\n";
324             OS << Err.getMessage();
325           });
326         });
327     return true;
328   }
329   OS.applyLocked([&](raw_ostream &OS) { OS << *MaybeFile; });
330   return false;
331 }
332 
/// Converts \p Set into a JSON array holding its keys in sorted order.
static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) {
  std::vector<llvm::StringRef> Keys;
  for (const auto &Entry : Set)
    Keys.push_back(Entry.getKey());

  llvm::sort(Keys);
  return llvm::json::Array(Keys);
}
340 
341 // Technically, we don't need to sort the dependency list to get determinism.
342 // Leaving these be will simply preserve the import order.
toJSONSorted(std::vector<ModuleID> V)343 static llvm::json::Array toJSONSorted(std::vector<ModuleID> V) {
344   llvm::sort(V);
345 
346   llvm::json::Array Ret;
347   for (const ModuleID &MID : V)
348     Ret.push_back(llvm::json::Object(
349         {{"module-name", MID.ModuleName}, {"context-hash", MID.ContextHash}}));
350   return Ret;
351 }
352 
353 static llvm::json::Array
toJSONSorted(llvm::SmallVector<Module::LinkLibrary,2> & LinkLibs)354 toJSONSorted(llvm::SmallVector<Module::LinkLibrary, 2> &LinkLibs) {
355   llvm::sort(LinkLibs, [](const Module::LinkLibrary &lhs,
356                           const Module::LinkLibrary &rhs) {
357     return lhs.Library < rhs.Library;
358   });
359 
360   llvm::json::Array Ret;
361   for (const Module::LinkLibrary &LL : LinkLibs)
362     Ret.push_back(llvm::json::Object(
363         {{"link-name", LL.Library}, {"isFramework", LL.IsFramework}}));
364   return Ret;
365 }
366 
367 // Thread safe.
368 class FullDeps {
369 public:
FullDeps(size_t NumInputs)370   FullDeps(size_t NumInputs) : Inputs(NumInputs) {}
371 
mergeDeps(StringRef Input,TranslationUnitDeps TUDeps,size_t InputIndex)372   void mergeDeps(StringRef Input, TranslationUnitDeps TUDeps,
373                  size_t InputIndex) {
374     mergeDeps(std::move(TUDeps.ModuleGraph), InputIndex);
375 
376     InputDeps ID;
377     ID.FileName = std::string(Input);
378     ID.ContextHash = std::move(TUDeps.ID.ContextHash);
379     ID.FileDeps = std::move(TUDeps.FileDeps);
380     ID.ModuleDeps = std::move(TUDeps.ClangModuleDeps);
381     ID.DriverCommandLine = std::move(TUDeps.DriverCommandLine);
382     ID.Commands = std::move(TUDeps.Commands);
383 
384     assert(InputIndex < Inputs.size() && "Input index out of bounds");
385     assert(Inputs[InputIndex].FileName.empty() && "Result already populated");
386     Inputs[InputIndex] = std::move(ID);
387   }
388 
mergeDeps(ModuleDepsGraph Graph,size_t InputIndex)389   void mergeDeps(ModuleDepsGraph Graph, size_t InputIndex) {
390     std::vector<ModuleDeps *> NewMDs;
391     {
392       std::unique_lock<std::mutex> ul(Lock);
393       for (const ModuleDeps &MD : Graph) {
394         auto I = Modules.find({MD.ID, 0});
395         if (I != Modules.end()) {
396           I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);
397           continue;
398         }
399         auto Res = Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)});
400         NewMDs.push_back(&Res->second);
401       }
402       // First call to \c getBuildArguments is somewhat expensive. Let's call it
403       // on the current thread (instead of the main one), and outside the
404       // critical section.
405       for (ModuleDeps *MD : NewMDs)
406         (void)MD->getBuildArguments();
407     }
408   }
409 
roundTripCommand(ArrayRef<std::string> ArgStrs,DiagnosticsEngine & Diags)410   bool roundTripCommand(ArrayRef<std::string> ArgStrs,
411                         DiagnosticsEngine &Diags) {
412     if (ArgStrs.empty() || ArgStrs[0] != "-cc1")
413       return false;
414     SmallVector<const char *> Args;
415     for (const std::string &Arg : ArgStrs)
416       Args.push_back(Arg.c_str());
417     return !CompilerInvocation::checkCC1RoundTrip(Args, Diags);
418   }
419 
420   // Returns \c true if any command lines fail to round-trip. We expect
421   // commands already be canonical when output by the scanner.
roundTripCommands(raw_ostream & ErrOS)422   bool roundTripCommands(raw_ostream &ErrOS) {
423     IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions{};
424     TextDiagnosticPrinter DiagConsumer(ErrOS, &*DiagOpts);
425     IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
426         CompilerInstance::createDiagnostics(&*DiagOpts, &DiagConsumer,
427                                             /*ShouldOwnClient=*/false);
428 
429     for (auto &&M : Modules)
430       if (roundTripCommand(M.second.getBuildArguments(), *Diags))
431         return true;
432 
433     for (auto &&I : Inputs)
434       for (const auto &Cmd : I.Commands)
435         if (roundTripCommand(Cmd.Arguments, *Diags))
436           return true;
437 
438     return false;
439   }
440 
printFullOutput(raw_ostream & OS)441   void printFullOutput(raw_ostream &OS) {
442     // Skip sorting modules and constructing the JSON object if the output
443     // cannot be observed anyway. This makes timings less noisy.
444     if (&OS == &llvm::nulls())
445       return;
446 
447     // Sort the modules by name to get a deterministic order.
448     std::vector<IndexedModuleID> ModuleIDs;
449     for (auto &&M : Modules)
450       ModuleIDs.push_back(M.first);
451     llvm::sort(ModuleIDs);
452 
453     using namespace llvm::json;
454 
455     Array OutModules;
456     for (auto &&ModID : ModuleIDs) {
457       auto &MD = Modules[ModID];
458       Object O{{"name", MD.ID.ModuleName},
459                {"context-hash", MD.ID.ContextHash},
460                {"file-deps", toJSONSorted(MD.FileDeps)},
461                {"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)},
462                {"clang-modulemap-file", MD.ClangModuleMapFile},
463                {"command-line", MD.getBuildArguments()},
464                {"link-libraries", toJSONSorted(MD.LinkLibraries)}};
465       OutModules.push_back(std::move(O));
466     }
467 
468     Array TUs;
469     for (auto &&I : Inputs) {
470       Array Commands;
471       if (I.DriverCommandLine.empty()) {
472         for (const auto &Cmd : I.Commands) {
473           Object O{
474               {"input-file", I.FileName},
475               {"clang-context-hash", I.ContextHash},
476               {"file-deps", I.FileDeps},
477               {"clang-module-deps", toJSONSorted(I.ModuleDeps)},
478               {"executable", Cmd.Executable},
479               {"command-line", Cmd.Arguments},
480           };
481           Commands.push_back(std::move(O));
482         }
483       } else {
484         Object O{
485             {"input-file", I.FileName},
486             {"clang-context-hash", I.ContextHash},
487             {"file-deps", I.FileDeps},
488             {"clang-module-deps", toJSONSorted(I.ModuleDeps)},
489             {"executable", "clang"},
490             {"command-line", I.DriverCommandLine},
491         };
492         Commands.push_back(std::move(O));
493       }
494       TUs.push_back(Object{
495           {"commands", std::move(Commands)},
496       });
497     }
498 
499     Object Output{
500         {"modules", std::move(OutModules)},
501         {"translation-units", std::move(TUs)},
502     };
503 
504     OS << llvm::formatv("{0:2}\n", Value(std::move(Output)));
505   }
506 
507 private:
508   struct IndexedModuleID {
509     ModuleID ID;
510 
511     // FIXME: This is mutable so that it can still be updated after insertion
512     //  into an unordered associative container. This is "fine", since this
513     //  field doesn't contribute to the hash, but it's a brittle hack.
514     mutable size_t InputIndex;
515 
operator ==FullDeps::IndexedModuleID516     bool operator==(const IndexedModuleID &Other) const {
517       return ID == Other.ID;
518     }
519 
operator <FullDeps::IndexedModuleID520     bool operator<(const IndexedModuleID &Other) const {
521       /// We need the output of clang-scan-deps to be deterministic. However,
522       /// the dependency graph may contain two modules with the same name. How
523       /// do we decide which one to print first? If we made that decision based
524       /// on the context hash, the ordering would be deterministic, but
525       /// different across machines. This can happen for example when the inputs
526       /// or the SDKs (which both contribute to the "context" hash) live in
527       /// different absolute locations. We solve that by tracking the index of
528       /// the first input TU that (transitively) imports the dependency, which
529       /// is always the same for the same input, resulting in deterministic
530       /// sorting that's also reproducible across machines.
531       return std::tie(ID.ModuleName, InputIndex) <
532              std::tie(Other.ID.ModuleName, Other.InputIndex);
533     }
534 
535     struct Hasher {
operator ()FullDeps::IndexedModuleID::Hasher536       std::size_t operator()(const IndexedModuleID &IMID) const {
537         return llvm::hash_value(IMID.ID);
538       }
539     };
540   };
541 
542   struct InputDeps {
543     std::string FileName;
544     std::string ContextHash;
545     std::vector<std::string> FileDeps;
546     std::vector<ModuleID> ModuleDeps;
547     std::vector<std::string> DriverCommandLine;
548     std::vector<Command> Commands;
549   };
550 
551   std::mutex Lock;
552   std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleID::Hasher>
553       Modules;
554   std::vector<InputDeps> Inputs;
555 };
556 
handleTranslationUnitResult(StringRef Input,llvm::Expected<TranslationUnitDeps> & MaybeTUDeps,FullDeps & FD,size_t InputIndex,SharedStream & OS,SharedStream & Errs)557 static bool handleTranslationUnitResult(
558     StringRef Input, llvm::Expected<TranslationUnitDeps> &MaybeTUDeps,
559     FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) {
560   if (!MaybeTUDeps) {
561     llvm::handleAllErrors(
562         MaybeTUDeps.takeError(), [&Input, &Errs](llvm::StringError &Err) {
563           Errs.applyLocked([&](raw_ostream &OS) {
564             OS << "Error while scanning dependencies for " << Input << ":\n";
565             OS << Err.getMessage();
566           });
567         });
568     return true;
569   }
570   FD.mergeDeps(Input, std::move(*MaybeTUDeps), InputIndex);
571   return false;
572 }
573 
handleModuleResult(StringRef ModuleName,llvm::Expected<ModuleDepsGraph> & MaybeModuleGraph,FullDeps & FD,size_t InputIndex,SharedStream & OS,SharedStream & Errs)574 static bool handleModuleResult(
575     StringRef ModuleName, llvm::Expected<ModuleDepsGraph> &MaybeModuleGraph,
576     FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) {
577   if (!MaybeModuleGraph) {
578     llvm::handleAllErrors(MaybeModuleGraph.takeError(),
579                           [&ModuleName, &Errs](llvm::StringError &Err) {
580                             Errs.applyLocked([&](raw_ostream &OS) {
581                               OS << "Error while scanning dependencies for "
582                                  << ModuleName << ":\n";
583                               OS << Err.getMessage();
584                             });
585                           });
586     return true;
587   }
588   FD.mergeDeps(std::move(*MaybeModuleGraph), InputIndex);
589   return false;
590 }
591 
592 class P1689Deps {
593 public:
printDependencies(raw_ostream & OS)594   void printDependencies(raw_ostream &OS) {
595     addSourcePathsToRequires();
596     // Sort the modules by name to get a deterministic order.
597     llvm::sort(Rules, [](const P1689Rule &A, const P1689Rule &B) {
598       return A.PrimaryOutput < B.PrimaryOutput;
599     });
600 
601     using namespace llvm::json;
602     Array OutputRules;
603     for (const P1689Rule &R : Rules) {
604       Object O{{"primary-output", R.PrimaryOutput}};
605 
606       if (R.Provides) {
607         Array Provides;
608         Object Provided{{"logical-name", R.Provides->ModuleName},
609                         {"source-path", R.Provides->SourcePath},
610                         {"is-interface", R.Provides->IsStdCXXModuleInterface}};
611         Provides.push_back(std::move(Provided));
612         O.insert({"provides", std::move(Provides)});
613       }
614 
615       Array Requires;
616       for (const P1689ModuleInfo &Info : R.Requires) {
617         Object RequiredInfo{{"logical-name", Info.ModuleName}};
618         if (!Info.SourcePath.empty())
619           RequiredInfo.insert({"source-path", Info.SourcePath});
620         Requires.push_back(std::move(RequiredInfo));
621       }
622 
623       if (!Requires.empty())
624         O.insert({"requires", std::move(Requires)});
625 
626       OutputRules.push_back(std::move(O));
627     }
628 
629     Object Output{
630         {"version", 1}, {"revision", 0}, {"rules", std::move(OutputRules)}};
631 
632     OS << llvm::formatv("{0:2}\n", Value(std::move(Output)));
633   }
634 
addRules(P1689Rule & Rule)635   void addRules(P1689Rule &Rule) {
636     std::unique_lock<std::mutex> LockGuard(Lock);
637     Rules.push_back(Rule);
638   }
639 
640 private:
addSourcePathsToRequires()641   void addSourcePathsToRequires() {
642     llvm::DenseMap<StringRef, StringRef> ModuleSourceMapper;
643     for (const P1689Rule &R : Rules)
644       if (R.Provides && !R.Provides->SourcePath.empty())
645         ModuleSourceMapper[R.Provides->ModuleName] = R.Provides->SourcePath;
646 
647     for (P1689Rule &R : Rules) {
648       for (P1689ModuleInfo &Info : R.Requires) {
649         auto Iter = ModuleSourceMapper.find(Info.ModuleName);
650         if (Iter != ModuleSourceMapper.end())
651           Info.SourcePath = Iter->second;
652       }
653     }
654   }
655 
656   std::mutex Lock;
657   std::vector<P1689Rule> Rules;
658 };
659 
660 static bool
handleP1689DependencyToolResult(const std::string & Input,llvm::Expected<P1689Rule> & MaybeRule,P1689Deps & PD,SharedStream & Errs)661 handleP1689DependencyToolResult(const std::string &Input,
662                                 llvm::Expected<P1689Rule> &MaybeRule,
663                                 P1689Deps &PD, SharedStream &Errs) {
664   if (!MaybeRule) {
665     llvm::handleAllErrors(
666         MaybeRule.takeError(), [&Input, &Errs](llvm::StringError &Err) {
667           Errs.applyLocked([&](raw_ostream &OS) {
668             OS << "Error while scanning dependencies for " << Input << ":\n";
669             OS << Err.getMessage();
670           });
671         });
672     return true;
673   }
674   PD.addRules(*MaybeRule);
675   return false;
676 }
677 
678 /// Construct a path for the explicitly built PCM.
constructPCMPath(ModuleID MID,StringRef OutputDir)679 static std::string constructPCMPath(ModuleID MID, StringRef OutputDir) {
680   SmallString<256> ExplicitPCMPath(OutputDir);
681   llvm::sys::path::append(ExplicitPCMPath, MID.ContextHash,
682                           MID.ModuleName + "-" + MID.ContextHash + ".pcm");
683   return std::string(ExplicitPCMPath);
684 }
685 
lookupModuleOutput(const ModuleID & MID,ModuleOutputKind MOK,StringRef OutputDir)686 static std::string lookupModuleOutput(const ModuleID &MID, ModuleOutputKind MOK,
687                                       StringRef OutputDir) {
688   std::string PCMPath = constructPCMPath(MID, OutputDir);
689   switch (MOK) {
690   case ModuleOutputKind::ModuleFile:
691     return PCMPath;
692   case ModuleOutputKind::DependencyFile:
693     return PCMPath + ".d";
694   case ModuleOutputKind::DependencyTargets:
695     // Null-separate the list of targets.
696     return join(ModuleDepTargets, StringRef("\0", 1));
697   case ModuleOutputKind::DiagnosticSerializationFile:
698     return PCMPath + ".diag";
699   }
700   llvm_unreachable("Fully covered switch above!");
701 }
702 
getModuleCachePath(ArrayRef<std::string> Args)703 static std::string getModuleCachePath(ArrayRef<std::string> Args) {
704   for (StringRef Arg : llvm::reverse(Args)) {
705     Arg.consume_front("/clang:");
706     if (Arg.consume_front("-fmodules-cache-path="))
707       return std::string(Arg);
708   }
709   SmallString<128> Path;
710   driver::Driver::getDefaultModuleCachePath(Path);
711   return std::string(Path);
712 }
713 
714 /// Attempts to construct the compilation database from '-compilation-database'
715 /// or from the arguments following the positional '--'.
716 static std::unique_ptr<tooling::CompilationDatabase>
getCompilationDatabase(int argc,char ** argv,std::string & ErrorMessage)717 getCompilationDatabase(int argc, char **argv, std::string &ErrorMessage) {
718   ParseArgs(argc, argv);
719 
720   if (!(CommandLine.empty() ^ CompilationDB.empty())) {
721     llvm::errs() << "The compilation command line must be provided either via "
722                     "'-compilation-database' or after '--'.";
723     return nullptr;
724   }
725 
726   if (!CompilationDB.empty())
727     return tooling::JSONCompilationDatabase::loadFromFile(
728         CompilationDB, ErrorMessage,
729         tooling::JSONCommandLineSyntax::AutoDetect);
730 
731   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
732       CompilerInstance::createDiagnostics(new DiagnosticOptions);
733   driver::Driver TheDriver(CommandLine[0], llvm::sys::getDefaultTargetTriple(),
734                            *Diags);
735   TheDriver.setCheckInputsExist(false);
736   std::unique_ptr<driver::Compilation> C(
737       TheDriver.BuildCompilation(CommandLine));
738   if (!C || C->getJobs().empty())
739     return nullptr;
740 
741   auto Cmd = C->getJobs().begin();
742   auto CI = std::make_unique<CompilerInvocation>();
743   CompilerInvocation::CreateFromArgs(*CI, Cmd->getArguments(), *Diags,
744                                      CommandLine[0]);
745   if (!CI)
746     return nullptr;
747 
748   FrontendOptions &FEOpts = CI->getFrontendOpts();
749   if (FEOpts.Inputs.size() != 1) {
750     llvm::errs()
751         << "Exactly one input file is required in the per-file mode ('--').\n";
752     return nullptr;
753   }
754 
755   // There might be multiple jobs for a compilation. Extract the specified
756   // output filename from the last job.
757   auto LastCmd = C->getJobs().end();
758   LastCmd--;
759   if (LastCmd->getOutputFilenames().size() != 1) {
760     llvm::errs()
761         << "Exactly one output file is required in the per-file mode ('--').\n";
762     return nullptr;
763   }
764   StringRef OutputFile = LastCmd->getOutputFilenames().front();
765 
766   class InplaceCompilationDatabase : public tooling::CompilationDatabase {
767   public:
768     InplaceCompilationDatabase(StringRef InputFile, StringRef OutputFile,
769                                ArrayRef<const char *> CommandLine)
770         : Command(".", InputFile, {}, OutputFile) {
771       for (auto *C : CommandLine)
772         Command.CommandLine.push_back(C);
773     }
774 
775     std::vector<tooling::CompileCommand>
776     getCompileCommands(StringRef FilePath) const override {
777       if (FilePath != Command.Filename)
778         return {};
779       return {Command};
780     }
781 
782     std::vector<std::string> getAllFiles() const override {
783       return {Command.Filename};
784     }
785 
786     std::vector<tooling::CompileCommand>
787     getAllCompileCommands() const override {
788       return {Command};
789     }
790 
791   private:
792     tooling::CompileCommand Command;
793   };
794 
795   return std::make_unique<InplaceCompilationDatabase>(
796       FEOpts.Inputs[0].getFile(), OutputFile, CommandLine);
797 }
798 
clang_scan_deps_main(int argc,char ** argv,const llvm::ToolContext &)799 int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
800   llvm::InitializeAllTargetInfos();
801   std::string ErrorMessage;
802   std::unique_ptr<tooling::CompilationDatabase> Compilations =
803       getCompilationDatabase(argc, argv, ErrorMessage);
804   if (!Compilations) {
805     llvm::errs() << ErrorMessage << "\n";
806     return 1;
807   }
808 
809   llvm::cl::PrintOptionValues();
810 
811   // Expand response files in advance, so that we can "see" all the arguments
812   // when adjusting below.
813   Compilations = expandResponseFiles(std::move(Compilations),
814                                      llvm::vfs::getRealFileSystem());
815 
816   Compilations = inferTargetAndDriverMode(std::move(Compilations));
817 
818   Compilations = inferToolLocation(std::move(Compilations));
819 
820   // The command options are rewritten to run Clang in preprocessor only mode.
821   auto AdjustingCompilations =
822       std::make_unique<tooling::ArgumentsAdjustingCompilations>(
823           std::move(Compilations));
824   ResourceDirectoryCache ResourceDirCache;
825 
826   AdjustingCompilations->appendArgumentsAdjuster(
827       [&ResourceDirCache](const tooling::CommandLineArguments &Args,
828                           StringRef FileName) {
829         std::string LastO;
830         bool HasResourceDir = false;
831         bool ClangCLMode = false;
832         auto FlagsEnd = llvm::find(Args, "--");
833         if (FlagsEnd != Args.begin()) {
834           ClangCLMode =
835               llvm::sys::path::stem(Args[0]).contains_insensitive("clang-cl") ||
836               llvm::is_contained(Args, "--driver-mode=cl");
837 
838           // Reverse scan, starting at the end or at the element before "--".
839           auto R = std::make_reverse_iterator(FlagsEnd);
840           auto E = Args.rend();
841           // Don't include Args[0] in the iteration; that's the executable, not
842           // an option.
843           if (E != R)
844             E--;
845           for (auto I = R; I != E; ++I) {
846             StringRef Arg = *I;
847             if (ClangCLMode) {
848               // Ignore arguments that are preceded by "-Xclang".
849               if ((I + 1) != E && I[1] == "-Xclang")
850                 continue;
851               if (LastO.empty()) {
852                 // With clang-cl, the output obj file can be specified with
853                 // "/opath", "/o path", "/Fopath", and the dash counterparts.
854                 // Also, clang-cl adds ".obj" extension if none is found.
855                 if ((Arg == "-o" || Arg == "/o") && I != R)
856                   LastO = I[-1]; // Next argument (reverse iterator)
857                 else if (Arg.starts_with("/Fo") || Arg.starts_with("-Fo"))
858                   LastO = Arg.drop_front(3).str();
859                 else if (Arg.starts_with("/o") || Arg.starts_with("-o"))
860                   LastO = Arg.drop_front(2).str();
861 
862                 if (!LastO.empty() && !llvm::sys::path::has_extension(LastO))
863                   LastO.append(".obj");
864               }
865             }
866             if (Arg == "-resource-dir")
867               HasResourceDir = true;
868           }
869         }
870         tooling::CommandLineArguments AdjustedArgs(Args.begin(), FlagsEnd);
871         // The clang-cl driver passes "-o -" to the frontend. Inject the real
872         // file here to ensure "-MT" can be deduced if need be.
873         if (ClangCLMode && !LastO.empty()) {
874           AdjustedArgs.push_back("/clang:-o");
875           AdjustedArgs.push_back("/clang:" + LastO);
876         }
877 
878         if (!HasResourceDir && ResourceDirRecipe == RDRK_InvokeCompiler) {
879           StringRef ResourceDir =
880               ResourceDirCache.findResourceDir(Args, ClangCLMode);
881           if (!ResourceDir.empty()) {
882             AdjustedArgs.push_back("-resource-dir");
883             AdjustedArgs.push_back(std::string(ResourceDir));
884           }
885         }
886         AdjustedArgs.insert(AdjustedArgs.end(), FlagsEnd, Args.end());
887         return AdjustedArgs;
888       });
889 
890   SharedStream Errs(llvm::errs());
891 
892   std::optional<llvm::raw_fd_ostream> FileOS;
893   llvm::raw_ostream &ThreadUnsafeDependencyOS = [&]() -> llvm::raw_ostream & {
894     if (OutputFileName == "-")
895       return llvm::outs();
896 
897     if (OutputFileName == "/dev/null")
898       return llvm::nulls();
899 
900     std::error_code EC;
901     FileOS.emplace(OutputFileName, EC);
902     if (EC) {
903       llvm::errs() << "Failed to open output file '" << OutputFileName
904                    << "': " << llvm::errorCodeToError(EC) << '\n';
905       std::exit(1);
906     }
907     return *FileOS;
908   }();
909   SharedStream DependencyOS(ThreadUnsafeDependencyOS);
910 
911   std::vector<tooling::CompileCommand> Inputs =
912       AdjustingCompilations->getAllCompileCommands();
913 
914   std::atomic<bool> HadErrors(false);
915   std::optional<FullDeps> FD;
916   P1689Deps PD;
917 
918   std::mutex Lock;
919   size_t Index = 0;
920   auto GetNextInputIndex = [&]() -> std::optional<size_t> {
921     std::unique_lock<std::mutex> LockGuard(Lock);
922     if (Index < Inputs.size())
923       return Index++;
924     return {};
925   };
926 
927   if (Format == ScanningOutputFormat::Full)
928     FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
929 
930   auto ScanningTask = [&](DependencyScanningService &Service) {
931     DependencyScanningTool WorkerTool(Service);
932 
933     llvm::DenseSet<ModuleID> AlreadySeenModules;
934     while (auto MaybeInputIndex = GetNextInputIndex()) {
935       size_t LocalIndex = *MaybeInputIndex;
936       const tooling::CompileCommand *Input = &Inputs[LocalIndex];
937       std::string Filename = std::move(Input->Filename);
938       std::string CWD = std::move(Input->Directory);
939 
940       std::optional<StringRef> MaybeModuleName;
941       if (!ModuleName.empty())
942         MaybeModuleName = ModuleName;
943 
944       std::string OutputDir(ModuleFilesDir);
945       if (OutputDir.empty())
946         OutputDir = getModuleCachePath(Input->CommandLine);
947       auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
948         return ::lookupModuleOutput(MID, MOK, OutputDir);
949       };
950 
951       // Run the tool on it.
952       if (Format == ScanningOutputFormat::Make) {
953         auto MaybeFile = WorkerTool.getDependencyFile(Input->CommandLine, CWD);
954         if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
955                                            Errs))
956           HadErrors = true;
957       } else if (Format == ScanningOutputFormat::P1689) {
958         // It is useful to generate the make-format dependency output during
959         // the scanning for P1689. Otherwise the users need to scan again for
960         // it. We will generate the make-format dependency output if we find
961         // `-MF` in the command lines.
962         std::string MakeformatOutputPath;
963         std::string MakeformatOutput;
964 
965         auto MaybeRule = WorkerTool.getP1689ModuleDependencyFile(
966             *Input, CWD, MakeformatOutput, MakeformatOutputPath);
967 
968         if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
969           HadErrors = true;
970 
971         if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
972             !HadErrors) {
973           static std::mutex Lock;
974           // With compilation database, we may open different files
975           // concurrently or we may write the same file concurrently. So we
976           // use a map here to allow multiple compile commands to write to the
977           // same file. Also we need a lock here to avoid data race.
978           static llvm::StringMap<llvm::raw_fd_ostream> OSs;
979           std::unique_lock<std::mutex> LockGuard(Lock);
980 
981           auto OSIter = OSs.find(MakeformatOutputPath);
982           if (OSIter == OSs.end()) {
983             std::error_code EC;
984             OSIter =
985                 OSs.try_emplace(MakeformatOutputPath, MakeformatOutputPath, EC)
986                     .first;
987             if (EC)
988               llvm::errs() << "Failed to open P1689 make format output file \""
989                            << MakeformatOutputPath << "\" for " << EC.message()
990                            << "\n";
991           }
992 
993           SharedStream MakeformatOS(OSIter->second);
994           llvm::Expected<std::string> MaybeOutput(MakeformatOutput);
995           if (handleMakeDependencyToolResult(Filename, MaybeOutput,
996                                              MakeformatOS, Errs))
997             HadErrors = true;
998         }
999       } else if (MaybeModuleName) {
1000         auto MaybeModuleDepsGraph = WorkerTool.getModuleDependencies(
1001             *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
1002             LookupOutput);
1003         if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
1004                                LocalIndex, DependencyOS, Errs))
1005           HadErrors = true;
1006       } else {
1007         auto MaybeTUDeps = WorkerTool.getTranslationUnitDependencies(
1008             Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
1009         if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD, LocalIndex,
1010                                         DependencyOS, Errs))
1011           HadErrors = true;
1012       }
1013     }
1014   };
1015 
1016   DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
1017                                     EagerLoadModules);
1018 
1019   llvm::Timer T;
1020   T.startTimer();
1021 
1022   if (Inputs.size() == 1) {
1023     ScanningTask(Service);
1024   } else {
1025     llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
1026 
1027     if (Verbose) {
1028       llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
1029                    << " files using " << Pool.getMaxConcurrency()
1030                    << " workers\n";
1031     }
1032 
1033     for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
1034       Pool.async([ScanningTask, &Service]() { ScanningTask(Service); });
1035 
1036     Pool.wait();
1037   }
1038 
1039   T.stopTimer();
1040   if (PrintTiming)
1041     llvm::errs() << llvm::format(
1042         "clang-scan-deps timing: %0.2fs wall, %0.2fs process\n",
1043         T.getTotalTime().getWallTime(), T.getTotalTime().getProcessTime());
1044 
1045   if (RoundTripArgs)
1046     if (FD && FD->roundTripCommands(llvm::errs()))
1047       HadErrors = true;
1048 
1049   if (Format == ScanningOutputFormat::Full)
1050     FD->printFullOutput(ThreadUnsafeDependencyOS);
1051   else if (Format == ScanningOutputFormat::P1689)
1052     PD.printDependencies(ThreadUnsafeDependencyOS);
1053 
1054   return HadErrors;
1055 }
1056