xref: /freebsd/contrib/llvm-project/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11 
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Basic/Module.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Frontend/CompilerInvocation.h"
16 #include "clang/Frontend/Utils.h"
17 #include "clang/Lex/HeaderSearch.h"
18 #include "clang/Lex/PPCallbacks.h"
19 #include "clang/Serialization/ASTReader.h"
20 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/Hashing.h"
23 #include "llvm/ADT/StringSet.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <optional>
26 #include <string>
27 #include <unordered_map>
28 #include <variant>
29 
30 namespace clang {
31 namespace tooling {
32 namespace dependencies {
33 
34 class DependencyActionController;
35 class DependencyConsumer;
36 class PrebuiltModuleASTAttrs;
37 
38 /// Modular dependency that has already been built prior to the dependency scan.
39 struct PrebuiltModuleDep {
40   std::string ModuleName;
41   std::string PCMFile;
42   std::string ModuleMapFile;
43 
PrebuiltModuleDepPrebuiltModuleDep44   explicit PrebuiltModuleDep(const Module *M)
45       : ModuleName(M->getTopLevelModuleName()),
46         PCMFile(M->getASTFile()->getName()),
47         ModuleMapFile(M->PresumedModuleMapFile) {}
48 };
49 
50 /// Attributes loaded from AST files of prebuilt modules collected prior to
51 /// ModuleDepCollector creation.
52 using PrebuiltModulesAttrsMap = llvm::StringMap<PrebuiltModuleASTAttrs>;
53 class PrebuiltModuleASTAttrs {
54 public:
55   /// When a module is discovered to not be in stable directories, traverse &
56   /// update all modules that depend on it.
57   void
58   updateDependentsNotInStableDirs(PrebuiltModulesAttrsMap &PrebuiltModulesMap);
59 
60   /// Read-only access to whether the module is made up of dependencies in
61   /// stable directories.
isInStableDir()62   bool isInStableDir() const { return IsInStableDirs; }
63 
64   /// Read-only access to vfs map files.
getVFS()65   const llvm::StringSet<> &getVFS() const { return VFSMap; }
66 
67   /// Update the VFSMap to the one discovered from serializing the AST file.
setVFS(llvm::StringSet<> && VFS)68   void setVFS(llvm::StringSet<> &&VFS) { VFSMap = std::move(VFS); }
69 
70   /// Add a direct dependent module file, so it can be updated if the current
71   /// module is from stable directores.
addDependent(StringRef ModuleFile)72   void addDependent(StringRef ModuleFile) {
73     ModuleFileDependents.insert(ModuleFile);
74   }
75 
76   /// Update whether the prebuilt module resolves entirely in a stable
77   /// directories.
78   void setInStableDir(bool V = false) {
79     // Cannot reset attribute once it's false.
80     if (!IsInStableDirs)
81       return;
82     IsInStableDirs = V;
83   }
84 
85 private:
86   llvm::StringSet<> VFSMap;
87   bool IsInStableDirs = true;
88   std::set<StringRef> ModuleFileDependents;
89 };
90 
/// Uniquely identifies a module by its name together with its context hash.
struct ModuleID {
  /// The name of the module. This may include `:` for C++20 module partitions,
  /// or a header-name for C++20 header units.
  std::string ModuleName;

  /// The context hash of a module represents the compiler options that affect
  /// the resulting command-line invocation.
  ///
  /// Modules with the same name and ContextHash but different invocations could
  /// cause non-deterministic build results.
  ///
  /// Modules with the same name but a different \c ContextHash should be
  /// treated as separate modules for the purpose of a build.
  std::string ContextHash;

  /// Two IDs are equal when both the module name and the context hash match.
  bool operator==(const ModuleID &Other) const {
    return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash;
  }

  /// Lexicographic ordering: by module name first, then by context hash.
  bool operator<(const ModuleID &Other) const {
    const int NameOrder = ModuleName.compare(Other.ModuleName);
    if (NameOrder != 0)
      return NameOrder < 0;
    // Names are identical; the context hash breaks the tie.
    return ContextHash < Other.ContextHash;
  }
};
117 
/// P1689ModuleInfo - Holds the information about a standard C++20 module that
/// the P1689 format requires.
struct P1689ModuleInfo {
  /// The name of the module. This may include `:` for partitions.
  std::string ModuleName;

  /// Optional. The source path to the module.
  std::string SourcePath;

  /// Whether this module is a standard C++ interface unit.
  bool IsStdCXXModuleInterface = true;

  /// The kind of module described. Only named C++ modules are supported for
  /// now.
  enum class ModuleType {
    NamedCXXModule
    // To be supported
    // AngleHeaderUnit,
    // QuoteHeaderUnit
  };

  /// Kind of this module; defaults to the only supported kind.
  ModuleType Type = ModuleType::NamedCXXModule;
};
138 
/// Identifies one output of a module compilation, e.g. the path of the
/// resulting module file.
enum class ModuleOutputKind {
  /// The module file (.pcm). Required.
  ModuleFile,

  /// The path of the dependency file (.d), if any.
  DependencyFile,

  /// The null-separated list of names to use as the targets in the dependency
  /// file, if any. Defaults to the value of \c ModuleFile, as in the driver.
  DependencyTargets,

  /// The path of the serialized diagnostic file (.dia), if any.
  DiagnosticSerializationFile,
};
151 
152 struct ModuleDeps {
153   /// The identifier of the module.
154   ModuleID ID;
155 
156   /// Whether this is a "system" module.
157   bool IsSystem;
158 
159   /// Whether this module is fully composed of file & module inputs from
160   /// locations likely to stay the same across the active development and build
161   /// cycle. For example, when all those input paths only resolve in Sysroot.
162   ///
163   /// External paths, as opposed to virtual file paths, are always used
164   /// for computing this value.
165   bool IsInStableDirectories;
166 
167   /// The path to the modulemap file which defines this module.
168   ///
169   /// This can be used to explicitly build this module. This file will
170   /// additionally appear in \c FileDeps as a dependency.
171   std::string ClangModuleMapFile;
172 
173   /// A collection of absolute paths to module map files that this module needs
174   /// to know about. The ordering is significant.
175   std::vector<std::string> ModuleMapFileDeps;
176 
177   /// A collection of prebuilt modular dependencies this module directly depends
178   /// on, not including transitive dependencies.
179   std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
180 
181   /// A list of module identifiers this module directly depends on, not
182   /// including transitive dependencies.
183   ///
184   /// This may include modules with a different context hash when it can be
185   /// determined that the differences are benign for this compilation.
186   std::vector<ModuleID> ClangModuleDeps;
187 
188   /// The set of libraries or frameworks to link against when
189   /// an entity from this module is used.
190   llvm::SmallVector<Module::LinkLibrary, 2> LinkLibraries;
191 
192   /// Invokes \c Cb for all file dependencies of this module. Each provided
193   /// \c StringRef is only valid within the individual callback invocation.
194   void forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const;
195 
196   /// Get (or compute) the compiler invocation that can be used to build this
197   /// module. Does not include argv[0].
198   const std::vector<std::string> &getBuildArguments() const;
199 
200 private:
201   friend class ModuleDepCollector;
202   friend class ModuleDepCollectorPP;
203 
204   /// The base directory for relative paths in \c FileDeps.
205   std::string FileDepsBaseDir;
206 
207   /// A collection of paths to files that this module directly depends on, not
208   /// including transitive dependencies.
209   std::vector<std::string> FileDeps;
210 
211   mutable std::variant<std::monostate, CowCompilerInvocation,
212                        std::vector<std::string>>
213       BuildInfo;
214 };
215 
216 class ModuleDepCollector;
217 
218 /// Callback that records textual includes and direct modular includes/imports
219 /// during preprocessing. At the end of the main file, it also collects
220 /// transitive modular dependencies and passes everything to the
221 /// \c DependencyConsumer of the parent \c ModuleDepCollector.
222 class ModuleDepCollectorPP final : public PPCallbacks {
223 public:
ModuleDepCollectorPP(ModuleDepCollector & MDC)224   ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}
225 
226   void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
227                         SrcMgr::CharacteristicKind FileType, FileID PrevFID,
228                         SourceLocation Loc) override;
229   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
230                           StringRef FileName, bool IsAngled,
231                           CharSourceRange FilenameRange,
232                           OptionalFileEntryRef File, StringRef SearchPath,
233                           StringRef RelativePath, const Module *SuggestedModule,
234                           bool ModuleImported,
235                           SrcMgr::CharacteristicKind FileType) override;
236   void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
237                     const Module *Imported) override;
238 
239   void EndOfMainFile() override;
240 
241 private:
242   /// The parent dependency collector.
243   ModuleDepCollector &MDC;
244 
245   void handleImport(const Module *Imported);
246 
247   /// Adds direct modular dependencies that have already been built to the
248   /// ModuleDeps instance.
249   void
250   addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
251                               llvm::DenseSet<const Module *> &SeenSubmodules);
252   void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
253                              llvm::DenseSet<const Module *> &SeenSubmodules);
254 
255   /// Traverses the previously collected direct modular dependencies to discover
256   /// transitive modular dependencies and fills the parent \c ModuleDepCollector
257   /// with both.
258   /// Returns the ID or nothing if the dependency is spurious and is ignored.
259   std::optional<ModuleID> handleTopLevelModule(const Module *M);
260   void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
261                            llvm::DenseSet<const Module *> &AddedModules);
262   void addModuleDep(const Module *M, ModuleDeps &MD,
263                     llvm::DenseSet<const Module *> &AddedModules);
264 
265   /// Traverses the affecting modules and updates \c MD with references to the
266   /// parent \c ModuleDepCollector info.
267   void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
268                               llvm::DenseSet<const Module *> &AddedModules);
269   void addAffectingClangModule(const Module *M, ModuleDeps &MD,
270                           llvm::DenseSet<const Module *> &AddedModules);
271 
272   /// Add discovered module dependency for the given module.
273   void addOneModuleDep(const Module *M, const ModuleID ID, ModuleDeps &MD);
274 };
275 
276 /// Collects modular and non-modular dependencies of the main file by attaching
277 /// \c ModuleDepCollectorPP to the preprocessor.
278 class ModuleDepCollector final : public DependencyCollector {
279 public:
280   ModuleDepCollector(DependencyScanningService &Service,
281                      std::unique_ptr<DependencyOutputOptions> Opts,
282                      CompilerInstance &ScanInstance, DependencyConsumer &C,
283                      DependencyActionController &Controller,
284                      CompilerInvocation OriginalCI,
285                      const PrebuiltModulesAttrsMap PrebuiltModulesASTMap,
286                      const ArrayRef<StringRef> StableDirs);
287 
288   void attachToPreprocessor(Preprocessor &PP) override;
289   void attachToASTReader(ASTReader &R) override;
290 
291   /// Apply any changes implied by the discovered dependencies to the given
292   /// invocation, (e.g. disable implicit modules, add explicit module paths).
293   void applyDiscoveredDependencies(CompilerInvocation &CI);
294 
295 private:
296   friend ModuleDepCollectorPP;
297 
298   /// The parent dependency scanning service.
299   DependencyScanningService &Service;
300   /// The compiler instance for scanning the current translation unit.
301   CompilerInstance &ScanInstance;
302   /// The consumer of collected dependency information.
303   DependencyConsumer &Consumer;
304   /// Callbacks for computing dependency information.
305   DependencyActionController &Controller;
306   /// Mapping from prebuilt AST filepaths to their attributes referenced during
307   /// dependency collecting.
308   const PrebuiltModulesAttrsMap PrebuiltModulesASTMap;
309   /// Directory paths known to be stable through an active development and build
310   /// cycle.
311   const ArrayRef<StringRef> StableDirs;
312   /// Path to the main source file.
313   std::string MainFile;
314   /// Non-modular file dependencies. This includes the main source file and
315   /// textually included header files.
316   std::vector<std::string> FileDeps;
317   /// Direct and transitive modular dependencies of the main source file.
318   llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
319   /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
320   /// a preprocessor. Storage owned by \c ModularDeps.
321   llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
322   /// Direct modular dependencies that have already been built.
323   llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
324   /// Working set of direct modular dependencies.
325   llvm::SetVector<const Module *> DirectModularDeps;
326   /// Working set of direct modular dependencies, as they were imported.
327   llvm::SmallPtrSet<const Module *, 32> DirectImports;
328   /// All direct and transitive visible modules.
329   llvm::StringSet<> VisibleModules;
330 
331   /// Options that control the dependency output generation.
332   std::unique_ptr<DependencyOutputOptions> Opts;
333   /// A Clang invocation that's based on the original TU invocation and that has
334   /// been partially transformed into one that can perform explicit build of
335   /// a discovered modular dependency. Note that this still needs to be adjusted
336   /// for each individual module.
337   CowCompilerInvocation CommonInvocation;
338 
339   std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
340   std::vector<P1689ModuleInfo> RequiredStdCXXModules;
341 
342   /// Checks whether the module is known as being prebuilt.
343   bool isPrebuiltModule(const Module *M);
344 
345   /// Computes all visible modules resolved from direct imports.
346   void addVisibleModules();
347 
348   /// Adds \p Path to \c FileDeps, making it absolute if necessary.
349   void addFileDep(StringRef Path);
350   /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
351   void addFileDep(ModuleDeps &MD, StringRef Path);
352 
353   /// Get a Clang invocation adjusted to build the given modular dependency.
354   /// This excludes paths that are yet-to-be-provided by the build system.
355   CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
356       const ModuleDeps &Deps,
357       llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;
358 
359   /// Collect module map files for given modules.
360   llvm::DenseSet<const FileEntry *>
361   collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
362 
363   /// Add module map files to the invocation, if needed.
364   void addModuleMapFiles(CompilerInvocation &CI,
365                          ArrayRef<ModuleID> ClangModuleDeps) const;
366   /// Add module files (pcm) to the invocation, if needed.
367   void addModuleFiles(CompilerInvocation &CI,
368                       ArrayRef<ModuleID> ClangModuleDeps) const;
369   void addModuleFiles(CowCompilerInvocation &CI,
370                       ArrayRef<ModuleID> ClangModuleDeps) const;
371 
372   /// Add paths that require looking up outputs to the given dependencies.
373   void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);
374 
375   /// Compute the context hash for \p Deps, and create the mapping
376   /// \c ModuleDepsByID[Deps.ID] = &Deps.
377   void associateWithContextHash(const CowCompilerInvocation &CI, bool IgnoreCWD,
378                                 ModuleDeps &Deps);
379 };
380 
381 /// Resets codegen options that don't affect modules/PCH.
382 void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction,
383                                const LangOptions &LangOpts,
384                                CodeGenOptions &CGOpts);
385 
386 /// Determine if \c Input can be resolved within a stable directory.
387 ///
388 /// \param Directories Paths known to be in a stable location. e.g. Sysroot.
389 /// \param Input Path to evaluate.
390 bool isPathInStableDir(const ArrayRef<StringRef> Directories,
391                        const StringRef Input);
392 
393 /// Determine if options collected from a module's
394 /// compilation can safely be considered as stable.
395 ///
396 /// \param Directories Paths known to be in a stable location. e.g. Sysroot.
397 /// \param HSOpts Header search options derived from the compiler invocation.
398 bool areOptionsInStableDir(const ArrayRef<StringRef> Directories,
399                            const HeaderSearchOptions &HSOpts);
400 
401 } // end namespace dependencies
402 } // end namespace tooling
403 } // end namespace clang
404 
405 namespace llvm {
hash_value(const clang::tooling::dependencies::ModuleID & ID)406 inline hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID) {
407   return hash_combine(ID.ModuleName, ID.ContextHash);
408 }
409 
410 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
411   using ModuleID = clang::tooling::dependencies::ModuleID;
412   static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
413   static inline ModuleID getTombstoneKey() {
414     return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
415   }
416   static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); }
417   static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
418     return LHS == RHS;
419   }
420 };
421 } // namespace llvm
422 
423 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
424