1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Basic/Module.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Frontend/CompilerInvocation.h"
16 #include "clang/Frontend/Utils.h"
17 #include "clang/Lex/HeaderSearch.h"
18 #include "clang/Lex/PPCallbacks.h"
19 #include "clang/Serialization/ASTReader.h"
20 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/Hashing.h"
23 #include "llvm/ADT/StringSet.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <optional>
26 #include <string>
27 #include <unordered_map>
28 #include <variant>
29
30 namespace clang {
31 namespace tooling {
32 namespace dependencies {
33
// Forward declarations. This header only refers to these types by reference,
// so their full definitions are not needed here.
class DependencyActionController;
class DependencyConsumer;
36
37 /// Modular dependency that has already been built prior to the dependency scan.
38 struct PrebuiltModuleDep {
39 std::string ModuleName;
40 std::string PCMFile;
41 std::string ModuleMapFile;
42
PrebuiltModuleDepPrebuiltModuleDep43 explicit PrebuiltModuleDep(const Module *M)
44 : ModuleName(M->getTopLevelModuleName()),
45 PCMFile(M->getASTFile()->getName()),
46 ModuleMapFile(M->PresumedModuleMapFile) {}
47 };
48
49 /// This is used to identify a specific module.
struct ModuleID {
  /// Name of the module. For C++20 this can contain `:` (module partitions)
  /// or be a header-name (header units).
  std::string ModuleName;

  /// Hash representing the compiler options that affect the resulting
  /// command-line invocation.
  ///
  /// Same name and same \c ContextHash but different invocations could lead
  /// to non-deterministic build results.
  ///
  /// Same name but different \c ContextHash: treat the two as separate
  /// modules for the purpose of a build.
  std::string ContextHash;

  /// Equal when both the name and the context hash match.
  bool operator==(const ModuleID &Other) const {
    return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash;
  }

  /// Lexicographic order: by module name first, then by context hash.
  bool operator<(const ModuleID &Other) const {
    if (ModuleName != Other.ModuleName)
      return ModuleName < Other.ModuleName;
    return ContextHash < Other.ContextHash;
  }
};
75
76 /// P1689ModuleInfo - Represents the needed information of standard C++20
77 /// modules for P1689 format.
struct P1689ModuleInfo {
  /// Module name; partitions are spelled with a `:`.
  std::string ModuleName;

  /// Source path to the module. Optional.
  std::string SourcePath;

  /// Whether this module is a standard C++ interface unit (true by default).
  bool IsStdCXXModuleInterface{true};

  /// Kind of module described by this record.
  enum class ModuleType {
    NamedCXXModule
    // Not supported yet:
    // AngleHeaderUnit,
    // QuoteHeaderUnit
  };
  /// Defaults to the only kind currently available.
  ModuleType Type{ModuleType::NamedCXXModule};
};
96
97 /// An output from a module compilation, such as the path of the module file.
enum class ModuleOutputKind {
  /// Module file (.pcm). This output is required.
  ModuleFile,
  /// Path of the dependency file (.d), when there is one.
  DependencyFile,
  /// Null-separated list of names used as targets in the dependency file,
  /// when there is one. As in the driver, the default is the value of
  /// \c ModuleFile.
  DependencyTargets,
  /// Path of the serialized diagnostic file (.dia), when there is one.
  DiagnosticSerializationFile,
};
109
110 struct ModuleDeps {
111 /// The identifier of the module.
112 ModuleID ID;
113
114 /// Whether this is a "system" module.
115 bool IsSystem;
116
117 /// The path to the modulemap file which defines this module.
118 ///
119 /// This can be used to explicitly build this module. This file will
120 /// additionally appear in \c FileDeps as a dependency.
121 std::string ClangModuleMapFile;
122
123 /// A collection of absolute paths to files that this module directly depends
124 /// on, not including transitive dependencies.
125 llvm::StringSet<> FileDeps;
126
127 /// A collection of absolute paths to module map files that this module needs
128 /// to know about. The ordering is significant.
129 std::vector<std::string> ModuleMapFileDeps;
130
131 /// A collection of prebuilt modular dependencies this module directly depends
132 /// on, not including transitive dependencies.
133 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
134
135 /// A list of module identifiers this module directly depends on, not
136 /// including transitive dependencies.
137 ///
138 /// This may include modules with a different context hash when it can be
139 /// determined that the differences are benign for this compilation.
140 std::vector<ModuleID> ClangModuleDeps;
141
142 /// The set of libraries or frameworks to link against when
143 /// an entity from this module is used.
144 llvm::SmallVector<Module::LinkLibrary, 2> LinkLibraries;
145
146 /// Get (or compute) the compiler invocation that can be used to build this
147 /// module. Does not include argv[0].
148 const std::vector<std::string> &getBuildArguments();
149
150 private:
151 friend class ModuleDepCollectorPP;
152
153 std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>>
154 BuildInfo;
155 };
156
157 using PrebuiltModuleVFSMapT = llvm::StringMap<llvm::StringSet<>>;
158
159 class ModuleDepCollector;
160
161 /// Callback that records textual includes and direct modular includes/imports
162 /// during preprocessing. At the end of the main file, it also collects
163 /// transitive modular dependencies and passes everything to the
164 /// \c DependencyConsumer of the parent \c ModuleDepCollector.
165 class ModuleDepCollectorPP final : public PPCallbacks {
166 public:
ModuleDepCollectorPP(ModuleDepCollector & MDC)167 ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}
168
169 void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
170 SrcMgr::CharacteristicKind FileType, FileID PrevFID,
171 SourceLocation Loc) override;
172 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
173 StringRef FileName, bool IsAngled,
174 CharSourceRange FilenameRange,
175 OptionalFileEntryRef File, StringRef SearchPath,
176 StringRef RelativePath, const Module *SuggestedModule,
177 bool ModuleImported,
178 SrcMgr::CharacteristicKind FileType) override;
179 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
180 const Module *Imported) override;
181
182 void EndOfMainFile() override;
183
184 private:
185 /// The parent dependency collector.
186 ModuleDepCollector &MDC;
187
188 void handleImport(const Module *Imported);
189
190 /// Adds direct modular dependencies that have already been built to the
191 /// ModuleDeps instance.
192 void
193 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
194 llvm::DenseSet<const Module *> &SeenSubmodules);
195 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
196 llvm::DenseSet<const Module *> &SeenSubmodules);
197
198 /// Traverses the previously collected direct modular dependencies to discover
199 /// transitive modular dependencies and fills the parent \c ModuleDepCollector
200 /// with both.
201 /// Returns the ID or nothing if the dependency is spurious and is ignored.
202 std::optional<ModuleID> handleTopLevelModule(const Module *M);
203 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
204 llvm::DenseSet<const Module *> &AddedModules);
205 void addModuleDep(const Module *M, ModuleDeps &MD,
206 llvm::DenseSet<const Module *> &AddedModules);
207
208 /// Traverses the affecting modules and updates \c MD with references to the
209 /// parent \c ModuleDepCollector info.
210 void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
211 llvm::DenseSet<const Module *> &AddedModules);
212 void addAffectingClangModule(const Module *M, ModuleDeps &MD,
213 llvm::DenseSet<const Module *> &AddedModules);
214 };
215
216 /// Collects modular and non-modular dependencies of the main file by attaching
217 /// \c ModuleDepCollectorPP to the preprocessor.
218 class ModuleDepCollector final : public DependencyCollector {
219 public:
220 ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
221 CompilerInstance &ScanInstance, DependencyConsumer &C,
222 DependencyActionController &Controller,
223 CompilerInvocation OriginalCI,
224 PrebuiltModuleVFSMapT PrebuiltModuleVFSMap,
225 ScanningOptimizations OptimizeArgs, bool EagerLoadModules,
226 bool IsStdModuleP1689Format);
227
228 void attachToPreprocessor(Preprocessor &PP) override;
229 void attachToASTReader(ASTReader &R) override;
230
231 /// Apply any changes implied by the discovered dependencies to the given
232 /// invocation, (e.g. disable implicit modules, add explicit module paths).
233 void applyDiscoveredDependencies(CompilerInvocation &CI);
234
235 private:
236 friend ModuleDepCollectorPP;
237
238 /// The compiler instance for scanning the current translation unit.
239 CompilerInstance &ScanInstance;
240 /// The consumer of collected dependency information.
241 DependencyConsumer &Consumer;
242 /// Callbacks for computing dependency information.
243 DependencyActionController &Controller;
244 /// Mapping from prebuilt AST files to their sorted list of VFS overlay files.
245 PrebuiltModuleVFSMapT PrebuiltModuleVFSMap;
246 /// Path to the main source file.
247 std::string MainFile;
248 /// Hash identifying the compilation conditions of the current TU.
249 std::string ContextHash;
250 /// Non-modular file dependencies. This includes the main source file and
251 /// textually included header files.
252 std::vector<std::string> FileDeps;
253 /// Direct and transitive modular dependencies of the main source file.
254 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
255 /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
256 /// a preprocessor. Storage owned by \c ModularDeps.
257 llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
258 /// Direct modular dependencies that have already been built.
259 llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
260 /// Working set of direct modular dependencies.
261 llvm::SetVector<const Module *> DirectModularDeps;
262 /// Options that control the dependency output generation.
263 std::unique_ptr<DependencyOutputOptions> Opts;
264 /// A Clang invocation that's based on the original TU invocation and that has
265 /// been partially transformed into one that can perform explicit build of
266 /// a discovered modular dependency. Note that this still needs to be adjusted
267 /// for each individual module.
268 CowCompilerInvocation CommonInvocation;
269 /// Whether to optimize the modules' command-line arguments.
270 ScanningOptimizations OptimizeArgs;
271 /// Whether to set up command-lines to load PCM files eagerly.
272 bool EagerLoadModules;
273 /// If we're generating dependency output in P1689 format
274 /// for standard C++ modules.
275 bool IsStdModuleP1689Format;
276
277 std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
278 std::vector<P1689ModuleInfo> RequiredStdCXXModules;
279
280 /// Checks whether the module is known as being prebuilt.
281 bool isPrebuiltModule(const Module *M);
282
283 /// Adds \p Path to \c FileDeps, making it absolute if necessary.
284 void addFileDep(StringRef Path);
285 /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
286 void addFileDep(ModuleDeps &MD, StringRef Path);
287
288 /// Get a Clang invocation adjusted to build the given modular dependency.
289 /// This excludes paths that are yet-to-be-provided by the build system.
290 CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
291 const ModuleDeps &Deps,
292 llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;
293
294 /// Collect module map files for given modules.
295 llvm::DenseSet<const FileEntry *>
296 collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
297
298 /// Add module map files to the invocation, if needed.
299 void addModuleMapFiles(CompilerInvocation &CI,
300 ArrayRef<ModuleID> ClangModuleDeps) const;
301 /// Add module files (pcm) to the invocation, if needed.
302 void addModuleFiles(CompilerInvocation &CI,
303 ArrayRef<ModuleID> ClangModuleDeps) const;
304 void addModuleFiles(CowCompilerInvocation &CI,
305 ArrayRef<ModuleID> ClangModuleDeps) const;
306
307 /// Add paths that require looking up outputs to the given dependencies.
308 void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);
309
310 /// Compute the context hash for \p Deps, and create the mapping
311 /// \c ModuleDepsByID[Deps.ID] = &Deps.
312 void associateWithContextHash(const CowCompilerInvocation &CI,
313 ModuleDeps &Deps);
314 };
315
316 /// Resets codegen options that don't affect modules/PCH.
317 void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction,
318 const LangOptions &LangOpts,
319 CodeGenOptions &CGOpts);
320
321 } // end namespace dependencies
322 } // end namespace tooling
323 } // end namespace clang
324
325 namespace llvm {
hash_value(const clang::tooling::dependencies::ModuleID & ID)326 inline hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID) {
327 return hash_combine(ID.ModuleName, ID.ContextHash);
328 }
329
330 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
331 using ModuleID = clang::tooling::dependencies::ModuleID;
332 static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
333 static inline ModuleID getTombstoneKey() {
334 return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
335 }
336 static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); }
337 static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
338 return LHS == RHS;
339 }
340 };
341 } // namespace llvm
342
343 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
344