xref: /freebsd/contrib/llvm-project/clang/lib/Frontend/ModuleDependencyCollector.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===--- ModuleDependencyCollector.cpp - Collect module dependencies ------===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric //
9*0b57cec5SDimitry Andric // Collect the dependencies of a set of modules.
10*0b57cec5SDimitry Andric //
11*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
12*0b57cec5SDimitry Andric 
13*0b57cec5SDimitry Andric #include "clang/Basic/CharInfo.h"
14*0b57cec5SDimitry Andric #include "clang/Frontend/Utils.h"
15*0b57cec5SDimitry Andric #include "clang/Lex/Preprocessor.h"
16*0b57cec5SDimitry Andric #include "clang/Serialization/ASTReader.h"
17*0b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h"
18*0b57cec5SDimitry Andric #include "llvm/Config/llvm-config.h"
19*0b57cec5SDimitry Andric #include "llvm/Support/FileSystem.h"
20*0b57cec5SDimitry Andric #include "llvm/Support/Path.h"
21*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
22*0b57cec5SDimitry Andric 
23*0b57cec5SDimitry Andric using namespace clang;
24*0b57cec5SDimitry Andric 
25*0b57cec5SDimitry Andric namespace {
26*0b57cec5SDimitry Andric /// Private implementations for ModuleDependencyCollector
27*0b57cec5SDimitry Andric class ModuleDependencyListener : public ASTReaderListener {
28*0b57cec5SDimitry Andric   ModuleDependencyCollector &Collector;
29*0b57cec5SDimitry Andric public:
30*0b57cec5SDimitry Andric   ModuleDependencyListener(ModuleDependencyCollector &Collector)
31*0b57cec5SDimitry Andric       : Collector(Collector) {}
32*0b57cec5SDimitry Andric   bool needsInputFileVisitation() override { return true; }
33*0b57cec5SDimitry Andric   bool needsSystemInputFileVisitation() override { return true; }
34*0b57cec5SDimitry Andric   bool visitInputFile(StringRef Filename, bool IsSystem, bool IsOverridden,
35*0b57cec5SDimitry Andric                       bool IsExplicitModule) override {
36*0b57cec5SDimitry Andric     Collector.addFile(Filename);
37*0b57cec5SDimitry Andric     return true;
38*0b57cec5SDimitry Andric   }
39*0b57cec5SDimitry Andric };
40*0b57cec5SDimitry Andric 
41*0b57cec5SDimitry Andric struct ModuleDependencyPPCallbacks : public PPCallbacks {
42*0b57cec5SDimitry Andric   ModuleDependencyCollector &Collector;
43*0b57cec5SDimitry Andric   SourceManager &SM;
44*0b57cec5SDimitry Andric   ModuleDependencyPPCallbacks(ModuleDependencyCollector &Collector,
45*0b57cec5SDimitry Andric                               SourceManager &SM)
46*0b57cec5SDimitry Andric       : Collector(Collector), SM(SM) {}
47*0b57cec5SDimitry Andric 
48*0b57cec5SDimitry Andric   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
49*0b57cec5SDimitry Andric                           StringRef FileName, bool IsAngled,
50*0b57cec5SDimitry Andric                           CharSourceRange FilenameRange, const FileEntry *File,
51*0b57cec5SDimitry Andric                           StringRef SearchPath, StringRef RelativePath,
52*0b57cec5SDimitry Andric                           const Module *Imported,
53*0b57cec5SDimitry Andric                           SrcMgr::CharacteristicKind FileType) override {
54*0b57cec5SDimitry Andric     if (!File)
55*0b57cec5SDimitry Andric       return;
56*0b57cec5SDimitry Andric     Collector.addFile(File->getName());
57*0b57cec5SDimitry Andric   }
58*0b57cec5SDimitry Andric };
59*0b57cec5SDimitry Andric 
60*0b57cec5SDimitry Andric struct ModuleDependencyMMCallbacks : public ModuleMapCallbacks {
61*0b57cec5SDimitry Andric   ModuleDependencyCollector &Collector;
62*0b57cec5SDimitry Andric   ModuleDependencyMMCallbacks(ModuleDependencyCollector &Collector)
63*0b57cec5SDimitry Andric       : Collector(Collector) {}
64*0b57cec5SDimitry Andric 
65*0b57cec5SDimitry Andric   void moduleMapAddHeader(StringRef HeaderPath) override {
66*0b57cec5SDimitry Andric     if (llvm::sys::path::is_absolute(HeaderPath))
67*0b57cec5SDimitry Andric       Collector.addFile(HeaderPath);
68*0b57cec5SDimitry Andric   }
69*0b57cec5SDimitry Andric   void moduleMapAddUmbrellaHeader(FileManager *FileMgr,
70*0b57cec5SDimitry Andric                                   const FileEntry *Header) override {
71*0b57cec5SDimitry Andric     StringRef HeaderFilename = Header->getName();
72*0b57cec5SDimitry Andric     moduleMapAddHeader(HeaderFilename);
73*0b57cec5SDimitry Andric     // The FileManager can find and cache the symbolic link for a framework
74*0b57cec5SDimitry Andric     // header before its real path, this means a module can have some of its
75*0b57cec5SDimitry Andric     // headers to use other paths. Although this is usually not a problem, it's
76*0b57cec5SDimitry Andric     // inconsistent, and not collecting the original path header leads to
77*0b57cec5SDimitry Andric     // umbrella clashes while rebuilding modules in the crash reproducer. For
78*0b57cec5SDimitry Andric     // example:
79*0b57cec5SDimitry Andric     //    ApplicationServices.framework/Frameworks/ImageIO.framework/ImageIO.h
80*0b57cec5SDimitry Andric     // instead of:
81*0b57cec5SDimitry Andric     //    ImageIO.framework/ImageIO.h
82*0b57cec5SDimitry Andric     //
83*0b57cec5SDimitry Andric     // FIXME: this shouldn't be necessary once we have FileName instances
84*0b57cec5SDimitry Andric     // around instead of FileEntry ones. For now, make sure we collect all
85*0b57cec5SDimitry Andric     // that we need for the reproducer to work correctly.
86*0b57cec5SDimitry Andric     StringRef UmbreallDirFromHeader =
87*0b57cec5SDimitry Andric         llvm::sys::path::parent_path(HeaderFilename);
88*0b57cec5SDimitry Andric     StringRef UmbrellaDir = Header->getDir()->getName();
89*0b57cec5SDimitry Andric     if (!UmbrellaDir.equals(UmbreallDirFromHeader)) {
90*0b57cec5SDimitry Andric       SmallString<128> AltHeaderFilename;
91*0b57cec5SDimitry Andric       llvm::sys::path::append(AltHeaderFilename, UmbrellaDir,
92*0b57cec5SDimitry Andric                               llvm::sys::path::filename(HeaderFilename));
93*0b57cec5SDimitry Andric       if (FileMgr->getFile(AltHeaderFilename))
94*0b57cec5SDimitry Andric         moduleMapAddHeader(AltHeaderFilename);
95*0b57cec5SDimitry Andric     }
96*0b57cec5SDimitry Andric   }
97*0b57cec5SDimitry Andric };
98*0b57cec5SDimitry Andric 
99*0b57cec5SDimitry Andric }
100*0b57cec5SDimitry Andric 
101*0b57cec5SDimitry Andric void ModuleDependencyCollector::attachToASTReader(ASTReader &R) {
102*0b57cec5SDimitry Andric   R.addListener(llvm::make_unique<ModuleDependencyListener>(*this));
103*0b57cec5SDimitry Andric }
104*0b57cec5SDimitry Andric 
105*0b57cec5SDimitry Andric void ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) {
106*0b57cec5SDimitry Andric   PP.addPPCallbacks(llvm::make_unique<ModuleDependencyPPCallbacks>(
107*0b57cec5SDimitry Andric       *this, PP.getSourceManager()));
108*0b57cec5SDimitry Andric   PP.getHeaderSearchInfo().getModuleMap().addModuleMapCallbacks(
109*0b57cec5SDimitry Andric       llvm::make_unique<ModuleDependencyMMCallbacks>(*this));
110*0b57cec5SDimitry Andric }
111*0b57cec5SDimitry Andric 
112*0b57cec5SDimitry Andric static bool isCaseSensitivePath(StringRef Path) {
113*0b57cec5SDimitry Andric   SmallString<256> TmpDest = Path, UpperDest, RealDest;
114*0b57cec5SDimitry Andric   // Remove component traversals, links, etc.
115*0b57cec5SDimitry Andric   if (llvm::sys::fs::real_path(Path, TmpDest))
116*0b57cec5SDimitry Andric     return true; // Current default value in vfs.yaml
117*0b57cec5SDimitry Andric   Path = TmpDest;
118*0b57cec5SDimitry Andric 
119*0b57cec5SDimitry Andric   // Change path to all upper case and ask for its real path, if the latter
120*0b57cec5SDimitry Andric   // exists and is equal to Path, it's not case sensitive. Default to case
121*0b57cec5SDimitry Andric   // sensitive in the absence of realpath, since this is what the VFSWriter
122*0b57cec5SDimitry Andric   // already expects when sensitivity isn't setup.
123*0b57cec5SDimitry Andric   for (auto &C : Path)
124*0b57cec5SDimitry Andric     UpperDest.push_back(toUppercase(C));
125*0b57cec5SDimitry Andric   if (!llvm::sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest))
126*0b57cec5SDimitry Andric     return false;
127*0b57cec5SDimitry Andric   return true;
128*0b57cec5SDimitry Andric }
129*0b57cec5SDimitry Andric 
130*0b57cec5SDimitry Andric void ModuleDependencyCollector::writeFileMap() {
131*0b57cec5SDimitry Andric   if (Seen.empty())
132*0b57cec5SDimitry Andric     return;
133*0b57cec5SDimitry Andric 
134*0b57cec5SDimitry Andric   StringRef VFSDir = getDest();
135*0b57cec5SDimitry Andric 
136*0b57cec5SDimitry Andric   // Default to use relative overlay directories in the VFS yaml file. This
137*0b57cec5SDimitry Andric   // allows crash reproducer scripts to work across machines.
138*0b57cec5SDimitry Andric   VFSWriter.setOverlayDir(VFSDir);
139*0b57cec5SDimitry Andric 
140*0b57cec5SDimitry Andric   // Explicitly set case sensitivity for the YAML writer. For that, find out
141*0b57cec5SDimitry Andric   // the sensitivity at the path where the headers all collected to.
142*0b57cec5SDimitry Andric   VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir));
143*0b57cec5SDimitry Andric 
144*0b57cec5SDimitry Andric   // Do not rely on real path names when executing the crash reproducer scripts
145*0b57cec5SDimitry Andric   // since we only want to actually use the files we have on the VFS cache.
146*0b57cec5SDimitry Andric   VFSWriter.setUseExternalNames(false);
147*0b57cec5SDimitry Andric 
148*0b57cec5SDimitry Andric   std::error_code EC;
149*0b57cec5SDimitry Andric   SmallString<256> YAMLPath = VFSDir;
150*0b57cec5SDimitry Andric   llvm::sys::path::append(YAMLPath, "vfs.yaml");
151*0b57cec5SDimitry Andric   llvm::raw_fd_ostream OS(YAMLPath, EC, llvm::sys::fs::F_Text);
152*0b57cec5SDimitry Andric   if (EC) {
153*0b57cec5SDimitry Andric     HasErrors = true;
154*0b57cec5SDimitry Andric     return;
155*0b57cec5SDimitry Andric   }
156*0b57cec5SDimitry Andric   VFSWriter.write(OS);
157*0b57cec5SDimitry Andric }
158*0b57cec5SDimitry Andric 
159*0b57cec5SDimitry Andric bool ModuleDependencyCollector::getRealPath(StringRef SrcPath,
160*0b57cec5SDimitry Andric                                             SmallVectorImpl<char> &Result) {
161*0b57cec5SDimitry Andric   using namespace llvm::sys;
162*0b57cec5SDimitry Andric   SmallString<256> RealPath;
163*0b57cec5SDimitry Andric   StringRef FileName = path::filename(SrcPath);
164*0b57cec5SDimitry Andric   std::string Dir = path::parent_path(SrcPath).str();
165*0b57cec5SDimitry Andric   auto DirWithSymLink = SymLinkMap.find(Dir);
166*0b57cec5SDimitry Andric 
167*0b57cec5SDimitry Andric   // Use real_path to fix any symbolic link component present in a path.
168*0b57cec5SDimitry Andric   // Computing the real path is expensive, cache the search through the
169*0b57cec5SDimitry Andric   // parent path directory.
170*0b57cec5SDimitry Andric   if (DirWithSymLink == SymLinkMap.end()) {
171*0b57cec5SDimitry Andric     if (llvm::sys::fs::real_path(Dir, RealPath))
172*0b57cec5SDimitry Andric       return false;
173*0b57cec5SDimitry Andric     SymLinkMap[Dir] = RealPath.str();
174*0b57cec5SDimitry Andric   } else {
175*0b57cec5SDimitry Andric     RealPath = DirWithSymLink->second;
176*0b57cec5SDimitry Andric   }
177*0b57cec5SDimitry Andric 
178*0b57cec5SDimitry Andric   path::append(RealPath, FileName);
179*0b57cec5SDimitry Andric   Result.swap(RealPath);
180*0b57cec5SDimitry Andric   return true;
181*0b57cec5SDimitry Andric }
182*0b57cec5SDimitry Andric 
183*0b57cec5SDimitry Andric std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src,
184*0b57cec5SDimitry Andric                                                       StringRef Dst) {
185*0b57cec5SDimitry Andric   using namespace llvm::sys;
186*0b57cec5SDimitry Andric 
187*0b57cec5SDimitry Andric   // We need an absolute src path to append to the root.
188*0b57cec5SDimitry Andric   SmallString<256> AbsoluteSrc = Src;
189*0b57cec5SDimitry Andric   fs::make_absolute(AbsoluteSrc);
190*0b57cec5SDimitry Andric   // Canonicalize src to a native path to avoid mixed separator styles.
191*0b57cec5SDimitry Andric   path::native(AbsoluteSrc);
192*0b57cec5SDimitry Andric   // Remove redundant leading "./" pieces and consecutive separators.
193*0b57cec5SDimitry Andric   AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc);
194*0b57cec5SDimitry Andric 
195*0b57cec5SDimitry Andric   // Canonicalize the source path by removing "..", "." components.
196*0b57cec5SDimitry Andric   SmallString<256> VirtualPath = AbsoluteSrc;
197*0b57cec5SDimitry Andric   path::remove_dots(VirtualPath, /*remove_dot_dot=*/true);
198*0b57cec5SDimitry Andric 
199*0b57cec5SDimitry Andric   // If a ".." component is present after a symlink component, remove_dots may
200*0b57cec5SDimitry Andric   // lead to the wrong real destination path. Let the source be canonicalized
201*0b57cec5SDimitry Andric   // like that but make sure we always use the real path for the destination.
202*0b57cec5SDimitry Andric   SmallString<256> CopyFrom;
203*0b57cec5SDimitry Andric   if (!getRealPath(AbsoluteSrc, CopyFrom))
204*0b57cec5SDimitry Andric     CopyFrom = VirtualPath;
205*0b57cec5SDimitry Andric   SmallString<256> CacheDst = getDest();
206*0b57cec5SDimitry Andric 
207*0b57cec5SDimitry Andric   if (Dst.empty()) {
208*0b57cec5SDimitry Andric     // The common case is to map the virtual path to the same path inside the
209*0b57cec5SDimitry Andric     // cache.
210*0b57cec5SDimitry Andric     path::append(CacheDst, path::relative_path(CopyFrom));
211*0b57cec5SDimitry Andric   } else {
212*0b57cec5SDimitry Andric     // When collecting entries from input vfsoverlays, copy the external
213*0b57cec5SDimitry Andric     // contents into the cache but still map from the source.
214*0b57cec5SDimitry Andric     if (!fs::exists(Dst))
215*0b57cec5SDimitry Andric       return std::error_code();
216*0b57cec5SDimitry Andric     path::append(CacheDst, Dst);
217*0b57cec5SDimitry Andric     CopyFrom = Dst;
218*0b57cec5SDimitry Andric   }
219*0b57cec5SDimitry Andric 
220*0b57cec5SDimitry Andric   // Copy the file into place.
221*0b57cec5SDimitry Andric   if (std::error_code EC = fs::create_directories(path::parent_path(CacheDst),
222*0b57cec5SDimitry Andric                                                   /*IgnoreExisting=*/true))
223*0b57cec5SDimitry Andric     return EC;
224*0b57cec5SDimitry Andric   if (std::error_code EC = fs::copy_file(CopyFrom, CacheDst))
225*0b57cec5SDimitry Andric     return EC;
226*0b57cec5SDimitry Andric 
227*0b57cec5SDimitry Andric   // Always map a canonical src path to its real path into the YAML, by doing
228*0b57cec5SDimitry Andric   // this we map different virtual src paths to the same entry in the VFS
229*0b57cec5SDimitry Andric   // overlay, which is a way to emulate symlink inside the VFS; this is also
230*0b57cec5SDimitry Andric   // needed for correctness, not doing that can lead to module redefinition
231*0b57cec5SDimitry Andric   // errors.
232*0b57cec5SDimitry Andric   addFileMapping(VirtualPath, CacheDst);
233*0b57cec5SDimitry Andric   return std::error_code();
234*0b57cec5SDimitry Andric }
235*0b57cec5SDimitry Andric 
236*0b57cec5SDimitry Andric void ModuleDependencyCollector::addFile(StringRef Filename, StringRef FileDst) {
237*0b57cec5SDimitry Andric   if (insertSeen(Filename))
238*0b57cec5SDimitry Andric     if (copyToRoot(Filename, FileDst))
239*0b57cec5SDimitry Andric       HasErrors = true;
240*0b57cec5SDimitry Andric }
241