1*0b57cec5SDimitry Andric //===--- ModuleDependencyCollector.cpp - Collect module dependencies ------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // Collect the dependencies of a set of modules. 10*0b57cec5SDimitry Andric // 11*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 12*0b57cec5SDimitry Andric 13*0b57cec5SDimitry Andric #include "clang/Basic/CharInfo.h" 14*0b57cec5SDimitry Andric #include "clang/Frontend/Utils.h" 15*0b57cec5SDimitry Andric #include "clang/Lex/Preprocessor.h" 16*0b57cec5SDimitry Andric #include "clang/Serialization/ASTReader.h" 17*0b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h" 18*0b57cec5SDimitry Andric #include "llvm/Config/llvm-config.h" 19*0b57cec5SDimitry Andric #include "llvm/Support/FileSystem.h" 20*0b57cec5SDimitry Andric #include "llvm/Support/Path.h" 21*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 22*0b57cec5SDimitry Andric 23*0b57cec5SDimitry Andric using namespace clang; 24*0b57cec5SDimitry Andric 25*0b57cec5SDimitry Andric namespace { 26*0b57cec5SDimitry Andric /// Private implementations for ModuleDependencyCollector 27*0b57cec5SDimitry Andric class ModuleDependencyListener : public ASTReaderListener { 28*0b57cec5SDimitry Andric ModuleDependencyCollector &Collector; 29*0b57cec5SDimitry Andric public: 30*0b57cec5SDimitry Andric ModuleDependencyListener(ModuleDependencyCollector &Collector) 31*0b57cec5SDimitry Andric : Collector(Collector) {} 32*0b57cec5SDimitry Andric bool needsInputFileVisitation() override { return true; } 33*0b57cec5SDimitry Andric bool needsSystemInputFileVisitation() override { return true; } 34*0b57cec5SDimitry Andric bool visitInputFile(StringRef Filename, bool IsSystem, bool IsOverridden, 35*0b57cec5SDimitry Andric bool IsExplicitModule) override { 36*0b57cec5SDimitry Andric Collector.addFile(Filename); 37*0b57cec5SDimitry Andric return true; 38*0b57cec5SDimitry Andric } 39*0b57cec5SDimitry Andric }; 40*0b57cec5SDimitry Andric 41*0b57cec5SDimitry Andric struct ModuleDependencyPPCallbacks : public PPCallbacks { 42*0b57cec5SDimitry Andric ModuleDependencyCollector &Collector; 43*0b57cec5SDimitry Andric SourceManager &SM; 44*0b57cec5SDimitry Andric ModuleDependencyPPCallbacks(ModuleDependencyCollector &Collector, 45*0b57cec5SDimitry Andric SourceManager &SM) 46*0b57cec5SDimitry Andric : Collector(Collector), SM(SM) {} 47*0b57cec5SDimitry Andric 48*0b57cec5SDimitry Andric void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 49*0b57cec5SDimitry Andric StringRef FileName, bool IsAngled, 50*0b57cec5SDimitry Andric CharSourceRange FilenameRange, const FileEntry *File, 51*0b57cec5SDimitry Andric StringRef SearchPath, StringRef RelativePath, 52*0b57cec5SDimitry Andric const Module *Imported, 53*0b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType) override { 54*0b57cec5SDimitry Andric if (!File) 55*0b57cec5SDimitry Andric return; 56*0b57cec5SDimitry Andric Collector.addFile(File->getName()); 57*0b57cec5SDimitry Andric } 58*0b57cec5SDimitry Andric }; 59*0b57cec5SDimitry Andric 60*0b57cec5SDimitry Andric struct ModuleDependencyMMCallbacks : public ModuleMapCallbacks { 61*0b57cec5SDimitry Andric ModuleDependencyCollector &Collector; 62*0b57cec5SDimitry Andric ModuleDependencyMMCallbacks(ModuleDependencyCollector &Collector) 63*0b57cec5SDimitry Andric : Collector(Collector) {} 64*0b57cec5SDimitry Andric 65*0b57cec5SDimitry Andric void moduleMapAddHeader(StringRef HeaderPath) override { 66*0b57cec5SDimitry Andric if (llvm::sys::path::is_absolute(HeaderPath)) 67*0b57cec5SDimitry Andric Collector.addFile(HeaderPath); 68*0b57cec5SDimitry Andric } 69*0b57cec5SDimitry Andric void moduleMapAddUmbrellaHeader(FileManager *FileMgr, 70*0b57cec5SDimitry Andric const FileEntry *Header) override { 71*0b57cec5SDimitry Andric StringRef HeaderFilename = Header->getName(); 72*0b57cec5SDimitry Andric moduleMapAddHeader(HeaderFilename); 73*0b57cec5SDimitry Andric // The FileManager can find and cache the symbolic link for a framework 74*0b57cec5SDimitry Andric // header before its real path, this means a module can have some of its 75*0b57cec5SDimitry Andric // headers to use other paths. Although this is usually not a problem, it's 76*0b57cec5SDimitry Andric // inconsistent, and not collecting the original path header leads to 77*0b57cec5SDimitry Andric // umbrella clashes while rebuilding modules in the crash reproducer. For 78*0b57cec5SDimitry Andric // example: 79*0b57cec5SDimitry Andric // ApplicationServices.framework/Frameworks/ImageIO.framework/ImageIO.h 80*0b57cec5SDimitry Andric // instead of: 81*0b57cec5SDimitry Andric // ImageIO.framework/ImageIO.h 82*0b57cec5SDimitry Andric // 83*0b57cec5SDimitry Andric // FIXME: this shouldn't be necessary once we have FileName instances 84*0b57cec5SDimitry Andric // around instead of FileEntry ones. For now, make sure we collect all 85*0b57cec5SDimitry Andric // that we need for the reproducer to work correctly. 86*0b57cec5SDimitry Andric StringRef UmbreallDirFromHeader = 87*0b57cec5SDimitry Andric llvm::sys::path::parent_path(HeaderFilename); 88*0b57cec5SDimitry Andric StringRef UmbrellaDir = Header->getDir()->getName(); 89*0b57cec5SDimitry Andric if (!UmbrellaDir.equals(UmbreallDirFromHeader)) { 90*0b57cec5SDimitry Andric SmallString<128> AltHeaderFilename; 91*0b57cec5SDimitry Andric llvm::sys::path::append(AltHeaderFilename, UmbrellaDir, 92*0b57cec5SDimitry Andric llvm::sys::path::filename(HeaderFilename)); 93*0b57cec5SDimitry Andric if (FileMgr->getFile(AltHeaderFilename)) 94*0b57cec5SDimitry Andric moduleMapAddHeader(AltHeaderFilename); 95*0b57cec5SDimitry Andric } 96*0b57cec5SDimitry Andric } 97*0b57cec5SDimitry Andric }; 98*0b57cec5SDimitry Andric 99*0b57cec5SDimitry Andric } 100*0b57cec5SDimitry Andric 101*0b57cec5SDimitry Andric void ModuleDependencyCollector::attachToASTReader(ASTReader &R) { 102*0b57cec5SDimitry Andric R.addListener(llvm::make_unique<ModuleDependencyListener>(*this)); 103*0b57cec5SDimitry Andric } 104*0b57cec5SDimitry Andric 105*0b57cec5SDimitry Andric void ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) { 106*0b57cec5SDimitry Andric PP.addPPCallbacks(llvm::make_unique<ModuleDependencyPPCallbacks>( 107*0b57cec5SDimitry Andric *this, PP.getSourceManager())); 108*0b57cec5SDimitry Andric PP.getHeaderSearchInfo().getModuleMap().addModuleMapCallbacks( 109*0b57cec5SDimitry Andric llvm::make_unique<ModuleDependencyMMCallbacks>(*this)); 110*0b57cec5SDimitry Andric } 111*0b57cec5SDimitry Andric 112*0b57cec5SDimitry Andric static bool isCaseSensitivePath(StringRef Path) { 113*0b57cec5SDimitry Andric SmallString<256> TmpDest = Path, UpperDest, RealDest; 114*0b57cec5SDimitry Andric // Remove component traversals, links, etc. 115*0b57cec5SDimitry Andric if (llvm::sys::fs::real_path(Path, TmpDest)) 116*0b57cec5SDimitry Andric return true; // Current default value in vfs.yaml 117*0b57cec5SDimitry Andric Path = TmpDest; 118*0b57cec5SDimitry Andric 119*0b57cec5SDimitry Andric // Change path to all upper case and ask for its real path, if the latter 120*0b57cec5SDimitry Andric // exists and is equal to Path, it's not case sensitive. Default to case 121*0b57cec5SDimitry Andric // sensitive in the absence of realpath, since this is what the VFSWriter 122*0b57cec5SDimitry Andric // already expects when sensitivity isn't setup. 123*0b57cec5SDimitry Andric for (auto &C : Path) 124*0b57cec5SDimitry Andric UpperDest.push_back(toUppercase(C)); 125*0b57cec5SDimitry Andric if (!llvm::sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest)) 126*0b57cec5SDimitry Andric return false; 127*0b57cec5SDimitry Andric return true; 128*0b57cec5SDimitry Andric } 129*0b57cec5SDimitry Andric 130*0b57cec5SDimitry Andric void ModuleDependencyCollector::writeFileMap() { 131*0b57cec5SDimitry Andric if (Seen.empty()) 132*0b57cec5SDimitry Andric return; 133*0b57cec5SDimitry Andric 134*0b57cec5SDimitry Andric StringRef VFSDir = getDest(); 135*0b57cec5SDimitry Andric 136*0b57cec5SDimitry Andric // Default to use relative overlay directories in the VFS yaml file. This 137*0b57cec5SDimitry Andric // allows crash reproducer scripts to work across machines. 138*0b57cec5SDimitry Andric VFSWriter.setOverlayDir(VFSDir); 139*0b57cec5SDimitry Andric 140*0b57cec5SDimitry Andric // Explicitly set case sensitivity for the YAML writer. For that, find out 141*0b57cec5SDimitry Andric // the sensitivity at the path where the headers all collected to. 142*0b57cec5SDimitry Andric VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir)); 143*0b57cec5SDimitry Andric 144*0b57cec5SDimitry Andric // Do not rely on real path names when executing the crash reproducer scripts 145*0b57cec5SDimitry Andric // since we only want to actually use the files we have on the VFS cache. 146*0b57cec5SDimitry Andric VFSWriter.setUseExternalNames(false); 147*0b57cec5SDimitry Andric 148*0b57cec5SDimitry Andric std::error_code EC; 149*0b57cec5SDimitry Andric SmallString<256> YAMLPath = VFSDir; 150*0b57cec5SDimitry Andric llvm::sys::path::append(YAMLPath, "vfs.yaml"); 151*0b57cec5SDimitry Andric llvm::raw_fd_ostream OS(YAMLPath, EC, llvm::sys::fs::F_Text); 152*0b57cec5SDimitry Andric if (EC) { 153*0b57cec5SDimitry Andric HasErrors = true; 154*0b57cec5SDimitry Andric return; 155*0b57cec5SDimitry Andric } 156*0b57cec5SDimitry Andric VFSWriter.write(OS); 157*0b57cec5SDimitry Andric } 158*0b57cec5SDimitry Andric 159*0b57cec5SDimitry Andric bool ModuleDependencyCollector::getRealPath(StringRef SrcPath, 160*0b57cec5SDimitry Andric SmallVectorImpl<char> &Result) { 161*0b57cec5SDimitry Andric using namespace llvm::sys; 162*0b57cec5SDimitry Andric SmallString<256> RealPath; 163*0b57cec5SDimitry Andric StringRef FileName = path::filename(SrcPath); 164*0b57cec5SDimitry Andric std::string Dir = path::parent_path(SrcPath).str(); 165*0b57cec5SDimitry Andric auto DirWithSymLink = SymLinkMap.find(Dir); 166*0b57cec5SDimitry Andric 167*0b57cec5SDimitry Andric // Use real_path to fix any symbolic link component present in a path. 168*0b57cec5SDimitry Andric // Computing the real path is expensive, cache the search through the 169*0b57cec5SDimitry Andric // parent path directory. 170*0b57cec5SDimitry Andric if (DirWithSymLink == SymLinkMap.end()) { 171*0b57cec5SDimitry Andric if (llvm::sys::fs::real_path(Dir, RealPath)) 172*0b57cec5SDimitry Andric return false; 173*0b57cec5SDimitry Andric SymLinkMap[Dir] = RealPath.str(); 174*0b57cec5SDimitry Andric } else { 175*0b57cec5SDimitry Andric RealPath = DirWithSymLink->second; 176*0b57cec5SDimitry Andric } 177*0b57cec5SDimitry Andric 178*0b57cec5SDimitry Andric path::append(RealPath, FileName); 179*0b57cec5SDimitry Andric Result.swap(RealPath); 180*0b57cec5SDimitry Andric return true; 181*0b57cec5SDimitry Andric } 182*0b57cec5SDimitry Andric 183*0b57cec5SDimitry Andric std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src, 184*0b57cec5SDimitry Andric StringRef Dst) { 185*0b57cec5SDimitry Andric using namespace llvm::sys; 186*0b57cec5SDimitry Andric 187*0b57cec5SDimitry Andric // We need an absolute src path to append to the root. 188*0b57cec5SDimitry Andric SmallString<256> AbsoluteSrc = Src; 189*0b57cec5SDimitry Andric fs::make_absolute(AbsoluteSrc); 190*0b57cec5SDimitry Andric // Canonicalize src to a native path to avoid mixed separator styles. 191*0b57cec5SDimitry Andric path::native(AbsoluteSrc); 192*0b57cec5SDimitry Andric // Remove redundant leading "./" pieces and consecutive separators. 193*0b57cec5SDimitry Andric AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc); 194*0b57cec5SDimitry Andric 195*0b57cec5SDimitry Andric // Canonicalize the source path by removing "..", "." components. 196*0b57cec5SDimitry Andric SmallString<256> VirtualPath = AbsoluteSrc; 197*0b57cec5SDimitry Andric path::remove_dots(VirtualPath, /*remove_dot_dot=*/true); 198*0b57cec5SDimitry Andric 199*0b57cec5SDimitry Andric // If a ".." component is present after a symlink component, remove_dots may 200*0b57cec5SDimitry Andric // lead to the wrong real destination path. Let the source be canonicalized 201*0b57cec5SDimitry Andric // like that but make sure we always use the real path for the destination. 202*0b57cec5SDimitry Andric SmallString<256> CopyFrom; 203*0b57cec5SDimitry Andric if (!getRealPath(AbsoluteSrc, CopyFrom)) 204*0b57cec5SDimitry Andric CopyFrom = VirtualPath; 205*0b57cec5SDimitry Andric SmallString<256> CacheDst = getDest(); 206*0b57cec5SDimitry Andric 207*0b57cec5SDimitry Andric if (Dst.empty()) { 208*0b57cec5SDimitry Andric // The common case is to map the virtual path to the same path inside the 209*0b57cec5SDimitry Andric // cache. 210*0b57cec5SDimitry Andric path::append(CacheDst, path::relative_path(CopyFrom)); 211*0b57cec5SDimitry Andric } else { 212*0b57cec5SDimitry Andric // When collecting entries from input vfsoverlays, copy the external 213*0b57cec5SDimitry Andric // contents into the cache but still map from the source. 214*0b57cec5SDimitry Andric if (!fs::exists(Dst)) 215*0b57cec5SDimitry Andric return std::error_code(); 216*0b57cec5SDimitry Andric path::append(CacheDst, Dst); 217*0b57cec5SDimitry Andric CopyFrom = Dst; 218*0b57cec5SDimitry Andric } 219*0b57cec5SDimitry Andric 220*0b57cec5SDimitry Andric // Copy the file into place. 221*0b57cec5SDimitry Andric if (std::error_code EC = fs::create_directories(path::parent_path(CacheDst), 222*0b57cec5SDimitry Andric /*IgnoreExisting=*/true)) 223*0b57cec5SDimitry Andric return EC; 224*0b57cec5SDimitry Andric if (std::error_code EC = fs::copy_file(CopyFrom, CacheDst)) 225*0b57cec5SDimitry Andric return EC; 226*0b57cec5SDimitry Andric 227*0b57cec5SDimitry Andric // Always map a canonical src path to its real path into the YAML, by doing 228*0b57cec5SDimitry Andric // this we map different virtual src paths to the same entry in the VFS 229*0b57cec5SDimitry Andric // overlay, which is a way to emulate symlink inside the VFS; this is also 230*0b57cec5SDimitry Andric // needed for correctness, not doing that can lead to module redefinition 231*0b57cec5SDimitry Andric // errors. 232*0b57cec5SDimitry Andric addFileMapping(VirtualPath, CacheDst); 233*0b57cec5SDimitry Andric return std::error_code(); 234*0b57cec5SDimitry Andric } 235*0b57cec5SDimitry Andric 236*0b57cec5SDimitry Andric void ModuleDependencyCollector::addFile(StringRef Filename, StringRef FileDst) { 237*0b57cec5SDimitry Andric if (insertSeen(Filename)) 238*0b57cec5SDimitry Andric if (copyToRoot(Filename, FileDst)) 239*0b57cec5SDimitry Andric HasErrors = true; 240*0b57cec5SDimitry Andric } 241