//===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/Threading.h" #include using namespace clang; using namespace tooling; using namespace dependencies; llvm::ErrorOr DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { // Load the file and its content from the file system. auto MaybeFile = getUnderlyingFS().openFileForRead(Filename); if (!MaybeFile) return MaybeFile.getError(); auto File = std::move(*MaybeFile); auto MaybeStat = File->status(); if (!MaybeStat) return MaybeStat.getError(); auto Stat = std::move(*MaybeStat); auto MaybeBuffer = File->getBuffer(Stat.getName()); if (!MaybeBuffer) return MaybeBuffer.getError(); auto Buffer = std::move(*MaybeBuffer); // If the file size changed between read and stat, pretend it didn't. if (Stat.getSize() != Buffer->getBufferSize()) Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize()); return TentativeEntry(Stat, std::move(Buffer)); } bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated( EntryRef Ref) { auto &Entry = Ref.Entry; if (Entry.isError() || Entry.isDirectory()) return false; CachedFileContents *Contents = Entry.getCachedContents(); assert(Contents && "contents not initialized"); // Double-checked locking. if (Contents->DepDirectives.load()) return true; std::lock_guard GuardLock(Contents->ValueLock); // Double-checked locking. if (Contents->DepDirectives.load()) return true; SmallVector Directives; // Scan the file for preprocessor directives that might affect the // dependencies. if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), Contents->DepDirectiveTokens, Directives)) { Contents->DepDirectiveTokens.clear(); // FIXME: Propagate the diagnostic if desired by the client. Contents->DepDirectives.store(new std::optional()); return false; } // This function performed double-checked locking using `DepDirectives`. // Assigning it must be the last thing this function does, otherwise other // threads may skip the critical section (`DepDirectives != nullptr`), leading // to a data race. Contents->DepDirectives.store( new std::optional(std::move(Directives))); return true; } DependencyScanningFilesystemSharedCache:: DependencyScanningFilesystemSharedCache() { // This heuristic was chosen using a empirical testing on a // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache // sharding gives a performance edge by reducing the lock contention. // FIXME: A better heuristic might also consider the OS to account for // the different cost of lock contention on different OSes. NumShards = std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); CacheShards = std::make_unique(NumShards); } DependencyScanningFilesystemSharedCache::CacheShard & DependencyScanningFilesystemSharedCache::getShardForFilename( StringRef Filename) const { assert(llvm::sys::path::is_absolute_gnu(Filename)); return CacheShards[llvm::hash_value(Filename) % NumShards]; } DependencyScanningFilesystemSharedCache::CacheShard & DependencyScanningFilesystemSharedCache::getShardForUID( llvm::sys::fs::UniqueID UID) const { auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile()); return CacheShards[Hash % NumShards]; } const CachedFileSystemEntry * DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( StringRef Filename) const { assert(llvm::sys::path::is_absolute_gnu(Filename)); std::lock_guard LockGuard(CacheLock); auto It = CacheByFilename.find(Filename); return It == CacheByFilename.end() ? nullptr : It->getValue().first; } const CachedFileSystemEntry * DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( llvm::sys::fs::UniqueID UID) const { std::lock_guard LockGuard(CacheLock); auto It = EntriesByUID.find(UID); return It == EntriesByUID.end() ? nullptr : It->getSecond(); } const CachedFileSystemEntry & DependencyScanningFilesystemSharedCache::CacheShard:: getOrEmplaceEntryForFilename(StringRef Filename, llvm::ErrorOr Stat) { std::lock_guard LockGuard(CacheLock); auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}}); auto &[CachedEntry, CachedRealPath] = It->getValue(); if (!CachedEntry) { // The entry is not present in the shared cache. Either the cache doesn't // know about the file at all, or it only knows about its real path. assert((Inserted || CachedRealPath) && "existing file with empty pair"); CachedEntry = new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); } return *CachedEntry; } const CachedFileSystemEntry & DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, std::unique_ptr Contents) { std::lock_guard LockGuard(CacheLock); auto [It, Inserted] = EntriesByUID.insert({UID, nullptr}); auto &CachedEntry = It->getSecond(); if (Inserted) { CachedFileContents *StoredContents = nullptr; if (Contents) StoredContents = new (ContentsStorage.Allocate()) CachedFileContents(std::move(Contents)); CachedEntry = new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat), StoredContents); } return *CachedEntry; } const CachedFileSystemEntry & DependencyScanningFilesystemSharedCache::CacheShard:: getOrInsertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry) { std::lock_guard LockGuard(CacheLock); auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}}); auto &[CachedEntry, CachedRealPath] = It->getValue(); if (!Inserted || !CachedEntry) CachedEntry = &Entry; return *CachedEntry; } const CachedRealPath * DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename( StringRef Filename) const { assert(llvm::sys::path::is_absolute_gnu(Filename)); std::lock_guard LockGuard(CacheLock); auto It = CacheByFilename.find(Filename); return It == CacheByFilename.end() ? nullptr : It->getValue().second; } const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard:: getOrEmplaceRealPathForFilename(StringRef Filename, llvm::ErrorOr RealPath) { std::lock_guard LockGuard(CacheLock); const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second; if (!StoredRealPath) { auto OwnedRealPath = [&]() -> CachedRealPath { if (!RealPath) return RealPath.getError(); return RealPath->str(); }(); StoredRealPath = new (RealPathStorage.Allocate()) CachedRealPath(std::move(OwnedRealPath)); } return *StoredRealPath; } static bool shouldCacheStatFailures(StringRef Filename) { StringRef Ext = llvm::sys::path::extension(Filename); if (Ext.empty()) return false; // This may be the module cache directory. return true; } DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem( DependencyScanningFilesystemSharedCache &SharedCache, IntrusiveRefCntPtr FS) : llvm::RTTIExtends(std::move(FS)), SharedCache(SharedCache), WorkingDirForCacheLookup(llvm::errc::invalid_argument) { updateWorkingDirForCacheLookup(); } const CachedFileSystemEntry & DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( TentativeEntry TEntry) { auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID()); return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(), std::move(TEntry.Status), std::move(TEntry.Contents)); } const CachedFileSystemEntry * DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( StringRef Filename) { if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) return Entry; auto &Shard = SharedCache.getShardForFilename(Filename); if (const auto *Entry = Shard.findEntryByFilename(Filename)) return &LocalCache.insertEntryForFilename(Filename, *Entry); return nullptr; } llvm::ErrorOr DependencyScanningWorkerFilesystem::computeAndStoreResult( StringRef OriginalFilename, StringRef FilenameForLookup) { llvm::ErrorOr Stat = getUnderlyingFS().status(OriginalFilename); if (!Stat) { if (!shouldCacheStatFailures(OriginalFilename)) return Stat.getError(); const auto &Entry = getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError()); return insertLocalEntryForFilename(FilenameForLookup, Entry); } if (const auto *Entry = findSharedEntryByUID(*Stat)) return insertLocalEntryForFilename(FilenameForLookup, *Entry); auto TEntry = Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename); const CachedFileSystemEntry *SharedEntry = [&]() { if (TEntry) { const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry); } return &getOrEmplaceSharedEntryForFilename(FilenameForLookup, TEntry.getError()); }(); return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry); } llvm::ErrorOr DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( StringRef OriginalFilename) { SmallString<256> PathBuf; auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf); if (!FilenameForLookup) return FilenameForLookup.getError(); if (const auto *Entry = findEntryByFilenameWithWriteThrough(*FilenameForLookup)) return EntryRef(OriginalFilename, *Entry).unwrapError(); auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup); if (!MaybeEntry) return MaybeEntry.getError(); return EntryRef(OriginalFilename, *MaybeEntry).unwrapError(); } llvm::ErrorOr DependencyScanningWorkerFilesystem::status(const Twine &Path) { SmallString<256> OwnedFilename; StringRef Filename = Path.toStringRef(OwnedFilename); if (Filename.ends_with(".pcm")) return getUnderlyingFS().status(Path); llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); if (!Result) return Result.getError(); return Result->getStatus(); } bool DependencyScanningWorkerFilesystem::exists(const Twine &Path) { // While some VFS overlay filesystems may implement more-efficient // mechanisms for `exists` queries, `DependencyScanningWorkerFilesystem` // typically wraps `RealFileSystem` which does not specialize `exists`, // so it is not likely to benefit from such optimizations. Instead, // it is more-valuable to have this query go through the // cached-`status` code-path of the `DependencyScanningWorkerFilesystem`. llvm::ErrorOr Status = status(Path); return Status && Status->exists(); } namespace { /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using /// this subclass. class DepScanFile final : public llvm::vfs::File { public: DepScanFile(std::unique_ptr Buffer, llvm::vfs::Status Stat) : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} static llvm::ErrorOr> create(EntryRef Entry); llvm::ErrorOr status() override { return Stat; } llvm::ErrorOr> getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, bool IsVolatile) override { return std::move(Buffer); } std::error_code close() override { return {}; } private: std::unique_ptr Buffer; llvm::vfs::Status Stat; }; } // end anonymous namespace llvm::ErrorOr> DepScanFile::create(EntryRef Entry) { assert(!Entry.isError() && "error"); if (Entry.isDirectory()) return std::make_error_code(std::errc::is_a_directory); auto Result = std::make_unique( llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), Entry.getStatus().getName(), /*RequiresNullTerminator=*/false), Entry.getStatus()); return llvm::ErrorOr>( std::unique_ptr(std::move(Result))); } llvm::ErrorOr> DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { SmallString<256> OwnedFilename; StringRef Filename = Path.toStringRef(OwnedFilename); if (Filename.ends_with(".pcm")) return getUnderlyingFS().openFileForRead(Path); llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); if (!Result) return Result.getError(); return DepScanFile::create(Result.get()); } std::error_code DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path, SmallVectorImpl &Output) { SmallString<256> OwnedFilename; StringRef OriginalFilename = Path.toStringRef(OwnedFilename); SmallString<256> PathBuf; auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf); if (!FilenameForLookup) return FilenameForLookup.getError(); auto HandleCachedRealPath = [&Output](const CachedRealPath &RealPath) -> std::error_code { if (!RealPath) return RealPath.getError(); Output.assign(RealPath->begin(), RealPath->end()); return {}; }; // If we already have the result in local cache, no work required. if (const auto *RealPath = LocalCache.findRealPathByFilename(*FilenameForLookup)) return HandleCachedRealPath(*RealPath); // If we have the result in the shared cache, cache it locally. auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup); if (const auto *ShardRealPath = Shard.findRealPathByFilename(*FilenameForLookup)) { const auto &RealPath = LocalCache.insertRealPathForFilename( *FilenameForLookup, *ShardRealPath); return HandleCachedRealPath(RealPath); } // If we don't know the real path, compute it... std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output); llvm::ErrorOr ComputedRealPath = EC; if (!EC) ComputedRealPath = StringRef{Output.data(), Output.size()}; // ...and try to write it into the shared cache. In case some other thread won // this race and already wrote its own result there, just adopt it. Write // whatever is in the shared cache into the local one. const auto &RealPath = Shard.getOrEmplaceRealPathForFilename( *FilenameForLookup, ComputedRealPath); return HandleCachedRealPath( LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath)); } std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory( const Twine &Path) { std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path); updateWorkingDirForCacheLookup(); return EC; } void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() { llvm::ErrorOr CWD = getUnderlyingFS().getCurrentWorkingDirectory(); if (!CWD) { WorkingDirForCacheLookup = CWD.getError(); } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) { WorkingDirForCacheLookup = llvm::errc::invalid_argument; } else { WorkingDirForCacheLookup = *CWD; } assert(!WorkingDirForCacheLookup || llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup)); } llvm::ErrorOr DependencyScanningWorkerFilesystem::tryGetFilenameForLookup( StringRef OriginalFilename, llvm::SmallVectorImpl &PathBuf) const { StringRef FilenameForLookup; if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) { FilenameForLookup = OriginalFilename; } else if (!WorkingDirForCacheLookup) { return WorkingDirForCacheLookup.getError(); } else { StringRef RelFilename = OriginalFilename; RelFilename.consume_front("./"); PathBuf.assign(WorkingDirForCacheLookup->begin(), WorkingDirForCacheLookup->end()); llvm::sys::path::append(PathBuf, RelFilename); FilenameForLookup = StringRef{PathBuf.begin(), PathBuf.size()}; } assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup)); return FilenameForLookup; } const char DependencyScanningWorkerFilesystem::ID = 0;