1a7dea167SDimitry Andric //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// 2a7dea167SDimitry Andric // 3a7dea167SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a7dea167SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5a7dea167SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a7dea167SDimitry Andric // 7a7dea167SDimitry Andric //===----------------------------------------------------------------------===// 8a7dea167SDimitry Andric 9a7dea167SDimitry Andric #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" 10a7dea167SDimitry Andric #include "llvm/Support/MemoryBuffer.h" 110eae32dcSDimitry Andric #include "llvm/Support/SmallVectorMemoryBuffer.h" 12a7dea167SDimitry Andric #include "llvm/Support/Threading.h" 13*bdd1243dSDimitry Andric #include <optional> 14a7dea167SDimitry Andric 15a7dea167SDimitry Andric using namespace clang; 16a7dea167SDimitry Andric using namespace tooling; 17a7dea167SDimitry Andric using namespace dependencies; 18a7dea167SDimitry Andric 1904eeddc0SDimitry Andric llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry> 2004eeddc0SDimitry Andric DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { 21a7dea167SDimitry Andric // Load the file and its content from the file system. 2204eeddc0SDimitry Andric auto MaybeFile = getUnderlyingFS().openFileForRead(Filename); 23a7dea167SDimitry Andric if (!MaybeFile) 24a7dea167SDimitry Andric return MaybeFile.getError(); 250eae32dcSDimitry Andric auto File = std::move(*MaybeFile); 26a7dea167SDimitry Andric 270eae32dcSDimitry Andric auto MaybeStat = File->status(); 280eae32dcSDimitry Andric if (!MaybeStat) 290eae32dcSDimitry Andric return MaybeStat.getError(); 300eae32dcSDimitry Andric auto Stat = std::move(*MaybeStat); 310eae32dcSDimitry Andric 320eae32dcSDimitry Andric auto MaybeBuffer = File->getBuffer(Stat.getName()); 33a7dea167SDimitry Andric if (!MaybeBuffer) 34a7dea167SDimitry Andric return MaybeBuffer.getError(); 350eae32dcSDimitry Andric auto Buffer = std::move(*MaybeBuffer); 360eae32dcSDimitry Andric 3704eeddc0SDimitry Andric // If the file size changed between read and stat, pretend it didn't. 3804eeddc0SDimitry Andric if (Stat.getSize() != Buffer->getBufferSize()) 3904eeddc0SDimitry Andric Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize()); 4004eeddc0SDimitry Andric 4104eeddc0SDimitry Andric return TentativeEntry(Stat, std::move(Buffer)); 420eae32dcSDimitry Andric } 430eae32dcSDimitry Andric 4481ad6265SDimitry Andric EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary( 4504eeddc0SDimitry Andric const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { 4604eeddc0SDimitry Andric if (Entry.isError() || Entry.isDirectory() || Disable || 4781ad6265SDimitry Andric !shouldScanForDirectives(Filename)) 4881ad6265SDimitry Andric return EntryRef(Filename, Entry); 4904eeddc0SDimitry Andric 5081ad6265SDimitry Andric CachedFileContents *Contents = Entry.getCachedContents(); 5104eeddc0SDimitry Andric assert(Contents && "contents not initialized"); 5204eeddc0SDimitry Andric 5304eeddc0SDimitry Andric // Double-checked locking. 5481ad6265SDimitry Andric if (Contents->DepDirectives.load()) 5581ad6265SDimitry Andric return EntryRef(Filename, Entry); 5604eeddc0SDimitry Andric 5704eeddc0SDimitry Andric std::lock_guard<std::mutex> GuardLock(Contents->ValueLock); 5804eeddc0SDimitry Andric 5904eeddc0SDimitry Andric // Double-checked locking. 6081ad6265SDimitry Andric if (Contents->DepDirectives.load()) 6181ad6265SDimitry Andric return EntryRef(Filename, Entry); 62a7dea167SDimitry Andric 6381ad6265SDimitry Andric SmallVector<dependency_directives_scan::Directive, 64> Directives; 6481ad6265SDimitry Andric // Scan the file for preprocessor directives that might affect the 6581ad6265SDimitry Andric // dependencies. 6681ad6265SDimitry Andric if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), 6781ad6265SDimitry Andric Contents->DepDirectiveTokens, 6881ad6265SDimitry Andric Directives)) { 6981ad6265SDimitry Andric Contents->DepDirectiveTokens.clear(); 700eae32dcSDimitry Andric // FIXME: Propagate the diagnostic if desired by the client. 71*bdd1243dSDimitry Andric Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>()); 7281ad6265SDimitry Andric return EntryRef(Filename, Entry); 73a7dea167SDimitry Andric } 74a7dea167SDimitry Andric 7581ad6265SDimitry Andric // This function performed double-checked locking using `DepDirectives`. 7681ad6265SDimitry Andric // Assigning it must be the last thing this function does, otherwise other 7781ad6265SDimitry Andric // threads may skip the 7881ad6265SDimitry Andric // critical section (`DepDirectives != nullptr`), leading to a data race. 7981ad6265SDimitry Andric Contents->DepDirectives.store( 80*bdd1243dSDimitry Andric new std::optional<DependencyDirectivesTy>(std::move(Directives))); 8181ad6265SDimitry Andric return EntryRef(Filename, Entry); 82a7dea167SDimitry Andric } 83a7dea167SDimitry Andric 840eae32dcSDimitry Andric DependencyScanningFilesystemSharedCache:: 850eae32dcSDimitry Andric DependencyScanningFilesystemSharedCache() { 86a7dea167SDimitry Andric // This heuristic was chosen using a empirical testing on a 87a7dea167SDimitry Andric // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache 88a7dea167SDimitry Andric // sharding gives a performance edge by reducing the lock contention. 89a7dea167SDimitry Andric // FIXME: A better heuristic might also consider the OS to account for 90a7dea167SDimitry Andric // the different cost of lock contention on different OSes. 915ffd83dbSDimitry Andric NumShards = 925ffd83dbSDimitry Andric std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); 93a7dea167SDimitry Andric CacheShards = std::make_unique<CacheShard[]>(NumShards); 94a7dea167SDimitry Andric } 95a7dea167SDimitry Andric 9604eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard & 9704eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::getShardForFilename( 9804eeddc0SDimitry Andric StringRef Filename) const { 9904eeddc0SDimitry Andric return CacheShards[llvm::hash_value(Filename) % NumShards]; 10004eeddc0SDimitry Andric } 10104eeddc0SDimitry Andric 10204eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard & 10304eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::getShardForUID( 10404eeddc0SDimitry Andric llvm::sys::fs::UniqueID UID) const { 10504eeddc0SDimitry Andric auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile()); 10604eeddc0SDimitry Andric return CacheShards[Hash % NumShards]; 10704eeddc0SDimitry Andric } 10804eeddc0SDimitry Andric 10904eeddc0SDimitry Andric const CachedFileSystemEntry * 11004eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( 11104eeddc0SDimitry Andric StringRef Filename) const { 11204eeddc0SDimitry Andric std::lock_guard<std::mutex> LockGuard(CacheLock); 11304eeddc0SDimitry Andric auto It = EntriesByFilename.find(Filename); 11404eeddc0SDimitry Andric return It == EntriesByFilename.end() ? nullptr : It->getValue(); 11504eeddc0SDimitry Andric } 11604eeddc0SDimitry Andric 11704eeddc0SDimitry Andric const CachedFileSystemEntry * 11804eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( 11904eeddc0SDimitry Andric llvm::sys::fs::UniqueID UID) const { 12004eeddc0SDimitry Andric std::lock_guard<std::mutex> LockGuard(CacheLock); 12104eeddc0SDimitry Andric auto It = EntriesByUID.find(UID); 12204eeddc0SDimitry Andric return It == EntriesByUID.end() ? nullptr : It->getSecond(); 12304eeddc0SDimitry Andric } 12404eeddc0SDimitry Andric 12504eeddc0SDimitry Andric const CachedFileSystemEntry & 12604eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard:: 12704eeddc0SDimitry Andric getOrEmplaceEntryForFilename(StringRef Filename, 12804eeddc0SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> Stat) { 12904eeddc0SDimitry Andric std::lock_guard<std::mutex> LockGuard(CacheLock); 13004eeddc0SDimitry Andric auto Insertion = EntriesByFilename.insert({Filename, nullptr}); 13104eeddc0SDimitry Andric if (Insertion.second) 13204eeddc0SDimitry Andric Insertion.first->second = 13304eeddc0SDimitry Andric new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); 13404eeddc0SDimitry Andric return *Insertion.first->second; 13504eeddc0SDimitry Andric } 13604eeddc0SDimitry Andric 13704eeddc0SDimitry Andric const CachedFileSystemEntry & 13804eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( 13904eeddc0SDimitry Andric llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, 14004eeddc0SDimitry Andric std::unique_ptr<llvm::MemoryBuffer> Contents) { 14104eeddc0SDimitry Andric std::lock_guard<std::mutex> LockGuard(CacheLock); 14204eeddc0SDimitry Andric auto Insertion = EntriesByUID.insert({UID, nullptr}); 14304eeddc0SDimitry Andric if (Insertion.second) { 14404eeddc0SDimitry Andric CachedFileContents *StoredContents = nullptr; 14504eeddc0SDimitry Andric if (Contents) 14604eeddc0SDimitry Andric StoredContents = new (ContentsStorage.Allocate()) 14704eeddc0SDimitry Andric CachedFileContents(std::move(Contents)); 14804eeddc0SDimitry Andric Insertion.first->second = new (EntryStorage.Allocate()) 14904eeddc0SDimitry Andric CachedFileSystemEntry(std::move(Stat), StoredContents); 15004eeddc0SDimitry Andric } 15104eeddc0SDimitry Andric return *Insertion.first->second; 15204eeddc0SDimitry Andric } 15304eeddc0SDimitry Andric 15404eeddc0SDimitry Andric const CachedFileSystemEntry & 15504eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard:: 15604eeddc0SDimitry Andric getOrInsertEntryForFilename(StringRef Filename, 15704eeddc0SDimitry Andric const CachedFileSystemEntry &Entry) { 15804eeddc0SDimitry Andric std::lock_guard<std::mutex> LockGuard(CacheLock); 15904eeddc0SDimitry Andric return *EntriesByFilename.insert({Filename, &Entry}).first->getValue(); 160a7dea167SDimitry Andric } 161a7dea167SDimitry Andric 162480093f4SDimitry Andric /// Whitelist file extensions that should be minimized, treating no extension as 163480093f4SDimitry Andric /// a source file that should be minimized. 164480093f4SDimitry Andric /// 165480093f4SDimitry Andric /// This is kinda hacky, it would be better if we knew what kind of file Clang 166480093f4SDimitry Andric /// was expecting instead. 16781ad6265SDimitry Andric static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) { 168480093f4SDimitry Andric StringRef Ext = llvm::sys::path::extension(Filename); 169480093f4SDimitry Andric if (Ext.empty()) 170480093f4SDimitry Andric return true; // C++ standard library 171480093f4SDimitry Andric return llvm::StringSwitch<bool>(Ext) 172480093f4SDimitry Andric .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true) 173480093f4SDimitry Andric .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true) 174480093f4SDimitry Andric .CasesLower(".m", ".mm", true) 175480093f4SDimitry Andric .CasesLower(".i", ".ii", ".mi", ".mmi", true) 176480093f4SDimitry Andric .CasesLower(".def", ".inc", true) 177480093f4SDimitry Andric .Default(false); 178480093f4SDimitry Andric } 179480093f4SDimitry Andric 180480093f4SDimitry Andric static bool shouldCacheStatFailures(StringRef Filename) { 181480093f4SDimitry Andric StringRef Ext = llvm::sys::path::extension(Filename); 182480093f4SDimitry Andric if (Ext.empty()) 183480093f4SDimitry Andric return false; // This may be the module cache directory. 1844824e7fdSDimitry Andric // Only cache stat failures on source files. 18581ad6265SDimitry Andric return shouldScanForDirectivesBasedOnExtension(Filename); 186480093f4SDimitry Andric } 187480093f4SDimitry Andric 18881ad6265SDimitry Andric bool DependencyScanningWorkerFilesystem::shouldScanForDirectives( 18904eeddc0SDimitry Andric StringRef Filename) { 19081ad6265SDimitry Andric return shouldScanForDirectivesBasedOnExtension(Filename); 1914824e7fdSDimitry Andric } 1924824e7fdSDimitry Andric 19304eeddc0SDimitry Andric const CachedFileSystemEntry & 19404eeddc0SDimitry Andric DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( 19504eeddc0SDimitry Andric TentativeEntry TEntry) { 19604eeddc0SDimitry Andric auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID()); 19704eeddc0SDimitry Andric return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(), 19804eeddc0SDimitry Andric std::move(TEntry.Status), 19904eeddc0SDimitry Andric std::move(TEntry.Contents)); 20004eeddc0SDimitry Andric } 20104eeddc0SDimitry Andric 20204eeddc0SDimitry Andric const CachedFileSystemEntry * 20304eeddc0SDimitry Andric DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( 20404eeddc0SDimitry Andric StringRef Filename) { 20504eeddc0SDimitry Andric if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) 20604eeddc0SDimitry Andric return Entry; 20704eeddc0SDimitry Andric auto &Shard = SharedCache.getShardForFilename(Filename); 20804eeddc0SDimitry Andric if (const auto *Entry = Shard.findEntryByFilename(Filename)) 20904eeddc0SDimitry Andric return &LocalCache.insertEntryForFilename(Filename, *Entry); 21004eeddc0SDimitry Andric return nullptr; 21104eeddc0SDimitry Andric } 21204eeddc0SDimitry Andric 21304eeddc0SDimitry Andric llvm::ErrorOr<const CachedFileSystemEntry &> 21404eeddc0SDimitry Andric DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) { 21504eeddc0SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename); 21604eeddc0SDimitry Andric if (!Stat) { 21704eeddc0SDimitry Andric if (!shouldCacheStatFailures(Filename)) 21804eeddc0SDimitry Andric return Stat.getError(); 21904eeddc0SDimitry Andric const auto &Entry = 22004eeddc0SDimitry Andric getOrEmplaceSharedEntryForFilename(Filename, Stat.getError()); 22104eeddc0SDimitry Andric return insertLocalEntryForFilename(Filename, Entry); 22204eeddc0SDimitry Andric } 22304eeddc0SDimitry Andric 22404eeddc0SDimitry Andric if (const auto *Entry = findSharedEntryByUID(*Stat)) 22504eeddc0SDimitry Andric return insertLocalEntryForFilename(Filename, *Entry); 22604eeddc0SDimitry Andric 22704eeddc0SDimitry Andric auto TEntry = 22804eeddc0SDimitry Andric Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename); 22904eeddc0SDimitry Andric 23004eeddc0SDimitry Andric const CachedFileSystemEntry *SharedEntry = [&]() { 23104eeddc0SDimitry Andric if (TEntry) { 23204eeddc0SDimitry Andric const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); 23304eeddc0SDimitry Andric return &getOrInsertSharedEntryForFilename(Filename, UIDEntry); 23404eeddc0SDimitry Andric } 23504eeddc0SDimitry Andric return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError()); 23604eeddc0SDimitry Andric }(); 23704eeddc0SDimitry Andric 23804eeddc0SDimitry Andric return insertLocalEntryForFilename(Filename, *SharedEntry); 239fe6060f1SDimitry Andric } 240fe6060f1SDimitry Andric 2410eae32dcSDimitry Andric llvm::ErrorOr<EntryRef> 242a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( 24381ad6265SDimitry Andric StringRef Filename, bool DisableDirectivesScanning) { 24404eeddc0SDimitry Andric if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename)) 24581ad6265SDimitry Andric return scanForDirectivesIfNecessary(*Entry, Filename, 24681ad6265SDimitry Andric DisableDirectivesScanning) 24704eeddc0SDimitry Andric .unwrapError(); 24804eeddc0SDimitry Andric auto MaybeEntry = computeAndStoreResult(Filename); 24904eeddc0SDimitry Andric if (!MaybeEntry) 25004eeddc0SDimitry Andric return MaybeEntry.getError(); 25181ad6265SDimitry Andric return scanForDirectivesIfNecessary(*MaybeEntry, Filename, 25281ad6265SDimitry Andric DisableDirectivesScanning) 25304eeddc0SDimitry Andric .unwrapError(); 254a7dea167SDimitry Andric } 255a7dea167SDimitry Andric 256a7dea167SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> 257a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::status(const Twine &Path) { 258a7dea167SDimitry Andric SmallString<256> OwnedFilename; 259a7dea167SDimitry Andric StringRef Filename = Path.toStringRef(OwnedFilename); 2600eae32dcSDimitry Andric 2610eae32dcSDimitry Andric llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 262a7dea167SDimitry Andric if (!Result) 263a7dea167SDimitry Andric return Result.getError(); 2640eae32dcSDimitry Andric return Result->getStatus(); 265a7dea167SDimitry Andric } 266a7dea167SDimitry Andric 267a7dea167SDimitry Andric namespace { 268a7dea167SDimitry Andric 269a7dea167SDimitry Andric /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using 270a7dea167SDimitry Andric /// this subclass. 27181ad6265SDimitry Andric class DepScanFile final : public llvm::vfs::File { 272a7dea167SDimitry Andric public: 27381ad6265SDimitry Andric DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, 274a7dea167SDimitry Andric llvm::vfs::Status Stat) 275a7dea167SDimitry Andric : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} 276a7dea167SDimitry Andric 27781ad6265SDimitry Andric static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry); 278a7dea167SDimitry Andric 279e8d8bef9SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } 280a7dea167SDimitry Andric 281a7dea167SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 282a7dea167SDimitry Andric getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, 283a7dea167SDimitry Andric bool IsVolatile) override { 284a7dea167SDimitry Andric return std::move(Buffer); 285a7dea167SDimitry Andric } 286a7dea167SDimitry Andric 287a7dea167SDimitry Andric std::error_code close() override { return {}; } 288a7dea167SDimitry Andric 289a7dea167SDimitry Andric private: 290a7dea167SDimitry Andric std::unique_ptr<llvm::MemoryBuffer> Buffer; 291a7dea167SDimitry Andric llvm::vfs::Status Stat; 292a7dea167SDimitry Andric }; 293a7dea167SDimitry Andric 294e8d8bef9SDimitry Andric } // end anonymous namespace 295e8d8bef9SDimitry Andric 29681ad6265SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 29781ad6265SDimitry Andric DepScanFile::create(EntryRef Entry) { 29804eeddc0SDimitry Andric assert(!Entry.isError() && "error"); 29904eeddc0SDimitry Andric 3000eae32dcSDimitry Andric if (Entry.isDirectory()) 3010eae32dcSDimitry Andric return std::make_error_code(std::errc::is_a_directory); 3020eae32dcSDimitry Andric 30381ad6265SDimitry Andric auto Result = std::make_unique<DepScanFile>( 30404eeddc0SDimitry Andric llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), 30504eeddc0SDimitry Andric Entry.getStatus().getName(), 306a7dea167SDimitry Andric /*RequiresNullTerminator=*/false), 30704eeddc0SDimitry Andric Entry.getStatus()); 3080eae32dcSDimitry Andric 309a7dea167SDimitry Andric return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 310a7dea167SDimitry Andric std::unique_ptr<llvm::vfs::File>(std::move(Result))); 311a7dea167SDimitry Andric } 312a7dea167SDimitry Andric 313a7dea167SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 314a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { 315a7dea167SDimitry Andric SmallString<256> OwnedFilename; 316a7dea167SDimitry Andric StringRef Filename = Path.toStringRef(OwnedFilename); 317a7dea167SDimitry Andric 3180eae32dcSDimitry Andric llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 319a7dea167SDimitry Andric if (!Result) 320a7dea167SDimitry Andric return Result.getError(); 32181ad6265SDimitry Andric return DepScanFile::create(Result.get()); 322a7dea167SDimitry Andric } 323