1a7dea167SDimitry Andric //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// 2a7dea167SDimitry Andric // 3a7dea167SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a7dea167SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5a7dea167SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a7dea167SDimitry Andric // 7a7dea167SDimitry Andric //===----------------------------------------------------------------------===// 8a7dea167SDimitry Andric 9a7dea167SDimitry Andric #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" 10a7dea167SDimitry Andric #include "llvm/Support/MemoryBuffer.h" 110eae32dcSDimitry Andric #include "llvm/Support/SmallVectorMemoryBuffer.h" 12a7dea167SDimitry Andric #include "llvm/Support/Threading.h" 13a7dea167SDimitry Andric 14a7dea167SDimitry Andric using namespace clang; 15a7dea167SDimitry Andric using namespace tooling; 16a7dea167SDimitry Andric using namespace dependencies; 17a7dea167SDimitry Andric 1804eeddc0SDimitry Andric llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry> 1904eeddc0SDimitry Andric DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { 20a7dea167SDimitry Andric // Load the file and its content from the file system. 2104eeddc0SDimitry Andric auto MaybeFile = getUnderlyingFS().openFileForRead(Filename); 22a7dea167SDimitry Andric if (!MaybeFile) 23a7dea167SDimitry Andric return MaybeFile.getError(); 240eae32dcSDimitry Andric auto File = std::move(*MaybeFile); 25a7dea167SDimitry Andric 260eae32dcSDimitry Andric auto MaybeStat = File->status(); 270eae32dcSDimitry Andric if (!MaybeStat) 280eae32dcSDimitry Andric return MaybeStat.getError(); 290eae32dcSDimitry Andric auto Stat = std::move(*MaybeStat); 300eae32dcSDimitry Andric 310eae32dcSDimitry Andric auto MaybeBuffer = File->getBuffer(Stat.getName()); 32a7dea167SDimitry Andric if (!MaybeBuffer) 33a7dea167SDimitry Andric return MaybeBuffer.getError(); 340eae32dcSDimitry Andric auto Buffer = std::move(*MaybeBuffer); 350eae32dcSDimitry Andric 3604eeddc0SDimitry Andric // If the file size changed between read and stat, pretend it didn't. 3704eeddc0SDimitry Andric if (Stat.getSize() != Buffer->getBufferSize()) 3804eeddc0SDimitry Andric Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize()); 3904eeddc0SDimitry Andric 4004eeddc0SDimitry Andric return TentativeEntry(Stat, std::move(Buffer)); 410eae32dcSDimitry Andric } 420eae32dcSDimitry Andric 43*81ad6265SDimitry Andric EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary( 4404eeddc0SDimitry Andric const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { 4504eeddc0SDimitry Andric if (Entry.isError() || Entry.isDirectory() || Disable || 46*81ad6265SDimitry Andric !shouldScanForDirectives(Filename)) 47*81ad6265SDimitry Andric return EntryRef(Filename, Entry); 4804eeddc0SDimitry Andric 49*81ad6265SDimitry Andric CachedFileContents *Contents = Entry.getCachedContents(); 5004eeddc0SDimitry Andric assert(Contents && "contents not initialized"); 5104eeddc0SDimitry Andric 5204eeddc0SDimitry Andric // Double-checked locking. 53*81ad6265SDimitry Andric if (Contents->DepDirectives.load()) 54*81ad6265SDimitry Andric return EntryRef(Filename, Entry); 5504eeddc0SDimitry Andric 5604eeddc0SDimitry Andric std::lock_guard<std::mutex> GuardLock(Contents->ValueLock); 5704eeddc0SDimitry Andric 5804eeddc0SDimitry Andric // Double-checked locking. 59*81ad6265SDimitry Andric if (Contents->DepDirectives.load()) 60*81ad6265SDimitry Andric return EntryRef(Filename, Entry); 61a7dea167SDimitry Andric 62*81ad6265SDimitry Andric SmallVector<dependency_directives_scan::Directive, 64> Directives; 63*81ad6265SDimitry Andric // Scan the file for preprocessor directives that might affect the 64*81ad6265SDimitry Andric // dependencies. 65*81ad6265SDimitry Andric if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), 66*81ad6265SDimitry Andric Contents->DepDirectiveTokens, 67*81ad6265SDimitry Andric Directives)) { 68*81ad6265SDimitry Andric Contents->DepDirectiveTokens.clear(); 690eae32dcSDimitry Andric // FIXME: Propagate the diagnostic if desired by the client. 70*81ad6265SDimitry Andric Contents->DepDirectives.store(new Optional<DependencyDirectivesTy>()); 71*81ad6265SDimitry Andric return EntryRef(Filename, Entry); 72a7dea167SDimitry Andric } 73a7dea167SDimitry Andric 74*81ad6265SDimitry Andric // This function performed double-checked locking using `DepDirectives`. 75*81ad6265SDimitry Andric // Assigning it must be the last thing this function does, otherwise other 76*81ad6265SDimitry Andric // threads may skip the 77*81ad6265SDimitry Andric // critical section (`DepDirectives != nullptr`), leading to a data race. 78*81ad6265SDimitry Andric Contents->DepDirectives.store( 79*81ad6265SDimitry Andric new Optional<DependencyDirectivesTy>(std::move(Directives))); 80*81ad6265SDimitry Andric return EntryRef(Filename, Entry); 81a7dea167SDimitry Andric } 82a7dea167SDimitry Andric 830eae32dcSDimitry Andric DependencyScanningFilesystemSharedCache:: 840eae32dcSDimitry Andric DependencyScanningFilesystemSharedCache() { 85a7dea167SDimitry Andric // This heuristic was chosen using a empirical testing on a 86a7dea167SDimitry Andric // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache 87a7dea167SDimitry Andric // sharding gives a performance edge by reducing the lock contention. 88a7dea167SDimitry Andric // FIXME: A better heuristic might also consider the OS to account for 89a7dea167SDimitry Andric // the different cost of lock contention on different OSes. 905ffd83dbSDimitry Andric NumShards = 915ffd83dbSDimitry Andric std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); 92a7dea167SDimitry Andric CacheShards = std::make_unique<CacheShard[]>(NumShards); 93a7dea167SDimitry Andric } 94a7dea167SDimitry Andric 9504eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard & 9604eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::getShardForFilename( 9704eeddc0SDimitry Andric StringRef Filename) const { 9804eeddc0SDimitry Andric return CacheShards[llvm::hash_value(Filename) % NumShards]; 9904eeddc0SDimitry Andric } 10004eeddc0SDimitry Andric 10104eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard & 10204eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::getShardForUID( 10304eeddc0SDimitry Andric llvm::sys::fs::UniqueID UID) const { 10404eeddc0SDimitry Andric auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile()); 10504eeddc0SDimitry Andric return CacheShards[Hash % NumShards]; 10604eeddc0SDimitry Andric } 10704eeddc0SDimitry Andric 10804eeddc0SDimitry Andric const CachedFileSystemEntry * 10904eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( 11004eeddc0SDimitry Andric StringRef Filename) const { 11104eeddc0SDimitry Andric std::lock_guard<std::mutex> LockGuard(CacheLock); 11204eeddc0SDimitry Andric auto It = EntriesByFilename.find(Filename); 11304eeddc0SDimitry Andric return It == EntriesByFilename.end() ? nullptr : It->getValue(); 11404eeddc0SDimitry Andric } 11504eeddc0SDimitry Andric 11604eeddc0SDimitry Andric const CachedFileSystemEntry * 11704eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( 11804eeddc0SDimitry Andric llvm::sys::fs::UniqueID UID) const { 11904eeddc0SDimitry Andric std::lock_guard<std::mutex> LockGuard(CacheLock); 12004eeddc0SDimitry Andric auto It = EntriesByUID.find(UID); 12104eeddc0SDimitry Andric return It == EntriesByUID.end() ? nullptr : It->getSecond(); 12204eeddc0SDimitry Andric } 12304eeddc0SDimitry Andric 12404eeddc0SDimitry Andric const CachedFileSystemEntry & 12504eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard:: 12604eeddc0SDimitry Andric getOrEmplaceEntryForFilename(StringRef Filename, 12704eeddc0SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> Stat) { 12804eeddc0SDimitry Andric std::lock_guard<std::mutex> LockGuard(CacheLock); 12904eeddc0SDimitry Andric auto Insertion = EntriesByFilename.insert({Filename, nullptr}); 13004eeddc0SDimitry Andric if (Insertion.second) 13104eeddc0SDimitry Andric Insertion.first->second = 13204eeddc0SDimitry Andric new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); 13304eeddc0SDimitry Andric return *Insertion.first->second; 13404eeddc0SDimitry Andric } 13504eeddc0SDimitry Andric 13604eeddc0SDimitry Andric const CachedFileSystemEntry & 13704eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( 13804eeddc0SDimitry Andric llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, 13904eeddc0SDimitry Andric std::unique_ptr<llvm::MemoryBuffer> Contents) { 14004eeddc0SDimitry Andric std::lock_guard<std::mutex> LockGuard(CacheLock); 14104eeddc0SDimitry Andric auto Insertion = EntriesByUID.insert({UID, nullptr}); 14204eeddc0SDimitry Andric if (Insertion.second) { 14304eeddc0SDimitry Andric CachedFileContents *StoredContents = nullptr; 14404eeddc0SDimitry Andric if (Contents) 14504eeddc0SDimitry Andric StoredContents = new (ContentsStorage.Allocate()) 14604eeddc0SDimitry Andric CachedFileContents(std::move(Contents)); 14704eeddc0SDimitry Andric Insertion.first->second = new (EntryStorage.Allocate()) 14804eeddc0SDimitry Andric CachedFileSystemEntry(std::move(Stat), StoredContents); 14904eeddc0SDimitry Andric } 15004eeddc0SDimitry Andric return *Insertion.first->second; 15104eeddc0SDimitry Andric } 15204eeddc0SDimitry Andric 15304eeddc0SDimitry Andric const CachedFileSystemEntry & 15404eeddc0SDimitry Andric DependencyScanningFilesystemSharedCache::CacheShard:: 15504eeddc0SDimitry Andric getOrInsertEntryForFilename(StringRef Filename, 15604eeddc0SDimitry Andric const CachedFileSystemEntry &Entry) { 15704eeddc0SDimitry Andric std::lock_guard<std::mutex> LockGuard(CacheLock); 15804eeddc0SDimitry Andric return *EntriesByFilename.insert({Filename, &Entry}).first->getValue(); 159a7dea167SDimitry Andric } 160a7dea167SDimitry Andric 161480093f4SDimitry Andric /// Whitelist file extensions that should be minimized, treating no extension as 162480093f4SDimitry Andric /// a source file that should be minimized. 163480093f4SDimitry Andric /// 164480093f4SDimitry Andric /// This is kinda hacky, it would be better if we knew what kind of file Clang 165480093f4SDimitry Andric /// was expecting instead. 166*81ad6265SDimitry Andric static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) { 167480093f4SDimitry Andric StringRef Ext = llvm::sys::path::extension(Filename); 168480093f4SDimitry Andric if (Ext.empty()) 169480093f4SDimitry Andric return true; // C++ standard library 170480093f4SDimitry Andric return llvm::StringSwitch<bool>(Ext) 171480093f4SDimitry Andric .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true) 172480093f4SDimitry Andric .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true) 173480093f4SDimitry Andric .CasesLower(".m", ".mm", true) 174480093f4SDimitry Andric .CasesLower(".i", ".ii", ".mi", ".mmi", true) 175480093f4SDimitry Andric .CasesLower(".def", ".inc", true) 176480093f4SDimitry Andric .Default(false); 177480093f4SDimitry Andric } 178480093f4SDimitry Andric 179480093f4SDimitry Andric static bool shouldCacheStatFailures(StringRef Filename) { 180480093f4SDimitry Andric StringRef Ext = llvm::sys::path::extension(Filename); 181480093f4SDimitry Andric if (Ext.empty()) 182480093f4SDimitry Andric return false; // This may be the module cache directory. 1834824e7fdSDimitry Andric // Only cache stat failures on source files. 184*81ad6265SDimitry Andric return shouldScanForDirectivesBasedOnExtension(Filename); 185480093f4SDimitry Andric } 186480093f4SDimitry Andric 187*81ad6265SDimitry Andric bool DependencyScanningWorkerFilesystem::shouldScanForDirectives( 18804eeddc0SDimitry Andric StringRef Filename) { 189*81ad6265SDimitry Andric return shouldScanForDirectivesBasedOnExtension(Filename); 1904824e7fdSDimitry Andric } 1914824e7fdSDimitry Andric 19204eeddc0SDimitry Andric const CachedFileSystemEntry & 19304eeddc0SDimitry Andric DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( 19404eeddc0SDimitry Andric TentativeEntry TEntry) { 19504eeddc0SDimitry Andric auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID()); 19604eeddc0SDimitry Andric return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(), 19704eeddc0SDimitry Andric std::move(TEntry.Status), 19804eeddc0SDimitry Andric std::move(TEntry.Contents)); 19904eeddc0SDimitry Andric } 20004eeddc0SDimitry Andric 20104eeddc0SDimitry Andric const CachedFileSystemEntry * 20204eeddc0SDimitry Andric DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( 20304eeddc0SDimitry Andric StringRef Filename) { 20404eeddc0SDimitry Andric if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) 20504eeddc0SDimitry Andric return Entry; 20604eeddc0SDimitry Andric auto &Shard = SharedCache.getShardForFilename(Filename); 20704eeddc0SDimitry Andric if (const auto *Entry = Shard.findEntryByFilename(Filename)) 20804eeddc0SDimitry Andric return &LocalCache.insertEntryForFilename(Filename, *Entry); 20904eeddc0SDimitry Andric return nullptr; 21004eeddc0SDimitry Andric } 21104eeddc0SDimitry Andric 21204eeddc0SDimitry Andric llvm::ErrorOr<const CachedFileSystemEntry &> 21304eeddc0SDimitry Andric DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) { 21404eeddc0SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename); 21504eeddc0SDimitry Andric if (!Stat) { 21604eeddc0SDimitry Andric if (!shouldCacheStatFailures(Filename)) 21704eeddc0SDimitry Andric return Stat.getError(); 21804eeddc0SDimitry Andric const auto &Entry = 21904eeddc0SDimitry Andric getOrEmplaceSharedEntryForFilename(Filename, Stat.getError()); 22004eeddc0SDimitry Andric return insertLocalEntryForFilename(Filename, Entry); 22104eeddc0SDimitry Andric } 22204eeddc0SDimitry Andric 22304eeddc0SDimitry Andric if (const auto *Entry = findSharedEntryByUID(*Stat)) 22404eeddc0SDimitry Andric return insertLocalEntryForFilename(Filename, *Entry); 22504eeddc0SDimitry Andric 22604eeddc0SDimitry Andric auto TEntry = 22704eeddc0SDimitry Andric Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename); 22804eeddc0SDimitry Andric 22904eeddc0SDimitry Andric const CachedFileSystemEntry *SharedEntry = [&]() { 23004eeddc0SDimitry Andric if (TEntry) { 23104eeddc0SDimitry Andric const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); 23204eeddc0SDimitry Andric return &getOrInsertSharedEntryForFilename(Filename, UIDEntry); 23304eeddc0SDimitry Andric } 23404eeddc0SDimitry Andric return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError()); 23504eeddc0SDimitry Andric }(); 23604eeddc0SDimitry Andric 23704eeddc0SDimitry Andric return insertLocalEntryForFilename(Filename, *SharedEntry); 238fe6060f1SDimitry Andric } 239fe6060f1SDimitry Andric 2400eae32dcSDimitry Andric llvm::ErrorOr<EntryRef> 241a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( 242*81ad6265SDimitry Andric StringRef Filename, bool DisableDirectivesScanning) { 24304eeddc0SDimitry Andric if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename)) 244*81ad6265SDimitry Andric return scanForDirectivesIfNecessary(*Entry, Filename, 245*81ad6265SDimitry Andric DisableDirectivesScanning) 24604eeddc0SDimitry Andric .unwrapError(); 24704eeddc0SDimitry Andric auto MaybeEntry = computeAndStoreResult(Filename); 24804eeddc0SDimitry Andric if (!MaybeEntry) 24904eeddc0SDimitry Andric return MaybeEntry.getError(); 250*81ad6265SDimitry Andric return scanForDirectivesIfNecessary(*MaybeEntry, Filename, 251*81ad6265SDimitry Andric DisableDirectivesScanning) 25204eeddc0SDimitry Andric .unwrapError(); 253a7dea167SDimitry Andric } 254a7dea167SDimitry Andric 255a7dea167SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> 256a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::status(const Twine &Path) { 257a7dea167SDimitry Andric SmallString<256> OwnedFilename; 258a7dea167SDimitry Andric StringRef Filename = Path.toStringRef(OwnedFilename); 2590eae32dcSDimitry Andric 2600eae32dcSDimitry Andric llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 261a7dea167SDimitry Andric if (!Result) 262a7dea167SDimitry Andric return Result.getError(); 2630eae32dcSDimitry Andric return Result->getStatus(); 264a7dea167SDimitry Andric } 265a7dea167SDimitry Andric 266a7dea167SDimitry Andric namespace { 267a7dea167SDimitry Andric 268a7dea167SDimitry Andric /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using 269a7dea167SDimitry Andric /// this subclass. 270*81ad6265SDimitry Andric class DepScanFile final : public llvm::vfs::File { 271a7dea167SDimitry Andric public: 272*81ad6265SDimitry Andric DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, 273a7dea167SDimitry Andric llvm::vfs::Status Stat) 274a7dea167SDimitry Andric : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} 275a7dea167SDimitry Andric 276*81ad6265SDimitry Andric static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry); 277a7dea167SDimitry Andric 278e8d8bef9SDimitry Andric llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } 279a7dea167SDimitry Andric 280a7dea167SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 281a7dea167SDimitry Andric getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, 282a7dea167SDimitry Andric bool IsVolatile) override { 283a7dea167SDimitry Andric return std::move(Buffer); 284a7dea167SDimitry Andric } 285a7dea167SDimitry Andric 286a7dea167SDimitry Andric std::error_code close() override { return {}; } 287a7dea167SDimitry Andric 288a7dea167SDimitry Andric private: 289a7dea167SDimitry Andric std::unique_ptr<llvm::MemoryBuffer> Buffer; 290a7dea167SDimitry Andric llvm::vfs::Status Stat; 291a7dea167SDimitry Andric }; 292a7dea167SDimitry Andric 293e8d8bef9SDimitry Andric } // end anonymous namespace 294e8d8bef9SDimitry Andric 295*81ad6265SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 296*81ad6265SDimitry Andric DepScanFile::create(EntryRef Entry) { 29704eeddc0SDimitry Andric assert(!Entry.isError() && "error"); 29804eeddc0SDimitry Andric 2990eae32dcSDimitry Andric if (Entry.isDirectory()) 3000eae32dcSDimitry Andric return std::make_error_code(std::errc::is_a_directory); 3010eae32dcSDimitry Andric 302*81ad6265SDimitry Andric auto Result = std::make_unique<DepScanFile>( 30304eeddc0SDimitry Andric llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), 30404eeddc0SDimitry Andric Entry.getStatus().getName(), 305a7dea167SDimitry Andric /*RequiresNullTerminator=*/false), 30604eeddc0SDimitry Andric Entry.getStatus()); 3070eae32dcSDimitry Andric 308a7dea167SDimitry Andric return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 309a7dea167SDimitry Andric std::unique_ptr<llvm::vfs::File>(std::move(Result))); 310a7dea167SDimitry Andric } 311a7dea167SDimitry Andric 312a7dea167SDimitry Andric llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 313a7dea167SDimitry Andric DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { 314a7dea167SDimitry Andric SmallString<256> OwnedFilename; 315a7dea167SDimitry Andric StringRef Filename = Path.toStringRef(OwnedFilename); 316a7dea167SDimitry Andric 3170eae32dcSDimitry Andric llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 318a7dea167SDimitry Andric if (!Result) 319a7dea167SDimitry Andric return Result.getError(); 320*81ad6265SDimitry Andric return DepScanFile::create(Result.get()); 321a7dea167SDimitry Andric } 322