1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" 10 #include "llvm/Support/MemoryBuffer.h" 11 #include "llvm/Support/Threading.h" 12 #include <optional> 13 14 using namespace clang; 15 using namespace tooling; 16 using namespace dependencies; 17 18 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry> 19 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { 20 // Load the file and its content from the file system. 21 auto MaybeFile = getUnderlyingFS().openFileForRead(Filename); 22 if (!MaybeFile) 23 return MaybeFile.getError(); 24 auto File = std::move(*MaybeFile); 25 26 auto MaybeStat = File->status(); 27 if (!MaybeStat) 28 return MaybeStat.getError(); 29 auto Stat = std::move(*MaybeStat); 30 31 auto MaybeBuffer = File->getBuffer(Stat.getName()); 32 if (!MaybeBuffer) 33 return MaybeBuffer.getError(); 34 auto Buffer = std::move(*MaybeBuffer); 35 36 // If the file size changed between read and stat, pretend it didn't. 37 if (Stat.getSize() != Buffer->getBufferSize()) 38 Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize()); 39 40 return TentativeEntry(Stat, std::move(Buffer)); 41 } 42 43 bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated( 44 EntryRef Ref) { 45 auto &Entry = Ref.Entry; 46 47 if (Entry.isError() || Entry.isDirectory()) 48 return false; 49 50 CachedFileContents *Contents = Entry.getCachedContents(); 51 assert(Contents && "contents not initialized"); 52 53 // Double-checked locking. 54 if (Contents->DepDirectives.load()) 55 return true; 56 57 std::lock_guard<std::mutex> GuardLock(Contents->ValueLock); 58 59 // Double-checked locking. 60 if (Contents->DepDirectives.load()) 61 return true; 62 63 SmallVector<dependency_directives_scan::Directive, 64> Directives; 64 // Scan the file for preprocessor directives that might affect the 65 // dependencies. 66 if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), 67 Contents->DepDirectiveTokens, 68 Directives)) { 69 Contents->DepDirectiveTokens.clear(); 70 // FIXME: Propagate the diagnostic if desired by the client. 71 Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>()); 72 return false; 73 } 74 75 // This function performed double-checked locking using `DepDirectives`. 76 // Assigning it must be the last thing this function does, otherwise other 77 // threads may skip the critical section (`DepDirectives != nullptr`), leading 78 // to a data race. 79 Contents->DepDirectives.store( 80 new std::optional<DependencyDirectivesTy>(std::move(Directives))); 81 return true; 82 } 83 84 DependencyScanningFilesystemSharedCache:: 85 DependencyScanningFilesystemSharedCache() { 86 // This heuristic was chosen using a empirical testing on a 87 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache 88 // sharding gives a performance edge by reducing the lock contention. 89 // FIXME: A better heuristic might also consider the OS to account for 90 // the different cost of lock contention on different OSes. 91 NumShards = 92 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); 93 CacheShards = std::make_unique<CacheShard[]>(NumShards); 94 } 95 96 DependencyScanningFilesystemSharedCache::CacheShard & 97 DependencyScanningFilesystemSharedCache::getShardForFilename( 98 StringRef Filename) const { 99 assert(llvm::sys::path::is_absolute_gnu(Filename)); 100 return CacheShards[llvm::hash_value(Filename) % NumShards]; 101 } 102 103 DependencyScanningFilesystemSharedCache::CacheShard & 104 DependencyScanningFilesystemSharedCache::getShardForUID( 105 llvm::sys::fs::UniqueID UID) const { 106 auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile()); 107 return CacheShards[Hash % NumShards]; 108 } 109 110 std::vector<DependencyScanningFilesystemSharedCache::OutOfDateEntry> 111 DependencyScanningFilesystemSharedCache::getOutOfDateEntries( 112 llvm::vfs::FileSystem &UnderlyingFS) const { 113 // Iterate through all shards and look for cached stat errors. 114 std::vector<OutOfDateEntry> InvalidDiagInfo; 115 for (unsigned i = 0; i < NumShards; i++) { 116 const CacheShard &Shard = CacheShards[i]; 117 std::lock_guard<std::mutex> LockGuard(Shard.CacheLock); 118 for (const auto &[Path, CachedPair] : Shard.CacheByFilename) { 119 const CachedFileSystemEntry *Entry = CachedPair.first; 120 llvm::ErrorOr<llvm::vfs::Status> Status = UnderlyingFS.status(Path); 121 if (Status) { 122 if (Entry->getError()) { 123 // This is the case where we have cached the non-existence 124 // of the file at Path first, and a file at the path is created 125 // later. The cache entry is not invalidated (as we have no good 126 // way to do it now), which may lead to missing file build errors. 127 InvalidDiagInfo.emplace_back(Path.data()); 128 } else { 129 llvm::vfs::Status CachedStatus = Entry->getStatus(); 130 if (Status->getType() == llvm::sys::fs::file_type::regular_file && 131 Status->getType() == CachedStatus.getType()) { 132 // We only check regular files. Directory files sizes could change 133 // due to content changes, and reporting directory size changes can 134 // lead to false positives. 135 // TODO: At the moment, we do not detect symlinks to files whose 136 // size may change. We need to decide if we want to detect cached 137 // symlink size changes. We can also expand this to detect file 138 // type changes. 139 uint64_t CachedSize = CachedStatus.getSize(); 140 uint64_t ActualSize = Status->getSize(); 141 if (CachedSize != ActualSize) { 142 // This is the case where the cached file has a different size 143 // from the actual file that comes from the underlying FS. 144 InvalidDiagInfo.emplace_back(Path.data(), CachedSize, ActualSize); 145 } 146 } 147 } 148 } 149 } 150 } 151 return InvalidDiagInfo; 152 } 153 154 const CachedFileSystemEntry * 155 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( 156 StringRef Filename) const { 157 assert(llvm::sys::path::is_absolute_gnu(Filename)); 158 std::lock_guard<std::mutex> LockGuard(CacheLock); 159 auto It = CacheByFilename.find(Filename); 160 return It == CacheByFilename.end() ? nullptr : It->getValue().first; 161 } 162 163 const CachedFileSystemEntry * 164 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( 165 llvm::sys::fs::UniqueID UID) const { 166 std::lock_guard<std::mutex> LockGuard(CacheLock); 167 auto It = EntriesByUID.find(UID); 168 return It == EntriesByUID.end() ? nullptr : It->getSecond(); 169 } 170 171 const CachedFileSystemEntry & 172 DependencyScanningFilesystemSharedCache::CacheShard:: 173 getOrEmplaceEntryForFilename(StringRef Filename, 174 llvm::ErrorOr<llvm::vfs::Status> Stat) { 175 std::lock_guard<std::mutex> LockGuard(CacheLock); 176 auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}}); 177 auto &[CachedEntry, CachedRealPath] = It->getValue(); 178 if (!CachedEntry) { 179 // The entry is not present in the shared cache. Either the cache doesn't 180 // know about the file at all, or it only knows about its real path. 181 assert((Inserted || CachedRealPath) && "existing file with empty pair"); 182 CachedEntry = 183 new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); 184 } 185 return *CachedEntry; 186 } 187 188 const CachedFileSystemEntry & 189 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( 190 llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, 191 std::unique_ptr<llvm::MemoryBuffer> Contents) { 192 std::lock_guard<std::mutex> LockGuard(CacheLock); 193 auto [It, Inserted] = EntriesByUID.try_emplace(UID); 194 auto &CachedEntry = It->getSecond(); 195 if (Inserted) { 196 CachedFileContents *StoredContents = nullptr; 197 if (Contents) 198 StoredContents = new (ContentsStorage.Allocate()) 199 CachedFileContents(std::move(Contents)); 200 CachedEntry = new (EntryStorage.Allocate()) 201 CachedFileSystemEntry(std::move(Stat), StoredContents); 202 } 203 return *CachedEntry; 204 } 205 206 const CachedFileSystemEntry & 207 DependencyScanningFilesystemSharedCache::CacheShard:: 208 getOrInsertEntryForFilename(StringRef Filename, 209 const CachedFileSystemEntry &Entry) { 210 std::lock_guard<std::mutex> LockGuard(CacheLock); 211 auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}}); 212 auto &[CachedEntry, CachedRealPath] = It->getValue(); 213 if (!Inserted || !CachedEntry) 214 CachedEntry = &Entry; 215 return *CachedEntry; 216 } 217 218 const CachedRealPath * 219 DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename( 220 StringRef Filename) const { 221 assert(llvm::sys::path::is_absolute_gnu(Filename)); 222 std::lock_guard<std::mutex> LockGuard(CacheLock); 223 auto It = CacheByFilename.find(Filename); 224 return It == CacheByFilename.end() ? nullptr : It->getValue().second; 225 } 226 227 const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard:: 228 getOrEmplaceRealPathForFilename(StringRef Filename, 229 llvm::ErrorOr<llvm::StringRef> RealPath) { 230 std::lock_guard<std::mutex> LockGuard(CacheLock); 231 232 const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second; 233 if (!StoredRealPath) { 234 auto OwnedRealPath = [&]() -> CachedRealPath { 235 if (!RealPath) 236 return RealPath.getError(); 237 return RealPath->str(); 238 }(); 239 240 StoredRealPath = new (RealPathStorage.Allocate()) 241 CachedRealPath(std::move(OwnedRealPath)); 242 } 243 244 return *StoredRealPath; 245 } 246 247 bool DependencyScanningWorkerFilesystem::shouldBypass(StringRef Path) const { 248 return BypassedPathPrefix && Path.starts_with(*BypassedPathPrefix); 249 } 250 251 DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem( 252 DependencyScanningFilesystemSharedCache &SharedCache, 253 IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) 254 : llvm::RTTIExtends<DependencyScanningWorkerFilesystem, 255 llvm::vfs::ProxyFileSystem>(std::move(FS)), 256 SharedCache(SharedCache), 257 WorkingDirForCacheLookup(llvm::errc::invalid_argument) { 258 updateWorkingDirForCacheLookup(); 259 } 260 261 const CachedFileSystemEntry & 262 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( 263 TentativeEntry TEntry) { 264 auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID()); 265 return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(), 266 std::move(TEntry.Status), 267 std::move(TEntry.Contents)); 268 } 269 270 const CachedFileSystemEntry * 271 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( 272 StringRef Filename) { 273 if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) 274 return Entry; 275 auto &Shard = SharedCache.getShardForFilename(Filename); 276 if (const auto *Entry = Shard.findEntryByFilename(Filename)) 277 return &LocalCache.insertEntryForFilename(Filename, *Entry); 278 return nullptr; 279 } 280 281 llvm::ErrorOr<const CachedFileSystemEntry &> 282 DependencyScanningWorkerFilesystem::computeAndStoreResult( 283 StringRef OriginalFilename, StringRef FilenameForLookup) { 284 llvm::ErrorOr<llvm::vfs::Status> Stat = 285 getUnderlyingFS().status(OriginalFilename); 286 if (!Stat) { 287 const auto &Entry = 288 getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError()); 289 return insertLocalEntryForFilename(FilenameForLookup, Entry); 290 } 291 292 if (const auto *Entry = findSharedEntryByUID(*Stat)) 293 return insertLocalEntryForFilename(FilenameForLookup, *Entry); 294 295 auto TEntry = 296 Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename); 297 298 const CachedFileSystemEntry *SharedEntry = [&]() { 299 if (TEntry) { 300 const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); 301 return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry); 302 } 303 return &getOrEmplaceSharedEntryForFilename(FilenameForLookup, 304 TEntry.getError()); 305 }(); 306 307 return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry); 308 } 309 310 llvm::ErrorOr<EntryRef> 311 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( 312 StringRef OriginalFilename) { 313 SmallString<256> PathBuf; 314 auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf); 315 if (!FilenameForLookup) 316 return FilenameForLookup.getError(); 317 318 if (const auto *Entry = 319 findEntryByFilenameWithWriteThrough(*FilenameForLookup)) 320 return EntryRef(OriginalFilename, *Entry).unwrapError(); 321 auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup); 322 if (!MaybeEntry) 323 return MaybeEntry.getError(); 324 return EntryRef(OriginalFilename, *MaybeEntry).unwrapError(); 325 } 326 327 llvm::ErrorOr<llvm::vfs::Status> 328 DependencyScanningWorkerFilesystem::status(const Twine &Path) { 329 SmallString<256> OwnedFilename; 330 StringRef Filename = Path.toStringRef(OwnedFilename); 331 332 if (shouldBypass(Filename)) 333 return getUnderlyingFS().status(Path); 334 335 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 336 if (!Result) 337 return Result.getError(); 338 return Result->getStatus(); 339 } 340 341 bool DependencyScanningWorkerFilesystem::exists(const Twine &Path) { 342 // While some VFS overlay filesystems may implement more-efficient 343 // mechanisms for `exists` queries, `DependencyScanningWorkerFilesystem` 344 // typically wraps `RealFileSystem` which does not specialize `exists`, 345 // so it is not likely to benefit from such optimizations. Instead, 346 // it is more-valuable to have this query go through the 347 // cached-`status` code-path of the `DependencyScanningWorkerFilesystem`. 348 llvm::ErrorOr<llvm::vfs::Status> Status = status(Path); 349 return Status && Status->exists(); 350 } 351 352 namespace { 353 354 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using 355 /// this subclass. 356 class DepScanFile final : public llvm::vfs::File { 357 public: 358 DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, 359 llvm::vfs::Status Stat) 360 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} 361 362 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry); 363 364 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } 365 366 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 367 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, 368 bool IsVolatile) override { 369 return std::move(Buffer); 370 } 371 372 std::error_code close() override { return {}; } 373 374 private: 375 std::unique_ptr<llvm::MemoryBuffer> Buffer; 376 llvm::vfs::Status Stat; 377 }; 378 379 } // end anonymous namespace 380 381 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 382 DepScanFile::create(EntryRef Entry) { 383 assert(!Entry.isError() && "error"); 384 385 if (Entry.isDirectory()) 386 return std::make_error_code(std::errc::is_a_directory); 387 388 auto Result = std::make_unique<DepScanFile>( 389 llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), 390 Entry.getStatus().getName(), 391 /*RequiresNullTerminator=*/false), 392 Entry.getStatus()); 393 394 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 395 std::unique_ptr<llvm::vfs::File>(std::move(Result))); 396 } 397 398 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 399 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { 400 SmallString<256> OwnedFilename; 401 StringRef Filename = Path.toStringRef(OwnedFilename); 402 403 if (shouldBypass(Filename)) 404 return getUnderlyingFS().openFileForRead(Path); 405 406 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 407 if (!Result) 408 return Result.getError(); 409 return DepScanFile::create(Result.get()); 410 } 411 412 std::error_code 413 DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path, 414 SmallVectorImpl<char> &Output) { 415 SmallString<256> OwnedFilename; 416 StringRef OriginalFilename = Path.toStringRef(OwnedFilename); 417 418 if (shouldBypass(OriginalFilename)) 419 return getUnderlyingFS().getRealPath(Path, Output); 420 421 SmallString<256> PathBuf; 422 auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf); 423 if (!FilenameForLookup) 424 return FilenameForLookup.getError(); 425 426 auto HandleCachedRealPath = 427 [&Output](const CachedRealPath &RealPath) -> std::error_code { 428 if (!RealPath) 429 return RealPath.getError(); 430 Output.assign(RealPath->begin(), RealPath->end()); 431 return {}; 432 }; 433 434 // If we already have the result in local cache, no work required. 435 if (const auto *RealPath = 436 LocalCache.findRealPathByFilename(*FilenameForLookup)) 437 return HandleCachedRealPath(*RealPath); 438 439 // If we have the result in the shared cache, cache it locally. 440 auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup); 441 if (const auto *ShardRealPath = 442 Shard.findRealPathByFilename(*FilenameForLookup)) { 443 const auto &RealPath = LocalCache.insertRealPathForFilename( 444 *FilenameForLookup, *ShardRealPath); 445 return HandleCachedRealPath(RealPath); 446 } 447 448 // If we don't know the real path, compute it... 449 std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output); 450 llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC; 451 if (!EC) 452 ComputedRealPath = StringRef{Output.data(), Output.size()}; 453 454 // ...and try to write it into the shared cache. In case some other thread won 455 // this race and already wrote its own result there, just adopt it. Write 456 // whatever is in the shared cache into the local one. 457 const auto &RealPath = Shard.getOrEmplaceRealPathForFilename( 458 *FilenameForLookup, ComputedRealPath); 459 return HandleCachedRealPath( 460 LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath)); 461 } 462 463 std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory( 464 const Twine &Path) { 465 std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path); 466 updateWorkingDirForCacheLookup(); 467 return EC; 468 } 469 470 void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() { 471 llvm::ErrorOr<std::string> CWD = 472 getUnderlyingFS().getCurrentWorkingDirectory(); 473 if (!CWD) { 474 WorkingDirForCacheLookup = CWD.getError(); 475 } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) { 476 WorkingDirForCacheLookup = llvm::errc::invalid_argument; 477 } else { 478 WorkingDirForCacheLookup = *CWD; 479 } 480 assert(!WorkingDirForCacheLookup || 481 llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup)); 482 } 483 484 llvm::ErrorOr<StringRef> 485 DependencyScanningWorkerFilesystem::tryGetFilenameForLookup( 486 StringRef OriginalFilename, llvm::SmallVectorImpl<char> &PathBuf) const { 487 StringRef FilenameForLookup; 488 if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) { 489 FilenameForLookup = OriginalFilename; 490 } else if (!WorkingDirForCacheLookup) { 491 return WorkingDirForCacheLookup.getError(); 492 } else { 493 StringRef RelFilename = OriginalFilename; 494 RelFilename.consume_front("./"); 495 PathBuf.assign(WorkingDirForCacheLookup->begin(), 496 WorkingDirForCacheLookup->end()); 497 llvm::sys::path::append(PathBuf, RelFilename); 498 FilenameForLookup = StringRef{PathBuf.begin(), PathBuf.size()}; 499 } 500 assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup)); 501 return FilenameForLookup; 502 } 503 504 const char DependencyScanningWorkerFilesystem::ID = 0; 505