1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" 10 #include "llvm/Support/MemoryBuffer.h" 11 #include "llvm/Support/SmallVectorMemoryBuffer.h" 12 #include "llvm/Support/Threading.h" 13 #include <optional> 14 15 using namespace clang; 16 using namespace tooling; 17 using namespace dependencies; 18 19 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry> 20 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { 21 // Load the file and its content from the file system. 22 auto MaybeFile = getUnderlyingFS().openFileForRead(Filename); 23 if (!MaybeFile) 24 return MaybeFile.getError(); 25 auto File = std::move(*MaybeFile); 26 27 auto MaybeStat = File->status(); 28 if (!MaybeStat) 29 return MaybeStat.getError(); 30 auto Stat = std::move(*MaybeStat); 31 32 auto MaybeBuffer = File->getBuffer(Stat.getName()); 33 if (!MaybeBuffer) 34 return MaybeBuffer.getError(); 35 auto Buffer = std::move(*MaybeBuffer); 36 37 // If the file size changed between read and stat, pretend it didn't. 38 if (Stat.getSize() != Buffer->getBufferSize()) 39 Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize()); 40 41 return TentativeEntry(Stat, std::move(Buffer)); 42 } 43 44 bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated( 45 EntryRef Ref) { 46 auto &Entry = Ref.Entry; 47 48 if (Entry.isError() || Entry.isDirectory()) 49 return false; 50 51 CachedFileContents *Contents = Entry.getCachedContents(); 52 assert(Contents && "contents not initialized"); 53 54 // Double-checked locking. 55 if (Contents->DepDirectives.load()) 56 return true; 57 58 std::lock_guard<std::mutex> GuardLock(Contents->ValueLock); 59 60 // Double-checked locking. 61 if (Contents->DepDirectives.load()) 62 return true; 63 64 SmallVector<dependency_directives_scan::Directive, 64> Directives; 65 // Scan the file for preprocessor directives that might affect the 66 // dependencies. 67 if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), 68 Contents->DepDirectiveTokens, 69 Directives)) { 70 Contents->DepDirectiveTokens.clear(); 71 // FIXME: Propagate the diagnostic if desired by the client. 72 Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>()); 73 return false; 74 } 75 76 // This function performed double-checked locking using `DepDirectives`. 77 // Assigning it must be the last thing this function does, otherwise other 78 // threads may skip the critical section (`DepDirectives != nullptr`), leading 79 // to a data race. 80 Contents->DepDirectives.store( 81 new std::optional<DependencyDirectivesTy>(std::move(Directives))); 82 return true; 83 } 84 85 DependencyScanningFilesystemSharedCache:: 86 DependencyScanningFilesystemSharedCache() { 87 // This heuristic was chosen using a empirical testing on a 88 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache 89 // sharding gives a performance edge by reducing the lock contention. 90 // FIXME: A better heuristic might also consider the OS to account for 91 // the different cost of lock contention on different OSes. 92 NumShards = 93 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); 94 CacheShards = std::make_unique<CacheShard[]>(NumShards); 95 } 96 97 DependencyScanningFilesystemSharedCache::CacheShard & 98 DependencyScanningFilesystemSharedCache::getShardForFilename( 99 StringRef Filename) const { 100 assert(llvm::sys::path::is_absolute_gnu(Filename)); 101 return CacheShards[llvm::hash_value(Filename) % NumShards]; 102 } 103 104 DependencyScanningFilesystemSharedCache::CacheShard & 105 DependencyScanningFilesystemSharedCache::getShardForUID( 106 llvm::sys::fs::UniqueID UID) const { 107 auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile()); 108 return CacheShards[Hash % NumShards]; 109 } 110 111 const CachedFileSystemEntry * 112 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( 113 StringRef Filename) const { 114 assert(llvm::sys::path::is_absolute_gnu(Filename)); 115 std::lock_guard<std::mutex> LockGuard(CacheLock); 116 auto It = CacheByFilename.find(Filename); 117 return It == CacheByFilename.end() ? nullptr : It->getValue().first; 118 } 119 120 const CachedFileSystemEntry * 121 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( 122 llvm::sys::fs::UniqueID UID) const { 123 std::lock_guard<std::mutex> LockGuard(CacheLock); 124 auto It = EntriesByUID.find(UID); 125 return It == EntriesByUID.end() ? nullptr : It->getSecond(); 126 } 127 128 const CachedFileSystemEntry & 129 DependencyScanningFilesystemSharedCache::CacheShard:: 130 getOrEmplaceEntryForFilename(StringRef Filename, 131 llvm::ErrorOr<llvm::vfs::Status> Stat) { 132 std::lock_guard<std::mutex> LockGuard(CacheLock); 133 auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}}); 134 auto &[CachedEntry, CachedRealPath] = It->getValue(); 135 if (!CachedEntry) { 136 // The entry is not present in the shared cache. Either the cache doesn't 137 // know about the file at all, or it only knows about its real path. 138 assert((Inserted || CachedRealPath) && "existing file with empty pair"); 139 CachedEntry = 140 new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); 141 } 142 return *CachedEntry; 143 } 144 145 const CachedFileSystemEntry & 146 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( 147 llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, 148 std::unique_ptr<llvm::MemoryBuffer> Contents) { 149 std::lock_guard<std::mutex> LockGuard(CacheLock); 150 auto [It, Inserted] = EntriesByUID.insert({UID, nullptr}); 151 auto &CachedEntry = It->getSecond(); 152 if (Inserted) { 153 CachedFileContents *StoredContents = nullptr; 154 if (Contents) 155 StoredContents = new (ContentsStorage.Allocate()) 156 CachedFileContents(std::move(Contents)); 157 CachedEntry = new (EntryStorage.Allocate()) 158 CachedFileSystemEntry(std::move(Stat), StoredContents); 159 } 160 return *CachedEntry; 161 } 162 163 const CachedFileSystemEntry & 164 DependencyScanningFilesystemSharedCache::CacheShard:: 165 getOrInsertEntryForFilename(StringRef Filename, 166 const CachedFileSystemEntry &Entry) { 167 std::lock_guard<std::mutex> LockGuard(CacheLock); 168 auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}}); 169 auto &[CachedEntry, CachedRealPath] = It->getValue(); 170 if (!Inserted || !CachedEntry) 171 CachedEntry = &Entry; 172 return *CachedEntry; 173 } 174 175 const CachedRealPath * 176 DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename( 177 StringRef Filename) const { 178 assert(llvm::sys::path::is_absolute_gnu(Filename)); 179 std::lock_guard<std::mutex> LockGuard(CacheLock); 180 auto It = CacheByFilename.find(Filename); 181 return It == CacheByFilename.end() ? nullptr : It->getValue().second; 182 } 183 184 const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard:: 185 getOrEmplaceRealPathForFilename(StringRef Filename, 186 llvm::ErrorOr<llvm::StringRef> RealPath) { 187 std::lock_guard<std::mutex> LockGuard(CacheLock); 188 189 const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second; 190 if (!StoredRealPath) { 191 auto OwnedRealPath = [&]() -> CachedRealPath { 192 if (!RealPath) 193 return RealPath.getError(); 194 return RealPath->str(); 195 }(); 196 197 StoredRealPath = new (RealPathStorage.Allocate()) 198 CachedRealPath(std::move(OwnedRealPath)); 199 } 200 201 return *StoredRealPath; 202 } 203 204 static bool shouldCacheStatFailures(StringRef Filename) { 205 StringRef Ext = llvm::sys::path::extension(Filename); 206 if (Ext.empty()) 207 return false; // This may be the module cache directory. 208 return true; 209 } 210 211 DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem( 212 DependencyScanningFilesystemSharedCache &SharedCache, 213 IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) 214 : llvm::RTTIExtends<DependencyScanningWorkerFilesystem, 215 llvm::vfs::ProxyFileSystem>(std::move(FS)), 216 SharedCache(SharedCache), 217 WorkingDirForCacheLookup(llvm::errc::invalid_argument) { 218 updateWorkingDirForCacheLookup(); 219 } 220 221 const CachedFileSystemEntry & 222 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( 223 TentativeEntry TEntry) { 224 auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID()); 225 return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(), 226 std::move(TEntry.Status), 227 std::move(TEntry.Contents)); 228 } 229 230 const CachedFileSystemEntry * 231 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( 232 StringRef Filename) { 233 if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) 234 return Entry; 235 auto &Shard = SharedCache.getShardForFilename(Filename); 236 if (const auto *Entry = Shard.findEntryByFilename(Filename)) 237 return &LocalCache.insertEntryForFilename(Filename, *Entry); 238 return nullptr; 239 } 240 241 llvm::ErrorOr<const CachedFileSystemEntry &> 242 DependencyScanningWorkerFilesystem::computeAndStoreResult( 243 StringRef OriginalFilename, StringRef FilenameForLookup) { 244 llvm::ErrorOr<llvm::vfs::Status> Stat = 245 getUnderlyingFS().status(OriginalFilename); 246 if (!Stat) { 247 if (!shouldCacheStatFailures(OriginalFilename)) 248 return Stat.getError(); 249 const auto &Entry = 250 getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError()); 251 return insertLocalEntryForFilename(FilenameForLookup, Entry); 252 } 253 254 if (const auto *Entry = findSharedEntryByUID(*Stat)) 255 return insertLocalEntryForFilename(FilenameForLookup, *Entry); 256 257 auto TEntry = 258 Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename); 259 260 const CachedFileSystemEntry *SharedEntry = [&]() { 261 if (TEntry) { 262 const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); 263 return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry); 264 } 265 return &getOrEmplaceSharedEntryForFilename(FilenameForLookup, 266 TEntry.getError()); 267 }(); 268 269 return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry); 270 } 271 272 llvm::ErrorOr<EntryRef> 273 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( 274 StringRef OriginalFilename) { 275 SmallString<256> PathBuf; 276 auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf); 277 if (!FilenameForLookup) 278 return FilenameForLookup.getError(); 279 280 if (const auto *Entry = 281 findEntryByFilenameWithWriteThrough(*FilenameForLookup)) 282 return EntryRef(OriginalFilename, *Entry).unwrapError(); 283 auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup); 284 if (!MaybeEntry) 285 return MaybeEntry.getError(); 286 return EntryRef(OriginalFilename, *MaybeEntry).unwrapError(); 287 } 288 289 llvm::ErrorOr<llvm::vfs::Status> 290 DependencyScanningWorkerFilesystem::status(const Twine &Path) { 291 SmallString<256> OwnedFilename; 292 StringRef Filename = Path.toStringRef(OwnedFilename); 293 294 if (Filename.ends_with(".pcm")) 295 return getUnderlyingFS().status(Path); 296 297 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 298 if (!Result) 299 return Result.getError(); 300 return Result->getStatus(); 301 } 302 303 bool DependencyScanningWorkerFilesystem::exists(const Twine &Path) { 304 // While some VFS overlay filesystems may implement more-efficient 305 // mechanisms for `exists` queries, `DependencyScanningWorkerFilesystem` 306 // typically wraps `RealFileSystem` which does not specialize `exists`, 307 // so it is not likely to benefit from such optimizations. Instead, 308 // it is more-valuable to have this query go through the 309 // cached-`status` code-path of the `DependencyScanningWorkerFilesystem`. 310 llvm::ErrorOr<llvm::vfs::Status> Status = status(Path); 311 return Status && Status->exists(); 312 } 313 314 namespace { 315 316 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using 317 /// this subclass. 318 class DepScanFile final : public llvm::vfs::File { 319 public: 320 DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, 321 llvm::vfs::Status Stat) 322 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} 323 324 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry); 325 326 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } 327 328 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 329 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, 330 bool IsVolatile) override { 331 return std::move(Buffer); 332 } 333 334 std::error_code close() override { return {}; } 335 336 private: 337 std::unique_ptr<llvm::MemoryBuffer> Buffer; 338 llvm::vfs::Status Stat; 339 }; 340 341 } // end anonymous namespace 342 343 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 344 DepScanFile::create(EntryRef Entry) { 345 assert(!Entry.isError() && "error"); 346 347 if (Entry.isDirectory()) 348 return std::make_error_code(std::errc::is_a_directory); 349 350 auto Result = std::make_unique<DepScanFile>( 351 llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), 352 Entry.getStatus().getName(), 353 /*RequiresNullTerminator=*/false), 354 Entry.getStatus()); 355 356 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 357 std::unique_ptr<llvm::vfs::File>(std::move(Result))); 358 } 359 360 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 361 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { 362 SmallString<256> OwnedFilename; 363 StringRef Filename = Path.toStringRef(OwnedFilename); 364 365 if (Filename.ends_with(".pcm")) 366 return getUnderlyingFS().openFileForRead(Path); 367 368 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 369 if (!Result) 370 return Result.getError(); 371 return DepScanFile::create(Result.get()); 372 } 373 374 std::error_code 375 DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path, 376 SmallVectorImpl<char> &Output) { 377 SmallString<256> OwnedFilename; 378 StringRef OriginalFilename = Path.toStringRef(OwnedFilename); 379 380 SmallString<256> PathBuf; 381 auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf); 382 if (!FilenameForLookup) 383 return FilenameForLookup.getError(); 384 385 auto HandleCachedRealPath = 386 [&Output](const CachedRealPath &RealPath) -> std::error_code { 387 if (!RealPath) 388 return RealPath.getError(); 389 Output.assign(RealPath->begin(), RealPath->end()); 390 return {}; 391 }; 392 393 // If we already have the result in local cache, no work required. 394 if (const auto *RealPath = 395 LocalCache.findRealPathByFilename(*FilenameForLookup)) 396 return HandleCachedRealPath(*RealPath); 397 398 // If we have the result in the shared cache, cache it locally. 399 auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup); 400 if (const auto *ShardRealPath = 401 Shard.findRealPathByFilename(*FilenameForLookup)) { 402 const auto &RealPath = LocalCache.insertRealPathForFilename( 403 *FilenameForLookup, *ShardRealPath); 404 return HandleCachedRealPath(RealPath); 405 } 406 407 // If we don't know the real path, compute it... 408 std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output); 409 llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC; 410 if (!EC) 411 ComputedRealPath = StringRef{Output.data(), Output.size()}; 412 413 // ...and try to write it into the shared cache. In case some other thread won 414 // this race and already wrote its own result there, just adopt it. Write 415 // whatever is in the shared cache into the local one. 416 const auto &RealPath = Shard.getOrEmplaceRealPathForFilename( 417 *FilenameForLookup, ComputedRealPath); 418 return HandleCachedRealPath( 419 LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath)); 420 } 421 422 std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory( 423 const Twine &Path) { 424 std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path); 425 updateWorkingDirForCacheLookup(); 426 return EC; 427 } 428 429 void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() { 430 llvm::ErrorOr<std::string> CWD = 431 getUnderlyingFS().getCurrentWorkingDirectory(); 432 if (!CWD) { 433 WorkingDirForCacheLookup = CWD.getError(); 434 } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) { 435 WorkingDirForCacheLookup = llvm::errc::invalid_argument; 436 } else { 437 WorkingDirForCacheLookup = *CWD; 438 } 439 assert(!WorkingDirForCacheLookup || 440 llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup)); 441 } 442 443 llvm::ErrorOr<StringRef> 444 DependencyScanningWorkerFilesystem::tryGetFilenameForLookup( 445 StringRef OriginalFilename, llvm::SmallVectorImpl<char> &PathBuf) const { 446 StringRef FilenameForLookup; 447 if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) { 448 FilenameForLookup = OriginalFilename; 449 } else if (!WorkingDirForCacheLookup) { 450 return WorkingDirForCacheLookup.getError(); 451 } else { 452 StringRef RelFilename = OriginalFilename; 453 RelFilename.consume_front("./"); 454 PathBuf.assign(WorkingDirForCacheLookup->begin(), 455 WorkingDirForCacheLookup->end()); 456 llvm::sys::path::append(PathBuf, RelFilename); 457 FilenameForLookup = StringRef{PathBuf.begin(), PathBuf.size()}; 458 } 459 assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup)); 460 return FilenameForLookup; 461 } 462 463 const char DependencyScanningWorkerFilesystem::ID = 0; 464