1 //===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H 10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H 11 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Lex/DependencyDirectivesScanner.h" 14 #include "llvm/ADT/DenseMap.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/Support/Allocator.h" 17 #include "llvm/Support/ErrorOr.h" 18 #include "llvm/Support/VirtualFileSystem.h" 19 #include <mutex> 20 #include <optional> 21 #include <variant> 22 23 namespace clang { 24 namespace tooling { 25 namespace dependencies { 26 27 using DependencyDirectivesTy = 28 SmallVector<dependency_directives_scan::Directive, 20>; 29 30 /// Contents and directive tokens of a cached file entry. Single instance can 31 /// be shared between multiple entries. 32 struct CachedFileContents { CachedFileContentsCachedFileContents33 CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents) 34 : Original(std::move(Contents)), DepDirectives(nullptr) {} 35 36 /// Owning storage for the original contents. 37 std::unique_ptr<llvm::MemoryBuffer> Original; 38 39 /// The mutex that must be locked before mutating directive tokens. 40 std::mutex ValueLock; 41 SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens; 42 /// Accessor to the directive tokens that's atomic to avoid data races. 43 /// \p CachedFileContents has ownership of the pointer. 44 std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives; 45 ~CachedFileContentsCachedFileContents46 ~CachedFileContents() { delete DepDirectives.load(); } 47 }; 48 49 /// An in-memory representation of a file system entity that is of interest to 50 /// the dependency scanning filesystem. 51 /// 52 /// It represents one of the following: 53 /// - opened file with contents and a stat value, 54 /// - opened file with contents, directive tokens and a stat value, 55 /// - directory entry with its stat value, 56 /// - filesystem error. 57 /// 58 /// Single instance of this class can be shared across different filenames (e.g. 59 /// a regular file and a symlink). For this reason the status filename is empty 60 /// and is only materialized by \c EntryRef that knows the requested filename. 61 class CachedFileSystemEntry { 62 public: 63 /// Creates an entry without contents: either a filesystem error or 64 /// a directory with stat value. CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)65 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat) 66 : MaybeStat(std::move(Stat)), Contents(nullptr) { 67 clearStatName(); 68 } 69 70 /// Creates an entry representing a file with contents. CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,CachedFileContents * Contents)71 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat, 72 CachedFileContents *Contents) 73 : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) { 74 clearStatName(); 75 } 76 77 /// \returns True if the entry is a filesystem error. isError()78 bool isError() const { return !MaybeStat; } 79 80 /// \returns True if the current entry represents a directory. isDirectory()81 bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); } 82 83 /// \returns Original contents of the file. getOriginalContents()84 StringRef getOriginalContents() const { 85 assert(!isError() && "error"); 86 assert(!MaybeStat->isDirectory() && "not a file"); 87 assert(Contents && "contents not initialized"); 88 return Contents->Original->getBuffer(); 89 } 90 91 /// \returns The scanned preprocessor directive tokens of the file that are 92 /// used to speed up preprocessing, if available. 93 std::optional<ArrayRef<dependency_directives_scan::Directive>> getDirectiveTokens()94 getDirectiveTokens() const { 95 assert(!isError() && "error"); 96 assert(!isDirectory() && "not a file"); 97 assert(Contents && "contents not initialized"); 98 if (auto *Directives = Contents->DepDirectives.load()) { 99 if (Directives->has_value()) 100 return ArrayRef<dependency_directives_scan::Directive>(**Directives); 101 } 102 return std::nullopt; 103 } 104 105 /// \returns The error. getError()106 std::error_code getError() const { return MaybeStat.getError(); } 107 108 /// \returns The entry status with empty filename. getStatus()109 llvm::vfs::Status getStatus() const { 110 assert(!isError() && "error"); 111 assert(MaybeStat->getName().empty() && "stat name must be empty"); 112 return *MaybeStat; 113 } 114 115 /// \returns The unique ID of the entry. getUniqueID()116 llvm::sys::fs::UniqueID getUniqueID() const { 117 assert(!isError() && "error"); 118 return MaybeStat->getUniqueID(); 119 } 120 121 /// \returns The data structure holding both contents and directive tokens. getCachedContents()122 CachedFileContents *getCachedContents() const { 123 assert(!isError() && "error"); 124 assert(!isDirectory() && "not a file"); 125 return Contents; 126 } 127 128 private: clearStatName()129 void clearStatName() { 130 if (MaybeStat) 131 MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, ""); 132 } 133 134 /// Either the filesystem error or status of the entry. 135 /// The filename is empty and only materialized by \c EntryRef. 136 llvm::ErrorOr<llvm::vfs::Status> MaybeStat; 137 138 /// Non-owning pointer to the file contents. 139 /// 140 /// We're using pointer here to keep the size of this class small. Instances 141 /// representing directories and filesystem errors don't hold any contents 142 /// anyway. 143 CachedFileContents *Contents; 144 }; 145 146 using CachedRealPath = llvm::ErrorOr<std::string>; 147 148 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the 149 /// underlying real file system, and the scanned preprocessor directives of 150 /// files. 151 /// 152 /// It is sharded based on the hash of the key to reduce the lock contention for 153 /// the worker threads. 154 class DependencyScanningFilesystemSharedCache { 155 public: 156 struct CacheShard { 157 /// The mutex that needs to be locked before mutation of any member. 158 mutable std::mutex CacheLock; 159 160 /// Map from filenames to cached entries and real paths. 161 llvm::StringMap< 162 std::pair<const CachedFileSystemEntry *, const CachedRealPath *>, 163 llvm::BumpPtrAllocator> 164 CacheByFilename; 165 166 /// Map from unique IDs to cached entries. 167 llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *> 168 EntriesByUID; 169 170 /// The backing storage for cached entries. 171 llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage; 172 173 /// The backing storage for cached contents. 174 llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage; 175 176 /// The backing storage for cached real paths. 177 llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage; 178 179 /// Returns entry associated with the filename or nullptr if none is found. 180 const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const; 181 182 /// Returns entry associated with the unique ID or nullptr if none is found. 183 const CachedFileSystemEntry * 184 findEntryByUID(llvm::sys::fs::UniqueID UID) const; 185 186 /// Returns entry associated with the filename if there is some. Otherwise, 187 /// constructs new one with the given status, associates it with the 188 /// filename and returns the result. 189 const CachedFileSystemEntry & 190 getOrEmplaceEntryForFilename(StringRef Filename, 191 llvm::ErrorOr<llvm::vfs::Status> Stat); 192 193 /// Returns entry associated with the unique ID if there is some. Otherwise, 194 /// constructs new one with the given status and contents, associates it 195 /// with the unique ID and returns the result. 196 const CachedFileSystemEntry & 197 getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, 198 std::unique_ptr<llvm::MemoryBuffer> Contents); 199 200 /// Returns entry associated with the filename if there is some. Otherwise, 201 /// associates the given entry with the filename and returns it. 202 const CachedFileSystemEntry & 203 getOrInsertEntryForFilename(StringRef Filename, 204 const CachedFileSystemEntry &Entry); 205 206 /// Returns the real path associated with the filename or nullptr if none is 207 /// found. 208 const CachedRealPath *findRealPathByFilename(StringRef Filename) const; 209 210 /// Returns the real path associated with the filename if there is some. 211 /// Otherwise, constructs new one with the given one, associates it with the 212 /// filename and returns the result. 213 const CachedRealPath & 214 getOrEmplaceRealPathForFilename(StringRef Filename, 215 llvm::ErrorOr<StringRef> RealPath); 216 }; 217 218 DependencyScanningFilesystemSharedCache(); 219 220 /// Returns shard for the given key. 221 CacheShard &getShardForFilename(StringRef Filename) const; 222 CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const; 223 224 struct OutOfDateEntry { 225 // A null terminated string that contains a path. 226 const char *Path = nullptr; 227 228 struct NegativelyCachedInfo {}; 229 struct SizeChangedInfo { 230 uint64_t CachedSize = 0; 231 uint64_t ActualSize = 0; 232 }; 233 234 std::variant<NegativelyCachedInfo, SizeChangedInfo> Info; 235 OutOfDateEntryOutOfDateEntry236 OutOfDateEntry(const char *Path) 237 : Path(Path), Info(NegativelyCachedInfo{}) {} 238 OutOfDateEntryOutOfDateEntry239 OutOfDateEntry(const char *Path, uint64_t CachedSize, uint64_t ActualSize) 240 : Path(Path), Info(SizeChangedInfo{CachedSize, ActualSize}) {} 241 }; 242 243 /// Visits all cached entries and re-stat an entry using UnderlyingFS to check 244 /// if the cache contains out-of-date entries. An entry can be out-of-date for 245 /// two reasons: 246 /// 1. The entry contains a stat error, indicating the file did not exist 247 /// in the cache, but the file exists on the UnderlyingFS. 248 /// 2. The entry is associated with a file whose size is different from the 249 /// size of the file on the same path on the UnderlyingFS. 250 std::vector<OutOfDateEntry> 251 getOutOfDateEntries(llvm::vfs::FileSystem &UnderlyingFS) const; 252 253 private: 254 std::unique_ptr<CacheShard[]> CacheShards; 255 unsigned NumShards; 256 }; 257 258 /// This class is a local cache, that caches the 'stat' and 'open' calls to the 259 /// underlying real file system. 260 class DependencyScanningFilesystemLocalCache { 261 llvm::StringMap< 262 std::pair<const CachedFileSystemEntry *, const CachedRealPath *>, 263 llvm::BumpPtrAllocator> 264 Cache; 265 266 public: 267 /// Returns entry associated with the filename or nullptr if none is found. findEntryByFilename(StringRef Filename)268 const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const { 269 assert(llvm::sys::path::is_absolute_gnu(Filename)); 270 auto It = Cache.find(Filename); 271 return It == Cache.end() ? nullptr : It->getValue().first; 272 } 273 274 /// Associates the given entry with the filename and returns the given entry 275 /// pointer (for convenience). 276 const CachedFileSystemEntry & insertEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)277 insertEntryForFilename(StringRef Filename, 278 const CachedFileSystemEntry &Entry) { 279 assert(llvm::sys::path::is_absolute_gnu(Filename)); 280 auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}}); 281 auto &[CachedEntry, CachedRealPath] = It->getValue(); 282 if (!Inserted) { 283 // The file is already present in the local cache. If we got here, it only 284 // contains the real path. Let's make sure the entry is populated too. 285 assert((!CachedEntry && CachedRealPath) && "entry already present"); 286 CachedEntry = &Entry; 287 } 288 return *CachedEntry; 289 } 290 291 /// Returns real path associated with the filename or nullptr if none is 292 /// found. findRealPathByFilename(StringRef Filename)293 const CachedRealPath *findRealPathByFilename(StringRef Filename) const { 294 assert(llvm::sys::path::is_absolute_gnu(Filename)); 295 auto It = Cache.find(Filename); 296 return It == Cache.end() ? nullptr : It->getValue().second; 297 } 298 299 /// Associates the given real path with the filename and returns the given 300 /// entry pointer (for convenience). 301 const CachedRealPath & insertRealPathForFilename(StringRef Filename,const CachedRealPath & RealPath)302 insertRealPathForFilename(StringRef Filename, 303 const CachedRealPath &RealPath) { 304 assert(llvm::sys::path::is_absolute_gnu(Filename)); 305 auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}}); 306 auto &[CachedEntry, CachedRealPath] = It->getValue(); 307 if (!Inserted) { 308 // The file is already present in the local cache. If we got here, it only 309 // contains the entry. Let's make sure the real path is populated too. 310 assert((!CachedRealPath && CachedEntry) && "real path already present"); 311 CachedRealPath = &RealPath; 312 } 313 return *CachedRealPath; 314 } 315 }; 316 317 /// Reference to a CachedFileSystemEntry. 318 /// If the underlying entry is an opened file, this wrapper returns the file 319 /// contents and the scanned preprocessor directives. 320 class EntryRef { 321 /// The filename used to access this entry. 322 std::string Filename; 323 324 /// The underlying cached entry. 325 const CachedFileSystemEntry &Entry; 326 327 friend class DependencyScanningWorkerFilesystem; 328 329 public: EntryRef(StringRef Name,const CachedFileSystemEntry & Entry)330 EntryRef(StringRef Name, const CachedFileSystemEntry &Entry) 331 : Filename(Name), Entry(Entry) {} 332 getStatus()333 llvm::vfs::Status getStatus() const { 334 llvm::vfs::Status Stat = Entry.getStatus(); 335 if (!Stat.isDirectory()) 336 Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size()); 337 return llvm::vfs::Status::copyWithNewName(Stat, Filename); 338 } 339 isError()340 bool isError() const { return Entry.isError(); } isDirectory()341 bool isDirectory() const { return Entry.isDirectory(); } 342 343 /// If the cached entry represents an error, promotes it into `ErrorOr`. unwrapError()344 llvm::ErrorOr<EntryRef> unwrapError() const { 345 if (isError()) 346 return Entry.getError(); 347 return *this; 348 } 349 getContents()350 StringRef getContents() const { return Entry.getOriginalContents(); } 351 352 std::optional<ArrayRef<dependency_directives_scan::Directive>> getDirectiveTokens()353 getDirectiveTokens() const { 354 return Entry.getDirectiveTokens(); 355 } 356 }; 357 358 /// A virtual file system optimized for the dependency discovery. 359 /// 360 /// It is primarily designed to work with source files whose contents was 361 /// preprocessed to remove any tokens that are unlikely to affect the dependency 362 /// computation. 363 /// 364 /// This is not a thread safe VFS. A single instance is meant to be used only in 365 /// one thread. Multiple instances are allowed to service multiple threads 366 /// running in parallel. 367 class DependencyScanningWorkerFilesystem 368 : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem, 369 llvm::vfs::ProxyFileSystem> { 370 public: 371 static const char ID; 372 373 DependencyScanningWorkerFilesystem( 374 DependencyScanningFilesystemSharedCache &SharedCache, 375 IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS); 376 377 llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override; 378 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 379 openFileForRead(const Twine &Path) override; 380 381 std::error_code getRealPath(const Twine &Path, 382 SmallVectorImpl<char> &Output) override; 383 384 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 385 386 /// Make it so that no paths bypass this VFS. resetBypassedPathPrefix()387 void resetBypassedPathPrefix() { BypassedPathPrefix.reset(); } 388 /// Set the prefix for paths that should bypass this VFS and go straight to 389 /// the underlying VFS. setBypassedPathPrefix(StringRef Prefix)390 void setBypassedPathPrefix(StringRef Prefix) { BypassedPathPrefix = Prefix; } 391 392 /// Returns entry for the given filename. 393 /// 394 /// Attempts to use the local and shared caches first, then falls back to 395 /// using the underlying filesystem. 396 llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename); 397 398 /// Ensure the directive tokens are populated for this file entry. 399 /// 400 /// Returns true if the directive tokens are populated for this file entry, 401 /// false if not (i.e. this entry is not a file or its scan fails). 402 bool ensureDirectiveTokensArePopulated(EntryRef Entry); 403 404 /// \returns The scanned preprocessor directive tokens of the file that are 405 /// used to speed up preprocessing, if available. 406 std::optional<ArrayRef<dependency_directives_scan::Directive>> getDirectiveTokens(const Twine & Path)407 getDirectiveTokens(const Twine &Path) { 408 if (llvm::ErrorOr<EntryRef> Entry = getOrCreateFileSystemEntry(Path.str())) 409 if (ensureDirectiveTokensArePopulated(*Entry)) 410 return Entry->getDirectiveTokens(); 411 return std::nullopt; 412 } 413 414 /// Check whether \p Path exists. By default checks cached result of \c 415 /// status(), and falls back on FS if unable to do so. 416 bool exists(const Twine &Path) override; 417 418 private: 419 /// For a filename that's not yet associated with any entry in the caches, 420 /// uses the underlying filesystem to either look up the entry based in the 421 /// shared cache indexed by unique ID, or creates new entry from scratch. 422 /// \p FilenameForLookup will always be an absolute path, and different than 423 /// \p OriginalFilename if \p OriginalFilename is relative. 424 llvm::ErrorOr<const CachedFileSystemEntry &> 425 computeAndStoreResult(StringRef OriginalFilename, 426 StringRef FilenameForLookup); 427 428 /// Represents a filesystem entry that has been stat-ed (and potentially read) 429 /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. 430 struct TentativeEntry { 431 llvm::vfs::Status Status; 432 std::unique_ptr<llvm::MemoryBuffer> Contents; 433 434 TentativeEntry(llvm::vfs::Status Status, 435 std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr) StatusTentativeEntry436 : Status(std::move(Status)), Contents(std::move(Contents)) {} 437 }; 438 439 /// Reads file at the given path. Enforces consistency between the file size 440 /// in status and size of read contents. 441 llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename); 442 443 /// Returns entry associated with the unique ID of the given tentative entry 444 /// if there is some in the shared cache. Otherwise, constructs new one, 445 /// associates it with the unique ID and returns the result. 446 const CachedFileSystemEntry & 447 getOrEmplaceSharedEntryForUID(TentativeEntry TEntry); 448 449 /// Returns entry associated with the filename or nullptr if none is found. 450 /// 451 /// Returns entry from local cache if there is some. Otherwise, if the entry 452 /// is found in the shared cache, writes it through the local cache and 453 /// returns it. Otherwise returns nullptr. 454 const CachedFileSystemEntry * 455 findEntryByFilenameWithWriteThrough(StringRef Filename); 456 457 /// Returns entry associated with the unique ID in the shared cache or nullptr 458 /// if none is found. 459 const CachedFileSystemEntry * findSharedEntryByUID(llvm::vfs::Status Stat)460 findSharedEntryByUID(llvm::vfs::Status Stat) const { 461 return SharedCache.getShardForUID(Stat.getUniqueID()) 462 .findEntryByUID(Stat.getUniqueID()); 463 } 464 465 /// Associates the given entry with the filename in the local cache and 466 /// returns it. 467 const CachedFileSystemEntry & insertLocalEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)468 insertLocalEntryForFilename(StringRef Filename, 469 const CachedFileSystemEntry &Entry) { 470 return LocalCache.insertEntryForFilename(Filename, Entry); 471 } 472 473 /// Returns entry associated with the filename in the shared cache if there is 474 /// some. Otherwise, constructs new one with the given error code, associates 475 /// it with the filename and returns the result. 476 const CachedFileSystemEntry & getOrEmplaceSharedEntryForFilename(StringRef Filename,std::error_code EC)477 getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) { 478 return SharedCache.getShardForFilename(Filename) 479 .getOrEmplaceEntryForFilename(Filename, EC); 480 } 481 482 /// Returns entry associated with the filename in the shared cache if there is 483 /// some. Otherwise, associates the given entry with the filename and returns 484 /// it. 485 const CachedFileSystemEntry & getOrInsertSharedEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)486 getOrInsertSharedEntryForFilename(StringRef Filename, 487 const CachedFileSystemEntry &Entry) { 488 return SharedCache.getShardForFilename(Filename) 489 .getOrInsertEntryForFilename(Filename, Entry); 490 } 491 printImpl(raw_ostream & OS,PrintType Type,unsigned IndentLevel)492 void printImpl(raw_ostream &OS, PrintType Type, 493 unsigned IndentLevel) const override { 494 printIndent(OS, IndentLevel); 495 OS << "DependencyScanningFilesystem\n"; 496 getUnderlyingFS().print(OS, Type, IndentLevel + 1); 497 } 498 499 /// Whether this path should bypass this VFS and go straight to the underlying 500 /// VFS. 501 bool shouldBypass(StringRef Path) const; 502 503 /// The global cache shared between worker threads. 504 DependencyScanningFilesystemSharedCache &SharedCache; 505 /// The local cache is used by the worker thread to cache file system queries 506 /// locally instead of querying the global cache every time. 507 DependencyScanningFilesystemLocalCache LocalCache; 508 509 /// Prefix of paths that should go straight to the underlying VFS. 510 std::optional<std::string> BypassedPathPrefix; 511 512 /// The working directory to use for making relative paths absolute before 513 /// using them for cache lookups. 514 llvm::ErrorOr<std::string> WorkingDirForCacheLookup; 515 516 void updateWorkingDirForCacheLookup(); 517 518 llvm::ErrorOr<StringRef> 519 tryGetFilenameForLookup(StringRef OriginalFilename, 520 llvm::SmallVectorImpl<char> &PathBuf) const; 521 }; 522 523 } // end namespace dependencies 524 } // end namespace tooling 525 } // end namespace clang 526 527 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H 528