1 //===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H 10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H 11 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Lex/DependencyDirectivesScanner.h" 14 #include "llvm/ADT/DenseMap.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/Support/Allocator.h" 17 #include "llvm/Support/ErrorOr.h" 18 #include "llvm/Support/VirtualFileSystem.h" 19 #include <mutex> 20 #include <optional> 21 22 namespace clang { 23 namespace tooling { 24 namespace dependencies { 25 26 using DependencyDirectivesTy = 27 SmallVector<dependency_directives_scan::Directive, 20>; 28 29 /// Contents and directive tokens of a cached file entry. Single instance can 30 /// be shared between multiple entries. 31 struct CachedFileContents { CachedFileContentsCachedFileContents32 CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents) 33 : Original(std::move(Contents)), DepDirectives(nullptr) {} 34 35 /// Owning storage for the original contents. 36 std::unique_ptr<llvm::MemoryBuffer> Original; 37 38 /// The mutex that must be locked before mutating directive tokens. 39 std::mutex ValueLock; 40 SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens; 41 /// Accessor to the directive tokens that's atomic to avoid data races. 42 /// \p CachedFileContents has ownership of the pointer. 43 std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives; 44 ~CachedFileContentsCachedFileContents45 ~CachedFileContents() { delete DepDirectives.load(); } 46 }; 47 48 /// An in-memory representation of a file system entity that is of interest to 49 /// the dependency scanning filesystem. 50 /// 51 /// It represents one of the following: 52 /// - opened file with contents and a stat value, 53 /// - opened file with contents, directive tokens and a stat value, 54 /// - directory entry with its stat value, 55 /// - filesystem error. 56 /// 57 /// Single instance of this class can be shared across different filenames (e.g. 58 /// a regular file and a symlink). For this reason the status filename is empty 59 /// and is only materialized by \c EntryRef that knows the requested filename. 60 class CachedFileSystemEntry { 61 public: 62 /// Creates an entry without contents: either a filesystem error or 63 /// a directory with stat value. CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)64 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat) 65 : MaybeStat(std::move(Stat)), Contents(nullptr) { 66 clearStatName(); 67 } 68 69 /// Creates an entry representing a file with contents. CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,CachedFileContents * Contents)70 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat, 71 CachedFileContents *Contents) 72 : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) { 73 clearStatName(); 74 } 75 76 /// \returns True if the entry is a filesystem error. isError()77 bool isError() const { return !MaybeStat; } 78 79 /// \returns True if the current entry represents a directory. isDirectory()80 bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); } 81 82 /// \returns Original contents of the file. getOriginalContents()83 StringRef getOriginalContents() const { 84 assert(!isError() && "error"); 85 assert(!MaybeStat->isDirectory() && "not a file"); 86 assert(Contents && "contents not initialized"); 87 return Contents->Original->getBuffer(); 88 } 89 90 /// \returns The scanned preprocessor directive tokens of the file that are 91 /// used to speed up preprocessing, if available. 92 std::optional<ArrayRef<dependency_directives_scan::Directive>> getDirectiveTokens()93 getDirectiveTokens() const { 94 assert(!isError() && "error"); 95 assert(!isDirectory() && "not a file"); 96 assert(Contents && "contents not initialized"); 97 if (auto *Directives = Contents->DepDirectives.load()) { 98 if (Directives->has_value()) 99 return ArrayRef<dependency_directives_scan::Directive>(**Directives); 100 } 101 return std::nullopt; 102 } 103 104 /// \returns The error. getError()105 std::error_code getError() const { return MaybeStat.getError(); } 106 107 /// \returns The entry status with empty filename. getStatus()108 llvm::vfs::Status getStatus() const { 109 assert(!isError() && "error"); 110 assert(MaybeStat->getName().empty() && "stat name must be empty"); 111 return *MaybeStat; 112 } 113 114 /// \returns The unique ID of the entry. getUniqueID()115 llvm::sys::fs::UniqueID getUniqueID() const { 116 assert(!isError() && "error"); 117 return MaybeStat->getUniqueID(); 118 } 119 120 /// \returns The data structure holding both contents and directive tokens. getCachedContents()121 CachedFileContents *getCachedContents() const { 122 assert(!isError() && "error"); 123 assert(!isDirectory() && "not a file"); 124 return Contents; 125 } 126 127 private: clearStatName()128 void clearStatName() { 129 if (MaybeStat) 130 MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, ""); 131 } 132 133 /// Either the filesystem error or status of the entry. 134 /// The filename is empty and only materialized by \c EntryRef. 135 llvm::ErrorOr<llvm::vfs::Status> MaybeStat; 136 137 /// Non-owning pointer to the file contents. 138 /// 139 /// We're using pointer here to keep the size of this class small. Instances 140 /// representing directories and filesystem errors don't hold any contents 141 /// anyway. 142 CachedFileContents *Contents; 143 }; 144 145 using CachedRealPath = llvm::ErrorOr<std::string>; 146 147 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the 148 /// underlying real file system, and the scanned preprocessor directives of 149 /// files. 150 /// 151 /// It is sharded based on the hash of the key to reduce the lock contention for 152 /// the worker threads. 153 class DependencyScanningFilesystemSharedCache { 154 public: 155 struct CacheShard { 156 /// The mutex that needs to be locked before mutation of any member. 157 mutable std::mutex CacheLock; 158 159 /// Map from filenames to cached entries and real paths. 160 llvm::StringMap< 161 std::pair<const CachedFileSystemEntry *, const CachedRealPath *>, 162 llvm::BumpPtrAllocator> 163 CacheByFilename; 164 165 /// Map from unique IDs to cached entries. 166 llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *> 167 EntriesByUID; 168 169 /// The backing storage for cached entries. 170 llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage; 171 172 /// The backing storage for cached contents. 173 llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage; 174 175 /// The backing storage for cached real paths. 176 llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage; 177 178 /// Returns entry associated with the filename or nullptr if none is found. 179 const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const; 180 181 /// Returns entry associated with the unique ID or nullptr if none is found. 182 const CachedFileSystemEntry * 183 findEntryByUID(llvm::sys::fs::UniqueID UID) const; 184 185 /// Returns entry associated with the filename if there is some. Otherwise, 186 /// constructs new one with the given status, associates it with the 187 /// filename and returns the result. 188 const CachedFileSystemEntry & 189 getOrEmplaceEntryForFilename(StringRef Filename, 190 llvm::ErrorOr<llvm::vfs::Status> Stat); 191 192 /// Returns entry associated with the unique ID if there is some. Otherwise, 193 /// constructs new one with the given status and contents, associates it 194 /// with the unique ID and returns the result. 195 const CachedFileSystemEntry & 196 getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, 197 std::unique_ptr<llvm::MemoryBuffer> Contents); 198 199 /// Returns entry associated with the filename if there is some. Otherwise, 200 /// associates the given entry with the filename and returns it. 201 const CachedFileSystemEntry & 202 getOrInsertEntryForFilename(StringRef Filename, 203 const CachedFileSystemEntry &Entry); 204 205 /// Returns the real path associated with the filename or nullptr if none is 206 /// found. 207 const CachedRealPath *findRealPathByFilename(StringRef Filename) const; 208 209 /// Returns the real path associated with the filename if there is some. 210 /// Otherwise, constructs new one with the given one, associates it with the 211 /// filename and returns the result. 212 const CachedRealPath & 213 getOrEmplaceRealPathForFilename(StringRef Filename, 214 llvm::ErrorOr<StringRef> RealPath); 215 }; 216 217 DependencyScanningFilesystemSharedCache(); 218 219 /// Returns shard for the given key. 220 CacheShard &getShardForFilename(StringRef Filename) const; 221 CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const; 222 223 private: 224 std::unique_ptr<CacheShard[]> CacheShards; 225 unsigned NumShards; 226 }; 227 228 /// This class is a local cache, that caches the 'stat' and 'open' calls to the 229 /// underlying real file system. 230 class DependencyScanningFilesystemLocalCache { 231 llvm::StringMap< 232 std::pair<const CachedFileSystemEntry *, const CachedRealPath *>, 233 llvm::BumpPtrAllocator> 234 Cache; 235 236 public: 237 /// Returns entry associated with the filename or nullptr if none is found. findEntryByFilename(StringRef Filename)238 const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const { 239 assert(llvm::sys::path::is_absolute_gnu(Filename)); 240 auto It = Cache.find(Filename); 241 return It == Cache.end() ? nullptr : It->getValue().first; 242 } 243 244 /// Associates the given entry with the filename and returns the given entry 245 /// pointer (for convenience). 246 const CachedFileSystemEntry & insertEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)247 insertEntryForFilename(StringRef Filename, 248 const CachedFileSystemEntry &Entry) { 249 assert(llvm::sys::path::is_absolute_gnu(Filename)); 250 auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}}); 251 auto &[CachedEntry, CachedRealPath] = It->getValue(); 252 if (!Inserted) { 253 // The file is already present in the local cache. If we got here, it only 254 // contains the real path. Let's make sure the entry is populated too. 255 assert((!CachedEntry && CachedRealPath) && "entry already present"); 256 CachedEntry = &Entry; 257 } 258 return *CachedEntry; 259 } 260 261 /// Returns real path associated with the filename or nullptr if none is 262 /// found. findRealPathByFilename(StringRef Filename)263 const CachedRealPath *findRealPathByFilename(StringRef Filename) const { 264 assert(llvm::sys::path::is_absolute_gnu(Filename)); 265 auto It = Cache.find(Filename); 266 return It == Cache.end() ? nullptr : It->getValue().second; 267 } 268 269 /// Associates the given real path with the filename and returns the given 270 /// entry pointer (for convenience). 271 const CachedRealPath & insertRealPathForFilename(StringRef Filename,const CachedRealPath & RealPath)272 insertRealPathForFilename(StringRef Filename, 273 const CachedRealPath &RealPath) { 274 assert(llvm::sys::path::is_absolute_gnu(Filename)); 275 auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}}); 276 auto &[CachedEntry, CachedRealPath] = It->getValue(); 277 if (!Inserted) { 278 // The file is already present in the local cache. If we got here, it only 279 // contains the entry. Let's make sure the real path is populated too. 280 assert((!CachedRealPath && CachedEntry) && "real path already present"); 281 CachedRealPath = &RealPath; 282 } 283 return *CachedRealPath; 284 } 285 }; 286 287 /// Reference to a CachedFileSystemEntry. 288 /// If the underlying entry is an opened file, this wrapper returns the file 289 /// contents and the scanned preprocessor directives. 290 class EntryRef { 291 /// The filename used to access this entry. 292 std::string Filename; 293 294 /// The underlying cached entry. 295 const CachedFileSystemEntry &Entry; 296 297 friend class DependencyScanningWorkerFilesystem; 298 299 public: EntryRef(StringRef Name,const CachedFileSystemEntry & Entry)300 EntryRef(StringRef Name, const CachedFileSystemEntry &Entry) 301 : Filename(Name), Entry(Entry) {} 302 getStatus()303 llvm::vfs::Status getStatus() const { 304 llvm::vfs::Status Stat = Entry.getStatus(); 305 if (!Stat.isDirectory()) 306 Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size()); 307 return llvm::vfs::Status::copyWithNewName(Stat, Filename); 308 } 309 isError()310 bool isError() const { return Entry.isError(); } isDirectory()311 bool isDirectory() const { return Entry.isDirectory(); } 312 313 /// If the cached entry represents an error, promotes it into `ErrorOr`. unwrapError()314 llvm::ErrorOr<EntryRef> unwrapError() const { 315 if (isError()) 316 return Entry.getError(); 317 return *this; 318 } 319 getContents()320 StringRef getContents() const { return Entry.getOriginalContents(); } 321 322 std::optional<ArrayRef<dependency_directives_scan::Directive>> getDirectiveTokens()323 getDirectiveTokens() const { 324 return Entry.getDirectiveTokens(); 325 } 326 }; 327 328 /// A virtual file system optimized for the dependency discovery. 329 /// 330 /// It is primarily designed to work with source files whose contents was 331 /// preprocessed to remove any tokens that are unlikely to affect the dependency 332 /// computation. 333 /// 334 /// This is not a thread safe VFS. A single instance is meant to be used only in 335 /// one thread. Multiple instances are allowed to service multiple threads 336 /// running in parallel. 337 class DependencyScanningWorkerFilesystem 338 : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem, 339 llvm::vfs::ProxyFileSystem> { 340 public: 341 static const char ID; 342 343 DependencyScanningWorkerFilesystem( 344 DependencyScanningFilesystemSharedCache &SharedCache, 345 IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS); 346 347 llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override; 348 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 349 openFileForRead(const Twine &Path) override; 350 351 std::error_code getRealPath(const Twine &Path, 352 SmallVectorImpl<char> &Output) override; 353 354 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 355 356 /// Returns entry for the given filename. 357 /// 358 /// Attempts to use the local and shared caches first, then falls back to 359 /// using the underlying filesystem. 360 llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename); 361 362 /// Ensure the directive tokens are populated for this file entry. 363 /// 364 /// Returns true if the directive tokens are populated for this file entry, 365 /// false if not (i.e. this entry is not a file or its scan fails). 366 bool ensureDirectiveTokensArePopulated(EntryRef Entry); 367 368 /// Check whether \p Path exists. By default checks cached result of \c 369 /// status(), and falls back on FS if unable to do so. 370 bool exists(const Twine &Path) override; 371 372 private: 373 /// For a filename that's not yet associated with any entry in the caches, 374 /// uses the underlying filesystem to either look up the entry based in the 375 /// shared cache indexed by unique ID, or creates new entry from scratch. 376 /// \p FilenameForLookup will always be an absolute path, and different than 377 /// \p OriginalFilename if \p OriginalFilename is relative. 378 llvm::ErrorOr<const CachedFileSystemEntry &> 379 computeAndStoreResult(StringRef OriginalFilename, 380 StringRef FilenameForLookup); 381 382 /// Represents a filesystem entry that has been stat-ed (and potentially read) 383 /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. 384 struct TentativeEntry { 385 llvm::vfs::Status Status; 386 std::unique_ptr<llvm::MemoryBuffer> Contents; 387 388 TentativeEntry(llvm::vfs::Status Status, 389 std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr) StatusTentativeEntry390 : Status(std::move(Status)), Contents(std::move(Contents)) {} 391 }; 392 393 /// Reads file at the given path. Enforces consistency between the file size 394 /// in status and size of read contents. 395 llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename); 396 397 /// Returns entry associated with the unique ID of the given tentative entry 398 /// if there is some in the shared cache. Otherwise, constructs new one, 399 /// associates it with the unique ID and returns the result. 400 const CachedFileSystemEntry & 401 getOrEmplaceSharedEntryForUID(TentativeEntry TEntry); 402 403 /// Returns entry associated with the filename or nullptr if none is found. 404 /// 405 /// Returns entry from local cache if there is some. Otherwise, if the entry 406 /// is found in the shared cache, writes it through the local cache and 407 /// returns it. Otherwise returns nullptr. 408 const CachedFileSystemEntry * 409 findEntryByFilenameWithWriteThrough(StringRef Filename); 410 411 /// Returns entry associated with the unique ID in the shared cache or nullptr 412 /// if none is found. 413 const CachedFileSystemEntry * findSharedEntryByUID(llvm::vfs::Status Stat)414 findSharedEntryByUID(llvm::vfs::Status Stat) const { 415 return SharedCache.getShardForUID(Stat.getUniqueID()) 416 .findEntryByUID(Stat.getUniqueID()); 417 } 418 419 /// Associates the given entry with the filename in the local cache and 420 /// returns it. 421 const CachedFileSystemEntry & insertLocalEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)422 insertLocalEntryForFilename(StringRef Filename, 423 const CachedFileSystemEntry &Entry) { 424 return LocalCache.insertEntryForFilename(Filename, Entry); 425 } 426 427 /// Returns entry associated with the filename in the shared cache if there is 428 /// some. Otherwise, constructs new one with the given error code, associates 429 /// it with the filename and returns the result. 430 const CachedFileSystemEntry & getOrEmplaceSharedEntryForFilename(StringRef Filename,std::error_code EC)431 getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) { 432 return SharedCache.getShardForFilename(Filename) 433 .getOrEmplaceEntryForFilename(Filename, EC); 434 } 435 436 /// Returns entry associated with the filename in the shared cache if there is 437 /// some. Otherwise, associates the given entry with the filename and returns 438 /// it. 439 const CachedFileSystemEntry & getOrInsertSharedEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)440 getOrInsertSharedEntryForFilename(StringRef Filename, 441 const CachedFileSystemEntry &Entry) { 442 return SharedCache.getShardForFilename(Filename) 443 .getOrInsertEntryForFilename(Filename, Entry); 444 } 445 printImpl(raw_ostream & OS,PrintType Type,unsigned IndentLevel)446 void printImpl(raw_ostream &OS, PrintType Type, 447 unsigned IndentLevel) const override { 448 printIndent(OS, IndentLevel); 449 OS << "DependencyScanningFilesystem\n"; 450 getUnderlyingFS().print(OS, Type, IndentLevel + 1); 451 } 452 453 /// The global cache shared between worker threads. 454 DependencyScanningFilesystemSharedCache &SharedCache; 455 /// The local cache is used by the worker thread to cache file system queries 456 /// locally instead of querying the global cache every time. 457 DependencyScanningFilesystemLocalCache LocalCache; 458 459 /// The working directory to use for making relative paths absolute before 460 /// using them for cache lookups. 461 llvm::ErrorOr<std::string> WorkingDirForCacheLookup; 462 463 void updateWorkingDirForCacheLookup(); 464 465 llvm::ErrorOr<StringRef> 466 tryGetFilenameForLookup(StringRef OriginalFilename, 467 llvm::SmallVectorImpl<char> &PathBuf) const; 468 }; 469 470 } // end namespace dependencies 471 } // end namespace tooling 472 } // end namespace clang 473 474 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H 475