1 //===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
11 
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Lex/DependencyDirectivesScanner.h"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/Support/Allocator.h"
17 #include "llvm/Support/ErrorOr.h"
18 #include "llvm/Support/VirtualFileSystem.h"
19 #include <mutex>
20 #include <optional>
21 
22 namespace clang {
23 namespace tooling {
24 namespace dependencies {
25 
26 using DependencyDirectivesTy =
27     SmallVector<dependency_directives_scan::Directive, 20>;
28 
29 /// Contents and directive tokens of a cached file entry. Single instance can
30 /// be shared between multiple entries.
31 struct CachedFileContents {
CachedFileContentsCachedFileContents32   CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
33       : Original(std::move(Contents)), DepDirectives(nullptr) {}
34 
35   /// Owning storage for the original contents.
36   std::unique_ptr<llvm::MemoryBuffer> Original;
37 
38   /// The mutex that must be locked before mutating directive tokens.
39   std::mutex ValueLock;
40   SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens;
41   /// Accessor to the directive tokens that's atomic to avoid data races.
42   /// \p CachedFileContents has ownership of the pointer.
43   std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives;
44 
~CachedFileContentsCachedFileContents45   ~CachedFileContents() { delete DepDirectives.load(); }
46 };
47 
48 /// An in-memory representation of a file system entity that is of interest to
49 /// the dependency scanning filesystem.
50 ///
51 /// It represents one of the following:
52 /// - opened file with contents and a stat value,
53 /// - opened file with contents, directive tokens and a stat value,
54 /// - directory entry with its stat value,
55 /// - filesystem error.
56 ///
57 /// Single instance of this class can be shared across different filenames (e.g.
58 /// a regular file and a symlink). For this reason the status filename is empty
59 /// and is only materialized by \c EntryRef that knows the requested filename.
60 class CachedFileSystemEntry {
61 public:
62   /// Creates an entry without contents: either a filesystem error or
63   /// a directory with stat value.
CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)64   CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
65       : MaybeStat(std::move(Stat)), Contents(nullptr) {
66     clearStatName();
67   }
68 
69   /// Creates an entry representing a file with contents.
CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,CachedFileContents * Contents)70   CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
71                         CachedFileContents *Contents)
72       : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
73     clearStatName();
74   }
75 
76   /// \returns True if the entry is a filesystem error.
isError()77   bool isError() const { return !MaybeStat; }
78 
79   /// \returns True if the current entry represents a directory.
isDirectory()80   bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
81 
82   /// \returns Original contents of the file.
getOriginalContents()83   StringRef getOriginalContents() const {
84     assert(!isError() && "error");
85     assert(!MaybeStat->isDirectory() && "not a file");
86     assert(Contents && "contents not initialized");
87     return Contents->Original->getBuffer();
88   }
89 
90   /// \returns The scanned preprocessor directive tokens of the file that are
91   /// used to speed up preprocessing, if available.
92   std::optional<ArrayRef<dependency_directives_scan::Directive>>
getDirectiveTokens()93   getDirectiveTokens() const {
94     assert(!isError() && "error");
95     assert(!isDirectory() && "not a file");
96     assert(Contents && "contents not initialized");
97     if (auto *Directives = Contents->DepDirectives.load()) {
98       if (Directives->has_value())
99         return ArrayRef<dependency_directives_scan::Directive>(**Directives);
100     }
101     return std::nullopt;
102   }
103 
104   /// \returns The error.
getError()105   std::error_code getError() const { return MaybeStat.getError(); }
106 
107   /// \returns The entry status with empty filename.
getStatus()108   llvm::vfs::Status getStatus() const {
109     assert(!isError() && "error");
110     assert(MaybeStat->getName().empty() && "stat name must be empty");
111     return *MaybeStat;
112   }
113 
114   /// \returns The unique ID of the entry.
getUniqueID()115   llvm::sys::fs::UniqueID getUniqueID() const {
116     assert(!isError() && "error");
117     return MaybeStat->getUniqueID();
118   }
119 
120   /// \returns The data structure holding both contents and directive tokens.
getCachedContents()121   CachedFileContents *getCachedContents() const {
122     assert(!isError() && "error");
123     assert(!isDirectory() && "not a file");
124     return Contents;
125   }
126 
127 private:
clearStatName()128   void clearStatName() {
129     if (MaybeStat)
130       MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
131   }
132 
133   /// Either the filesystem error or status of the entry.
134   /// The filename is empty and only materialized by \c EntryRef.
135   llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
136 
137   /// Non-owning pointer to the file contents.
138   ///
139   /// We're using pointer here to keep the size of this class small. Instances
140   /// representing directories and filesystem errors don't hold any contents
141   /// anyway.
142   CachedFileContents *Contents;
143 };
144 
145 using CachedRealPath = llvm::ErrorOr<std::string>;
146 
147 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
148 /// underlying real file system, and the scanned preprocessor directives of
149 /// files.
150 ///
151 /// It is sharded based on the hash of the key to reduce the lock contention for
152 /// the worker threads.
153 class DependencyScanningFilesystemSharedCache {
154 public:
155   struct CacheShard {
156     /// The mutex that needs to be locked before mutation of any member.
157     mutable std::mutex CacheLock;
158 
159     /// Map from filenames to cached entries and real paths.
160     llvm::StringMap<
161         std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
162         llvm::BumpPtrAllocator>
163         CacheByFilename;
164 
165     /// Map from unique IDs to cached entries.
166     llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
167         EntriesByUID;
168 
169     /// The backing storage for cached entries.
170     llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
171 
172     /// The backing storage for cached contents.
173     llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
174 
175     /// The backing storage for cached real paths.
176     llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
177 
178     /// Returns entry associated with the filename or nullptr if none is found.
179     const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
180 
181     /// Returns entry associated with the unique ID or nullptr if none is found.
182     const CachedFileSystemEntry *
183     findEntryByUID(llvm::sys::fs::UniqueID UID) const;
184 
185     /// Returns entry associated with the filename if there is some. Otherwise,
186     /// constructs new one with the given status, associates it with the
187     /// filename and returns the result.
188     const CachedFileSystemEntry &
189     getOrEmplaceEntryForFilename(StringRef Filename,
190                                  llvm::ErrorOr<llvm::vfs::Status> Stat);
191 
192     /// Returns entry associated with the unique ID if there is some. Otherwise,
193     /// constructs new one with the given status and contents, associates it
194     /// with the unique ID and returns the result.
195     const CachedFileSystemEntry &
196     getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
197                             std::unique_ptr<llvm::MemoryBuffer> Contents);
198 
199     /// Returns entry associated with the filename if there is some. Otherwise,
200     /// associates the given entry with the filename and returns it.
201     const CachedFileSystemEntry &
202     getOrInsertEntryForFilename(StringRef Filename,
203                                 const CachedFileSystemEntry &Entry);
204 
205     /// Returns the real path associated with the filename or nullptr if none is
206     /// found.
207     const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
208 
209     /// Returns the real path associated with the filename if there is some.
210     /// Otherwise, constructs new one with the given one, associates it with the
211     /// filename and returns the result.
212     const CachedRealPath &
213     getOrEmplaceRealPathForFilename(StringRef Filename,
214                                     llvm::ErrorOr<StringRef> RealPath);
215   };
216 
217   DependencyScanningFilesystemSharedCache();
218 
219   /// Returns shard for the given key.
220   CacheShard &getShardForFilename(StringRef Filename) const;
221   CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
222 
223 private:
224   std::unique_ptr<CacheShard[]> CacheShards;
225   unsigned NumShards;
226 };
227 
228 /// This class is a local cache, that caches the 'stat' and 'open' calls to the
229 /// underlying real file system.
230 class DependencyScanningFilesystemLocalCache {
231   llvm::StringMap<
232       std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
233       llvm::BumpPtrAllocator>
234       Cache;
235 
236 public:
237   /// Returns entry associated with the filename or nullptr if none is found.
findEntryByFilename(StringRef Filename)238   const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
239     assert(llvm::sys::path::is_absolute_gnu(Filename));
240     auto It = Cache.find(Filename);
241     return It == Cache.end() ? nullptr : It->getValue().first;
242   }
243 
244   /// Associates the given entry with the filename and returns the given entry
245   /// pointer (for convenience).
246   const CachedFileSystemEntry &
insertEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)247   insertEntryForFilename(StringRef Filename,
248                          const CachedFileSystemEntry &Entry) {
249     assert(llvm::sys::path::is_absolute_gnu(Filename));
250     auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
251     auto &[CachedEntry, CachedRealPath] = It->getValue();
252     if (!Inserted) {
253       // The file is already present in the local cache. If we got here, it only
254       // contains the real path. Let's make sure the entry is populated too.
255       assert((!CachedEntry && CachedRealPath) && "entry already present");
256       CachedEntry = &Entry;
257     }
258     return *CachedEntry;
259   }
260 
261   /// Returns real path associated with the filename or nullptr if none is
262   /// found.
findRealPathByFilename(StringRef Filename)263   const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
264     assert(llvm::sys::path::is_absolute_gnu(Filename));
265     auto It = Cache.find(Filename);
266     return It == Cache.end() ? nullptr : It->getValue().second;
267   }
268 
269   /// Associates the given real path with the filename and returns the given
270   /// entry pointer (for convenience).
271   const CachedRealPath &
insertRealPathForFilename(StringRef Filename,const CachedRealPath & RealPath)272   insertRealPathForFilename(StringRef Filename,
273                             const CachedRealPath &RealPath) {
274     assert(llvm::sys::path::is_absolute_gnu(Filename));
275     auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
276     auto &[CachedEntry, CachedRealPath] = It->getValue();
277     if (!Inserted) {
278       // The file is already present in the local cache. If we got here, it only
279       // contains the entry. Let's make sure the real path is populated too.
280       assert((!CachedRealPath && CachedEntry) && "real path already present");
281       CachedRealPath = &RealPath;
282     }
283     return *CachedRealPath;
284   }
285 };
286 
287 /// Reference to a CachedFileSystemEntry.
288 /// If the underlying entry is an opened file, this wrapper returns the file
289 /// contents and the scanned preprocessor directives.
290 class EntryRef {
291   /// The filename used to access this entry.
292   std::string Filename;
293 
294   /// The underlying cached entry.
295   const CachedFileSystemEntry &Entry;
296 
297   friend class DependencyScanningWorkerFilesystem;
298 
299 public:
EntryRef(StringRef Name,const CachedFileSystemEntry & Entry)300   EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
301       : Filename(Name), Entry(Entry) {}
302 
getStatus()303   llvm::vfs::Status getStatus() const {
304     llvm::vfs::Status Stat = Entry.getStatus();
305     if (!Stat.isDirectory())
306       Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
307     return llvm::vfs::Status::copyWithNewName(Stat, Filename);
308   }
309 
isError()310   bool isError() const { return Entry.isError(); }
isDirectory()311   bool isDirectory() const { return Entry.isDirectory(); }
312 
313   /// If the cached entry represents an error, promotes it into `ErrorOr`.
unwrapError()314   llvm::ErrorOr<EntryRef> unwrapError() const {
315     if (isError())
316       return Entry.getError();
317     return *this;
318   }
319 
getContents()320   StringRef getContents() const { return Entry.getOriginalContents(); }
321 
322   std::optional<ArrayRef<dependency_directives_scan::Directive>>
getDirectiveTokens()323   getDirectiveTokens() const {
324     return Entry.getDirectiveTokens();
325   }
326 };
327 
328 /// A virtual file system optimized for the dependency discovery.
329 ///
330 /// It is primarily designed to work with source files whose contents was
331 /// preprocessed to remove any tokens that are unlikely to affect the dependency
332 /// computation.
333 ///
334 /// This is not a thread safe VFS. A single instance is meant to be used only in
335 /// one thread. Multiple instances are allowed to service multiple threads
336 /// running in parallel.
337 class DependencyScanningWorkerFilesystem
338     : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
339                                llvm::vfs::ProxyFileSystem> {
340 public:
341   static const char ID;
342 
343   DependencyScanningWorkerFilesystem(
344       DependencyScanningFilesystemSharedCache &SharedCache,
345       IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
346 
347   llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
348   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
349   openFileForRead(const Twine &Path) override;
350 
351   std::error_code getRealPath(const Twine &Path,
352                               SmallVectorImpl<char> &Output) override;
353 
354   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
355 
356   /// Returns entry for the given filename.
357   ///
358   /// Attempts to use the local and shared caches first, then falls back to
359   /// using the underlying filesystem.
360   llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
361 
362   /// Ensure the directive tokens are populated for this file entry.
363   ///
364   /// Returns true if the directive tokens are populated for this file entry,
365   /// false if not (i.e. this entry is not a file or its scan fails).
366   bool ensureDirectiveTokensArePopulated(EntryRef Entry);
367 
368   /// Check whether \p Path exists. By default checks cached result of \c
369   /// status(), and falls back on FS if unable to do so.
370   bool exists(const Twine &Path) override;
371 
372 private:
373   /// For a filename that's not yet associated with any entry in the caches,
374   /// uses the underlying filesystem to either look up the entry based in the
375   /// shared cache indexed by unique ID, or creates new entry from scratch.
376   /// \p FilenameForLookup will always be an absolute path, and different than
377   /// \p OriginalFilename if \p OriginalFilename is relative.
378   llvm::ErrorOr<const CachedFileSystemEntry &>
379   computeAndStoreResult(StringRef OriginalFilename,
380                         StringRef FilenameForLookup);
381 
382   /// Represents a filesystem entry that has been stat-ed (and potentially read)
383   /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
384   struct TentativeEntry {
385     llvm::vfs::Status Status;
386     std::unique_ptr<llvm::MemoryBuffer> Contents;
387 
388     TentativeEntry(llvm::vfs::Status Status,
389                    std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
StatusTentativeEntry390         : Status(std::move(Status)), Contents(std::move(Contents)) {}
391   };
392 
393   /// Reads file at the given path. Enforces consistency between the file size
394   /// in status and size of read contents.
395   llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
396 
397   /// Returns entry associated with the unique ID of the given tentative entry
398   /// if there is some in the shared cache. Otherwise, constructs new one,
399   /// associates it with the unique ID and returns the result.
400   const CachedFileSystemEntry &
401   getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
402 
403   /// Returns entry associated with the filename or nullptr if none is found.
404   ///
405   /// Returns entry from local cache if there is some. Otherwise, if the entry
406   /// is found in the shared cache, writes it through the local cache and
407   /// returns it. Otherwise returns nullptr.
408   const CachedFileSystemEntry *
409   findEntryByFilenameWithWriteThrough(StringRef Filename);
410 
411   /// Returns entry associated with the unique ID in the shared cache or nullptr
412   /// if none is found.
413   const CachedFileSystemEntry *
findSharedEntryByUID(llvm::vfs::Status Stat)414   findSharedEntryByUID(llvm::vfs::Status Stat) const {
415     return SharedCache.getShardForUID(Stat.getUniqueID())
416         .findEntryByUID(Stat.getUniqueID());
417   }
418 
419   /// Associates the given entry with the filename in the local cache and
420   /// returns it.
421   const CachedFileSystemEntry &
insertLocalEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)422   insertLocalEntryForFilename(StringRef Filename,
423                               const CachedFileSystemEntry &Entry) {
424     return LocalCache.insertEntryForFilename(Filename, Entry);
425   }
426 
427   /// Returns entry associated with the filename in the shared cache if there is
428   /// some. Otherwise, constructs new one with the given error code, associates
429   /// it with the filename and returns the result.
430   const CachedFileSystemEntry &
getOrEmplaceSharedEntryForFilename(StringRef Filename,std::error_code EC)431   getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
432     return SharedCache.getShardForFilename(Filename)
433         .getOrEmplaceEntryForFilename(Filename, EC);
434   }
435 
436   /// Returns entry associated with the filename in the shared cache if there is
437   /// some. Otherwise, associates the given entry with the filename and returns
438   /// it.
439   const CachedFileSystemEntry &
getOrInsertSharedEntryForFilename(StringRef Filename,const CachedFileSystemEntry & Entry)440   getOrInsertSharedEntryForFilename(StringRef Filename,
441                                     const CachedFileSystemEntry &Entry) {
442     return SharedCache.getShardForFilename(Filename)
443         .getOrInsertEntryForFilename(Filename, Entry);
444   }
445 
printImpl(raw_ostream & OS,PrintType Type,unsigned IndentLevel)446   void printImpl(raw_ostream &OS, PrintType Type,
447                  unsigned IndentLevel) const override {
448     printIndent(OS, IndentLevel);
449     OS << "DependencyScanningFilesystem\n";
450     getUnderlyingFS().print(OS, Type, IndentLevel + 1);
451   }
452 
453   /// The global cache shared between worker threads.
454   DependencyScanningFilesystemSharedCache &SharedCache;
455   /// The local cache is used by the worker thread to cache file system queries
456   /// locally instead of querying the global cache every time.
457   DependencyScanningFilesystemLocalCache LocalCache;
458 
459   /// The working directory to use for making relative paths absolute before
460   /// using them for cache lookups.
461   llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
462 
463   void updateWorkingDirForCacheLookup();
464 
465   llvm::ErrorOr<StringRef>
466   tryGetFilenameForLookup(StringRef OriginalFilename,
467                           llvm::SmallVectorImpl<char> &PathBuf) const;
468 };
469 
470 } // end namespace dependencies
471 } // end namespace tooling
472 } // end namespace clang
473 
474 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
475