xref: /freebsd/contrib/llvm-project/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10 #include "llvm/Support/MemoryBuffer.h"
11 #include "llvm/Support/Threading.h"
12 #include <optional>
13 
14 using namespace clang;
15 using namespace tooling;
16 using namespace dependencies;
17 
18 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
19 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
20   // Load the file and its content from the file system.
21   auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
22   if (!MaybeFile)
23     return MaybeFile.getError();
24   auto File = std::move(*MaybeFile);
25 
26   auto MaybeStat = File->status();
27   if (!MaybeStat)
28     return MaybeStat.getError();
29   auto Stat = std::move(*MaybeStat);
30 
31   auto MaybeBuffer = File->getBuffer(Stat.getName());
32   if (!MaybeBuffer)
33     return MaybeBuffer.getError();
34   auto Buffer = std::move(*MaybeBuffer);
35 
36   // If the file size changed between read and stat, pretend it didn't.
37   if (Stat.getSize() != Buffer->getBufferSize())
38     Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
39 
40   return TentativeEntry(Stat, std::move(Buffer));
41 }
42 
43 bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated(
44     EntryRef Ref) {
45   auto &Entry = Ref.Entry;
46 
47   if (Entry.isError() || Entry.isDirectory())
48     return false;
49 
50   CachedFileContents *Contents = Entry.getCachedContents();
51   assert(Contents && "contents not initialized");
52 
53   // Double-checked locking.
54   if (Contents->DepDirectives.load())
55     return true;
56 
57   std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
58 
59   // Double-checked locking.
60   if (Contents->DepDirectives.load())
61     return true;
62 
63   SmallVector<dependency_directives_scan::Directive, 64> Directives;
64   // Scan the file for preprocessor directives that might affect the
65   // dependencies.
66   if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
67                                         Contents->DepDirectiveTokens,
68                                         Directives)) {
69     Contents->DepDirectiveTokens.clear();
70     // FIXME: Propagate the diagnostic if desired by the client.
71     Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
72     return false;
73   }
74 
75   // This function performed double-checked locking using `DepDirectives`.
76   // Assigning it must be the last thing this function does, otherwise other
77   // threads may skip the critical section (`DepDirectives != nullptr`), leading
78   // to a data race.
79   Contents->DepDirectives.store(
80       new std::optional<DependencyDirectivesTy>(std::move(Directives)));
81   return true;
82 }
83 
84 DependencyScanningFilesystemSharedCache::
85     DependencyScanningFilesystemSharedCache() {
86   // This heuristic was chosen using a empirical testing on a
87   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
88   // sharding gives a performance edge by reducing the lock contention.
89   // FIXME: A better heuristic might also consider the OS to account for
90   // the different cost of lock contention on different OSes.
91   NumShards =
92       std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
93   CacheShards = std::make_unique<CacheShard[]>(NumShards);
94 }
95 
96 DependencyScanningFilesystemSharedCache::CacheShard &
97 DependencyScanningFilesystemSharedCache::getShardForFilename(
98     StringRef Filename) const {
99   assert(llvm::sys::path::is_absolute_gnu(Filename));
100   return CacheShards[llvm::hash_value(Filename) % NumShards];
101 }
102 
103 DependencyScanningFilesystemSharedCache::CacheShard &
104 DependencyScanningFilesystemSharedCache::getShardForUID(
105     llvm::sys::fs::UniqueID UID) const {
106   auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
107   return CacheShards[Hash % NumShards];
108 }
109 
110 std::vector<DependencyScanningFilesystemSharedCache::OutOfDateEntry>
111 DependencyScanningFilesystemSharedCache::getOutOfDateEntries(
112     llvm::vfs::FileSystem &UnderlyingFS) const {
113   // Iterate through all shards and look for cached stat errors.
114   std::vector<OutOfDateEntry> InvalidDiagInfo;
115   for (unsigned i = 0; i < NumShards; i++) {
116     const CacheShard &Shard = CacheShards[i];
117     std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
118     for (const auto &[Path, CachedPair] : Shard.CacheByFilename) {
119       const CachedFileSystemEntry *Entry = CachedPair.first;
120       llvm::ErrorOr<llvm::vfs::Status> Status = UnderlyingFS.status(Path);
121       if (Status) {
122         if (Entry->getError()) {
123           // This is the case where we have cached the non-existence
124           // of the file at Path first, and a file at the path is created
125           // later. The cache entry is not invalidated (as we have no good
126           // way to do it now), which may lead to missing file build errors.
127           InvalidDiagInfo.emplace_back(Path.data());
128         } else {
129           llvm::vfs::Status CachedStatus = Entry->getStatus();
130           if (Status->getType() == llvm::sys::fs::file_type::regular_file &&
131               Status->getType() == CachedStatus.getType()) {
132             // We only check regular files. Directory files sizes could change
133             // due to content changes, and reporting directory size changes can
134             // lead to false positives.
135             // TODO: At the moment, we do not detect symlinks to files whose
136             // size may change. We need to decide if we want to detect cached
137             // symlink size changes. We can also expand this to detect file
138             // type changes.
139             uint64_t CachedSize = CachedStatus.getSize();
140             uint64_t ActualSize = Status->getSize();
141             if (CachedSize != ActualSize) {
142               // This is the case where the cached file has a different size
143               // from the actual file that comes from the underlying FS.
144               InvalidDiagInfo.emplace_back(Path.data(), CachedSize, ActualSize);
145             }
146           }
147         }
148       }
149     }
150   }
151   return InvalidDiagInfo;
152 }
153 
154 const CachedFileSystemEntry *
155 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
156     StringRef Filename) const {
157   assert(llvm::sys::path::is_absolute_gnu(Filename));
158   std::lock_guard<std::mutex> LockGuard(CacheLock);
159   auto It = CacheByFilename.find(Filename);
160   return It == CacheByFilename.end() ? nullptr : It->getValue().first;
161 }
162 
163 const CachedFileSystemEntry *
164 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
165     llvm::sys::fs::UniqueID UID) const {
166   std::lock_guard<std::mutex> LockGuard(CacheLock);
167   auto It = EntriesByUID.find(UID);
168   return It == EntriesByUID.end() ? nullptr : It->getSecond();
169 }
170 
171 const CachedFileSystemEntry &
172 DependencyScanningFilesystemSharedCache::CacheShard::
173     getOrEmplaceEntryForFilename(StringRef Filename,
174                                  llvm::ErrorOr<llvm::vfs::Status> Stat) {
175   std::lock_guard<std::mutex> LockGuard(CacheLock);
176   auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}});
177   auto &[CachedEntry, CachedRealPath] = It->getValue();
178   if (!CachedEntry) {
179     // The entry is not present in the shared cache. Either the cache doesn't
180     // know about the file at all, or it only knows about its real path.
181     assert((Inserted || CachedRealPath) && "existing file with empty pair");
182     CachedEntry =
183         new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
184   }
185   return *CachedEntry;
186 }
187 
188 const CachedFileSystemEntry &
189 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
190     llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
191     std::unique_ptr<llvm::MemoryBuffer> Contents) {
192   std::lock_guard<std::mutex> LockGuard(CacheLock);
193   auto [It, Inserted] = EntriesByUID.try_emplace(UID);
194   auto &CachedEntry = It->getSecond();
195   if (Inserted) {
196     CachedFileContents *StoredContents = nullptr;
197     if (Contents)
198       StoredContents = new (ContentsStorage.Allocate())
199           CachedFileContents(std::move(Contents));
200     CachedEntry = new (EntryStorage.Allocate())
201         CachedFileSystemEntry(std::move(Stat), StoredContents);
202   }
203   return *CachedEntry;
204 }
205 
206 const CachedFileSystemEntry &
207 DependencyScanningFilesystemSharedCache::CacheShard::
208     getOrInsertEntryForFilename(StringRef Filename,
209                                 const CachedFileSystemEntry &Entry) {
210   std::lock_guard<std::mutex> LockGuard(CacheLock);
211   auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}});
212   auto &[CachedEntry, CachedRealPath] = It->getValue();
213   if (!Inserted || !CachedEntry)
214     CachedEntry = &Entry;
215   return *CachedEntry;
216 }
217 
218 const CachedRealPath *
219 DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
220     StringRef Filename) const {
221   assert(llvm::sys::path::is_absolute_gnu(Filename));
222   std::lock_guard<std::mutex> LockGuard(CacheLock);
223   auto It = CacheByFilename.find(Filename);
224   return It == CacheByFilename.end() ? nullptr : It->getValue().second;
225 }
226 
227 const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
228     getOrEmplaceRealPathForFilename(StringRef Filename,
229                                     llvm::ErrorOr<llvm::StringRef> RealPath) {
230   std::lock_guard<std::mutex> LockGuard(CacheLock);
231 
232   const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second;
233   if (!StoredRealPath) {
234     auto OwnedRealPath = [&]() -> CachedRealPath {
235       if (!RealPath)
236         return RealPath.getError();
237       return RealPath->str();
238     }();
239 
240     StoredRealPath = new (RealPathStorage.Allocate())
241         CachedRealPath(std::move(OwnedRealPath));
242   }
243 
244   return *StoredRealPath;
245 }
246 
247 bool DependencyScanningWorkerFilesystem::shouldBypass(StringRef Path) const {
248   return BypassedPathPrefix && Path.starts_with(*BypassedPathPrefix);
249 }
250 
251 DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
252     DependencyScanningFilesystemSharedCache &SharedCache,
253     IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
254     : llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
255                         llvm::vfs::ProxyFileSystem>(std::move(FS)),
256       SharedCache(SharedCache),
257       WorkingDirForCacheLookup(llvm::errc::invalid_argument) {
258   updateWorkingDirForCacheLookup();
259 }
260 
261 const CachedFileSystemEntry &
262 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
263     TentativeEntry TEntry) {
264   auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
265   return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
266                                        std::move(TEntry.Status),
267                                        std::move(TEntry.Contents));
268 }
269 
270 const CachedFileSystemEntry *
271 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
272     StringRef Filename) {
273   if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
274     return Entry;
275   auto &Shard = SharedCache.getShardForFilename(Filename);
276   if (const auto *Entry = Shard.findEntryByFilename(Filename))
277     return &LocalCache.insertEntryForFilename(Filename, *Entry);
278   return nullptr;
279 }
280 
281 llvm::ErrorOr<const CachedFileSystemEntry &>
282 DependencyScanningWorkerFilesystem::computeAndStoreResult(
283     StringRef OriginalFilename, StringRef FilenameForLookup) {
284   llvm::ErrorOr<llvm::vfs::Status> Stat =
285       getUnderlyingFS().status(OriginalFilename);
286   if (!Stat) {
287     const auto &Entry =
288         getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError());
289     return insertLocalEntryForFilename(FilenameForLookup, Entry);
290   }
291 
292   if (const auto *Entry = findSharedEntryByUID(*Stat))
293     return insertLocalEntryForFilename(FilenameForLookup, *Entry);
294 
295   auto TEntry =
296       Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename);
297 
298   const CachedFileSystemEntry *SharedEntry = [&]() {
299     if (TEntry) {
300       const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
301       return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry);
302     }
303     return &getOrEmplaceSharedEntryForFilename(FilenameForLookup,
304                                                TEntry.getError());
305   }();
306 
307   return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry);
308 }
309 
310 llvm::ErrorOr<EntryRef>
311 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
312     StringRef OriginalFilename) {
313   SmallString<256> PathBuf;
314   auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
315   if (!FilenameForLookup)
316     return FilenameForLookup.getError();
317 
318   if (const auto *Entry =
319           findEntryByFilenameWithWriteThrough(*FilenameForLookup))
320     return EntryRef(OriginalFilename, *Entry).unwrapError();
321   auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup);
322   if (!MaybeEntry)
323     return MaybeEntry.getError();
324   return EntryRef(OriginalFilename, *MaybeEntry).unwrapError();
325 }
326 
327 llvm::ErrorOr<llvm::vfs::Status>
328 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
329   SmallString<256> OwnedFilename;
330   StringRef Filename = Path.toStringRef(OwnedFilename);
331 
332   if (shouldBypass(Filename))
333     return getUnderlyingFS().status(Path);
334 
335   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
336   if (!Result)
337     return Result.getError();
338   return Result->getStatus();
339 }
340 
341 bool DependencyScanningWorkerFilesystem::exists(const Twine &Path) {
342   // While some VFS overlay filesystems may implement more-efficient
343   // mechanisms for `exists` queries, `DependencyScanningWorkerFilesystem`
344   // typically wraps `RealFileSystem` which does not specialize `exists`,
345   // so it is not likely to benefit from such optimizations. Instead,
346   // it is more-valuable to have this query go through the
347   // cached-`status` code-path of the `DependencyScanningWorkerFilesystem`.
348   llvm::ErrorOr<llvm::vfs::Status> Status = status(Path);
349   return Status && Status->exists();
350 }
351 
352 namespace {
353 
354 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
355 /// this subclass.
356 class DepScanFile final : public llvm::vfs::File {
357 public:
358   DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
359               llvm::vfs::Status Stat)
360       : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
361 
362   static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
363 
364   llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
365 
366   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
367   getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
368             bool IsVolatile) override {
369     return std::move(Buffer);
370   }
371 
372   std::error_code close() override { return {}; }
373 
374 private:
375   std::unique_ptr<llvm::MemoryBuffer> Buffer;
376   llvm::vfs::Status Stat;
377 };
378 
379 } // end anonymous namespace
380 
381 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
382 DepScanFile::create(EntryRef Entry) {
383   assert(!Entry.isError() && "error");
384 
385   if (Entry.isDirectory())
386     return std::make_error_code(std::errc::is_a_directory);
387 
388   auto Result = std::make_unique<DepScanFile>(
389       llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
390                                        Entry.getStatus().getName(),
391                                        /*RequiresNullTerminator=*/false),
392       Entry.getStatus());
393 
394   return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
395       std::unique_ptr<llvm::vfs::File>(std::move(Result)));
396 }
397 
398 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
399 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
400   SmallString<256> OwnedFilename;
401   StringRef Filename = Path.toStringRef(OwnedFilename);
402 
403   if (shouldBypass(Filename))
404     return getUnderlyingFS().openFileForRead(Path);
405 
406   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
407   if (!Result)
408     return Result.getError();
409   return DepScanFile::create(Result.get());
410 }
411 
412 std::error_code
413 DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path,
414                                                 SmallVectorImpl<char> &Output) {
415   SmallString<256> OwnedFilename;
416   StringRef OriginalFilename = Path.toStringRef(OwnedFilename);
417 
418   if (shouldBypass(OriginalFilename))
419     return getUnderlyingFS().getRealPath(Path, Output);
420 
421   SmallString<256> PathBuf;
422   auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
423   if (!FilenameForLookup)
424     return FilenameForLookup.getError();
425 
426   auto HandleCachedRealPath =
427       [&Output](const CachedRealPath &RealPath) -> std::error_code {
428     if (!RealPath)
429       return RealPath.getError();
430     Output.assign(RealPath->begin(), RealPath->end());
431     return {};
432   };
433 
434   // If we already have the result in local cache, no work required.
435   if (const auto *RealPath =
436           LocalCache.findRealPathByFilename(*FilenameForLookup))
437     return HandleCachedRealPath(*RealPath);
438 
439   // If we have the result in the shared cache, cache it locally.
440   auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup);
441   if (const auto *ShardRealPath =
442           Shard.findRealPathByFilename(*FilenameForLookup)) {
443     const auto &RealPath = LocalCache.insertRealPathForFilename(
444         *FilenameForLookup, *ShardRealPath);
445     return HandleCachedRealPath(RealPath);
446   }
447 
448   // If we don't know the real path, compute it...
449   std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output);
450   llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC;
451   if (!EC)
452     ComputedRealPath = StringRef{Output.data(), Output.size()};
453 
454   // ...and try to write it into the shared cache. In case some other thread won
455   // this race and already wrote its own result there, just adopt it. Write
456   // whatever is in the shared cache into the local one.
457   const auto &RealPath = Shard.getOrEmplaceRealPathForFilename(
458       *FilenameForLookup, ComputedRealPath);
459   return HandleCachedRealPath(
460       LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath));
461 }
462 
463 std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
464     const Twine &Path) {
465   std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);
466   updateWorkingDirForCacheLookup();
467   return EC;
468 }
469 
470 void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
471   llvm::ErrorOr<std::string> CWD =
472       getUnderlyingFS().getCurrentWorkingDirectory();
473   if (!CWD) {
474     WorkingDirForCacheLookup = CWD.getError();
475   } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) {
476     WorkingDirForCacheLookup = llvm::errc::invalid_argument;
477   } else {
478     WorkingDirForCacheLookup = *CWD;
479   }
480   assert(!WorkingDirForCacheLookup ||
481          llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
482 }
483 
484 llvm::ErrorOr<StringRef>
485 DependencyScanningWorkerFilesystem::tryGetFilenameForLookup(
486     StringRef OriginalFilename, llvm::SmallVectorImpl<char> &PathBuf) const {
487   StringRef FilenameForLookup;
488   if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
489     FilenameForLookup = OriginalFilename;
490   } else if (!WorkingDirForCacheLookup) {
491     return WorkingDirForCacheLookup.getError();
492   } else {
493     StringRef RelFilename = OriginalFilename;
494     RelFilename.consume_front("./");
495     PathBuf.assign(WorkingDirForCacheLookup->begin(),
496                    WorkingDirForCacheLookup->end());
497     llvm::sys::path::append(PathBuf, RelFilename);
498     FilenameForLookup = StringRef{PathBuf.begin(), PathBuf.size()};
499   }
500   assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
501   return FilenameForLookup;
502 }
503 
// Out-of-line definition of the static `ID` member. The class derives from
// `llvm::RTTIExtends`; presumably that machinery identifies the type through
// this member (NOTE(review): confirm against llvm/Support/ExtensibleRTTI.h).
const char DependencyScanningWorkerFilesystem::ID = 0;
505