1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" 10 #include "llvm/Support/MemoryBuffer.h" 11 #include "llvm/Support/SmallVectorMemoryBuffer.h" 12 #include "llvm/Support/Threading.h" 13 #include <optional> 14 15 using namespace clang; 16 using namespace tooling; 17 using namespace dependencies; 18 19 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry> 20 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { 21 // Load the file and its content from the file system. 22 auto MaybeFile = getUnderlyingFS().openFileForRead(Filename); 23 if (!MaybeFile) 24 return MaybeFile.getError(); 25 auto File = std::move(*MaybeFile); 26 27 auto MaybeStat = File->status(); 28 if (!MaybeStat) 29 return MaybeStat.getError(); 30 auto Stat = std::move(*MaybeStat); 31 32 auto MaybeBuffer = File->getBuffer(Stat.getName()); 33 if (!MaybeBuffer) 34 return MaybeBuffer.getError(); 35 auto Buffer = std::move(*MaybeBuffer); 36 37 // If the file size changed between read and stat, pretend it didn't. 38 if (Stat.getSize() != Buffer->getBufferSize()) 39 Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize()); 40 41 return TentativeEntry(Stat, std::move(Buffer)); 42 } 43 44 EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary( 45 const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { 46 if (Entry.isError() || Entry.isDirectory() || Disable || 47 !shouldScanForDirectives(Filename)) 48 return EntryRef(Filename, Entry); 49 50 CachedFileContents *Contents = Entry.getCachedContents(); 51 assert(Contents && "contents not initialized"); 52 53 // Double-checked locking. 54 if (Contents->DepDirectives.load()) 55 return EntryRef(Filename, Entry); 56 57 std::lock_guard<std::mutex> GuardLock(Contents->ValueLock); 58 59 // Double-checked locking. 60 if (Contents->DepDirectives.load()) 61 return EntryRef(Filename, Entry); 62 63 SmallVector<dependency_directives_scan::Directive, 64> Directives; 64 // Scan the file for preprocessor directives that might affect the 65 // dependencies. 66 if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(), 67 Contents->DepDirectiveTokens, 68 Directives)) { 69 Contents->DepDirectiveTokens.clear(); 70 // FIXME: Propagate the diagnostic if desired by the client. 71 Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>()); 72 return EntryRef(Filename, Entry); 73 } 74 75 // This function performed double-checked locking using `DepDirectives`. 76 // Assigning it must be the last thing this function does, otherwise other 77 // threads may skip the 78 // critical section (`DepDirectives != nullptr`), leading to a data race. 79 Contents->DepDirectives.store( 80 new std::optional<DependencyDirectivesTy>(std::move(Directives))); 81 return EntryRef(Filename, Entry); 82 } 83 84 DependencyScanningFilesystemSharedCache:: 85 DependencyScanningFilesystemSharedCache() { 86 // This heuristic was chosen using a empirical testing on a 87 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache 88 // sharding gives a performance edge by reducing the lock contention. 89 // FIXME: A better heuristic might also consider the OS to account for 90 // the different cost of lock contention on different OSes. 91 NumShards = 92 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); 93 CacheShards = std::make_unique<CacheShard[]>(NumShards); 94 } 95 96 DependencyScanningFilesystemSharedCache::CacheShard & 97 DependencyScanningFilesystemSharedCache::getShardForFilename( 98 StringRef Filename) const { 99 assert(llvm::sys::path::is_absolute_gnu(Filename)); 100 return CacheShards[llvm::hash_value(Filename) % NumShards]; 101 } 102 103 DependencyScanningFilesystemSharedCache::CacheShard & 104 DependencyScanningFilesystemSharedCache::getShardForUID( 105 llvm::sys::fs::UniqueID UID) const { 106 auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile()); 107 return CacheShards[Hash % NumShards]; 108 } 109 110 const CachedFileSystemEntry * 111 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( 112 StringRef Filename) const { 113 assert(llvm::sys::path::is_absolute_gnu(Filename)); 114 std::lock_guard<std::mutex> LockGuard(CacheLock); 115 auto It = EntriesByFilename.find(Filename); 116 return It == EntriesByFilename.end() ? nullptr : It->getValue(); 117 } 118 119 const CachedFileSystemEntry * 120 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( 121 llvm::sys::fs::UniqueID UID) const { 122 std::lock_guard<std::mutex> LockGuard(CacheLock); 123 auto It = EntriesByUID.find(UID); 124 return It == EntriesByUID.end() ? nullptr : It->getSecond(); 125 } 126 127 const CachedFileSystemEntry & 128 DependencyScanningFilesystemSharedCache::CacheShard:: 129 getOrEmplaceEntryForFilename(StringRef Filename, 130 llvm::ErrorOr<llvm::vfs::Status> Stat) { 131 std::lock_guard<std::mutex> LockGuard(CacheLock); 132 auto Insertion = EntriesByFilename.insert({Filename, nullptr}); 133 if (Insertion.second) 134 Insertion.first->second = 135 new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); 136 return *Insertion.first->second; 137 } 138 139 const CachedFileSystemEntry & 140 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( 141 llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, 142 std::unique_ptr<llvm::MemoryBuffer> Contents) { 143 std::lock_guard<std::mutex> LockGuard(CacheLock); 144 auto Insertion = EntriesByUID.insert({UID, nullptr}); 145 if (Insertion.second) { 146 CachedFileContents *StoredContents = nullptr; 147 if (Contents) 148 StoredContents = new (ContentsStorage.Allocate()) 149 CachedFileContents(std::move(Contents)); 150 Insertion.first->second = new (EntryStorage.Allocate()) 151 CachedFileSystemEntry(std::move(Stat), StoredContents); 152 } 153 return *Insertion.first->second; 154 } 155 156 const CachedFileSystemEntry & 157 DependencyScanningFilesystemSharedCache::CacheShard:: 158 getOrInsertEntryForFilename(StringRef Filename, 159 const CachedFileSystemEntry &Entry) { 160 std::lock_guard<std::mutex> LockGuard(CacheLock); 161 return *EntriesByFilename.insert({Filename, &Entry}).first->getValue(); 162 } 163 164 /// Whitelist file extensions that should be minimized, treating no extension as 165 /// a source file that should be minimized. 166 /// 167 /// This is kinda hacky, it would be better if we knew what kind of file Clang 168 /// was expecting instead. 169 static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) { 170 StringRef Ext = llvm::sys::path::extension(Filename); 171 if (Ext.empty()) 172 return true; // C++ standard library 173 return llvm::StringSwitch<bool>(Ext) 174 .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true) 175 .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true) 176 .CasesLower(".m", ".mm", true) 177 .CasesLower(".i", ".ii", ".mi", ".mmi", true) 178 .CasesLower(".def", ".inc", true) 179 .Default(false); 180 } 181 182 static bool shouldCacheStatFailures(StringRef Filename) { 183 StringRef Ext = llvm::sys::path::extension(Filename); 184 if (Ext.empty()) 185 return false; // This may be the module cache directory. 186 // Only cache stat failures on files that are not expected to change during 187 // the build. 188 StringRef FName = llvm::sys::path::filename(Filename); 189 if (FName == "module.modulemap" || FName == "module.map") 190 return true; 191 return shouldScanForDirectivesBasedOnExtension(Filename); 192 } 193 194 DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem( 195 DependencyScanningFilesystemSharedCache &SharedCache, 196 IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) 197 : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache), 198 WorkingDirForCacheLookup(llvm::errc::invalid_argument) { 199 updateWorkingDirForCacheLookup(); 200 } 201 202 bool DependencyScanningWorkerFilesystem::shouldScanForDirectives( 203 StringRef Filename) { 204 return shouldScanForDirectivesBasedOnExtension(Filename); 205 } 206 207 const CachedFileSystemEntry & 208 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( 209 TentativeEntry TEntry) { 210 auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID()); 211 return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(), 212 std::move(TEntry.Status), 213 std::move(TEntry.Contents)); 214 } 215 216 const CachedFileSystemEntry * 217 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( 218 StringRef Filename) { 219 if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) 220 return Entry; 221 auto &Shard = SharedCache.getShardForFilename(Filename); 222 if (const auto *Entry = Shard.findEntryByFilename(Filename)) 223 return &LocalCache.insertEntryForFilename(Filename, *Entry); 224 return nullptr; 225 } 226 227 llvm::ErrorOr<const CachedFileSystemEntry &> 228 DependencyScanningWorkerFilesystem::computeAndStoreResult( 229 StringRef OriginalFilename, StringRef FilenameForLookup) { 230 llvm::ErrorOr<llvm::vfs::Status> Stat = 231 getUnderlyingFS().status(OriginalFilename); 232 if (!Stat) { 233 if (!shouldCacheStatFailures(OriginalFilename)) 234 return Stat.getError(); 235 const auto &Entry = 236 getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError()); 237 return insertLocalEntryForFilename(FilenameForLookup, Entry); 238 } 239 240 if (const auto *Entry = findSharedEntryByUID(*Stat)) 241 return insertLocalEntryForFilename(FilenameForLookup, *Entry); 242 243 auto TEntry = 244 Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename); 245 246 const CachedFileSystemEntry *SharedEntry = [&]() { 247 if (TEntry) { 248 const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); 249 return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry); 250 } 251 return &getOrEmplaceSharedEntryForFilename(FilenameForLookup, 252 TEntry.getError()); 253 }(); 254 255 return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry); 256 } 257 258 llvm::ErrorOr<EntryRef> 259 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( 260 StringRef OriginalFilename, bool DisableDirectivesScanning) { 261 StringRef FilenameForLookup; 262 SmallString<256> PathBuf; 263 if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) { 264 FilenameForLookup = OriginalFilename; 265 } else if (!WorkingDirForCacheLookup) { 266 return WorkingDirForCacheLookup.getError(); 267 } else { 268 StringRef RelFilename = OriginalFilename; 269 RelFilename.consume_front("./"); 270 PathBuf = *WorkingDirForCacheLookup; 271 llvm::sys::path::append(PathBuf, RelFilename); 272 FilenameForLookup = PathBuf.str(); 273 } 274 assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup)); 275 if (const auto *Entry = 276 findEntryByFilenameWithWriteThrough(FilenameForLookup)) 277 return scanForDirectivesIfNecessary(*Entry, OriginalFilename, 278 DisableDirectivesScanning) 279 .unwrapError(); 280 auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup); 281 if (!MaybeEntry) 282 return MaybeEntry.getError(); 283 return scanForDirectivesIfNecessary(*MaybeEntry, OriginalFilename, 284 DisableDirectivesScanning) 285 .unwrapError(); 286 } 287 288 llvm::ErrorOr<llvm::vfs::Status> 289 DependencyScanningWorkerFilesystem::status(const Twine &Path) { 290 SmallString<256> OwnedFilename; 291 StringRef Filename = Path.toStringRef(OwnedFilename); 292 293 if (Filename.endswith(".pcm")) 294 return getUnderlyingFS().status(Path); 295 296 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 297 if (!Result) 298 return Result.getError(); 299 return Result->getStatus(); 300 } 301 302 namespace { 303 304 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using 305 /// this subclass. 306 class DepScanFile final : public llvm::vfs::File { 307 public: 308 DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, 309 llvm::vfs::Status Stat) 310 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} 311 312 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry); 313 314 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } 315 316 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 317 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, 318 bool IsVolatile) override { 319 return std::move(Buffer); 320 } 321 322 std::error_code close() override { return {}; } 323 324 private: 325 std::unique_ptr<llvm::MemoryBuffer> Buffer; 326 llvm::vfs::Status Stat; 327 }; 328 329 } // end anonymous namespace 330 331 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 332 DepScanFile::create(EntryRef Entry) { 333 assert(!Entry.isError() && "error"); 334 335 if (Entry.isDirectory()) 336 return std::make_error_code(std::errc::is_a_directory); 337 338 auto Result = std::make_unique<DepScanFile>( 339 llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), 340 Entry.getStatus().getName(), 341 /*RequiresNullTerminator=*/false), 342 Entry.getStatus()); 343 344 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 345 std::unique_ptr<llvm::vfs::File>(std::move(Result))); 346 } 347 348 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 349 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { 350 SmallString<256> OwnedFilename; 351 StringRef Filename = Path.toStringRef(OwnedFilename); 352 353 if (Filename.endswith(".pcm")) 354 return getUnderlyingFS().openFileForRead(Path); 355 356 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 357 if (!Result) 358 return Result.getError(); 359 return DepScanFile::create(Result.get()); 360 } 361 362 std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory( 363 const Twine &Path) { 364 std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path); 365 updateWorkingDirForCacheLookup(); 366 return EC; 367 } 368 369 void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() { 370 llvm::ErrorOr<std::string> CWD = 371 getUnderlyingFS().getCurrentWorkingDirectory(); 372 if (!CWD) { 373 WorkingDirForCacheLookup = CWD.getError(); 374 } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) { 375 WorkingDirForCacheLookup = llvm::errc::invalid_argument; 376 } else { 377 WorkingDirForCacheLookup = *CWD; 378 } 379 assert(!WorkingDirForCacheLookup || 380 llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup)); 381 } 382