1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" 10 #include "clang/Lex/DependencyDirectivesSourceMinimizer.h" 11 #include "llvm/Support/MemoryBuffer.h" 12 #include "llvm/Support/SmallVectorMemoryBuffer.h" 13 #include "llvm/Support/Threading.h" 14 15 using namespace clang; 16 using namespace tooling; 17 using namespace dependencies; 18 19 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry> 20 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { 21 // Load the file and its content from the file system. 22 auto MaybeFile = getUnderlyingFS().openFileForRead(Filename); 23 if (!MaybeFile) 24 return MaybeFile.getError(); 25 auto File = std::move(*MaybeFile); 26 27 auto MaybeStat = File->status(); 28 if (!MaybeStat) 29 return MaybeStat.getError(); 30 auto Stat = std::move(*MaybeStat); 31 32 auto MaybeBuffer = File->getBuffer(Stat.getName()); 33 if (!MaybeBuffer) 34 return MaybeBuffer.getError(); 35 auto Buffer = std::move(*MaybeBuffer); 36 37 // If the file size changed between read and stat, pretend it didn't. 38 if (Stat.getSize() != Buffer->getBufferSize()) 39 Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize()); 40 41 return TentativeEntry(Stat, std::move(Buffer)); 42 } 43 44 EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary( 45 const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { 46 if (Entry.isError() || Entry.isDirectory() || Disable || 47 !shouldMinimize(Filename, Entry.getUniqueID())) 48 return EntryRef(/*Minimized=*/false, Filename, Entry); 49 50 CachedFileContents *Contents = Entry.getContents(); 51 assert(Contents && "contents not initialized"); 52 53 // Double-checked locking. 54 if (Contents->MinimizedAccess.load()) 55 return EntryRef(/*Minimized=*/true, Filename, Entry); 56 57 std::lock_guard<std::mutex> GuardLock(Contents->ValueLock); 58 59 // Double-checked locking. 60 if (Contents->MinimizedAccess.load()) 61 return EntryRef(/*Minimized=*/true, Filename, Entry); 62 63 llvm::SmallString<1024> MinimizedFileContents; 64 // Minimize the file down to directives that might affect the dependencies. 65 SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens; 66 if (minimizeSourceToDependencyDirectives(Contents->Original->getBuffer(), 67 MinimizedFileContents, Tokens)) { 68 // FIXME: Propagate the diagnostic if desired by the client. 69 // Use the original file if the minimization failed. 70 Contents->MinimizedStorage = 71 llvm::MemoryBuffer::getMemBuffer(*Contents->Original); 72 Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); 73 return EntryRef(/*Minimized=*/true, Filename, Entry); 74 } 75 76 // The contents produced by the minimizer must be null terminated. 77 assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' && 78 "not null terminated contents"); 79 80 // Compute the skipped PP ranges that speedup skipping over inactive 81 // preprocessor blocks. 82 llvm::SmallVector<minimize_source_to_dependency_directives::SkippedRange, 32> 83 SkippedRanges; 84 minimize_source_to_dependency_directives::computeSkippedRanges(Tokens, 85 SkippedRanges); 86 PreprocessorSkippedRangeMapping Mapping; 87 for (const auto &Range : SkippedRanges) { 88 if (Range.Length < 16) { 89 // Ignore small ranges as non-profitable. 90 // FIXME: This is a heuristic, its worth investigating the tradeoffs 91 // when it should be applied. 92 continue; 93 } 94 Mapping[Range.Offset] = Range.Length; 95 } 96 Contents->PPSkippedRangeMapping = std::move(Mapping); 97 98 Contents->MinimizedStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>( 99 std::move(MinimizedFileContents)); 100 // This function performed double-checked locking using `MinimizedAccess`. 101 // Assigning it must be the last thing this function does. If we were to 102 // assign it before `PPSkippedRangeMapping`, other threads may skip the 103 // critical section (`MinimizedAccess != nullptr`) and access the mappings 104 // that are about to be initialized, leading to a data race. 105 Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); 106 return EntryRef(/*Minimized=*/true, Filename, Entry); 107 } 108 109 DependencyScanningFilesystemSharedCache:: 110 DependencyScanningFilesystemSharedCache() { 111 // This heuristic was chosen using a empirical testing on a 112 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache 113 // sharding gives a performance edge by reducing the lock contention. 114 // FIXME: A better heuristic might also consider the OS to account for 115 // the different cost of lock contention on different OSes. 116 NumShards = 117 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); 118 CacheShards = std::make_unique<CacheShard[]>(NumShards); 119 } 120 121 DependencyScanningFilesystemSharedCache::CacheShard & 122 DependencyScanningFilesystemSharedCache::getShardForFilename( 123 StringRef Filename) const { 124 return CacheShards[llvm::hash_value(Filename) % NumShards]; 125 } 126 127 DependencyScanningFilesystemSharedCache::CacheShard & 128 DependencyScanningFilesystemSharedCache::getShardForUID( 129 llvm::sys::fs::UniqueID UID) const { 130 auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile()); 131 return CacheShards[Hash % NumShards]; 132 } 133 134 const CachedFileSystemEntry * 135 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( 136 StringRef Filename) const { 137 std::lock_guard<std::mutex> LockGuard(CacheLock); 138 auto It = EntriesByFilename.find(Filename); 139 return It == EntriesByFilename.end() ? nullptr : It->getValue(); 140 } 141 142 const CachedFileSystemEntry * 143 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( 144 llvm::sys::fs::UniqueID UID) const { 145 std::lock_guard<std::mutex> LockGuard(CacheLock); 146 auto It = EntriesByUID.find(UID); 147 return It == EntriesByUID.end() ? nullptr : It->getSecond(); 148 } 149 150 const CachedFileSystemEntry & 151 DependencyScanningFilesystemSharedCache::CacheShard:: 152 getOrEmplaceEntryForFilename(StringRef Filename, 153 llvm::ErrorOr<llvm::vfs::Status> Stat) { 154 std::lock_guard<std::mutex> LockGuard(CacheLock); 155 auto Insertion = EntriesByFilename.insert({Filename, nullptr}); 156 if (Insertion.second) 157 Insertion.first->second = 158 new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); 159 return *Insertion.first->second; 160 } 161 162 const CachedFileSystemEntry & 163 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( 164 llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, 165 std::unique_ptr<llvm::MemoryBuffer> Contents) { 166 std::lock_guard<std::mutex> LockGuard(CacheLock); 167 auto Insertion = EntriesByUID.insert({UID, nullptr}); 168 if (Insertion.second) { 169 CachedFileContents *StoredContents = nullptr; 170 if (Contents) 171 StoredContents = new (ContentsStorage.Allocate()) 172 CachedFileContents(std::move(Contents)); 173 Insertion.first->second = new (EntryStorage.Allocate()) 174 CachedFileSystemEntry(std::move(Stat), StoredContents); 175 } 176 return *Insertion.first->second; 177 } 178 179 const CachedFileSystemEntry & 180 DependencyScanningFilesystemSharedCache::CacheShard:: 181 getOrInsertEntryForFilename(StringRef Filename, 182 const CachedFileSystemEntry &Entry) { 183 std::lock_guard<std::mutex> LockGuard(CacheLock); 184 return *EntriesByFilename.insert({Filename, &Entry}).first->getValue(); 185 } 186 187 /// Whitelist file extensions that should be minimized, treating no extension as 188 /// a source file that should be minimized. 189 /// 190 /// This is kinda hacky, it would be better if we knew what kind of file Clang 191 /// was expecting instead. 192 static bool shouldMinimizeBasedOnExtension(StringRef Filename) { 193 StringRef Ext = llvm::sys::path::extension(Filename); 194 if (Ext.empty()) 195 return true; // C++ standard library 196 return llvm::StringSwitch<bool>(Ext) 197 .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true) 198 .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true) 199 .CasesLower(".m", ".mm", true) 200 .CasesLower(".i", ".ii", ".mi", ".mmi", true) 201 .CasesLower(".def", ".inc", true) 202 .Default(false); 203 } 204 205 static bool shouldCacheStatFailures(StringRef Filename) { 206 StringRef Ext = llvm::sys::path::extension(Filename); 207 if (Ext.empty()) 208 return false; // This may be the module cache directory. 209 // Only cache stat failures on source files. 210 return shouldMinimizeBasedOnExtension(Filename); 211 } 212 213 void DependencyScanningWorkerFilesystem::disableMinimization( 214 StringRef Filename) { 215 // Since we're not done setting up `NotToBeMinimized` yet, we need to disable 216 // minimization explicitly. 217 if (llvm::ErrorOr<EntryRef> Result = 218 getOrCreateFileSystemEntry(Filename, /*DisableMinimization=*/true)) 219 NotToBeMinimized.insert(Result->getStatus().getUniqueID()); 220 } 221 222 bool DependencyScanningWorkerFilesystem::shouldMinimize( 223 StringRef Filename, llvm::sys::fs::UniqueID UID) { 224 return shouldMinimizeBasedOnExtension(Filename) && 225 !NotToBeMinimized.contains(UID); 226 } 227 228 const CachedFileSystemEntry & 229 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( 230 TentativeEntry TEntry) { 231 auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID()); 232 return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(), 233 std::move(TEntry.Status), 234 std::move(TEntry.Contents)); 235 } 236 237 const CachedFileSystemEntry * 238 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( 239 StringRef Filename) { 240 if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) 241 return Entry; 242 auto &Shard = SharedCache.getShardForFilename(Filename); 243 if (const auto *Entry = Shard.findEntryByFilename(Filename)) 244 return &LocalCache.insertEntryForFilename(Filename, *Entry); 245 return nullptr; 246 } 247 248 llvm::ErrorOr<const CachedFileSystemEntry &> 249 DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) { 250 llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename); 251 if (!Stat) { 252 if (!shouldCacheStatFailures(Filename)) 253 return Stat.getError(); 254 const auto &Entry = 255 getOrEmplaceSharedEntryForFilename(Filename, Stat.getError()); 256 return insertLocalEntryForFilename(Filename, Entry); 257 } 258 259 if (const auto *Entry = findSharedEntryByUID(*Stat)) 260 return insertLocalEntryForFilename(Filename, *Entry); 261 262 auto TEntry = 263 Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename); 264 265 const CachedFileSystemEntry *SharedEntry = [&]() { 266 if (TEntry) { 267 const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); 268 return &getOrInsertSharedEntryForFilename(Filename, UIDEntry); 269 } 270 return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError()); 271 }(); 272 273 return insertLocalEntryForFilename(Filename, *SharedEntry); 274 } 275 276 llvm::ErrorOr<EntryRef> 277 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( 278 StringRef Filename, bool DisableMinimization) { 279 if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename)) 280 return minimizeIfNecessary(*Entry, Filename, DisableMinimization) 281 .unwrapError(); 282 auto MaybeEntry = computeAndStoreResult(Filename); 283 if (!MaybeEntry) 284 return MaybeEntry.getError(); 285 return minimizeIfNecessary(*MaybeEntry, Filename, DisableMinimization) 286 .unwrapError(); 287 } 288 289 llvm::ErrorOr<llvm::vfs::Status> 290 DependencyScanningWorkerFilesystem::status(const Twine &Path) { 291 SmallString<256> OwnedFilename; 292 StringRef Filename = Path.toStringRef(OwnedFilename); 293 294 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 295 if (!Result) 296 return Result.getError(); 297 return Result->getStatus(); 298 } 299 300 namespace { 301 302 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using 303 /// this subclass. 304 class MinimizedVFSFile final : public llvm::vfs::File { 305 public: 306 MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer, 307 llvm::vfs::Status Stat) 308 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {} 309 310 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 311 create(EntryRef Entry, 312 ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings); 313 314 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; } 315 316 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 317 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator, 318 bool IsVolatile) override { 319 return std::move(Buffer); 320 } 321 322 std::error_code close() override { return {}; } 323 324 private: 325 std::unique_ptr<llvm::MemoryBuffer> Buffer; 326 llvm::vfs::Status Stat; 327 }; 328 329 } // end anonymous namespace 330 331 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create( 332 EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) { 333 assert(!Entry.isError() && "error"); 334 335 if (Entry.isDirectory()) 336 return std::make_error_code(std::errc::is_a_directory); 337 338 auto Result = std::make_unique<MinimizedVFSFile>( 339 llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), 340 Entry.getStatus().getName(), 341 /*RequiresNullTerminator=*/false), 342 Entry.getStatus()); 343 344 const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping(); 345 if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings) 346 (*PPSkipMappings)[Result->Buffer->getBufferStart()] = EntrySkipMappings; 347 348 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>( 349 std::unique_ptr<llvm::vfs::File>(std::move(Result))); 350 } 351 352 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> 353 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { 354 SmallString<256> OwnedFilename; 355 StringRef Filename = Path.toStringRef(OwnedFilename); 356 357 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename); 358 if (!Result) 359 return Result.getError(); 360 return MinimizedVFSFile::create(Result.get(), PPSkipMappings); 361 } 362