10b57cec5SDimitry Andric //===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements the pruning of a directory based on least recently used. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/Support/CachePruning.h" 145ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h" 150b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 160b57cec5SDimitry Andric #include "llvm/Support/Errc.h" 170b57cec5SDimitry Andric #include "llvm/Support/Error.h" 180b57cec5SDimitry Andric #include "llvm/Support/FileSystem.h" 190b57cec5SDimitry Andric #include "llvm/Support/Path.h" 20bdd1243dSDimitry Andric #include "llvm/Support/WithColor.h" 210b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric #define DEBUG_TYPE "cache-pruning" 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric #include <set> 260b57cec5SDimitry Andric #include <system_error> 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric using namespace llvm; 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric namespace { 310b57cec5SDimitry Andric struct FileInfo { 320b57cec5SDimitry Andric sys::TimePoint<> Time; 330b57cec5SDimitry Andric uint64_t Size; 340b57cec5SDimitry Andric std::string Path; 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric /// Used to determine which files to prune first. Also used to determine 370b57cec5SDimitry Andric /// set membership, so must take into account all fields. 380b57cec5SDimitry Andric bool operator<(const FileInfo &Other) const { 390b57cec5SDimitry Andric return std::tie(Time, Other.Size, Path) < 400b57cec5SDimitry Andric std::tie(Other.Time, Size, Other.Path); 410b57cec5SDimitry Andric } 420b57cec5SDimitry Andric }; 430b57cec5SDimitry Andric } // anonymous namespace 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric /// Write a new timestamp file with the given path. This is used for the pruning 460b57cec5SDimitry Andric /// interval option. 470b57cec5SDimitry Andric static void writeTimestampFile(StringRef TimestampFile) { 480b57cec5SDimitry Andric std::error_code EC; 498bcb0991SDimitry Andric raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None); 500b57cec5SDimitry Andric } 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric static Expected<std::chrono::seconds> parseDuration(StringRef Duration) { 530b57cec5SDimitry Andric if (Duration.empty()) 540b57cec5SDimitry Andric return make_error<StringError>("Duration must not be empty", 550b57cec5SDimitry Andric inconvertibleErrorCode()); 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric StringRef NumStr = Duration.slice(0, Duration.size()-1); 580b57cec5SDimitry Andric uint64_t Num; 590b57cec5SDimitry Andric if (NumStr.getAsInteger(0, Num)) 600b57cec5SDimitry Andric return make_error<StringError>("'" + NumStr + "' not an integer", 610b57cec5SDimitry Andric inconvertibleErrorCode()); 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric switch (Duration.back()) { 640b57cec5SDimitry Andric case 's': 650b57cec5SDimitry Andric return std::chrono::seconds(Num); 660b57cec5SDimitry Andric case 'm': 670b57cec5SDimitry Andric return std::chrono::minutes(Num); 680b57cec5SDimitry Andric case 'h': 690b57cec5SDimitry Andric return std::chrono::hours(Num); 700b57cec5SDimitry Andric default: 710b57cec5SDimitry Andric return make_error<StringError>("'" + Duration + 720b57cec5SDimitry Andric "' must end with one of 's', 'm' or 'h'", 730b57cec5SDimitry Andric inconvertibleErrorCode()); 740b57cec5SDimitry Andric } 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric Expected<CachePruningPolicy> 780b57cec5SDimitry Andric llvm::parseCachePruningPolicy(StringRef PolicyStr) { 790b57cec5SDimitry Andric CachePruningPolicy Policy; 800b57cec5SDimitry Andric std::pair<StringRef, StringRef> P = {"", PolicyStr}; 810b57cec5SDimitry Andric while (!P.second.empty()) { 820b57cec5SDimitry Andric P = P.second.split(':'); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric StringRef Key, Value; 850b57cec5SDimitry Andric std::tie(Key, Value) = P.first.split('='); 860b57cec5SDimitry Andric if (Key == "prune_interval") { 870b57cec5SDimitry Andric auto DurationOrErr = parseDuration(Value); 880b57cec5SDimitry Andric if (!DurationOrErr) 890b57cec5SDimitry Andric return DurationOrErr.takeError(); 900b57cec5SDimitry Andric Policy.Interval = *DurationOrErr; 910b57cec5SDimitry Andric } else if (Key == "prune_after") { 920b57cec5SDimitry Andric auto DurationOrErr = parseDuration(Value); 930b57cec5SDimitry Andric if (!DurationOrErr) 940b57cec5SDimitry Andric return DurationOrErr.takeError(); 950b57cec5SDimitry Andric Policy.Expiration = *DurationOrErr; 960b57cec5SDimitry Andric } else if (Key == "cache_size") { 970b57cec5SDimitry Andric if (Value.back() != '%') 980b57cec5SDimitry Andric return make_error<StringError>("'" + Value + "' must be a percentage", 990b57cec5SDimitry Andric inconvertibleErrorCode()); 1000b57cec5SDimitry Andric StringRef SizeStr = Value.drop_back(); 1010b57cec5SDimitry Andric uint64_t Size; 1020b57cec5SDimitry Andric if (SizeStr.getAsInteger(0, Size)) 1030b57cec5SDimitry Andric return make_error<StringError>("'" + SizeStr + "' not an integer", 1040b57cec5SDimitry Andric inconvertibleErrorCode()); 1050b57cec5SDimitry Andric if (Size > 100) 1060b57cec5SDimitry Andric return make_error<StringError>("'" + SizeStr + 1070b57cec5SDimitry Andric "' must be between 0 and 100", 1080b57cec5SDimitry Andric inconvertibleErrorCode()); 1090b57cec5SDimitry Andric Policy.MaxSizePercentageOfAvailableSpace = Size; 1100b57cec5SDimitry Andric } else if (Key == "cache_size_bytes") { 1110b57cec5SDimitry Andric uint64_t Mult = 1; 1120b57cec5SDimitry Andric switch (tolower(Value.back())) { 1130b57cec5SDimitry Andric case 'k': 1140b57cec5SDimitry Andric Mult = 1024; 1150b57cec5SDimitry Andric Value = Value.drop_back(); 1160b57cec5SDimitry Andric break; 1170b57cec5SDimitry Andric case 'm': 1180b57cec5SDimitry Andric Mult = 1024 * 1024; 1190b57cec5SDimitry Andric Value = Value.drop_back(); 1200b57cec5SDimitry Andric break; 1210b57cec5SDimitry Andric case 'g': 1220b57cec5SDimitry Andric Mult = 1024 * 1024 * 1024; 1230b57cec5SDimitry Andric Value = Value.drop_back(); 1240b57cec5SDimitry Andric break; 1250b57cec5SDimitry Andric } 1260b57cec5SDimitry Andric uint64_t Size; 1270b57cec5SDimitry Andric if (Value.getAsInteger(0, Size)) 1280b57cec5SDimitry Andric return make_error<StringError>("'" + Value + "' not an integer", 1290b57cec5SDimitry Andric inconvertibleErrorCode()); 1300b57cec5SDimitry Andric Policy.MaxSizeBytes = Size * Mult; 1310b57cec5SDimitry Andric } else if (Key == "cache_size_files") { 1320b57cec5SDimitry Andric if (Value.getAsInteger(0, Policy.MaxSizeFiles)) 1330b57cec5SDimitry Andric return make_error<StringError>("'" + Value + "' not an integer", 1340b57cec5SDimitry Andric inconvertibleErrorCode()); 1350b57cec5SDimitry Andric } else { 1360b57cec5SDimitry Andric return make_error<StringError>("Unknown key: '" + Key + "'", 1370b57cec5SDimitry Andric inconvertibleErrorCode()); 1380b57cec5SDimitry Andric } 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric return Policy; 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric 1440b57cec5SDimitry Andric /// Prune the cache of files that haven't been accessed in a long time. 145bdd1243dSDimitry Andric bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy, 146bdd1243dSDimitry Andric const std::vector<std::unique_ptr<MemoryBuffer>> &Files) { 1470b57cec5SDimitry Andric using namespace std::chrono; 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric if (Path.empty()) 1500b57cec5SDimitry Andric return false; 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric bool isPathDir; 1530b57cec5SDimitry Andric if (sys::fs::is_directory(Path, isPathDir)) 1540b57cec5SDimitry Andric return false; 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric if (!isPathDir) 1570b57cec5SDimitry Andric return false; 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric Policy.MaxSizePercentageOfAvailableSpace = 1600b57cec5SDimitry Andric std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u); 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric if (Policy.Expiration == seconds(0) && 1630b57cec5SDimitry Andric Policy.MaxSizePercentageOfAvailableSpace == 0 && 1640b57cec5SDimitry Andric Policy.MaxSizeBytes == 0 && Policy.MaxSizeFiles == 0) { 1650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "No pruning settings set, exit early\n"); 1660b57cec5SDimitry Andric // Nothing will be pruned, early exit 1670b57cec5SDimitry Andric return false; 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric // Try to stat() the timestamp file. 1710b57cec5SDimitry Andric SmallString<128> TimestampFile(Path); 1720b57cec5SDimitry Andric sys::path::append(TimestampFile, "llvmcache.timestamp"); 1730b57cec5SDimitry Andric sys::fs::file_status FileStatus; 1740b57cec5SDimitry Andric const auto CurrentTime = system_clock::now(); 1750b57cec5SDimitry Andric if (auto EC = sys::fs::status(TimestampFile, FileStatus)) { 1760b57cec5SDimitry Andric if (EC == errc::no_such_file_or_directory) { 1770b57cec5SDimitry Andric // If the timestamp file wasn't there, create one now. 1780b57cec5SDimitry Andric writeTimestampFile(TimestampFile); 1790b57cec5SDimitry Andric } else { 1800b57cec5SDimitry Andric // Unknown error? 1810b57cec5SDimitry Andric return false; 1820b57cec5SDimitry Andric } 1830b57cec5SDimitry Andric } else { 1840b57cec5SDimitry Andric if (!Policy.Interval) 1850b57cec5SDimitry Andric return false; 1860b57cec5SDimitry Andric if (Policy.Interval != seconds(0)) { 1870b57cec5SDimitry Andric // Check whether the time stamp is older than our pruning interval. 1880b57cec5SDimitry Andric // If not, do nothing. 1890b57cec5SDimitry Andric const auto TimeStampModTime = FileStatus.getLastModificationTime(); 1900b57cec5SDimitry Andric auto TimeStampAge = CurrentTime - TimeStampModTime; 1910b57cec5SDimitry Andric if (TimeStampAge <= *Policy.Interval) { 1920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Timestamp file too recent (" 1930b57cec5SDimitry Andric << duration_cast<seconds>(TimeStampAge).count() 1940b57cec5SDimitry Andric << "s old), do not prune.\n"); 1950b57cec5SDimitry Andric return false; 1960b57cec5SDimitry Andric } 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric // Write a new timestamp file so that nobody else attempts to prune. 1990b57cec5SDimitry Andric // There is a benign race condition here, if two processes happen to 2000b57cec5SDimitry Andric // notice at the same time that the timestamp is out-of-date. 2010b57cec5SDimitry Andric writeTimestampFile(TimestampFile); 2020b57cec5SDimitry Andric } 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric // Keep track of files to delete to get below the size limit. 2050b57cec5SDimitry Andric // Order by time of last use so that recently used files are preserved. 2060b57cec5SDimitry Andric std::set<FileInfo> FileInfos; 2070b57cec5SDimitry Andric uint64_t TotalSize = 0; 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric // Walk the entire directory cache, looking for unused files. 2100b57cec5SDimitry Andric std::error_code EC; 2110b57cec5SDimitry Andric SmallString<128> CachePathNative; 2120b57cec5SDimitry Andric sys::path::native(Path, CachePathNative); 2130b57cec5SDimitry Andric // Walk all of the files within this directory. 2140b57cec5SDimitry Andric for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd; 2150b57cec5SDimitry Andric File != FileEnd && !EC; File.increment(EC)) { 216e8d8bef9SDimitry Andric // Ignore filenames not beginning with "llvmcache-" or "Thin-". This 2170b57cec5SDimitry Andric // includes the timestamp file as well as any files created by the user. 2180b57cec5SDimitry Andric // This acts as a safeguard against data loss if the user specifies the 2190b57cec5SDimitry Andric // wrong directory as their cache directory. 220e8d8bef9SDimitry Andric StringRef filename = sys::path::filename(File->path()); 221*5f757f3fSDimitry Andric if (!filename.starts_with("llvmcache-") && !filename.starts_with("Thin-")) 2220b57cec5SDimitry Andric continue; 2230b57cec5SDimitry Andric 2240b57cec5SDimitry Andric // Look at this file. If we can't stat it, there's nothing interesting 2250b57cec5SDimitry Andric // there. 2260b57cec5SDimitry Andric ErrorOr<sys::fs::basic_file_status> StatusOrErr = File->status(); 2270b57cec5SDimitry Andric if (!StatusOrErr) { 2280b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n"); 2290b57cec5SDimitry Andric continue; 2300b57cec5SDimitry Andric } 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric // If the file hasn't been used recently enough, delete it 2330b57cec5SDimitry Andric const auto FileAccessTime = StatusOrErr->getLastAccessedTime(); 2340b57cec5SDimitry Andric auto FileAge = CurrentTime - FileAccessTime; 2350b57cec5SDimitry Andric if (Policy.Expiration != seconds(0) && FileAge > Policy.Expiration) { 2360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Remove " << File->path() << " (" 2370b57cec5SDimitry Andric << duration_cast<seconds>(FileAge).count() 2380b57cec5SDimitry Andric << "s old)\n"); 2390b57cec5SDimitry Andric sys::fs::remove(File->path()); 2400b57cec5SDimitry Andric continue; 2410b57cec5SDimitry Andric } 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric // Leave it here for now, but add it to the list of size-based pruning. 2440b57cec5SDimitry Andric TotalSize += StatusOrErr->getSize(); 2450b57cec5SDimitry Andric FileInfos.insert({FileAccessTime, StatusOrErr->getSize(), File->path()}); 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric auto FileInfo = FileInfos.begin(); 2490b57cec5SDimitry Andric size_t NumFiles = FileInfos.size(); 2500b57cec5SDimitry Andric 2510b57cec5SDimitry Andric auto RemoveCacheFile = [&]() { 2520b57cec5SDimitry Andric // Remove the file. 2530b57cec5SDimitry Andric sys::fs::remove(FileInfo->Path); 2540b57cec5SDimitry Andric // Update size 2550b57cec5SDimitry Andric TotalSize -= FileInfo->Size; 2560b57cec5SDimitry Andric NumFiles--; 2570b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size " 2580b57cec5SDimitry Andric << FileInfo->Size << "), new occupancy is " << TotalSize 2590b57cec5SDimitry Andric << "%\n"); 2600b57cec5SDimitry Andric ++FileInfo; 2610b57cec5SDimitry Andric }; 2620b57cec5SDimitry Andric 263bdd1243dSDimitry Andric // files.size() is greater the number of inputs by one. However, a timestamp 264bdd1243dSDimitry Andric // file is created and stored in the cache directory if --thinlto-cache-policy 265bdd1243dSDimitry Andric // option is used. Therefore, files.size() is used as ActualNums. 266bdd1243dSDimitry Andric const size_t ActualNums = Files.size(); 267bdd1243dSDimitry Andric if (Policy.MaxSizeFiles && ActualNums > Policy.MaxSizeFiles) 268bdd1243dSDimitry Andric WithColor::warning() 269bdd1243dSDimitry Andric << "ThinLTO cache pruning happens since the number of created files (" 270bdd1243dSDimitry Andric << ActualNums << ") exceeds the maximum number of files (" 271bdd1243dSDimitry Andric << Policy.MaxSizeFiles 272bdd1243dSDimitry Andric << "); consider adjusting --thinlto-cache-policy\n"; 273bdd1243dSDimitry Andric 2740b57cec5SDimitry Andric // Prune for number of files. 2750b57cec5SDimitry Andric if (Policy.MaxSizeFiles) 2760b57cec5SDimitry Andric while (NumFiles > Policy.MaxSizeFiles) 2770b57cec5SDimitry Andric RemoveCacheFile(); 2780b57cec5SDimitry Andric 2790b57cec5SDimitry Andric // Prune for size now if needed 2800b57cec5SDimitry Andric if (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0) { 2810b57cec5SDimitry Andric auto ErrOrSpaceInfo = sys::fs::disk_space(Path); 2820b57cec5SDimitry Andric if (!ErrOrSpaceInfo) { 2830b57cec5SDimitry Andric report_fatal_error("Can't get available size"); 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get(); 2860b57cec5SDimitry Andric auto AvailableSpace = TotalSize + SpaceInfo.free; 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric if (Policy.MaxSizePercentageOfAvailableSpace == 0) 2890b57cec5SDimitry Andric Policy.MaxSizePercentageOfAvailableSpace = 100; 2900b57cec5SDimitry Andric if (Policy.MaxSizeBytes == 0) 2910b57cec5SDimitry Andric Policy.MaxSizeBytes = AvailableSpace; 2920b57cec5SDimitry Andric auto TotalSizeTarget = std::min<uint64_t>( 2930b57cec5SDimitry Andric AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull, 2940b57cec5SDimitry Andric Policy.MaxSizeBytes); 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace) 2970b57cec5SDimitry Andric << "% target is: " 2980b57cec5SDimitry Andric << Policy.MaxSizePercentageOfAvailableSpace << "%, " 2990b57cec5SDimitry Andric << Policy.MaxSizeBytes << " bytes\n"); 3000b57cec5SDimitry Andric 301bdd1243dSDimitry Andric size_t ActualSizes = 0; 302bdd1243dSDimitry Andric for (const auto &File : Files) 303bdd1243dSDimitry Andric if (File) 304bdd1243dSDimitry Andric ActualSizes += File->getBufferSize(); 305bdd1243dSDimitry Andric 306bdd1243dSDimitry Andric if (ActualSizes > TotalSizeTarget) 307bdd1243dSDimitry Andric WithColor::warning() 308bdd1243dSDimitry Andric << "ThinLTO cache pruning happens since the total size of the cache " 309bdd1243dSDimitry Andric "files consumed by the current link job (" 310bdd1243dSDimitry Andric << ActualSizes << " bytes) exceeds maximum cache size (" 311bdd1243dSDimitry Andric << TotalSizeTarget 312bdd1243dSDimitry Andric << " bytes); consider adjusting --thinlto-cache-policy\n"; 313bdd1243dSDimitry Andric 3140b57cec5SDimitry Andric // Remove the oldest accessed files first, till we get below the threshold. 3150b57cec5SDimitry Andric while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end()) 3160b57cec5SDimitry Andric RemoveCacheFile(); 3170b57cec5SDimitry Andric } 3180b57cec5SDimitry Andric return true; 3190b57cec5SDimitry Andric } 320