1 //===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the pruning of a directory based on least recently used. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/CachePruning.h" 14 #include "llvm/ADT/StringRef.h" 15 #include "llvm/Support/Debug.h" 16 #include "llvm/Support/Errc.h" 17 #include "llvm/Support/Error.h" 18 #include "llvm/Support/FileSystem.h" 19 #include "llvm/Support/Path.h" 20 #include "llvm/Support/WithColor.h" 21 #include "llvm/Support/raw_ostream.h" 22 23 #define DEBUG_TYPE "cache-pruning" 24 25 #include <set> 26 #include <system_error> 27 28 using namespace llvm; 29 30 namespace { 31 struct FileInfo { 32 sys::TimePoint<> Time; 33 uint64_t Size; 34 std::string Path; 35 36 /// Used to determine which files to prune first. Also used to determine 37 /// set membership, so must take into account all fields. 38 bool operator<(const FileInfo &Other) const { 39 return std::tie(Time, Other.Size, Path) < 40 std::tie(Other.Time, Size, Other.Path); 41 } 42 }; 43 } // anonymous namespace 44 45 /// Write a new timestamp file with the given path. This is used for the pruning 46 /// interval option. 47 static void writeTimestampFile(StringRef TimestampFile) { 48 std::error_code EC; 49 raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None); 50 } 51 52 static Expected<std::chrono::seconds> parseDuration(StringRef Duration) { 53 if (Duration.empty()) 54 return make_error<StringError>("Duration must not be empty", 55 inconvertibleErrorCode()); 56 57 StringRef NumStr = Duration.slice(0, Duration.size()-1); 58 uint64_t Num; 59 if (NumStr.getAsInteger(0, Num)) 60 return make_error<StringError>("'" + NumStr + "' not an integer", 61 inconvertibleErrorCode()); 62 63 switch (Duration.back()) { 64 case 's': 65 return std::chrono::seconds(Num); 66 case 'm': 67 return std::chrono::minutes(Num); 68 case 'h': 69 return std::chrono::hours(Num); 70 default: 71 return make_error<StringError>("'" + Duration + 72 "' must end with one of 's', 'm' or 'h'", 73 inconvertibleErrorCode()); 74 } 75 } 76 77 Expected<CachePruningPolicy> 78 llvm::parseCachePruningPolicy(StringRef PolicyStr) { 79 CachePruningPolicy Policy; 80 std::pair<StringRef, StringRef> P = {"", PolicyStr}; 81 while (!P.second.empty()) { 82 P = P.second.split(':'); 83 84 StringRef Key, Value; 85 std::tie(Key, Value) = P.first.split('='); 86 if (Key == "prune_interval") { 87 auto DurationOrErr = parseDuration(Value); 88 if (!DurationOrErr) 89 return DurationOrErr.takeError(); 90 Policy.Interval = *DurationOrErr; 91 } else if (Key == "prune_after") { 92 auto DurationOrErr = parseDuration(Value); 93 if (!DurationOrErr) 94 return DurationOrErr.takeError(); 95 Policy.Expiration = *DurationOrErr; 96 } else if (Key == "cache_size") { 97 if (Value.back() != '%') 98 return make_error<StringError>("'" + Value + "' must be a percentage", 99 inconvertibleErrorCode()); 100 StringRef SizeStr = Value.drop_back(); 101 uint64_t Size; 102 if (SizeStr.getAsInteger(0, Size)) 103 return make_error<StringError>("'" + SizeStr + "' not an integer", 104 inconvertibleErrorCode()); 105 if (Size > 100) 106 return make_error<StringError>("'" + SizeStr + 107 "' must be between 0 and 100", 108 inconvertibleErrorCode()); 109 Policy.MaxSizePercentageOfAvailableSpace = Size; 110 } else if (Key == "cache_size_bytes") { 111 uint64_t Mult = 1; 112 switch (tolower(Value.back())) { 113 case 'k': 114 Mult = 1024; 115 Value = Value.drop_back(); 116 break; 117 case 'm': 118 Mult = 1024 * 1024; 119 Value = Value.drop_back(); 120 break; 121 case 'g': 122 Mult = 1024 * 1024 * 1024; 123 Value = Value.drop_back(); 124 break; 125 } 126 uint64_t Size; 127 if (Value.getAsInteger(0, Size)) 128 return make_error<StringError>("'" + Value + "' not an integer", 129 inconvertibleErrorCode()); 130 Policy.MaxSizeBytes = Size * Mult; 131 } else if (Key == "cache_size_files") { 132 if (Value.getAsInteger(0, Policy.MaxSizeFiles)) 133 return make_error<StringError>("'" + Value + "' not an integer", 134 inconvertibleErrorCode()); 135 } else { 136 return make_error<StringError>("Unknown key: '" + Key + "'", 137 inconvertibleErrorCode()); 138 } 139 } 140 141 return Policy; 142 } 143 144 /// Prune the cache of files that haven't been accessed in a long time. 145 bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy, 146 const std::vector<std::unique_ptr<MemoryBuffer>> &Files) { 147 using namespace std::chrono; 148 149 if (Path.empty()) 150 return false; 151 152 bool isPathDir; 153 if (sys::fs::is_directory(Path, isPathDir)) 154 return false; 155 156 if (!isPathDir) 157 return false; 158 159 Policy.MaxSizePercentageOfAvailableSpace = 160 std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u); 161 162 if (Policy.Expiration == seconds(0) && 163 Policy.MaxSizePercentageOfAvailableSpace == 0 && 164 Policy.MaxSizeBytes == 0 && Policy.MaxSizeFiles == 0) { 165 LLVM_DEBUG(dbgs() << "No pruning settings set, exit early\n"); 166 // Nothing will be pruned, early exit 167 return false; 168 } 169 170 // Try to stat() the timestamp file. 171 SmallString<128> TimestampFile(Path); 172 sys::path::append(TimestampFile, "llvmcache.timestamp"); 173 sys::fs::file_status FileStatus; 174 const auto CurrentTime = system_clock::now(); 175 if (auto EC = sys::fs::status(TimestampFile, FileStatus)) { 176 if (EC == errc::no_such_file_or_directory) { 177 // If the timestamp file wasn't there, create one now. 178 writeTimestampFile(TimestampFile); 179 } else { 180 // Unknown error? 181 return false; 182 } 183 } else { 184 if (!Policy.Interval) 185 return false; 186 if (Policy.Interval != seconds(0)) { 187 // Check whether the time stamp is older than our pruning interval. 188 // If not, do nothing. 189 const auto TimeStampModTime = FileStatus.getLastModificationTime(); 190 auto TimeStampAge = CurrentTime - TimeStampModTime; 191 if (TimeStampAge <= *Policy.Interval) { 192 LLVM_DEBUG(dbgs() << "Timestamp file too recent (" 193 << duration_cast<seconds>(TimeStampAge).count() 194 << "s old), do not prune.\n"); 195 return false; 196 } 197 } 198 // Write a new timestamp file so that nobody else attempts to prune. 199 // There is a benign race condition here, if two processes happen to 200 // notice at the same time that the timestamp is out-of-date. 201 writeTimestampFile(TimestampFile); 202 } 203 204 // Keep track of files to delete to get below the size limit. 205 // Order by time of last use so that recently used files are preserved. 206 std::set<FileInfo> FileInfos; 207 uint64_t TotalSize = 0; 208 209 // Walk the entire directory cache, looking for unused files. 210 std::error_code EC; 211 SmallString<128> CachePathNative; 212 sys::path::native(Path, CachePathNative); 213 // Walk all of the files within this directory. 214 for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd; 215 File != FileEnd && !EC; File.increment(EC)) { 216 // Ignore filenames not beginning with "llvmcache-" or "Thin-". This 217 // includes the timestamp file as well as any files created by the user. 218 // This acts as a safeguard against data loss if the user specifies the 219 // wrong directory as their cache directory. 220 StringRef filename = sys::path::filename(File->path()); 221 if (!filename.starts_with("llvmcache-") && !filename.starts_with("Thin-")) 222 continue; 223 224 // Look at this file. If we can't stat it, there's nothing interesting 225 // there. 226 ErrorOr<sys::fs::basic_file_status> StatusOrErr = File->status(); 227 if (!StatusOrErr) { 228 LLVM_DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n"); 229 continue; 230 } 231 232 // If the file hasn't been used recently enough, delete it 233 const auto FileAccessTime = StatusOrErr->getLastAccessedTime(); 234 auto FileAge = CurrentTime - FileAccessTime; 235 if (Policy.Expiration != seconds(0) && FileAge > Policy.Expiration) { 236 LLVM_DEBUG(dbgs() << "Remove " << File->path() << " (" 237 << duration_cast<seconds>(FileAge).count() 238 << "s old)\n"); 239 sys::fs::remove(File->path()); 240 continue; 241 } 242 243 // Leave it here for now, but add it to the list of size-based pruning. 244 TotalSize += StatusOrErr->getSize(); 245 FileInfos.insert({FileAccessTime, StatusOrErr->getSize(), File->path()}); 246 } 247 248 auto FileInfo = FileInfos.begin(); 249 size_t NumFiles = FileInfos.size(); 250 251 auto RemoveCacheFile = [&]() { 252 // Remove the file. 253 sys::fs::remove(FileInfo->Path); 254 // Update size 255 TotalSize -= FileInfo->Size; 256 NumFiles--; 257 LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size " 258 << FileInfo->Size << "), new occupancy is " << TotalSize 259 << "%\n"); 260 ++FileInfo; 261 }; 262 263 // files.size() is greater the number of inputs by one. However, a timestamp 264 // file is created and stored in the cache directory if --thinlto-cache-policy 265 // option is used. Therefore, files.size() is used as ActualNums. 266 const size_t ActualNums = Files.size(); 267 if (Policy.MaxSizeFiles && ActualNums > Policy.MaxSizeFiles) 268 WithColor::warning() 269 << "ThinLTO cache pruning happens since the number of created files (" 270 << ActualNums << ") exceeds the maximum number of files (" 271 << Policy.MaxSizeFiles 272 << "); consider adjusting --thinlto-cache-policy\n"; 273 274 // Prune for number of files. 275 if (Policy.MaxSizeFiles) 276 while (NumFiles > Policy.MaxSizeFiles) 277 RemoveCacheFile(); 278 279 // Prune for size now if needed 280 if (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0) { 281 auto ErrOrSpaceInfo = sys::fs::disk_space(Path); 282 if (!ErrOrSpaceInfo) { 283 report_fatal_error("Can't get available size"); 284 } 285 sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get(); 286 auto AvailableSpace = TotalSize + SpaceInfo.free; 287 288 if (Policy.MaxSizePercentageOfAvailableSpace == 0) 289 Policy.MaxSizePercentageOfAvailableSpace = 100; 290 if (Policy.MaxSizeBytes == 0) 291 Policy.MaxSizeBytes = AvailableSpace; 292 auto TotalSizeTarget = std::min<uint64_t>( 293 AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull, 294 Policy.MaxSizeBytes); 295 296 LLVM_DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace) 297 << "% target is: " 298 << Policy.MaxSizePercentageOfAvailableSpace << "%, " 299 << Policy.MaxSizeBytes << " bytes\n"); 300 301 size_t ActualSizes = 0; 302 for (const auto &File : Files) 303 if (File) 304 ActualSizes += File->getBufferSize(); 305 306 if (ActualSizes > TotalSizeTarget) 307 WithColor::warning() 308 << "ThinLTO cache pruning happens since the total size of the cache " 309 "files consumed by the current link job (" 310 << ActualSizes << " bytes) exceeds maximum cache size (" 311 << TotalSizeTarget 312 << " bytes); consider adjusting --thinlto-cache-policy\n"; 313 314 // Remove the oldest accessed files first, till we get below the threshold. 315 while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end()) 316 RemoveCacheFile(); 317 } 318 return true; 319 } 320