1 //===- FuzzerMerge.cpp - merging corpora ----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // Merging corpora. 9 //===----------------------------------------------------------------------===// 10 11 #include "FuzzerCommand.h" 12 #include "FuzzerMerge.h" 13 #include "FuzzerIO.h" 14 #include "FuzzerInternal.h" 15 #include "FuzzerTracePC.h" 16 #include "FuzzerUtil.h" 17 18 #include <fstream> 19 #include <iterator> 20 #include <set> 21 #include <sstream> 22 23 namespace fuzzer { 24 25 bool Merger::Parse(const std::string &Str, bool ParseCoverage) { 26 std::istringstream SS(Str); 27 return Parse(SS, ParseCoverage); 28 } 29 30 void Merger::ParseOrExit(std::istream &IS, bool ParseCoverage) { 31 if (!Parse(IS, ParseCoverage)) { 32 Printf("MERGE: failed to parse the control file (unexpected error)\n"); 33 exit(1); 34 } 35 } 36 37 // The control file example: 38 // 39 // 3 # The number of inputs 40 // 1 # The number of inputs in the first corpus, <= the previous number 41 // file0 42 // file1 43 // file2 # One file name per line. 44 // STARTED 0 123 # FileID, file size 45 // FT 0 1 4 6 8 # FileID COV1 COV2 ... 46 // COV 0 7 8 9 # FileID COV1 COV1 47 // STARTED 1 456 # If FT is missing, the input crashed while processing. 48 // STARTED 2 567 49 // FT 2 8 9 50 // COV 2 11 12 51 bool Merger::Parse(std::istream &IS, bool ParseCoverage) { 52 LastFailure.clear(); 53 std::string Line; 54 55 // Parse NumFiles. 56 if (!std::getline(IS, Line, '\n')) return false; 57 std::istringstream L1(Line); 58 size_t NumFiles = 0; 59 L1 >> NumFiles; 60 if (NumFiles == 0 || NumFiles > 10000000) return false; 61 62 // Parse NumFilesInFirstCorpus. 63 if (!std::getline(IS, Line, '\n')) return false; 64 std::istringstream L2(Line); 65 NumFilesInFirstCorpus = NumFiles + 1; 66 L2 >> NumFilesInFirstCorpus; 67 if (NumFilesInFirstCorpus > NumFiles) return false; 68 69 // Parse file names. 70 Files.resize(NumFiles); 71 for (size_t i = 0; i < NumFiles; i++) 72 if (!std::getline(IS, Files[i].Name, '\n')) 73 return false; 74 75 // Parse STARTED, FT, and COV lines. 76 size_t ExpectedStartMarker = 0; 77 const size_t kInvalidStartMarker = -1; 78 size_t LastSeenStartMarker = kInvalidStartMarker; 79 Vector<uint32_t> TmpFeatures; 80 Set<uint32_t> PCs; 81 while (std::getline(IS, Line, '\n')) { 82 std::istringstream ISS1(Line); 83 std::string Marker; 84 size_t N; 85 ISS1 >> Marker; 86 ISS1 >> N; 87 if (Marker == "STARTED") { 88 // STARTED FILE_ID FILE_SIZE 89 if (ExpectedStartMarker != N) 90 return false; 91 ISS1 >> Files[ExpectedStartMarker].Size; 92 LastSeenStartMarker = ExpectedStartMarker; 93 assert(ExpectedStartMarker < Files.size()); 94 ExpectedStartMarker++; 95 } else if (Marker == "FT") { 96 // FT FILE_ID COV1 COV2 COV3 ... 97 size_t CurrentFileIdx = N; 98 if (CurrentFileIdx != LastSeenStartMarker) 99 return false; 100 LastSeenStartMarker = kInvalidStartMarker; 101 if (ParseCoverage) { 102 TmpFeatures.clear(); // use a vector from outer scope to avoid resizes. 103 while (ISS1 >> N) 104 TmpFeatures.push_back(N); 105 std::sort(TmpFeatures.begin(), TmpFeatures.end()); 106 Files[CurrentFileIdx].Features = TmpFeatures; 107 } 108 } else if (Marker == "COV") { 109 size_t CurrentFileIdx = N; 110 if (ParseCoverage) 111 while (ISS1 >> N) 112 if (PCs.insert(N).second) 113 Files[CurrentFileIdx].Cov.push_back(N); 114 } else { 115 return false; 116 } 117 } 118 if (LastSeenStartMarker != kInvalidStartMarker) 119 LastFailure = Files[LastSeenStartMarker].Name; 120 121 FirstNotProcessedFile = ExpectedStartMarker; 122 return true; 123 } 124 125 size_t Merger::ApproximateMemoryConsumption() const { 126 size_t Res = 0; 127 for (const auto &F: Files) 128 Res += sizeof(F) + F.Features.size() * sizeof(F.Features[0]); 129 return Res; 130 } 131 132 // Decides which files need to be merged (add those to NewFiles). 133 // Returns the number of new features added. 134 size_t Merger::Merge(const Set<uint32_t> &InitialFeatures, 135 Set<uint32_t> *NewFeatures, 136 const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov, 137 Vector<std::string> *NewFiles) { 138 NewFiles->clear(); 139 assert(NumFilesInFirstCorpus <= Files.size()); 140 Set<uint32_t> AllFeatures = InitialFeatures; 141 142 // What features are in the initial corpus? 143 for (size_t i = 0; i < NumFilesInFirstCorpus; i++) { 144 auto &Cur = Files[i].Features; 145 AllFeatures.insert(Cur.begin(), Cur.end()); 146 } 147 // Remove all features that we already know from all other inputs. 148 for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { 149 auto &Cur = Files[i].Features; 150 Vector<uint32_t> Tmp; 151 std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(), 152 AllFeatures.end(), std::inserter(Tmp, Tmp.begin())); 153 Cur.swap(Tmp); 154 } 155 156 // Sort. Give preference to 157 // * smaller files 158 // * files with more features. 159 std::sort(Files.begin() + NumFilesInFirstCorpus, Files.end(), 160 [&](const MergeFileInfo &a, const MergeFileInfo &b) -> bool { 161 if (a.Size != b.Size) 162 return a.Size < b.Size; 163 return a.Features.size() > b.Features.size(); 164 }); 165 166 // One greedy pass: add the file's features to AllFeatures. 167 // If new features were added, add this file to NewFiles. 168 for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { 169 auto &Cur = Files[i].Features; 170 // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(), 171 // Files[i].Size, Cur.size()); 172 bool FoundNewFeatures = false; 173 for (auto Fe: Cur) { 174 if (AllFeatures.insert(Fe).second) { 175 FoundNewFeatures = true; 176 NewFeatures->insert(Fe); 177 } 178 } 179 if (FoundNewFeatures) 180 NewFiles->push_back(Files[i].Name); 181 for (auto Cov : Files[i].Cov) 182 if (InitialCov.find(Cov) == InitialCov.end()) 183 NewCov->insert(Cov); 184 } 185 return NewFeatures->size(); 186 } 187 188 Set<uint32_t> Merger::AllFeatures() const { 189 Set<uint32_t> S; 190 for (auto &File : Files) 191 S.insert(File.Features.begin(), File.Features.end()); 192 return S; 193 } 194 195 // Inner process. May crash if the target crashes. 196 void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { 197 Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str()); 198 Merger M; 199 std::ifstream IF(CFPath); 200 M.ParseOrExit(IF, false); 201 IF.close(); 202 if (!M.LastFailure.empty()) 203 Printf("MERGE-INNER: '%s' caused a failure at the previous merge step\n", 204 M.LastFailure.c_str()); 205 206 Printf("MERGE-INNER: %zd total files;" 207 " %zd processed earlier; will process %zd files now\n", 208 M.Files.size(), M.FirstNotProcessedFile, 209 M.Files.size() - M.FirstNotProcessedFile); 210 211 std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app); 212 Set<size_t> AllFeatures; 213 Set<const TracePC::PCTableEntry *> AllPCs; 214 for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) { 215 Fuzzer::MaybeExitGracefully(); 216 auto U = FileToVector(M.Files[i].Name); 217 if (U.size() > MaxInputLen) { 218 U.resize(MaxInputLen); 219 U.shrink_to_fit(); 220 } 221 std::ostringstream StartedLine; 222 // Write the pre-run marker. 223 OF << "STARTED " << i << " " << U.size() << "\n"; 224 OF.flush(); // Flush is important since Command::Execute may crash. 225 // Run. 226 TPC.ResetMaps(); 227 ExecuteCallback(U.data(), U.size()); 228 // Collect coverage. We are iterating over the files in this order: 229 // * First, files in the initial corpus ordered by size, smallest first. 230 // * Then, all other files, smallest first. 231 // So it makes no sense to record all features for all files, instead we 232 // only record features that were not seen before. 233 Set<size_t> UniqFeatures; 234 TPC.CollectFeatures([&](size_t Feature) { 235 if (AllFeatures.insert(Feature).second) 236 UniqFeatures.insert(Feature); 237 }); 238 TPC.UpdateObservedPCs(); 239 // Show stats. 240 if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1))) 241 PrintStats("pulse "); 242 // Write the post-run marker and the coverage. 243 OF << "FT " << i; 244 for (size_t F : UniqFeatures) 245 OF << " " << F; 246 OF << "\n"; 247 OF << "COV " << i; 248 TPC.ForEachObservedPC([&](const TracePC::PCTableEntry *TE) { 249 if (AllPCs.insert(TE).second) 250 OF << " " << TPC.PCTableEntryIdx(TE); 251 }); 252 OF << "\n"; 253 OF.flush(); 254 } 255 PrintStats("DONE "); 256 } 257 258 static void WriteNewControlFile(const std::string &CFPath, 259 const Vector<SizedFile> &OldCorpus, 260 const Vector<SizedFile> &NewCorpus) { 261 RemoveFile(CFPath); 262 std::ofstream ControlFile(CFPath); 263 ControlFile << (OldCorpus.size() + NewCorpus.size()) << "\n"; 264 ControlFile << OldCorpus.size() << "\n"; 265 for (auto &SF: OldCorpus) 266 ControlFile << SF.File << "\n"; 267 for (auto &SF: NewCorpus) 268 ControlFile << SF.File << "\n"; 269 if (!ControlFile) { 270 Printf("MERGE-OUTER: failed to write to the control file: %s\n", 271 CFPath.c_str()); 272 exit(1); 273 } 274 } 275 276 // Outer process. Does not call the target code and thus should not fail. 277 void CrashResistantMerge(const Vector<std::string> &Args, 278 const Vector<SizedFile> &OldCorpus, 279 const Vector<SizedFile> &NewCorpus, 280 Vector<std::string> *NewFiles, 281 const Set<uint32_t> &InitialFeatures, 282 Set<uint32_t> *NewFeatures, 283 const Set<uint32_t> &InitialCov, 284 Set<uint32_t> *NewCov, 285 const std::string &CFPath, 286 bool V /*Verbose*/) { 287 if (NewCorpus.empty() && OldCorpus.empty()) return; // Nothing to merge. 288 size_t NumAttempts = 0; 289 if (FileSize(CFPath)) { 290 VPrintf(V, "MERGE-OUTER: non-empty control file provided: '%s'\n", 291 CFPath.c_str()); 292 Merger M; 293 std::ifstream IF(CFPath); 294 if (M.Parse(IF, /*ParseCoverage=*/false)) { 295 VPrintf(V, "MERGE-OUTER: control file ok, %zd files total," 296 " first not processed file %zd\n", 297 M.Files.size(), M.FirstNotProcessedFile); 298 if (!M.LastFailure.empty()) 299 VPrintf(V, "MERGE-OUTER: '%s' will be skipped as unlucky " 300 "(merge has stumbled on it the last time)\n", 301 M.LastFailure.c_str()); 302 if (M.FirstNotProcessedFile >= M.Files.size()) { 303 VPrintf( 304 V, "MERGE-OUTER: nothing to do, merge has been completed before\n"); 305 exit(0); 306 } 307 308 NumAttempts = M.Files.size() - M.FirstNotProcessedFile; 309 } else { 310 VPrintf(V, "MERGE-OUTER: bad control file, will overwrite it\n"); 311 } 312 } 313 314 if (!NumAttempts) { 315 // The supplied control file is empty or bad, create a fresh one. 316 NumAttempts = OldCorpus.size() + NewCorpus.size(); 317 VPrintf(V, "MERGE-OUTER: %zd files, %zd in the initial corpus\n", 318 NumAttempts, OldCorpus.size()); 319 WriteNewControlFile(CFPath, OldCorpus, NewCorpus); 320 } 321 322 // Execute the inner process until it passes. 323 // Every inner process should execute at least one input. 324 Command BaseCmd(Args); 325 BaseCmd.removeFlag("merge"); 326 BaseCmd.removeFlag("fork"); 327 BaseCmd.removeFlag("collect_data_flow"); 328 for (size_t Attempt = 1; Attempt <= NumAttempts; Attempt++) { 329 Fuzzer::MaybeExitGracefully(); 330 VPrintf(V, "MERGE-OUTER: attempt %zd\n", Attempt); 331 Command Cmd(BaseCmd); 332 Cmd.addFlag("merge_control_file", CFPath); 333 Cmd.addFlag("merge_inner", "1"); 334 if (!V) { 335 Cmd.setOutputFile(getDevNull()); 336 Cmd.combineOutAndErr(); 337 } 338 auto ExitCode = ExecuteCommand(Cmd); 339 if (!ExitCode) { 340 VPrintf(V, "MERGE-OUTER: succesfull in %zd attempt(s)\n", Attempt); 341 break; 342 } 343 } 344 // Read the control file and do the merge. 345 Merger M; 346 std::ifstream IF(CFPath); 347 IF.seekg(0, IF.end); 348 VPrintf(V, "MERGE-OUTER: the control file has %zd bytes\n", 349 (size_t)IF.tellg()); 350 IF.seekg(0, IF.beg); 351 M.ParseOrExit(IF, true); 352 IF.close(); 353 VPrintf(V, 354 "MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n", 355 M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb()); 356 M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles); 357 VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; " 358 "%zd new coverage edges\n", 359 NewFiles->size(), NewFeatures->size(), NewCov->size()); 360 } 361 362 } // namespace fuzzer 363