1 //===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // Spawn and orchestrate separate fuzzing processes. 9 //===----------------------------------------------------------------------===// 10 11 #include "FuzzerCommand.h" 12 #include "FuzzerFork.h" 13 #include "FuzzerIO.h" 14 #include "FuzzerInternal.h" 15 #include "FuzzerMerge.h" 16 #include "FuzzerSHA1.h" 17 #include "FuzzerTracePC.h" 18 #include "FuzzerUtil.h" 19 20 #include <atomic> 21 #include <chrono> 22 #include <condition_variable> 23 #include <fstream> 24 #include <memory> 25 #include <mutex> 26 #include <queue> 27 #include <sstream> 28 #include <thread> 29 30 namespace fuzzer { 31 32 struct Stats { 33 size_t number_of_executed_units = 0; 34 size_t peak_rss_mb = 0; 35 size_t average_exec_per_sec = 0; 36 }; 37 38 static Stats ParseFinalStatsFromLog(const std::string &LogPath) { 39 std::ifstream In(LogPath); 40 std::string Line; 41 Stats Res; 42 struct { 43 const char *Name; 44 size_t *Var; 45 } NameVarPairs[] = { 46 {"stat::number_of_executed_units:", &Res.number_of_executed_units}, 47 {"stat::peak_rss_mb:", &Res.peak_rss_mb}, 48 {"stat::average_exec_per_sec:", &Res.average_exec_per_sec}, 49 {nullptr, nullptr}, 50 }; 51 while (std::getline(In, Line, '\n')) { 52 if (Line.find("stat::") != 0) continue; 53 std::istringstream ISS(Line); 54 std::string Name; 55 size_t Val; 56 ISS >> Name >> Val; 57 for (size_t i = 0; NameVarPairs[i].Name; i++) 58 if (Name == NameVarPairs[i].Name) 59 *NameVarPairs[i].Var = Val; 60 } 61 return Res; 62 } 63 64 struct FuzzJob { 65 // Inputs. 66 Command Cmd; 67 std::string CorpusDir; 68 std::string FeaturesDir; 69 std::string LogPath; 70 std::string SeedListPath; 71 std::string CFPath; 72 size_t JobId; 73 74 int DftTimeInSeconds = 0; 75 76 // Fuzzing Outputs. 77 int ExitCode; 78 79 ~FuzzJob() { 80 RemoveFile(CFPath); 81 RemoveFile(LogPath); 82 RemoveFile(SeedListPath); 83 RmDirRecursive(CorpusDir); 84 RmDirRecursive(FeaturesDir); 85 } 86 }; 87 88 struct GlobalEnv { 89 std::vector<std::string> Args; 90 std::vector<std::string> CorpusDirs; 91 std::string MainCorpusDir; 92 std::string TempDir; 93 std::string DFTDir; 94 std::string DataFlowBinary; 95 std::set<uint32_t> Features, Cov; 96 std::set<std::string> FilesWithDFT; 97 std::vector<std::string> Files; 98 std::vector<std::size_t> FilesSizes; 99 Random *Rand; 100 std::chrono::system_clock::time_point ProcessStartTime; 101 int Verbosity = 0; 102 int Group = 0; 103 int NumCorpuses = 8; 104 105 size_t NumTimeouts = 0; 106 size_t NumOOMs = 0; 107 size_t NumCrashes = 0; 108 109 110 size_t NumRuns = 0; 111 112 std::string StopFile() { return DirPlusFile(TempDir, "STOP"); } 113 114 size_t secondsSinceProcessStartUp() const { 115 return std::chrono::duration_cast<std::chrono::seconds>( 116 std::chrono::system_clock::now() - ProcessStartTime) 117 .count(); 118 } 119 120 FuzzJob *CreateNewJob(size_t JobId) { 121 Command Cmd(Args); 122 Cmd.removeFlag("fork"); 123 Cmd.removeFlag("runs"); 124 Cmd.removeFlag("collect_data_flow"); 125 for (auto &C : CorpusDirs) // Remove all corpora from the args. 126 Cmd.removeArgument(C); 127 Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload. 128 Cmd.addFlag("print_final_stats", "1"); 129 Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing. 130 Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId))); 131 Cmd.addFlag("stop_file", StopFile()); 132 if (!DataFlowBinary.empty()) { 133 Cmd.addFlag("data_flow_trace", DFTDir); 134 if (!Cmd.hasFlag("focus_function")) 135 Cmd.addFlag("focus_function", "auto"); 136 } 137 auto Job = new FuzzJob; 138 std::string Seeds; 139 if (size_t CorpusSubsetSize = 140 std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) { 141 auto Time1 = std::chrono::system_clock::now(); 142 if (Group) { // whether to group the corpus. 143 size_t AverageCorpusSize = Files.size() / NumCorpuses + 1; 144 size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize; 145 for (size_t i = 0; i < CorpusSubsetSize; i++) { 146 size_t RandNum = (*Rand)(AverageCorpusSize); 147 size_t Index = RandNum + StartIndex; 148 Index = Index < Files.size() ? Index 149 : Rand->SkewTowardsLast(Files.size()); 150 auto &SF = Files[Index]; 151 Seeds += (Seeds.empty() ? "" : ",") + SF; 152 CollectDFT(SF); 153 } 154 } else { 155 for (size_t i = 0; i < CorpusSubsetSize; i++) { 156 auto &SF = Files[Rand->SkewTowardsLast(Files.size())]; 157 Seeds += (Seeds.empty() ? "" : ",") + SF; 158 CollectDFT(SF); 159 } 160 } 161 auto Time2 = std::chrono::system_clock::now(); 162 auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count(); 163 assert(DftTimeInSeconds < std::numeric_limits<int>::max()); 164 Job->DftTimeInSeconds = static_cast<int>(DftTimeInSeconds); 165 } 166 if (!Seeds.empty()) { 167 Job->SeedListPath = 168 DirPlusFile(TempDir, std::to_string(JobId) + ".seeds"); 169 WriteToFile(Seeds, Job->SeedListPath); 170 Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath); 171 } 172 Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log"); 173 Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId)); 174 Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId)); 175 Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge"); 176 Job->JobId = JobId; 177 178 179 Cmd.addArgument(Job->CorpusDir); 180 Cmd.addFlag("features_dir", Job->FeaturesDir); 181 182 for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) { 183 RmDirRecursive(D); 184 MkDir(D); 185 } 186 187 Cmd.setOutputFile(Job->LogPath); 188 Cmd.combineOutAndErr(); 189 190 Job->Cmd = Cmd; 191 192 if (Verbosity >= 2) 193 Printf("Job %zd/%p Created: %s\n", JobId, Job, 194 Job->Cmd.toString().c_str()); 195 // Start from very short runs and gradually increase them. 196 return Job; 197 } 198 199 void RunOneMergeJob(FuzzJob *Job) { 200 auto Stats = ParseFinalStatsFromLog(Job->LogPath); 201 NumRuns += Stats.number_of_executed_units; 202 203 std::vector<SizedFile> TempFiles, MergeCandidates; 204 // Read all newly created inputs and their feature sets. 205 // Choose only those inputs that have new features. 206 GetSizedFilesFromDir(Job->CorpusDir, &TempFiles); 207 std::sort(TempFiles.begin(), TempFiles.end()); 208 for (auto &F : TempFiles) { 209 auto FeatureFile = F.File; 210 FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir); 211 auto FeatureBytes = FileToVector(FeatureFile, 0, false); 212 assert((FeatureBytes.size() % sizeof(uint32_t)) == 0); 213 std::vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t)); 214 memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size()); 215 for (auto Ft : NewFeatures) { 216 if (!Features.count(Ft)) { 217 MergeCandidates.push_back(F); 218 break; 219 } 220 } 221 } 222 // if (!FilesToAdd.empty() || Job->ExitCode != 0) 223 Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s: %zd " 224 "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n", 225 NumRuns, Cov.size(), Features.size(), Files.size(), 226 Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes, 227 secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds); 228 229 if (MergeCandidates.empty()) return; 230 231 std::vector<std::string> FilesToAdd; 232 std::set<uint32_t> NewFeatures, NewCov; 233 bool IsSetCoverMerge = 234 !Job->Cmd.getFlagValue("set_cover_merge").compare("1"); 235 CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features, 236 &NewFeatures, Cov, &NewCov, Job->CFPath, false, 237 IsSetCoverMerge); 238 for (auto &Path : FilesToAdd) { 239 auto U = FileToVector(Path); 240 auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); 241 WriteToFile(U, NewPath); 242 if (Group) { // Insert the queue according to the size of the seed. 243 size_t UnitSize = U.size(); 244 auto Idx = 245 std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) - 246 FilesSizes.begin(); 247 FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize); 248 Files.insert(Files.begin() + Idx, NewPath); 249 } else { 250 Files.push_back(NewPath); 251 } 252 } 253 Features.insert(NewFeatures.begin(), NewFeatures.end()); 254 Cov.insert(NewCov.begin(), NewCov.end()); 255 for (auto Idx : NewCov) 256 if (auto *TE = TPC.PCTableEntryByIdx(Idx)) 257 if (TPC.PcIsFuncEntry(TE)) 258 PrintPC(" NEW_FUNC: %p %F %L\n", "", 259 TPC.GetNextInstructionPc(TE->PC)); 260 } 261 262 void CollectDFT(const std::string &InputPath) { 263 if (DataFlowBinary.empty()) return; 264 if (!FilesWithDFT.insert(InputPath).second) return; 265 Command Cmd(Args); 266 Cmd.removeFlag("fork"); 267 Cmd.removeFlag("runs"); 268 Cmd.addFlag("data_flow_trace", DFTDir); 269 Cmd.addArgument(InputPath); 270 for (auto &C : CorpusDirs) // Remove all corpora from the args. 271 Cmd.removeArgument(C); 272 Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log")); 273 Cmd.combineOutAndErr(); 274 // Printf("CollectDFT: %s\n", Cmd.toString().c_str()); 275 ExecuteCommand(Cmd); 276 } 277 278 }; 279 280 struct JobQueue { 281 std::queue<FuzzJob *> Qu; 282 std::mutex Mu; 283 std::condition_variable Cv; 284 285 void Push(FuzzJob *Job) { 286 { 287 std::lock_guard<std::mutex> Lock(Mu); 288 Qu.push(Job); 289 } 290 Cv.notify_one(); 291 } 292 FuzzJob *Pop() { 293 std::unique_lock<std::mutex> Lk(Mu); 294 // std::lock_guard<std::mutex> Lock(Mu); 295 Cv.wait(Lk, [&]{return !Qu.empty();}); 296 assert(!Qu.empty()); 297 auto Job = Qu.front(); 298 Qu.pop(); 299 return Job; 300 } 301 }; 302 303 void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) { 304 while (auto Job = FuzzQ->Pop()) { 305 // Printf("WorkerThread: job %p\n", Job); 306 Job->ExitCode = ExecuteCommand(Job->Cmd); 307 MergeQ->Push(Job); 308 } 309 } 310 311 // This is just a skeleton of an experimental -fork=1 feature. 312 void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, 313 const std::vector<std::string> &Args, 314 const std::vector<std::string> &CorpusDirs, int NumJobs) { 315 Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs); 316 317 GlobalEnv Env; 318 Env.Args = Args; 319 Env.CorpusDirs = CorpusDirs; 320 Env.Rand = &Rand; 321 Env.Verbosity = Options.Verbosity; 322 Env.ProcessStartTime = std::chrono::system_clock::now(); 323 Env.DataFlowBinary = Options.CollectDataFlow; 324 Env.Group = Options.ForkCorpusGroups; 325 326 std::vector<SizedFile> SeedFiles; 327 for (auto &Dir : CorpusDirs) 328 GetSizedFilesFromDir(Dir, &SeedFiles); 329 std::sort(SeedFiles.begin(), SeedFiles.end()); 330 Env.TempDir = TempPath("FuzzWithFork", ".dir"); 331 Env.DFTDir = DirPlusFile(Env.TempDir, "DFT"); 332 RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs. 333 MkDir(Env.TempDir); 334 MkDir(Env.DFTDir); 335 336 337 if (CorpusDirs.empty()) 338 MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C")); 339 else 340 Env.MainCorpusDir = CorpusDirs[0]; 341 342 if (Options.KeepSeed) { 343 for (auto &File : SeedFiles) 344 Env.Files.push_back(File.File); 345 } else { 346 auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); 347 std::set<uint32_t> NewFeatures, NewCov; 348 CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features, 349 &NewFeatures, Env.Cov, &NewCov, CFPath, 350 /*Verbose=*/false, /*IsSetCoverMerge=*/false); 351 Env.Features.insert(NewFeatures.begin(), NewFeatures.end()); 352 Env.Cov.insert(NewCov.begin(), NewCov.end()); 353 RemoveFile(CFPath); 354 } 355 356 if (Env.Group) { 357 for (auto &path : Env.Files) 358 Env.FilesSizes.push_back(FileSize(path)); 359 } 360 361 Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, 362 Env.Files.size(), Env.TempDir.c_str()); 363 364 int ExitCode = 0; 365 366 JobQueue FuzzQ, MergeQ; 367 368 auto StopJobs = [&]() { 369 for (int i = 0; i < NumJobs; i++) 370 FuzzQ.Push(nullptr); 371 MergeQ.Push(nullptr); 372 WriteToFile(Unit({1}), Env.StopFile()); 373 }; 374 375 size_t MergeCycle = 20; 376 size_t JobExecuted = 0; 377 size_t JobId = 1; 378 std::vector<std::thread> Threads; 379 for (int t = 0; t < NumJobs; t++) { 380 Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ)); 381 FuzzQ.Push(Env.CreateNewJob(JobId++)); 382 } 383 384 while (true) { 385 std::unique_ptr<FuzzJob> Job(MergeQ.Pop()); 386 if (!Job) 387 break; 388 ExitCode = Job->ExitCode; 389 if (ExitCode == Options.InterruptExitCode) { 390 Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid()); 391 StopJobs(); 392 break; 393 } 394 Fuzzer::MaybeExitGracefully(); 395 396 Env.RunOneMergeJob(Job.get()); 397 398 // merge the corpus . 399 JobExecuted++; 400 if (Env.Group && JobExecuted >= MergeCycle) { 401 std::vector<SizedFile> CurrentSeedFiles; 402 for (auto &Dir : CorpusDirs) 403 GetSizedFilesFromDir(Dir, &CurrentSeedFiles); 404 std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end()); 405 406 auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); 407 std::set<uint32_t> TmpNewFeatures, TmpNewCov; 408 std::set<uint32_t> TmpFeatures, TmpCov; 409 Env.Files.clear(); 410 Env.FilesSizes.clear(); 411 CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files, 412 TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov, 413 CFPath, /*Verbose=*/false, /*IsSetCoverMerge=*/false); 414 for (auto &path : Env.Files) 415 Env.FilesSizes.push_back(FileSize(path)); 416 RemoveFile(CFPath); 417 JobExecuted = 0; 418 MergeCycle += 5; 419 } 420 421 // Since the number of corpus seeds will gradually increase, in order to 422 // control the number in each group to be about three times the number of 423 // seeds selected each time, the number of groups is dynamically adjusted. 424 if (Env.Files.size() < 2000) 425 Env.NumCorpuses = 12; 426 else if (Env.Files.size() < 6000) 427 Env.NumCorpuses = 20; 428 else if (Env.Files.size() < 12000) 429 Env.NumCorpuses = 32; 430 else if (Env.Files.size() < 16000) 431 Env.NumCorpuses = 40; 432 else if (Env.Files.size() < 24000) 433 Env.NumCorpuses = 60; 434 else 435 Env.NumCorpuses = 80; 436 437 // Continue if our crash is one of the ignored ones. 438 if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode) 439 Env.NumTimeouts++; 440 else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode) 441 Env.NumOOMs++; 442 else if (ExitCode != 0) { 443 Env.NumCrashes++; 444 if (Options.IgnoreCrashes) { 445 std::ifstream In(Job->LogPath); 446 std::string Line; 447 while (std::getline(In, Line, '\n')) 448 if (Line.find("ERROR:") != Line.npos || 449 Line.find("runtime error:") != Line.npos) 450 Printf("%s\n", Line.c_str()); 451 } else { 452 // And exit if we don't ignore this crash. 453 Printf("INFO: log from the inner process:\n%s", 454 FileToString(Job->LogPath).c_str()); 455 StopJobs(); 456 break; 457 } 458 } 459 460 // Stop if we are over the time budget. 461 // This is not precise, since other threads are still running 462 // and we will wait while joining them. 463 // We also don't stop instantly: other jobs need to finish. 464 if (Options.MaxTotalTimeSec > 0 && 465 Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) { 466 Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n", 467 Env.secondsSinceProcessStartUp()); 468 StopJobs(); 469 break; 470 } 471 if (Env.NumRuns >= Options.MaxNumberOfRuns) { 472 Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n", 473 Env.NumRuns); 474 StopJobs(); 475 break; 476 } 477 478 FuzzQ.Push(Env.CreateNewJob(JobId++)); 479 } 480 481 for (auto &T : Threads) 482 T.join(); 483 484 // The workers have terminated. Don't try to remove the directory before they 485 // terminate to avoid a race condition preventing cleanup on Windows. 486 RmDirRecursive(Env.TempDir); 487 488 // Use the exit code from the last child process. 489 Printf("INFO: exiting: %d time: %zds\n", ExitCode, 490 Env.secondsSinceProcessStartUp()); 491 exit(ExitCode); 492 } 493 494 } // namespace fuzzer 495