1 //===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // Spawn and orchestrate separate fuzzing processes. 9 //===----------------------------------------------------------------------===// 10 11 #include "FuzzerCommand.h" 12 #include "FuzzerFork.h" 13 #include "FuzzerIO.h" 14 #include "FuzzerInternal.h" 15 #include "FuzzerMerge.h" 16 #include "FuzzerSHA1.h" 17 #include "FuzzerTracePC.h" 18 #include "FuzzerUtil.h" 19 20 #include <atomic> 21 #include <chrono> 22 #include <condition_variable> 23 #include <fstream> 24 #include <memory> 25 #include <mutex> 26 #include <queue> 27 #include <sstream> 28 #include <thread> 29 30 namespace fuzzer { 31 32 struct Stats { 33 size_t number_of_executed_units = 0; 34 size_t peak_rss_mb = 0; 35 size_t average_exec_per_sec = 0; 36 }; 37 38 static Stats ParseFinalStatsFromLog(const std::string &LogPath) { 39 std::ifstream In(LogPath); 40 std::string Line; 41 Stats Res; 42 struct { 43 const char *Name; 44 size_t *Var; 45 } NameVarPairs[] = { 46 {"stat::number_of_executed_units:", &Res.number_of_executed_units}, 47 {"stat::peak_rss_mb:", &Res.peak_rss_mb}, 48 {"stat::average_exec_per_sec:", &Res.average_exec_per_sec}, 49 {nullptr, nullptr}, 50 }; 51 while (std::getline(In, Line, '\n')) { 52 if (Line.find("stat::") != 0) continue; 53 std::istringstream ISS(Line); 54 std::string Name; 55 size_t Val; 56 ISS >> Name >> Val; 57 for (size_t i = 0; NameVarPairs[i].Name; i++) 58 if (Name == NameVarPairs[i].Name) 59 *NameVarPairs[i].Var = Val; 60 } 61 return Res; 62 } 63 64 struct FuzzJob { 65 // Inputs. 66 Command Cmd; 67 std::string CorpusDir; 68 std::string FeaturesDir; 69 std::string LogPath; 70 std::string SeedListPath; 71 std::string CFPath; 72 size_t JobId; 73 74 int DftTimeInSeconds = 0; 75 76 // Fuzzing Outputs. 77 int ExitCode; 78 79 ~FuzzJob() { 80 RemoveFile(CFPath); 81 RemoveFile(LogPath); 82 RemoveFile(SeedListPath); 83 RmDirRecursive(CorpusDir); 84 RmDirRecursive(FeaturesDir); 85 } 86 }; 87 88 struct GlobalEnv { 89 Vector<std::string> Args; 90 Vector<std::string> CorpusDirs; 91 std::string MainCorpusDir; 92 std::string TempDir; 93 std::string DFTDir; 94 std::string DataFlowBinary; 95 Set<uint32_t> Features, Cov; 96 Set<std::string> FilesWithDFT; 97 Vector<std::string> Files; 98 Random *Rand; 99 std::chrono::system_clock::time_point ProcessStartTime; 100 int Verbosity = 0; 101 102 size_t NumTimeouts = 0; 103 size_t NumOOMs = 0; 104 size_t NumCrashes = 0; 105 106 107 size_t NumRuns = 0; 108 109 std::string StopFile() { return DirPlusFile(TempDir, "STOP"); } 110 111 size_t secondsSinceProcessStartUp() const { 112 return std::chrono::duration_cast<std::chrono::seconds>( 113 std::chrono::system_clock::now() - ProcessStartTime) 114 .count(); 115 } 116 117 FuzzJob *CreateNewJob(size_t JobId) { 118 Command Cmd(Args); 119 Cmd.removeFlag("fork"); 120 Cmd.removeFlag("runs"); 121 Cmd.removeFlag("collect_data_flow"); 122 for (auto &C : CorpusDirs) // Remove all corpora from the args. 123 Cmd.removeArgument(C); 124 Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload. 125 Cmd.addFlag("print_final_stats", "1"); 126 Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing. 127 Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId))); 128 Cmd.addFlag("stop_file", StopFile()); 129 if (!DataFlowBinary.empty()) { 130 Cmd.addFlag("data_flow_trace", DFTDir); 131 if (!Cmd.hasFlag("focus_function")) 132 Cmd.addFlag("focus_function", "auto"); 133 } 134 auto Job = new FuzzJob; 135 std::string Seeds; 136 if (size_t CorpusSubsetSize = 137 std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) { 138 auto Time1 = std::chrono::system_clock::now(); 139 for (size_t i = 0; i < CorpusSubsetSize; i++) { 140 auto &SF = Files[Rand->SkewTowardsLast(Files.size())]; 141 Seeds += (Seeds.empty() ? "" : ",") + SF; 142 CollectDFT(SF); 143 } 144 auto Time2 = std::chrono::system_clock::now(); 145 Job->DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count(); 146 } 147 if (!Seeds.empty()) { 148 Job->SeedListPath = 149 DirPlusFile(TempDir, std::to_string(JobId) + ".seeds"); 150 WriteToFile(Seeds, Job->SeedListPath); 151 Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath); 152 } 153 Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log"); 154 Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId)); 155 Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId)); 156 Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge"); 157 Job->JobId = JobId; 158 159 160 Cmd.addArgument(Job->CorpusDir); 161 Cmd.addFlag("features_dir", Job->FeaturesDir); 162 163 for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) { 164 RmDirRecursive(D); 165 MkDir(D); 166 } 167 168 Cmd.setOutputFile(Job->LogPath); 169 Cmd.combineOutAndErr(); 170 171 Job->Cmd = Cmd; 172 173 if (Verbosity >= 2) 174 Printf("Job %zd/%p Created: %s\n", JobId, Job, 175 Job->Cmd.toString().c_str()); 176 // Start from very short runs and gradually increase them. 177 return Job; 178 } 179 180 void RunOneMergeJob(FuzzJob *Job) { 181 auto Stats = ParseFinalStatsFromLog(Job->LogPath); 182 NumRuns += Stats.number_of_executed_units; 183 184 Vector<SizedFile> TempFiles, MergeCandidates; 185 // Read all newly created inputs and their feature sets. 186 // Choose only those inputs that have new features. 187 GetSizedFilesFromDir(Job->CorpusDir, &TempFiles); 188 std::sort(TempFiles.begin(), TempFiles.end()); 189 for (auto &F : TempFiles) { 190 auto FeatureFile = F.File; 191 FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir); 192 auto FeatureBytes = FileToVector(FeatureFile, 0, false); 193 assert((FeatureBytes.size() % sizeof(uint32_t)) == 0); 194 Vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t)); 195 memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size()); 196 for (auto Ft : NewFeatures) { 197 if (!Features.count(Ft)) { 198 MergeCandidates.push_back(F); 199 break; 200 } 201 } 202 } 203 // if (!FilesToAdd.empty() || Job->ExitCode != 0) 204 Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s %zd " 205 "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n", 206 NumRuns, Cov.size(), Features.size(), Files.size(), 207 Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes, 208 secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds); 209 210 if (MergeCandidates.empty()) return; 211 212 Vector<std::string> FilesToAdd; 213 Set<uint32_t> NewFeatures, NewCov; 214 CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features, 215 &NewFeatures, Cov, &NewCov, Job->CFPath, false); 216 for (auto &Path : FilesToAdd) { 217 auto U = FileToVector(Path); 218 auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); 219 WriteToFile(U, NewPath); 220 Files.push_back(NewPath); 221 } 222 Features.insert(NewFeatures.begin(), NewFeatures.end()); 223 Cov.insert(NewCov.begin(), NewCov.end()); 224 for (auto Idx : NewCov) 225 if (auto *TE = TPC.PCTableEntryByIdx(Idx)) 226 if (TPC.PcIsFuncEntry(TE)) 227 PrintPC(" NEW_FUNC: %p %F %L\n", "", 228 TPC.GetNextInstructionPc(TE->PC)); 229 230 } 231 232 233 void CollectDFT(const std::string &InputPath) { 234 if (DataFlowBinary.empty()) return; 235 if (!FilesWithDFT.insert(InputPath).second) return; 236 Command Cmd(Args); 237 Cmd.removeFlag("fork"); 238 Cmd.removeFlag("runs"); 239 Cmd.addFlag("data_flow_trace", DFTDir); 240 Cmd.addArgument(InputPath); 241 for (auto &C : CorpusDirs) // Remove all corpora from the args. 242 Cmd.removeArgument(C); 243 Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log")); 244 Cmd.combineOutAndErr(); 245 // Printf("CollectDFT: %s\n", Cmd.toString().c_str()); 246 ExecuteCommand(Cmd); 247 } 248 249 }; 250 251 struct JobQueue { 252 std::queue<FuzzJob *> Qu; 253 std::mutex Mu; 254 std::condition_variable Cv; 255 256 void Push(FuzzJob *Job) { 257 { 258 std::lock_guard<std::mutex> Lock(Mu); 259 Qu.push(Job); 260 } 261 Cv.notify_one(); 262 } 263 FuzzJob *Pop() { 264 std::unique_lock<std::mutex> Lk(Mu); 265 // std::lock_guard<std::mutex> Lock(Mu); 266 Cv.wait(Lk, [&]{return !Qu.empty();}); 267 assert(!Qu.empty()); 268 auto Job = Qu.front(); 269 Qu.pop(); 270 return Job; 271 } 272 }; 273 274 void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) { 275 while (auto Job = FuzzQ->Pop()) { 276 // Printf("WorkerThread: job %p\n", Job); 277 Job->ExitCode = ExecuteCommand(Job->Cmd); 278 MergeQ->Push(Job); 279 } 280 } 281 282 // This is just a skeleton of an experimental -fork=1 feature. 283 void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, 284 const Vector<std::string> &Args, 285 const Vector<std::string> &CorpusDirs, int NumJobs) { 286 Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs); 287 288 GlobalEnv Env; 289 Env.Args = Args; 290 Env.CorpusDirs = CorpusDirs; 291 Env.Rand = &Rand; 292 Env.Verbosity = Options.Verbosity; 293 Env.ProcessStartTime = std::chrono::system_clock::now(); 294 Env.DataFlowBinary = Options.CollectDataFlow; 295 296 Vector<SizedFile> SeedFiles; 297 for (auto &Dir : CorpusDirs) 298 GetSizedFilesFromDir(Dir, &SeedFiles); 299 std::sort(SeedFiles.begin(), SeedFiles.end()); 300 Env.TempDir = TempPath("FuzzWithFork", ".dir"); 301 Env.DFTDir = DirPlusFile(Env.TempDir, "DFT"); 302 RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs. 303 MkDir(Env.TempDir); 304 MkDir(Env.DFTDir); 305 306 307 if (CorpusDirs.empty()) 308 MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C")); 309 else 310 Env.MainCorpusDir = CorpusDirs[0]; 311 312 if (Options.KeepSeed) { 313 for (auto &File : SeedFiles) 314 Env.Files.push_back(File.File); 315 } else { 316 auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); 317 CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, 318 {}, &Env.Cov, CFPath, false); 319 RemoveFile(CFPath); 320 } 321 Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, 322 Env.Files.size(), Env.TempDir.c_str()); 323 324 int ExitCode = 0; 325 326 JobQueue FuzzQ, MergeQ; 327 328 auto StopJobs = [&]() { 329 for (int i = 0; i < NumJobs; i++) 330 FuzzQ.Push(nullptr); 331 MergeQ.Push(nullptr); 332 WriteToFile(Unit({1}), Env.StopFile()); 333 }; 334 335 size_t JobId = 1; 336 Vector<std::thread> Threads; 337 for (int t = 0; t < NumJobs; t++) { 338 Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ)); 339 FuzzQ.Push(Env.CreateNewJob(JobId++)); 340 } 341 342 while (true) { 343 std::unique_ptr<FuzzJob> Job(MergeQ.Pop()); 344 if (!Job) 345 break; 346 ExitCode = Job->ExitCode; 347 if (ExitCode == Options.InterruptExitCode) { 348 Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid()); 349 StopJobs(); 350 break; 351 } 352 Fuzzer::MaybeExitGracefully(); 353 354 Env.RunOneMergeJob(Job.get()); 355 356 // Continue if our crash is one of the ignorred ones. 357 if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode) 358 Env.NumTimeouts++; 359 else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode) 360 Env.NumOOMs++; 361 else if (ExitCode != 0) { 362 Env.NumCrashes++; 363 if (Options.IgnoreCrashes) { 364 std::ifstream In(Job->LogPath); 365 std::string Line; 366 while (std::getline(In, Line, '\n')) 367 if (Line.find("ERROR:") != Line.npos || 368 Line.find("runtime error:") != Line.npos) 369 Printf("%s\n", Line.c_str()); 370 } else { 371 // And exit if we don't ignore this crash. 372 Printf("INFO: log from the inner process:\n%s", 373 FileToString(Job->LogPath).c_str()); 374 StopJobs(); 375 break; 376 } 377 } 378 379 // Stop if we are over the time budget. 380 // This is not precise, since other threads are still running 381 // and we will wait while joining them. 382 // We also don't stop instantly: other jobs need to finish. 383 if (Options.MaxTotalTimeSec > 0 && 384 Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) { 385 Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n", 386 Env.secondsSinceProcessStartUp()); 387 StopJobs(); 388 break; 389 } 390 if (Env.NumRuns >= Options.MaxNumberOfRuns) { 391 Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n", 392 Env.NumRuns); 393 StopJobs(); 394 break; 395 } 396 397 FuzzQ.Push(Env.CreateNewJob(JobId++)); 398 } 399 400 for (auto &T : Threads) 401 T.join(); 402 403 // The workers have terminated. Don't try to remove the directory before they 404 // terminate to avoid a race condition preventing cleanup on Windows. 405 RmDirRecursive(Env.TempDir); 406 407 // Use the exit code from the last child process. 408 Printf("INFO: exiting: %d time: %zds\n", ExitCode, 409 Env.secondsSinceProcessStartUp()); 410 exit(ExitCode); 411 } 412 413 } // namespace fuzzer 414