10b57cec5SDimitry Andric //===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // Merging Corpora. 90b57cec5SDimitry Andric // 100b57cec5SDimitry Andric // The task: 110b57cec5SDimitry Andric // Take the existing corpus (possibly empty) and merge new inputs into 120b57cec5SDimitry Andric // it so that only inputs with new coverage ('features') are added. 130b57cec5SDimitry Andric // The process should tolerate the crashes, OOMs, leaks, etc. 140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric // Algorithm: 16*5ffd83dbSDimitry Andric // The outer process collects the set of files and writes their names 170b57cec5SDimitry Andric // into a temporary "control" file, then repeatedly launches the inner 180b57cec5SDimitry Andric // process until all inputs are processed. 190b57cec5SDimitry Andric // The outer process does not actually execute the target code. 200b57cec5SDimitry Andric // 210b57cec5SDimitry Andric // The inner process reads the control file and sees a) list of all the inputs 220b57cec5SDimitry Andric // and b) the last processed input. Then it starts processing the inputs one 230b57cec5SDimitry Andric // by one. Before processing every input it writes one line to control file: 240b57cec5SDimitry Andric // STARTED INPUT_ID INPUT_SIZE 25*5ffd83dbSDimitry Andric // After processing an input it writes the following lines: 26*5ffd83dbSDimitry Andric // FT INPUT_ID Feature1 Feature2 Feature3 ... 27*5ffd83dbSDimitry Andric // COV INPUT_ID Coverage1 Coverage2 Coverage3 ... 280b57cec5SDimitry Andric // If a crash happens while processing an input the last line in the control 290b57cec5SDimitry Andric // file will be "STARTED INPUT_ID" and so the next process will know 300b57cec5SDimitry Andric // where to resume. 310b57cec5SDimitry Andric // 32*5ffd83dbSDimitry Andric // Once all inputs are processed by the inner process(es) the outer process 330b57cec5SDimitry Andric // reads the control files and does the merge based entirely on the contents 340b57cec5SDimitry Andric // of control file. 350b57cec5SDimitry Andric // It uses a single pass greedy algorithm choosing first the smallest inputs 360b57cec5SDimitry Andric // within the same size the inputs that have more new features. 370b57cec5SDimitry Andric // 380b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric #ifndef LLVM_FUZZER_MERGE_H 410b57cec5SDimitry Andric #define LLVM_FUZZER_MERGE_H 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric #include "FuzzerDefs.h" 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric #include <istream> 460b57cec5SDimitry Andric #include <ostream> 470b57cec5SDimitry Andric #include <set> 480b57cec5SDimitry Andric #include <vector> 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric namespace fuzzer { 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric struct MergeFileInfo { 530b57cec5SDimitry Andric std::string Name; 540b57cec5SDimitry Andric size_t Size = 0; 550b57cec5SDimitry Andric Vector<uint32_t> Features, Cov; 560b57cec5SDimitry Andric }; 570b57cec5SDimitry Andric 580b57cec5SDimitry Andric struct Merger { 590b57cec5SDimitry Andric Vector<MergeFileInfo> Files; 600b57cec5SDimitry Andric size_t NumFilesInFirstCorpus = 0; 610b57cec5SDimitry Andric size_t FirstNotProcessedFile = 0; 620b57cec5SDimitry Andric std::string LastFailure; 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric bool Parse(std::istream &IS, bool ParseCoverage); 650b57cec5SDimitry Andric bool Parse(const std::string &Str, bool ParseCoverage); 660b57cec5SDimitry Andric void ParseOrExit(std::istream &IS, bool ParseCoverage); 670b57cec5SDimitry Andric size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures, 680b57cec5SDimitry Andric const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov, 690b57cec5SDimitry Andric Vector<std::string> *NewFiles); 700b57cec5SDimitry Andric size_t ApproximateMemoryConsumption() const; 710b57cec5SDimitry Andric Set<uint32_t> AllFeatures() const; 720b57cec5SDimitry Andric }; 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric void CrashResistantMerge(const Vector<std::string> &Args, 750b57cec5SDimitry Andric const Vector<SizedFile> &OldCorpus, 760b57cec5SDimitry Andric const Vector<SizedFile> &NewCorpus, 770b57cec5SDimitry Andric Vector<std::string> *NewFiles, 780b57cec5SDimitry Andric const Set<uint32_t> &InitialFeatures, 790b57cec5SDimitry Andric Set<uint32_t> *NewFeatures, 800b57cec5SDimitry Andric const Set<uint32_t> &InitialCov, 810b57cec5SDimitry Andric Set<uint32_t> *NewCov, 820b57cec5SDimitry Andric const std::string &CFPath, 830b57cec5SDimitry Andric bool Verbose); 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric } // namespace fuzzer 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric #endif // LLVM_FUZZER_MERGE_H 88