xref: /freebsd/contrib/llvm-project/compiler-rt/lib/fuzzer/FuzzerMerge.h (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric // Merging Corpora.
9*0b57cec5SDimitry Andric //
10*0b57cec5SDimitry Andric // The task:
11*0b57cec5SDimitry Andric //   Take the existing corpus (possibly empty) and merge new inputs into
12*0b57cec5SDimitry Andric //   it so that only inputs with new coverage ('features') are added.
13*0b57cec5SDimitry Andric //   The process should tolerate the crashes, OOMs, leaks, etc.
14*0b57cec5SDimitry Andric //
15*0b57cec5SDimitry Andric // Algorithm:
16*0b57cec5SDimitry Andric //   The outter process collects the set of files and writes their names
17*0b57cec5SDimitry Andric //   into a temporary "control" file, then repeatedly launches the inner
18*0b57cec5SDimitry Andric //   process until all inputs are processed.
19*0b57cec5SDimitry Andric //   The outer process does not actually execute the target code.
20*0b57cec5SDimitry Andric //
21*0b57cec5SDimitry Andric //   The inner process reads the control file and sees a) list of all the inputs
22*0b57cec5SDimitry Andric //   and b) the last processed input. Then it starts processing the inputs one
23*0b57cec5SDimitry Andric //   by one. Before processing every input it writes one line to control file:
24*0b57cec5SDimitry Andric //   STARTED INPUT_ID INPUT_SIZE
25*0b57cec5SDimitry Andric //   After processing an input it write another line:
26*0b57cec5SDimitry Andric //   DONE INPUT_ID Feature1 Feature2 Feature3 ...
27*0b57cec5SDimitry Andric //   If a crash happens while processing an input the last line in the control
28*0b57cec5SDimitry Andric //   file will be "STARTED INPUT_ID" and so the next process will know
29*0b57cec5SDimitry Andric //   where to resume.
30*0b57cec5SDimitry Andric //
31*0b57cec5SDimitry Andric //   Once all inputs are processed by the innner process(es) the outer process
32*0b57cec5SDimitry Andric //   reads the control files and does the merge based entirely on the contents
33*0b57cec5SDimitry Andric //   of control file.
34*0b57cec5SDimitry Andric //   It uses a single pass greedy algorithm choosing first the smallest inputs
35*0b57cec5SDimitry Andric //   within the same size the inputs that have more new features.
36*0b57cec5SDimitry Andric //
37*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
38*0b57cec5SDimitry Andric 
39*0b57cec5SDimitry Andric #ifndef LLVM_FUZZER_MERGE_H
40*0b57cec5SDimitry Andric #define LLVM_FUZZER_MERGE_H
41*0b57cec5SDimitry Andric 
42*0b57cec5SDimitry Andric #include "FuzzerDefs.h"
43*0b57cec5SDimitry Andric 
44*0b57cec5SDimitry Andric #include <istream>
45*0b57cec5SDimitry Andric #include <ostream>
46*0b57cec5SDimitry Andric #include <set>
47*0b57cec5SDimitry Andric #include <vector>
48*0b57cec5SDimitry Andric 
49*0b57cec5SDimitry Andric namespace fuzzer {
50*0b57cec5SDimitry Andric 
51*0b57cec5SDimitry Andric struct MergeFileInfo {
52*0b57cec5SDimitry Andric   std::string Name;
53*0b57cec5SDimitry Andric   size_t Size = 0;
54*0b57cec5SDimitry Andric   Vector<uint32_t> Features, Cov;
55*0b57cec5SDimitry Andric };
56*0b57cec5SDimitry Andric 
57*0b57cec5SDimitry Andric struct Merger {
58*0b57cec5SDimitry Andric   Vector<MergeFileInfo> Files;
59*0b57cec5SDimitry Andric   size_t NumFilesInFirstCorpus = 0;
60*0b57cec5SDimitry Andric   size_t FirstNotProcessedFile = 0;
61*0b57cec5SDimitry Andric   std::string LastFailure;
62*0b57cec5SDimitry Andric 
63*0b57cec5SDimitry Andric   bool Parse(std::istream &IS, bool ParseCoverage);
64*0b57cec5SDimitry Andric   bool Parse(const std::string &Str, bool ParseCoverage);
65*0b57cec5SDimitry Andric   void ParseOrExit(std::istream &IS, bool ParseCoverage);
66*0b57cec5SDimitry Andric   size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures,
67*0b57cec5SDimitry Andric                const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
68*0b57cec5SDimitry Andric                Vector<std::string> *NewFiles);
69*0b57cec5SDimitry Andric   size_t ApproximateMemoryConsumption() const;
70*0b57cec5SDimitry Andric   Set<uint32_t> AllFeatures() const;
71*0b57cec5SDimitry Andric };
72*0b57cec5SDimitry Andric 
73*0b57cec5SDimitry Andric void CrashResistantMerge(const Vector<std::string> &Args,
74*0b57cec5SDimitry Andric                          const Vector<SizedFile> &OldCorpus,
75*0b57cec5SDimitry Andric                          const Vector<SizedFile> &NewCorpus,
76*0b57cec5SDimitry Andric                          Vector<std::string> *NewFiles,
77*0b57cec5SDimitry Andric                          const Set<uint32_t> &InitialFeatures,
78*0b57cec5SDimitry Andric                          Set<uint32_t> *NewFeatures,
79*0b57cec5SDimitry Andric                          const Set<uint32_t> &InitialCov,
80*0b57cec5SDimitry Andric                          Set<uint32_t> *NewCov,
81*0b57cec5SDimitry Andric                          const std::string &CFPath,
82*0b57cec5SDimitry Andric                          bool Verbose);
83*0b57cec5SDimitry Andric 
84*0b57cec5SDimitry Andric }  // namespace fuzzer
85*0b57cec5SDimitry Andric 
86*0b57cec5SDimitry Andric #endif  // LLVM_FUZZER_MERGE_H
87