xref: /freebsd/contrib/llvm-project/llvm/lib/FuzzMutate/IRMutator.cpp (revision 0c428864495af9dc7d2af4d0a5ae21732af9c739)
1 //===-- IRMutator.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/FuzzMutate/IRMutator.h"
10 #include "llvm/ADT/Optional.h"
11 #include "llvm/Analysis/TargetLibraryInfo.h"
12 #include "llvm/Bitcode/BitcodeReader.h"
13 #include "llvm/Bitcode/BitcodeWriter.h"
14 #include "llvm/FuzzMutate/Operations.h"
15 #include "llvm/FuzzMutate/Random.h"
16 #include "llvm/FuzzMutate/RandomIRBuilder.h"
17 #include "llvm/IR/BasicBlock.h"
18 #include "llvm/IR/Function.h"
19 #include "llvm/IR/InstIterator.h"
20 #include "llvm/IR/Instructions.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/IR/Verifier.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/SourceMgr.h"
25 #include "llvm/Transforms/Scalar/DCE.h"
26 
27 using namespace llvm;
28 
29 static void createEmptyFunction(Module &M) {
30   // TODO: Some arguments and a return value would probably be more interesting.
31   LLVMContext &Context = M.getContext();
32   Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Context), {},
33                                                    /*isVarArg=*/false),
34                                  GlobalValue::ExternalLinkage, "f", &M);
35   BasicBlock *BB = BasicBlock::Create(Context, "BB", F);
36   ReturnInst::Create(Context, BB);
37 }
38 
39 void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
40   auto RS = makeSampler<Function *>(IB.Rand);
41   for (Function &F : M)
42     if (!F.isDeclaration())
43       RS.sample(&F, /*Weight=*/1);
44 
45   if (RS.isEmpty())
46     createEmptyFunction(M);
47   else
48     mutate(*RS.getSelection(), IB);
49 }
50 
51 void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
52   mutate(*makeSampler(IB.Rand, make_pointer_range(F)).getSelection(), IB);
53 }
54 
55 void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
56   mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
57 }
58 
59 void IRMutator::mutateModule(Module &M, int Seed, size_t CurSize,
60                              size_t MaxSize) {
61   std::vector<Type *> Types;
62   for (const auto &Getter : AllowedTypes)
63     Types.push_back(Getter(M.getContext()));
64   RandomIRBuilder IB(Seed, Types);
65 
66   auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
67   for (const auto &Strategy : Strategies)
68     RS.sample(Strategy.get(),
69               Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
70   auto Strategy = RS.getSelection();
71 
72   Strategy->mutate(M, IB);
73 }
74 
75 static void eliminateDeadCode(Function &F) {
76   FunctionPassManager FPM;
77   FPM.addPass(DCEPass());
78   FunctionAnalysisManager FAM;
79   FAM.registerPass([&] { return TargetLibraryAnalysis(); });
80   FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
81   FPM.run(F, FAM);
82 }
83 
84 void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
85   IRMutationStrategy::mutate(F, IB);
86   eliminateDeadCode(F);
87 }
88 
89 std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
90   std::vector<fuzzerop::OpDescriptor> Ops;
91   describeFuzzerIntOps(Ops);
92   describeFuzzerFloatOps(Ops);
93   describeFuzzerControlFlowOps(Ops);
94   describeFuzzerPointerOps(Ops);
95   describeFuzzerAggregateOps(Ops);
96   describeFuzzerVectorOps(Ops);
97   return Ops;
98 }
99 
100 Optional<fuzzerop::OpDescriptor>
101 InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
102   auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
103     return Op.SourcePreds[0].matches({}, Src);
104   };
105   auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
106   if (RS.isEmpty())
107     return None;
108   return *RS;
109 }
110 
111 void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
112   SmallVector<Instruction *, 32> Insts;
113   for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I)
114     Insts.push_back(&*I);
115   if (Insts.size() < 1)
116     return;
117 
118   // Choose an insertion point for our new instruction.
119   size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);
120 
121   auto InstsBefore = makeArrayRef(Insts).slice(0, IP);
122   auto InstsAfter = makeArrayRef(Insts).slice(IP);
123 
124   // Choose a source, which will be used to constrain the operation selection.
125   SmallVector<Value *, 2> Srcs;
126   Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));
127 
128   // Choose an operation that's constrained to be valid for the type of the
129   // source, collect any other sources it needs, and then build it.
130   auto OpDesc = chooseOperation(Srcs[0], IB);
131   // Bail if no operation was found
132   if (!OpDesc)
133     return;
134 
135   for (const auto &Pred : makeArrayRef(OpDesc->SourcePreds).slice(1))
136     Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
137 
138   if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
139     // Find a sink and wire up the results of the operation.
140     IB.connectToSink(BB, InstsAfter, Op);
141   }
142 }
143 
144 uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
145                                           uint64_t CurrentWeight) {
146   // If we have less than 200 bytes, panic and try to always delete.
147   if (CurrentSize > MaxSize - 200)
148     return CurrentWeight ? CurrentWeight * 100 : 1;
149   // Draw a line starting from when we only have 1k left and increasing linearly
150   // to double the current weight.
151   int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
152                  (static_cast<int64_t>(MaxSize) -
153                   static_cast<int64_t>(CurrentSize) - 1000) /
154                  1000;
155   // Clamp negative weights to zero.
156   if (Line < 0)
157     return 0;
158   return Line;
159 }
160 
161 void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
162   auto RS = makeSampler<Instruction *>(IB.Rand);
163   for (Instruction &Inst : instructions(F)) {
164     // TODO: We can't handle these instructions.
165     if (Inst.isTerminator() || Inst.isEHPad() ||
166         Inst.isSwiftError() || isa<PHINode>(Inst))
167       continue;
168 
169     RS.sample(&Inst, /*Weight=*/1);
170   }
171   if (RS.isEmpty())
172     return;
173 
174   // Delete the instruction.
175   mutate(*RS.getSelection(), IB);
176   // Clean up any dead code that's left over after removing the instruction.
177   eliminateDeadCode(F);
178 }
179 
180 void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
181   assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
182 
183   if (Inst.getType()->isVoidTy()) {
184     // Instructions with void type (ie, store) have no uses to worry about. Just
185     // erase it and move on.
186     Inst.eraseFromParent();
187     return;
188   }
189 
190   // Otherwise we need to find some other value with the right type to keep the
191   // users happy.
192   auto Pred = fuzzerop::onlyType(Inst.getType());
193   auto RS = makeSampler<Value *>(IB.Rand);
194   SmallVector<Instruction *, 32> InstsBefore;
195   BasicBlock *BB = Inst.getParent();
196   for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
197        ++I) {
198     if (Pred.matches({}, &*I))
199       RS.sample(&*I, /*Weight=*/1);
200     InstsBefore.push_back(&*I);
201   }
202   if (!RS)
203     RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);
204 
205   Inst.replaceAllUsesWith(RS.getSelection());
206   Inst.eraseFromParent();
207 }
208 
209 void InstModificationIRStrategy::mutate(Instruction &Inst,
210                                         RandomIRBuilder &IB) {
211   SmallVector<std::function<void()>, 8> Modifications;
212   CmpInst *CI = nullptr;
213   GetElementPtrInst *GEP = nullptr;
214   switch (Inst.getOpcode()) {
215   default:
216     break;
217   case Instruction::Add:
218   case Instruction::Mul:
219   case Instruction::Sub:
220   case Instruction::Shl:
221     Modifications.push_back([&Inst]() { Inst.setHasNoSignedWrap(true); }),
222         Modifications.push_back([&Inst]() { Inst.setHasNoSignedWrap(false); });
223     Modifications.push_back([&Inst]() { Inst.setHasNoUnsignedWrap(true); });
224     Modifications.push_back([&Inst]() { Inst.setHasNoUnsignedWrap(false); });
225 
226     break;
227   case Instruction::ICmp:
228     CI = cast<ICmpInst>(&Inst);
229     Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_EQ); });
230     Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_NE); });
231     Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_UGT); });
232     Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_UGE); });
233     Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_ULT); });
234     Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_ULE); });
235     Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_SGT); });
236     Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_SGE); });
237     Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_SLT); });
238     Modifications.push_back([CI]() { CI->setPredicate(CmpInst::ICMP_SLE); });
239     break;
240   case Instruction::GetElementPtr:
241     GEP = cast<GetElementPtrInst>(&Inst);
242     Modifications.push_back([GEP]() { GEP->setIsInBounds(true); });
243     Modifications.push_back([GEP]() { GEP->setIsInBounds(false); });
244     break;
245   }
246 
247   auto RS = makeSampler(IB.Rand, Modifications);
248   if (RS)
249     RS.getSelection()();
250 }
251 
252 std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
253                                           LLVMContext &Context) {
254 
255   if (Size <= 1)
256     // We get bogus data given an empty corpus - just create a new module.
257     return std::make_unique<Module>("M", Context);
258 
259   auto Buffer = MemoryBuffer::getMemBuffer(
260       StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
261       /*RequiresNullTerminator=*/false);
262 
263   SMDiagnostic Err;
264   auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
265   if (Error E = M.takeError()) {
266     errs() << toString(std::move(E)) << "\n";
267     return nullptr;
268   }
269   return std::move(M.get());
270 }
271 
272 size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
273   std::string Buf;
274   {
275     raw_string_ostream OS(Buf);
276     WriteBitcodeToFile(M, OS);
277   }
278   if (Buf.size() > MaxSize)
279     return 0;
280   memcpy(Dest, Buf.data(), Buf.size());
281   return Buf.size();
282 }
283 
284 std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
285                                              LLVMContext &Context) {
286   auto M = parseModule(Data, Size, Context);
287   if (!M || verifyModule(*M, &errs()))
288     return nullptr;
289 
290   return M;
291 }
292