xref: /freebsd/contrib/llvm-project/llvm/lib/FuzzMutate/IRMutator.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- IRMutator.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/FuzzMutate/IRMutator.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/ADT/SmallSet.h"
12 #include "llvm/Analysis/TargetLibraryInfo.h"
13 #include "llvm/Bitcode/BitcodeReader.h"
14 #include "llvm/Bitcode/BitcodeWriter.h"
15 #include "llvm/FuzzMutate/Operations.h"
16 #include "llvm/FuzzMutate/Random.h"
17 #include "llvm/FuzzMutate/RandomIRBuilder.h"
18 #include "llvm/IR/BasicBlock.h"
19 #include "llvm/IR/FMF.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/InstIterator.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/IR/Operator.h"
25 #include "llvm/IR/PassInstrumentation.h"
26 #include "llvm/IR/Verifier.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Transforms/Scalar/DCE.h"
30 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
31 #include <map>
32 #include <optional>
33 
34 using namespace llvm;
35 
mutate(Module & M,RandomIRBuilder & IB)36 void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
37   auto RS = makeSampler<Function *>(IB.Rand);
38   for (Function &F : M)
39     if (!F.isDeclaration())
40       RS.sample(&F, /*Weight=*/1);
41 
42   while (RS.totalWeight() < IB.MinFunctionNum) {
43     Function *F = IB.createFunctionDefinition(M);
44     RS.sample(F, /*Weight=*/1);
45   }
46   mutate(*RS.getSelection(), IB);
47 }
48 
mutate(Function & F,RandomIRBuilder & IB)49 void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
50   auto Range = make_filter_range(make_pointer_range(F),
51                                  [](BasicBlock *BB) { return !BB->isEHPad(); });
52 
53   mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
54 }
55 
mutate(BasicBlock & BB,RandomIRBuilder & IB)56 void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
57   mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
58 }
59 
getModuleSize(const Module & M)60 size_t llvm::IRMutator::getModuleSize(const Module &M) {
61   return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
62 }
63 
mutateModule(Module & M,int Seed,size_t MaxSize)64 void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
65   std::vector<Type *> Types;
66   for (const auto &Getter : AllowedTypes)
67     Types.push_back(Getter(M.getContext()));
68   RandomIRBuilder IB(Seed, Types);
69 
70   size_t CurSize = IRMutator::getModuleSize(M);
71   auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
72   for (const auto &Strategy : Strategies)
73     RS.sample(Strategy.get(),
74               Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
75   if (RS.totalWeight() == 0)
76     return;
77   auto Strategy = RS.getSelection();
78 
79   Strategy->mutate(M, IB);
80 }
81 
eliminateDeadCode(Function & F)82 static void eliminateDeadCode(Function &F) {
83   FunctionPassManager FPM;
84   FPM.addPass(DCEPass());
85   FunctionAnalysisManager FAM;
86   FAM.registerPass([&] { return TargetLibraryAnalysis(); });
87   FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
88   FPM.run(F, FAM);
89 }
90 
mutate(Function & F,RandomIRBuilder & IB)91 void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
92   IRMutationStrategy::mutate(F, IB);
93   eliminateDeadCode(F);
94 }
95 
getDefaultOps()96 std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
97   std::vector<fuzzerop::OpDescriptor> Ops;
98   describeFuzzerIntOps(Ops);
99   describeFuzzerFloatOps(Ops);
100   describeFuzzerControlFlowOps(Ops);
101   describeFuzzerPointerOps(Ops);
102   describeFuzzerAggregateOps(Ops);
103   describeFuzzerVectorOps(Ops);
104   return Ops;
105 }
106 
107 std::optional<fuzzerop::OpDescriptor>
chooseOperation(Value * Src,RandomIRBuilder & IB)108 InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
109   auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
110     return Op.SourcePreds[0].matches({}, Src);
111   };
112   auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
113   if (RS.isEmpty())
114     return std::nullopt;
115   return *RS;
116 }
117 
118 static inline iterator_range<BasicBlock::iterator>
getInsertionRange(BasicBlock & BB)119 getInsertionRange(BasicBlock &BB) {
120   auto End = BB.getTerminatingMustTailCall() ? std::prev(BB.end()) : BB.end();
121   return make_range(BB.getFirstInsertionPt(), End);
122 }
123 
mutate(BasicBlock & BB,RandomIRBuilder & IB)124 void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
125   SmallVector<Instruction *, 32> Insts;
126   for (Instruction &I : getInsertionRange(BB))
127     Insts.push_back(&I);
128   if (Insts.size() < 1)
129     return;
130 
131   // Choose an insertion point for our new instruction.
132   size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);
133 
134   auto InstsBefore = ArrayRef(Insts).slice(0, IP);
135   auto InstsAfter = ArrayRef(Insts).slice(IP);
136 
137   // Choose a source, which will be used to constrain the operation selection.
138   SmallVector<Value *, 2> Srcs;
139   Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));
140 
141   // Choose an operation that's constrained to be valid for the type of the
142   // source, collect any other sources it needs, and then build it.
143   auto OpDesc = chooseOperation(Srcs[0], IB);
144   // Bail if no operation was found
145   if (!OpDesc)
146     return;
147 
148   for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
149     Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
150 
151   if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
152     // Find a sink and wire up the results of the operation.
153     IB.connectToSink(BB, InstsAfter, Op);
154   }
155 }
156 
getWeight(size_t CurrentSize,size_t MaxSize,uint64_t CurrentWeight)157 uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
158                                           uint64_t CurrentWeight) {
159   // If we have less than 200 bytes, panic and try to always delete.
160   if (CurrentSize > MaxSize - 200)
161     return CurrentWeight ? CurrentWeight * 100 : 1;
162   // Draw a line starting from when we only have 1k left and increasing linearly
163   // to double the current weight.
164   int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
165                  (static_cast<int64_t>(MaxSize) -
166                   static_cast<int64_t>(CurrentSize) - 1000) /
167                  1000;
168   // Clamp negative weights to zero.
169   if (Line < 0)
170     return 0;
171   return Line;
172 }
173 
mutate(Function & F,RandomIRBuilder & IB)174 void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
175   auto RS = makeSampler<Instruction *>(IB.Rand);
176   for (Instruction &Inst : instructions(F)) {
177     // TODO: We can't handle these instructions.
178     if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
179         isa<PHINode>(Inst))
180       continue;
181 
182     RS.sample(&Inst, /*Weight=*/1);
183   }
184   if (RS.isEmpty())
185     return;
186 
187   // Delete the instruction.
188   mutate(*RS.getSelection(), IB);
189   // Clean up any dead code that's left over after removing the instruction.
190   eliminateDeadCode(F);
191 }
192 
mutate(Instruction & Inst,RandomIRBuilder & IB)193 void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
194   assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
195 
196   if (Inst.getType()->isVoidTy()) {
197     // Instructions with void type (ie, store) have no uses to worry about. Just
198     // erase it and move on.
199     Inst.eraseFromParent();
200     return;
201   }
202 
203   // Otherwise we need to find some other value with the right type to keep the
204   // users happy.
205   auto Pred = fuzzerop::onlyType(Inst.getType());
206   auto RS = makeSampler<Value *>(IB.Rand);
207   SmallVector<Instruction *, 32> InstsBefore;
208   BasicBlock *BB = Inst.getParent();
209   for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
210        ++I) {
211     if (Pred.matches({}, &*I))
212       RS.sample(&*I, /*Weight=*/1);
213     InstsBefore.push_back(&*I);
214   }
215   if (!RS)
216     RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);
217 
218   Inst.replaceAllUsesWith(RS.getSelection());
219   Inst.eraseFromParent();
220 }
221 
mutate(Instruction & Inst,RandomIRBuilder & IB)222 void InstModificationIRStrategy::mutate(Instruction &Inst,
223                                         RandomIRBuilder &IB) {
224   SmallVector<std::function<void()>, 8> Modifications;
225   CmpInst *CI = nullptr;
226   GetElementPtrInst *GEP = nullptr;
227   switch (Inst.getOpcode()) {
228   default:
229     break;
230   // Add nsw, nuw flag
231   case Instruction::Add:
232   case Instruction::Mul:
233   case Instruction::Sub:
234   case Instruction::Shl:
235     Modifications.push_back(
236         [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
237     Modifications.push_back(
238         [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
239     break;
240   case Instruction::ICmp:
241     CI = cast<ICmpInst>(&Inst);
242     for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
243          p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
244       Modifications.push_back(
245           [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
246     }
247     break;
248   // Add inbound flag.
249   case Instruction::GetElementPtr:
250     GEP = cast<GetElementPtrInst>(&Inst);
251     Modifications.push_back(
252         [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
253     break;
254   // Add exact flag.
255   case Instruction::UDiv:
256   case Instruction::SDiv:
257   case Instruction::LShr:
258   case Instruction::AShr:
259     Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
260     break;
261 
262   case Instruction::FCmp:
263     CI = cast<FCmpInst>(&Inst);
264     for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
265          p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
266       Modifications.push_back(
267           [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
268     }
269     break;
270   }
271 
272   // Add fast math flag if possible.
273   if (isa<FPMathOperator>(&Inst)) {
274     // Try setting everything unless they are already on.
275     Modifications.push_back(
276         [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
277     // Try unsetting everything unless they are already off.
278     Modifications.push_back(
279         [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
280     // Individual setting by flipping the bit
281     Modifications.push_back(
282         [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
283     Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
284     Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
285     Modifications.push_back(
286         [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
287     Modifications.push_back(
288         [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
289     Modifications.push_back(
290         [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
291     Modifications.push_back(
292         [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
293   }
294 
295   // Randomly switch operands of instructions
296   std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
297   switch (Inst.getOpcode()) {
298   case Instruction::SDiv:
299   case Instruction::UDiv:
300   case Instruction::SRem:
301   case Instruction::URem:
302   case Instruction::FDiv:
303   case Instruction::FRem: {
304     // Verify that the after shuffle the second operand is not
305     // constant 0.
306     Value *Operand = Inst.getOperand(0);
307     if (Constant *C = dyn_cast<Constant>(Operand)) {
308       if (!C->isZeroValue()) {
309         ShuffleItems = {0, 1};
310       }
311     }
312     break;
313   }
314   case Instruction::Select:
315     ShuffleItems = {1, 2};
316     break;
317   case Instruction::Add:
318   case Instruction::Sub:
319   case Instruction::Mul:
320   case Instruction::Shl:
321   case Instruction::LShr:
322   case Instruction::AShr:
323   case Instruction::And:
324   case Instruction::Or:
325   case Instruction::Xor:
326   case Instruction::FAdd:
327   case Instruction::FSub:
328   case Instruction::FMul:
329   case Instruction::ICmp:
330   case Instruction::FCmp:
331   case Instruction::ShuffleVector:
332     ShuffleItems = {0, 1};
333     break;
334   }
335   if (ShuffleItems != NoneItem) {
336     Modifications.push_back([&Inst, &ShuffleItems]() {
337       Value *Op0 = Inst.getOperand(ShuffleItems.first);
338       Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
339       Inst.setOperand(ShuffleItems.second, Op0);
340     });
341   }
342 
343   auto RS = makeSampler(IB.Rand, Modifications);
344   if (RS)
345     RS.getSelection()();
346 }
347 
348 /// Return a case value that is not already taken to make sure we don't have two
349 /// cases with same value.
getUniqueCaseValue(SmallSet<uint64_t,4> & CasesTaken,uint64_t MaxValue,RandomIRBuilder & IB)350 static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
351                                    uint64_t MaxValue, RandomIRBuilder &IB) {
352   uint64_t tmp;
353   do {
354     tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
355   } while (CasesTaken.count(tmp) != 0);
356   CasesTaken.insert(tmp);
357   return tmp;
358 }
359 
mutate(BasicBlock & BB,RandomIRBuilder & IB)360 void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
361   Module *M = BB.getParent()->getParent();
362   // If nullptr is selected, we will create a new function declaration.
363   SmallVector<Function *, 32> Functions({nullptr});
364   for (Function &F : M->functions()) {
365     Functions.push_back(&F);
366   }
367 
368   auto RS = makeSampler(IB.Rand, Functions);
369   Function *F = RS.getSelection();
370   // Some functions accept metadata type or token type as arguments.
371   // We don't call those functions for now.
372   // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
373   // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
374   auto IsUnsupportedTy = [](Type *T) {
375     return T->isMetadataTy() || T->isTokenTy();
376   };
377   if (!F || IsUnsupportedTy(F->getReturnType()) ||
378       any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
379     F = IB.createFunctionDeclaration(*M);
380   }
381 
382   FunctionType *FTy = F->getFunctionType();
383   SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
384   if (!F->arg_empty()) {
385     for (Type *ArgTy : FTy->params()) {
386       SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
387     }
388   }
389   bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
390   auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
391                                          Instruction *Inst) {
392     StringRef Name = isRetVoid ? nullptr : "C";
393     CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, Inst);
394     // Don't return this call inst if it return void as it can't be sinked.
395     return isRetVoid ? nullptr : Call;
396   };
397 
398   SmallVector<Instruction *, 32> Insts;
399   for (Instruction &I : getInsertionRange(BB))
400     Insts.push_back(&I);
401   if (Insts.size() < 1)
402     return;
403 
404   // Choose an insertion point for our new call instruction.
405   uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
406 
407   auto InstsBefore = ArrayRef(Insts).slice(0, IP);
408   auto InstsAfter = ArrayRef(Insts).slice(IP);
409 
410   // Choose a source, which will be used to constrain the operation selection.
411   SmallVector<Value *, 2> Srcs;
412 
413   for (const auto &Pred : ArrayRef(SourcePreds)) {
414     Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
415   }
416 
417   if (Value *Op = BuilderFunc(Srcs, Insts[IP])) {
418     // Find a sink and wire up the results of the operation.
419     IB.connectToSink(BB, InstsAfter, Op);
420   }
421 }
422 
mutate(BasicBlock & BB,RandomIRBuilder & IB)423 void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
424   SmallVector<Instruction *, 32> Insts;
425   for (Instruction &I : getInsertionRange(BB))
426     Insts.push_back(&I);
427   if (Insts.size() < 1)
428     return;
429 
430   // Choose a point where we split the block.
431   uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
432   auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);
433 
434   // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
435   // directly jumps to `Sink`. Here, we have to create a new terminator for
436   // `Source`.
437   BasicBlock *Block = Insts[IP]->getParent();
438   BasicBlock *Source = Block;
439   BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");
440 
441   Function *F = BB.getParent();
442   LLVMContext &C = F->getParent()->getContext();
443   // A coin decides if it is branch or switch
444   if (uniform<uint64_t>(IB.Rand, 0, 1)) {
445     // Branch
446     BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
447     BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
448     Value *Cond =
449         IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
450                               fuzzerop::onlyType(Type::getInt1Ty(C)), false);
451     BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
452     // Remove the old terminator.
453     ReplaceInstWithInst(Source->getTerminator(), Branch);
454     // Connect these blocks to `Sink`
455     connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
456   } else {
457     // Switch
458     // Determine Integer type, it IS possible we use a boolean to switch.
459     auto RS =
460         makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
461                       return Ty->isIntegerTy();
462                     }));
463     assert(RS && "There is no integer type in all allowed types, is the "
464                  "setting correct?");
465     Type *Ty = RS.getSelection();
466     IntegerType *IntTy = cast<IntegerType>(Ty);
467 
468     uint64_t BitSize = IntTy->getBitWidth();
469     uint64_t MaxCaseVal =
470         (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
471     // Create Switch inst in Block
472     Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
473                                         fuzzerop::onlyType(IntTy), false);
474     BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
475     uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
476     NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
477     SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
478     // Remove the old terminator.
479     ReplaceInstWithInst(Source->getTerminator(), Switch);
480 
481     // Create blocks, for each block assign a case value.
482     SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
483     SmallSet<uint64_t, 4> CasesTaken;
484     for (uint64_t i = 0; i < NumCases; i++) {
485       uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
486       BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
487       ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
488       Switch->addCase(OnValue, CaseBlock);
489       Blocks.push_back(CaseBlock);
490     }
491 
492     // Connect these blocks to `Sink`
493     connectBlocksToSink(Blocks, Sink, IB);
494   }
495 }
496 
497 /// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
498 /// even have terminator.
connectBlocksToSink(ArrayRef<BasicBlock * > Blocks,BasicBlock * Sink,RandomIRBuilder & IB)499 void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
500                                             BasicBlock *Sink,
501                                             RandomIRBuilder &IB) {
502   uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
503   for (uint64_t i = 0; i < Blocks.size(); i++) {
504     // We have at least one block that directly goes to sink.
505     CFGToSink ToSink = (i == DirectSinkIdx)
506                            ? CFGToSink::DirectSink
507                            : static_cast<CFGToSink>(uniform<uint64_t>(
508                                  IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
509     BasicBlock *BB = Blocks[i];
510     Function *F = BB->getParent();
511     LLVMContext &C = F->getParent()->getContext();
512     switch (ToSink) {
513     case CFGToSink::Return: {
514       Type *RetTy = F->getReturnType();
515       Value *RetValue = nullptr;
516       if (!RetTy->isVoidTy())
517         RetValue =
518             IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
519       ReturnInst::Create(C, RetValue, BB);
520       break;
521     }
522     case CFGToSink::DirectSink: {
523       BranchInst::Create(Sink, BB);
524       break;
525     }
526     case CFGToSink::SinkOrSelfLoop: {
527       SmallVector<BasicBlock *, 2> Branches({Sink, BB});
528       // A coin decides which block is true branch.
529       uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
530       Value *Cond = IB.findOrCreateSource(
531           *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
532       BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
533       break;
534     }
535     case CFGToSink::EndOfCFGToLink:
536       llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
537     }
538   }
539 }
540 
mutate(BasicBlock & BB,RandomIRBuilder & IB)541 void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
542   // Can't insert PHI node to entry node.
543   if (&BB == &BB.getParent()->getEntryBlock())
544     return;
545   Type *Ty = IB.randomType();
546   PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", &BB.front());
547 
548   // Use a map to make sure the same incoming basic block has the same value.
549   DenseMap<BasicBlock *, Value *> IncomingValues;
550   for (BasicBlock *Pred : predecessors(&BB)) {
551     Value *Src = IncomingValues[Pred];
552     // If `Pred` is not in the map yet, we'll get a nullptr.
553     if (!Src) {
554       SmallVector<Instruction *, 32> Insts;
555       for (auto I = Pred->begin(); I != Pred->end(); ++I)
556         Insts.push_back(&*I);
557       // There is no need to inform IB what previously used values are if we are
558       // using `onlyType`
559       Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
560       IncomingValues[Pred] = Src;
561     }
562     PHI->addIncoming(Src, Pred);
563   }
564   SmallVector<Instruction *, 32> InstsAfter;
565   for (Instruction &I : getInsertionRange(BB))
566     InstsAfter.push_back(&I);
567   IB.connectToSink(BB, InstsAfter, PHI);
568 }
569 
mutate(Function & F,RandomIRBuilder & IB)570 void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
571   for (BasicBlock &BB : F) {
572     this->mutate(BB, IB);
573   }
574 }
mutate(BasicBlock & BB,RandomIRBuilder & IB)575 void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
576   SmallVector<Instruction *, 32> Insts;
577   for (Instruction &I : getInsertionRange(BB))
578     Insts.push_back(&I);
579   if (Insts.size() < 1)
580     return;
581   // Choose an Instruction to mutate.
582   uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
583   Instruction *Inst = Insts[Idx];
584   // `Idx + 1` so we don't sink to ourselves.
585   auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
586   Type *Ty = Inst->getType();
587   // Don't sink terminators, void function calls, token, etc.
588   if (!Ty->isVoidTy() && !Ty->isTokenTy())
589     // Find a new sink and wire up the results of the operation.
590     IB.connectToSink(BB, InstsAfter, Inst);
591 }
592 
mutate(BasicBlock & BB,RandomIRBuilder & IB)593 void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
594   // A deterministic alternative to SmallPtrSet with the same lookup
595   // performance.
596   std::map<size_t, Instruction *> AliveInsts;
597   std::map<Instruction *, size_t> AliveInstsLookup;
598   size_t InsertIdx = 0;
599   for (auto &I : make_early_inc_range(make_range(
600            BB.getFirstInsertionPt(), BB.getTerminator()->getIterator()))) {
601     // First gather all instructions that can be shuffled. Don't take
602     // terminator.
603     AliveInsts.insert({InsertIdx, &I});
604     AliveInstsLookup.insert({&I, InsertIdx++});
605     // Then remove these instructions from the block
606     I.removeFromParent();
607   }
608 
609   // Shuffle these instructions using topological sort.
610   // Returns false if all current instruction's dependencies in this block have
611   // been shuffled. If so, this instruction can be shuffled too.
612   auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
613     for (Value *O : AliveInsts[Index]->operands()) {
614       Instruction *P = dyn_cast<Instruction>(O);
615       if (P && AliveInstsLookup.count(P))
616         return true;
617     }
618     return false;
619   };
620   // Get all alive instructions that depend on the current instruction.
621   // Takes Instruction* instead of index because the instruction is already
622   // shuffled.
623   auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
624     SmallSetVector<size_t, 8> Children;
625     for (Value *U : I->users()) {
626       Instruction *P = dyn_cast<Instruction>(U);
627       if (P && AliveInstsLookup.count(P))
628         Children.insert(AliveInstsLookup[P]);
629     }
630     return Children;
631   };
632   SmallSet<size_t, 8> RootIndices;
633   SmallVector<Instruction *, 8> Insts;
634   for (const auto &[Index, Inst] : AliveInsts) {
635     if (!hasAliveParent(Index))
636       RootIndices.insert(Index);
637   }
638   // Topological sort by randomly selecting a node without a parent, or root.
639   while (!RootIndices.empty()) {
640     auto RS = makeSampler<size_t>(IB.Rand);
641     for (size_t RootIdx : RootIndices)
642       RS.sample(RootIdx, 1);
643     size_t RootIdx = RS.getSelection();
644 
645     RootIndices.erase(RootIdx);
646     Instruction *Root = AliveInsts[RootIdx];
647     AliveInsts.erase(RootIdx);
648     AliveInstsLookup.erase(Root);
649     Insts.push_back(Root);
650 
651     for (size_t Child : getAliveChildren(Root)) {
652       if (!hasAliveParent(Child)) {
653         RootIndices.insert(Child);
654       }
655     }
656   }
657 
658   Instruction *Terminator = BB.getTerminator();
659   // Then put instructions back.
660   for (Instruction *I : Insts) {
661     I->insertBefore(Terminator);
662   }
663 }
664 
parseModule(const uint8_t * Data,size_t Size,LLVMContext & Context)665 std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
666                                           LLVMContext &Context) {
667 
668   if (Size <= 1)
669     // We get bogus data given an empty corpus - just create a new module.
670     return std::make_unique<Module>("M", Context);
671 
672   auto Buffer = MemoryBuffer::getMemBuffer(
673       StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
674       /*RequiresNullTerminator=*/false);
675 
676   SMDiagnostic Err;
677   auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
678   if (Error E = M.takeError()) {
679     errs() << toString(std::move(E)) << "\n";
680     return nullptr;
681   }
682   return std::move(M.get());
683 }
684 
writeModule(const Module & M,uint8_t * Dest,size_t MaxSize)685 size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
686   std::string Buf;
687   {
688     raw_string_ostream OS(Buf);
689     WriteBitcodeToFile(M, OS);
690   }
691   if (Buf.size() > MaxSize)
692     return 0;
693   memcpy(Dest, Buf.data(), Buf.size());
694   return Buf.size();
695 }
696 
parseAndVerify(const uint8_t * Data,size_t Size,LLVMContext & Context)697 std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
698                                              LLVMContext &Context) {
699   auto M = parseModule(Data, Size, Context);
700   if (!M || verifyModule(*M, &errs()))
701     return nullptr;
702 
703   return M;
704 }
705