1 //===-- IRMutator.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/FuzzMutate/IRMutator.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/ADT/SmallSet.h"
12 #include "llvm/Analysis/TargetLibraryInfo.h"
13 #include "llvm/Bitcode/BitcodeReader.h"
14 #include "llvm/Bitcode/BitcodeWriter.h"
15 #include "llvm/FuzzMutate/Operations.h"
16 #include "llvm/FuzzMutate/Random.h"
17 #include "llvm/FuzzMutate/RandomIRBuilder.h"
18 #include "llvm/IR/BasicBlock.h"
19 #include "llvm/IR/FMF.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/InstIterator.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/IR/Operator.h"
25 #include "llvm/IR/PassInstrumentation.h"
26 #include "llvm/IR/Verifier.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Transforms/Scalar/DCE.h"
30 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
31 #include <map>
32 #include <optional>
33
34 using namespace llvm;
35
mutate(Module & M,RandomIRBuilder & IB)36 void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
37 auto RS = makeSampler<Function *>(IB.Rand);
38 for (Function &F : M)
39 if (!F.isDeclaration())
40 RS.sample(&F, /*Weight=*/1);
41
42 while (RS.totalWeight() < IB.MinFunctionNum) {
43 Function *F = IB.createFunctionDefinition(M);
44 RS.sample(F, /*Weight=*/1);
45 }
46 mutate(*RS.getSelection(), IB);
47 }
48
mutate(Function & F,RandomIRBuilder & IB)49 void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
50 auto Range = make_filter_range(make_pointer_range(F),
51 [](BasicBlock *BB) { return !BB->isEHPad(); });
52
53 mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
54 }
55
mutate(BasicBlock & BB,RandomIRBuilder & IB)56 void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
57 mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
58 }
59
getModuleSize(const Module & M)60 size_t llvm::IRMutator::getModuleSize(const Module &M) {
61 return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
62 }
63
mutateModule(Module & M,int Seed,size_t MaxSize)64 void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
65 std::vector<Type *> Types;
66 for (const auto &Getter : AllowedTypes)
67 Types.push_back(Getter(M.getContext()));
68 RandomIRBuilder IB(Seed, Types);
69
70 size_t CurSize = IRMutator::getModuleSize(M);
71 auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
72 for (const auto &Strategy : Strategies)
73 RS.sample(Strategy.get(),
74 Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
75 if (RS.totalWeight() == 0)
76 return;
77 auto Strategy = RS.getSelection();
78
79 Strategy->mutate(M, IB);
80 }
81
eliminateDeadCode(Function & F)82 static void eliminateDeadCode(Function &F) {
83 FunctionPassManager FPM;
84 FPM.addPass(DCEPass());
85 FunctionAnalysisManager FAM;
86 FAM.registerPass([&] { return TargetLibraryAnalysis(); });
87 FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
88 FPM.run(F, FAM);
89 }
90
mutate(Function & F,RandomIRBuilder & IB)91 void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
92 IRMutationStrategy::mutate(F, IB);
93 eliminateDeadCode(F);
94 }
95
getDefaultOps()96 std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
97 std::vector<fuzzerop::OpDescriptor> Ops;
98 describeFuzzerIntOps(Ops);
99 describeFuzzerFloatOps(Ops);
100 describeFuzzerControlFlowOps(Ops);
101 describeFuzzerPointerOps(Ops);
102 describeFuzzerAggregateOps(Ops);
103 describeFuzzerVectorOps(Ops);
104 return Ops;
105 }
106
107 std::optional<fuzzerop::OpDescriptor>
chooseOperation(Value * Src,RandomIRBuilder & IB)108 InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
109 auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
110 return Op.SourcePreds[0].matches({}, Src);
111 };
112 auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
113 if (RS.isEmpty())
114 return std::nullopt;
115 return *RS;
116 }
117
118 static inline iterator_range<BasicBlock::iterator>
getInsertionRange(BasicBlock & BB)119 getInsertionRange(BasicBlock &BB) {
120 auto End = BB.getTerminatingMustTailCall() ? std::prev(BB.end()) : BB.end();
121 return make_range(BB.getFirstInsertionPt(), End);
122 }
123
mutate(BasicBlock & BB,RandomIRBuilder & IB)124 void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
125 SmallVector<Instruction *, 32> Insts;
126 for (Instruction &I : getInsertionRange(BB))
127 Insts.push_back(&I);
128 if (Insts.size() < 1)
129 return;
130
131 // Choose an insertion point for our new instruction.
132 size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);
133
134 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
135 auto InstsAfter = ArrayRef(Insts).slice(IP);
136
137 // Choose a source, which will be used to constrain the operation selection.
138 SmallVector<Value *, 2> Srcs;
139 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));
140
141 // Choose an operation that's constrained to be valid for the type of the
142 // source, collect any other sources it needs, and then build it.
143 auto OpDesc = chooseOperation(Srcs[0], IB);
144 // Bail if no operation was found
145 if (!OpDesc)
146 return;
147
148 for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
149 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
150
151 if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
152 // Find a sink and wire up the results of the operation.
153 IB.connectToSink(BB, InstsAfter, Op);
154 }
155 }
156
getWeight(size_t CurrentSize,size_t MaxSize,uint64_t CurrentWeight)157 uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
158 uint64_t CurrentWeight) {
159 // If we have less than 200 bytes, panic and try to always delete.
160 if (CurrentSize > MaxSize - 200)
161 return CurrentWeight ? CurrentWeight * 100 : 1;
162 // Draw a line starting from when we only have 1k left and increasing linearly
163 // to double the current weight.
164 int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
165 (static_cast<int64_t>(MaxSize) -
166 static_cast<int64_t>(CurrentSize) - 1000) /
167 1000;
168 // Clamp negative weights to zero.
169 if (Line < 0)
170 return 0;
171 return Line;
172 }
173
mutate(Function & F,RandomIRBuilder & IB)174 void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
175 auto RS = makeSampler<Instruction *>(IB.Rand);
176 for (Instruction &Inst : instructions(F)) {
177 // TODO: We can't handle these instructions.
178 if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
179 isa<PHINode>(Inst))
180 continue;
181
182 RS.sample(&Inst, /*Weight=*/1);
183 }
184 if (RS.isEmpty())
185 return;
186
187 // Delete the instruction.
188 mutate(*RS.getSelection(), IB);
189 // Clean up any dead code that's left over after removing the instruction.
190 eliminateDeadCode(F);
191 }
192
mutate(Instruction & Inst,RandomIRBuilder & IB)193 void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
194 assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
195
196 if (Inst.getType()->isVoidTy()) {
197 // Instructions with void type (ie, store) have no uses to worry about. Just
198 // erase it and move on.
199 Inst.eraseFromParent();
200 return;
201 }
202
203 // Otherwise we need to find some other value with the right type to keep the
204 // users happy.
205 auto Pred = fuzzerop::onlyType(Inst.getType());
206 auto RS = makeSampler<Value *>(IB.Rand);
207 SmallVector<Instruction *, 32> InstsBefore;
208 BasicBlock *BB = Inst.getParent();
209 for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
210 ++I) {
211 if (Pred.matches({}, &*I))
212 RS.sample(&*I, /*Weight=*/1);
213 InstsBefore.push_back(&*I);
214 }
215 if (!RS)
216 RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);
217
218 Inst.replaceAllUsesWith(RS.getSelection());
219 Inst.eraseFromParent();
220 }
221
mutate(Instruction & Inst,RandomIRBuilder & IB)222 void InstModificationIRStrategy::mutate(Instruction &Inst,
223 RandomIRBuilder &IB) {
224 SmallVector<std::function<void()>, 8> Modifications;
225 CmpInst *CI = nullptr;
226 GetElementPtrInst *GEP = nullptr;
227 switch (Inst.getOpcode()) {
228 default:
229 break;
230 // Add nsw, nuw flag
231 case Instruction::Add:
232 case Instruction::Mul:
233 case Instruction::Sub:
234 case Instruction::Shl:
235 Modifications.push_back(
236 [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
237 Modifications.push_back(
238 [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
239 break;
240 case Instruction::ICmp:
241 CI = cast<ICmpInst>(&Inst);
242 for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
243 p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
244 Modifications.push_back(
245 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
246 }
247 break;
248 // Add inbound flag.
249 case Instruction::GetElementPtr:
250 GEP = cast<GetElementPtrInst>(&Inst);
251 Modifications.push_back(
252 [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
253 break;
254 // Add exact flag.
255 case Instruction::UDiv:
256 case Instruction::SDiv:
257 case Instruction::LShr:
258 case Instruction::AShr:
259 Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
260 break;
261
262 case Instruction::FCmp:
263 CI = cast<FCmpInst>(&Inst);
264 for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
265 p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
266 Modifications.push_back(
267 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
268 }
269 break;
270 }
271
272 // Add fast math flag if possible.
273 if (isa<FPMathOperator>(&Inst)) {
274 // Try setting everything unless they are already on.
275 Modifications.push_back(
276 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
277 // Try unsetting everything unless they are already off.
278 Modifications.push_back(
279 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
280 // Individual setting by flipping the bit
281 Modifications.push_back(
282 [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
283 Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
284 Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
285 Modifications.push_back(
286 [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
287 Modifications.push_back(
288 [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
289 Modifications.push_back(
290 [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
291 Modifications.push_back(
292 [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
293 }
294
295 // Randomly switch operands of instructions
296 std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
297 switch (Inst.getOpcode()) {
298 case Instruction::SDiv:
299 case Instruction::UDiv:
300 case Instruction::SRem:
301 case Instruction::URem:
302 case Instruction::FDiv:
303 case Instruction::FRem: {
304 // Verify that the after shuffle the second operand is not
305 // constant 0.
306 Value *Operand = Inst.getOperand(0);
307 if (Constant *C = dyn_cast<Constant>(Operand)) {
308 if (!C->isZeroValue()) {
309 ShuffleItems = {0, 1};
310 }
311 }
312 break;
313 }
314 case Instruction::Select:
315 ShuffleItems = {1, 2};
316 break;
317 case Instruction::Add:
318 case Instruction::Sub:
319 case Instruction::Mul:
320 case Instruction::Shl:
321 case Instruction::LShr:
322 case Instruction::AShr:
323 case Instruction::And:
324 case Instruction::Or:
325 case Instruction::Xor:
326 case Instruction::FAdd:
327 case Instruction::FSub:
328 case Instruction::FMul:
329 case Instruction::ICmp:
330 case Instruction::FCmp:
331 case Instruction::ShuffleVector:
332 ShuffleItems = {0, 1};
333 break;
334 }
335 if (ShuffleItems != NoneItem) {
336 Modifications.push_back([&Inst, &ShuffleItems]() {
337 Value *Op0 = Inst.getOperand(ShuffleItems.first);
338 Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
339 Inst.setOperand(ShuffleItems.second, Op0);
340 });
341 }
342
343 auto RS = makeSampler(IB.Rand, Modifications);
344 if (RS)
345 RS.getSelection()();
346 }
347
348 /// Return a case value that is not already taken to make sure we don't have two
349 /// cases with same value.
getUniqueCaseValue(SmallSet<uint64_t,4> & CasesTaken,uint64_t MaxValue,RandomIRBuilder & IB)350 static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
351 uint64_t MaxValue, RandomIRBuilder &IB) {
352 uint64_t tmp;
353 do {
354 tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
355 } while (CasesTaken.count(tmp) != 0);
356 CasesTaken.insert(tmp);
357 return tmp;
358 }
359
mutate(BasicBlock & BB,RandomIRBuilder & IB)360 void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
361 Module *M = BB.getParent()->getParent();
362 // If nullptr is selected, we will create a new function declaration.
363 SmallVector<Function *, 32> Functions({nullptr});
364 for (Function &F : M->functions()) {
365 Functions.push_back(&F);
366 }
367
368 auto RS = makeSampler(IB.Rand, Functions);
369 Function *F = RS.getSelection();
370 // Some functions accept metadata type or token type as arguments.
371 // We don't call those functions for now.
372 // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
373 // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
374 auto IsUnsupportedTy = [](Type *T) {
375 return T->isMetadataTy() || T->isTokenTy();
376 };
377 if (!F || IsUnsupportedTy(F->getReturnType()) ||
378 any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
379 F = IB.createFunctionDeclaration(*M);
380 }
381
382 FunctionType *FTy = F->getFunctionType();
383 SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
384 if (!F->arg_empty()) {
385 for (Type *ArgTy : FTy->params()) {
386 SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
387 }
388 }
389 bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
390 auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
391 Instruction *Inst) {
392 StringRef Name = isRetVoid ? nullptr : "C";
393 CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, Inst);
394 // Don't return this call inst if it return void as it can't be sinked.
395 return isRetVoid ? nullptr : Call;
396 };
397
398 SmallVector<Instruction *, 32> Insts;
399 for (Instruction &I : getInsertionRange(BB))
400 Insts.push_back(&I);
401 if (Insts.size() < 1)
402 return;
403
404 // Choose an insertion point for our new call instruction.
405 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
406
407 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
408 auto InstsAfter = ArrayRef(Insts).slice(IP);
409
410 // Choose a source, which will be used to constrain the operation selection.
411 SmallVector<Value *, 2> Srcs;
412
413 for (const auto &Pred : ArrayRef(SourcePreds)) {
414 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
415 }
416
417 if (Value *Op = BuilderFunc(Srcs, Insts[IP])) {
418 // Find a sink and wire up the results of the operation.
419 IB.connectToSink(BB, InstsAfter, Op);
420 }
421 }
422
mutate(BasicBlock & BB,RandomIRBuilder & IB)423 void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
424 SmallVector<Instruction *, 32> Insts;
425 for (Instruction &I : getInsertionRange(BB))
426 Insts.push_back(&I);
427 if (Insts.size() < 1)
428 return;
429
430 // Choose a point where we split the block.
431 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
432 auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);
433
434 // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
435 // directly jumps to `Sink`. Here, we have to create a new terminator for
436 // `Source`.
437 BasicBlock *Block = Insts[IP]->getParent();
438 BasicBlock *Source = Block;
439 BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");
440
441 Function *F = BB.getParent();
442 LLVMContext &C = F->getParent()->getContext();
443 // A coin decides if it is branch or switch
444 if (uniform<uint64_t>(IB.Rand, 0, 1)) {
445 // Branch
446 BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
447 BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
448 Value *Cond =
449 IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
450 fuzzerop::onlyType(Type::getInt1Ty(C)), false);
451 BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
452 // Remove the old terminator.
453 ReplaceInstWithInst(Source->getTerminator(), Branch);
454 // Connect these blocks to `Sink`
455 connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
456 } else {
457 // Switch
458 // Determine Integer type, it IS possible we use a boolean to switch.
459 auto RS =
460 makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
461 return Ty->isIntegerTy();
462 }));
463 assert(RS && "There is no integer type in all allowed types, is the "
464 "setting correct?");
465 Type *Ty = RS.getSelection();
466 IntegerType *IntTy = cast<IntegerType>(Ty);
467
468 uint64_t BitSize = IntTy->getBitWidth();
469 uint64_t MaxCaseVal =
470 (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
471 // Create Switch inst in Block
472 Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
473 fuzzerop::onlyType(IntTy), false);
474 BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
475 uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
476 NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
477 SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
478 // Remove the old terminator.
479 ReplaceInstWithInst(Source->getTerminator(), Switch);
480
481 // Create blocks, for each block assign a case value.
482 SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
483 SmallSet<uint64_t, 4> CasesTaken;
484 for (uint64_t i = 0; i < NumCases; i++) {
485 uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
486 BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
487 ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
488 Switch->addCase(OnValue, CaseBlock);
489 Blocks.push_back(CaseBlock);
490 }
491
492 // Connect these blocks to `Sink`
493 connectBlocksToSink(Blocks, Sink, IB);
494 }
495 }
496
497 /// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
498 /// even have terminator.
connectBlocksToSink(ArrayRef<BasicBlock * > Blocks,BasicBlock * Sink,RandomIRBuilder & IB)499 void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
500 BasicBlock *Sink,
501 RandomIRBuilder &IB) {
502 uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
503 for (uint64_t i = 0; i < Blocks.size(); i++) {
504 // We have at least one block that directly goes to sink.
505 CFGToSink ToSink = (i == DirectSinkIdx)
506 ? CFGToSink::DirectSink
507 : static_cast<CFGToSink>(uniform<uint64_t>(
508 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
509 BasicBlock *BB = Blocks[i];
510 Function *F = BB->getParent();
511 LLVMContext &C = F->getParent()->getContext();
512 switch (ToSink) {
513 case CFGToSink::Return: {
514 Type *RetTy = F->getReturnType();
515 Value *RetValue = nullptr;
516 if (!RetTy->isVoidTy())
517 RetValue =
518 IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
519 ReturnInst::Create(C, RetValue, BB);
520 break;
521 }
522 case CFGToSink::DirectSink: {
523 BranchInst::Create(Sink, BB);
524 break;
525 }
526 case CFGToSink::SinkOrSelfLoop: {
527 SmallVector<BasicBlock *, 2> Branches({Sink, BB});
528 // A coin decides which block is true branch.
529 uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
530 Value *Cond = IB.findOrCreateSource(
531 *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
532 BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
533 break;
534 }
535 case CFGToSink::EndOfCFGToLink:
536 llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
537 }
538 }
539 }
540
mutate(BasicBlock & BB,RandomIRBuilder & IB)541 void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
542 // Can't insert PHI node to entry node.
543 if (&BB == &BB.getParent()->getEntryBlock())
544 return;
545 Type *Ty = IB.randomType();
546 PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", &BB.front());
547
548 // Use a map to make sure the same incoming basic block has the same value.
549 DenseMap<BasicBlock *, Value *> IncomingValues;
550 for (BasicBlock *Pred : predecessors(&BB)) {
551 Value *Src = IncomingValues[Pred];
552 // If `Pred` is not in the map yet, we'll get a nullptr.
553 if (!Src) {
554 SmallVector<Instruction *, 32> Insts;
555 for (auto I = Pred->begin(); I != Pred->end(); ++I)
556 Insts.push_back(&*I);
557 // There is no need to inform IB what previously used values are if we are
558 // using `onlyType`
559 Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
560 IncomingValues[Pred] = Src;
561 }
562 PHI->addIncoming(Src, Pred);
563 }
564 SmallVector<Instruction *, 32> InstsAfter;
565 for (Instruction &I : getInsertionRange(BB))
566 InstsAfter.push_back(&I);
567 IB.connectToSink(BB, InstsAfter, PHI);
568 }
569
mutate(Function & F,RandomIRBuilder & IB)570 void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
571 for (BasicBlock &BB : F) {
572 this->mutate(BB, IB);
573 }
574 }
mutate(BasicBlock & BB,RandomIRBuilder & IB)575 void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
576 SmallVector<Instruction *, 32> Insts;
577 for (Instruction &I : getInsertionRange(BB))
578 Insts.push_back(&I);
579 if (Insts.size() < 1)
580 return;
581 // Choose an Instruction to mutate.
582 uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
583 Instruction *Inst = Insts[Idx];
584 // `Idx + 1` so we don't sink to ourselves.
585 auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
586 Type *Ty = Inst->getType();
587 // Don't sink terminators, void function calls, token, etc.
588 if (!Ty->isVoidTy() && !Ty->isTokenTy())
589 // Find a new sink and wire up the results of the operation.
590 IB.connectToSink(BB, InstsAfter, Inst);
591 }
592
mutate(BasicBlock & BB,RandomIRBuilder & IB)593 void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
594 // A deterministic alternative to SmallPtrSet with the same lookup
595 // performance.
596 std::map<size_t, Instruction *> AliveInsts;
597 std::map<Instruction *, size_t> AliveInstsLookup;
598 size_t InsertIdx = 0;
599 for (auto &I : make_early_inc_range(make_range(
600 BB.getFirstInsertionPt(), BB.getTerminator()->getIterator()))) {
601 // First gather all instructions that can be shuffled. Don't take
602 // terminator.
603 AliveInsts.insert({InsertIdx, &I});
604 AliveInstsLookup.insert({&I, InsertIdx++});
605 // Then remove these instructions from the block
606 I.removeFromParent();
607 }
608
609 // Shuffle these instructions using topological sort.
610 // Returns false if all current instruction's dependencies in this block have
611 // been shuffled. If so, this instruction can be shuffled too.
612 auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
613 for (Value *O : AliveInsts[Index]->operands()) {
614 Instruction *P = dyn_cast<Instruction>(O);
615 if (P && AliveInstsLookup.count(P))
616 return true;
617 }
618 return false;
619 };
620 // Get all alive instructions that depend on the current instruction.
621 // Takes Instruction* instead of index because the instruction is already
622 // shuffled.
623 auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
624 SmallSetVector<size_t, 8> Children;
625 for (Value *U : I->users()) {
626 Instruction *P = dyn_cast<Instruction>(U);
627 if (P && AliveInstsLookup.count(P))
628 Children.insert(AliveInstsLookup[P]);
629 }
630 return Children;
631 };
632 SmallSet<size_t, 8> RootIndices;
633 SmallVector<Instruction *, 8> Insts;
634 for (const auto &[Index, Inst] : AliveInsts) {
635 if (!hasAliveParent(Index))
636 RootIndices.insert(Index);
637 }
638 // Topological sort by randomly selecting a node without a parent, or root.
639 while (!RootIndices.empty()) {
640 auto RS = makeSampler<size_t>(IB.Rand);
641 for (size_t RootIdx : RootIndices)
642 RS.sample(RootIdx, 1);
643 size_t RootIdx = RS.getSelection();
644
645 RootIndices.erase(RootIdx);
646 Instruction *Root = AliveInsts[RootIdx];
647 AliveInsts.erase(RootIdx);
648 AliveInstsLookup.erase(Root);
649 Insts.push_back(Root);
650
651 for (size_t Child : getAliveChildren(Root)) {
652 if (!hasAliveParent(Child)) {
653 RootIndices.insert(Child);
654 }
655 }
656 }
657
658 Instruction *Terminator = BB.getTerminator();
659 // Then put instructions back.
660 for (Instruction *I : Insts) {
661 I->insertBefore(Terminator);
662 }
663 }
664
parseModule(const uint8_t * Data,size_t Size,LLVMContext & Context)665 std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
666 LLVMContext &Context) {
667
668 if (Size <= 1)
669 // We get bogus data given an empty corpus - just create a new module.
670 return std::make_unique<Module>("M", Context);
671
672 auto Buffer = MemoryBuffer::getMemBuffer(
673 StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
674 /*RequiresNullTerminator=*/false);
675
676 SMDiagnostic Err;
677 auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
678 if (Error E = M.takeError()) {
679 errs() << toString(std::move(E)) << "\n";
680 return nullptr;
681 }
682 return std::move(M.get());
683 }
684
writeModule(const Module & M,uint8_t * Dest,size_t MaxSize)685 size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
686 std::string Buf;
687 {
688 raw_string_ostream OS(Buf);
689 WriteBitcodeToFile(M, OS);
690 }
691 if (Buf.size() > MaxSize)
692 return 0;
693 memcpy(Dest, Buf.data(), Buf.size());
694 return Buf.size();
695 }
696
parseAndVerify(const uint8_t * Data,size_t Size,LLVMContext & Context)697 std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
698 LLVMContext &Context) {
699 auto M = parseModule(Data, Size, Context);
700 if (!M || verifyModule(*M, &errs()))
701 return nullptr;
702
703 return M;
704 }
705