1 //===-- IRMutator.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/FuzzMutate/IRMutator.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/ADT/SmallSet.h"
12 #include "llvm/Analysis/TargetLibraryInfo.h"
13 #include "llvm/Bitcode/BitcodeReader.h"
14 #include "llvm/Bitcode/BitcodeWriter.h"
15 #include "llvm/FuzzMutate/Operations.h"
16 #include "llvm/FuzzMutate/Random.h"
17 #include "llvm/FuzzMutate/RandomIRBuilder.h"
18 #include "llvm/IR/BasicBlock.h"
19 #include "llvm/IR/FMF.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/InstIterator.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/IR/IntrinsicsAMDGPU.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/IR/Operator.h"
27 #include "llvm/IR/PassInstrumentation.h"
28 #include "llvm/IR/Verifier.h"
29 #include "llvm/Support/MemoryBuffer.h"
30 #include "llvm/Support/SourceMgr.h"
31 #include "llvm/Transforms/Scalar/DCE.h"
32 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
33 #include <map>
34 #include <optional>
35
36 using namespace llvm;
37
mutate(Module & M,RandomIRBuilder & IB)38 void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
39 auto RS = makeSampler<Function *>(IB.Rand);
40 for (Function &F : M)
41 if (!F.isDeclaration())
42 RS.sample(&F, /*Weight=*/1);
43
44 while (RS.totalWeight() < IB.MinFunctionNum) {
45 Function *F = IB.createFunctionDefinition(M);
46 RS.sample(F, /*Weight=*/1);
47 }
48 mutate(*RS.getSelection(), IB);
49 }
50
mutate(Function & F,RandomIRBuilder & IB)51 void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
52 auto Range = make_filter_range(make_pointer_range(F),
53 [](BasicBlock *BB) { return !BB->isEHPad(); });
54
55 mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
56 }
57
mutate(BasicBlock & BB,RandomIRBuilder & IB)58 void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
59 mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
60 }
61
getModuleSize(const Module & M)62 size_t llvm::IRMutator::getModuleSize(const Module &M) {
63 return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
64 }
65
mutateModule(Module & M,int Seed,size_t MaxSize)66 void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
67 std::vector<Type *> Types;
68 for (const auto &Getter : AllowedTypes)
69 Types.push_back(Getter(M.getContext()));
70 RandomIRBuilder IB(Seed, Types);
71
72 size_t CurSize = IRMutator::getModuleSize(M);
73 auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
74 for (const auto &Strategy : Strategies)
75 RS.sample(Strategy.get(),
76 Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
77 if (RS.totalWeight() == 0)
78 return;
79 auto Strategy = RS.getSelection();
80
81 Strategy->mutate(M, IB);
82 }
83
eliminateDeadCode(Function & F)84 static void eliminateDeadCode(Function &F) {
85 FunctionPassManager FPM;
86 FPM.addPass(DCEPass());
87 FunctionAnalysisManager FAM;
88 FAM.registerPass([&] { return TargetLibraryAnalysis(); });
89 FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
90 FPM.run(F, FAM);
91 }
92
mutate(Function & F,RandomIRBuilder & IB)93 void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
94 IRMutationStrategy::mutate(F, IB);
95 eliminateDeadCode(F);
96 }
97
getDefaultOps()98 std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
99 std::vector<fuzzerop::OpDescriptor> Ops;
100 describeFuzzerIntOps(Ops);
101 describeFuzzerFloatOps(Ops);
102 describeFuzzerControlFlowOps(Ops);
103 describeFuzzerPointerOps(Ops);
104 describeFuzzerAggregateOps(Ops);
105 describeFuzzerVectorOps(Ops);
106 return Ops;
107 }
108
109 std::optional<fuzzerop::OpDescriptor>
chooseOperation(Value * Src,RandomIRBuilder & IB)110 InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
111 auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
112 return Op.SourcePreds[0].matches({}, Src);
113 };
114 auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
115 if (RS.isEmpty())
116 return std::nullopt;
117 return *RS;
118 }
119
getEffectiveTerminator(BasicBlock & BB)120 static inline Instruction *getEffectiveTerminator(BasicBlock &BB) {
121 if (Instruction *I = BB.getTerminatingMustTailCall()) {
122 return I;
123 } else {
124 // Certain intrinsics, such as @llvm.amdgcn.cs.chain, must be immediately
125 // followed by an unreachable instruction..
126 if (UnreachableInst *UI = dyn_cast<UnreachableInst>(BB.getTerminator())) {
127 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI->getPrevNode())) {
128 return II;
129 }
130 }
131 }
132
133 return BB.getTerminator();
134 }
135
getEndIterator(BasicBlock & BB)136 static inline BasicBlock::iterator getEndIterator(BasicBlock &BB) {
137 auto End = BB.end();
138
139 if (BB.empty()) {
140 return End;
141 }
142
143 Instruction *EffectiveTerminator = getEffectiveTerminator(BB);
144 if (EffectiveTerminator != BB.getTerminator()) {
145 // Adjust range for special cases such as tail call.
146 End = std::prev(BB.end());
147 }
148
149 return End;
150 }
151
152 static inline iterator_range<BasicBlock::iterator>
getInsertionRange(BasicBlock & BB)153 getInsertionRange(BasicBlock &BB) {
154 auto End = getEndIterator(BB);
155 return make_range(BB.getFirstInsertionPt(), End);
156 }
157
mutate(BasicBlock & BB,RandomIRBuilder & IB)158 void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
159 SmallVector<Instruction *, 32> Insts(
160 llvm::make_pointer_range(getInsertionRange(BB)));
161 if (Insts.size() < 1)
162 return;
163
164 // Choose an insertion point for our new instruction.
165 size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);
166
167 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
168 auto InstsAfter = ArrayRef(Insts).slice(IP);
169
170 // Choose a source, which will be used to constrain the operation selection.
171 SmallVector<Value *, 2> Srcs;
172 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));
173
174 // Choose an operation that's constrained to be valid for the type of the
175 // source, collect any other sources it needs, and then build it.
176 auto OpDesc = chooseOperation(Srcs[0], IB);
177 // Bail if no operation was found
178 if (!OpDesc)
179 return;
180
181 for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
182 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
183
184 if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP]->getIterator())) {
185 // Find a sink and wire up the results of the operation.
186 IB.connectToSink(BB, InstsAfter, Op);
187 }
188 }
189
getWeight(size_t CurrentSize,size_t MaxSize,uint64_t CurrentWeight)190 uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
191 uint64_t CurrentWeight) {
192 // If we have less than 200 bytes, panic and try to always delete.
193 if (CurrentSize > MaxSize - 200)
194 return CurrentWeight ? CurrentWeight * 100 : 1;
195 // Draw a line starting from when we only have 1k left and increasing linearly
196 // to double the current weight.
197 int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
198 (static_cast<int64_t>(MaxSize) -
199 static_cast<int64_t>(CurrentSize) - 1000) /
200 1000;
201 // Clamp negative weights to zero.
202 if (Line < 0)
203 return 0;
204 return Line;
205 }
206
mutate(Function & F,RandomIRBuilder & IB)207 void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
208 auto RS = makeSampler<Instruction *>(IB.Rand);
209 for (Instruction &Inst : instructions(F)) {
210 // TODO: We can't handle these instructions.
211 if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
212 isa<PHINode>(Inst))
213 continue;
214
215 RS.sample(&Inst, /*Weight=*/1);
216 }
217 if (RS.isEmpty())
218 return;
219
220 // Delete the instruction.
221 mutate(*RS.getSelection(), IB);
222 // Clean up any dead code that's left over after removing the instruction.
223 eliminateDeadCode(F);
224 }
225
mutate(Instruction & Inst,RandomIRBuilder & IB)226 void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
227 assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
228
229 if (Inst.getType()->isVoidTy()) {
230 // Instructions with void type (ie, store) have no uses to worry about. Just
231 // erase it and move on.
232 Inst.eraseFromParent();
233 return;
234 }
235
236 // Otherwise we need to find some other value with the right type to keep the
237 // users happy.
238 auto Pred = fuzzerop::onlyType(Inst.getType());
239 auto RS = makeSampler<Value *>(IB.Rand);
240 SmallVector<Instruction *, 32> InstsBefore;
241 BasicBlock *BB = Inst.getParent();
242 for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
243 ++I) {
244 if (Pred.matches({}, &*I))
245 RS.sample(&*I, /*Weight=*/1);
246 InstsBefore.push_back(&*I);
247 }
248 if (!RS)
249 RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);
250
251 Inst.replaceAllUsesWith(RS.getSelection());
252 Inst.eraseFromParent();
253 }
254
mutate(Instruction & Inst,RandomIRBuilder & IB)255 void InstModificationIRStrategy::mutate(Instruction &Inst,
256 RandomIRBuilder &IB) {
257 SmallVector<std::function<void()>, 8> Modifications;
258 CmpInst *CI = nullptr;
259 GetElementPtrInst *GEP = nullptr;
260 switch (Inst.getOpcode()) {
261 default:
262 break;
263 // Add nsw, nuw flag
264 case Instruction::Add:
265 case Instruction::Mul:
266 case Instruction::Sub:
267 case Instruction::Shl:
268 Modifications.push_back(
269 [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
270 Modifications.push_back(
271 [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
272 break;
273 case Instruction::ICmp:
274 CI = cast<ICmpInst>(&Inst);
275 for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
276 p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
277 Modifications.push_back(
278 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
279 }
280 break;
281 // Add inbound flag.
282 case Instruction::GetElementPtr:
283 GEP = cast<GetElementPtrInst>(&Inst);
284 Modifications.push_back(
285 [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
286 break;
287 // Add exact flag.
288 case Instruction::UDiv:
289 case Instruction::SDiv:
290 case Instruction::LShr:
291 case Instruction::AShr:
292 Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
293 break;
294
295 case Instruction::FCmp:
296 CI = cast<FCmpInst>(&Inst);
297 for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
298 p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
299 Modifications.push_back(
300 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
301 }
302 break;
303 }
304
305 // Add fast math flag if possible.
306 if (isa<FPMathOperator>(&Inst)) {
307 // Try setting everything unless they are already on.
308 Modifications.push_back(
309 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
310 // Try unsetting everything unless they are already off.
311 Modifications.push_back(
312 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
313 // Individual setting by flipping the bit
314 Modifications.push_back(
315 [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
316 Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
317 Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
318 Modifications.push_back(
319 [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
320 Modifications.push_back(
321 [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
322 Modifications.push_back(
323 [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
324 Modifications.push_back(
325 [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
326 }
327
328 // Randomly switch operands of instructions
329 std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
330 switch (Inst.getOpcode()) {
331 case Instruction::SDiv:
332 case Instruction::UDiv:
333 case Instruction::SRem:
334 case Instruction::URem:
335 case Instruction::FDiv:
336 case Instruction::FRem: {
337 // Verify that the after shuffle the second operand is not
338 // constant 0.
339 Value *Operand = Inst.getOperand(0);
340 if (Constant *C = dyn_cast<Constant>(Operand)) {
341 if (!C->isZeroValue()) {
342 ShuffleItems = {0, 1};
343 }
344 }
345 break;
346 }
347 case Instruction::Select:
348 ShuffleItems = {1, 2};
349 break;
350 case Instruction::Add:
351 case Instruction::Sub:
352 case Instruction::Mul:
353 case Instruction::Shl:
354 case Instruction::LShr:
355 case Instruction::AShr:
356 case Instruction::And:
357 case Instruction::Or:
358 case Instruction::Xor:
359 case Instruction::FAdd:
360 case Instruction::FSub:
361 case Instruction::FMul:
362 case Instruction::ICmp:
363 case Instruction::FCmp:
364 case Instruction::ShuffleVector:
365 ShuffleItems = {0, 1};
366 break;
367 }
368 if (ShuffleItems != NoneItem) {
369 Modifications.push_back([&Inst, &ShuffleItems]() {
370 Value *Op0 = Inst.getOperand(ShuffleItems.first);
371 Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
372 Inst.setOperand(ShuffleItems.second, Op0);
373 });
374 }
375
376 auto RS = makeSampler(IB.Rand, Modifications);
377 if (RS)
378 RS.getSelection()();
379 }
380
381 /// Return a case value that is not already taken to make sure we don't have two
382 /// cases with same value.
getUniqueCaseValue(SmallSet<uint64_t,4> & CasesTaken,uint64_t MaxValue,RandomIRBuilder & IB)383 static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
384 uint64_t MaxValue, RandomIRBuilder &IB) {
385 uint64_t tmp;
386 do {
387 tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
388 } while (CasesTaken.count(tmp) != 0);
389 CasesTaken.insert(tmp);
390 return tmp;
391 }
392
393 /// Determines whether a function is unsupported by the current mutator's
394 /// implementation. The function returns true if any of the following criteria
395 /// are met:
396 /// * The function accepts metadata or token types as arguments.
397 /// * The function has ABI attributes that could cause UB.
398 /// * The function uses a non-callable CC that may result in UB.
isUnsupportedFunction(Function * F)399 static bool isUnsupportedFunction(Function *F) {
400 // Some functions accept metadata type or token type as arguments.
401 // We don't call those functions for now.
402 // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
403 // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
404 auto IsUnsupportedTy = [](Type *T) {
405 return T->isMetadataTy() || T->isTokenTy();
406 };
407
408 if (IsUnsupportedTy(F->getReturnType()) ||
409 any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
410 return true;
411 }
412
413 // ABI attributes must be specified both at the function
414 // declaration/definition and call-site, otherwise the
415 // behavior may be undefined.
416 // We don't call those functions for now to prevent UB from happening.
417 auto IsABIAttribute = [](AttributeSet A) {
418 static const Attribute::AttrKind ABIAttrs[] = {
419 Attribute::StructRet, Attribute::ByVal,
420 Attribute::InAlloca, Attribute::InReg,
421 Attribute::StackAlignment, Attribute::SwiftSelf,
422 Attribute::SwiftAsync, Attribute::SwiftError,
423 Attribute::Preallocated, Attribute::ByRef,
424 Attribute::ZExt, Attribute::SExt};
425
426 return llvm::any_of(ABIAttrs, [&](Attribute::AttrKind kind) {
427 return A.hasAttribute(kind);
428 });
429 };
430
431 auto FuncAttrs = F->getAttributes();
432 if (IsABIAttribute(FuncAttrs.getRetAttrs())) {
433 return true;
434 }
435 for (size_t i = 0; i < F->arg_size(); i++) {
436 if (IsABIAttribute(FuncAttrs.getParamAttrs(i))) {
437 return true;
438 }
439 }
440
441 // If it is not satisfied, the IR will be invalid.
442 if (!isCallableCC(F->getCallingConv())) {
443 return true;
444 }
445
446 // This intrinsic has specific requirements for its parameters and the caller
447 // must adhere to certain calling conventions.
448 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain) {
449 return true;
450 }
451
452 return false;
453 }
454
mutate(BasicBlock & BB,RandomIRBuilder & IB)455 void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
456 Module *M = BB.getParent()->getParent();
457 // If nullptr is selected, we will create a new function declaration.
458 SmallVector<Function *, 32> Functions({nullptr});
459 for (Function &F : M->functions()) {
460 Functions.push_back(&F);
461 }
462
463 auto RS = makeSampler(IB.Rand, Functions);
464 Function *F = RS.getSelection();
465
466 if (!F || isUnsupportedFunction(F)) {
467 F = IB.createFunctionDeclaration(*M);
468 }
469
470 FunctionType *FTy = F->getFunctionType();
471 SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
472 if (!F->arg_empty()) {
473 for (Type *ArgTy : FTy->params()) {
474 SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
475 }
476 }
477 bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
478 auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
479 BasicBlock::iterator InsertPt) {
480 StringRef Name = isRetVoid ? nullptr : "C";
481 CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, InsertPt);
482 Call->setCallingConv(F->getCallingConv());
483 // Don't return this call inst if it return void as it can't be sinked.
484 return isRetVoid ? nullptr : Call;
485 };
486
487 SmallVector<Instruction *, 32> Insts(
488 llvm::make_pointer_range(getInsertionRange(BB)));
489 if (Insts.size() < 1)
490 return;
491
492 // Choose an insertion point for our new call instruction.
493 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
494
495 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
496 auto InstsAfter = ArrayRef(Insts).slice(IP);
497
498 // Choose a source, which will be used to constrain the operation selection.
499 SmallVector<Value *, 2> Srcs;
500
501 for (const auto &Pred : ArrayRef(SourcePreds)) {
502 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
503 }
504
505 if (Value *Op = BuilderFunc(Srcs, Insts[IP]->getIterator())) {
506 // Find a sink and wire up the results of the operation.
507 IB.connectToSink(BB, InstsAfter, Op);
508 }
509 }
510
mutate(BasicBlock & BB,RandomIRBuilder & IB)511 void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
512 SmallVector<Instruction *, 32> Insts(
513 llvm::make_pointer_range(getInsertionRange(BB)));
514 if (Insts.size() < 1)
515 return;
516
517 // Choose a point where we split the block.
518 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
519 auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);
520
521 // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
522 // directly jumps to `Sink`. Here, we have to create a new terminator for
523 // `Source`.
524 BasicBlock *Block = Insts[IP]->getParent();
525 BasicBlock *Source = Block;
526 BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");
527
528 Function *F = BB.getParent();
529 LLVMContext &C = F->getParent()->getContext();
530 // A coin decides if it is branch or switch
531 if (uniform<uint64_t>(IB.Rand, 0, 1)) {
532 // Branch
533 BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
534 BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
535 Value *Cond =
536 IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
537 fuzzerop::onlyType(Type::getInt1Ty(C)), false);
538 BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
539 // Remove the old terminator.
540 ReplaceInstWithInst(Source->getTerminator(), Branch);
541 // Connect these blocks to `Sink`
542 connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
543 } else {
544 // Switch
545 // Determine Integer type, it IS possible we use a boolean to switch.
546 auto RS =
547 makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
548 return Ty->isIntegerTy();
549 }));
550 assert(RS && "There is no integer type in all allowed types, is the "
551 "setting correct?");
552 Type *Ty = RS.getSelection();
553 IntegerType *IntTy = cast<IntegerType>(Ty);
554
555 uint64_t BitSize = IntTy->getBitWidth();
556 uint64_t MaxCaseVal =
557 (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
558 // Create Switch inst in Block
559 Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
560 fuzzerop::onlyType(IntTy), false);
561 BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
562 uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
563 NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
564 SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
565 // Remove the old terminator.
566 ReplaceInstWithInst(Source->getTerminator(), Switch);
567
568 // Create blocks, for each block assign a case value.
569 SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
570 SmallSet<uint64_t, 4> CasesTaken;
571 for (uint64_t i = 0; i < NumCases; i++) {
572 uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
573 BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
574 ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
575 Switch->addCase(OnValue, CaseBlock);
576 Blocks.push_back(CaseBlock);
577 }
578
579 // Connect these blocks to `Sink`
580 connectBlocksToSink(Blocks, Sink, IB);
581 }
582 }
583
584 /// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
585 /// even have terminator.
connectBlocksToSink(ArrayRef<BasicBlock * > Blocks,BasicBlock * Sink,RandomIRBuilder & IB)586 void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
587 BasicBlock *Sink,
588 RandomIRBuilder &IB) {
589 uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
590 for (uint64_t i = 0; i < Blocks.size(); i++) {
591 // We have at least one block that directly goes to sink.
592 CFGToSink ToSink = (i == DirectSinkIdx)
593 ? CFGToSink::DirectSink
594 : static_cast<CFGToSink>(uniform<uint64_t>(
595 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
596 BasicBlock *BB = Blocks[i];
597 Function *F = BB->getParent();
598 LLVMContext &C = F->getParent()->getContext();
599 switch (ToSink) {
600 case CFGToSink::Return: {
601 Type *RetTy = F->getReturnType();
602 Value *RetValue = nullptr;
603 if (!RetTy->isVoidTy())
604 RetValue =
605 IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
606 ReturnInst::Create(C, RetValue, BB);
607 break;
608 }
609 case CFGToSink::DirectSink: {
610 BranchInst::Create(Sink, BB);
611 break;
612 }
613 case CFGToSink::SinkOrSelfLoop: {
614 SmallVector<BasicBlock *, 2> Branches({Sink, BB});
615 // A coin decides which block is true branch.
616 uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
617 Value *Cond = IB.findOrCreateSource(
618 *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
619 BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
620 break;
621 }
622 case CFGToSink::EndOfCFGToLink:
623 llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
624 }
625 }
626 }
627
mutate(BasicBlock & BB,RandomIRBuilder & IB)628 void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
629 // Can't insert PHI node to entry node.
630 if (&BB == &BB.getParent()->getEntryBlock())
631 return;
632 Type *Ty = IB.randomType();
633 PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", BB.begin());
634
635 // Use a map to make sure the same incoming basic block has the same value.
636 DenseMap<BasicBlock *, Value *> IncomingValues;
637 for (BasicBlock *Pred : predecessors(&BB)) {
638 Value *Src = IncomingValues[Pred];
639 // If `Pred` is not in the map yet, we'll get a nullptr.
640 if (!Src) {
641 SmallVector<Instruction *, 32> Insts;
642 for (auto I = Pred->begin(); I != Pred->end(); ++I)
643 Insts.push_back(&*I);
644 // There is no need to inform IB what previously used values are if we are
645 // using `onlyType`
646 Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
647 IncomingValues[Pred] = Src;
648 }
649 PHI->addIncoming(Src, Pred);
650 }
651 SmallVector<Instruction *, 32> InstsAfter(
652 llvm::make_pointer_range(getInsertionRange(BB)));
653 IB.connectToSink(BB, InstsAfter, PHI);
654 }
655
mutate(Function & F,RandomIRBuilder & IB)656 void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
657 for (BasicBlock &BB : F) {
658 this->mutate(BB, IB);
659 }
660 }
mutate(BasicBlock & BB,RandomIRBuilder & IB)661 void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
662 SmallVector<Instruction *, 32> Insts(
663 llvm::make_pointer_range(getInsertionRange(BB)));
664 if (Insts.size() < 1)
665 return;
666 // Choose an Instruction to mutate.
667 uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
668 Instruction *Inst = Insts[Idx];
669 // `Idx + 1` so we don't sink to ourselves.
670 auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
671 Type *Ty = Inst->getType();
672 // Don't sink terminators, void function calls, token, etc.
673 if (!Ty->isVoidTy() && !Ty->isTokenTy())
674 // Find a new sink and wire up the results of the operation.
675 IB.connectToSink(BB, InstsAfter, Inst);
676 }
677
mutate(BasicBlock & BB,RandomIRBuilder & IB)678 void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
679 // A deterministic alternative to SmallPtrSet with the same lookup
680 // performance.
681 std::map<size_t, Instruction *> AliveInsts;
682 std::map<Instruction *, size_t> AliveInstsLookup;
683 size_t InsertIdx = 0;
684 for (auto &I : make_early_inc_range(
685 make_range(BB.getFirstInsertionPt(),
686 getEffectiveTerminator(BB)->getIterator()))) {
687 // First gather all instructions that can be shuffled. Don't take
688 // terminator.
689 AliveInsts.insert({InsertIdx, &I});
690 AliveInstsLookup.insert({&I, InsertIdx++});
691 // Then remove these instructions from the block
692 I.removeFromParent();
693 }
694
695 // Shuffle these instructions using topological sort.
696 // Returns false if all current instruction's dependencies in this block have
697 // been shuffled. If so, this instruction can be shuffled too.
698 auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
699 for (Value *O : AliveInsts[Index]->operands()) {
700 Instruction *P = dyn_cast<Instruction>(O);
701 if (P && AliveInstsLookup.count(P))
702 return true;
703 }
704 return false;
705 };
706 // Get all alive instructions that depend on the current instruction.
707 // Takes Instruction* instead of index because the instruction is already
708 // shuffled.
709 auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
710 SmallSetVector<size_t, 8> Children;
711 for (Value *U : I->users()) {
712 if (Instruction *P = dyn_cast<Instruction>(U)) {
713 auto It = AliveInstsLookup.find(P);
714 if (It != AliveInstsLookup.end())
715 Children.insert(It->second);
716 }
717 }
718 return Children;
719 };
720 SmallSet<size_t, 8> RootIndices;
721 SmallVector<Instruction *, 8> Insts;
722 for (const auto &[Index, Inst] : AliveInsts) {
723 if (!hasAliveParent(Index))
724 RootIndices.insert(Index);
725 }
726 // Topological sort by randomly selecting a node without a parent, or root.
727 while (!RootIndices.empty()) {
728 auto RS = makeSampler<size_t>(IB.Rand);
729 for (size_t RootIdx : RootIndices)
730 RS.sample(RootIdx, 1);
731 size_t RootIdx = RS.getSelection();
732
733 RootIndices.erase(RootIdx);
734 Instruction *Root = AliveInsts[RootIdx];
735 AliveInsts.erase(RootIdx);
736 AliveInstsLookup.erase(Root);
737 Insts.push_back(Root);
738
739 for (size_t Child : getAliveChildren(Root)) {
740 if (!hasAliveParent(Child)) {
741 RootIndices.insert(Child);
742 }
743 }
744 }
745
746 Instruction *Terminator = getEffectiveTerminator(BB);
747 // Then put instructions back.
748 for (Instruction *I : Insts) {
749 I->insertBefore(Terminator->getIterator());
750 }
751 }
752
parseModule(const uint8_t * Data,size_t Size,LLVMContext & Context)753 std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
754 LLVMContext &Context) {
755
756 if (Size <= 1)
757 // We get bogus data given an empty corpus - just create a new module.
758 return std::make_unique<Module>("M", Context);
759
760 auto Buffer = MemoryBuffer::getMemBuffer(
761 StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
762 /*RequiresNullTerminator=*/false);
763
764 SMDiagnostic Err;
765 auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
766 if (Error E = M.takeError()) {
767 errs() << toString(std::move(E)) << "\n";
768 return nullptr;
769 }
770 return std::move(M.get());
771 }
772
writeModule(const Module & M,uint8_t * Dest,size_t MaxSize)773 size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
774 std::string Buf;
775 {
776 raw_string_ostream OS(Buf);
777 WriteBitcodeToFile(M, OS);
778 }
779 if (Buf.size() > MaxSize)
780 return 0;
781 memcpy(Dest, Buf.data(), Buf.size());
782 return Buf.size();
783 }
784
parseAndVerify(const uint8_t * Data,size_t Size,LLVMContext & Context)785 std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
786 LLVMContext &Context) {
787 auto M = parseModule(Data, Size, Context);
788 if (!M || verifyModule(*M, &errs()))
789 return nullptr;
790
791 return M;
792 }
793