//===-- IRMutator.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/FuzzMutate/IRMutator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/FuzzMutate/Operations.h"
#include "llvm/FuzzMutate/Random.h"
#include "llvm/FuzzMutate/RandomIRBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <map>
#include <optional>

using namespace llvm;

void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
  auto RS = makeSampler<Function *>(IB.Rand);
  for (Function &F : M)
    if (!F.isDeclaration())
      RS.sample(&F, /*Weight=*/1);

  while (RS.totalWeight() < IB.MinFunctionNum) {
    Function *F = IB.createFunctionDefinition(M);
    RS.sample(F, /*Weight=*/1);
  }
  mutate(*RS.getSelection(), IB);
}

void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  auto Range = make_filter_range(make_pointer_range(F),
                                 [](BasicBlock *BB) { return !BB->isEHPad(); });

  mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
}

void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
}

size_t llvm::IRMutator::getModuleSize(const Module &M) {
  return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
}

void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
  std::vector<Type *> Types;
  for (const auto &Getter : AllowedTypes)
    Types.push_back(Getter(M.getContext()));
  RandomIRBuilder IB(Seed, Types);

  size_t CurSize = IRMutator::getModuleSize(M);
  auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
  for (const auto &Strategy : Strategies)
    RS.sample(Strategy.get(),
              Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
  if (RS.totalWeight() == 0)
    return;
  auto Strategy = RS.getSelection();

  Strategy->mutate(M, IB);
}

static void eliminateDeadCode(Function &F) {
  FunctionPassManager FPM;
  FPM.addPass(DCEPass());
  FunctionAnalysisManager FAM;
  FAM.registerPass([&] { return TargetLibraryAnalysis(); });
  FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
  FPM.run(F, FAM);
}

void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  IRMutationStrategy::mutate(F, IB);
  eliminateDeadCode(F);
}

std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
  std::vector<fuzzerop::OpDescriptor> Ops;
  describeFuzzerIntOps(Ops);
  describeFuzzerFloatOps(Ops);
  describeFuzzerControlFlowOps(Ops);
  describeFuzzerPointerOps(Ops);
  describeFuzzerAggregateOps(Ops);
  describeFuzzerVectorOps(Ops);
  return Ops;
}

std::optional<fuzzerop::OpDescriptor>
InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
  auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
    return Op.SourcePreds[0].matches({}, Src);
  };
  auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
  if (RS.isEmpty())
    return std::nullopt;
  return *RS;
}

static inline iterator_range<BasicBlock::iterator>
getInsertionRange(BasicBlock &BB) {
  auto End = BB.getTerminatingMustTailCall() ? std::prev(BB.end()) : BB.end();
  return make_range(BB.getFirstInsertionPt(), End);
}

void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  SmallVector<Instruction *, 32> Insts;
  for (Instruction &I : getInsertionRange(BB))
    Insts.push_back(&I);
  if (Insts.size() < 1)
    return;

  // Choose an insertion point for our new instruction.
  size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);

  auto InstsBefore = ArrayRef(Insts).slice(0, IP);
  auto InstsAfter = ArrayRef(Insts).slice(IP);

  // Choose a source, which will be used to constrain the operation selection.
  SmallVector<Value *, 2> Srcs;
  Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));

  // Choose an operation that's constrained to be valid for the type of the
  // source, collect any other sources it needs, and then build it.
  auto OpDesc = chooseOperation(Srcs[0], IB);
  // Bail if no operation was found.
  if (!OpDesc)
    return;

  for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
    Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));

  if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
    // Find a sink and wire up the results of the operation.
    IB.connectToSink(BB, InstsAfter, Op);
  }
}

uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
                                          uint64_t CurrentWeight) {
  // If we have less than 200 bytes left, panic and try to always delete.
  if (CurrentSize > MaxSize - 200)
    return CurrentWeight ? CurrentWeight * 100 : 1;
  // Draw a line starting from when we only have 1k left and increasing
  // linearly to double the current weight.
  int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
                 (static_cast<int64_t>(MaxSize) -
                  static_cast<int64_t>(CurrentSize) - 1000) /
                 1000;
  // Clamp negative weights to zero.
  if (Line < 0)
    return 0;
  return Line;
}

void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  auto RS = makeSampler<Instruction *>(IB.Rand);
  for (Instruction &Inst : instructions(F)) {
    // TODO: We can't handle these instructions.
    if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
        isa<PHINode>(Inst))
      continue;

    RS.sample(&Inst, /*Weight=*/1);
  }
  if (RS.isEmpty())
    return;

  // Delete the instruction.
  mutate(*RS.getSelection(), IB);
  // Clean up any dead code that's left over after removing the instruction.
  eliminateDeadCode(F);
}

void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
  assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");

  if (Inst.getType()->isVoidTy()) {
    // Instructions with void type (i.e., store) have no uses to worry about.
    // Just erase it and move on.
    Inst.eraseFromParent();
    return;
  }

  // Otherwise we need to find some other value with the right type to keep the
  // users happy.
  auto Pred = fuzzerop::onlyType(Inst.getType());
  auto RS = makeSampler<Value *>(IB.Rand);
  SmallVector<Instruction *, 32> InstsBefore;
  BasicBlock *BB = Inst.getParent();
  for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
       ++I) {
    if (Pred.matches({}, &*I))
      RS.sample(&*I, /*Weight=*/1);
    InstsBefore.push_back(&*I);
  }
  if (!RS)
    RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);

  Inst.replaceAllUsesWith(RS.getSelection());
  Inst.eraseFromParent();
}

void InstModificationIRStrategy::mutate(Instruction &Inst,
                                        RandomIRBuilder &IB) {
  SmallVector<std::function<void()>, 8> Modifications;
  CmpInst *CI = nullptr;
  GetElementPtrInst *GEP = nullptr;
  switch (Inst.getOpcode()) {
  default:
    break;
  // Add nsw, nuw flags.
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::Sub:
  case Instruction::Shl:
    Modifications.push_back(
        [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
    Modifications.push_back(
        [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
    break;
  case Instruction::ICmp:
    CI = cast<ICmpInst>(&Inst);
    for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
         p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
      Modifications.push_back(
          [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
    }
    break;
  // Add inbounds flag.
  case Instruction::GetElementPtr:
    GEP = cast<GetElementPtrInst>(&Inst);
    Modifications.push_back(
        [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
    break;
  // Add exact flag.
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::LShr:
  case Instruction::AShr:
    Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
    break;

  case Instruction::FCmp:
    CI = cast<FCmpInst>(&Inst);
    for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
         p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
      Modifications.push_back(
          [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
    }
    break;
  }

  // Add fast math flags if possible.
  if (isa<FPMathOperator>(&Inst)) {
    // Try setting everything unless they are already on.
    Modifications.push_back(
        [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
    // Try unsetting everything unless they are already off.
    Modifications.push_back(
        [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
    // Individual setting by flipping the bit.
    Modifications.push_back(
        [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
    Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
    Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
  }

  // Randomly swap operands of instructions.
  std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
  switch (Inst.getOpcode()) {
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::FDiv:
  case Instruction::FRem: {
    // Verify that after the shuffle the second operand is not constant 0.
    Value *Operand = Inst.getOperand(0);
    if (Constant *C = dyn_cast<Constant>(Operand)) {
      if (!C->isZeroValue()) {
        ShuffleItems = {0, 1};
      }
    }
    break;
  }
  case Instruction::Select:
    ShuffleItems = {1, 2};
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::ShuffleVector:
    ShuffleItems = {0, 1};
    break;
  }
  if (ShuffleItems != NoneItem) {
    Modifications.push_back([&Inst, &ShuffleItems]() {
      Value *Op0 = Inst.getOperand(ShuffleItems.first);
      Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
      Inst.setOperand(ShuffleItems.second, Op0);
    });
  }

  auto RS = makeSampler(IB.Rand, Modifications);
  if (RS)
    RS.getSelection()();
}

/// Return a case value that is not already taken to make sure we don't have
/// two cases with the same value.
static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
                                   uint64_t MaxValue, RandomIRBuilder &IB) {
  uint64_t tmp;
  do {
    tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
  } while (CasesTaken.count(tmp) != 0);
  CasesTaken.insert(tmp);
  return tmp;
}

void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  Module *M = BB.getParent()->getParent();
  // If nullptr is selected, we will create a new function declaration.
  SmallVector<Function *, 32> Functions({nullptr});
  for (Function &F : M->functions()) {
    Functions.push_back(&F);
  }

  auto RS = makeSampler(IB.Rand, Functions);
  Function *F = RS.getSelection();
  // Some functions accept metadata type or token type as arguments.
  // We don't call those functions for now.
  // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
  // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
  auto IsUnsupportedTy = [](Type *T) {
    return T->isMetadataTy() || T->isTokenTy();
  };
  if (!F || IsUnsupportedTy(F->getReturnType()) ||
      any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
    F = IB.createFunctionDeclaration(*M);
  }

  FunctionType *FTy = F->getFunctionType();
  SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
  if (!F->arg_empty()) {
    for (Type *ArgTy : FTy->params()) {
      SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
    }
  }
  bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
  auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
                                         Instruction *Inst) {
    StringRef Name = isRetVoid ? nullptr : "C";
    CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, Inst);
    // Don't return this call inst if it returns void, as it can't be sunk.
    return isRetVoid ? nullptr : Call;
  };

  SmallVector<Instruction *, 32> Insts;
  for (Instruction &I : getInsertionRange(BB))
    Insts.push_back(&I);
  if (Insts.size() < 1)
    return;

  // Choose an insertion point for our new call instruction.
  uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);

  auto InstsBefore = ArrayRef(Insts).slice(0, IP);
  auto InstsAfter = ArrayRef(Insts).slice(IP);

  // Choose a source, which will be used to constrain the operation selection.
  SmallVector<Value *, 2> Srcs;

  for (const auto &Pred : ArrayRef(SourcePreds)) {
    Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
  }

  if (Value *Op = BuilderFunc(Srcs, Insts[IP])) {
    // Find a sink and wire up the results of the operation.
    IB.connectToSink(BB, InstsAfter, Op);
  }
}

void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  SmallVector<Instruction *, 32> Insts;
  for (Instruction &I : getInsertionRange(BB))
    Insts.push_back(&I);
  if (Insts.size() < 1)
    return;

  // Choose a point where we split the block.
  uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
  auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);

  // `Sink` inherits Block's terminator; `Source` will get a BranchInst that
  // directly jumps to `Sink`. Here, we have to create a new terminator for
  // `Source`.
  BasicBlock *Block = Insts[IP]->getParent();
  BasicBlock *Source = Block;
  BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");

  Function *F = BB.getParent();
  LLVMContext &C = F->getParent()->getContext();
  // A coin flip decides if it is a branch or a switch.
  if (uniform<uint64_t>(IB.Rand, 0, 1)) {
    // Branch
    BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
    BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
    Value *Cond =
        IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
                              fuzzerop::onlyType(Type::getInt1Ty(C)), false);
    BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
    // Remove the old terminator.
    ReplaceInstWithInst(Source->getTerminator(), Branch);
    // Connect these blocks to `Sink`.
    connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
  } else {
    // Switch
    // Determine the integer type; it IS possible we use a boolean to switch.
    auto RS =
        makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
                      return Ty->isIntegerTy();
                    }));
    assert(RS && "There is no integer type in all allowed types, is the "
                 "setting correct?");
    Type *Ty = RS.getSelection();
    IntegerType *IntTy = cast<IntegerType>(Ty);

    uint64_t BitSize = IntTy->getBitWidth();
    uint64_t MaxCaseVal =
        (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
    // Create the switch inst in Block.
    Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
                                        fuzzerop::onlyType(IntTy), false);
    BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
    uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
    NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
    SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
    // Remove the old terminator.
    ReplaceInstWithInst(Source->getTerminator(), Switch);

    // Create blocks and assign each block a case value.
    SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
    SmallSet<uint64_t, 4> CasesTaken;
    for (uint64_t i = 0; i < NumCases; i++) {
      uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
      BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
      ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
      Switch->addCase(OnValue, CaseBlock);
      Blocks.push_back(CaseBlock);
    }

    // Connect these blocks to `Sink`.
    connectBlocksToSink(Blocks, Sink, IB);
  }
}

/// The caller has to guarantee that these blocks are "empty", i.e. they don't
/// even have a terminator.
void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
                                            BasicBlock *Sink,
                                            RandomIRBuilder &IB) {
  uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
  for (uint64_t i = 0; i < Blocks.size(); i++) {
    // We have at least one block that directly goes to sink.
    CFGToSink ToSink = (i == DirectSinkIdx)
                           ? CFGToSink::DirectSink
                           : static_cast<CFGToSink>(uniform<uint64_t>(
                                 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
    BasicBlock *BB = Blocks[i];
    Function *F = BB->getParent();
    LLVMContext &C = F->getParent()->getContext();
    switch (ToSink) {
    case CFGToSink::Return: {
      Type *RetTy = F->getReturnType();
      Value *RetValue = nullptr;
      if (!RetTy->isVoidTy())
        RetValue =
            IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
      ReturnInst::Create(C, RetValue, BB);
      break;
    }
    case CFGToSink::DirectSink: {
      BranchInst::Create(Sink, BB);
      break;
    }
    case CFGToSink::SinkOrSelfLoop: {
      SmallVector<BasicBlock *, 2> Branches({Sink, BB});
      // A coin flip decides which block is the true branch.
      uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
      Value *Cond = IB.findOrCreateSource(
          *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
      BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
      break;
    }
    case CFGToSink::EndOfCFGToLink:
      llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
    }
  }
}

void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  // Can't insert a PHI node into the entry block.
  if (&BB == &BB.getParent()->getEntryBlock())
    return;
  Type *Ty = IB.randomType();
  PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", &BB.front());

  // Use a map to make sure the same incoming basic block has the same value.
  DenseMap<BasicBlock *, Value *> IncomingValues;
  for (BasicBlock *Pred : predecessors(&BB)) {
    Value *Src = IncomingValues[Pred];
    // If `Pred` is not in the map yet, we'll get a nullptr.
    if (!Src) {
      SmallVector<Instruction *, 32> Insts;
      for (auto I = Pred->begin(); I != Pred->end(); ++I)
        Insts.push_back(&*I);
      // There is no need to inform IB what previously used values are if we
      // are using `onlyType`.
      Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
      IncomingValues[Pred] = Src;
    }
    PHI->addIncoming(Src, Pred);
  }
  SmallVector<Instruction *, 32> InstsAfter;
  for (Instruction &I : getInsertionRange(BB))
    InstsAfter.push_back(&I);
  IB.connectToSink(BB, InstsAfter, PHI);
}

void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  for (BasicBlock &BB : F) {
    this->mutate(BB, IB);
  }
}

void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  SmallVector<Instruction *, 32> Insts;
  for (Instruction &I : getInsertionRange(BB))
    Insts.push_back(&I);
  if (Insts.size() < 1)
    return;
  // Choose an instruction to sink.
  uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
  Instruction *Inst = Insts[Idx];
  // `Idx + 1` so we don't sink to ourselves.
  auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
  Type *Ty = Inst->getType();
  // Don't sink terminators, void function calls, tokens, etc.
  if (!Ty->isVoidTy() && !Ty->isTokenTy())
    // Find a new sink and wire up the results of the operation.
    IB.connectToSink(BB, InstsAfter, Inst);
}

void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  // A deterministic alternative to SmallPtrSet with the same lookup
  // performance.
  std::map<size_t, Instruction *> AliveInsts;
  std::map<Instruction *, size_t> AliveInstsLookup;
  size_t InsertIdx = 0;
  for (auto &I : make_early_inc_range(make_range(
           BB.getFirstInsertionPt(), BB.getTerminator()->getIterator()))) {
    // First gather all instructions that can be shuffled. Don't take the
    // terminator.
    AliveInsts.insert({InsertIdx, &I});
    AliveInstsLookup.insert({&I, InsertIdx++});
    // Then remove these instructions from the block.
    I.removeFromParent();
  }

  // Shuffle these instructions using topological sort.
  // Returns false if all of the current instruction's dependencies in this
  // block have been shuffled. If so, this instruction can be shuffled too.
  auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
    for (Value *O : AliveInsts[Index]->operands()) {
      Instruction *P = dyn_cast<Instruction>(O);
      if (P && AliveInstsLookup.count(P))
        return true;
    }
    return false;
  };
  // Get all alive instructions that depend on the current instruction.
  // Takes Instruction* instead of index because the instruction is already
  // shuffled.
  auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
    SmallSetVector<size_t, 8> Children;
    for (Value *U : I->users()) {
      Instruction *P = dyn_cast<Instruction>(U);
      if (P && AliveInstsLookup.count(P))
        Children.insert(AliveInstsLookup[P]);
    }
    return Children;
  };
  SmallSet<size_t, 8> RootIndices;
  SmallVector<Instruction *, 8> Insts;
  for (const auto &[Index, Inst] : AliveInsts) {
    if (!hasAliveParent(Index))
      RootIndices.insert(Index);
  }
  // Topological sort by randomly selecting a node without a parent, or root.
  while (!RootIndices.empty()) {
    auto RS = makeSampler<size_t>(IB.Rand);
    for (size_t RootIdx : RootIndices)
      RS.sample(RootIdx, 1);
    size_t RootIdx = RS.getSelection();

    RootIndices.erase(RootIdx);
    Instruction *Root = AliveInsts[RootIdx];
    AliveInsts.erase(RootIdx);
    AliveInstsLookup.erase(Root);
    Insts.push_back(Root);

    for (size_t Child : getAliveChildren(Root)) {
      if (!hasAliveParent(Child)) {
        RootIndices.insert(Child);
      }
    }
  }

  Instruction *Terminator = BB.getTerminator();
  // Then put instructions back.
  for (Instruction *I : Insts) {
    I->insertBefore(Terminator);
  }
}

std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
                                          LLVMContext &Context) {

  if (Size <= 1)
    // We get bogus data given an empty corpus - just create a new module.
    return std::make_unique<Module>("M", Context);

  auto Buffer = MemoryBuffer::getMemBuffer(
      StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
      /*RequiresNullTerminator=*/false);

  SMDiagnostic Err;
  auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
  if (Error E = M.takeError()) {
    errs() << toString(std::move(E)) << "\n";
    return nullptr;
  }
  return std::move(M.get());
}

size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
  std::string Buf;
  {
    raw_string_ostream OS(Buf);
    WriteBitcodeToFile(M, OS);
  }
  if (Buf.size() > MaxSize)
    return 0;
  memcpy(Dest, Buf.data(), Buf.size());
  return Buf.size();
}

std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
                                             LLVMContext &Context) {
  auto M = parseModule(Data, Size, Context);
  if (!M || verifyModule(*M, &errs()))
    return nullptr;

  return M;
}