//===-- IRMutator.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/FuzzMutate/IRMutator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/FuzzMutate/Operations.h"
#include "llvm/FuzzMutate/Random.h"
#include "llvm/FuzzMutate/RandomIRBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <map>
#include <optional>

using namespace llvm;

void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
  auto RS = makeSampler<Function *>(IB.Rand);
  for (Function &F : M)
    if (!F.isDeclaration())
      RS.sample(&F, /*Weight=*/1);

  while (RS.totalWeight() < IB.MinFunctionNum) {
    Function *F = IB.createFunctionDefinition(M);
    RS.sample(F, /*Weight=*/1);
  }
  mutate(*RS.getSelection(), IB);
}

void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  auto Range = make_filter_range(make_pointer_range(F),
                                 [](BasicBlock *BB) { return !BB->isEHPad(); });

  mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
}

void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
}

size_t llvm::IRMutator::getModuleSize(const Module &M) {
  return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
}

void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
  std::vector<Type *> Types;
  for (const auto &Getter : AllowedTypes)
    Types.push_back(Getter(M.getContext()));
  RandomIRBuilder IB(Seed, Types);

  size_t CurSize = IRMutator::getModuleSize(M);
  auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
  for (const auto &Strategy : Strategies)
    RS.sample(Strategy.get(),
              Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
  if (RS.totalWeight() == 0)
    return;
  auto Strategy = RS.getSelection();

  Strategy->mutate(M, IB);
}

static void eliminateDeadCode(Function &F) {
  FunctionPassManager FPM;
  FPM.addPass(DCEPass());
  FunctionAnalysisManager FAM;
  FAM.registerPass([&] { return TargetLibraryAnalysis(); });
  FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
  FPM.run(F, FAM);
}

void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  IRMutationStrategy::mutate(F, IB);
  eliminateDeadCode(F);
}

std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
  std::vector<fuzzerop::OpDescriptor> Ops;
  describeFuzzerIntOps(Ops);
  describeFuzzerFloatOps(Ops);
  describeFuzzerControlFlowOps(Ops);
  describeFuzzerPointerOps(Ops);
  describeFuzzerAggregateOps(Ops);
  describeFuzzerVectorOps(Ops);
  return Ops;
}

/// Pick an operation whose first source predicate accepts Src. Any remaining
/// source operands are found or created by the caller.
std::optional<fuzzerop::OpDescriptor>
InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
  auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
    return Op.SourcePreds[0].matches({}, Src);
  };
  auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
  if (RS.isEmpty())
    return std::nullopt;
  return *RS;
}

static inline iterator_range<BasicBlock::iterator>
getInsertionRange(BasicBlock &BB) {
  auto End = BB.getTerminatingMustTailCall() ? std::prev(BB.end()) : BB.end();
  return make_range(BB.getFirstInsertionPt(), End);
}

void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  SmallVector<Instruction *, 32> Insts;
  for (Instruction &I : getInsertionRange(BB))
    Insts.push_back(&I);
  if (Insts.size() < 1)
    return;

  // Choose an insertion point for our new instruction.
  size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);

  auto InstsBefore = ArrayRef(Insts).slice(0, IP);
  auto InstsAfter = ArrayRef(Insts).slice(IP);

  // Choose a source, which will be used to constrain the operation selection.
  SmallVector<Value *, 2> Srcs;
  Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));

  // Choose an operation that's constrained to be valid for the type of the
  // source, collect any other sources it needs, and then build it.
  auto OpDesc = chooseOperation(Srcs[0], IB);
  // Bail if no operation was found.
  if (!OpDesc)
    return;

  for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
    Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));

  if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
    // Find a sink and wire up the results of the operation.
    IB.connectToSink(BB, InstsAfter, Op);
  }
}

uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
                                          uint64_t CurrentWeight) {
  // If fewer than 200 bytes remain before MaxSize, panic and try to always
  // delete.
  if (CurrentSize > MaxSize - 200)
    return CurrentWeight ? CurrentWeight * 100 : 1;
  // Draw a line that starts at zero when 1k bytes remain and increases
  // linearly to double the current weight as the size limit is reached.
  int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
                 (static_cast<int64_t>(MaxSize) -
                  static_cast<int64_t>(CurrentSize) - 1000) /
                 1000;
  // Clamp negative weights to zero.
  if (Line < 0)
    return 0;
  return Line;
}

void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  auto RS = makeSampler<Instruction *>(IB.Rand);
  for (Instruction &Inst : instructions(F)) {
    // TODO: We can't handle these instructions.
    if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
        isa<PHINode>(Inst))
      continue;

    RS.sample(&Inst, /*Weight=*/1);
  }
  if (RS.isEmpty())
    return;

  // Delete the instruction.
  mutate(*RS.getSelection(), IB);
  // Clean up any dead code that's left over after removing the instruction.
  eliminateDeadCode(F);
}

/// Delete \p Inst, first rewiring any users to another value of the same type
/// (or to a newly created source) so the remaining IR stays valid.
void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
  assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");

  if (Inst.getType()->isVoidTy()) {
    // Instructions with void type (i.e., store) have no uses to worry about.
    // Just erase them and move on.
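    // (eraseFromParent unlinks the instruction from its block and deletes it.)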
    Inst.eraseFromParent();
    return;
  }

  // Otherwise we need to find some other value with the right type to keep the
  // users happy.
  auto Pred = fuzzerop::onlyType(Inst.getType());
  auto RS = makeSampler<Value *>(IB.Rand);
  SmallVector<Instruction *, 32> InstsBefore;
  BasicBlock *BB = Inst.getParent();
  for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
       ++I) {
    if (Pred.matches({}, &*I))
      RS.sample(&*I, /*Weight=*/1);
    InstsBefore.push_back(&*I);
  }
  if (!RS)
    RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);

  Inst.replaceAllUsesWith(RS.getSelection());
  Inst.eraseFromParent();
}

void InstModificationIRStrategy::mutate(Instruction &Inst,
                                        RandomIRBuilder &IB) {
  SmallVector<std::function<void()>, 8> Modifications;
  CmpInst *CI = nullptr;
  GetElementPtrInst *GEP = nullptr;
  switch (Inst.getOpcode()) {
  default:
    break;
  // Toggle the nsw and nuw flags.
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::Sub:
  case Instruction::Shl:
    Modifications.push_back(
        [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
    Modifications.push_back(
        [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
    break;
  case Instruction::ICmp:
    CI = cast<ICmpInst>(&Inst);
    for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
         p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
      Modifications.push_back(
          [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
    }
    break;
  // Toggle the inbounds flag.
  case Instruction::GetElementPtr:
    GEP = cast<GetElementPtrInst>(&Inst);
    Modifications.push_back(
        [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
    break;
  // Toggle the exact flag.
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::LShr:
  case Instruction::AShr:
    Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
    break;

  case Instruction::FCmp:
    CI = cast<FCmpInst>(&Inst);
    for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
         p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
      Modifications.push_back(
          [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
    }
    break;
  }

  // Add fast math flags if possible.
  if (isa<FPMathOperator>(&Inst)) {
    // Try setting everything unless the flags are already all on.
    Modifications.push_back(
        [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
    // Try unsetting everything unless the flags are already all off.
    Modifications.push_back(
        [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
    // Toggle individual flags by flipping their bits.
    Modifications.push_back(
        [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
    Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
    Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
  }

  // Randomly swap the operands of instructions.
  std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
  switch (Inst.getOpcode()) {
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::FDiv:
  case Instruction::FRem: {
    // Only swap the operands when the first operand is a non-zero constant, so
    // the swap cannot place a constant 0 in the divisor position.
    Value *Operand = Inst.getOperand(0);
    if (Constant *C = dyn_cast<Constant>(Operand)) {
      if (!C->isZeroValue()) {
        ShuffleItems = {0, 1};
      }
    }
    break;
  }
  case Instruction::Select:
    ShuffleItems = {1, 2};
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::ShuffleVector:
    ShuffleItems = {0, 1};
    break;
  }
  if (ShuffleItems != NoneItem) {
    Modifications.push_back([&Inst, &ShuffleItems]() {
      Value *Op0 = Inst.getOperand(ShuffleItems.first);
      Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
      Inst.setOperand(ShuffleItems.second, Op0);
    });
  }

  // Pick one of the collected modifications at random and apply it.
  auto RS = makeSampler(IB.Rand, Modifications);
  if (RS)
    RS.getSelection()();
}

/// Return a case value that is not already taken, to make sure we don't have
/// two cases with the same value.
static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
                                   uint64_t MaxValue, RandomIRBuilder &IB) {
  uint64_t tmp;
  do {
    tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
  } while (CasesTaken.count(tmp) != 0);
  CasesTaken.insert(tmp);
  return tmp;
}

/// Insert a call to a randomly chosen function in the module (or to a newly
/// created declaration) at a random point in \p BB, finding or creating
/// sources for the arguments.
void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  Module *M = BB.getParent()->getParent();
  // If nullptr is selected, we will create a new function declaration.
  SmallVector<Function *, 32> Functions({nullptr});
  for (Function &F : M->functions()) {
    Functions.push_back(&F);
  }

  auto RS = makeSampler(IB.Rand, Functions);
  Function *F = RS.getSelection();
  // Some functions accept metadata type or token type as arguments.
  // We don't call those functions for now.
  // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
  // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
  auto IsUnsupportedTy = [](Type *T) {
    return T->isMetadataTy() || T->isTokenTy();
  };
  if (!F || IsUnsupportedTy(F->getReturnType()) ||
      any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
    F = IB.createFunctionDeclaration(*M);
  }

  FunctionType *FTy = F->getFunctionType();
  SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
  if (!F->arg_empty()) {
    for (Type *ArgTy : FTy->params()) {
      SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
    }
  }
  bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
  auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
                                         Instruction *Inst) {
    StringRef Name = isRetVoid ? nullptr : "C";
    CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, Inst);
    // Don't return this call inst if it returns void, as it can't be sunk.
    return isRetVoid ? nullptr : Call;
  };

  SmallVector<Instruction *, 32> Insts;
  for (Instruction &I : getInsertionRange(BB))
    Insts.push_back(&I);
  if (Insts.size() < 1)
    return;

  // Choose an insertion point for our new call instruction.
  uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);

  auto InstsBefore = ArrayRef(Insts).slice(0, IP);
  auto InstsAfter = ArrayRef(Insts).slice(IP);

  // Choose sources that satisfy the callee's parameter types.
  SmallVector<Value *, 2> Srcs;

  for (const auto &Pred : ArrayRef(SourcePreds)) {
    Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
  }

  if (Value *Op = BuilderFunc(Srcs, Insts[IP])) {
    // Find a sink and wire up the results of the operation.
    IB.connectToSink(BB, InstsAfter, Op);
  }
}

/// Split \p BB at a random instruction, replace the fall-through with either a
/// conditional branch or a switch, and connect the newly created blocks back
/// toward the split-off half of the block via connectBlocksToSink.
void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  SmallVector<Instruction *, 32> Insts;
  for (Instruction &I : getInsertionRange(BB))
    Insts.push_back(&I);
  if (Insts.size() < 1)
    return;

  // Choose a point where we split the block.
  uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
  auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);

  // `Sink` inherits the block's terminator, and `Source` is left with a
  // BranchInst that jumps directly to `Sink`. Here, we have to create a new
  // terminator for `Source`.
  BasicBlock *Block = Insts[IP]->getParent();
  BasicBlock *Source = Block;
  BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");

  Function *F = BB.getParent();
  LLVMContext &C = F->getParent()->getContext();
  // A coin flip decides whether we insert a branch or a switch.
  if (uniform<uint64_t>(IB.Rand, 0, 1)) {
    // Branch
    BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
    BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
    Value *Cond =
        IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
                              fuzzerop::onlyType(Type::getInt1Ty(C)), false);
    BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
    // Remove the old terminator.
    ReplaceInstWithInst(Source->getTerminator(), Branch);
    // Connect these blocks to `Sink`.
    connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
  } else {
    // Switch
    // Determine the integer type; it is possible that we switch on a boolean.
    auto RS =
        makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
                      return Ty->isIntegerTy();
                    }));
    assert(RS && "There is no integer type in all allowed types, is the "
                 "setting correct?");
    Type *Ty = RS.getSelection();
    IntegerType *IntTy = cast<IntegerType>(Ty);

    uint64_t BitSize = IntTy->getBitWidth();
    uint64_t MaxCaseVal =
        (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
    // Create the switch instruction in Block.
    Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
                                        fuzzerop::onlyType(IntTy), false);
    BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
    uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
    NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
    SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
    // Remove the old terminator.
    ReplaceInstWithInst(Source->getTerminator(), Switch);

    // Create the case blocks and assign each one a unique case value.
    SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
    SmallSet<uint64_t, 4> CasesTaken;
    for (uint64_t i = 0; i < NumCases; i++) {
      uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
      BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
      ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
      Switch->addCase(OnValue, CaseBlock);
      Blocks.push_back(CaseBlock);
    }

    // Connect these blocks to `Sink`.
    connectBlocksToSink(Blocks, Sink, IB);
  }
}

/// The caller has to guarantee that these blocks are "empty", i.e. they don't
/// even have terminators.
void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
                                            BasicBlock *Sink,
                                            RandomIRBuilder &IB) {
  uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
  for (uint64_t i = 0; i < Blocks.size(); i++) {
    // We have at least one block that goes directly to the sink.
    CFGToSink ToSink = (i == DirectSinkIdx)
                           ? CFGToSink::DirectSink
                           : static_cast<CFGToSink>(uniform<uint64_t>(
                                 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
    BasicBlock *BB = Blocks[i];
    Function *F = BB->getParent();
    LLVMContext &C = F->getParent()->getContext();
    switch (ToSink) {
    case CFGToSink::Return: {
      Type *RetTy = F->getReturnType();
      Value *RetValue = nullptr;
      if (!RetTy->isVoidTy())
        RetValue =
            IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
      ReturnInst::Create(C, RetValue, BB);
      break;
    }
    case CFGToSink::DirectSink: {
      BranchInst::Create(Sink, BB);
      break;
    }
    case CFGToSink::SinkOrSelfLoop: {
      SmallVector<BasicBlock *, 2> Branches({Sink, BB});
      // A coin flip decides which block is the true branch.
      uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
      Value *Cond = IB.findOrCreateSource(
          *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
      BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
      break;
    }
    case CFGToSink::EndOfCFGToLink:
      llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
    }
  }
}

/// Insert a PHI node of a random type at the top of \p BB, with one incoming
/// value per predecessor.
void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  // Can't insert a PHI node into the entry block.
  if (&BB == &BB.getParent()->getEntryBlock())
    return;
  Type *Ty = IB.randomType();
  PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", &BB.front());

  // Use a map to make sure the same incoming basic block has the same value.
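  // (A block can appear as a predecessor more than once, e.g. when several
  // switch cases target BB; each repeated edge must reuse the same value.)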
  DenseMap<BasicBlock *, Value *> IncomingValues;
  for (BasicBlock *Pred : predecessors(&BB)) {
    Value *Src = IncomingValues[Pred];
    // If `Pred` is not in the map yet, we'll get a nullptr.
    if (!Src) {
      SmallVector<Instruction *, 32> Insts;
      for (auto I = Pred->begin(); I != Pred->end(); ++I)
        Insts.push_back(&*I);
      // There is no need to inform IB what previously used values are if we
      // are using `onlyType`.
      Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
      IncomingValues[Pred] = Src;
    }
    PHI->addIncoming(Src, Pred);
  }
  SmallVector<Instruction *, 32> InstsAfter;
  for (Instruction &I : getInsertionRange(BB))
    InstsAfter.push_back(&I);
  IB.connectToSink(BB, InstsAfter, PHI);
}

void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  for (BasicBlock &BB : F) {
    this->mutate(BB, IB);
  }
}

/// Pick a random instruction in \p BB and connect its result to a sink later
/// in the block.
void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  SmallVector<Instruction *, 32> Insts;
  for (Instruction &I : getInsertionRange(BB))
    Insts.push_back(&I);
  if (Insts.size() < 1)
    return;
  // Choose an instruction to sink.
  uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
  Instruction *Inst = Insts[Idx];
  // `Idx + 1` so we don't sink to ourselves.
  auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
  Type *Ty = Inst->getType();
  // Don't sink terminators, void function calls, tokens, etc.
  if (!Ty->isVoidTy() && !Ty->isTokenTy())
    // Find a new sink and wire up the results of the operation.
    IB.connectToSink(BB, InstsAfter, Inst);
}

/// Reorder the non-terminator instructions of \p BB into a random order that
/// still respects their def-use dependencies (a randomized topological sort).
void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  // A deterministic alternative to SmallPtrSet with the same lookup
  // performance.
  std::map<size_t, Instruction *> AliveInsts;
  std::map<Instruction *, size_t> AliveInstsLookup;
  size_t InsertIdx = 0;
  for (auto &I : make_early_inc_range(make_range(
           BB.getFirstInsertionPt(), BB.getTerminator()->getIterator()))) {
    // First gather all instructions that can be shuffled. Don't take the
    // terminator.
    AliveInsts.insert({InsertIdx, &I});
    AliveInstsLookup.insert({&I, InsertIdx++});
    // Then remove these instructions from the block.
    I.removeFromParent();
  }

  // Shuffle these instructions using topological sort.
  // Returns true if the instruction at Index still has an operand that hasn't
  // been placed back into the block; once this returns false, the instruction
  // itself can be shuffled back in.
  auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
    for (Value *O : AliveInsts[Index]->operands()) {
      Instruction *P = dyn_cast<Instruction>(O);
      if (P && AliveInstsLookup.count(P))
        return true;
    }
    return false;
  };
  // Get all alive instructions that depend on the current instruction.
  // Takes Instruction* instead of an index because the instruction has already
  // been shuffled.
  auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
    SmallSetVector<size_t, 8> Children;
    for (Value *U : I->users()) {
      Instruction *P = dyn_cast<Instruction>(U);
      if (P && AliveInstsLookup.count(P))
        Children.insert(AliveInstsLookup[P]);
    }
    return Children;
  };
  SmallSet<size_t, 8> RootIndices;
  SmallVector<Instruction *, 8> Insts;
  for (const auto &[Index, Inst] : AliveInsts) {
    if (!hasAliveParent(Index))
      RootIndices.insert(Index);
  }
  // Topological sort by randomly selecting a node without a parent, i.e. a
  // root.
  while (!RootIndices.empty()) {
    auto RS = makeSampler<size_t>(IB.Rand);
    for (size_t RootIdx : RootIndices)
      RS.sample(RootIdx, 1);
    size_t RootIdx = RS.getSelection();

    RootIndices.erase(RootIdx);
    Instruction *Root = AliveInsts[RootIdx];
    AliveInsts.erase(RootIdx);
    AliveInstsLookup.erase(Root);
    Insts.push_back(Root);

    for (size_t Child : getAliveChildren(Root)) {
      if (!hasAliveParent(Child)) {
        RootIndices.insert(Child);
      }
    }
  }

  Instruction *Terminator = BB.getTerminator();
  // Then put the instructions back in their new order.
  for (Instruction *I : Insts) {
    I->insertBefore(Terminator);
  }
}

std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
                                          LLVMContext &Context) {

  if (Size <= 1)
    // We get bogus data given an empty corpus - just create a new module.
    return std::make_unique<Module>("M", Context);

  auto Buffer = MemoryBuffer::getMemBuffer(
      StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
      /*RequiresNullTerminator=*/false);

  SMDiagnostic Err;
  auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
  if (Error E = M.takeError()) {
    errs() << toString(std::move(E)) << "\n";
    return nullptr;
  }
  return std::move(M.get());
}

size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
  std::string Buf;
  {
    raw_string_ostream OS(Buf);
    WriteBitcodeToFile(M, OS);
  }
  if (Buf.size() > MaxSize)
    return 0;
  memcpy(Dest, Buf.data(), Buf.size());
  return Buf.size();
}

std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
                                             LLVMContext &Context) {
  auto M = parseModule(Data, Size, Context);
  if (!M || verifyModule(*M, &errs()))
    return nullptr;

  return M;
}