1 //===-- IRMutator.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/FuzzMutate/IRMutator.h" 10 #include "llvm/ADT/STLExtras.h" 11 #include "llvm/ADT/SmallSet.h" 12 #include "llvm/Analysis/TargetLibraryInfo.h" 13 #include "llvm/Bitcode/BitcodeReader.h" 14 #include "llvm/Bitcode/BitcodeWriter.h" 15 #include "llvm/FuzzMutate/Operations.h" 16 #include "llvm/FuzzMutate/Random.h" 17 #include "llvm/FuzzMutate/RandomIRBuilder.h" 18 #include "llvm/IR/BasicBlock.h" 19 #include "llvm/IR/FMF.h" 20 #include "llvm/IR/Function.h" 21 #include "llvm/IR/InstIterator.h" 22 #include "llvm/IR/Instructions.h" 23 #include "llvm/IR/IntrinsicInst.h" 24 #include "llvm/IR/IntrinsicsAMDGPU.h" 25 #include "llvm/IR/Module.h" 26 #include "llvm/IR/Operator.h" 27 #include "llvm/IR/PassInstrumentation.h" 28 #include "llvm/IR/Verifier.h" 29 #include "llvm/Support/MemoryBuffer.h" 30 #include "llvm/Support/SourceMgr.h" 31 #include "llvm/Transforms/Scalar/DCE.h" 32 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 33 #include <map> 34 #include <optional> 35 36 using namespace llvm; 37 38 void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) { 39 auto RS = makeSampler<Function *>(IB.Rand); 40 for (Function &F : M) 41 if (!F.isDeclaration()) 42 RS.sample(&F, /*Weight=*/1); 43 44 while (RS.totalWeight() < IB.MinFunctionNum) { 45 Function *F = IB.createFunctionDefinition(M); 46 RS.sample(F, /*Weight=*/1); 47 } 48 mutate(*RS.getSelection(), IB); 49 } 50 51 void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) { 52 auto Range = make_filter_range(make_pointer_range(F), 53 [](BasicBlock *BB) { return !BB->isEHPad(); }); 54 55 mutate(*makeSampler(IB.Rand, Range).getSelection(), IB); 56 } 57 58 void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { 59 mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB); 60 } 61 62 size_t llvm::IRMutator::getModuleSize(const Module &M) { 63 return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size(); 64 } 65 66 void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) { 67 std::vector<Type *> Types; 68 for (const auto &Getter : AllowedTypes) 69 Types.push_back(Getter(M.getContext())); 70 RandomIRBuilder IB(Seed, Types); 71 72 size_t CurSize = IRMutator::getModuleSize(M); 73 auto RS = makeSampler<IRMutationStrategy *>(IB.Rand); 74 for (const auto &Strategy : Strategies) 75 RS.sample(Strategy.get(), 76 Strategy->getWeight(CurSize, MaxSize, RS.totalWeight())); 77 if (RS.totalWeight() == 0) 78 return; 79 auto Strategy = RS.getSelection(); 80 81 Strategy->mutate(M, IB); 82 } 83 84 static void eliminateDeadCode(Function &F) { 85 FunctionPassManager FPM; 86 FPM.addPass(DCEPass()); 87 FunctionAnalysisManager FAM; 88 FAM.registerPass([&] { return TargetLibraryAnalysis(); }); 89 FAM.registerPass([&] { return PassInstrumentationAnalysis(); }); 90 FPM.run(F, FAM); 91 } 92 93 void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) { 94 IRMutationStrategy::mutate(F, IB); 95 eliminateDeadCode(F); 96 } 97 98 std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() { 99 std::vector<fuzzerop::OpDescriptor> Ops; 100 describeFuzzerIntOps(Ops); 101 describeFuzzerFloatOps(Ops); 102 describeFuzzerControlFlowOps(Ops); 103 describeFuzzerPointerOps(Ops); 104 describeFuzzerAggregateOps(Ops); 105 describeFuzzerVectorOps(Ops); 106 return Ops; 107 } 108 109 std::optional<fuzzerop::OpDescriptor> 110 InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) { 111 auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) { 112 return Op.SourcePreds[0].matches({}, Src); 113 }; 114 auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred)); 115 if (RS.isEmpty()) 116 return std::nullopt; 117 return *RS; 118 } 119 120 static inline Instruction *getEffectiveTerminator(BasicBlock &BB) { 121 if (Instruction *I = BB.getTerminatingMustTailCall()) { 122 return I; 123 } else { 124 // Certain intrinsics, such as @llvm.amdgcn.cs.chain, must be immediately 125 // followed by an unreachable instruction.. 126 if (UnreachableInst *UI = dyn_cast<UnreachableInst>(BB.getTerminator())) { 127 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI->getPrevNode())) { 128 return II; 129 } 130 } 131 } 132 133 return BB.getTerminator(); 134 } 135 136 static inline BasicBlock::iterator getEndIterator(BasicBlock &BB) { 137 auto End = BB.end(); 138 139 if (BB.empty()) { 140 return End; 141 } 142 143 Instruction *EffectiveTerminator = getEffectiveTerminator(BB); 144 if (EffectiveTerminator != BB.getTerminator()) { 145 // Adjust range for special cases such as tail call. 146 End = std::prev(BB.end()); 147 } 148 149 return End; 150 } 151 152 static inline iterator_range<BasicBlock::iterator> 153 getInsertionRange(BasicBlock &BB) { 154 auto End = getEndIterator(BB); 155 return make_range(BB.getFirstInsertionPt(), End); 156 } 157 158 void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { 159 SmallVector<Instruction *, 32> Insts( 160 llvm::make_pointer_range(getInsertionRange(BB))); 161 if (Insts.size() < 1) 162 return; 163 164 // Choose an insertion point for our new instruction. 165 size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1); 166 167 auto InstsBefore = ArrayRef(Insts).slice(0, IP); 168 auto InstsAfter = ArrayRef(Insts).slice(IP); 169 170 // Choose a source, which will be used to constrain the operation selection. 171 SmallVector<Value *, 2> Srcs; 172 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore)); 173 174 // Choose an operation that's constrained to be valid for the type of the 175 // source, collect any other sources it needs, and then build it. 176 auto OpDesc = chooseOperation(Srcs[0], IB); 177 // Bail if no operation was found 178 if (!OpDesc) 179 return; 180 181 for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1)) 182 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred)); 183 184 if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP]->getIterator())) { 185 // Find a sink and wire up the results of the operation. 186 IB.connectToSink(BB, InstsAfter, Op); 187 } 188 } 189 190 uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize, 191 uint64_t CurrentWeight) { 192 // If we have less than 200 bytes, panic and try to always delete. 193 if (CurrentSize > MaxSize - 200) 194 return CurrentWeight ? CurrentWeight * 100 : 1; 195 // Draw a line starting from when we only have 1k left and increasing linearly 196 // to double the current weight. 197 int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) * 198 (static_cast<int64_t>(MaxSize) - 199 static_cast<int64_t>(CurrentSize) - 1000) / 200 1000; 201 // Clamp negative weights to zero. 202 if (Line < 0) 203 return 0; 204 return Line; 205 } 206 207 void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) { 208 auto RS = makeSampler<Instruction *>(IB.Rand); 209 for (Instruction &Inst : instructions(F)) { 210 // TODO: We can't handle these instructions. 211 if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() || 212 isa<PHINode>(Inst)) 213 continue; 214 215 RS.sample(&Inst, /*Weight=*/1); 216 } 217 if (RS.isEmpty()) 218 return; 219 220 // Delete the instruction. 221 mutate(*RS.getSelection(), IB); 222 // Clean up any dead code that's left over after removing the instruction. 223 eliminateDeadCode(F); 224 } 225 226 void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) { 227 assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG"); 228 229 if (Inst.getType()->isVoidTy()) { 230 // Instructions with void type (ie, store) have no uses to worry about. Just 231 // erase it and move on. 232 Inst.eraseFromParent(); 233 return; 234 } 235 236 // Otherwise we need to find some other value with the right type to keep the 237 // users happy. 238 auto Pred = fuzzerop::onlyType(Inst.getType()); 239 auto RS = makeSampler<Value *>(IB.Rand); 240 SmallVector<Instruction *, 32> InstsBefore; 241 BasicBlock *BB = Inst.getParent(); 242 for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E; 243 ++I) { 244 if (Pred.matches({}, &*I)) 245 RS.sample(&*I, /*Weight=*/1); 246 InstsBefore.push_back(&*I); 247 } 248 if (!RS) 249 RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1); 250 251 Inst.replaceAllUsesWith(RS.getSelection()); 252 Inst.eraseFromParent(); 253 } 254 255 void InstModificationIRStrategy::mutate(Instruction &Inst, 256 RandomIRBuilder &IB) { 257 SmallVector<std::function<void()>, 8> Modifications; 258 CmpInst *CI = nullptr; 259 GetElementPtrInst *GEP = nullptr; 260 switch (Inst.getOpcode()) { 261 default: 262 break; 263 // Add nsw, nuw flag 264 case Instruction::Add: 265 case Instruction::Mul: 266 case Instruction::Sub: 267 case Instruction::Shl: 268 Modifications.push_back( 269 [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); }); 270 Modifications.push_back( 271 [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); }); 272 break; 273 case Instruction::ICmp: 274 CI = cast<ICmpInst>(&Inst); 275 for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE; 276 p <= CmpInst::LAST_ICMP_PREDICATE; p++) { 277 Modifications.push_back( 278 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); }); 279 } 280 break; 281 // Add inbound flag. 282 case Instruction::GetElementPtr: 283 GEP = cast<GetElementPtrInst>(&Inst); 284 Modifications.push_back( 285 [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); }); 286 break; 287 // Add exact flag. 288 case Instruction::UDiv: 289 case Instruction::SDiv: 290 case Instruction::LShr: 291 case Instruction::AShr: 292 Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); }); 293 break; 294 295 case Instruction::FCmp: 296 CI = cast<FCmpInst>(&Inst); 297 for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE; 298 p <= CmpInst::LAST_FCMP_PREDICATE; p++) { 299 Modifications.push_back( 300 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); }); 301 } 302 break; 303 } 304 305 // Add fast math flag if possible. 306 if (isa<FPMathOperator>(&Inst)) { 307 // Try setting everything unless they are already on. 308 Modifications.push_back( 309 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); }); 310 // Try unsetting everything unless they are already off. 311 Modifications.push_back( 312 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); }); 313 // Individual setting by flipping the bit 314 Modifications.push_back( 315 [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); }); 316 Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); }); 317 Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); }); 318 Modifications.push_back( 319 [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); }); 320 Modifications.push_back( 321 [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); }); 322 Modifications.push_back( 323 [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); }); 324 Modifications.push_back( 325 [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); }); 326 } 327 328 // Randomly switch operands of instructions 329 std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem); 330 switch (Inst.getOpcode()) { 331 case Instruction::SDiv: 332 case Instruction::UDiv: 333 case Instruction::SRem: 334 case Instruction::URem: 335 case Instruction::FDiv: 336 case Instruction::FRem: { 337 // Verify that the after shuffle the second operand is not 338 // constant 0. 339 Value *Operand = Inst.getOperand(0); 340 if (Constant *C = dyn_cast<Constant>(Operand)) { 341 if (!C->isZeroValue()) { 342 ShuffleItems = {0, 1}; 343 } 344 } 345 break; 346 } 347 case Instruction::Select: 348 ShuffleItems = {1, 2}; 349 break; 350 case Instruction::Add: 351 case Instruction::Sub: 352 case Instruction::Mul: 353 case Instruction::Shl: 354 case Instruction::LShr: 355 case Instruction::AShr: 356 case Instruction::And: 357 case Instruction::Or: 358 case Instruction::Xor: 359 case Instruction::FAdd: 360 case Instruction::FSub: 361 case Instruction::FMul: 362 case Instruction::ICmp: 363 case Instruction::FCmp: 364 case Instruction::ShuffleVector: 365 ShuffleItems = {0, 1}; 366 break; 367 } 368 if (ShuffleItems != NoneItem) { 369 Modifications.push_back([&Inst, &ShuffleItems]() { 370 Value *Op0 = Inst.getOperand(ShuffleItems.first); 371 Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second)); 372 Inst.setOperand(ShuffleItems.second, Op0); 373 }); 374 } 375 376 auto RS = makeSampler(IB.Rand, Modifications); 377 if (RS) 378 RS.getSelection()(); 379 } 380 381 /// Return a case value that is not already taken to make sure we don't have two 382 /// cases with same value. 383 static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken, 384 uint64_t MaxValue, RandomIRBuilder &IB) { 385 uint64_t tmp; 386 do { 387 tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue); 388 } while (CasesTaken.count(tmp) != 0); 389 CasesTaken.insert(tmp); 390 return tmp; 391 } 392 393 /// Determines whether a function is unsupported by the current mutator's 394 /// implementation. The function returns true if any of the following criteria 395 /// are met: 396 /// * The function accepts metadata or token types as arguments. 397 /// * The function has ABI attributes that could cause UB. 398 /// * The function uses a non-callable CC that may result in UB. 399 static bool isUnsupportedFunction(Function *F) { 400 // Some functions accept metadata type or token type as arguments. 401 // We don't call those functions for now. 402 // For example, `@llvm.dbg.declare(metadata, metadata, metadata)` 403 // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare 404 auto IsUnsupportedTy = [](Type *T) { 405 return T->isMetadataTy() || T->isTokenTy(); 406 }; 407 408 if (IsUnsupportedTy(F->getReturnType()) || 409 any_of(F->getFunctionType()->params(), IsUnsupportedTy)) { 410 return true; 411 } 412 413 // ABI attributes must be specified both at the function 414 // declaration/definition and call-site, otherwise the 415 // behavior may be undefined. 416 // We don't call those functions for now to prevent UB from happening. 417 auto IsABIAttribute = [](AttributeSet A) { 418 static const Attribute::AttrKind ABIAttrs[] = { 419 Attribute::StructRet, Attribute::ByVal, 420 Attribute::InAlloca, Attribute::InReg, 421 Attribute::StackAlignment, Attribute::SwiftSelf, 422 Attribute::SwiftAsync, Attribute::SwiftError, 423 Attribute::Preallocated, Attribute::ByRef, 424 Attribute::ZExt, Attribute::SExt}; 425 426 return llvm::any_of(ABIAttrs, [&](Attribute::AttrKind kind) { 427 return A.hasAttribute(kind); 428 }); 429 }; 430 431 auto FuncAttrs = F->getAttributes(); 432 if (IsABIAttribute(FuncAttrs.getRetAttrs())) { 433 return true; 434 } 435 for (size_t i = 0; i < F->arg_size(); i++) { 436 if (IsABIAttribute(FuncAttrs.getParamAttrs(i))) { 437 return true; 438 } 439 } 440 441 // If it is not satisfied, the IR will be invalid. 442 if (!isCallableCC(F->getCallingConv())) { 443 return true; 444 } 445 446 // This intrinsic has specific requirements for its parameters and the caller 447 // must adhere to certain calling conventions. 448 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain) { 449 return true; 450 } 451 452 return false; 453 } 454 455 void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { 456 Module *M = BB.getParent()->getParent(); 457 // If nullptr is selected, we will create a new function declaration. 458 SmallVector<Function *, 32> Functions({nullptr}); 459 for (Function &F : M->functions()) { 460 Functions.push_back(&F); 461 } 462 463 auto RS = makeSampler(IB.Rand, Functions); 464 Function *F = RS.getSelection(); 465 466 if (!F || isUnsupportedFunction(F)) { 467 F = IB.createFunctionDeclaration(*M); 468 } 469 470 FunctionType *FTy = F->getFunctionType(); 471 SmallVector<fuzzerop::SourcePred, 2> SourcePreds; 472 if (!F->arg_empty()) { 473 for (Type *ArgTy : FTy->params()) { 474 SourcePreds.push_back(fuzzerop::onlyType(ArgTy)); 475 } 476 } 477 bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext())); 478 auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs, 479 BasicBlock::iterator InsertPt) { 480 StringRef Name = isRetVoid ? nullptr : "C"; 481 CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, InsertPt); 482 Call->setCallingConv(F->getCallingConv()); 483 // Don't return this call inst if it return void as it can't be sinked. 484 return isRetVoid ? nullptr : Call; 485 }; 486 487 SmallVector<Instruction *, 32> Insts( 488 llvm::make_pointer_range(getInsertionRange(BB))); 489 if (Insts.size() < 1) 490 return; 491 492 // Choose an insertion point for our new call instruction. 493 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1); 494 495 auto InstsBefore = ArrayRef(Insts).slice(0, IP); 496 auto InstsAfter = ArrayRef(Insts).slice(IP); 497 498 // Choose a source, which will be used to constrain the operation selection. 499 SmallVector<Value *, 2> Srcs; 500 501 for (const auto &Pred : ArrayRef(SourcePreds)) { 502 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred)); 503 } 504 505 if (Value *Op = BuilderFunc(Srcs, Insts[IP]->getIterator())) { 506 // Find a sink and wire up the results of the operation. 507 IB.connectToSink(BB, InstsAfter, Op); 508 } 509 } 510 511 void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { 512 SmallVector<Instruction *, 32> Insts( 513 llvm::make_pointer_range(getInsertionRange(BB))); 514 if (Insts.size() < 1) 515 return; 516 517 // Choose a point where we split the block. 518 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1); 519 auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP); 520 521 // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst 522 // directly jumps to `Sink`. Here, we have to create a new terminator for 523 // `Source`. 524 BasicBlock *Block = Insts[IP]->getParent(); 525 BasicBlock *Source = Block; 526 BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB"); 527 528 Function *F = BB.getParent(); 529 LLVMContext &C = F->getParent()->getContext(); 530 // A coin decides if it is branch or switch 531 if (uniform<uint64_t>(IB.Rand, 0, 1)) { 532 // Branch 533 BasicBlock *IfTrue = BasicBlock::Create(C, "T", F); 534 BasicBlock *IfFalse = BasicBlock::Create(C, "F", F); 535 Value *Cond = 536 IB.findOrCreateSource(*Source, InstsBeforeSplit, {}, 537 fuzzerop::onlyType(Type::getInt1Ty(C)), false); 538 BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond); 539 // Remove the old terminator. 540 ReplaceInstWithInst(Source->getTerminator(), Branch); 541 // Connect these blocks to `Sink` 542 connectBlocksToSink({IfTrue, IfFalse}, Sink, IB); 543 } else { 544 // Switch 545 // Determine Integer type, it IS possible we use a boolean to switch. 546 auto RS = 547 makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) { 548 return Ty->isIntegerTy(); 549 })); 550 assert(RS && "There is no integer type in all allowed types, is the " 551 "setting correct?"); 552 Type *Ty = RS.getSelection(); 553 IntegerType *IntTy = cast<IntegerType>(Ty); 554 555 uint64_t BitSize = IntTy->getBitWidth(); 556 uint64_t MaxCaseVal = 557 (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1; 558 // Create Switch inst in Block 559 Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {}, 560 fuzzerop::onlyType(IntTy), false); 561 BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F); 562 uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases); 563 NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases; 564 SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases); 565 // Remove the old terminator. 566 ReplaceInstWithInst(Source->getTerminator(), Switch); 567 568 // Create blocks, for each block assign a case value. 569 SmallVector<BasicBlock *, 4> Blocks({DefaultBlock}); 570 SmallSet<uint64_t, 4> CasesTaken; 571 for (uint64_t i = 0; i < NumCases; i++) { 572 uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB); 573 BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F); 574 ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal); 575 Switch->addCase(OnValue, CaseBlock); 576 Blocks.push_back(CaseBlock); 577 } 578 579 // Connect these blocks to `Sink` 580 connectBlocksToSink(Blocks, Sink, IB); 581 } 582 } 583 584 /// The caller has to guarantee that these blocks are "empty", i.e. it doesn't 585 /// even have terminator. 586 void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks, 587 BasicBlock *Sink, 588 RandomIRBuilder &IB) { 589 uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1); 590 for (uint64_t i = 0; i < Blocks.size(); i++) { 591 // We have at least one block that directly goes to sink. 592 CFGToSink ToSink = (i == DirectSinkIdx) 593 ? CFGToSink::DirectSink 594 : static_cast<CFGToSink>(uniform<uint64_t>( 595 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1)); 596 BasicBlock *BB = Blocks[i]; 597 Function *F = BB->getParent(); 598 LLVMContext &C = F->getParent()->getContext(); 599 switch (ToSink) { 600 case CFGToSink::Return: { 601 Type *RetTy = F->getReturnType(); 602 Value *RetValue = nullptr; 603 if (!RetTy->isVoidTy()) 604 RetValue = 605 IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy)); 606 ReturnInst::Create(C, RetValue, BB); 607 break; 608 } 609 case CFGToSink::DirectSink: { 610 BranchInst::Create(Sink, BB); 611 break; 612 } 613 case CFGToSink::SinkOrSelfLoop: { 614 SmallVector<BasicBlock *, 2> Branches({Sink, BB}); 615 // A coin decides which block is true branch. 616 uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1); 617 Value *Cond = IB.findOrCreateSource( 618 *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false); 619 BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB); 620 break; 621 } 622 case CFGToSink::EndOfCFGToLink: 623 llvm_unreachable("EndOfCFGToLink executed, something's wrong."); 624 } 625 } 626 } 627 628 void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { 629 // Can't insert PHI node to entry node. 630 if (&BB == &BB.getParent()->getEntryBlock()) 631 return; 632 Type *Ty = IB.randomType(); 633 PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", BB.begin()); 634 635 // Use a map to make sure the same incoming basic block has the same value. 636 DenseMap<BasicBlock *, Value *> IncomingValues; 637 for (BasicBlock *Pred : predecessors(&BB)) { 638 Value *Src = IncomingValues[Pred]; 639 // If `Pred` is not in the map yet, we'll get a nullptr. 640 if (!Src) { 641 SmallVector<Instruction *, 32> Insts; 642 for (auto I = Pred->begin(); I != Pred->end(); ++I) 643 Insts.push_back(&*I); 644 // There is no need to inform IB what previously used values are if we are 645 // using `onlyType` 646 Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty)); 647 IncomingValues[Pred] = Src; 648 } 649 PHI->addIncoming(Src, Pred); 650 } 651 SmallVector<Instruction *, 32> InstsAfter( 652 llvm::make_pointer_range(getInsertionRange(BB))); 653 IB.connectToSink(BB, InstsAfter, PHI); 654 } 655 656 void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) { 657 for (BasicBlock &BB : F) { 658 this->mutate(BB, IB); 659 } 660 } 661 void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { 662 SmallVector<Instruction *, 32> Insts( 663 llvm::make_pointer_range(getInsertionRange(BB))); 664 if (Insts.size() < 1) 665 return; 666 // Choose an Instruction to mutate. 667 uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1); 668 Instruction *Inst = Insts[Idx]; 669 // `Idx + 1` so we don't sink to ourselves. 670 auto InstsAfter = ArrayRef(Insts).slice(Idx + 1); 671 Type *Ty = Inst->getType(); 672 // Don't sink terminators, void function calls, token, etc. 673 if (!Ty->isVoidTy() && !Ty->isTokenTy()) 674 // Find a new sink and wire up the results of the operation. 675 IB.connectToSink(BB, InstsAfter, Inst); 676 } 677 678 void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { 679 // A deterministic alternative to SmallPtrSet with the same lookup 680 // performance. 681 std::map<size_t, Instruction *> AliveInsts; 682 std::map<Instruction *, size_t> AliveInstsLookup; 683 size_t InsertIdx = 0; 684 for (auto &I : make_early_inc_range( 685 make_range(BB.getFirstInsertionPt(), 686 getEffectiveTerminator(BB)->getIterator()))) { 687 // First gather all instructions that can be shuffled. Don't take 688 // terminator. 689 AliveInsts.insert({InsertIdx, &I}); 690 AliveInstsLookup.insert({&I, InsertIdx++}); 691 // Then remove these instructions from the block 692 I.removeFromParent(); 693 } 694 695 // Shuffle these instructions using topological sort. 696 // Returns false if all current instruction's dependencies in this block have 697 // been shuffled. If so, this instruction can be shuffled too. 698 auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) { 699 for (Value *O : AliveInsts[Index]->operands()) { 700 Instruction *P = dyn_cast<Instruction>(O); 701 if (P && AliveInstsLookup.count(P)) 702 return true; 703 } 704 return false; 705 }; 706 // Get all alive instructions that depend on the current instruction. 707 // Takes Instruction* instead of index because the instruction is already 708 // shuffled. 709 auto getAliveChildren = [&AliveInstsLookup](Instruction *I) { 710 SmallSetVector<size_t, 8> Children; 711 for (Value *U : I->users()) { 712 if (Instruction *P = dyn_cast<Instruction>(U)) { 713 auto It = AliveInstsLookup.find(P); 714 if (It != AliveInstsLookup.end()) 715 Children.insert(It->second); 716 } 717 } 718 return Children; 719 }; 720 SmallSet<size_t, 8> RootIndices; 721 SmallVector<Instruction *, 8> Insts; 722 for (const auto &[Index, Inst] : AliveInsts) { 723 if (!hasAliveParent(Index)) 724 RootIndices.insert(Index); 725 } 726 // Topological sort by randomly selecting a node without a parent, or root. 727 while (!RootIndices.empty()) { 728 auto RS = makeSampler<size_t>(IB.Rand); 729 for (size_t RootIdx : RootIndices) 730 RS.sample(RootIdx, 1); 731 size_t RootIdx = RS.getSelection(); 732 733 RootIndices.erase(RootIdx); 734 Instruction *Root = AliveInsts[RootIdx]; 735 AliveInsts.erase(RootIdx); 736 AliveInstsLookup.erase(Root); 737 Insts.push_back(Root); 738 739 for (size_t Child : getAliveChildren(Root)) { 740 if (!hasAliveParent(Child)) { 741 RootIndices.insert(Child); 742 } 743 } 744 } 745 746 Instruction *Terminator = getEffectiveTerminator(BB); 747 // Then put instructions back. 748 for (Instruction *I : Insts) { 749 I->insertBefore(Terminator->getIterator()); 750 } 751 } 752 753 std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size, 754 LLVMContext &Context) { 755 756 if (Size <= 1) 757 // We get bogus data given an empty corpus - just create a new module. 758 return std::make_unique<Module>("M", Context); 759 760 auto Buffer = MemoryBuffer::getMemBuffer( 761 StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input", 762 /*RequiresNullTerminator=*/false); 763 764 SMDiagnostic Err; 765 auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context); 766 if (Error E = M.takeError()) { 767 errs() << toString(std::move(E)) << "\n"; 768 return nullptr; 769 } 770 return std::move(M.get()); 771 } 772 773 size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) { 774 std::string Buf; 775 { 776 raw_string_ostream OS(Buf); 777 WriteBitcodeToFile(M, OS); 778 } 779 if (Buf.size() > MaxSize) 780 return 0; 781 memcpy(Dest, Buf.data(), Buf.size()); 782 return Buf.size(); 783 } 784 785 std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size, 786 LLVMContext &Context) { 787 auto M = parseModule(Data, Size, Context); 788 if (!M || verifyModule(*M, &errs())) 789 return nullptr; 790 791 return M; 792 } 793