1 //===-- RandomIRBuilder.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/FuzzMutate/RandomIRBuilder.h" 10 #include "llvm/ADT/STLExtras.h" 11 #include "llvm/FuzzMutate/OpDescriptor.h" 12 #include "llvm/FuzzMutate/Random.h" 13 #include "llvm/IR/BasicBlock.h" 14 #include "llvm/IR/Constants.h" 15 #include "llvm/IR/DataLayout.h" 16 #include "llvm/IR/Dominators.h" 17 #include "llvm/IR/Function.h" 18 #include "llvm/IR/Instructions.h" 19 #include "llvm/IR/IntrinsicInst.h" 20 #include "llvm/IR/Module.h" 21 22 using namespace llvm; 23 using namespace fuzzerop; 24 25 /// Return a vector of Blocks that dominates this block, excluding current 26 /// block. 27 static std::vector<BasicBlock *> getDominators(BasicBlock *BB) { 28 std::vector<BasicBlock *> ret; 29 DominatorTree DT(*BB->getParent()); 30 DomTreeNode *Node = DT.getNode(BB); 31 // It's possible that an orphan block is not in the dom tree. In that case we 32 // just return nothing. 33 if (!Node) 34 return ret; 35 Node = Node->getIDom(); 36 while (Node && Node->getBlock()) { 37 ret.push_back(Node->getBlock()); 38 // Get parent block. 39 Node = Node->getIDom(); 40 } 41 return ret; 42 } 43 44 /// Return a vector of Blocks that is dominated by this block, excluding current 45 /// block 46 static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) { 47 DominatorTree DT(*BB->getParent()); 48 std::vector<BasicBlock *> ret; 49 DomTreeNode *Parent = DT.getNode(BB); 50 // It's possible that an orphan block is not in the dom tree. In that case we 51 // just return nothing. 52 if (!Parent) 53 return ret; 54 for (DomTreeNode *Child : Parent->children()) 55 ret.push_back(Child->getBlock()); 56 uint64_t Idx = 0; 57 while (Idx < ret.size()) { 58 DomTreeNode *Node = DT[ret[Idx]]; 59 Idx++; 60 for (DomTreeNode *Child : Node->children()) 61 ret.push_back(Child->getBlock()); 62 } 63 return ret; 64 } 65 66 AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty, 67 Value *Init) { 68 /// TODO: For all Allocas, maybe allocate an array. 69 BasicBlock *EntryBB = &F->getEntryBlock(); 70 DataLayout DL(F->getParent()); 71 AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A", 72 &*EntryBB->getFirstInsertionPt()); 73 if (Init) 74 new StoreInst(Init, Alloca, Alloca->getNextNode()); 75 return Alloca; 76 } 77 78 std::pair<GlobalVariable *, bool> 79 RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs, 80 fuzzerop::SourcePred Pred) { 81 auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) { 82 // Can't directly compare GV's type, as it would be a pointer to the actual 83 // type. 84 return Pred.matches(Srcs, UndefValue::get(GV->getValueType())); 85 }; 86 bool DidCreate = false; 87 SmallVector<GlobalVariable *, 4> GlobalVars; 88 for (GlobalVariable &GV : M->globals()) { 89 GlobalVars.push_back(&GV); 90 } 91 auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred)); 92 RS.sample(nullptr, 1); 93 GlobalVariable *GV = RS.getSelection(); 94 if (!GV) { 95 DidCreate = true; 96 using LinkageTypes = GlobalVariable::LinkageTypes; 97 auto TRS = makeSampler<Constant *>(Rand); 98 TRS.sample(Pred.generate(Srcs, KnownTypes)); 99 Constant *Init = TRS.getSelection(); 100 Type *Ty = Init->getType(); 101 GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init, 102 "G", nullptr, 103 GlobalValue::ThreadLocalMode::NotThreadLocal, 104 M->getDataLayout().getDefaultGlobalsAddressSpace()); 105 } 106 return {GV, DidCreate}; 107 } 108 109 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, 110 ArrayRef<Instruction *> Insts) { 111 return findOrCreateSource(BB, Insts, {}, anyType()); 112 } 113 114 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, 115 ArrayRef<Instruction *> Insts, 116 ArrayRef<Value *> Srcs, 117 SourcePred Pred, 118 bool allowConstant) { 119 auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); }; 120 SmallVector<uint64_t, 8> SrcTys; 121 for (uint64_t i = 0; i < EndOfValueSource; i++) 122 SrcTys.push_back(i); 123 std::shuffle(SrcTys.begin(), SrcTys.end(), Rand); 124 for (uint64_t SrcTy : SrcTys) { 125 switch (SrcTy) { 126 case SrcFromInstInCurBlock: { 127 auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred)); 128 if (!RS.isEmpty()) { 129 return RS.getSelection(); 130 } 131 break; 132 } 133 case FunctionArgument: { 134 Function *F = BB.getParent(); 135 SmallVector<Argument *, 8> Args; 136 for (uint64_t i = 0; i < F->arg_size(); i++) { 137 Args.push_back(F->getArg(i)); 138 } 139 auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred)); 140 if (!RS.isEmpty()) { 141 return RS.getSelection(); 142 } 143 break; 144 } 145 case InstInDominator: { 146 auto Dominators = getDominators(&BB); 147 std::shuffle(Dominators.begin(), Dominators.end(), Rand); 148 for (BasicBlock *Dom : Dominators) { 149 SmallVector<Instruction *, 16> Instructions; 150 for (Instruction &I : *Dom) { 151 Instructions.push_back(&I); 152 } 153 auto RS = 154 makeSampler(Rand, make_filter_range(Instructions, MatchesPred)); 155 // Also consider choosing no source, meaning we want a new one. 156 if (!RS.isEmpty()) { 157 return RS.getSelection(); 158 } 159 } 160 break; 161 } 162 case SrcFromGlobalVariable: { 163 Module *M = BB.getParent()->getParent(); 164 auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred); 165 Type *Ty = GV->getValueType(); 166 LoadInst *LoadGV = nullptr; 167 if (BB.getTerminator()) { 168 LoadGV = new LoadInst(Ty, GV, "LGV", &*BB.getFirstInsertionPt()); 169 } else { 170 LoadGV = new LoadInst(Ty, GV, "LGV", &BB); 171 } 172 // Because we might be generating new values, we have to check if it 173 // matches again. 174 if (DidCreate) { 175 if (Pred.matches(Srcs, LoadGV)) { 176 return LoadGV; 177 } 178 LoadGV->eraseFromParent(); 179 // If no one is using this GlobalVariable, delete it too. 180 if (GV->use_empty()) { 181 GV->eraseFromParent(); 182 } 183 } 184 break; 185 } 186 case NewConstOrStack: { 187 return newSource(BB, Insts, Srcs, Pred, allowConstant); 188 } 189 default: 190 case EndOfValueSource: { 191 llvm_unreachable("EndOfValueSource executed"); 192 } 193 } 194 } 195 llvm_unreachable("Can't find a source"); 196 } 197 198 Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts, 199 ArrayRef<Value *> Srcs, SourcePred Pred, 200 bool allowConstant) { 201 // Generate some constants to choose from. 202 auto RS = makeSampler<Value *>(Rand); 203 RS.sample(Pred.generate(Srcs, KnownTypes)); 204 205 // If we can find a pointer to load from, use it half the time. 206 Value *Ptr = findPointer(BB, Insts); 207 if (Ptr) { 208 // Create load from the chosen pointer 209 auto IP = BB.getFirstInsertionPt(); 210 if (auto *I = dyn_cast<Instruction>(Ptr)) { 211 IP = ++I->getIterator(); 212 assert(IP != BB.end() && "guaranteed by the findPointer"); 213 } 214 // Pick the type independently. 215 Type *AccessTy = RS.getSelection()->getType(); 216 auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", &*IP); 217 218 // Only sample this load if it really matches the descriptor 219 if (Pred.matches(Srcs, NewLoad)) 220 RS.sample(NewLoad, RS.totalWeight()); 221 else 222 NewLoad->eraseFromParent(); 223 } 224 225 Value *newSrc = RS.getSelection(); 226 // Generate a stack alloca and store the constant to it if constant is not 227 // allowed, our hope is that later mutations can generate some values and 228 // store to this placeholder. 229 if (!allowConstant && isa<Constant>(newSrc)) { 230 Type *Ty = newSrc->getType(); 231 Function *F = BB.getParent(); 232 AllocaInst *Alloca = createStackMemory(F, Ty, newSrc); 233 if (BB.getTerminator()) { 234 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", BB.getTerminator()); 235 } else { 236 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB); 237 } 238 } 239 return newSrc; 240 } 241 242 static bool isCompatibleReplacement(const Instruction *I, const Use &Operand, 243 const Value *Replacement) { 244 unsigned int OperandNo = Operand.getOperandNo(); 245 if (Operand->getType() != Replacement->getType()) 246 return false; 247 switch (I->getOpcode()) { 248 case Instruction::GetElementPtr: 249 case Instruction::ExtractElement: 250 case Instruction::ExtractValue: 251 // TODO: We could potentially validate these, but for now just leave indices 252 // alone. 253 if (OperandNo >= 1) 254 return false; 255 break; 256 case Instruction::InsertValue: 257 case Instruction::InsertElement: 258 case Instruction::ShuffleVector: 259 if (OperandNo >= 2) 260 return false; 261 break; 262 // For Br/Switch, we only try to modify the 1st Operand (condition). 263 // Modify other operands, like switch case may accidently change case from 264 // ConstantInt to a register, which is illegal. 265 case Instruction::Switch: 266 case Instruction::Br: 267 if (OperandNo >= 1) 268 return false; 269 break; 270 case Instruction::Call: 271 case Instruction::Invoke: 272 case Instruction::CallBr: { 273 const Function *Callee = cast<CallBase>(I)->getCalledFunction(); 274 // If it's an indirect call, give up. 275 if (!Callee) 276 return false; 277 // If callee is not an intrinsic, operand 0 is the function to be called. 278 // Since we cannot assume that the replacement is a function pointer, 279 // we give up. 280 if (!Callee->getIntrinsicID() && OperandNo == 0) 281 return false; 282 return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg); 283 } 284 default: 285 break; 286 } 287 return true; 288 } 289 290 Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB, 291 ArrayRef<Instruction *> Insts, 292 Value *V) { 293 SmallVector<uint64_t, 8> SinkTys; 294 for (uint64_t i = 0; i < EndOfValueSink; i++) 295 SinkTys.push_back(i); 296 std::shuffle(SinkTys.begin(), SinkTys.end(), Rand); 297 auto findSinkAndConnect = 298 [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * { 299 auto RS = makeSampler<Use *>(Rand); 300 for (auto &I : Instructions) { 301 for (Use &U : I->operands()) 302 if (isCompatibleReplacement(I, U, V)) 303 RS.sample(&U, 1); 304 } 305 if (!RS.isEmpty()) { 306 Use *Sink = RS.getSelection(); 307 User *U = Sink->getUser(); 308 unsigned OpNo = Sink->getOperandNo(); 309 U->setOperand(OpNo, V); 310 return cast<Instruction>(U); 311 } 312 return nullptr; 313 }; 314 Instruction *Sink = nullptr; 315 for (uint64_t SinkTy : SinkTys) { 316 switch (SinkTy) { 317 case SinkToInstInCurBlock: 318 Sink = findSinkAndConnect(Insts); 319 if (Sink) 320 return Sink; 321 break; 322 case PointersInDominator: { 323 auto Dominators = getDominators(&BB); 324 std::shuffle(Dominators.begin(), Dominators.end(), Rand); 325 for (BasicBlock *Dom : Dominators) { 326 for (Instruction &I : *Dom) { 327 if (isa<PointerType>(I.getType())) 328 return new StoreInst(V, &I, Insts.back()); 329 } 330 } 331 break; 332 } 333 case InstInDominatee: { 334 auto Dominatees = getDominatees(&BB); 335 std::shuffle(Dominatees.begin(), Dominatees.end(), Rand); 336 for (BasicBlock *Dominee : Dominatees) { 337 std::vector<Instruction *> Instructions; 338 for (Instruction &I : *Dominee) 339 Instructions.push_back(&I); 340 Sink = findSinkAndConnect(Instructions); 341 if (Sink) { 342 return Sink; 343 } 344 } 345 break; 346 } 347 case NewStore: 348 /// TODO: allocate a new stack memory. 349 return newSink(BB, Insts, V); 350 case SinkToGlobalVariable: { 351 Module *M = BB.getParent()->getParent(); 352 auto [GV, DidCreate] = 353 findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType())); 354 return new StoreInst(V, GV, Insts.back()); 355 } 356 case EndOfValueSink: 357 default: 358 llvm_unreachable("EndOfValueSink executed"); 359 } 360 } 361 llvm_unreachable("Can't find a sink"); 362 } 363 364 Instruction *RandomIRBuilder::newSink(BasicBlock &BB, 365 ArrayRef<Instruction *> Insts, Value *V) { 366 Value *Ptr = findPointer(BB, Insts); 367 if (!Ptr) { 368 if (uniform(Rand, 0, 1)) { 369 Type *Ty = V->getType(); 370 Ptr = createStackMemory(BB.getParent(), Ty, UndefValue::get(Ty)); 371 } else { 372 Ptr = UndefValue::get(PointerType::get(V->getType(), 0)); 373 } 374 } 375 376 return new StoreInst(V, Ptr, Insts.back()); 377 } 378 379 Value *RandomIRBuilder::findPointer(BasicBlock &BB, 380 ArrayRef<Instruction *> Insts) { 381 auto IsMatchingPtr = [](Instruction *Inst) { 382 // Invoke instructions sometimes produce valid pointers but currently 383 // we can't insert loads or stores from them 384 if (Inst->isTerminator()) 385 return false; 386 387 return Inst->getType()->isPointerTy(); 388 }; 389 if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr))) 390 return RS.getSelection(); 391 return nullptr; 392 } 393 394 Type *RandomIRBuilder::randomType() { 395 uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1); 396 return KnownTypes[TyIdx]; 397 } 398 399 Function *RandomIRBuilder::createFunctionDeclaration(Module &M, 400 uint64_t ArgNum) { 401 Type *RetType = randomType(); 402 403 SmallVector<Type *, 2> Args; 404 for (uint64_t i = 0; i < ArgNum; i++) { 405 Args.push_back(randomType()); 406 } 407 408 Function *F = Function::Create(FunctionType::get(RetType, Args, 409 /*isVarArg=*/false), 410 GlobalValue::ExternalLinkage, "f", &M); 411 return F; 412 } 413 Function *RandomIRBuilder::createFunctionDeclaration(Module &M) { 414 return createFunctionDeclaration( 415 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum)); 416 } 417 418 Function *RandomIRBuilder::createFunctionDefinition(Module &M, 419 uint64_t ArgNum) { 420 Function *F = this->createFunctionDeclaration(M, ArgNum); 421 422 // TODO: Some arguments and a return value would probably be more 423 // interesting. 424 LLVMContext &Context = M.getContext(); 425 DataLayout DL(&M); 426 BasicBlock *BB = BasicBlock::Create(Context, "BB", F); 427 Type *RetTy = F->getReturnType(); 428 if (RetTy != Type::getVoidTy(Context)) { 429 Instruction *RetAlloca = 430 new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB); 431 Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB); 432 ReturnInst::Create(Context, RetLoad, BB); 433 } else { 434 ReturnInst::Create(Context, BB); 435 } 436 437 return F; 438 } 439 Function *RandomIRBuilder::createFunctionDefinition(Module &M) { 440 return createFunctionDefinition( 441 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum)); 442 } 443