1 //===-- RandomIRBuilder.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/FuzzMutate/RandomIRBuilder.h" 10 #include "llvm/ADT/STLExtras.h" 11 #include "llvm/FuzzMutate/OpDescriptor.h" 12 #include "llvm/FuzzMutate/Random.h" 13 #include "llvm/IR/BasicBlock.h" 14 #include "llvm/IR/Constants.h" 15 #include "llvm/IR/DataLayout.h" 16 #include "llvm/IR/Dominators.h" 17 #include "llvm/IR/Function.h" 18 #include "llvm/IR/Instructions.h" 19 #include "llvm/IR/Module.h" 20 21 using namespace llvm; 22 using namespace fuzzerop; 23 24 /// Return a vector of Blocks that dominates this block, excluding current 25 /// block. 26 static std::vector<BasicBlock *> getDominators(BasicBlock *BB) { 27 std::vector<BasicBlock *> ret; 28 DominatorTree DT(*BB->getParent()); 29 DomTreeNode *Node = DT.getNode(BB); 30 // It's possible that an orphan block is not in the dom tree. In that case we 31 // just return nothing. 32 if (!Node) 33 return ret; 34 Node = Node->getIDom(); 35 while (Node && Node->getBlock()) { 36 ret.push_back(Node->getBlock()); 37 // Get parent block. 38 Node = Node->getIDom(); 39 } 40 return ret; 41 } 42 43 /// Return a vector of Blocks that is dominated by this block, excluding current 44 /// block 45 static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) { 46 DominatorTree DT(*BB->getParent()); 47 std::vector<BasicBlock *> ret; 48 DomTreeNode *Parent = DT.getNode(BB); 49 // It's possible that an orphan block is not in the dom tree. In that case we 50 // just return nothing. 51 if (!Parent) 52 return ret; 53 for (DomTreeNode *Child : Parent->children()) 54 ret.push_back(Child->getBlock()); 55 uint64_t Idx = 0; 56 while (Idx < ret.size()) { 57 DomTreeNode *Node = DT[ret[Idx]]; 58 Idx++; 59 for (DomTreeNode *Child : Node->children()) 60 ret.push_back(Child->getBlock()); 61 } 62 return ret; 63 } 64 65 AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty, 66 Value *Init) { 67 /// TODO: For all Allocas, maybe allocate an array. 68 BasicBlock *EntryBB = &F->getEntryBlock(); 69 const DataLayout &DL = F->getDataLayout(); 70 AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A", 71 EntryBB->getFirstInsertionPt()); 72 if (Init) 73 new StoreInst(Init, Alloca, std::next(Alloca->getIterator())); 74 return Alloca; 75 } 76 77 std::pair<GlobalVariable *, bool> 78 RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs, 79 fuzzerop::SourcePred Pred) { 80 auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) { 81 // Can't directly compare GV's type, as it would be a pointer to the actual 82 // type. 83 return Pred.matches(Srcs, PoisonValue::get(GV->getValueType())); 84 }; 85 bool DidCreate = false; 86 SmallVector<GlobalVariable *, 4> GlobalVars( 87 llvm::make_pointer_range(M->globals())); 88 auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred)); 89 RS.sample(nullptr, 1); 90 GlobalVariable *GV = RS.getSelection(); 91 if (!GV) { 92 DidCreate = true; 93 using LinkageTypes = GlobalVariable::LinkageTypes; 94 auto TRS = makeSampler<Constant *>(Rand); 95 TRS.sample(Pred.generate(Srcs, KnownTypes)); 96 Constant *Init = TRS.getSelection(); 97 Type *Ty = Init->getType(); 98 GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init, 99 "G", nullptr, 100 GlobalValue::ThreadLocalMode::NotThreadLocal, 101 M->getDataLayout().getDefaultGlobalsAddressSpace()); 102 } 103 return {GV, DidCreate}; 104 } 105 106 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, 107 ArrayRef<Instruction *> Insts) { 108 return findOrCreateSource(BB, Insts, {}, anyType()); 109 } 110 111 // Adapts the current pointer for a legal mem operation on the target arch. 112 static Value *buildTargetLegalPtr(Module *M, Value *Ptr, InsertPosition IP, 113 const Twine &Name, 114 SmallVector<Instruction *> *NewInsts) { 115 if (M && M->getTargetTriple().isAMDGCN()) { 116 // Check if we should perform an address space cast 117 PointerType *pointerType = dyn_cast<PointerType>(Ptr->getType()); 118 if (pointerType && pointerType->getAddressSpace() == 8) { 119 // Perform address space cast from address space 8 to address space 7 120 auto NewPtr = new AddrSpaceCastInst( 121 Ptr, PointerType::get(M->getContext(), 7), Name + ".ASC", IP); 122 if (NewInsts) 123 NewInsts->push_back(NewPtr); 124 return NewPtr; 125 } 126 } 127 128 return Ptr; 129 } 130 131 // Stores a value to memory, considering the target triple's restrictions. 132 static Instruction *buildTargetLegalStore(Value *Val, Value *Ptr, 133 InsertPosition IP, Module *M) { 134 Value *StorePtr = buildTargetLegalPtr(M, Ptr, IP, "", nullptr); 135 Instruction *Store = new StoreInst(Val, StorePtr, IP); 136 return Store; 137 } 138 139 // Loads a value from memory, considering the target triple's restrictions. 140 static std::pair<Instruction *, SmallVector<Instruction *>> 141 buildTargetLegalLoad(Type *AccessTy, Value *Ptr, InsertPosition IP, Module *M, 142 const Twine &LoadName) { 143 SmallVector<Instruction *> NewInsts; 144 145 Value *LoadPtr = buildTargetLegalPtr(M, Ptr, IP, LoadName, &NewInsts); 146 147 Instruction *Load = new LoadInst(AccessTy, LoadPtr, LoadName, IP); 148 NewInsts.push_back(Load); 149 150 return std::make_pair(Load, NewInsts); 151 } 152 153 static void eraseNewInstructions(SmallVector<Instruction *> &NewInsts) { 154 // Remove in reverse order (uses before defs) 155 for (auto it = NewInsts.rbegin(); it != NewInsts.rend(); ++it) { 156 (*it)->eraseFromParent(); 157 } 158 } 159 160 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, 161 ArrayRef<Instruction *> Insts, 162 ArrayRef<Value *> Srcs, 163 SourcePred Pred, 164 bool allowConstant) { 165 auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); }; 166 SmallVector<uint64_t, 8> SrcTys; 167 for (uint64_t i = 0; i < EndOfValueSource; i++) 168 SrcTys.push_back(i); 169 std::shuffle(SrcTys.begin(), SrcTys.end(), Rand); 170 for (uint64_t SrcTy : SrcTys) { 171 switch (SrcTy) { 172 case SrcFromInstInCurBlock: { 173 auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred)); 174 if (!RS.isEmpty()) { 175 return RS.getSelection(); 176 } 177 break; 178 } 179 case FunctionArgument: { 180 Function *F = BB.getParent(); 181 SmallVector<Argument *, 8> Args; 182 for (uint64_t i = 0; i < F->arg_size(); i++) { 183 Args.push_back(F->getArg(i)); 184 } 185 auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred)); 186 if (!RS.isEmpty()) { 187 return RS.getSelection(); 188 } 189 break; 190 } 191 case InstInDominator: { 192 auto Dominators = getDominators(&BB); 193 std::shuffle(Dominators.begin(), Dominators.end(), Rand); 194 for (BasicBlock *Dom : Dominators) { 195 SmallVector<Instruction *, 16> Instructions( 196 llvm::make_pointer_range(*Dom)); 197 auto RS = 198 makeSampler(Rand, make_filter_range(Instructions, MatchesPred)); 199 // Also consider choosing no source, meaning we want a new one. 200 if (!RS.isEmpty()) { 201 return RS.getSelection(); 202 } 203 } 204 break; 205 } 206 case SrcFromGlobalVariable: { 207 Module *M = BB.getParent()->getParent(); 208 auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred); 209 Type *Ty = GV->getValueType(); 210 InsertPosition IP = BB.getTerminator() 211 ? InsertPosition(BB.getFirstInsertionPt()) 212 : InsertPosition(&BB); 213 // Build a legal load and track new instructions in case a rollback is 214 // needed. 215 auto [LoadGV, NewInsts] = buildTargetLegalLoad(Ty, GV, IP, M, "LGV"); 216 // Because we might be generating new values, we have to check if it 217 // matches again. 218 if (DidCreate) { 219 if (Pred.matches(Srcs, LoadGV)) { 220 return LoadGV; 221 } 222 // Remove newly inserted instructions 223 eraseNewInstructions(NewInsts); 224 // If no one is using this GlobalVariable, delete it too. 225 if (GV->use_empty()) { 226 GV->eraseFromParent(); 227 } 228 } 229 break; 230 } 231 case NewConstOrStack: { 232 return newSource(BB, Insts, Srcs, Pred, allowConstant); 233 } 234 default: 235 case EndOfValueSource: { 236 llvm_unreachable("EndOfValueSource executed"); 237 } 238 } 239 } 240 llvm_unreachable("Can't find a source"); 241 } 242 243 Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts, 244 ArrayRef<Value *> Srcs, SourcePred Pred, 245 bool allowConstant) { 246 // Generate some constants to choose from. 247 auto RS = makeSampler<Value *>(Rand); 248 RS.sample(Pred.generate(Srcs, KnownTypes)); 249 250 // If we can find a pointer to load from, use it half the time. 251 Value *Ptr = findPointer(BB, Insts); 252 if (Ptr) { 253 // Create load from the chosen pointer 254 auto IP = BB.getFirstInsertionPt(); 255 if (auto *I = dyn_cast<Instruction>(Ptr)) { 256 IP = ++I->getIterator(); 257 assert(IP != BB.end() && "guaranteed by the findPointer"); 258 } 259 // Pick the type independently. 260 Type *AccessTy = RS.getSelection()->getType(); 261 // Build a legal load and track new instructions in case a rollback is 262 // needed. 263 auto [NewLoad, NewInsts] = 264 buildTargetLegalLoad(AccessTy, Ptr, IP, BB.getModule(), "L"); 265 266 // Only sample this load if it really matches the descriptor 267 if (Pred.matches(Srcs, NewLoad)) 268 RS.sample(NewLoad, RS.totalWeight()); 269 else { 270 // Remove newly inserted instructions 271 eraseNewInstructions(NewInsts); 272 } 273 } 274 275 Value *newSrc = RS.getSelection(); 276 // Generate a stack alloca and store the constant to it if constant is not 277 // allowed, our hope is that later mutations can generate some values and 278 // store to this placeholder. 279 if (!allowConstant && isa<Constant>(newSrc)) { 280 Type *Ty = newSrc->getType(); 281 Function *F = BB.getParent(); 282 AllocaInst *Alloca = createStackMemory(F, Ty, newSrc); 283 if (BB.getTerminator()) { 284 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", 285 BB.getTerminator()->getIterator()); 286 } else { 287 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB); 288 } 289 } 290 return newSrc; 291 } 292 293 static bool isCompatibleReplacement(const Instruction *I, const Use &Operand, 294 const Value *Replacement) { 295 unsigned int OperandNo = Operand.getOperandNo(); 296 if (Operand->getType() != Replacement->getType()) 297 return false; 298 switch (I->getOpcode()) { 299 case Instruction::GetElementPtr: 300 case Instruction::ExtractElement: 301 case Instruction::ExtractValue: 302 // TODO: We could potentially validate these, but for now just leave indices 303 // alone. 304 if (OperandNo >= 1) 305 return false; 306 break; 307 case Instruction::InsertValue: 308 case Instruction::InsertElement: 309 case Instruction::ShuffleVector: 310 if (OperandNo >= 2) 311 return false; 312 break; 313 // For Br/Switch, we only try to modify the 1st Operand (condition). 314 // Modify other operands, like switch case may accidently change case from 315 // ConstantInt to a register, which is illegal. 316 case Instruction::Switch: 317 case Instruction::Br: 318 if (OperandNo >= 1) 319 return false; 320 break; 321 case Instruction::Call: 322 case Instruction::Invoke: 323 case Instruction::CallBr: { 324 const Function *Callee = cast<CallBase>(I)->getCalledFunction(); 325 // If it's an indirect call, give up. 326 if (!Callee) 327 return false; 328 // If callee is not an intrinsic, operand 0 is the function to be called. 329 // Since we cannot assume that the replacement is a function pointer, 330 // we give up. 331 if (!Callee->getIntrinsicID() && OperandNo == 0) 332 return false; 333 return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg); 334 } 335 default: 336 break; 337 } 338 return true; 339 } 340 341 Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB, 342 ArrayRef<Instruction *> Insts, 343 Value *V) { 344 SmallVector<uint64_t, 8> SinkTys; 345 for (uint64_t i = 0; i < EndOfValueSink; i++) 346 SinkTys.push_back(i); 347 std::shuffle(SinkTys.begin(), SinkTys.end(), Rand); 348 auto findSinkAndConnect = 349 [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * { 350 auto RS = makeSampler<Use *>(Rand); 351 for (auto &I : Instructions) { 352 for (Use &U : I->operands()) 353 if (isCompatibleReplacement(I, U, V)) 354 RS.sample(&U, 1); 355 } 356 if (!RS.isEmpty()) { 357 Use *Sink = RS.getSelection(); 358 User *U = Sink->getUser(); 359 unsigned OpNo = Sink->getOperandNo(); 360 U->setOperand(OpNo, V); 361 return cast<Instruction>(U); 362 } 363 return nullptr; 364 }; 365 Instruction *Sink = nullptr; 366 for (uint64_t SinkTy : SinkTys) { 367 switch (SinkTy) { 368 case SinkToInstInCurBlock: 369 Sink = findSinkAndConnect(Insts); 370 if (Sink) 371 return Sink; 372 break; 373 case PointersInDominator: { 374 auto Dominators = getDominators(&BB); 375 std::shuffle(Dominators.begin(), Dominators.end(), Rand); 376 for (BasicBlock *Dom : Dominators) { 377 for (Instruction &I : *Dom) { 378 if (isa<PointerType>(I.getType())) { 379 return buildTargetLegalStore(V, &I, Insts.back()->getIterator(), 380 I.getModule()); 381 } 382 } 383 } 384 break; 385 } 386 case InstInDominatee: { 387 auto Dominatees = getDominatees(&BB); 388 std::shuffle(Dominatees.begin(), Dominatees.end(), Rand); 389 for (BasicBlock *Dominee : Dominatees) { 390 std::vector<Instruction *> Instructions; 391 for (Instruction &I : *Dominee) 392 Instructions.push_back(&I); 393 Sink = findSinkAndConnect(Instructions); 394 if (Sink) { 395 return Sink; 396 } 397 } 398 break; 399 } 400 case NewStore: 401 /// TODO: allocate a new stack memory. 402 return newSink(BB, Insts, V); 403 case SinkToGlobalVariable: { 404 Module *M = BB.getModule(); 405 auto [GV, DidCreate] = 406 findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType())); 407 return buildTargetLegalStore(V, GV, Insts.back()->getIterator(), M); 408 } 409 case EndOfValueSink: 410 default: 411 llvm_unreachable("EndOfValueSink executed"); 412 } 413 } 414 llvm_unreachable("Can't find a sink"); 415 } 416 417 Instruction *RandomIRBuilder::newSink(BasicBlock &BB, 418 ArrayRef<Instruction *> Insts, Value *V) { 419 Value *Ptr = findPointer(BB, Insts); 420 if (!Ptr) { 421 if (uniform(Rand, 0, 1)) { 422 Type *Ty = V->getType(); 423 Ptr = createStackMemory(BB.getParent(), Ty, PoisonValue::get(Ty)); 424 } else { 425 Ptr = PoisonValue::get(PointerType::get(V->getContext(), 0)); 426 } 427 } 428 429 return buildTargetLegalStore(V, Ptr, Insts.back()->getIterator(), 430 BB.getModule()); 431 } 432 433 Value *RandomIRBuilder::findPointer(BasicBlock &BB, 434 ArrayRef<Instruction *> Insts) { 435 auto IsMatchingPtr = [](Instruction *Inst) { 436 // Invoke instructions sometimes produce valid pointers but currently 437 // we can't insert loads or stores from them 438 if (Inst->isTerminator()) 439 return false; 440 441 return Inst->getType()->isPointerTy(); 442 }; 443 if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr))) 444 return RS.getSelection(); 445 return nullptr; 446 } 447 448 Type *RandomIRBuilder::randomType() { 449 uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1); 450 return KnownTypes[TyIdx]; 451 } 452 453 Function *RandomIRBuilder::createFunctionDeclaration(Module &M, 454 uint64_t ArgNum) { 455 Type *RetType = randomType(); 456 457 SmallVector<Type *, 2> Args; 458 for (uint64_t i = 0; i < ArgNum; i++) { 459 Args.push_back(randomType()); 460 } 461 462 Function *F = Function::Create(FunctionType::get(RetType, Args, 463 /*isVarArg=*/false), 464 GlobalValue::ExternalLinkage, "f", &M); 465 return F; 466 } 467 Function *RandomIRBuilder::createFunctionDeclaration(Module &M) { 468 return createFunctionDeclaration( 469 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum)); 470 } 471 472 Function *RandomIRBuilder::createFunctionDefinition(Module &M, 473 uint64_t ArgNum) { 474 Function *F = this->createFunctionDeclaration(M, ArgNum); 475 476 // TODO: Some arguments and a return value would probably be more 477 // interesting. 478 LLVMContext &Context = M.getContext(); 479 const DataLayout &DL = M.getDataLayout(); 480 BasicBlock *BB = BasicBlock::Create(Context, "BB", F); 481 Type *RetTy = F->getReturnType(); 482 if (RetTy != Type::getVoidTy(Context)) { 483 Instruction *RetAlloca = 484 new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB); 485 Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB); 486 ReturnInst::Create(Context, RetLoad, BB); 487 } else { 488 ReturnInst::Create(Context, BB); 489 } 490 491 return F; 492 } 493 Function *RandomIRBuilder::createFunctionDefinition(Module &M) { 494 return createFunctionDefinition( 495 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum)); 496 } 497