1 //===-- RandomIRBuilder.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/FuzzMutate/RandomIRBuilder.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/FuzzMutate/OpDescriptor.h"
12 #include "llvm/FuzzMutate/Random.h"
13 #include "llvm/IR/BasicBlock.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Dominators.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/Module.h"
20
21 using namespace llvm;
22 using namespace fuzzerop;
23
24 /// Return a vector of Blocks that dominates this block, excluding current
25 /// block.
getDominators(BasicBlock * BB)26 static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
27 std::vector<BasicBlock *> ret;
28 DominatorTree DT(*BB->getParent());
29 DomTreeNode *Node = DT.getNode(BB);
30 // It's possible that an orphan block is not in the dom tree. In that case we
31 // just return nothing.
32 if (!Node)
33 return ret;
34 Node = Node->getIDom();
35 while (Node && Node->getBlock()) {
36 ret.push_back(Node->getBlock());
37 // Get parent block.
38 Node = Node->getIDom();
39 }
40 return ret;
41 }
42
43 /// Return a vector of Blocks that is dominated by this block, excluding current
44 /// block
getDominatees(BasicBlock * BB)45 static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
46 DominatorTree DT(*BB->getParent());
47 std::vector<BasicBlock *> ret;
48 DomTreeNode *Parent = DT.getNode(BB);
49 // It's possible that an orphan block is not in the dom tree. In that case we
50 // just return nothing.
51 if (!Parent)
52 return ret;
53 for (DomTreeNode *Child : Parent->children())
54 ret.push_back(Child->getBlock());
55 uint64_t Idx = 0;
56 while (Idx < ret.size()) {
57 DomTreeNode *Node = DT[ret[Idx]];
58 Idx++;
59 for (DomTreeNode *Child : Node->children())
60 ret.push_back(Child->getBlock());
61 }
62 return ret;
63 }
64
createStackMemory(Function * F,Type * Ty,Value * Init)65 AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,
66 Value *Init) {
67 /// TODO: For all Allocas, maybe allocate an array.
68 BasicBlock *EntryBB = &F->getEntryBlock();
69 const DataLayout &DL = F->getDataLayout();
70 AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",
71 EntryBB->getFirstInsertionPt());
72 if (Init)
73 new StoreInst(Init, Alloca, std::next(Alloca->getIterator()));
74 return Alloca;
75 }
76
77 std::pair<GlobalVariable *, bool>
findOrCreateGlobalVariable(Module * M,ArrayRef<Value * > Srcs,fuzzerop::SourcePred Pred)78 RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
79 fuzzerop::SourcePred Pred) {
80 auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
81 // Can't directly compare GV's type, as it would be a pointer to the actual
82 // type.
83 return Pred.matches(Srcs, PoisonValue::get(GV->getValueType()));
84 };
85 bool DidCreate = false;
86 SmallVector<GlobalVariable *, 4> GlobalVars(
87 llvm::make_pointer_range(M->globals()));
88 auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred));
89 RS.sample(nullptr, 1);
90 GlobalVariable *GV = RS.getSelection();
91 if (!GV) {
92 DidCreate = true;
93 using LinkageTypes = GlobalVariable::LinkageTypes;
94 auto TRS = makeSampler<Constant *>(Rand);
95 TRS.sample(Pred.generate(Srcs, KnownTypes));
96 Constant *Init = TRS.getSelection();
97 Type *Ty = Init->getType();
98 GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
99 "G", nullptr,
100 GlobalValue::ThreadLocalMode::NotThreadLocal,
101 M->getDataLayout().getDefaultGlobalsAddressSpace());
102 }
103 return {GV, DidCreate};
104 }
105
findOrCreateSource(BasicBlock & BB,ArrayRef<Instruction * > Insts)106 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
107 ArrayRef<Instruction *> Insts) {
108 return findOrCreateSource(BB, Insts, {}, anyType());
109 }
110
111 // Adapts the current pointer for a legal mem operation on the target arch.
buildTargetLegalPtr(Module * M,Value * Ptr,InsertPosition IP,const Twine & Name,SmallVector<Instruction * > * NewInsts)112 static Value *buildTargetLegalPtr(Module *M, Value *Ptr, InsertPosition IP,
113 const Twine &Name,
114 SmallVector<Instruction *> *NewInsts) {
115 if (M && M->getTargetTriple().isAMDGCN()) {
116 // Check if we should perform an address space cast
117 PointerType *pointerType = dyn_cast<PointerType>(Ptr->getType());
118 if (pointerType && pointerType->getAddressSpace() == 8) {
119 // Perform address space cast from address space 8 to address space 7
120 auto NewPtr = new AddrSpaceCastInst(
121 Ptr, PointerType::get(M->getContext(), 7), Name + ".ASC", IP);
122 if (NewInsts)
123 NewInsts->push_back(NewPtr);
124 return NewPtr;
125 }
126 }
127
128 return Ptr;
129 }
130
131 // Stores a value to memory, considering the target triple's restrictions.
buildTargetLegalStore(Value * Val,Value * Ptr,InsertPosition IP,Module * M)132 static Instruction *buildTargetLegalStore(Value *Val, Value *Ptr,
133 InsertPosition IP, Module *M) {
134 Value *StorePtr = buildTargetLegalPtr(M, Ptr, IP, "", nullptr);
135 Instruction *Store = new StoreInst(Val, StorePtr, IP);
136 return Store;
137 }
138
139 // Loads a value from memory, considering the target triple's restrictions.
140 static std::pair<Instruction *, SmallVector<Instruction *>>
buildTargetLegalLoad(Type * AccessTy,Value * Ptr,InsertPosition IP,Module * M,const Twine & LoadName)141 buildTargetLegalLoad(Type *AccessTy, Value *Ptr, InsertPosition IP, Module *M,
142 const Twine &LoadName) {
143 SmallVector<Instruction *> NewInsts;
144
145 Value *LoadPtr = buildTargetLegalPtr(M, Ptr, IP, LoadName, &NewInsts);
146
147 Instruction *Load = new LoadInst(AccessTy, LoadPtr, LoadName, IP);
148 NewInsts.push_back(Load);
149
150 return std::make_pair(Load, NewInsts);
151 }
152
eraseNewInstructions(SmallVector<Instruction * > & NewInsts)153 static void eraseNewInstructions(SmallVector<Instruction *> &NewInsts) {
154 // Remove in reverse order (uses before defs)
155 for (auto it = NewInsts.rbegin(); it != NewInsts.rend(); ++it) {
156 (*it)->eraseFromParent();
157 }
158 }
159
findOrCreateSource(BasicBlock & BB,ArrayRef<Instruction * > Insts,ArrayRef<Value * > Srcs,SourcePred Pred,bool allowConstant)160 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
161 ArrayRef<Instruction *> Insts,
162 ArrayRef<Value *> Srcs,
163 SourcePred Pred,
164 bool allowConstant) {
165 auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); };
166 SmallVector<uint64_t, 8> SrcTys;
167 for (uint64_t i = 0; i < EndOfValueSource; i++)
168 SrcTys.push_back(i);
169 std::shuffle(SrcTys.begin(), SrcTys.end(), Rand);
170 for (uint64_t SrcTy : SrcTys) {
171 switch (SrcTy) {
172 case SrcFromInstInCurBlock: {
173 auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));
174 if (!RS.isEmpty()) {
175 return RS.getSelection();
176 }
177 break;
178 }
179 case FunctionArgument: {
180 Function *F = BB.getParent();
181 SmallVector<Argument *, 8> Args;
182 for (uint64_t i = 0; i < F->arg_size(); i++) {
183 Args.push_back(F->getArg(i));
184 }
185 auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred));
186 if (!RS.isEmpty()) {
187 return RS.getSelection();
188 }
189 break;
190 }
191 case InstInDominator: {
192 auto Dominators = getDominators(&BB);
193 std::shuffle(Dominators.begin(), Dominators.end(), Rand);
194 for (BasicBlock *Dom : Dominators) {
195 SmallVector<Instruction *, 16> Instructions(
196 llvm::make_pointer_range(*Dom));
197 auto RS =
198 makeSampler(Rand, make_filter_range(Instructions, MatchesPred));
199 // Also consider choosing no source, meaning we want a new one.
200 if (!RS.isEmpty()) {
201 return RS.getSelection();
202 }
203 }
204 break;
205 }
206 case SrcFromGlobalVariable: {
207 Module *M = BB.getParent()->getParent();
208 auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
209 Type *Ty = GV->getValueType();
210 InsertPosition IP = BB.getTerminator()
211 ? InsertPosition(BB.getFirstInsertionPt())
212 : InsertPosition(&BB);
213 // Build a legal load and track new instructions in case a rollback is
214 // needed.
215 auto [LoadGV, NewInsts] = buildTargetLegalLoad(Ty, GV, IP, M, "LGV");
216 // Because we might be generating new values, we have to check if it
217 // matches again.
218 if (DidCreate) {
219 if (Pred.matches(Srcs, LoadGV)) {
220 return LoadGV;
221 }
222 // Remove newly inserted instructions
223 eraseNewInstructions(NewInsts);
224 // If no one is using this GlobalVariable, delete it too.
225 if (GV->use_empty()) {
226 GV->eraseFromParent();
227 }
228 }
229 break;
230 }
231 case NewConstOrStack: {
232 return newSource(BB, Insts, Srcs, Pred, allowConstant);
233 }
234 default:
235 case EndOfValueSource: {
236 llvm_unreachable("EndOfValueSource executed");
237 }
238 }
239 }
240 llvm_unreachable("Can't find a source");
241 }
242
newSource(BasicBlock & BB,ArrayRef<Instruction * > Insts,ArrayRef<Value * > Srcs,SourcePred Pred,bool allowConstant)243 Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
244 ArrayRef<Value *> Srcs, SourcePred Pred,
245 bool allowConstant) {
246 // Generate some constants to choose from.
247 auto RS = makeSampler<Value *>(Rand);
248 RS.sample(Pred.generate(Srcs, KnownTypes));
249
250 // If we can find a pointer to load from, use it half the time.
251 Value *Ptr = findPointer(BB, Insts);
252 if (Ptr) {
253 // Create load from the chosen pointer
254 auto IP = BB.getFirstInsertionPt();
255 if (auto *I = dyn_cast<Instruction>(Ptr)) {
256 IP = ++I->getIterator();
257 assert(IP != BB.end() && "guaranteed by the findPointer");
258 }
259 // Pick the type independently.
260 Type *AccessTy = RS.getSelection()->getType();
261 // Build a legal load and track new instructions in case a rollback is
262 // needed.
263 auto [NewLoad, NewInsts] =
264 buildTargetLegalLoad(AccessTy, Ptr, IP, BB.getModule(), "L");
265
266 // Only sample this load if it really matches the descriptor
267 if (Pred.matches(Srcs, NewLoad))
268 RS.sample(NewLoad, RS.totalWeight());
269 else {
270 // Remove newly inserted instructions
271 eraseNewInstructions(NewInsts);
272 }
273 }
274
275 Value *newSrc = RS.getSelection();
276 // Generate a stack alloca and store the constant to it if constant is not
277 // allowed, our hope is that later mutations can generate some values and
278 // store to this placeholder.
279 if (!allowConstant && isa<Constant>(newSrc)) {
280 Type *Ty = newSrc->getType();
281 Function *F = BB.getParent();
282 AllocaInst *Alloca = createStackMemory(F, Ty, newSrc);
283 if (BB.getTerminator()) {
284 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L",
285 BB.getTerminator()->getIterator());
286 } else {
287 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);
288 }
289 }
290 return newSrc;
291 }
292
isCompatibleReplacement(const Instruction * I,const Use & Operand,const Value * Replacement)293 static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
294 const Value *Replacement) {
295 unsigned int OperandNo = Operand.getOperandNo();
296 if (Operand->getType() != Replacement->getType())
297 return false;
298 switch (I->getOpcode()) {
299 case Instruction::GetElementPtr:
300 case Instruction::ExtractElement:
301 case Instruction::ExtractValue:
302 // TODO: We could potentially validate these, but for now just leave indices
303 // alone.
304 if (OperandNo >= 1)
305 return false;
306 break;
307 case Instruction::InsertValue:
308 case Instruction::InsertElement:
309 case Instruction::ShuffleVector:
310 if (OperandNo >= 2)
311 return false;
312 break;
313 // For Br/Switch, we only try to modify the 1st Operand (condition).
314 // Modify other operands, like switch case may accidently change case from
315 // ConstantInt to a register, which is illegal.
316 case Instruction::Switch:
317 case Instruction::Br:
318 if (OperandNo >= 1)
319 return false;
320 break;
321 case Instruction::Call:
322 case Instruction::Invoke:
323 case Instruction::CallBr: {
324 const Function *Callee = cast<CallBase>(I)->getCalledFunction();
325 // If it's an indirect call, give up.
326 if (!Callee)
327 return false;
328 // If callee is not an intrinsic, operand 0 is the function to be called.
329 // Since we cannot assume that the replacement is a function pointer,
330 // we give up.
331 if (!Callee->getIntrinsicID() && OperandNo == 0)
332 return false;
333 return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg);
334 }
335 default:
336 break;
337 }
338 return true;
339 }
340
connectToSink(BasicBlock & BB,ArrayRef<Instruction * > Insts,Value * V)341 Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
342 ArrayRef<Instruction *> Insts,
343 Value *V) {
344 SmallVector<uint64_t, 8> SinkTys;
345 for (uint64_t i = 0; i < EndOfValueSink; i++)
346 SinkTys.push_back(i);
347 std::shuffle(SinkTys.begin(), SinkTys.end(), Rand);
348 auto findSinkAndConnect =
349 [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
350 auto RS = makeSampler<Use *>(Rand);
351 for (auto &I : Instructions) {
352 for (Use &U : I->operands())
353 if (isCompatibleReplacement(I, U, V))
354 RS.sample(&U, 1);
355 }
356 if (!RS.isEmpty()) {
357 Use *Sink = RS.getSelection();
358 User *U = Sink->getUser();
359 unsigned OpNo = Sink->getOperandNo();
360 U->setOperand(OpNo, V);
361 return cast<Instruction>(U);
362 }
363 return nullptr;
364 };
365 Instruction *Sink = nullptr;
366 for (uint64_t SinkTy : SinkTys) {
367 switch (SinkTy) {
368 case SinkToInstInCurBlock:
369 Sink = findSinkAndConnect(Insts);
370 if (Sink)
371 return Sink;
372 break;
373 case PointersInDominator: {
374 auto Dominators = getDominators(&BB);
375 std::shuffle(Dominators.begin(), Dominators.end(), Rand);
376 for (BasicBlock *Dom : Dominators) {
377 for (Instruction &I : *Dom) {
378 if (isa<PointerType>(I.getType())) {
379 return buildTargetLegalStore(V, &I, Insts.back()->getIterator(),
380 I.getModule());
381 }
382 }
383 }
384 break;
385 }
386 case InstInDominatee: {
387 auto Dominatees = getDominatees(&BB);
388 std::shuffle(Dominatees.begin(), Dominatees.end(), Rand);
389 for (BasicBlock *Dominee : Dominatees) {
390 std::vector<Instruction *> Instructions;
391 for (Instruction &I : *Dominee)
392 Instructions.push_back(&I);
393 Sink = findSinkAndConnect(Instructions);
394 if (Sink) {
395 return Sink;
396 }
397 }
398 break;
399 }
400 case NewStore:
401 /// TODO: allocate a new stack memory.
402 return newSink(BB, Insts, V);
403 case SinkToGlobalVariable: {
404 Module *M = BB.getModule();
405 auto [GV, DidCreate] =
406 findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType()));
407 return buildTargetLegalStore(V, GV, Insts.back()->getIterator(), M);
408 }
409 case EndOfValueSink:
410 default:
411 llvm_unreachable("EndOfValueSink executed");
412 }
413 }
414 llvm_unreachable("Can't find a sink");
415 }
416
newSink(BasicBlock & BB,ArrayRef<Instruction * > Insts,Value * V)417 Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
418 ArrayRef<Instruction *> Insts, Value *V) {
419 Value *Ptr = findPointer(BB, Insts);
420 if (!Ptr) {
421 if (uniform(Rand, 0, 1)) {
422 Type *Ty = V->getType();
423 Ptr = createStackMemory(BB.getParent(), Ty, PoisonValue::get(Ty));
424 } else {
425 Ptr = PoisonValue::get(PointerType::get(V->getContext(), 0));
426 }
427 }
428
429 return buildTargetLegalStore(V, Ptr, Insts.back()->getIterator(),
430 BB.getModule());
431 }
432
findPointer(BasicBlock & BB,ArrayRef<Instruction * > Insts)433 Value *RandomIRBuilder::findPointer(BasicBlock &BB,
434 ArrayRef<Instruction *> Insts) {
435 auto IsMatchingPtr = [](Instruction *Inst) {
436 // Invoke instructions sometimes produce valid pointers but currently
437 // we can't insert loads or stores from them
438 if (Inst->isTerminator())
439 return false;
440
441 return Inst->getType()->isPointerTy();
442 };
443 if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr)))
444 return RS.getSelection();
445 return nullptr;
446 }
447
randomType()448 Type *RandomIRBuilder::randomType() {
449 uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1);
450 return KnownTypes[TyIdx];
451 }
452
createFunctionDeclaration(Module & M,uint64_t ArgNum)453 Function *RandomIRBuilder::createFunctionDeclaration(Module &M,
454 uint64_t ArgNum) {
455 Type *RetType = randomType();
456
457 SmallVector<Type *, 2> Args;
458 for (uint64_t i = 0; i < ArgNum; i++) {
459 Args.push_back(randomType());
460 }
461
462 Function *F = Function::Create(FunctionType::get(RetType, Args,
463 /*isVarArg=*/false),
464 GlobalValue::ExternalLinkage, "f", &M);
465 return F;
466 }
createFunctionDeclaration(Module & M)467 Function *RandomIRBuilder::createFunctionDeclaration(Module &M) {
468 return createFunctionDeclaration(
469 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
470 }
471
createFunctionDefinition(Module & M,uint64_t ArgNum)472 Function *RandomIRBuilder::createFunctionDefinition(Module &M,
473 uint64_t ArgNum) {
474 Function *F = this->createFunctionDeclaration(M, ArgNum);
475
476 // TODO: Some arguments and a return value would probably be more
477 // interesting.
478 LLVMContext &Context = M.getContext();
479 const DataLayout &DL = M.getDataLayout();
480 BasicBlock *BB = BasicBlock::Create(Context, "BB", F);
481 Type *RetTy = F->getReturnType();
482 if (RetTy != Type::getVoidTy(Context)) {
483 Instruction *RetAlloca =
484 new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);
485 Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);
486 ReturnInst::Create(Context, RetLoad, BB);
487 } else {
488 ReturnInst::Create(Context, BB);
489 }
490
491 return F;
492 }
createFunctionDefinition(Module & M)493 Function *RandomIRBuilder::createFunctionDefinition(Module &M) {
494 return createFunctionDefinition(
495 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
496 }
497