1 //===-- RandomIRBuilder.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/FuzzMutate/RandomIRBuilder.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/FuzzMutate/OpDescriptor.h"
12 #include "llvm/FuzzMutate/Random.h"
13 #include "llvm/IR/BasicBlock.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Dominators.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/IR/Module.h"
21
22 using namespace llvm;
23 using namespace fuzzerop;
24
25 /// Return a vector of Blocks that dominates this block, excluding current
26 /// block.
getDominators(BasicBlock * BB)27 static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
28 std::vector<BasicBlock *> ret;
29 DominatorTree DT(*BB->getParent());
30 DomTreeNode *Node = DT.getNode(BB);
31 // It's possible that an orphan block is not in the dom tree. In that case we
32 // just return nothing.
33 if (!Node)
34 return ret;
35 Node = Node->getIDom();
36 while (Node && Node->getBlock()) {
37 ret.push_back(Node->getBlock());
38 // Get parent block.
39 Node = Node->getIDom();
40 }
41 return ret;
42 }
43
44 /// Return a vector of Blocks that is dominated by this block, excluding current
45 /// block
getDominatees(BasicBlock * BB)46 static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
47 DominatorTree DT(*BB->getParent());
48 std::vector<BasicBlock *> ret;
49 DomTreeNode *Parent = DT.getNode(BB);
50 // It's possible that an orphan block is not in the dom tree. In that case we
51 // just return nothing.
52 if (!Parent)
53 return ret;
54 for (DomTreeNode *Child : Parent->children())
55 ret.push_back(Child->getBlock());
56 uint64_t Idx = 0;
57 while (Idx < ret.size()) {
58 DomTreeNode *Node = DT[ret[Idx]];
59 Idx++;
60 for (DomTreeNode *Child : Node->children())
61 ret.push_back(Child->getBlock());
62 }
63 return ret;
64 }
65
createStackMemory(Function * F,Type * Ty,Value * Init)66 AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,
67 Value *Init) {
68 /// TODO: For all Allocas, maybe allocate an array.
69 BasicBlock *EntryBB = &F->getEntryBlock();
70 DataLayout DL(F->getParent());
71 AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",
72 &*EntryBB->getFirstInsertionPt());
73 if (Init)
74 new StoreInst(Init, Alloca, Alloca->getNextNode());
75 return Alloca;
76 }
77
78 std::pair<GlobalVariable *, bool>
findOrCreateGlobalVariable(Module * M,ArrayRef<Value * > Srcs,fuzzerop::SourcePred Pred)79 RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
80 fuzzerop::SourcePred Pred) {
81 auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
82 // Can't directly compare GV's type, as it would be a pointer to the actual
83 // type.
84 return Pred.matches(Srcs, UndefValue::get(GV->getValueType()));
85 };
86 bool DidCreate = false;
87 SmallVector<GlobalVariable *, 4> GlobalVars;
88 for (GlobalVariable &GV : M->globals()) {
89 GlobalVars.push_back(&GV);
90 }
91 auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred));
92 RS.sample(nullptr, 1);
93 GlobalVariable *GV = RS.getSelection();
94 if (!GV) {
95 DidCreate = true;
96 using LinkageTypes = GlobalVariable::LinkageTypes;
97 auto TRS = makeSampler<Constant *>(Rand);
98 TRS.sample(Pred.generate(Srcs, KnownTypes));
99 Constant *Init = TRS.getSelection();
100 Type *Ty = Init->getType();
101 GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
102 "G", nullptr,
103 GlobalValue::ThreadLocalMode::NotThreadLocal,
104 M->getDataLayout().getDefaultGlobalsAddressSpace());
105 }
106 return {GV, DidCreate};
107 }
108
findOrCreateSource(BasicBlock & BB,ArrayRef<Instruction * > Insts)109 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
110 ArrayRef<Instruction *> Insts) {
111 return findOrCreateSource(BB, Insts, {}, anyType());
112 }
113
findOrCreateSource(BasicBlock & BB,ArrayRef<Instruction * > Insts,ArrayRef<Value * > Srcs,SourcePred Pred,bool allowConstant)114 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
115 ArrayRef<Instruction *> Insts,
116 ArrayRef<Value *> Srcs,
117 SourcePred Pred,
118 bool allowConstant) {
119 auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); };
120 SmallVector<uint64_t, 8> SrcTys;
121 for (uint64_t i = 0; i < EndOfValueSource; i++)
122 SrcTys.push_back(i);
123 std::shuffle(SrcTys.begin(), SrcTys.end(), Rand);
124 for (uint64_t SrcTy : SrcTys) {
125 switch (SrcTy) {
126 case SrcFromInstInCurBlock: {
127 auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));
128 if (!RS.isEmpty()) {
129 return RS.getSelection();
130 }
131 break;
132 }
133 case FunctionArgument: {
134 Function *F = BB.getParent();
135 SmallVector<Argument *, 8> Args;
136 for (uint64_t i = 0; i < F->arg_size(); i++) {
137 Args.push_back(F->getArg(i));
138 }
139 auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred));
140 if (!RS.isEmpty()) {
141 return RS.getSelection();
142 }
143 break;
144 }
145 case InstInDominator: {
146 auto Dominators = getDominators(&BB);
147 std::shuffle(Dominators.begin(), Dominators.end(), Rand);
148 for (BasicBlock *Dom : Dominators) {
149 SmallVector<Instruction *, 16> Instructions;
150 for (Instruction &I : *Dom) {
151 Instructions.push_back(&I);
152 }
153 auto RS =
154 makeSampler(Rand, make_filter_range(Instructions, MatchesPred));
155 // Also consider choosing no source, meaning we want a new one.
156 if (!RS.isEmpty()) {
157 return RS.getSelection();
158 }
159 }
160 break;
161 }
162 case SrcFromGlobalVariable: {
163 Module *M = BB.getParent()->getParent();
164 auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
165 Type *Ty = GV->getValueType();
166 LoadInst *LoadGV = nullptr;
167 if (BB.getTerminator()) {
168 LoadGV = new LoadInst(Ty, GV, "LGV", &*BB.getFirstInsertionPt());
169 } else {
170 LoadGV = new LoadInst(Ty, GV, "LGV", &BB);
171 }
172 // Because we might be generating new values, we have to check if it
173 // matches again.
174 if (DidCreate) {
175 if (Pred.matches(Srcs, LoadGV)) {
176 return LoadGV;
177 }
178 LoadGV->eraseFromParent();
179 // If no one is using this GlobalVariable, delete it too.
180 if (GV->use_empty()) {
181 GV->eraseFromParent();
182 }
183 }
184 break;
185 }
186 case NewConstOrStack: {
187 return newSource(BB, Insts, Srcs, Pred, allowConstant);
188 }
189 default:
190 case EndOfValueSource: {
191 llvm_unreachable("EndOfValueSource executed");
192 }
193 }
194 }
195 llvm_unreachable("Can't find a source");
196 }
197
newSource(BasicBlock & BB,ArrayRef<Instruction * > Insts,ArrayRef<Value * > Srcs,SourcePred Pred,bool allowConstant)198 Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
199 ArrayRef<Value *> Srcs, SourcePred Pred,
200 bool allowConstant) {
201 // Generate some constants to choose from.
202 auto RS = makeSampler<Value *>(Rand);
203 RS.sample(Pred.generate(Srcs, KnownTypes));
204
205 // If we can find a pointer to load from, use it half the time.
206 Value *Ptr = findPointer(BB, Insts);
207 if (Ptr) {
208 // Create load from the chosen pointer
209 auto IP = BB.getFirstInsertionPt();
210 if (auto *I = dyn_cast<Instruction>(Ptr)) {
211 IP = ++I->getIterator();
212 assert(IP != BB.end() && "guaranteed by the findPointer");
213 }
214 // Pick the type independently.
215 Type *AccessTy = RS.getSelection()->getType();
216 auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", &*IP);
217
218 // Only sample this load if it really matches the descriptor
219 if (Pred.matches(Srcs, NewLoad))
220 RS.sample(NewLoad, RS.totalWeight());
221 else
222 NewLoad->eraseFromParent();
223 }
224
225 Value *newSrc = RS.getSelection();
226 // Generate a stack alloca and store the constant to it if constant is not
227 // allowed, our hope is that later mutations can generate some values and
228 // store to this placeholder.
229 if (!allowConstant && isa<Constant>(newSrc)) {
230 Type *Ty = newSrc->getType();
231 Function *F = BB.getParent();
232 AllocaInst *Alloca = createStackMemory(F, Ty, newSrc);
233 if (BB.getTerminator()) {
234 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", BB.getTerminator());
235 } else {
236 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);
237 }
238 }
239 return newSrc;
240 }
241
isCompatibleReplacement(const Instruction * I,const Use & Operand,const Value * Replacement)242 static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
243 const Value *Replacement) {
244 unsigned int OperandNo = Operand.getOperandNo();
245 if (Operand->getType() != Replacement->getType())
246 return false;
247 switch (I->getOpcode()) {
248 case Instruction::GetElementPtr:
249 case Instruction::ExtractElement:
250 case Instruction::ExtractValue:
251 // TODO: We could potentially validate these, but for now just leave indices
252 // alone.
253 if (OperandNo >= 1)
254 return false;
255 break;
256 case Instruction::InsertValue:
257 case Instruction::InsertElement:
258 case Instruction::ShuffleVector:
259 if (OperandNo >= 2)
260 return false;
261 break;
262 // For Br/Switch, we only try to modify the 1st Operand (condition).
263 // Modify other operands, like switch case may accidently change case from
264 // ConstantInt to a register, which is illegal.
265 case Instruction::Switch:
266 case Instruction::Br:
267 if (OperandNo >= 1)
268 return false;
269 break;
270 case Instruction::Call:
271 case Instruction::Invoke:
272 case Instruction::CallBr: {
273 const Function *Callee = cast<CallBase>(I)->getCalledFunction();
274 // If it's an indirect call, give up.
275 if (!Callee)
276 return false;
277 // If callee is not an intrinsic, operand 0 is the function to be called.
278 // Since we cannot assume that the replacement is a function pointer,
279 // we give up.
280 if (!Callee->getIntrinsicID() && OperandNo == 0)
281 return false;
282 return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg);
283 }
284 default:
285 break;
286 }
287 return true;
288 }
289
connectToSink(BasicBlock & BB,ArrayRef<Instruction * > Insts,Value * V)290 Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
291 ArrayRef<Instruction *> Insts,
292 Value *V) {
293 SmallVector<uint64_t, 8> SinkTys;
294 for (uint64_t i = 0; i < EndOfValueSink; i++)
295 SinkTys.push_back(i);
296 std::shuffle(SinkTys.begin(), SinkTys.end(), Rand);
297 auto findSinkAndConnect =
298 [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
299 auto RS = makeSampler<Use *>(Rand);
300 for (auto &I : Instructions) {
301 for (Use &U : I->operands())
302 if (isCompatibleReplacement(I, U, V))
303 RS.sample(&U, 1);
304 }
305 if (!RS.isEmpty()) {
306 Use *Sink = RS.getSelection();
307 User *U = Sink->getUser();
308 unsigned OpNo = Sink->getOperandNo();
309 U->setOperand(OpNo, V);
310 return cast<Instruction>(U);
311 }
312 return nullptr;
313 };
314 Instruction *Sink = nullptr;
315 for (uint64_t SinkTy : SinkTys) {
316 switch (SinkTy) {
317 case SinkToInstInCurBlock:
318 Sink = findSinkAndConnect(Insts);
319 if (Sink)
320 return Sink;
321 break;
322 case PointersInDominator: {
323 auto Dominators = getDominators(&BB);
324 std::shuffle(Dominators.begin(), Dominators.end(), Rand);
325 for (BasicBlock *Dom : Dominators) {
326 for (Instruction &I : *Dom) {
327 if (isa<PointerType>(I.getType()))
328 return new StoreInst(V, &I, Insts.back());
329 }
330 }
331 break;
332 }
333 case InstInDominatee: {
334 auto Dominatees = getDominatees(&BB);
335 std::shuffle(Dominatees.begin(), Dominatees.end(), Rand);
336 for (BasicBlock *Dominee : Dominatees) {
337 std::vector<Instruction *> Instructions;
338 for (Instruction &I : *Dominee)
339 Instructions.push_back(&I);
340 Sink = findSinkAndConnect(Instructions);
341 if (Sink) {
342 return Sink;
343 }
344 }
345 break;
346 }
347 case NewStore:
348 /// TODO: allocate a new stack memory.
349 return newSink(BB, Insts, V);
350 case SinkToGlobalVariable: {
351 Module *M = BB.getParent()->getParent();
352 auto [GV, DidCreate] =
353 findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType()));
354 return new StoreInst(V, GV, Insts.back());
355 }
356 case EndOfValueSink:
357 default:
358 llvm_unreachable("EndOfValueSink executed");
359 }
360 }
361 llvm_unreachable("Can't find a sink");
362 }
363
newSink(BasicBlock & BB,ArrayRef<Instruction * > Insts,Value * V)364 Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
365 ArrayRef<Instruction *> Insts, Value *V) {
366 Value *Ptr = findPointer(BB, Insts);
367 if (!Ptr) {
368 if (uniform(Rand, 0, 1)) {
369 Type *Ty = V->getType();
370 Ptr = createStackMemory(BB.getParent(), Ty, UndefValue::get(Ty));
371 } else {
372 Ptr = UndefValue::get(PointerType::get(V->getType(), 0));
373 }
374 }
375
376 return new StoreInst(V, Ptr, Insts.back());
377 }
378
findPointer(BasicBlock & BB,ArrayRef<Instruction * > Insts)379 Value *RandomIRBuilder::findPointer(BasicBlock &BB,
380 ArrayRef<Instruction *> Insts) {
381 auto IsMatchingPtr = [](Instruction *Inst) {
382 // Invoke instructions sometimes produce valid pointers but currently
383 // we can't insert loads or stores from them
384 if (Inst->isTerminator())
385 return false;
386
387 return Inst->getType()->isPointerTy();
388 };
389 if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr)))
390 return RS.getSelection();
391 return nullptr;
392 }
393
randomType()394 Type *RandomIRBuilder::randomType() {
395 uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1);
396 return KnownTypes[TyIdx];
397 }
398
createFunctionDeclaration(Module & M,uint64_t ArgNum)399 Function *RandomIRBuilder::createFunctionDeclaration(Module &M,
400 uint64_t ArgNum) {
401 Type *RetType = randomType();
402
403 SmallVector<Type *, 2> Args;
404 for (uint64_t i = 0; i < ArgNum; i++) {
405 Args.push_back(randomType());
406 }
407
408 Function *F = Function::Create(FunctionType::get(RetType, Args,
409 /*isVarArg=*/false),
410 GlobalValue::ExternalLinkage, "f", &M);
411 return F;
412 }
createFunctionDeclaration(Module & M)413 Function *RandomIRBuilder::createFunctionDeclaration(Module &M) {
414 return createFunctionDeclaration(
415 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
416 }
417
createFunctionDefinition(Module & M,uint64_t ArgNum)418 Function *RandomIRBuilder::createFunctionDefinition(Module &M,
419 uint64_t ArgNum) {
420 Function *F = this->createFunctionDeclaration(M, ArgNum);
421
422 // TODO: Some arguments and a return value would probably be more
423 // interesting.
424 LLVMContext &Context = M.getContext();
425 DataLayout DL(&M);
426 BasicBlock *BB = BasicBlock::Create(Context, "BB", F);
427 Type *RetTy = F->getReturnType();
428 if (RetTy != Type::getVoidTy(Context)) {
429 Instruction *RetAlloca =
430 new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);
431 Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);
432 ReturnInst::Create(Context, RetLoad, BB);
433 } else {
434 ReturnInst::Create(Context, BB);
435 }
436
437 return F;
438 }
createFunctionDefinition(Module & M)439 Function *RandomIRBuilder::createFunctionDefinition(Module &M) {
440 return createFunctionDefinition(
441 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
442 }
443