xref: /freebsd/contrib/llvm-project/llvm/lib/FuzzMutate/RandomIRBuilder.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1 //===-- RandomIRBuilder.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/FuzzMutate/RandomIRBuilder.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/FuzzMutate/OpDescriptor.h"
12 #include "llvm/FuzzMutate/Random.h"
13 #include "llvm/IR/BasicBlock.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Dominators.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/IR/Module.h"
21 
22 using namespace llvm;
23 using namespace fuzzerop;
24 
25 /// Return a vector of Blocks that dominates this block, excluding current
26 /// block.
getDominators(BasicBlock * BB)27 static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
28   std::vector<BasicBlock *> ret;
29   DominatorTree DT(*BB->getParent());
30   DomTreeNode *Node = DT.getNode(BB);
31   // It's possible that an orphan block is not in the dom tree. In that case we
32   // just return nothing.
33   if (!Node)
34     return ret;
35   Node = Node->getIDom();
36   while (Node && Node->getBlock()) {
37     ret.push_back(Node->getBlock());
38     // Get parent block.
39     Node = Node->getIDom();
40   }
41   return ret;
42 }
43 
44 /// Return a vector of Blocks that is dominated by this block, excluding current
45 /// block
getDominatees(BasicBlock * BB)46 static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
47   DominatorTree DT(*BB->getParent());
48   std::vector<BasicBlock *> ret;
49   DomTreeNode *Parent = DT.getNode(BB);
50   // It's possible that an orphan block is not in the dom tree. In that case we
51   // just return nothing.
52   if (!Parent)
53     return ret;
54   for (DomTreeNode *Child : Parent->children())
55     ret.push_back(Child->getBlock());
56   uint64_t Idx = 0;
57   while (Idx < ret.size()) {
58     DomTreeNode *Node = DT[ret[Idx]];
59     Idx++;
60     for (DomTreeNode *Child : Node->children())
61       ret.push_back(Child->getBlock());
62   }
63   return ret;
64 }
65 
createStackMemory(Function * F,Type * Ty,Value * Init)66 AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,
67                                                Value *Init) {
68   /// TODO: For all Allocas, maybe allocate an array.
69   BasicBlock *EntryBB = &F->getEntryBlock();
70   DataLayout DL(F->getParent());
71   AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",
72                                       &*EntryBB->getFirstInsertionPt());
73   if (Init)
74     new StoreInst(Init, Alloca, Alloca->getNextNode());
75   return Alloca;
76 }
77 
78 std::pair<GlobalVariable *, bool>
findOrCreateGlobalVariable(Module * M,ArrayRef<Value * > Srcs,fuzzerop::SourcePred Pred)79 RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
80                                             fuzzerop::SourcePred Pred) {
81   auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
82     // Can't directly compare GV's type, as it would be a pointer to the actual
83     // type.
84     return Pred.matches(Srcs, UndefValue::get(GV->getValueType()));
85   };
86   bool DidCreate = false;
87   SmallVector<GlobalVariable *, 4> GlobalVars;
88   for (GlobalVariable &GV : M->globals()) {
89     GlobalVars.push_back(&GV);
90   }
91   auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred));
92   RS.sample(nullptr, 1);
93   GlobalVariable *GV = RS.getSelection();
94   if (!GV) {
95     DidCreate = true;
96     using LinkageTypes = GlobalVariable::LinkageTypes;
97     auto TRS = makeSampler<Constant *>(Rand);
98     TRS.sample(Pred.generate(Srcs, KnownTypes));
99     Constant *Init = TRS.getSelection();
100     Type *Ty = Init->getType();
101     GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
102                             "G", nullptr,
103                             GlobalValue::ThreadLocalMode::NotThreadLocal,
104                             M->getDataLayout().getDefaultGlobalsAddressSpace());
105   }
106   return {GV, DidCreate};
107 }
108 
findOrCreateSource(BasicBlock & BB,ArrayRef<Instruction * > Insts)109 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
110                                            ArrayRef<Instruction *> Insts) {
111   return findOrCreateSource(BB, Insts, {}, anyType());
112 }
113 
findOrCreateSource(BasicBlock & BB,ArrayRef<Instruction * > Insts,ArrayRef<Value * > Srcs,SourcePred Pred,bool allowConstant)114 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
115                                            ArrayRef<Instruction *> Insts,
116                                            ArrayRef<Value *> Srcs,
117                                            SourcePred Pred,
118                                            bool allowConstant) {
119   auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); };
120   SmallVector<uint64_t, 8> SrcTys;
121   for (uint64_t i = 0; i < EndOfValueSource; i++)
122     SrcTys.push_back(i);
123   std::shuffle(SrcTys.begin(), SrcTys.end(), Rand);
124   for (uint64_t SrcTy : SrcTys) {
125     switch (SrcTy) {
126     case SrcFromInstInCurBlock: {
127       auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));
128       if (!RS.isEmpty()) {
129         return RS.getSelection();
130       }
131       break;
132     }
133     case FunctionArgument: {
134       Function *F = BB.getParent();
135       SmallVector<Argument *, 8> Args;
136       for (uint64_t i = 0; i < F->arg_size(); i++) {
137         Args.push_back(F->getArg(i));
138       }
139       auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred));
140       if (!RS.isEmpty()) {
141         return RS.getSelection();
142       }
143       break;
144     }
145     case InstInDominator: {
146       auto Dominators = getDominators(&BB);
147       std::shuffle(Dominators.begin(), Dominators.end(), Rand);
148       for (BasicBlock *Dom : Dominators) {
149         SmallVector<Instruction *, 16> Instructions;
150         for (Instruction &I : *Dom) {
151           Instructions.push_back(&I);
152         }
153         auto RS =
154             makeSampler(Rand, make_filter_range(Instructions, MatchesPred));
155         // Also consider choosing no source, meaning we want a new one.
156         if (!RS.isEmpty()) {
157           return RS.getSelection();
158         }
159       }
160       break;
161     }
162     case SrcFromGlobalVariable: {
163       Module *M = BB.getParent()->getParent();
164       auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
165       Type *Ty = GV->getValueType();
166       LoadInst *LoadGV = nullptr;
167       if (BB.getTerminator()) {
168         LoadGV = new LoadInst(Ty, GV, "LGV", &*BB.getFirstInsertionPt());
169       } else {
170         LoadGV = new LoadInst(Ty, GV, "LGV", &BB);
171       }
172       // Because we might be generating new values, we have to check if it
173       // matches again.
174       if (DidCreate) {
175         if (Pred.matches(Srcs, LoadGV)) {
176           return LoadGV;
177         }
178         LoadGV->eraseFromParent();
179         // If no one is using this GlobalVariable, delete it too.
180         if (GV->use_empty()) {
181           GV->eraseFromParent();
182         }
183       }
184       break;
185     }
186     case NewConstOrStack: {
187       return newSource(BB, Insts, Srcs, Pred, allowConstant);
188     }
189     default:
190     case EndOfValueSource: {
191       llvm_unreachable("EndOfValueSource executed");
192     }
193     }
194   }
195   llvm_unreachable("Can't find a source");
196 }
197 
newSource(BasicBlock & BB,ArrayRef<Instruction * > Insts,ArrayRef<Value * > Srcs,SourcePred Pred,bool allowConstant)198 Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
199                                   ArrayRef<Value *> Srcs, SourcePred Pred,
200                                   bool allowConstant) {
201   // Generate some constants to choose from.
202   auto RS = makeSampler<Value *>(Rand);
203   RS.sample(Pred.generate(Srcs, KnownTypes));
204 
205   // If we can find a pointer to load from, use it half the time.
206   Value *Ptr = findPointer(BB, Insts);
207   if (Ptr) {
208     // Create load from the chosen pointer
209     auto IP = BB.getFirstInsertionPt();
210     if (auto *I = dyn_cast<Instruction>(Ptr)) {
211       IP = ++I->getIterator();
212       assert(IP != BB.end() && "guaranteed by the findPointer");
213     }
214     // Pick the type independently.
215     Type *AccessTy = RS.getSelection()->getType();
216     auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", &*IP);
217 
218     // Only sample this load if it really matches the descriptor
219     if (Pred.matches(Srcs, NewLoad))
220       RS.sample(NewLoad, RS.totalWeight());
221     else
222       NewLoad->eraseFromParent();
223   }
224 
225   Value *newSrc = RS.getSelection();
226   // Generate a stack alloca and store the constant to it if constant is not
227   // allowed, our hope is that later mutations can generate some values and
228   // store to this placeholder.
229   if (!allowConstant && isa<Constant>(newSrc)) {
230     Type *Ty = newSrc->getType();
231     Function *F = BB.getParent();
232     AllocaInst *Alloca = createStackMemory(F, Ty, newSrc);
233     if (BB.getTerminator()) {
234       newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", BB.getTerminator());
235     } else {
236       newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);
237     }
238   }
239   return newSrc;
240 }
241 
isCompatibleReplacement(const Instruction * I,const Use & Operand,const Value * Replacement)242 static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
243                                     const Value *Replacement) {
244   unsigned int OperandNo = Operand.getOperandNo();
245   if (Operand->getType() != Replacement->getType())
246     return false;
247   switch (I->getOpcode()) {
248   case Instruction::GetElementPtr:
249   case Instruction::ExtractElement:
250   case Instruction::ExtractValue:
251     // TODO: We could potentially validate these, but for now just leave indices
252     // alone.
253     if (OperandNo >= 1)
254       return false;
255     break;
256   case Instruction::InsertValue:
257   case Instruction::InsertElement:
258   case Instruction::ShuffleVector:
259     if (OperandNo >= 2)
260       return false;
261     break;
262   // For Br/Switch, we only try to modify the 1st Operand (condition).
263   // Modify other operands, like switch case may accidently change case from
264   // ConstantInt to a register, which is illegal.
265   case Instruction::Switch:
266   case Instruction::Br:
267     if (OperandNo >= 1)
268       return false;
269     break;
270   case Instruction::Call:
271   case Instruction::Invoke:
272   case Instruction::CallBr: {
273     const Function *Callee = cast<CallBase>(I)->getCalledFunction();
274     // If it's an indirect call, give up.
275     if (!Callee)
276       return false;
277     // If callee is not an intrinsic, operand 0 is the function to be called.
278     // Since we cannot assume that the replacement is a function pointer,
279     // we give up.
280     if (!Callee->getIntrinsicID() && OperandNo == 0)
281       return false;
282     return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg);
283   }
284   default:
285     break;
286   }
287   return true;
288 }
289 
connectToSink(BasicBlock & BB,ArrayRef<Instruction * > Insts,Value * V)290 Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
291                                             ArrayRef<Instruction *> Insts,
292                                             Value *V) {
293   SmallVector<uint64_t, 8> SinkTys;
294   for (uint64_t i = 0; i < EndOfValueSink; i++)
295     SinkTys.push_back(i);
296   std::shuffle(SinkTys.begin(), SinkTys.end(), Rand);
297   auto findSinkAndConnect =
298       [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
299     auto RS = makeSampler<Use *>(Rand);
300     for (auto &I : Instructions) {
301       for (Use &U : I->operands())
302         if (isCompatibleReplacement(I, U, V))
303           RS.sample(&U, 1);
304     }
305     if (!RS.isEmpty()) {
306       Use *Sink = RS.getSelection();
307       User *U = Sink->getUser();
308       unsigned OpNo = Sink->getOperandNo();
309       U->setOperand(OpNo, V);
310       return cast<Instruction>(U);
311     }
312     return nullptr;
313   };
314   Instruction *Sink = nullptr;
315   for (uint64_t SinkTy : SinkTys) {
316     switch (SinkTy) {
317     case SinkToInstInCurBlock:
318       Sink = findSinkAndConnect(Insts);
319       if (Sink)
320         return Sink;
321       break;
322     case PointersInDominator: {
323       auto Dominators = getDominators(&BB);
324       std::shuffle(Dominators.begin(), Dominators.end(), Rand);
325       for (BasicBlock *Dom : Dominators) {
326         for (Instruction &I : *Dom) {
327           if (isa<PointerType>(I.getType()))
328             return new StoreInst(V, &I, Insts.back());
329         }
330       }
331       break;
332     }
333     case InstInDominatee: {
334       auto Dominatees = getDominatees(&BB);
335       std::shuffle(Dominatees.begin(), Dominatees.end(), Rand);
336       for (BasicBlock *Dominee : Dominatees) {
337         std::vector<Instruction *> Instructions;
338         for (Instruction &I : *Dominee)
339           Instructions.push_back(&I);
340         Sink = findSinkAndConnect(Instructions);
341         if (Sink) {
342           return Sink;
343         }
344       }
345       break;
346     }
347     case NewStore:
348       /// TODO: allocate a new stack memory.
349       return newSink(BB, Insts, V);
350     case SinkToGlobalVariable: {
351       Module *M = BB.getParent()->getParent();
352       auto [GV, DidCreate] =
353           findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType()));
354       return new StoreInst(V, GV, Insts.back());
355     }
356     case EndOfValueSink:
357     default:
358       llvm_unreachable("EndOfValueSink executed");
359     }
360   }
361   llvm_unreachable("Can't find a sink");
362 }
363 
newSink(BasicBlock & BB,ArrayRef<Instruction * > Insts,Value * V)364 Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
365                                       ArrayRef<Instruction *> Insts, Value *V) {
366   Value *Ptr = findPointer(BB, Insts);
367   if (!Ptr) {
368     if (uniform(Rand, 0, 1)) {
369       Type *Ty = V->getType();
370       Ptr = createStackMemory(BB.getParent(), Ty, UndefValue::get(Ty));
371     } else {
372       Ptr = UndefValue::get(PointerType::get(V->getType(), 0));
373     }
374   }
375 
376   return new StoreInst(V, Ptr, Insts.back());
377 }
378 
findPointer(BasicBlock & BB,ArrayRef<Instruction * > Insts)379 Value *RandomIRBuilder::findPointer(BasicBlock &BB,
380                                     ArrayRef<Instruction *> Insts) {
381   auto IsMatchingPtr = [](Instruction *Inst) {
382     // Invoke instructions sometimes produce valid pointers but currently
383     // we can't insert loads or stores from them
384     if (Inst->isTerminator())
385       return false;
386 
387     return Inst->getType()->isPointerTy();
388   };
389   if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr)))
390     return RS.getSelection();
391   return nullptr;
392 }
393 
randomType()394 Type *RandomIRBuilder::randomType() {
395   uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1);
396   return KnownTypes[TyIdx];
397 }
398 
createFunctionDeclaration(Module & M,uint64_t ArgNum)399 Function *RandomIRBuilder::createFunctionDeclaration(Module &M,
400                                                      uint64_t ArgNum) {
401   Type *RetType = randomType();
402 
403   SmallVector<Type *, 2> Args;
404   for (uint64_t i = 0; i < ArgNum; i++) {
405     Args.push_back(randomType());
406   }
407 
408   Function *F = Function::Create(FunctionType::get(RetType, Args,
409                                                    /*isVarArg=*/false),
410                                  GlobalValue::ExternalLinkage, "f", &M);
411   return F;
412 }
createFunctionDeclaration(Module & M)413 Function *RandomIRBuilder::createFunctionDeclaration(Module &M) {
414   return createFunctionDeclaration(
415       M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
416 }
417 
createFunctionDefinition(Module & M,uint64_t ArgNum)418 Function *RandomIRBuilder::createFunctionDefinition(Module &M,
419                                                     uint64_t ArgNum) {
420   Function *F = this->createFunctionDeclaration(M, ArgNum);
421 
422   // TODO: Some arguments and a return value would probably be more
423   // interesting.
424   LLVMContext &Context = M.getContext();
425   DataLayout DL(&M);
426   BasicBlock *BB = BasicBlock::Create(Context, "BB", F);
427   Type *RetTy = F->getReturnType();
428   if (RetTy != Type::getVoidTy(Context)) {
429     Instruction *RetAlloca =
430         new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);
431     Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);
432     ReturnInst::Create(Context, RetLoad, BB);
433   } else {
434     ReturnInst::Create(Context, BB);
435   }
436 
437   return F;
438 }
createFunctionDefinition(Module & M)439 Function *RandomIRBuilder::createFunctionDefinition(Module &M) {
440   return createFunctionDefinition(
441       M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
442 }
443