xref: /freebsd/contrib/llvm-project/llvm/lib/FuzzMutate/RandomIRBuilder.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===-- RandomIRBuilder.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/FuzzMutate/RandomIRBuilder.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/FuzzMutate/OpDescriptor.h"
12 #include "llvm/FuzzMutate/Random.h"
13 #include "llvm/IR/BasicBlock.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Dominators.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/Module.h"
20 
21 using namespace llvm;
22 using namespace fuzzerop;
23 
24 /// Return a vector of Blocks that dominates this block, excluding current
25 /// block.
26 static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
27   std::vector<BasicBlock *> ret;
28   DominatorTree DT(*BB->getParent());
29   DomTreeNode *Node = DT.getNode(BB);
30   // It's possible that an orphan block is not in the dom tree. In that case we
31   // just return nothing.
32   if (!Node)
33     return ret;
34   Node = Node->getIDom();
35   while (Node && Node->getBlock()) {
36     ret.push_back(Node->getBlock());
37     // Get parent block.
38     Node = Node->getIDom();
39   }
40   return ret;
41 }
42 
43 /// Return a vector of Blocks that is dominated by this block, excluding current
44 /// block
45 static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
46   DominatorTree DT(*BB->getParent());
47   std::vector<BasicBlock *> ret;
48   DomTreeNode *Parent = DT.getNode(BB);
49   // It's possible that an orphan block is not in the dom tree. In that case we
50   // just return nothing.
51   if (!Parent)
52     return ret;
53   for (DomTreeNode *Child : Parent->children())
54     ret.push_back(Child->getBlock());
55   uint64_t Idx = 0;
56   while (Idx < ret.size()) {
57     DomTreeNode *Node = DT[ret[Idx]];
58     Idx++;
59     for (DomTreeNode *Child : Node->children())
60       ret.push_back(Child->getBlock());
61   }
62   return ret;
63 }
64 
65 AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,
66                                                Value *Init) {
67   /// TODO: For all Allocas, maybe allocate an array.
68   BasicBlock *EntryBB = &F->getEntryBlock();
69   const DataLayout &DL = F->getDataLayout();
70   AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",
71                                       EntryBB->getFirstInsertionPt());
72   if (Init)
73     new StoreInst(Init, Alloca, std::next(Alloca->getIterator()));
74   return Alloca;
75 }
76 
77 std::pair<GlobalVariable *, bool>
78 RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
79                                             fuzzerop::SourcePred Pred) {
80   auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
81     // Can't directly compare GV's type, as it would be a pointer to the actual
82     // type.
83     return Pred.matches(Srcs, PoisonValue::get(GV->getValueType()));
84   };
85   bool DidCreate = false;
86   SmallVector<GlobalVariable *, 4> GlobalVars(
87       llvm::make_pointer_range(M->globals()));
88   auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred));
89   RS.sample(nullptr, 1);
90   GlobalVariable *GV = RS.getSelection();
91   if (!GV) {
92     DidCreate = true;
93     using LinkageTypes = GlobalVariable::LinkageTypes;
94     auto TRS = makeSampler<Constant *>(Rand);
95     TRS.sample(Pred.generate(Srcs, KnownTypes));
96     Constant *Init = TRS.getSelection();
97     Type *Ty = Init->getType();
98     GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
99                             "G", nullptr,
100                             GlobalValue::ThreadLocalMode::NotThreadLocal,
101                             M->getDataLayout().getDefaultGlobalsAddressSpace());
102   }
103   return {GV, DidCreate};
104 }
105 
106 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
107                                            ArrayRef<Instruction *> Insts) {
108   return findOrCreateSource(BB, Insts, {}, anyType());
109 }
110 
111 // Adapts the current pointer for a legal mem operation on the target arch.
112 static Value *buildTargetLegalPtr(Module *M, Value *Ptr, InsertPosition IP,
113                                   const Twine &Name,
114                                   SmallVector<Instruction *> *NewInsts) {
115   if (M && M->getTargetTriple().isAMDGCN()) {
116     // Check if we should perform an address space cast
117     PointerType *pointerType = dyn_cast<PointerType>(Ptr->getType());
118     if (pointerType && pointerType->getAddressSpace() == 8) {
119       // Perform address space cast from address space 8 to address space 7
120       auto NewPtr = new AddrSpaceCastInst(
121           Ptr, PointerType::get(M->getContext(), 7), Name + ".ASC", IP);
122       if (NewInsts)
123         NewInsts->push_back(NewPtr);
124       return NewPtr;
125     }
126   }
127 
128   return Ptr;
129 }
130 
131 // Stores a value to memory, considering the target triple's restrictions.
132 static Instruction *buildTargetLegalStore(Value *Val, Value *Ptr,
133                                           InsertPosition IP, Module *M) {
134   Value *StorePtr = buildTargetLegalPtr(M, Ptr, IP, "", nullptr);
135   Instruction *Store = new StoreInst(Val, StorePtr, IP);
136   return Store;
137 }
138 
139 // Loads a value from memory, considering the target triple's restrictions.
140 static std::pair<Instruction *, SmallVector<Instruction *>>
141 buildTargetLegalLoad(Type *AccessTy, Value *Ptr, InsertPosition IP, Module *M,
142                      const Twine &LoadName) {
143   SmallVector<Instruction *> NewInsts;
144 
145   Value *LoadPtr = buildTargetLegalPtr(M, Ptr, IP, LoadName, &NewInsts);
146 
147   Instruction *Load = new LoadInst(AccessTy, LoadPtr, LoadName, IP);
148   NewInsts.push_back(Load);
149 
150   return std::make_pair(Load, NewInsts);
151 }
152 
153 static void eraseNewInstructions(SmallVector<Instruction *> &NewInsts) {
154   // Remove in reverse order (uses before defs)
155   for (auto it = NewInsts.rbegin(); it != NewInsts.rend(); ++it) {
156     (*it)->eraseFromParent();
157   }
158 }
159 
160 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
161                                            ArrayRef<Instruction *> Insts,
162                                            ArrayRef<Value *> Srcs,
163                                            SourcePred Pred,
164                                            bool allowConstant) {
165   auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); };
166   SmallVector<uint64_t, 8> SrcTys;
167   for (uint64_t i = 0; i < EndOfValueSource; i++)
168     SrcTys.push_back(i);
169   std::shuffle(SrcTys.begin(), SrcTys.end(), Rand);
170   for (uint64_t SrcTy : SrcTys) {
171     switch (SrcTy) {
172     case SrcFromInstInCurBlock: {
173       auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));
174       if (!RS.isEmpty()) {
175         return RS.getSelection();
176       }
177       break;
178     }
179     case FunctionArgument: {
180       Function *F = BB.getParent();
181       SmallVector<Argument *, 8> Args;
182       for (uint64_t i = 0; i < F->arg_size(); i++) {
183         Args.push_back(F->getArg(i));
184       }
185       auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred));
186       if (!RS.isEmpty()) {
187         return RS.getSelection();
188       }
189       break;
190     }
191     case InstInDominator: {
192       auto Dominators = getDominators(&BB);
193       std::shuffle(Dominators.begin(), Dominators.end(), Rand);
194       for (BasicBlock *Dom : Dominators) {
195         SmallVector<Instruction *, 16> Instructions(
196             llvm::make_pointer_range(*Dom));
197         auto RS =
198             makeSampler(Rand, make_filter_range(Instructions, MatchesPred));
199         // Also consider choosing no source, meaning we want a new one.
200         if (!RS.isEmpty()) {
201           return RS.getSelection();
202         }
203       }
204       break;
205     }
206     case SrcFromGlobalVariable: {
207       Module *M = BB.getParent()->getParent();
208       auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
209       Type *Ty = GV->getValueType();
210       InsertPosition IP = BB.getTerminator()
211                               ? InsertPosition(BB.getFirstInsertionPt())
212                               : InsertPosition(&BB);
213       // Build a legal load and track new instructions in case a rollback is
214       // needed.
215       auto [LoadGV, NewInsts] = buildTargetLegalLoad(Ty, GV, IP, M, "LGV");
216       // Because we might be generating new values, we have to check if it
217       // matches again.
218       if (DidCreate) {
219         if (Pred.matches(Srcs, LoadGV)) {
220           return LoadGV;
221         }
222         // Remove newly inserted instructions
223         eraseNewInstructions(NewInsts);
224         // If no one is using this GlobalVariable, delete it too.
225         if (GV->use_empty()) {
226           GV->eraseFromParent();
227         }
228       }
229       break;
230     }
231     case NewConstOrStack: {
232       return newSource(BB, Insts, Srcs, Pred, allowConstant);
233     }
234     default:
235     case EndOfValueSource: {
236       llvm_unreachable("EndOfValueSource executed");
237     }
238     }
239   }
240   llvm_unreachable("Can't find a source");
241 }
242 
243 Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
244                                   ArrayRef<Value *> Srcs, SourcePred Pred,
245                                   bool allowConstant) {
246   // Generate some constants to choose from.
247   auto RS = makeSampler<Value *>(Rand);
248   RS.sample(Pred.generate(Srcs, KnownTypes));
249 
250   // If we can find a pointer to load from, use it half the time.
251   Value *Ptr = findPointer(BB, Insts);
252   if (Ptr) {
253     // Create load from the chosen pointer
254     auto IP = BB.getFirstInsertionPt();
255     if (auto *I = dyn_cast<Instruction>(Ptr)) {
256       IP = ++I->getIterator();
257       assert(IP != BB.end() && "guaranteed by the findPointer");
258     }
259     // Pick the type independently.
260     Type *AccessTy = RS.getSelection()->getType();
261     // Build a legal load and track new instructions in case a rollback is
262     // needed.
263     auto [NewLoad, NewInsts] =
264         buildTargetLegalLoad(AccessTy, Ptr, IP, BB.getModule(), "L");
265 
266     // Only sample this load if it really matches the descriptor
267     if (Pred.matches(Srcs, NewLoad))
268       RS.sample(NewLoad, RS.totalWeight());
269     else {
270       // Remove newly inserted instructions
271       eraseNewInstructions(NewInsts);
272     }
273   }
274 
275   Value *newSrc = RS.getSelection();
276   // Generate a stack alloca and store the constant to it if constant is not
277   // allowed, our hope is that later mutations can generate some values and
278   // store to this placeholder.
279   if (!allowConstant && isa<Constant>(newSrc)) {
280     Type *Ty = newSrc->getType();
281     Function *F = BB.getParent();
282     AllocaInst *Alloca = createStackMemory(F, Ty, newSrc);
283     if (BB.getTerminator()) {
284       newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L",
285                             BB.getTerminator()->getIterator());
286     } else {
287       newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);
288     }
289   }
290   return newSrc;
291 }
292 
293 static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
294                                     const Value *Replacement) {
295   unsigned int OperandNo = Operand.getOperandNo();
296   if (Operand->getType() != Replacement->getType())
297     return false;
298   switch (I->getOpcode()) {
299   case Instruction::GetElementPtr:
300   case Instruction::ExtractElement:
301   case Instruction::ExtractValue:
302     // TODO: We could potentially validate these, but for now just leave indices
303     // alone.
304     if (OperandNo >= 1)
305       return false;
306     break;
307   case Instruction::InsertValue:
308   case Instruction::InsertElement:
309   case Instruction::ShuffleVector:
310     if (OperandNo >= 2)
311       return false;
312     break;
313   // For Br/Switch, we only try to modify the 1st Operand (condition).
314   // Modify other operands, like switch case may accidently change case from
315   // ConstantInt to a register, which is illegal.
316   case Instruction::Switch:
317   case Instruction::Br:
318     if (OperandNo >= 1)
319       return false;
320     break;
321   case Instruction::Call:
322   case Instruction::Invoke:
323   case Instruction::CallBr: {
324     const Function *Callee = cast<CallBase>(I)->getCalledFunction();
325     // If it's an indirect call, give up.
326     if (!Callee)
327       return false;
328     // If callee is not an intrinsic, operand 0 is the function to be called.
329     // Since we cannot assume that the replacement is a function pointer,
330     // we give up.
331     if (!Callee->getIntrinsicID() && OperandNo == 0)
332       return false;
333     return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg);
334   }
335   default:
336     break;
337   }
338   return true;
339 }
340 
341 Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
342                                             ArrayRef<Instruction *> Insts,
343                                             Value *V) {
344   SmallVector<uint64_t, 8> SinkTys;
345   for (uint64_t i = 0; i < EndOfValueSink; i++)
346     SinkTys.push_back(i);
347   std::shuffle(SinkTys.begin(), SinkTys.end(), Rand);
348   auto findSinkAndConnect =
349       [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
350     auto RS = makeSampler<Use *>(Rand);
351     for (auto &I : Instructions) {
352       for (Use &U : I->operands())
353         if (isCompatibleReplacement(I, U, V))
354           RS.sample(&U, 1);
355     }
356     if (!RS.isEmpty()) {
357       Use *Sink = RS.getSelection();
358       User *U = Sink->getUser();
359       unsigned OpNo = Sink->getOperandNo();
360       U->setOperand(OpNo, V);
361       return cast<Instruction>(U);
362     }
363     return nullptr;
364   };
365   Instruction *Sink = nullptr;
366   for (uint64_t SinkTy : SinkTys) {
367     switch (SinkTy) {
368     case SinkToInstInCurBlock:
369       Sink = findSinkAndConnect(Insts);
370       if (Sink)
371         return Sink;
372       break;
373     case PointersInDominator: {
374       auto Dominators = getDominators(&BB);
375       std::shuffle(Dominators.begin(), Dominators.end(), Rand);
376       for (BasicBlock *Dom : Dominators) {
377         for (Instruction &I : *Dom) {
378           if (isa<PointerType>(I.getType())) {
379             return buildTargetLegalStore(V, &I, Insts.back()->getIterator(),
380                                          I.getModule());
381           }
382         }
383       }
384       break;
385     }
386     case InstInDominatee: {
387       auto Dominatees = getDominatees(&BB);
388       std::shuffle(Dominatees.begin(), Dominatees.end(), Rand);
389       for (BasicBlock *Dominee : Dominatees) {
390         std::vector<Instruction *> Instructions;
391         for (Instruction &I : *Dominee)
392           Instructions.push_back(&I);
393         Sink = findSinkAndConnect(Instructions);
394         if (Sink) {
395           return Sink;
396         }
397       }
398       break;
399     }
400     case NewStore:
401       /// TODO: allocate a new stack memory.
402       return newSink(BB, Insts, V);
403     case SinkToGlobalVariable: {
404       Module *M = BB.getModule();
405       auto [GV, DidCreate] =
406           findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType()));
407       return buildTargetLegalStore(V, GV, Insts.back()->getIterator(), M);
408     }
409     case EndOfValueSink:
410     default:
411       llvm_unreachable("EndOfValueSink executed");
412     }
413   }
414   llvm_unreachable("Can't find a sink");
415 }
416 
417 Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
418                                       ArrayRef<Instruction *> Insts, Value *V) {
419   Value *Ptr = findPointer(BB, Insts);
420   if (!Ptr) {
421     if (uniform(Rand, 0, 1)) {
422       Type *Ty = V->getType();
423       Ptr = createStackMemory(BB.getParent(), Ty, PoisonValue::get(Ty));
424     } else {
425       Ptr = PoisonValue::get(PointerType::get(V->getContext(), 0));
426     }
427   }
428 
429   return buildTargetLegalStore(V, Ptr, Insts.back()->getIterator(),
430                                BB.getModule());
431 }
432 
433 Value *RandomIRBuilder::findPointer(BasicBlock &BB,
434                                     ArrayRef<Instruction *> Insts) {
435   auto IsMatchingPtr = [](Instruction *Inst) {
436     // Invoke instructions sometimes produce valid pointers but currently
437     // we can't insert loads or stores from them
438     if (Inst->isTerminator())
439       return false;
440 
441     return Inst->getType()->isPointerTy();
442   };
443   if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr)))
444     return RS.getSelection();
445   return nullptr;
446 }
447 
448 Type *RandomIRBuilder::randomType() {
449   uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1);
450   return KnownTypes[TyIdx];
451 }
452 
453 Function *RandomIRBuilder::createFunctionDeclaration(Module &M,
454                                                      uint64_t ArgNum) {
455   Type *RetType = randomType();
456 
457   SmallVector<Type *, 2> Args;
458   for (uint64_t i = 0; i < ArgNum; i++) {
459     Args.push_back(randomType());
460   }
461 
462   Function *F = Function::Create(FunctionType::get(RetType, Args,
463                                                    /*isVarArg=*/false),
464                                  GlobalValue::ExternalLinkage, "f", &M);
465   return F;
466 }
467 Function *RandomIRBuilder::createFunctionDeclaration(Module &M) {
468   return createFunctionDeclaration(
469       M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
470 }
471 
472 Function *RandomIRBuilder::createFunctionDefinition(Module &M,
473                                                     uint64_t ArgNum) {
474   Function *F = this->createFunctionDeclaration(M, ArgNum);
475 
476   // TODO: Some arguments and a return value would probably be more
477   // interesting.
478   LLVMContext &Context = M.getContext();
479   const DataLayout &DL = M.getDataLayout();
480   BasicBlock *BB = BasicBlock::Create(Context, "BB", F);
481   Type *RetTy = F->getReturnType();
482   if (RetTy != Type::getVoidTy(Context)) {
483     Instruction *RetAlloca =
484         new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);
485     Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);
486     ReturnInst::Create(Context, RetLoad, BB);
487   } else {
488     ReturnInst::Create(Context, BB);
489   }
490 
491   return F;
492 }
493 Function *RandomIRBuilder::createFunctionDefinition(Module &M) {
494   return createFunctionDefinition(
495       M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
496 }
497