xref: /freebsd/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp (revision d5b0e70f7e04d971691517ce1304d86a1e367e2e)
1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // Implementation file for the IRSimilarityIdentifier for identifying
11 // similarities in IR including the IRInstructionMapper.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/IR/Intrinsics.h"
18 #include "llvm/IR/Operator.h"
19 #include "llvm/IR/User.h"
20 #include "llvm/InitializePasses.h"
21 #include "llvm/Support/SuffixTree.h"
22 
23 using namespace llvm;
24 using namespace IRSimilarity;
25 
26 namespace llvm {
27 cl::opt<bool>
28     DisableBranches("no-ir-sim-branch-matching", cl::init(false),
29                     cl::ReallyHidden,
30                     cl::desc("disable similarity matching, and outlining, "
31                              "across branches for debugging purposes."));
32 
33 cl::opt<bool>
34     DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
35                          cl::ReallyHidden,
36                          cl::desc("disable outlining indirect calls."));
37 
38 cl::opt<bool>
39     MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
40                      cl::desc("only allow matching call instructions if the "
41                               "name and type signature match."));
42 
43 cl::opt<bool>
44     DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
45                       cl::desc("Don't match or outline intrinsics"));
46 } // namespace llvm
47 
48 IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
49                                      IRInstructionDataList &IDList)
50     : Inst(&I), Legal(Legality), IDL(&IDList) {
51   initializeInstruction();
52 }
53 
54 void IRInstructionData::initializeInstruction() {
55   // We check for whether we have a comparison instruction.  If it is, we
56   // find the "less than" version of the predicate for consistency for
57   // comparison instructions throught the program.
58   if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
59     CmpInst::Predicate Predicate = predicateForConsistency(C);
60     if (Predicate != C->getPredicate())
61       RevisedPredicate = Predicate;
62   }
63 
64   // Here we collect the operands and their types for determining whether
65   // the structure of the operand use matches between two different candidates.
66   for (Use &OI : Inst->operands()) {
67     if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) {
68       // If we have a CmpInst where the predicate is reversed, it means the
69       // operands must be reversed as well.
70       OperVals.insert(OperVals.begin(), OI.get());
71       continue;
72     }
73 
74     OperVals.push_back(OI.get());
75   }
76 
77   // We capture the incoming BasicBlocks as values as well as the incoming
78   // Values in order to check for structural similarity.
79   if (PHINode *PN = dyn_cast<PHINode>(Inst))
80     for (BasicBlock *BB : PN->blocks())
81       OperVals.push_back(BB);
82 }
83 
84 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
85     : IDL(&IDList) {}
86 
87 void IRInstructionData::setBranchSuccessors(
88     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
89   assert(isa<BranchInst>(Inst) && "Instruction must be branch");
90 
91   BranchInst *BI = cast<BranchInst>(Inst);
92   DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
93 
94   BBNumIt = BasicBlockToInteger.find(BI->getParent());
95   assert(BBNumIt != BasicBlockToInteger.end() &&
96          "Could not find location for BasicBlock!");
97 
98   int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
99 
100   for (BasicBlock *Successor : BI->successors()) {
101     BBNumIt = BasicBlockToInteger.find(Successor);
102     assert(BBNumIt != BasicBlockToInteger.end() &&
103            "Could not find number for BasicBlock!");
104     int OtherBlockNumber = static_cast<int>(BBNumIt->second);
105 
106     int Relative = OtherBlockNumber - CurrentBlockNumber;
107     RelativeBlockLocations.push_back(Relative);
108   }
109 }
110 
111 void IRInstructionData::setCalleeName(bool MatchByName) {
112   CallInst *CI = dyn_cast<CallInst>(Inst);
113   assert(CI && "Instruction must be call");
114 
115   CalleeName = "";
116   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
117     // To hash intrinsics, we use the opcode, and types like the other
118     // instructions, but also, the Intrinsic ID, and the Name of the
119     // intrinsic.
120     Intrinsic::ID IntrinsicID = II->getIntrinsicID();
121     FunctionType *FT = II->getFunctionType();
122     // If there is an overloaded name, we have to use the complex version
123     // of getName to get the entire string.
124     if (Intrinsic::isOverloaded(IntrinsicID))
125       CalleeName =
126           Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
127     // If there is not an overloaded name, we only need to use this version.
128     else
129       CalleeName = Intrinsic::getName(IntrinsicID).str();
130 
131     return;
132   }
133 
134   if (!CI->isIndirectCall() && MatchByName)
135     CalleeName = CI->getCalledFunction()->getName().str();
136 }
137 
138 void IRInstructionData::setPHIPredecessors(
139     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
140   assert(isa<PHINode>(Inst) && "Instruction must be phi node");
141 
142   PHINode *PN = cast<PHINode>(Inst);
143   DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
144 
145   BBNumIt = BasicBlockToInteger.find(PN->getParent());
146   assert(BBNumIt != BasicBlockToInteger.end() &&
147          "Could not find location for BasicBlock!");
148 
149   int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
150 
151   // Convert the incoming blocks of the PHINode to an integer value, based on
152   // the relative distances between the current block and the incoming block.
153   for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
154     BasicBlock *Incoming = PN->getIncomingBlock(Idx);
155     BBNumIt = BasicBlockToInteger.find(Incoming);
156     assert(BBNumIt != BasicBlockToInteger.end() &&
157            "Could not find number for BasicBlock!");
158     int OtherBlockNumber = static_cast<int>(BBNumIt->second);
159 
160     int Relative = OtherBlockNumber - CurrentBlockNumber;
161     RelativeBlockLocations.push_back(Relative);
162     RelativeBlockLocations.push_back(Relative);
163   }
164 }
165 
166 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
167   switch (CI->getPredicate()) {
168   case CmpInst::FCMP_OGT:
169   case CmpInst::FCMP_UGT:
170   case CmpInst::FCMP_OGE:
171   case CmpInst::FCMP_UGE:
172   case CmpInst::ICMP_SGT:
173   case CmpInst::ICMP_UGT:
174   case CmpInst::ICMP_SGE:
175   case CmpInst::ICMP_UGE:
176     return CI->getSwappedPredicate();
177   default:
178     return CI->getPredicate();
179   }
180 }
181 
182 CmpInst::Predicate IRInstructionData::getPredicate() const {
183   assert(isa<CmpInst>(Inst) &&
184          "Can only get a predicate from a compare instruction");
185 
186   if (RevisedPredicate.hasValue())
187     return RevisedPredicate.getValue();
188 
189   return cast<CmpInst>(Inst)->getPredicate();
190 }
191 
192 StringRef IRInstructionData::getCalleeName() const {
193   assert(isa<CallInst>(Inst) &&
194          "Can only get a name from a call instruction");
195 
196   assert(CalleeName.hasValue() && "CalleeName has not been set");
197 
198   return *CalleeName;
199 }
200 
201 bool IRSimilarity::isClose(const IRInstructionData &A,
202                            const IRInstructionData &B) {
203 
204   if (!A.Legal || !B.Legal)
205     return false;
206 
207   // Check if we are performing the same sort of operation on the same types
208   // but not on the same values.
209   if (!A.Inst->isSameOperationAs(B.Inst)) {
210     // If there is a predicate, this means that either there is a swapped
211     // predicate, or that the types are different, we want to make sure that
212     // the predicates are equivalent via swapping.
213     if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
214 
215       if (A.getPredicate() != B.getPredicate())
216         return false;
217 
218       // If the predicates are the same via swap, make sure that the types are
219       // still the same.
220       auto ZippedTypes = zip(A.OperVals, B.OperVals);
221 
222       return all_of(
223           ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
224             return std::get<0>(R)->getType() == std::get<1>(R)->getType();
225           });
226     }
227 
228     return false;
229   }
230 
231   // Since any GEP Instruction operands after the first operand cannot be
232   // defined by a register, we must make sure that the operands after the first
233   // are the same in the two instructions
234   if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
235     auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
236 
237     // If the instructions do not have the same inbounds restrictions, we do
238     // not consider them the same.
239     if (GEP->isInBounds() != OtherGEP->isInBounds())
240       return false;
241 
242     auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
243 
244     // We increment here since we do not care about the first instruction,
245     // we only care about the following operands since they must be the
246     // exact same to be considered similar.
247     return all_of(drop_begin(ZippedOperands),
248                   [](std::tuple<llvm::Use &, llvm::Use &> R) {
249                     return std::get<0>(R) == std::get<1>(R);
250                   });
251   }
252 
253   // If the instructions are functions calls, we make sure that the function
254   // name is the same.  We already know that the types are since is
255   // isSameOperationAs is true.
256   if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
257     if (A.getCalleeName().str() != B.getCalleeName().str())
258       return false;
259   }
260 
261   if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
262       A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
263     return false;
264 
265   return true;
266 }
267 
268 // TODO: This is the same as the MachineOutliner, and should be consolidated
269 // into the same interface.
270 void IRInstructionMapper::convertToUnsignedVec(
271     BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
272     std::vector<unsigned> &IntegerMapping) {
273   BasicBlock::iterator It = BB.begin();
274 
275   std::vector<unsigned> IntegerMappingForBB;
276   std::vector<IRInstructionData *> InstrListForBB;
277 
278   for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
279     switch (InstClassifier.visit(*It)) {
280     case InstrType::Legal:
281       mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
282       break;
283     case InstrType::Illegal:
284       mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
285       break;
286     case InstrType::Invisible:
287       AddedIllegalLastTime = false;
288       break;
289     }
290   }
291 
292   if (HaveLegalRange) {
293     if (AddedIllegalLastTime)
294       mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
295     for (IRInstructionData *ID : InstrListForBB)
296       this->IDL->push_back(*ID);
297     llvm::append_range(InstrList, InstrListForBB);
298     llvm::append_range(IntegerMapping, IntegerMappingForBB);
299   }
300 }
301 
302 // TODO: This is the same as the MachineOutliner, and should be consolidated
303 // into the same interface.
304 unsigned IRInstructionMapper::mapToLegalUnsigned(
305     BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
306     std::vector<IRInstructionData *> &InstrListForBB) {
307   // We added something legal, so we should unset the AddedLegalLastTime
308   // flag.
309   AddedIllegalLastTime = false;
310 
311   // If we have at least two adjacent legal instructions (which may have
312   // invisible instructions in between), remember that.
313   if (CanCombineWithPrevInstr)
314     HaveLegalRange = true;
315   CanCombineWithPrevInstr = true;
316 
317   // Get the integer for this instruction or give it the current
318   // LegalInstrNumber.
319   IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
320   InstrListForBB.push_back(ID);
321 
322   if (isa<BranchInst>(*It))
323     ID->setBranchSuccessors(BasicBlockToInteger);
324 
325   if (isa<CallInst>(*It))
326     ID->setCalleeName(EnableMatchCallsByName);
327 
328   if (isa<PHINode>(*It))
329     ID->setPHIPredecessors(BasicBlockToInteger);
330 
331   // Add to the instruction list
332   bool WasInserted;
333   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
334       ResultIt;
335   std::tie(ResultIt, WasInserted) =
336       InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
337   unsigned INumber = ResultIt->second;
338 
339   // There was an insertion.
340   if (WasInserted)
341     LegalInstrNumber++;
342 
343   IntegerMappingForBB.push_back(INumber);
344 
345   // Make sure we don't overflow or use any integers reserved by the DenseMap.
346   assert(LegalInstrNumber < IllegalInstrNumber &&
347          "Instruction mapping overflow!");
348 
349   assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
350          "Tried to assign DenseMap tombstone or empty key to instruction.");
351   assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
352          "Tried to assign DenseMap tombstone or empty key to instruction.");
353 
354   return INumber;
355 }
356 
357 IRInstructionData *
358 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
359                                                IRInstructionDataList &IDL) {
360   return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
361 }
362 
363 IRInstructionData *
364 IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
365   return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
366 }
367 
368 IRInstructionDataList *
369 IRInstructionMapper::allocateIRInstructionDataList() {
370   return new (IDLAllocator->Allocate()) IRInstructionDataList();
371 }
372 
373 // TODO: This is the same as the MachineOutliner, and should be consolidated
374 // into the same interface.
375 unsigned IRInstructionMapper::mapToIllegalUnsigned(
376     BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
377     std::vector<IRInstructionData *> &InstrListForBB, bool End) {
378   // Can't combine an illegal instruction. Set the flag.
379   CanCombineWithPrevInstr = false;
380 
381   // Only add one illegal number per range of legal numbers.
382   if (AddedIllegalLastTime)
383     return IllegalInstrNumber;
384 
385   IRInstructionData *ID = nullptr;
386   if (!End)
387     ID = allocateIRInstructionData(*It, false, *IDL);
388   else
389     ID = allocateIRInstructionData(*IDL);
390   InstrListForBB.push_back(ID);
391 
392   // Remember that we added an illegal number last time.
393   AddedIllegalLastTime = true;
394   unsigned INumber = IllegalInstrNumber;
395   IntegerMappingForBB.push_back(IllegalInstrNumber--);
396 
397   assert(LegalInstrNumber < IllegalInstrNumber &&
398          "Instruction mapping overflow!");
399 
400   assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
401          "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
402 
403   assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
404          "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
405 
406   return INumber;
407 }
408 
409 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
410                                              IRInstructionData *FirstInstIt,
411                                              IRInstructionData *LastInstIt)
412     : StartIdx(StartIdx), Len(Len) {
413 
414   assert(FirstInstIt != nullptr && "Instruction is nullptr!");
415   assert(LastInstIt != nullptr && "Instruction is nullptr!");
416   assert(StartIdx + Len > StartIdx &&
417          "Overflow for IRSimilarityCandidate range?");
418   assert(Len - 1 == static_cast<unsigned>(std::distance(
419                         iterator(FirstInstIt), iterator(LastInstIt))) &&
420          "Length of the first and last IRInstructionData do not match the "
421          "given length");
422 
423   // We iterate over the given instructions, and map each unique value
424   // to a unique number in the IRSimilarityCandidate ValueToNumber and
425   // NumberToValue maps.  A constant get its own value globally, the individual
426   // uses of the constants are not considered to be unique.
427   //
428   // IR:                    Mapping Added:
429   // %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2
430   // %add2 = add i32 %a, %1    %add2 -> 4
431   // %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5
432   //
433   // when replace with global values, starting from 1, would be
434   //
435   // 3 = add i32 1, 2
436   // 4 = add i32 1, 3
437   // 6 = add i32 5, 2
438   unsigned LocalValNumber = 1;
439   IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
440   for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
441     // Map the operand values to an unsigned integer if it does not already
442     // have an unsigned integer assigned to it.
443     for (Value *Arg : ID->OperVals)
444       if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
445         ValueToNumber.try_emplace(Arg, LocalValNumber);
446         NumberToValue.try_emplace(LocalValNumber, Arg);
447         LocalValNumber++;
448       }
449 
450     // Mapping the instructions to an unsigned integer if it is not already
451     // exist in the mapping.
452     if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
453       ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
454       NumberToValue.try_emplace(LocalValNumber, ID->Inst);
455       LocalValNumber++;
456     }
457   }
458 
459   // Setting the first and last instruction data pointers for the candidate.  If
460   // we got through the entire for loop without hitting an assert, we know
461   // that both of these instructions are not nullptrs.
462   FirstInst = FirstInstIt;
463   LastInst = LastInstIt;
464 }
465 
466 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
467                                       const IRSimilarityCandidate &B) {
468   if (A.getLength() != B.getLength())
469     return false;
470 
471   auto InstrDataForBoth =
472       zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
473 
474   return all_of(InstrDataForBoth,
475                 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
476                   IRInstructionData &A = std::get<0>(R);
477                   IRInstructionData &B = std::get<1>(R);
478                   if (!A.Legal || !B.Legal)
479                     return false;
480                   return isClose(A, B);
481                 });
482 }
483 
484 /// Determine if one or more of the assigned global value numbers for the
485 /// operands in \p TargetValueNumbers is in the current mapping set for operand
486 /// numbers in \p SourceOperands.  The set of possible corresponding global
487 /// value numbers are replaced with the most recent version of compatible
488 /// values.
489 ///
490 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
491 /// value number for the source IRInstructionCandidate.
492 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
493 /// IRSimilarityCandidate global value numbers to a set of possible numbers in
494 /// the target.
495 /// \param [in] SourceOperands - The operands in the original
496 /// IRSimilarityCandidate in the current instruction.
497 /// \param [in] TargetValueNumbers - The global value numbers of the operands in
498 /// the corresponding Instruction in the other IRSimilarityCandidate.
499 /// \returns true if there exists a possible mapping between the source
500 /// Instruction operands and the target Instruction operands, and false if not.
501 static bool checkNumberingAndReplaceCommutative(
502   const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
503   DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
504   ArrayRef<Value *> &SourceOperands,
505   DenseSet<unsigned> &TargetValueNumbers){
506 
507   DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
508 
509   unsigned ArgVal;
510   bool WasInserted;
511 
512   // Iterate over the operands in the source IRSimilarityCandidate to determine
513   // whether there exists an operand in the other IRSimilarityCandidate that
514   // creates a valid mapping of Value to Value between the
515   // IRSimilarityCaniddates.
516   for (Value *V : SourceOperands) {
517     ArgVal = SourceValueToNumberMapping.find(V)->second;
518 
519     std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
520         std::make_pair(ArgVal, TargetValueNumbers));
521 
522     // Instead of finding a current mapping, we inserted a set.  This means a
523     // mapping did not exist for the source Instruction operand, it has no
524     // current constraints we need to check.
525     if (WasInserted)
526       continue;
527 
528     // If a mapping already exists for the source operand to the values in the
529     // other IRSimilarityCandidate we need to iterate over the items in other
530     // IRSimilarityCandidate's Instruction to determine whether there is a valid
531     // mapping of Value to Value.
532     DenseSet<unsigned> NewSet;
533     for (unsigned &Curr : ValueMappingIt->second)
534       // If we can find the value in the mapping, we add it to the new set.
535       if (TargetValueNumbers.contains(Curr))
536         NewSet.insert(Curr);
537 
538     // If we could not find a Value, return 0.
539     if (NewSet.empty())
540       return false;
541 
542     // Otherwise replace the old mapping with the newly constructed one.
543     if (NewSet.size() != ValueMappingIt->second.size())
544       ValueMappingIt->second.swap(NewSet);
545 
546     // We have reached no conclusions about the mapping, and cannot remove
547     // any items from the other operands, so we move to check the next operand.
548     if (ValueMappingIt->second.size() != 1)
549       continue;
550 
551 
552     unsigned ValToRemove = *ValueMappingIt->second.begin();
553     // When there is only one item left in the mapping for and operand, remove
554     // the value from the other operands.  If it results in there being no
555     // mapping, return false, it means the mapping is wrong
556     for (Value *InnerV : SourceOperands) {
557       if (V == InnerV)
558         continue;
559 
560       unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
561       ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
562       if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
563         continue;
564 
565       ValueMappingIt->second.erase(ValToRemove);
566       if (ValueMappingIt->second.empty())
567         return false;
568     }
569   }
570 
571   return true;
572 }
573 
574 /// Determine if operand number \p TargetArgVal is in the current mapping set
575 /// for operand number \p SourceArgVal.
576 ///
577 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
578 /// value numbers from source IRSimilarityCandidate to target
579 /// IRSimilarityCandidate.
580 /// \param [in] SourceArgVal The global value number for an operand in the
581 /// in the original candidate.
582 /// \param [in] TargetArgVal The global value number for the corresponding
583 /// operand in the other candidate.
584 /// \returns True if there exists a mapping and false if not.
585 bool checkNumberingAndReplace(
586     DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
587     unsigned SourceArgVal, unsigned TargetArgVal) {
588   // We are given two unsigned integers representing the global values of
589   // the operands in different IRSimilarityCandidates and a current mapping
590   // between the two.
591   //
592   // Source Operand GVN: 1
593   // Target Operand GVN: 2
594   // CurrentMapping: {1: {1, 2}}
595   //
596   // Since we have mapping, and the target operand is contained in the set, we
597   // update it to:
598   // CurrentMapping: {1: {2}}
599   // and can return true. But, if the mapping was
600   // CurrentMapping: {1: {3}}
601   // we would return false.
602 
603   bool WasInserted;
604   DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
605 
606   std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
607       std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
608 
609   // If we created a new mapping, then we are done.
610   if (WasInserted)
611     return true;
612 
613   // If there is more than one option in the mapping set, and the target value
614   // is included in the mapping set replace that set with one that only includes
615   // the target value, as it is the only valid mapping via the non commutative
616   // instruction.
617 
618   DenseSet<unsigned> &TargetSet = Val->second;
619   if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
620     TargetSet.clear();
621     TargetSet.insert(TargetArgVal);
622     return true;
623   }
624 
625   // Return true if we can find the value in the set.
626   return TargetSet.contains(TargetArgVal);
627 }
628 
629 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
630     OperandMapping A, OperandMapping B) {
631   // Iterators to keep track of where we are in the operands for each
632   // Instruction.
633   ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
634   ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
635   unsigned OperandLength = A.OperVals.size();
636 
637   // For each operand, get the value numbering and ensure it is consistent.
638   for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
639     unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
640     unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
641 
642     // Attempt to add a set with only the target value.  If there is no mapping
643     // we can create it here.
644     //
645     // For an instruction like a subtraction:
646     // IRSimilarityCandidateA:  IRSimilarityCandidateB:
647     // %resultA = sub %a, %b    %resultB = sub %d, %e
648     //
649     // We map %a -> %d and %b -> %e.
650     //
651     // And check to see whether their mapping is consistent in
652     // checkNumberingAndReplace.
653 
654     if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
655       return false;
656 
657     if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
658       return false;
659   }
660   return true;
661 }
662 
663 bool IRSimilarityCandidate::compareCommutativeOperandMapping(
664     OperandMapping A, OperandMapping B) {
665   DenseSet<unsigned> ValueNumbersA;
666   DenseSet<unsigned> ValueNumbersB;
667 
668   ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
669   ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
670   unsigned OperandLength = A.OperVals.size();
671 
672   // Find the value number sets for the operands.
673   for (unsigned Idx = 0; Idx < OperandLength;
674        Idx++, VItA++, VItB++) {
675     ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
676     ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
677   }
678 
679   // Iterate over the operands in the first IRSimilarityCandidate and make sure
680   // there exists a possible mapping with the operands in the second
681   // IRSimilarityCandidate.
682   if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
683                                            A.ValueNumberMapping, A.OperVals,
684                                            ValueNumbersB))
685     return false;
686 
687   // Iterate over the operands in the second IRSimilarityCandidate and make sure
688   // there exists a possible mapping with the operands in the first
689   // IRSimilarityCandidate.
690   if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
691                                            B.ValueNumberMapping, B.OperVals,
692                                            ValueNumbersA))
693     return false;
694 
695   return true;
696 }
697 
698 bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
699                                                    RelativeLocMapping B) {
700   // Get the basic blocks the label refers to.
701   BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
702   BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
703 
704   // Get the basic blocks contained in each region.
705   DenseSet<BasicBlock *> BasicBlockA;
706   DenseSet<BasicBlock *> BasicBlockB;
707   A.IRSC.getBasicBlocks(BasicBlockA);
708   B.IRSC.getBasicBlocks(BasicBlockB);
709 
710   // Determine if the block is contained in the region.
711   bool AContained = BasicBlockA.contains(ABB);
712   bool BContained = BasicBlockB.contains(BBB);
713 
714   // Both blocks need to be contained in the region, or both need to be outside
715   // the reigon.
716   if (AContained != BContained)
717     return false;
718 
719   // If both are contained, then we need to make sure that the relative
720   // distance to the target blocks are the same.
721   if (AContained)
722     return A.RelativeLocation == B.RelativeLocation;
723   return true;
724 }
725 
726 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
727                                              const IRSimilarityCandidate &B) {
728   DenseMap<unsigned, DenseSet<unsigned>> MappingA;
729   DenseMap<unsigned, DenseSet<unsigned>> MappingB;
730   return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
731 }
732 
733 typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
734                       SmallVector<int, 4> &, ArrayRef<Value *> &,
735                       ArrayRef<Value *> &>
736     ZippedRelativeLocationsT;
737 
738 bool IRSimilarityCandidate::compareStructure(
739     const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
740     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
741     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
742   if (A.getLength() != B.getLength())
743     return false;
744 
745   if (A.ValueToNumber.size() != B.ValueToNumber.size())
746     return false;
747 
748   iterator ItA = A.begin();
749   iterator ItB = B.begin();
750 
751   // These ValueNumber Mapping sets create a create a mapping between the values
752   // in one candidate to values in the other candidate.  If we create a set with
753   // one element, and that same element maps to the original element in the
754   // candidate we have a good mapping.
755   DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
756 
757 
758   // Iterate over the instructions contained in each candidate
759   unsigned SectionLength = A.getStartIdx() + A.getLength();
760   for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
761        ItA++, ItB++, Loc++) {
762     // Make sure the instructions are similar to one another.
763     if (!isClose(*ItA, *ItB))
764       return false;
765 
766     Instruction *IA = ItA->Inst;
767     Instruction *IB = ItB->Inst;
768 
769     if (!ItA->Legal || !ItB->Legal)
770       return false;
771 
772     // Get the operand sets for the instructions.
773     ArrayRef<Value *> OperValsA = ItA->OperVals;
774     ArrayRef<Value *> OperValsB = ItB->OperVals;
775 
776     unsigned InstValA = A.ValueToNumber.find(IA)->second;
777     unsigned InstValB = B.ValueToNumber.find(IB)->second;
778 
779     bool WasInserted;
780     // Ensure that the mappings for the instructions exists.
781     std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
782         std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
783     if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
784       return false;
785 
786     std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
787         std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
788     if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
789       return false;
790 
791     // We have different paths for commutative instructions and non-commutative
792     // instructions since commutative instructions could allow multiple mappings
793     // to certain values.
794     if (IA->isCommutative() && !isa<FPMathOperator>(IA)) {
795       if (!compareCommutativeOperandMapping(
796               {A, OperValsA, ValueNumberMappingA},
797               {B, OperValsB, ValueNumberMappingB}))
798         return false;
799       continue;
800     }
801 
802     // Handle the non-commutative cases.
803     if (!compareNonCommutativeOperandMapping(
804             {A, OperValsA, ValueNumberMappingA},
805             {B, OperValsB, ValueNumberMappingB}))
806       return false;
807 
808     // Here we check that between two corresponding instructions,
809     // when referring to a basic block in the same region, the
810     // relative locations are the same. And, that the instructions refer to
811     // basic blocks outside the region in the same corresponding locations.
812 
813     // We are able to make the assumption about blocks outside of the region
814     // since the target block labels are considered values and will follow the
815     // same number matching that we defined for the other instructions in the
816     // region.  So, at this point, in each location we target a specific block
817     // outside the region, we are targeting a corresponding block in each
818     // analagous location in the region we are comparing to.
819     if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
820         !(isa<PHINode>(IA) && isa<PHINode>(IB)))
821       continue;
822 
823     SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
824     SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
825     if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
826         OperValsA.size() != OperValsB.size())
827       return false;
828 
829     ZippedRelativeLocationsT ZippedRelativeLocations =
830         zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
831     if (any_of(ZippedRelativeLocations,
832                [&A, &B](std::tuple<int, int, Value *, Value *> R) {
833                  return !checkRelativeLocations(
834                      {A, std::get<0>(R), std::get<2>(R)},
835                      {B, std::get<1>(R), std::get<3>(R)});
836                }))
837       return false;
838   }
839   return true;
840 }
841 
842 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
843                                     const IRSimilarityCandidate &B) {
844   auto DoesOverlap = [](const IRSimilarityCandidate &X,
845                         const IRSimilarityCandidate &Y) {
846     // Check:
847     // XXXXXX        X starts before Y ends
848     //      YYYYYYY  Y starts after X starts
849     return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
850   };
851 
852   return DoesOverlap(A, B) || DoesOverlap(B, A);
853 }
854 
855 void IRSimilarityIdentifier::populateMapper(
856     Module &M, std::vector<IRInstructionData *> &InstrList,
857     std::vector<unsigned> &IntegerMapping) {
858 
859   std::vector<IRInstructionData *> InstrListForModule;
860   std::vector<unsigned> IntegerMappingForModule;
861   // Iterate over the functions in the module to map each Instruction in each
862   // BasicBlock to an unsigned integer.
863   Mapper.initializeForBBs(M);
864 
865   for (Function &F : M) {
866 
867     if (F.empty())
868       continue;
869 
870     for (BasicBlock &BB : F) {
871 
872       // BB has potential to have similarity since it has a size greater than 2
873       // and can therefore match other regions greater than 2. Map it to a list
874       // of unsigned integers.
875       Mapper.convertToUnsignedVec(BB, InstrListForModule,
876                                   IntegerMappingForModule);
877     }
878 
879     BasicBlock::iterator It = F.begin()->end();
880     Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
881                                 true);
882     if (InstrListForModule.size() > 0)
883       Mapper.IDL->push_back(*InstrListForModule.back());
884   }
885 
886   // Insert the InstrListForModule at the end of the overall InstrList so that
887   // we can have a long InstrList for the entire set of Modules being analyzed.
888   llvm::append_range(InstrList, InstrListForModule);
889   // Do the same as above, but for IntegerMapping.
890   llvm::append_range(IntegerMapping, IntegerMappingForModule);
891 }
892 
893 void IRSimilarityIdentifier::populateMapper(
894     ArrayRef<std::unique_ptr<Module>> &Modules,
895     std::vector<IRInstructionData *> &InstrList,
896     std::vector<unsigned> &IntegerMapping) {
897 
898   // Iterate over, and map the instructions in each module.
899   for (const std::unique_ptr<Module> &M : Modules)
900     populateMapper(*M, InstrList, IntegerMapping);
901 }
902 
903 /// From a repeated subsequence, find all the different instances of the
904 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
905 /// the IRInstructionData in subsequence.
906 ///
907 /// \param [in] Mapper - The instruction mapper for basic correctness checks.
908 /// \param [in] InstrList - The vector that holds the instruction data.
909 /// \param [in] IntegerMapping - The vector that holds the mapped integers.
910 /// \param [out] CandsForRepSubstring - The vector to store the generated
911 /// IRSimilarityCandidates.
912 static void createCandidatesFromSuffixTree(
913     const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
914     std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
915     std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
916 
917   unsigned StringLen = RS.Length;
918   if (StringLen < 2)
919     return;
920 
921   // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
922   for (const unsigned &StartIdx : RS.StartIndices) {
923     unsigned EndIdx = StartIdx + StringLen - 1;
924 
925     // Check that this subsequence does not contain an illegal instruction.
926     bool ContainsIllegal = false;
927     for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
928       unsigned Key = IntegerMapping[CurrIdx];
929       if (Key > Mapper.IllegalInstrNumber) {
930         ContainsIllegal = true;
931         break;
932       }
933     }
934 
935     // If we have an illegal instruction, we should not create an
936     // IRSimilarityCandidate for this region.
937     if (ContainsIllegal)
938       continue;
939 
940     // We are getting iterators to the instructions in this region of code
941     // by advancing the start and end indices from the start of the
942     // InstrList.
943     std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
944     std::advance(StartIt, StartIdx);
945     std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
946     std::advance(EndIt, EndIdx);
947 
948     CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
949   }
950 }
951 
952 void IRSimilarityCandidate::createCanonicalRelationFrom(
953     IRSimilarityCandidate &SourceCand,
954     DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
955     DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
956   assert(SourceCand.CanonNumToNumber.size() != 0 &&
957          "Base canonical relationship is empty!");
958   assert(SourceCand.NumberToCanonNum.size() != 0 &&
959          "Base canonical relationship is empty!");
960 
961   assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
962   assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
963 
964   DenseSet<unsigned> UsedGVNs;
965   // Iterate over the mappings provided from this candidate to SourceCand.  We
966   // are then able to map the GVN in this candidate to the same canonical number
967   // given to the corresponding GVN in SourceCand.
968   for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
969     unsigned SourceGVN = GVNMapping.first;
970 
971     assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
972 
973     unsigned ResultGVN;
974     // We need special handling if we have more than one potential value.  This
975     // means that there are at least two GVNs that could correspond to this GVN.
976     // This could lead to potential swapping later on, so we make a decision
977     // here to ensure a one-to-one mapping.
978     if (GVNMapping.second.size() > 1) {
979       bool Found = false;
980       for (unsigned Val : GVNMapping.second) {
981         // We make sure the target value number hasn't already been reserved.
982         if (UsedGVNs.contains(Val))
983           continue;
984 
985         // We make sure that the opposite mapping is still consistent.
986         DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
987             FromSourceMapping.find(Val);
988 
989         if (!It->second.contains(SourceGVN))
990           continue;
991 
992         // We pick the first item that satisfies these conditions.
993         Found = true;
994         ResultGVN = Val;
995         break;
996       }
997 
998       assert(Found && "Could not find matching value for source GVN");
999       (void)Found;
1000 
1001     } else
1002       ResultGVN = *GVNMapping.second.begin();
1003 
1004     // Whatever GVN is found, we mark it as used.
1005     UsedGVNs.insert(ResultGVN);
1006 
1007     unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1008     CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1009     NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1010   }
1011 }
1012 
1013 void IRSimilarityCandidate::createCanonicalMappingFor(
1014     IRSimilarityCandidate &CurrCand) {
1015   assert(CurrCand.CanonNumToNumber.size() == 0 &&
1016          "Canonical Relationship is non-empty");
1017   assert(CurrCand.NumberToCanonNum.size() == 0 &&
1018          "Canonical Relationship is non-empty");
1019 
1020   unsigned CanonNum = 0;
1021   // Iterate over the value numbers found, the order does not matter in this
1022   // case.
1023   for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1024     CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1025     CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1026     CanonNum++;
1027   }
1028 }
1029 
1030 /// From the list of IRSimilarityCandidates, perform a comparison between each
1031 /// IRSimilarityCandidate to determine if there are overlapping
1032 /// IRInstructionData, or if they do not have the same structure.
1033 ///
1034 /// \param [in] CandsForRepSubstring - The vector containing the
1035 /// IRSimilarityCandidates.
1036 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1037 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1038 /// vector are structurally similar to one another.
1039 static void findCandidateStructures(
1040     std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1041     DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
1042   std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1043       InnerCandEndIt;
1044 
1045   // IRSimilarityCandidates each have a structure for operand use.  It is
1046   // possible that two instances of the same subsequences have different
1047   // structure. Each type of structure found is assigned a number.  This
1048   // DenseMap maps an IRSimilarityCandidate to which type of similarity
1049   // discovered it fits within.
1050   DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1051 
1052   // Find the compatibility from each candidate to the others to determine
1053   // which candidates overlap and which have the same structure by mapping
1054   // each structure to a different group.
1055   bool SameStructure;
1056   bool Inserted;
1057   unsigned CurrentGroupNum = 0;
1058   unsigned OuterGroupNum;
1059   DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
1060   DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
1061   DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
1062 
1063   // Iterate over the candidates to determine its structural and overlapping
1064   // compatibility with other instructions
1065   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1066   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1067   for (CandIt = CandsForRepSubstring.begin(),
1068       CandEndIt = CandsForRepSubstring.end();
1069        CandIt != CandEndIt; CandIt++) {
1070 
1071     // Determine if it has an assigned structural group already.
1072     CandToGroupIt = CandToGroup.find(&*CandIt);
1073     if (CandToGroupIt == CandToGroup.end()) {
1074       // If not, we assign it one, and add it to our mapping.
1075       std::tie(CandToGroupIt, Inserted) =
1076           CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1077     }
1078 
1079     // Get the structural group number from the iterator.
1080     OuterGroupNum = CandToGroupIt->second;
1081 
1082     // Check if we already have a list of IRSimilarityCandidates for the current
1083     // structural group.  Create one if one does not exist.
1084     CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1085     if (CurrentGroupPair == StructuralGroups.end()) {
1086       IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
1087       std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1088           std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1089     }
1090 
1091     // Iterate over the IRSimilarityCandidates following the current
1092     // IRSimilarityCandidate in the list to determine whether the two
1093     // IRSimilarityCandidates are compatible.  This is so we do not repeat pairs
1094     // of IRSimilarityCandidates.
1095     for (InnerCandIt = std::next(CandIt),
1096         InnerCandEndIt = CandsForRepSubstring.end();
1097          InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1098 
1099       // We check if the inner item has a group already, if it does, we skip it.
1100       CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1101       if (CandToGroupItInner != CandToGroup.end())
1102         continue;
1103 
1104       // Otherwise we determine if they have the same structure and add it to
1105       // vector if they match.
1106       ValueNumberMappingA.clear();
1107       ValueNumberMappingB.clear();
1108       SameStructure = IRSimilarityCandidate::compareStructure(
1109           *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1110       if (!SameStructure)
1111         continue;
1112 
1113       InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1114                                                ValueNumberMappingB);
1115       CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1116       CurrentGroupPair->second.push_back(*InnerCandIt);
1117     }
1118   }
1119 }
1120 
1121 void IRSimilarityIdentifier::findCandidates(
1122     std::vector<IRInstructionData *> &InstrList,
1123     std::vector<unsigned> &IntegerMapping) {
1124   SuffixTree ST(IntegerMapping);
1125 
1126   std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1127   std::vector<SimilarityGroup> NewCandidateGroups;
1128 
1129   DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1130 
1131   // Iterate over the subsequences found by the Suffix Tree to create
1132   // IRSimilarityCandidates for each repeated subsequence and determine which
1133   // instances are structurally similar to one another.
1134   for (SuffixTree::RepeatedSubstring &RS : ST) {
1135     createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1136                                    CandsForRepSubstring);
1137 
1138     if (CandsForRepSubstring.size() < 2)
1139       continue;
1140 
1141     findCandidateStructures(CandsForRepSubstring, StructuralGroups);
1142     for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
1143       // We only add the group if it contains more than one
1144       // IRSimilarityCandidate.  If there is only one, that means there is no
1145       // other repeated subsequence with the same structure.
1146       if (Group.second.size() > 1)
1147         SimilarityCandidates->push_back(Group.second);
1148 
1149     CandsForRepSubstring.clear();
1150     StructuralGroups.clear();
1151     NewCandidateGroups.clear();
1152   }
1153 }
1154 
1155 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
1156     ArrayRef<std::unique_ptr<Module>> Modules) {
1157   resetSimilarityCandidates();
1158 
1159   std::vector<IRInstructionData *> InstrList;
1160   std::vector<unsigned> IntegerMapping;
1161   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1162   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1163   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1164   Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1165 
1166   populateMapper(Modules, InstrList, IntegerMapping);
1167   findCandidates(InstrList, IntegerMapping);
1168 
1169   return SimilarityCandidates.getValue();
1170 }
1171 
1172 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
1173   resetSimilarityCandidates();
1174   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1175   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1176   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1177   Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1178 
1179   std::vector<IRInstructionData *> InstrList;
1180   std::vector<unsigned> IntegerMapping;
1181 
1182   populateMapper(M, InstrList, IntegerMapping);
1183   findCandidates(InstrList, IntegerMapping);
1184 
1185   return SimilarityCandidates.getValue();
1186 }
1187 
1188 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
1189                 "ir-similarity-identifier", false, true)
1190 
1191 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
1192     : ModulePass(ID) {
1193   initializeIRSimilarityIdentifierWrapperPassPass(
1194       *PassRegistry::getPassRegistry());
1195 }
1196 
1197 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
1198   IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1199                                         MatchCallsByName, !DisableIntrinsics));
1200   return false;
1201 }
1202 
1203 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
1204   IRSI.reset();
1205   return false;
1206 }
1207 
1208 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
1209   IRSI->findSimilarity(M);
1210   return false;
1211 }
1212 
1213 AnalysisKey IRSimilarityAnalysis::Key;
1214 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
1215                                                  ModuleAnalysisManager &) {
1216   auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1217                                      MatchCallsByName, !DisableIntrinsics);
1218   IRSI.findSimilarity(M);
1219   return IRSI;
1220 }
1221 
1222 PreservedAnalyses
1223 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
1224   IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
1225   Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity();
1226 
1227   for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1228     OS << CandVec.size() << " candidates of length "
1229        << CandVec.begin()->getLength() << ".  Found in: \n";
1230     for (IRSimilarityCandidate &Cand : CandVec) {
1231       OS << "  Function: " << Cand.front()->Inst->getFunction()->getName().str()
1232          << ", Basic Block: ";
1233       if (Cand.front()->Inst->getParent()->getName().str() == "")
1234         OS << "(unnamed)";
1235       else
1236         OS << Cand.front()->Inst->getParent()->getName().str();
1237       OS << "\n    Start Instruction: ";
1238       Cand.frontInstruction()->print(OS);
1239       OS << "\n      End Instruction: ";
1240       Cand.backInstruction()->print(OS);
1241       OS << "\n";
1242     }
1243   }
1244 
1245   return PreservedAnalyses::all();
1246 }
1247 
1248 char IRSimilarityIdentifierWrapperPass::ID = 0;
1249