xref: /freebsd/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // \file
10  // Implementation file for the IRSimilarityIdentifier for identifying
11  // similarities in IR including the IRInstructionMapper.
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #include "llvm/Analysis/IRSimilarityIdentifier.h"
16  #include "llvm/ADT/DenseMap.h"
17  #include "llvm/ADT/SetOperations.h"
18  #include "llvm/IR/Intrinsics.h"
19  #include "llvm/IR/Operator.h"
20  #include "llvm/IR/User.h"
21  #include "llvm/InitializePasses.h"
22  #include "llvm/Support/SuffixTree.h"
23  
24  using namespace llvm;
25  using namespace IRSimilarity;
26  
27  namespace llvm {
28  cl::opt<bool>
29      DisableBranches("no-ir-sim-branch-matching", cl::init(false),
30                      cl::ReallyHidden,
31                      cl::desc("disable similarity matching, and outlining, "
32                               "across branches for debugging purposes."));
33  
34  cl::opt<bool>
35      DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
36                           cl::ReallyHidden,
37                           cl::desc("disable outlining indirect calls."));
38  
39  cl::opt<bool>
40      MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
41                       cl::desc("only allow matching call instructions if the "
42                                "name and type signature match."));
43  
44  cl::opt<bool>
45      DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
46                        cl::desc("Don't match or outline intrinsics"));
47  } // namespace llvm
48  
IRInstructionData(Instruction & I,bool Legality,IRInstructionDataList & IDList)49  IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
50                                       IRInstructionDataList &IDList)
51      : Inst(&I), Legal(Legality), IDL(&IDList) {
52    initializeInstruction();
53  }
54  
initializeInstruction()55  void IRInstructionData::initializeInstruction() {
56    // We check for whether we have a comparison instruction.  If it is, we
57    // find the "less than" version of the predicate for consistency for
58    // comparison instructions throught the program.
59    if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
60      CmpInst::Predicate Predicate = predicateForConsistency(C);
61      if (Predicate != C->getPredicate())
62        RevisedPredicate = Predicate;
63    }
64  
65    // Here we collect the operands and their types for determining whether
66    // the structure of the operand use matches between two different candidates.
67    for (Use &OI : Inst->operands()) {
68      if (isa<CmpInst>(Inst) && RevisedPredicate) {
69        // If we have a CmpInst where the predicate is reversed, it means the
70        // operands must be reversed as well.
71        OperVals.insert(OperVals.begin(), OI.get());
72        continue;
73      }
74  
75      OperVals.push_back(OI.get());
76    }
77  
78    // We capture the incoming BasicBlocks as values as well as the incoming
79    // Values in order to check for structural similarity.
80    if (PHINode *PN = dyn_cast<PHINode>(Inst))
81      for (BasicBlock *BB : PN->blocks())
82        OperVals.push_back(BB);
83  }
84  
IRInstructionData(IRInstructionDataList & IDList)85  IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
86      : IDL(&IDList) {}
87  
setBranchSuccessors(DenseMap<BasicBlock *,unsigned> & BasicBlockToInteger)88  void IRInstructionData::setBranchSuccessors(
89      DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
90    assert(isa<BranchInst>(Inst) && "Instruction must be branch");
91  
92    BranchInst *BI = cast<BranchInst>(Inst);
93    DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
94  
95    BBNumIt = BasicBlockToInteger.find(BI->getParent());
96    assert(BBNumIt != BasicBlockToInteger.end() &&
97           "Could not find location for BasicBlock!");
98  
99    int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
100  
101    for (Value *V : getBlockOperVals()) {
102      BasicBlock *Successor = cast<BasicBlock>(V);
103      BBNumIt = BasicBlockToInteger.find(Successor);
104      assert(BBNumIt != BasicBlockToInteger.end() &&
105             "Could not find number for BasicBlock!");
106      int OtherBlockNumber = static_cast<int>(BBNumIt->second);
107  
108      int Relative = OtherBlockNumber - CurrentBlockNumber;
109      RelativeBlockLocations.push_back(Relative);
110    }
111  }
112  
getBlockOperVals()113  ArrayRef<Value *> IRInstructionData::getBlockOperVals() {
114    assert((isa<BranchInst>(Inst) ||
115           isa<PHINode>(Inst)) && "Instruction must be branch or PHINode");
116  
117    if (BranchInst *BI = dyn_cast<BranchInst>(Inst))
118      return ArrayRef<Value *>(
119        std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),
120        OperVals.end()
121      );
122  
123    if (PHINode *PN = dyn_cast<PHINode>(Inst))
124      return ArrayRef<Value *>(
125        std::next(OperVals.begin(), PN->getNumIncomingValues()),
126        OperVals.end()
127      );
128  
129    return ArrayRef<Value *>();
130  }
131  
setCalleeName(bool MatchByName)132  void IRInstructionData::setCalleeName(bool MatchByName) {
133    CallInst *CI = dyn_cast<CallInst>(Inst);
134    assert(CI && "Instruction must be call");
135  
136    CalleeName = "";
137    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
138      // To hash intrinsics, we use the opcode, and types like the other
139      // instructions, but also, the Intrinsic ID, and the Name of the
140      // intrinsic.
141      Intrinsic::ID IntrinsicID = II->getIntrinsicID();
142      FunctionType *FT = II->getFunctionType();
143      // If there is an overloaded name, we have to use the complex version
144      // of getName to get the entire string.
145      if (Intrinsic::isOverloaded(IntrinsicID))
146        CalleeName =
147            Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
148      // If there is not an overloaded name, we only need to use this version.
149      else
150        CalleeName = Intrinsic::getName(IntrinsicID).str();
151  
152      return;
153    }
154  
155    if (!CI->isIndirectCall() && MatchByName)
156      CalleeName = CI->getCalledFunction()->getName().str();
157  }
158  
setPHIPredecessors(DenseMap<BasicBlock *,unsigned> & BasicBlockToInteger)159  void IRInstructionData::setPHIPredecessors(
160      DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
161    assert(isa<PHINode>(Inst) && "Instruction must be phi node");
162  
163    PHINode *PN = cast<PHINode>(Inst);
164    DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
165  
166    BBNumIt = BasicBlockToInteger.find(PN->getParent());
167    assert(BBNumIt != BasicBlockToInteger.end() &&
168           "Could not find location for BasicBlock!");
169  
170    int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
171  
172    // Convert the incoming blocks of the PHINode to an integer value, based on
173    // the relative distances between the current block and the incoming block.
174    for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
175      BasicBlock *Incoming = PN->getIncomingBlock(Idx);
176      BBNumIt = BasicBlockToInteger.find(Incoming);
177      assert(BBNumIt != BasicBlockToInteger.end() &&
178             "Could not find number for BasicBlock!");
179      int OtherBlockNumber = static_cast<int>(BBNumIt->second);
180  
181      int Relative = OtherBlockNumber - CurrentBlockNumber;
182      RelativeBlockLocations.push_back(Relative);
183    }
184  }
185  
predicateForConsistency(CmpInst * CI)186  CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
187    switch (CI->getPredicate()) {
188    case CmpInst::FCMP_OGT:
189    case CmpInst::FCMP_UGT:
190    case CmpInst::FCMP_OGE:
191    case CmpInst::FCMP_UGE:
192    case CmpInst::ICMP_SGT:
193    case CmpInst::ICMP_UGT:
194    case CmpInst::ICMP_SGE:
195    case CmpInst::ICMP_UGE:
196      return CI->getSwappedPredicate();
197    default:
198      return CI->getPredicate();
199    }
200  }
201  
getPredicate() const202  CmpInst::Predicate IRInstructionData::getPredicate() const {
203    assert(isa<CmpInst>(Inst) &&
204           "Can only get a predicate from a compare instruction");
205  
206    if (RevisedPredicate)
207      return *RevisedPredicate;
208  
209    return cast<CmpInst>(Inst)->getPredicate();
210  }
211  
getCalleeName() const212  StringRef IRInstructionData::getCalleeName() const {
213    assert(isa<CallInst>(Inst) &&
214           "Can only get a name from a call instruction");
215  
216    assert(CalleeName && "CalleeName has not been set");
217  
218    return *CalleeName;
219  }
220  
isClose(const IRInstructionData & A,const IRInstructionData & B)221  bool IRSimilarity::isClose(const IRInstructionData &A,
222                             const IRInstructionData &B) {
223  
224    if (!A.Legal || !B.Legal)
225      return false;
226  
227    // Check if we are performing the same sort of operation on the same types
228    // but not on the same values.
229    if (!A.Inst->isSameOperationAs(B.Inst)) {
230      // If there is a predicate, this means that either there is a swapped
231      // predicate, or that the types are different, we want to make sure that
232      // the predicates are equivalent via swapping.
233      if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
234  
235        if (A.getPredicate() != B.getPredicate())
236          return false;
237  
238        // If the predicates are the same via swap, make sure that the types are
239        // still the same.
240        auto ZippedTypes = zip(A.OperVals, B.OperVals);
241  
242        return all_of(
243            ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
244              return std::get<0>(R)->getType() == std::get<1>(R)->getType();
245            });
246      }
247  
248      return false;
249    }
250  
251    // Since any GEP Instruction operands after the first operand cannot be
252    // defined by a register, we must make sure that the operands after the first
253    // are the same in the two instructions
254    if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
255      auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
256  
257      // If the instructions do not have the same inbounds restrictions, we do
258      // not consider them the same.
259      if (GEP->isInBounds() != OtherGEP->isInBounds())
260        return false;
261  
262      auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
263  
264      // We increment here since we do not care about the first instruction,
265      // we only care about the following operands since they must be the
266      // exact same to be considered similar.
267      return all_of(drop_begin(ZippedOperands),
268                    [](std::tuple<llvm::Use &, llvm::Use &> R) {
269                      return std::get<0>(R) == std::get<1>(R);
270                    });
271    }
272  
273    // If the instructions are functions calls, we make sure that the function
274    // name is the same.  We already know that the types are since is
275    // isSameOperationAs is true.
276    if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
277      if (A.getCalleeName() != B.getCalleeName())
278        return false;
279    }
280  
281    if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
282        A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
283      return false;
284  
285    return true;
286  }
287  
288  // TODO: This is the same as the MachineOutliner, and should be consolidated
289  // into the same interface.
convertToUnsignedVec(BasicBlock & BB,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)290  void IRInstructionMapper::convertToUnsignedVec(
291      BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
292      std::vector<unsigned> &IntegerMapping) {
293    BasicBlock::iterator It = BB.begin();
294  
295    std::vector<unsigned> IntegerMappingForBB;
296    std::vector<IRInstructionData *> InstrListForBB;
297  
298    for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
299      switch (InstClassifier.visit(*It)) {
300      case InstrType::Legal:
301        mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
302        break;
303      case InstrType::Illegal:
304        mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
305        break;
306      case InstrType::Invisible:
307        AddedIllegalLastTime = false;
308        break;
309      }
310    }
311  
312    if (AddedIllegalLastTime)
313      mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
314    for (IRInstructionData *ID : InstrListForBB)
315      this->IDL->push_back(*ID);
316    llvm::append_range(InstrList, InstrListForBB);
317    llvm::append_range(IntegerMapping, IntegerMappingForBB);
318  }
319  
320  // TODO: This is the same as the MachineOutliner, and should be consolidated
321  // into the same interface.
mapToLegalUnsigned(BasicBlock::iterator & It,std::vector<unsigned> & IntegerMappingForBB,std::vector<IRInstructionData * > & InstrListForBB)322  unsigned IRInstructionMapper::mapToLegalUnsigned(
323      BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
324      std::vector<IRInstructionData *> &InstrListForBB) {
325    // We added something legal, so we should unset the AddedLegalLastTime
326    // flag.
327    AddedIllegalLastTime = false;
328  
329    // If we have at least two adjacent legal instructions (which may have
330    // invisible instructions in between), remember that.
331    if (CanCombineWithPrevInstr)
332      HaveLegalRange = true;
333    CanCombineWithPrevInstr = true;
334  
335    // Get the integer for this instruction or give it the current
336    // LegalInstrNumber.
337    IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
338    InstrListForBB.push_back(ID);
339  
340    if (isa<BranchInst>(*It))
341      ID->setBranchSuccessors(BasicBlockToInteger);
342  
343    if (isa<CallInst>(*It))
344      ID->setCalleeName(EnableMatchCallsByName);
345  
346    if (isa<PHINode>(*It))
347      ID->setPHIPredecessors(BasicBlockToInteger);
348  
349    // Add to the instruction list
350    bool WasInserted;
351    DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
352        ResultIt;
353    std::tie(ResultIt, WasInserted) =
354        InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
355    unsigned INumber = ResultIt->second;
356  
357    // There was an insertion.
358    if (WasInserted)
359      LegalInstrNumber++;
360  
361    IntegerMappingForBB.push_back(INumber);
362  
363    // Make sure we don't overflow or use any integers reserved by the DenseMap.
364    assert(LegalInstrNumber < IllegalInstrNumber &&
365           "Instruction mapping overflow!");
366  
367    assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
368           "Tried to assign DenseMap tombstone or empty key to instruction.");
369    assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
370           "Tried to assign DenseMap tombstone or empty key to instruction.");
371  
372    return INumber;
373  }
374  
375  IRInstructionData *
allocateIRInstructionData(Instruction & I,bool Legality,IRInstructionDataList & IDL)376  IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
377                                                 IRInstructionDataList &IDL) {
378    return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
379  }
380  
381  IRInstructionData *
allocateIRInstructionData(IRInstructionDataList & IDL)382  IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
383    return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
384  }
385  
386  IRInstructionDataList *
allocateIRInstructionDataList()387  IRInstructionMapper::allocateIRInstructionDataList() {
388    return new (IDLAllocator->Allocate()) IRInstructionDataList();
389  }
390  
391  // TODO: This is the same as the MachineOutliner, and should be consolidated
392  // into the same interface.
mapToIllegalUnsigned(BasicBlock::iterator & It,std::vector<unsigned> & IntegerMappingForBB,std::vector<IRInstructionData * > & InstrListForBB,bool End)393  unsigned IRInstructionMapper::mapToIllegalUnsigned(
394      BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
395      std::vector<IRInstructionData *> &InstrListForBB, bool End) {
396    // Can't combine an illegal instruction. Set the flag.
397    CanCombineWithPrevInstr = false;
398  
399    // Only add one illegal number per range of legal numbers.
400    if (AddedIllegalLastTime)
401      return IllegalInstrNumber;
402  
403    IRInstructionData *ID = nullptr;
404    if (!End)
405      ID = allocateIRInstructionData(*It, false, *IDL);
406    else
407      ID = allocateIRInstructionData(*IDL);
408    InstrListForBB.push_back(ID);
409  
410    // Remember that we added an illegal number last time.
411    AddedIllegalLastTime = true;
412    unsigned INumber = IllegalInstrNumber;
413    IntegerMappingForBB.push_back(IllegalInstrNumber--);
414  
415    assert(LegalInstrNumber < IllegalInstrNumber &&
416           "Instruction mapping overflow!");
417  
418    assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
419           "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
420  
421    assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
422           "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
423  
424    return INumber;
425  }
426  
IRSimilarityCandidate(unsigned StartIdx,unsigned Len,IRInstructionData * FirstInstIt,IRInstructionData * LastInstIt)427  IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
428                                               IRInstructionData *FirstInstIt,
429                                               IRInstructionData *LastInstIt)
430      : StartIdx(StartIdx), Len(Len) {
431  
432    assert(FirstInstIt != nullptr && "Instruction is nullptr!");
433    assert(LastInstIt != nullptr && "Instruction is nullptr!");
434    assert(StartIdx + Len > StartIdx &&
435           "Overflow for IRSimilarityCandidate range?");
436    assert(Len - 1 == static_cast<unsigned>(std::distance(
437                          iterator(FirstInstIt), iterator(LastInstIt))) &&
438           "Length of the first and last IRInstructionData do not match the "
439           "given length");
440  
441    // We iterate over the given instructions, and map each unique value
442    // to a unique number in the IRSimilarityCandidate ValueToNumber and
443    // NumberToValue maps.  A constant get its own value globally, the individual
444    // uses of the constants are not considered to be unique.
445    //
446    // IR:                    Mapping Added:
447    // %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2
448    // %add2 = add i32 %a, %1    %add2 -> 4
449    // %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5
450    //
451    // when replace with global values, starting from 1, would be
452    //
453    // 3 = add i32 1, 2
454    // 4 = add i32 1, 3
455    // 6 = add i32 5, 2
456    unsigned LocalValNumber = 1;
457    IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
458    for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
459      // Map the operand values to an unsigned integer if it does not already
460      // have an unsigned integer assigned to it.
461      for (Value *Arg : ID->OperVals)
462        if (!ValueToNumber.contains(Arg)) {
463          ValueToNumber.try_emplace(Arg, LocalValNumber);
464          NumberToValue.try_emplace(LocalValNumber, Arg);
465          LocalValNumber++;
466        }
467  
468      // Mapping the instructions to an unsigned integer if it is not already
469      // exist in the mapping.
470      if (!ValueToNumber.contains(ID->Inst)) {
471        ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
472        NumberToValue.try_emplace(LocalValNumber, ID->Inst);
473        LocalValNumber++;
474      }
475    }
476  
477    // Setting the first and last instruction data pointers for the candidate.  If
478    // we got through the entire for loop without hitting an assert, we know
479    // that both of these instructions are not nullptrs.
480    FirstInst = FirstInstIt;
481    LastInst = LastInstIt;
482  
483    // Add the basic blocks contained in the set into the global value numbering.
484    DenseSet<BasicBlock *> BBSet;
485    getBasicBlocks(BBSet);
486    for (BasicBlock *BB : BBSet) {
487      if (ValueToNumber.contains(BB))
488        continue;
489  
490      ValueToNumber.try_emplace(BB, LocalValNumber);
491      NumberToValue.try_emplace(LocalValNumber, BB);
492      LocalValNumber++;
493    }
494  }
495  
isSimilar(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B)496  bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
497                                        const IRSimilarityCandidate &B) {
498    if (A.getLength() != B.getLength())
499      return false;
500  
501    auto InstrDataForBoth =
502        zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
503  
504    return all_of(InstrDataForBoth,
505                  [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
506                    IRInstructionData &A = std::get<0>(R);
507                    IRInstructionData &B = std::get<1>(R);
508                    if (!A.Legal || !B.Legal)
509                      return false;
510                    return isClose(A, B);
511                  });
512  }
513  
514  /// Determine if one or more of the assigned global value numbers for the
515  /// operands in \p TargetValueNumbers is in the current mapping set for operand
516  /// numbers in \p SourceOperands.  The set of possible corresponding global
517  /// value numbers are replaced with the most recent version of compatible
518  /// values.
519  ///
520  /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
521  /// value number for the source IRInstructionCandidate.
522  /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
523  /// IRSimilarityCandidate global value numbers to a set of possible numbers in
524  /// the target.
525  /// \param [in] SourceOperands - The operands in the original
526  /// IRSimilarityCandidate in the current instruction.
527  /// \param [in] TargetValueNumbers - The global value numbers of the operands in
528  /// the corresponding Instruction in the other IRSimilarityCandidate.
529  /// \returns true if there exists a possible mapping between the source
530  /// Instruction operands and the target Instruction operands, and false if not.
checkNumberingAndReplaceCommutative(const DenseMap<Value *,unsigned> & SourceValueToNumberMapping,DenseMap<unsigned,DenseSet<unsigned>> & CurrentSrcTgtNumberMapping,ArrayRef<Value * > & SourceOperands,DenseSet<unsigned> & TargetValueNumbers)531  static bool checkNumberingAndReplaceCommutative(
532    const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
533    DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
534    ArrayRef<Value *> &SourceOperands,
535    DenseSet<unsigned> &TargetValueNumbers){
536  
537    DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
538  
539    unsigned ArgVal;
540    bool WasInserted;
541  
542    // Iterate over the operands in the source IRSimilarityCandidate to determine
543    // whether there exists an operand in the other IRSimilarityCandidate that
544    // creates a valid mapping of Value to Value between the
545    // IRSimilarityCaniddates.
546    for (Value *V : SourceOperands) {
547      ArgVal = SourceValueToNumberMapping.find(V)->second;
548  
549      // Instead of finding a current mapping, we attempt to insert a set.
550      std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
551          std::make_pair(ArgVal, TargetValueNumbers));
552  
553      // We need to iterate over the items in other IRSimilarityCandidate's
554      // Instruction to determine whether there is a valid mapping of
555      // Value to Value.
556      DenseSet<unsigned> NewSet;
557      for (unsigned &Curr : ValueMappingIt->second)
558        // If we can find the value in the mapping, we add it to the new set.
559        if (TargetValueNumbers.contains(Curr))
560          NewSet.insert(Curr);
561  
562      // If we could not find a Value, return 0.
563      if (NewSet.empty())
564        return false;
565  
566      // Otherwise replace the old mapping with the newly constructed one.
567      if (NewSet.size() != ValueMappingIt->second.size())
568        ValueMappingIt->second.swap(NewSet);
569  
570      // We have reached no conclusions about the mapping, and cannot remove
571      // any items from the other operands, so we move to check the next operand.
572      if (ValueMappingIt->second.size() != 1)
573        continue;
574  
575      unsigned ValToRemove = *ValueMappingIt->second.begin();
576      // When there is only one item left in the mapping for and operand, remove
577      // the value from the other operands.  If it results in there being no
578      // mapping, return false, it means the mapping is wrong
579      for (Value *InnerV : SourceOperands) {
580        if (V == InnerV)
581          continue;
582  
583        unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
584        ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
585        if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
586          continue;
587  
588        ValueMappingIt->second.erase(ValToRemove);
589        if (ValueMappingIt->second.empty())
590          return false;
591      }
592    }
593  
594    return true;
595  }
596  
597  /// Determine if operand number \p TargetArgVal is in the current mapping set
598  /// for operand number \p SourceArgVal.
599  ///
600  /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
601  /// value numbers from source IRSimilarityCandidate to target
602  /// IRSimilarityCandidate.
603  /// \param [in] SourceArgVal The global value number for an operand in the
604  /// in the original candidate.
605  /// \param [in] TargetArgVal The global value number for the corresponding
606  /// operand in the other candidate.
607  /// \returns True if there exists a mapping and false if not.
checkNumberingAndReplace(DenseMap<unsigned,DenseSet<unsigned>> & CurrentSrcTgtNumberMapping,unsigned SourceArgVal,unsigned TargetArgVal)608  bool checkNumberingAndReplace(
609      DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
610      unsigned SourceArgVal, unsigned TargetArgVal) {
611    // We are given two unsigned integers representing the global values of
612    // the operands in different IRSimilarityCandidates and a current mapping
613    // between the two.
614    //
615    // Source Operand GVN: 1
616    // Target Operand GVN: 2
617    // CurrentMapping: {1: {1, 2}}
618    //
619    // Since we have mapping, and the target operand is contained in the set, we
620    // update it to:
621    // CurrentMapping: {1: {2}}
622    // and can return true. But, if the mapping was
623    // CurrentMapping: {1: {3}}
624    // we would return false.
625  
626    bool WasInserted;
627    DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
628  
629    std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
630        std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
631  
632    // If we created a new mapping, then we are done.
633    if (WasInserted)
634      return true;
635  
636    // If there is more than one option in the mapping set, and the target value
637    // is included in the mapping set replace that set with one that only includes
638    // the target value, as it is the only valid mapping via the non commutative
639    // instruction.
640  
641    DenseSet<unsigned> &TargetSet = Val->second;
642    if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
643      TargetSet.clear();
644      TargetSet.insert(TargetArgVal);
645      return true;
646    }
647  
648    // Return true if we can find the value in the set.
649    return TargetSet.contains(TargetArgVal);
650  }
651  
compareNonCommutativeOperandMapping(OperandMapping A,OperandMapping B)652  bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
653      OperandMapping A, OperandMapping B) {
654    // Iterators to keep track of where we are in the operands for each
655    // Instruction.
656    ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
657    ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
658    unsigned OperandLength = A.OperVals.size();
659  
660    // For each operand, get the value numbering and ensure it is consistent.
661    for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
662      unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
663      unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
664  
665      // Attempt to add a set with only the target value.  If there is no mapping
666      // we can create it here.
667      //
668      // For an instruction like a subtraction:
669      // IRSimilarityCandidateA:  IRSimilarityCandidateB:
670      // %resultA = sub %a, %b    %resultB = sub %d, %e
671      //
672      // We map %a -> %d and %b -> %e.
673      //
674      // And check to see whether their mapping is consistent in
675      // checkNumberingAndReplace.
676  
677      if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
678        return false;
679  
680      if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
681        return false;
682    }
683    return true;
684  }
685  
compareCommutativeOperandMapping(OperandMapping A,OperandMapping B)686  bool IRSimilarityCandidate::compareCommutativeOperandMapping(
687      OperandMapping A, OperandMapping B) {
688    DenseSet<unsigned> ValueNumbersA;
689    DenseSet<unsigned> ValueNumbersB;
690  
691    ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
692    ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
693    unsigned OperandLength = A.OperVals.size();
694  
695    // Find the value number sets for the operands.
696    for (unsigned Idx = 0; Idx < OperandLength;
697         Idx++, VItA++, VItB++) {
698      ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
699      ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
700    }
701  
702    // Iterate over the operands in the first IRSimilarityCandidate and make sure
703    // there exists a possible mapping with the operands in the second
704    // IRSimilarityCandidate.
705    if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
706                                             A.ValueNumberMapping, A.OperVals,
707                                             ValueNumbersB))
708      return false;
709  
710    // Iterate over the operands in the second IRSimilarityCandidate and make sure
711    // there exists a possible mapping with the operands in the first
712    // IRSimilarityCandidate.
713    if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
714                                             B.ValueNumberMapping, B.OperVals,
715                                             ValueNumbersA))
716      return false;
717  
718    return true;
719  }
720  
compareAssignmentMapping(const unsigned InstValA,const unsigned & InstValB,DenseMap<unsigned,DenseSet<unsigned>> & ValueNumberMappingA,DenseMap<unsigned,DenseSet<unsigned>> & ValueNumberMappingB)721  bool IRSimilarityCandidate::compareAssignmentMapping(
722      const unsigned InstValA, const unsigned &InstValB,
723      DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
724      DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
725    DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
726    bool WasInserted;
727    std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
728        std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
729    if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
730      return false;
731    else if (ValueMappingIt->second.size() != 1) {
732      for (unsigned OtherVal : ValueMappingIt->second) {
733        if (OtherVal == InstValB)
734          continue;
735        if (!ValueNumberMappingA.contains(OtherVal))
736          continue;
737        if (!ValueNumberMappingA[OtherVal].contains(InstValA))
738          continue;
739        ValueNumberMappingA[OtherVal].erase(InstValA);
740      }
741      ValueNumberMappingA.erase(ValueMappingIt);
742      std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
743        std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
744    }
745  
746    return true;
747  }
748  
checkRelativeLocations(RelativeLocMapping A,RelativeLocMapping B)749  bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
750                                                     RelativeLocMapping B) {
751    // Get the basic blocks the label refers to.
752    BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
753    BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
754  
755    // Get the basic blocks contained in each region.
756    DenseSet<BasicBlock *> BasicBlockA;
757    DenseSet<BasicBlock *> BasicBlockB;
758    A.IRSC.getBasicBlocks(BasicBlockA);
759    B.IRSC.getBasicBlocks(BasicBlockB);
760  
761    // Determine if the block is contained in the region.
762    bool AContained = BasicBlockA.contains(ABB);
763    bool BContained = BasicBlockB.contains(BBB);
764  
765    // Both blocks need to be contained in the region, or both need to be outside
766    // the region.
767    if (AContained != BContained)
768      return false;
769  
770    // If both are contained, then we need to make sure that the relative
771    // distance to the target blocks are the same.
772    if (AContained)
773      return A.RelativeLocation == B.RelativeLocation;
774    return true;
775  }
776  
compareStructure(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B)777  bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
778                                               const IRSimilarityCandidate &B) {
779    DenseMap<unsigned, DenseSet<unsigned>> MappingA;
780    DenseMap<unsigned, DenseSet<unsigned>> MappingB;
781    return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
782  }
783  
784  typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
785                        SmallVector<int, 4> &, ArrayRef<Value *> &,
786                        ArrayRef<Value *> &>
787      ZippedRelativeLocationsT;
788  
compareStructure(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B,DenseMap<unsigned,DenseSet<unsigned>> & ValueNumberMappingA,DenseMap<unsigned,DenseSet<unsigned>> & ValueNumberMappingB)789  bool IRSimilarityCandidate::compareStructure(
790      const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
791      DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
792      DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
793    if (A.getLength() != B.getLength())
794      return false;
795  
796    if (A.ValueToNumber.size() != B.ValueToNumber.size())
797      return false;
798  
799    iterator ItA = A.begin();
800    iterator ItB = B.begin();
801  
802    // These ValueNumber Mapping sets create a create a mapping between the values
803    // in one candidate to values in the other candidate.  If we create a set with
804    // one element, and that same element maps to the original element in the
805    // candidate we have a good mapping.
806  
807    // Iterate over the instructions contained in each candidate
808    unsigned SectionLength = A.getStartIdx() + A.getLength();
809    for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
810         ItA++, ItB++, Loc++) {
811      // Make sure the instructions are similar to one another.
812      if (!isClose(*ItA, *ItB))
813        return false;
814  
815      Instruction *IA = ItA->Inst;
816      Instruction *IB = ItB->Inst;
817  
818      if (!ItA->Legal || !ItB->Legal)
819        return false;
820  
821      // Get the operand sets for the instructions.
822      ArrayRef<Value *> OperValsA = ItA->OperVals;
823      ArrayRef<Value *> OperValsB = ItB->OperVals;
824  
825      unsigned InstValA = A.ValueToNumber.find(IA)->second;
826      unsigned InstValB = B.ValueToNumber.find(IB)->second;
827  
828      // Ensure that the mappings for the instructions exists.
829      if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
830                                    ValueNumberMappingB))
831        return false;
832  
833      if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
834                                    ValueNumberMappingA))
835        return false;
836  
837      // We have different paths for commutative instructions and non-commutative
838      // instructions since commutative instructions could allow multiple mappings
839      // to certain values.
840      if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
841          !isa<IntrinsicInst>(IA)) {
842        if (!compareCommutativeOperandMapping(
843                {A, OperValsA, ValueNumberMappingA},
844                {B, OperValsB, ValueNumberMappingB}))
845          return false;
846        continue;
847      }
848  
849      // Handle the non-commutative cases.
850      if (!compareNonCommutativeOperandMapping(
851              {A, OperValsA, ValueNumberMappingA},
852              {B, OperValsB, ValueNumberMappingB}))
853        return false;
854  
855      // Here we check that between two corresponding instructions,
856      // when referring to a basic block in the same region, the
857      // relative locations are the same. And, that the instructions refer to
858      // basic blocks outside the region in the same corresponding locations.
859  
860      // We are able to make the assumption about blocks outside of the region
861      // since the target block labels are considered values and will follow the
862      // same number matching that we defined for the other instructions in the
863      // region.  So, at this point, in each location we target a specific block
864      // outside the region, we are targeting a corresponding block in each
865      // analagous location in the region we are comparing to.
866      if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
867          !(isa<PHINode>(IA) && isa<PHINode>(IB)))
868        continue;
869  
870      SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
871      SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
872      ArrayRef<Value *> ABL = ItA->getBlockOperVals();
873      ArrayRef<Value *> BBL = ItB->getBlockOperVals();
874  
875      // Check to make sure that the number of operands, and branching locations
876      // between BranchInsts is the same.
877      if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
878          ABL.size() != BBL.size())
879        return false;
880  
881      assert(RelBlockLocsA.size() == ABL.size() &&
882             "Block information vectors not the same size.");
883      assert(RelBlockLocsB.size() == BBL.size() &&
884             "Block information vectors not the same size.");
885  
886      ZippedRelativeLocationsT ZippedRelativeLocations =
887          zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
888      if (any_of(ZippedRelativeLocations,
889                 [&A, &B](std::tuple<int, int, Value *, Value *> R) {
890                   return !checkRelativeLocations(
891                       {A, std::get<0>(R), std::get<2>(R)},
892                       {B, std::get<1>(R), std::get<3>(R)});
893                 }))
894        return false;
895    }
896    return true;
897  }
898  
overlap(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B)899  bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
900                                      const IRSimilarityCandidate &B) {
901    auto DoesOverlap = [](const IRSimilarityCandidate &X,
902                          const IRSimilarityCandidate &Y) {
903      // Check:
904      // XXXXXX        X starts before Y ends
905      //      YYYYYYY  Y starts after X starts
906      return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
907    };
908  
909    return DoesOverlap(A, B) || DoesOverlap(B, A);
910  }
911  
populateMapper(Module & M,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)912  void IRSimilarityIdentifier::populateMapper(
913      Module &M, std::vector<IRInstructionData *> &InstrList,
914      std::vector<unsigned> &IntegerMapping) {
915  
916    std::vector<IRInstructionData *> InstrListForModule;
917    std::vector<unsigned> IntegerMappingForModule;
918    // Iterate over the functions in the module to map each Instruction in each
919    // BasicBlock to an unsigned integer.
920    Mapper.initializeForBBs(M);
921  
922    for (Function &F : M) {
923  
924      if (F.empty())
925        continue;
926  
927      for (BasicBlock &BB : F) {
928  
929        // BB has potential to have similarity since it has a size greater than 2
930        // and can therefore match other regions greater than 2. Map it to a list
931        // of unsigned integers.
932        Mapper.convertToUnsignedVec(BB, InstrListForModule,
933                                    IntegerMappingForModule);
934      }
935  
936      BasicBlock::iterator It = F.begin()->end();
937      Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
938                                  true);
939      if (InstrListForModule.size() > 0)
940        Mapper.IDL->push_back(*InstrListForModule.back());
941    }
942  
943    // Insert the InstrListForModule at the end of the overall InstrList so that
944    // we can have a long InstrList for the entire set of Modules being analyzed.
945    llvm::append_range(InstrList, InstrListForModule);
946    // Do the same as above, but for IntegerMapping.
947    llvm::append_range(IntegerMapping, IntegerMappingForModule);
948  }
949  
populateMapper(ArrayRef<std::unique_ptr<Module>> & Modules,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)950  void IRSimilarityIdentifier::populateMapper(
951      ArrayRef<std::unique_ptr<Module>> &Modules,
952      std::vector<IRInstructionData *> &InstrList,
953      std::vector<unsigned> &IntegerMapping) {
954  
955    // Iterate over, and map the instructions in each module.
956    for (const std::unique_ptr<Module> &M : Modules)
957      populateMapper(*M, InstrList, IntegerMapping);
958  }
959  
960  /// From a repeated subsequence, find all the different instances of the
961  /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
962  /// the IRInstructionData in subsequence.
963  ///
964  /// \param [in] Mapper - The instruction mapper for basic correctness checks.
965  /// \param [in] InstrList - The vector that holds the instruction data.
966  /// \param [in] IntegerMapping - The vector that holds the mapped integers.
967  /// \param [out] CandsForRepSubstring - The vector to store the generated
968  /// IRSimilarityCandidates.
createCandidatesFromSuffixTree(const IRInstructionMapper & Mapper,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping,SuffixTree::RepeatedSubstring & RS,std::vector<IRSimilarityCandidate> & CandsForRepSubstring)969  static void createCandidatesFromSuffixTree(
970      const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
971      std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
972      std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
973  
974    unsigned StringLen = RS.Length;
975    if (StringLen < 2)
976      return;
977  
978    // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
979    for (const unsigned &StartIdx : RS.StartIndices) {
980      unsigned EndIdx = StartIdx + StringLen - 1;
981  
982      // Check that this subsequence does not contain an illegal instruction.
983      bool ContainsIllegal = false;
984      for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
985        unsigned Key = IntegerMapping[CurrIdx];
986        if (Key > Mapper.IllegalInstrNumber) {
987          ContainsIllegal = true;
988          break;
989        }
990      }
991  
992      // If we have an illegal instruction, we should not create an
993      // IRSimilarityCandidate for this region.
994      if (ContainsIllegal)
995        continue;
996  
997      // We are getting iterators to the instructions in this region of code
998      // by advancing the start and end indices from the start of the
999      // InstrList.
1000      std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
1001      std::advance(StartIt, StartIdx);
1002      std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
1003      std::advance(EndIt, EndIdx);
1004  
1005      CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1006    }
1007  }
1008  
createCanonicalRelationFrom(IRSimilarityCandidate & SourceCand,DenseMap<unsigned,DenseSet<unsigned>> & ToSourceMapping,DenseMap<unsigned,DenseSet<unsigned>> & FromSourceMapping)1009  void IRSimilarityCandidate::createCanonicalRelationFrom(
1010      IRSimilarityCandidate &SourceCand,
1011      DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
1012      DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
1013    assert(SourceCand.CanonNumToNumber.size() != 0 &&
1014           "Base canonical relationship is empty!");
1015    assert(SourceCand.NumberToCanonNum.size() != 0 &&
1016           "Base canonical relationship is empty!");
1017  
1018    assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1019    assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1020  
1021    DenseSet<unsigned> UsedGVNs;
1022    // Iterate over the mappings provided from this candidate to SourceCand.  We
1023    // are then able to map the GVN in this candidate to the same canonical number
1024    // given to the corresponding GVN in SourceCand.
1025    for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
1026      unsigned SourceGVN = GVNMapping.first;
1027  
1028      assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1029  
1030      unsigned ResultGVN;
1031      // We need special handling if we have more than one potential value.  This
1032      // means that there are at least two GVNs that could correspond to this GVN.
1033      // This could lead to potential swapping later on, so we make a decision
1034      // here to ensure a one-to-one mapping.
1035      if (GVNMapping.second.size() > 1) {
1036        bool Found = false;
1037        for (unsigned Val : GVNMapping.second) {
1038          // We make sure the target value number hasn't already been reserved.
1039          if (UsedGVNs.contains(Val))
1040            continue;
1041  
1042          // We make sure that the opposite mapping is still consistent.
1043          DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
1044              FromSourceMapping.find(Val);
1045  
1046          if (!It->second.contains(SourceGVN))
1047            continue;
1048  
1049          // We pick the first item that satisfies these conditions.
1050          Found = true;
1051          ResultGVN = Val;
1052          break;
1053        }
1054  
1055        assert(Found && "Could not find matching value for source GVN");
1056        (void)Found;
1057  
1058      } else
1059        ResultGVN = *GVNMapping.second.begin();
1060  
1061      // Whatever GVN is found, we mark it as used.
1062      UsedGVNs.insert(ResultGVN);
1063  
1064      unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1065      CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1066      NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1067    }
1068  
1069    DenseSet<BasicBlock *> BBSet;
1070    getBasicBlocks(BBSet);
1071    // Find canonical numbers for the BasicBlocks in the current candidate.
1072    // This is done by finding the corresponding value for the first instruction
1073    // in the block in the current candidate, finding the matching value in the
1074    // source candidate.  Then by finding the parent of this value, use the
1075    // canonical number of the block in the source candidate for the canonical
1076    // number in the current candidate.
1077    for (BasicBlock *BB : BBSet) {
1078      unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1079  
1080      // We can skip the BasicBlock if the canonical numbering has already been
1081      // found in a separate instruction.
1082      if (NumberToCanonNum.contains(BBGVNForCurrCand))
1083        continue;
1084  
1085      // If the basic block is the starting block, then the shared instruction may
1086      // not be the first instruction in the block, it will be the first
1087      // instruction in the similarity region.
1088      Value *FirstOutlineInst = BB == getStartBB()
1089                                    ? frontInstruction()
1090                                    : &*BB->instructionsWithoutDebug().begin();
1091  
1092      unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1093      unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1094      unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1095      Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1096      BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1097      unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1098      unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1099      CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1100      NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1101    }
1102  }
1103  
createCanonicalRelationFrom(IRSimilarityCandidate & SourceCand,IRSimilarityCandidate & SourceCandLarge,IRSimilarityCandidate & TargetCandLarge)1104  void IRSimilarityCandidate::createCanonicalRelationFrom(
1105      IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
1106      IRSimilarityCandidate &TargetCandLarge) {
1107    assert(!SourceCand.CanonNumToNumber.empty() &&
1108           "Canonical Relationship is non-empty");
1109    assert(!SourceCand.NumberToCanonNum.empty() &&
1110           "Canonical Relationship is non-empty");
1111  
1112    assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1113           "Canonical Relationship is non-empty");
1114    assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1115           "Canonical Relationship is non-empty");
1116  
1117    assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1118           "Canonical Relationship is non-empty");
1119    assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1120           "Canonical Relationship is non-empty");
1121  
1122    assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1123    assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1124  
1125    // We're going to use the larger candidates as a "bridge" to create the
1126    // canonical number for the target candidate since we have idetified two
1127    // candidates as subsequences of larger sequences, and therefore must be
1128    // structurally similar.
1129    for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1130      Value *CurrVal = ValueNumPair.first;
1131      unsigned TargetCandGVN = ValueNumPair.second;
1132  
1133      // Find the numbering in the large candidate that surrounds the
1134      // current candidate.
1135      std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1136      assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1137  
1138      // Get the canonical numbering in the large target candidate.
1139      std::optional<unsigned> OTargetCandCanon =
1140          TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1141      assert(OTargetCandCanon.has_value() &&
1142             "Canononical Number not found for GVN");
1143  
1144      // Get the GVN in the large source candidate from the canonical numbering.
1145      std::optional<unsigned> OLargeSourceGVN =
1146          SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
1147      assert(OLargeSourceGVN.has_value() &&
1148             "GVN Number not found for Canonical Number");
1149  
1150      // Get the Value from the GVN in the large source candidate.
1151      std::optional<Value *> OLargeSourceV =
1152          SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1153      assert(OLargeSourceV.has_value() && "Value not found for GVN");
1154  
1155      // Get the GVN number for the Value in the source candidate.
1156      std::optional<unsigned> OSourceGVN =
1157          SourceCand.getGVN(OLargeSourceV.value());
1158      assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1159  
1160      // Get the canonical numbering from the GVN/
1161      std::optional<unsigned> OSourceCanon =
1162          SourceCand.getCanonicalNum(OSourceGVN.value());
1163      assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1164  
1165      // Insert the canonical numbering and GVN pair into their respective
1166      // mappings.
1167      CanonNumToNumber.insert(
1168          std::make_pair(OSourceCanon.value(), TargetCandGVN));
1169      NumberToCanonNum.insert(
1170          std::make_pair(TargetCandGVN, OSourceCanon.value()));
1171    }
1172  }
1173  
createCanonicalMappingFor(IRSimilarityCandidate & CurrCand)1174  void IRSimilarityCandidate::createCanonicalMappingFor(
1175      IRSimilarityCandidate &CurrCand) {
1176    assert(CurrCand.CanonNumToNumber.size() == 0 &&
1177           "Canonical Relationship is non-empty");
1178    assert(CurrCand.NumberToCanonNum.size() == 0 &&
1179           "Canonical Relationship is non-empty");
1180  
1181    unsigned CanonNum = 0;
1182    // Iterate over the value numbers found, the order does not matter in this
1183    // case.
1184    for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1185      CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1186      CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1187      CanonNum++;
1188    }
1189  }
1190  
1191  /// Look for larger IRSimilarityCandidates From the previously matched
1192  /// IRSimilarityCandidates that fully contain \p CandA or \p CandB.  If there is
1193  /// an overlap, return a pair of structurally similar, larger
1194  /// IRSimilarityCandidates.
1195  ///
1196  /// \param [in] CandA - The first candidate we are trying to determine the
1197  /// structure of.
1198  /// \param [in] CandB - The second candidate we are trying to determine the
1199  /// structure of.
1200  /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1201  /// a circuit to the IRSimilarityCandidates that include this instruction.
1202  /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1203  /// number representing the structural group assigned to it.
1204  static std::optional<
1205      std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
CheckLargerCands(IRSimilarityCandidate & CandA,IRSimilarityCandidate & CandB,DenseMap<unsigned,DenseSet<IRSimilarityCandidate * >> & IndexToIncludedCand,DenseMap<IRSimilarityCandidate *,unsigned> & CandToGroup)1206  CheckLargerCands(
1207      IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
1208      DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1209      DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {
1210    DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
1211    DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
1212    DenseSet<unsigned> IncludedGroupsA;
1213    DenseSet<unsigned> IncludedGroupsB;
1214  
1215    // Find the overall similarity group numbers that fully contain the candidate,
1216    // and record the larger candidate for each group.
1217    auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1218    std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1219        Result;
1220  
1221    unsigned CandAStart = CandA.getStartIdx();
1222    unsigned CandAEnd = CandA.getEndIdx();
1223    unsigned CandBStart = CandB.getStartIdx();
1224    unsigned CandBEnd = CandB.getEndIdx();
1225    if (IdxToCandidateIt == IndexToIncludedCand.end())
1226      return Result;
1227    for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1228      if (MatchedCand->getStartIdx() > CandAStart ||
1229          (MatchedCand->getEndIdx() < CandAEnd))
1230        continue;
1231      unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1232      IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1233      IncludedGroupsA.insert(GroupNum);
1234    }
1235  
1236    // Find the overall similarity group numbers that fully contain the next
1237    // candidate, and record the larger candidate for each group.
1238    IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1239    if (IdxToCandidateIt == IndexToIncludedCand.end())
1240      return Result;
1241    for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1242      if (MatchedCand->getStartIdx() > CandBStart ||
1243          MatchedCand->getEndIdx() < CandBEnd)
1244        continue;
1245      unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1246      IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1247      IncludedGroupsB.insert(GroupNum);
1248    }
1249  
1250    // Find the intersection between the two groups, these are the groups where
1251    // the larger candidates exist.
1252    set_intersect(IncludedGroupsA, IncludedGroupsB);
1253  
1254    // If there is no intersection between the sets, then we cannot determine
1255    // whether or not there is a match.
1256    if (IncludedGroupsA.empty())
1257      return Result;
1258  
1259    // Create a pair that contains the larger candidates.
1260    auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1261    auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1262    Result = std::make_pair(ItA->second, ItB->second);
1263    return Result;
1264  }
1265  
1266  /// From the list of IRSimilarityCandidates, perform a comparison between each
1267  /// IRSimilarityCandidate to determine if there are overlapping
1268  /// IRInstructionData, or if they do not have the same structure.
1269  ///
1270  /// \param [in] CandsForRepSubstring - The vector containing the
1271  /// IRSimilarityCandidates.
1272  /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1273  /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1274  /// vector are structurally similar to one another.
1275  /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1276  /// a circuit to the IRSimilarityCandidates that include this instruction.
1277  /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1278  /// number representing the structural group assigned to it.
findCandidateStructures(std::vector<IRSimilarityCandidate> & CandsForRepSubstring,DenseMap<unsigned,SimilarityGroup> & StructuralGroups,DenseMap<unsigned,DenseSet<IRSimilarityCandidate * >> & IndexToIncludedCand,DenseMap<IRSimilarityCandidate *,unsigned> & CandToOverallGroup)1279  static void findCandidateStructures(
1280      std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1281      DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
1282      DenseMap<unsigned,  DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1283      DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup
1284      ) {
1285    std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1286        InnerCandEndIt;
1287  
1288    // IRSimilarityCandidates each have a structure for operand use.  It is
1289    // possible that two instances of the same subsequences have different
1290    // structure. Each type of structure found is assigned a number.  This
1291    // DenseMap maps an IRSimilarityCandidate to which type of similarity
1292    // discovered it fits within.
1293    DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1294  
1295    // Find the compatibility from each candidate to the others to determine
1296    // which candidates overlap and which have the same structure by mapping
1297    // each structure to a different group.
1298    bool SameStructure;
1299    bool Inserted;
1300    unsigned CurrentGroupNum = 0;
1301    unsigned OuterGroupNum;
1302    DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
1303    DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
1304    DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
1305  
1306    // Iterate over the candidates to determine its structural and overlapping
1307    // compatibility with other instructions
1308    DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1309    DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1310    for (CandIt = CandsForRepSubstring.begin(),
1311        CandEndIt = CandsForRepSubstring.end();
1312         CandIt != CandEndIt; CandIt++) {
1313  
1314      // Determine if it has an assigned structural group already.
1315      CandToGroupIt = CandToGroup.find(&*CandIt);
1316      if (CandToGroupIt == CandToGroup.end()) {
1317        // If not, we assign it one, and add it to our mapping.
1318        std::tie(CandToGroupIt, Inserted) =
1319            CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1320      }
1321  
1322      // Get the structural group number from the iterator.
1323      OuterGroupNum = CandToGroupIt->second;
1324  
1325      // Check if we already have a list of IRSimilarityCandidates for the current
1326      // structural group.  Create one if one does not exist.
1327      CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1328      if (CurrentGroupPair == StructuralGroups.end()) {
1329        IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
1330        std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1331            std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1332      }
1333  
1334      // Iterate over the IRSimilarityCandidates following the current
1335      // IRSimilarityCandidate in the list to determine whether the two
1336      // IRSimilarityCandidates are compatible.  This is so we do not repeat pairs
1337      // of IRSimilarityCandidates.
1338      for (InnerCandIt = std::next(CandIt),
1339          InnerCandEndIt = CandsForRepSubstring.end();
1340           InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1341  
1342        // We check if the inner item has a group already, if it does, we skip it.
1343        CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1344        if (CandToGroupItInner != CandToGroup.end())
1345          continue;
1346  
1347        // Check if we have found structural similarity between two candidates
1348        // that fully contains the first and second candidates.
1349        std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1350            LargerPair = CheckLargerCands(
1351                *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1352  
1353        // If a pair was found, it means that we can assume that these smaller
1354        // substrings are also structurally similar.  Use the larger candidates to
1355        // determine the canonical mapping between the two sections.
1356        if (LargerPair.has_value()) {
1357          SameStructure = true;
1358          InnerCandIt->createCanonicalRelationFrom(
1359              *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1360          CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1361          CurrentGroupPair->second.push_back(*InnerCandIt);
1362          continue;
1363        }
1364  
1365        // Otherwise we determine if they have the same structure and add it to
1366        // vector if they match.
1367        ValueNumberMappingA.clear();
1368        ValueNumberMappingB.clear();
1369        SameStructure = IRSimilarityCandidate::compareStructure(
1370            *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1371        if (!SameStructure)
1372          continue;
1373  
1374        InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1375                                                 ValueNumberMappingB);
1376        CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1377        CurrentGroupPair->second.push_back(*InnerCandIt);
1378      }
1379    }
1380  }
1381  
findCandidates(std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)1382  void IRSimilarityIdentifier::findCandidates(
1383      std::vector<IRInstructionData *> &InstrList,
1384      std::vector<unsigned> &IntegerMapping) {
1385    SuffixTree ST(IntegerMapping);
1386  
1387    std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1388    std::vector<SimilarityGroup> NewCandidateGroups;
1389  
1390    DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1391    DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
1392    DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1393  
1394    // Iterate over the subsequences found by the Suffix Tree to create
1395    // IRSimilarityCandidates for each repeated subsequence and determine which
1396    // instances are structurally similar to one another.
1397  
1398    // Sort the suffix tree from longest substring to shortest.
1399    std::vector<SuffixTree::RepeatedSubstring> RSes;
1400    for (SuffixTree::RepeatedSubstring &RS : ST)
1401      RSes.push_back(RS);
1402  
1403    llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
1404                               const SuffixTree::RepeatedSubstring &RHS) {
1405      return LHS.Length > RHS.Length;
1406    });
1407    for (SuffixTree::RepeatedSubstring &RS : RSes) {
1408      createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1409                                     CandsForRepSubstring);
1410  
1411      if (CandsForRepSubstring.size() < 2)
1412        continue;
1413  
1414      findCandidateStructures(CandsForRepSubstring, StructuralGroups,
1415                              IndexToIncludedCand, CandToGroup);
1416      for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1417        // We only add the group if it contains more than one
1418        // IRSimilarityCandidate.  If there is only one, that means there is no
1419        // other repeated subsequence with the same structure.
1420        if (Group.second.size() > 1) {
1421          SimilarityCandidates->push_back(Group.second);
1422          // Iterate over each candidate in the group, and add an entry for each
1423          // instruction included with a mapping to a set of
1424          // IRSimilarityCandidates that include that instruction.
1425          for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
1426            for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1427                 Idx <= Edx; ++Idx) {
1428              DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator
1429                  IdIt;
1430              IdIt = IndexToIncludedCand.find(Idx);
1431              bool Inserted = false;
1432              if (IdIt == IndexToIncludedCand.end())
1433                std::tie(IdIt, Inserted) = IndexToIncludedCand.insert(
1434                    std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>()));
1435              IdIt->second.insert(&IRCand);
1436            }
1437            // Add mapping of candidate to the overall similarity group number.
1438            CandToGroup.insert(
1439                std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1440          }
1441        }
1442      }
1443  
1444      CandsForRepSubstring.clear();
1445      StructuralGroups.clear();
1446      NewCandidateGroups.clear();
1447    }
1448  }
1449  
findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules)1450  SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
1451      ArrayRef<std::unique_ptr<Module>> Modules) {
1452    resetSimilarityCandidates();
1453  
1454    std::vector<IRInstructionData *> InstrList;
1455    std::vector<unsigned> IntegerMapping;
1456    Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1457    Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1458    Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1459    Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1460    Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1461  
1462    populateMapper(Modules, InstrList, IntegerMapping);
1463    findCandidates(InstrList, IntegerMapping);
1464  
1465    return *SimilarityCandidates;
1466  }
1467  
findSimilarity(Module & M)1468  SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
1469    resetSimilarityCandidates();
1470    Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1471    Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1472    Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1473    Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1474    Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1475  
1476    std::vector<IRInstructionData *> InstrList;
1477    std::vector<unsigned> IntegerMapping;
1478  
1479    populateMapper(M, InstrList, IntegerMapping);
1480    findCandidates(InstrList, IntegerMapping);
1481  
1482    return *SimilarityCandidates;
1483  }
1484  
1485  INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
1486                  "ir-similarity-identifier", false, true)
1487  
IRSimilarityIdentifierWrapperPass()1488  IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
1489      : ModulePass(ID) {
1490    initializeIRSimilarityIdentifierWrapperPassPass(
1491        *PassRegistry::getPassRegistry());
1492  }
1493  
doInitialization(Module & M)1494  bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
1495    IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1496                                          MatchCallsByName, !DisableIntrinsics,
1497                                          false));
1498    return false;
1499  }
1500  
doFinalization(Module & M)1501  bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
1502    IRSI.reset();
1503    return false;
1504  }
1505  
runOnModule(Module & M)1506  bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
1507    IRSI->findSimilarity(M);
1508    return false;
1509  }
1510  
1511  AnalysisKey IRSimilarityAnalysis::Key;
run(Module & M,ModuleAnalysisManager &)1512  IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
1513                                                   ModuleAnalysisManager &) {
1514    auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1515                                       MatchCallsByName, !DisableIntrinsics,
1516                                       false);
1517    IRSI.findSimilarity(M);
1518    return IRSI;
1519  }
1520  
1521  PreservedAnalyses
run(Module & M,ModuleAnalysisManager & AM)1522  IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
1523    IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
1524    std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1525        IRSI.getSimilarity();
1526  
1527    for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1528      OS << CandVec.size() << " candidates of length "
1529         << CandVec.begin()->getLength() << ".  Found in: \n";
1530      for (IRSimilarityCandidate &Cand : CandVec) {
1531        OS << "  Function: " << Cand.front()->Inst->getFunction()->getName().str()
1532           << ", Basic Block: ";
1533        if (Cand.front()->Inst->getParent()->getName().str() == "")
1534          OS << "(unnamed)";
1535        else
1536          OS << Cand.front()->Inst->getParent()->getName().str();
1537        OS << "\n    Start Instruction: ";
1538        Cand.frontInstruction()->print(OS);
1539        OS << "\n      End Instruction: ";
1540        Cand.backInstruction()->print(OS);
1541        OS << "\n";
1542      }
1543    }
1544  
1545    return PreservedAnalyses::all();
1546  }
1547  
1548  char IRSimilarityIdentifierWrapperPass::ID = 0;
1549