xref: /freebsd/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // Implementation file for the IRSimilarityIdentifier for identifying
11 // similarities in IR including the IRInstructionMapper.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SetOperations.h"
18 #include "llvm/IR/Intrinsics.h"
19 #include "llvm/IR/Operator.h"
20 #include "llvm/IR/User.h"
21 #include "llvm/InitializePasses.h"
22 #include "llvm/Support/SuffixTree.h"
23 
24 using namespace llvm;
25 using namespace IRSimilarity;
26 
27 namespace llvm {
28 cl::opt<bool>
29     DisableBranches("no-ir-sim-branch-matching", cl::init(false),
30                     cl::ReallyHidden,
31                     cl::desc("disable similarity matching, and outlining, "
32                              "across branches for debugging purposes."));
33 
34 cl::opt<bool>
35     DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
36                          cl::ReallyHidden,
37                          cl::desc("disable outlining indirect calls."));
38 
39 static cl::opt<bool>
40     MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
41                      cl::desc("only allow matching call instructions if the "
42                               "name and type signature match."));
43 
44 cl::opt<bool>
45     DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
46                       cl::desc("Don't match or outline intrinsics"));
47 } // namespace llvm
48 
IRInstructionData(Instruction & I,bool Legality,IRInstructionDataList & IDList)49 IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
50                                      IRInstructionDataList &IDList)
51     : Inst(&I), Legal(Legality), IDL(&IDList) {
52   initializeInstruction();
53 }
54 
initializeInstruction()55 void IRInstructionData::initializeInstruction() {
56   // We check for whether we have a comparison instruction.  If it is, we
57   // find the "less than" version of the predicate for consistency for
58   // comparison instructions throught the program.
59   if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
60     CmpInst::Predicate Predicate = predicateForConsistency(C);
61     if (Predicate != C->getPredicate())
62       RevisedPredicate = Predicate;
63   }
64 
65   // Here we collect the operands and their types for determining whether
66   // the structure of the operand use matches between two different candidates.
67   for (Use &OI : Inst->operands()) {
68     if (isa<CmpInst>(Inst) && RevisedPredicate) {
69       // If we have a CmpInst where the predicate is reversed, it means the
70       // operands must be reversed as well.
71       OperVals.insert(OperVals.begin(), OI.get());
72       continue;
73     }
74 
75     OperVals.push_back(OI.get());
76   }
77 
78   // We capture the incoming BasicBlocks as values as well as the incoming
79   // Values in order to check for structural similarity.
80   if (PHINode *PN = dyn_cast<PHINode>(Inst))
81     llvm::append_range(OperVals, PN->blocks());
82 }
83 
IRInstructionData(IRInstructionDataList & IDList)84 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
85     : IDL(&IDList) {}
86 
setBranchSuccessors(DenseMap<BasicBlock *,unsigned> & BasicBlockToInteger)87 void IRInstructionData::setBranchSuccessors(
88     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
89   assert(isa<BranchInst>(Inst) && "Instruction must be branch");
90 
91   BranchInst *BI = cast<BranchInst>(Inst);
92   DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
93 
94   BBNumIt = BasicBlockToInteger.find(BI->getParent());
95   assert(BBNumIt != BasicBlockToInteger.end() &&
96          "Could not find location for BasicBlock!");
97 
98   int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
99 
100   for (Value *V : getBlockOperVals()) {
101     BasicBlock *Successor = cast<BasicBlock>(V);
102     BBNumIt = BasicBlockToInteger.find(Successor);
103     assert(BBNumIt != BasicBlockToInteger.end() &&
104            "Could not find number for BasicBlock!");
105     int OtherBlockNumber = static_cast<int>(BBNumIt->second);
106 
107     int Relative = OtherBlockNumber - CurrentBlockNumber;
108     RelativeBlockLocations.push_back(Relative);
109   }
110 }
111 
getBlockOperVals()112 ArrayRef<Value *> IRInstructionData::getBlockOperVals() {
113   assert((isa<BranchInst>(Inst) ||
114          isa<PHINode>(Inst)) && "Instruction must be branch or PHINode");
115 
116   if (BranchInst *BI = dyn_cast<BranchInst>(Inst))
117     return ArrayRef<Value *>(
118       std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),
119       OperVals.end()
120     );
121 
122   if (PHINode *PN = dyn_cast<PHINode>(Inst))
123     return ArrayRef<Value *>(
124       std::next(OperVals.begin(), PN->getNumIncomingValues()),
125       OperVals.end()
126     );
127 
128   return ArrayRef<Value *>();
129 }
130 
setCalleeName(bool MatchByName)131 void IRInstructionData::setCalleeName(bool MatchByName) {
132   CallInst *CI = dyn_cast<CallInst>(Inst);
133   assert(CI && "Instruction must be call");
134 
135   CalleeName = "";
136   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
137     // To hash intrinsics, we use the opcode, and types like the other
138     // instructions, but also, the Intrinsic ID, and the Name of the
139     // intrinsic.
140     Intrinsic::ID IntrinsicID = II->getIntrinsicID();
141     FunctionType *FT = II->getFunctionType();
142     // If there is an overloaded name, we have to use the complex version
143     // of getName to get the entire string.
144     if (Intrinsic::isOverloaded(IntrinsicID))
145       CalleeName =
146           Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
147     // If there is not an overloaded name, we only need to use this version.
148     else
149       CalleeName = Intrinsic::getName(IntrinsicID).str();
150 
151     return;
152   }
153 
154   if (!CI->isIndirectCall() && MatchByName)
155     CalleeName = CI->getCalledFunction()->getName().str();
156 }
157 
setPHIPredecessors(DenseMap<BasicBlock *,unsigned> & BasicBlockToInteger)158 void IRInstructionData::setPHIPredecessors(
159     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
160   assert(isa<PHINode>(Inst) && "Instruction must be phi node");
161 
162   PHINode *PN = cast<PHINode>(Inst);
163   DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
164 
165   BBNumIt = BasicBlockToInteger.find(PN->getParent());
166   assert(BBNumIt != BasicBlockToInteger.end() &&
167          "Could not find location for BasicBlock!");
168 
169   int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
170 
171   // Convert the incoming blocks of the PHINode to an integer value, based on
172   // the relative distances between the current block and the incoming block.
173   for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
174     BasicBlock *Incoming = PN->getIncomingBlock(Idx);
175     BBNumIt = BasicBlockToInteger.find(Incoming);
176     assert(BBNumIt != BasicBlockToInteger.end() &&
177            "Could not find number for BasicBlock!");
178     int OtherBlockNumber = static_cast<int>(BBNumIt->second);
179 
180     int Relative = OtherBlockNumber - CurrentBlockNumber;
181     RelativeBlockLocations.push_back(Relative);
182   }
183 }
184 
predicateForConsistency(CmpInst * CI)185 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
186   switch (CI->getPredicate()) {
187   case CmpInst::FCMP_OGT:
188   case CmpInst::FCMP_UGT:
189   case CmpInst::FCMP_OGE:
190   case CmpInst::FCMP_UGE:
191   case CmpInst::ICMP_SGT:
192   case CmpInst::ICMP_UGT:
193   case CmpInst::ICMP_SGE:
194   case CmpInst::ICMP_UGE:
195     return CI->getSwappedPredicate();
196   default:
197     return CI->getPredicate();
198   }
199 }
200 
getPredicate() const201 CmpInst::Predicate IRInstructionData::getPredicate() const {
202   assert(isa<CmpInst>(Inst) &&
203          "Can only get a predicate from a compare instruction");
204 
205   if (RevisedPredicate)
206     return *RevisedPredicate;
207 
208   return cast<CmpInst>(Inst)->getPredicate();
209 }
210 
getCalleeName() const211 StringRef IRInstructionData::getCalleeName() const {
212   assert(isa<CallInst>(Inst) &&
213          "Can only get a name from a call instruction");
214 
215   assert(CalleeName && "CalleeName has not been set");
216 
217   return *CalleeName;
218 }
219 
isClose(const IRInstructionData & A,const IRInstructionData & B)220 bool IRSimilarity::isClose(const IRInstructionData &A,
221                            const IRInstructionData &B) {
222 
223   if (!A.Legal || !B.Legal)
224     return false;
225 
226   // Check if we are performing the same sort of operation on the same types
227   // but not on the same values.
228   if (!A.Inst->isSameOperationAs(B.Inst)) {
229     // If there is a predicate, this means that either there is a swapped
230     // predicate, or that the types are different, we want to make sure that
231     // the predicates are equivalent via swapping.
232     if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
233 
234       if (A.getPredicate() != B.getPredicate())
235         return false;
236 
237       // If the predicates are the same via swap, make sure that the types are
238       // still the same.
239       auto ZippedTypes = zip(A.OperVals, B.OperVals);
240 
241       return all_of(
242           ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
243             return std::get<0>(R)->getType() == std::get<1>(R)->getType();
244           });
245     }
246 
247     return false;
248   }
249 
250   // Since any GEP Instruction operands after the first operand cannot be
251   // defined by a register, we must make sure that the operands after the first
252   // are the same in the two instructions
253   if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
254     auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
255 
256     // If the instructions do not have the same inbounds restrictions, we do
257     // not consider them the same.
258     if (GEP->isInBounds() != OtherGEP->isInBounds())
259       return false;
260 
261     auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
262 
263     // We increment here since we do not care about the first instruction,
264     // we only care about the following operands since they must be the
265     // exact same to be considered similar.
266     return all_of(drop_begin(ZippedOperands),
267                   [](std::tuple<llvm::Use &, llvm::Use &> R) {
268                     return std::get<0>(R) == std::get<1>(R);
269                   });
270   }
271 
272   // If the instructions are functions calls, we make sure that the function
273   // name is the same.  We already know that the types are since is
274   // isSameOperationAs is true.
275   if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
276     if (A.getCalleeName() != B.getCalleeName())
277       return false;
278   }
279 
280   if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
281       A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
282     return false;
283 
284   return true;
285 }
286 
287 // TODO: This is the same as the MachineOutliner, and should be consolidated
288 // into the same interface.
convertToUnsignedVec(BasicBlock & BB,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)289 void IRInstructionMapper::convertToUnsignedVec(
290     BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
291     std::vector<unsigned> &IntegerMapping) {
292   BasicBlock::iterator It = BB.begin();
293 
294   std::vector<unsigned> IntegerMappingForBB;
295   std::vector<IRInstructionData *> InstrListForBB;
296 
297   for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
298     switch (InstClassifier.visit(*It)) {
299     case InstrType::Legal:
300       mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
301       break;
302     case InstrType::Illegal:
303       mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
304       break;
305     case InstrType::Invisible:
306       AddedIllegalLastTime = false;
307       break;
308     }
309   }
310 
311   if (AddedIllegalLastTime)
312     mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
313   for (IRInstructionData *ID : InstrListForBB)
314     this->IDL->push_back(*ID);
315   llvm::append_range(InstrList, InstrListForBB);
316   llvm::append_range(IntegerMapping, IntegerMappingForBB);
317 }
318 
319 // TODO: This is the same as the MachineOutliner, and should be consolidated
320 // into the same interface.
mapToLegalUnsigned(BasicBlock::iterator & It,std::vector<unsigned> & IntegerMappingForBB,std::vector<IRInstructionData * > & InstrListForBB)321 unsigned IRInstructionMapper::mapToLegalUnsigned(
322     BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
323     std::vector<IRInstructionData *> &InstrListForBB) {
324   // We added something legal, so we should unset the AddedLegalLastTime
325   // flag.
326   AddedIllegalLastTime = false;
327 
328   // If we have at least two adjacent legal instructions (which may have
329   // invisible instructions in between), remember that.
330   if (CanCombineWithPrevInstr)
331     HaveLegalRange = true;
332   CanCombineWithPrevInstr = true;
333 
334   // Get the integer for this instruction or give it the current
335   // LegalInstrNumber.
336   IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
337   InstrListForBB.push_back(ID);
338 
339   if (isa<BranchInst>(*It))
340     ID->setBranchSuccessors(BasicBlockToInteger);
341 
342   if (isa<CallInst>(*It))
343     ID->setCalleeName(EnableMatchCallsByName);
344 
345   if (isa<PHINode>(*It))
346     ID->setPHIPredecessors(BasicBlockToInteger);
347 
348   // Add to the instruction list
349   bool WasInserted;
350   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
351       ResultIt;
352   std::tie(ResultIt, WasInserted) =
353       InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
354   unsigned INumber = ResultIt->second;
355 
356   // There was an insertion.
357   if (WasInserted)
358     LegalInstrNumber++;
359 
360   IntegerMappingForBB.push_back(INumber);
361 
362   // Make sure we don't overflow or use any integers reserved by the DenseMap.
363   assert(LegalInstrNumber < IllegalInstrNumber &&
364          "Instruction mapping overflow!");
365 
366   assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
367          "Tried to assign DenseMap tombstone or empty key to instruction.");
368   assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
369          "Tried to assign DenseMap tombstone or empty key to instruction.");
370 
371   return INumber;
372 }
373 
374 IRInstructionData *
allocateIRInstructionData(Instruction & I,bool Legality,IRInstructionDataList & IDL)375 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
376                                                IRInstructionDataList &IDL) {
377   return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
378 }
379 
380 IRInstructionData *
allocateIRInstructionData(IRInstructionDataList & IDL)381 IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
382   return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
383 }
384 
385 IRInstructionDataList *
allocateIRInstructionDataList()386 IRInstructionMapper::allocateIRInstructionDataList() {
387   return new (IDLAllocator->Allocate()) IRInstructionDataList();
388 }
389 
390 // TODO: This is the same as the MachineOutliner, and should be consolidated
391 // into the same interface.
mapToIllegalUnsigned(BasicBlock::iterator & It,std::vector<unsigned> & IntegerMappingForBB,std::vector<IRInstructionData * > & InstrListForBB,bool End)392 unsigned IRInstructionMapper::mapToIllegalUnsigned(
393     BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
394     std::vector<IRInstructionData *> &InstrListForBB, bool End) {
395   // Can't combine an illegal instruction. Set the flag.
396   CanCombineWithPrevInstr = false;
397 
398   // Only add one illegal number per range of legal numbers.
399   if (AddedIllegalLastTime)
400     return IllegalInstrNumber;
401 
402   IRInstructionData *ID = nullptr;
403   if (!End)
404     ID = allocateIRInstructionData(*It, false, *IDL);
405   else
406     ID = allocateIRInstructionData(*IDL);
407   InstrListForBB.push_back(ID);
408 
409   // Remember that we added an illegal number last time.
410   AddedIllegalLastTime = true;
411   unsigned INumber = IllegalInstrNumber;
412   IntegerMappingForBB.push_back(IllegalInstrNumber--);
413 
414   assert(LegalInstrNumber < IllegalInstrNumber &&
415          "Instruction mapping overflow!");
416 
417   assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
418          "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
419 
420   assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
421          "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
422 
423   return INumber;
424 }
425 
IRSimilarityCandidate(unsigned StartIdx,unsigned Len,IRInstructionData * FirstInstIt,IRInstructionData * LastInstIt)426 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
427                                              IRInstructionData *FirstInstIt,
428                                              IRInstructionData *LastInstIt)
429     : StartIdx(StartIdx), Len(Len) {
430 
431   assert(FirstInstIt != nullptr && "Instruction is nullptr!");
432   assert(LastInstIt != nullptr && "Instruction is nullptr!");
433   assert(StartIdx + Len > StartIdx &&
434          "Overflow for IRSimilarityCandidate range?");
435   assert(Len - 1 == static_cast<unsigned>(std::distance(
436                         iterator(FirstInstIt), iterator(LastInstIt))) &&
437          "Length of the first and last IRInstructionData do not match the "
438          "given length");
439 
440   // We iterate over the given instructions, and map each unique value
441   // to a unique number in the IRSimilarityCandidate ValueToNumber and
442   // NumberToValue maps.  A constant get its own value globally, the individual
443   // uses of the constants are not considered to be unique.
444   //
445   // IR:                    Mapping Added:
446   // %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2
447   // %add2 = add i32 %a, %1    %add2 -> 4
448   // %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5
449   //
450   // when replace with global values, starting from 1, would be
451   //
452   // 3 = add i32 1, 2
453   // 4 = add i32 1, 3
454   // 6 = add i32 5, 2
455   unsigned LocalValNumber = 1;
456   IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
457   for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
458     // Map the operand values to an unsigned integer if it does not already
459     // have an unsigned integer assigned to it.
460     for (Value *Arg : ID->OperVals)
461       if (ValueToNumber.try_emplace(Arg, LocalValNumber).second) {
462         NumberToValue.try_emplace(LocalValNumber, Arg);
463         LocalValNumber++;
464       }
465 
466     // Mapping the instructions to an unsigned integer if it is not already
467     // exist in the mapping.
468     if (ValueToNumber.try_emplace(ID->Inst, LocalValNumber).second) {
469       NumberToValue.try_emplace(LocalValNumber, ID->Inst);
470       LocalValNumber++;
471     }
472   }
473 
474   // Setting the first and last instruction data pointers for the candidate.  If
475   // we got through the entire for loop without hitting an assert, we know
476   // that both of these instructions are not nullptrs.
477   FirstInst = FirstInstIt;
478   LastInst = LastInstIt;
479 
480   // Add the basic blocks contained in the set into the global value numbering.
481   DenseSet<BasicBlock *> BBSet;
482   getBasicBlocks(BBSet);
483   for (BasicBlock *BB : BBSet) {
484     if (ValueToNumber.try_emplace(BB, LocalValNumber).second) {
485       NumberToValue.try_emplace(LocalValNumber, BB);
486       LocalValNumber++;
487     }
488   }
489 }
490 
isSimilar(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B)491 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
492                                       const IRSimilarityCandidate &B) {
493   if (A.getLength() != B.getLength())
494     return false;
495 
496   auto InstrDataForBoth =
497       zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
498 
499   return all_of(InstrDataForBoth,
500                 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
501                   IRInstructionData &A = std::get<0>(R);
502                   IRInstructionData &B = std::get<1>(R);
503                   if (!A.Legal || !B.Legal)
504                     return false;
505                   return isClose(A, B);
506                 });
507 }
508 
509 /// Determine if one or more of the assigned global value numbers for the
510 /// operands in \p TargetValueNumbers is in the current mapping set for operand
511 /// numbers in \p SourceOperands.  The set of possible corresponding global
512 /// value numbers are replaced with the most recent version of compatible
513 /// values.
514 ///
515 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
516 /// value number for the source IRInstructionCandidate.
517 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
518 /// IRSimilarityCandidate global value numbers to a set of possible numbers in
519 /// the target.
520 /// \param [in] SourceOperands - The operands in the original
521 /// IRSimilarityCandidate in the current instruction.
522 /// \param [in] TargetValueNumbers - The global value numbers of the operands in
523 /// the corresponding Instruction in the other IRSimilarityCandidate.
524 /// \returns true if there exists a possible mapping between the source
525 /// Instruction operands and the target Instruction operands, and false if not.
checkNumberingAndReplaceCommutative(const DenseMap<Value *,unsigned> & SourceValueToNumberMapping,DenseMap<unsigned,DenseSet<unsigned>> & CurrentSrcTgtNumberMapping,ArrayRef<Value * > & SourceOperands,DenseSet<unsigned> & TargetValueNumbers)526 static bool checkNumberingAndReplaceCommutative(
527   const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
528   DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
529   ArrayRef<Value *> &SourceOperands,
530   DenseSet<unsigned> &TargetValueNumbers){
531 
532   DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
533 
534   unsigned ArgVal;
535   bool WasInserted;
536 
537   // Iterate over the operands in the source IRSimilarityCandidate to determine
538   // whether there exists an operand in the other IRSimilarityCandidate that
539   // creates a valid mapping of Value to Value between the
540   // IRSimilarityCaniddates.
541   for (Value *V : SourceOperands) {
542     ArgVal = SourceValueToNumberMapping.find(V)->second;
543 
544     // Instead of finding a current mapping, we attempt to insert a set.
545     std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
546         std::make_pair(ArgVal, TargetValueNumbers));
547 
548     // We need to iterate over the items in other IRSimilarityCandidate's
549     // Instruction to determine whether there is a valid mapping of
550     // Value to Value.
551     DenseSet<unsigned> NewSet;
552     for (unsigned &Curr : ValueMappingIt->second)
553       // If we can find the value in the mapping, we add it to the new set.
554       if (TargetValueNumbers.contains(Curr))
555         NewSet.insert(Curr);
556 
557     // If we could not find a Value, return 0.
558     if (NewSet.empty())
559       return false;
560 
561     // Otherwise replace the old mapping with the newly constructed one.
562     if (NewSet.size() != ValueMappingIt->second.size())
563       ValueMappingIt->second.swap(NewSet);
564 
565     // We have reached no conclusions about the mapping, and cannot remove
566     // any items from the other operands, so we move to check the next operand.
567     if (ValueMappingIt->second.size() != 1)
568       continue;
569 
570     unsigned ValToRemove = *ValueMappingIt->second.begin();
571     // When there is only one item left in the mapping for and operand, remove
572     // the value from the other operands.  If it results in there being no
573     // mapping, return false, it means the mapping is wrong
574     for (Value *InnerV : SourceOperands) {
575       if (V == InnerV)
576         continue;
577 
578       unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
579       ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
580       if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
581         continue;
582 
583       ValueMappingIt->second.erase(ValToRemove);
584       if (ValueMappingIt->second.empty())
585         return false;
586     }
587   }
588 
589   return true;
590 }
591 
592 /// Determine if operand number \p TargetArgVal is in the current mapping set
593 /// for operand number \p SourceArgVal.
594 ///
595 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
596 /// value numbers from source IRSimilarityCandidate to target
597 /// IRSimilarityCandidate.
598 /// \param [in] SourceArgVal The global value number for an operand in the
599 /// in the original candidate.
600 /// \param [in] TargetArgVal The global value number for the corresponding
601 /// operand in the other candidate.
602 /// \returns True if there exists a mapping and false if not.
checkNumberingAndReplace(DenseMap<unsigned,DenseSet<unsigned>> & CurrentSrcTgtNumberMapping,unsigned SourceArgVal,unsigned TargetArgVal)603 bool checkNumberingAndReplace(
604     DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
605     unsigned SourceArgVal, unsigned TargetArgVal) {
606   // We are given two unsigned integers representing the global values of
607   // the operands in different IRSimilarityCandidates and a current mapping
608   // between the two.
609   //
610   // Source Operand GVN: 1
611   // Target Operand GVN: 2
612   // CurrentMapping: {1: {1, 2}}
613   //
614   // Since we have mapping, and the target operand is contained in the set, we
615   // update it to:
616   // CurrentMapping: {1: {2}}
617   // and can return true. But, if the mapping was
618   // CurrentMapping: {1: {3}}
619   // we would return false.
620 
621   bool WasInserted;
622   DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
623 
624   std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
625       std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
626 
627   // If we created a new mapping, then we are done.
628   if (WasInserted)
629     return true;
630 
631   // If there is more than one option in the mapping set, and the target value
632   // is included in the mapping set replace that set with one that only includes
633   // the target value, as it is the only valid mapping via the non commutative
634   // instruction.
635 
636   DenseSet<unsigned> &TargetSet = Val->second;
637   if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
638     TargetSet.clear();
639     TargetSet.insert(TargetArgVal);
640     return true;
641   }
642 
643   // Return true if we can find the value in the set.
644   return TargetSet.contains(TargetArgVal);
645 }
646 
compareNonCommutativeOperandMapping(OperandMapping A,OperandMapping B)647 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
648     OperandMapping A, OperandMapping B) {
649   // Iterators to keep track of where we are in the operands for each
650   // Instruction.
651   ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
652   ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
653   unsigned OperandLength = A.OperVals.size();
654 
655   // For each operand, get the value numbering and ensure it is consistent.
656   for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
657     unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
658     unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
659 
660     // Attempt to add a set with only the target value.  If there is no mapping
661     // we can create it here.
662     //
663     // For an instruction like a subtraction:
664     // IRSimilarityCandidateA:  IRSimilarityCandidateB:
665     // %resultA = sub %a, %b    %resultB = sub %d, %e
666     //
667     // We map %a -> %d and %b -> %e.
668     //
669     // And check to see whether their mapping is consistent in
670     // checkNumberingAndReplace.
671 
672     if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
673       return false;
674 
675     if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
676       return false;
677   }
678   return true;
679 }
680 
compareCommutativeOperandMapping(OperandMapping A,OperandMapping B)681 bool IRSimilarityCandidate::compareCommutativeOperandMapping(
682     OperandMapping A, OperandMapping B) {
683   DenseSet<unsigned> ValueNumbersA;
684   DenseSet<unsigned> ValueNumbersB;
685 
686   ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
687   ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
688   unsigned OperandLength = A.OperVals.size();
689 
690   // Find the value number sets for the operands.
691   for (unsigned Idx = 0; Idx < OperandLength;
692        Idx++, VItA++, VItB++) {
693     ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
694     ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
695   }
696 
697   // Iterate over the operands in the first IRSimilarityCandidate and make sure
698   // there exists a possible mapping with the operands in the second
699   // IRSimilarityCandidate.
700   if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
701                                            A.ValueNumberMapping, A.OperVals,
702                                            ValueNumbersB))
703     return false;
704 
705   // Iterate over the operands in the second IRSimilarityCandidate and make sure
706   // there exists a possible mapping with the operands in the first
707   // IRSimilarityCandidate.
708   if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
709                                            B.ValueNumberMapping, B.OperVals,
710                                            ValueNumbersA))
711     return false;
712 
713   return true;
714 }
715 
compareAssignmentMapping(const unsigned InstValA,const unsigned & InstValB,DenseMap<unsigned,DenseSet<unsigned>> & ValueNumberMappingA,DenseMap<unsigned,DenseSet<unsigned>> & ValueNumberMappingB)716 bool IRSimilarityCandidate::compareAssignmentMapping(
717     const unsigned InstValA, const unsigned &InstValB,
718     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
719     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
720   DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
721   bool WasInserted;
722   std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
723       std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
724   if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
725     return false;
726   else if (ValueMappingIt->second.size() != 1) {
727     for (unsigned OtherVal : ValueMappingIt->second) {
728       if (OtherVal == InstValB)
729         continue;
730       auto OtherValIt = ValueNumberMappingA.find(OtherVal);
731       if (OtherValIt == ValueNumberMappingA.end())
732         continue;
733       OtherValIt->second.erase(InstValA);
734     }
735     ValueNumberMappingA.erase(ValueMappingIt);
736     std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
737       std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
738   }
739 
740   return true;
741 }
742 
checkRelativeLocations(RelativeLocMapping A,RelativeLocMapping B)743 bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
744                                                    RelativeLocMapping B) {
745   // Get the basic blocks the label refers to.
746   BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
747   BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
748 
749   // Get the basic blocks contained in each region.
750   DenseSet<BasicBlock *> BasicBlockA;
751   DenseSet<BasicBlock *> BasicBlockB;
752   A.IRSC.getBasicBlocks(BasicBlockA);
753   B.IRSC.getBasicBlocks(BasicBlockB);
754 
755   // Determine if the block is contained in the region.
756   bool AContained = BasicBlockA.contains(ABB);
757   bool BContained = BasicBlockB.contains(BBB);
758 
759   // Both blocks need to be contained in the region, or both need to be outside
760   // the region.
761   if (AContained != BContained)
762     return false;
763 
764   // If both are contained, then we need to make sure that the relative
765   // distance to the target blocks are the same.
766   if (AContained)
767     return A.RelativeLocation == B.RelativeLocation;
768   return true;
769 }
770 
compareStructure(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B)771 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
772                                              const IRSimilarityCandidate &B) {
773   DenseMap<unsigned, DenseSet<unsigned>> MappingA;
774   DenseMap<unsigned, DenseSet<unsigned>> MappingB;
775   return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
776 }
777 
778 typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
779                       SmallVector<int, 4> &, ArrayRef<Value *> &,
780                       ArrayRef<Value *> &>
781     ZippedRelativeLocationsT;
782 
compareStructure(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B,DenseMap<unsigned,DenseSet<unsigned>> & ValueNumberMappingA,DenseMap<unsigned,DenseSet<unsigned>> & ValueNumberMappingB)783 bool IRSimilarityCandidate::compareStructure(
784     const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
785     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
786     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
787   if (A.getLength() != B.getLength())
788     return false;
789 
790   if (A.ValueToNumber.size() != B.ValueToNumber.size())
791     return false;
792 
793   iterator ItA = A.begin();
794   iterator ItB = B.begin();
795 
796   // These ValueNumber Mapping sets create a create a mapping between the values
797   // in one candidate to values in the other candidate.  If we create a set with
798   // one element, and that same element maps to the original element in the
799   // candidate we have a good mapping.
800 
801   // Iterate over the instructions contained in each candidate
802   unsigned SectionLength = A.getStartIdx() + A.getLength();
803   for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
804        ItA++, ItB++, Loc++) {
805     // Make sure the instructions are similar to one another.
806     if (!isClose(*ItA, *ItB))
807       return false;
808 
809     Instruction *IA = ItA->Inst;
810     Instruction *IB = ItB->Inst;
811 
812     if (!ItA->Legal || !ItB->Legal)
813       return false;
814 
815     // Get the operand sets for the instructions.
816     ArrayRef<Value *> OperValsA = ItA->OperVals;
817     ArrayRef<Value *> OperValsB = ItB->OperVals;
818 
819     unsigned InstValA = A.ValueToNumber.find(IA)->second;
820     unsigned InstValB = B.ValueToNumber.find(IB)->second;
821 
822     // Ensure that the mappings for the instructions exists.
823     if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
824                                   ValueNumberMappingB))
825       return false;
826 
827     if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
828                                   ValueNumberMappingA))
829       return false;
830 
831     // We have different paths for commutative instructions and non-commutative
832     // instructions since commutative instructions could allow multiple mappings
833     // to certain values.
834     if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
835         !isa<IntrinsicInst>(IA)) {
836       if (!compareCommutativeOperandMapping(
837               {A, OperValsA, ValueNumberMappingA},
838               {B, OperValsB, ValueNumberMappingB}))
839         return false;
840       continue;
841     }
842 
843     // Handle the non-commutative cases.
844     if (!compareNonCommutativeOperandMapping(
845             {A, OperValsA, ValueNumberMappingA},
846             {B, OperValsB, ValueNumberMappingB}))
847       return false;
848 
849     // Here we check that between two corresponding instructions,
850     // when referring to a basic block in the same region, the
851     // relative locations are the same. And, that the instructions refer to
852     // basic blocks outside the region in the same corresponding locations.
853 
854     // We are able to make the assumption about blocks outside of the region
855     // since the target block labels are considered values and will follow the
856     // same number matching that we defined for the other instructions in the
857     // region.  So, at this point, in each location we target a specific block
858     // outside the region, we are targeting a corresponding block in each
859     // analagous location in the region we are comparing to.
860     if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
861         !(isa<PHINode>(IA) && isa<PHINode>(IB)))
862       continue;
863 
864     SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
865     SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
866     ArrayRef<Value *> ABL = ItA->getBlockOperVals();
867     ArrayRef<Value *> BBL = ItB->getBlockOperVals();
868 
869     // Check to make sure that the number of operands, and branching locations
870     // between BranchInsts is the same.
871     if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
872         ABL.size() != BBL.size())
873       return false;
874 
875     assert(RelBlockLocsA.size() == ABL.size() &&
876            "Block information vectors not the same size.");
877     assert(RelBlockLocsB.size() == BBL.size() &&
878            "Block information vectors not the same size.");
879 
880     ZippedRelativeLocationsT ZippedRelativeLocations =
881         zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
882     if (any_of(ZippedRelativeLocations,
883                [&A, &B](std::tuple<int, int, Value *, Value *> R) {
884                  return !checkRelativeLocations(
885                      {A, std::get<0>(R), std::get<2>(R)},
886                      {B, std::get<1>(R), std::get<3>(R)});
887                }))
888       return false;
889   }
890   return true;
891 }
892 
overlap(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B)893 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
894                                     const IRSimilarityCandidate &B) {
895   auto DoesOverlap = [](const IRSimilarityCandidate &X,
896                         const IRSimilarityCandidate &Y) {
897     // Check:
898     // XXXXXX        X starts before Y ends
899     //      YYYYYYY  Y starts after X starts
900     return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
901   };
902 
903   return DoesOverlap(A, B) || DoesOverlap(B, A);
904 }
905 
populateMapper(Module & M,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)906 void IRSimilarityIdentifier::populateMapper(
907     Module &M, std::vector<IRInstructionData *> &InstrList,
908     std::vector<unsigned> &IntegerMapping) {
909 
910   std::vector<IRInstructionData *> InstrListForModule;
911   std::vector<unsigned> IntegerMappingForModule;
912   // Iterate over the functions in the module to map each Instruction in each
913   // BasicBlock to an unsigned integer.
914   Mapper.initializeForBBs(M);
915 
916   for (Function &F : M) {
917 
918     if (F.empty())
919       continue;
920 
921     for (BasicBlock &BB : F) {
922 
923       // BB has potential to have similarity since it has a size greater than 2
924       // and can therefore match other regions greater than 2. Map it to a list
925       // of unsigned integers.
926       Mapper.convertToUnsignedVec(BB, InstrListForModule,
927                                   IntegerMappingForModule);
928     }
929 
930     BasicBlock::iterator It = F.begin()->end();
931     Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
932                                 true);
933     if (InstrListForModule.size() > 0)
934       Mapper.IDL->push_back(*InstrListForModule.back());
935   }
936 
937   // Insert the InstrListForModule at the end of the overall InstrList so that
938   // we can have a long InstrList for the entire set of Modules being analyzed.
939   llvm::append_range(InstrList, InstrListForModule);
940   // Do the same as above, but for IntegerMapping.
941   llvm::append_range(IntegerMapping, IntegerMappingForModule);
942 }
943 
populateMapper(ArrayRef<std::unique_ptr<Module>> & Modules,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)944 void IRSimilarityIdentifier::populateMapper(
945     ArrayRef<std::unique_ptr<Module>> &Modules,
946     std::vector<IRInstructionData *> &InstrList,
947     std::vector<unsigned> &IntegerMapping) {
948 
949   // Iterate over, and map the instructions in each module.
950   for (const std::unique_ptr<Module> &M : Modules)
951     populateMapper(*M, InstrList, IntegerMapping);
952 }
953 
954 /// From a repeated subsequence, find all the different instances of the
955 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
956 /// the IRInstructionData in subsequence.
957 ///
958 /// \param [in] Mapper - The instruction mapper for basic correctness checks.
959 /// \param [in] InstrList - The vector that holds the instruction data.
960 /// \param [in] IntegerMapping - The vector that holds the mapped integers.
961 /// \param [out] CandsForRepSubstring - The vector to store the generated
962 /// IRSimilarityCandidates.
createCandidatesFromSuffixTree(const IRInstructionMapper & Mapper,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping,SuffixTree::RepeatedSubstring & RS,std::vector<IRSimilarityCandidate> & CandsForRepSubstring)963 static void createCandidatesFromSuffixTree(
964     const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
965     std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
966     std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
967 
968   unsigned StringLen = RS.Length;
969   if (StringLen < 2)
970     return;
971 
972   // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
973   for (const unsigned &StartIdx : RS.StartIndices) {
974     unsigned EndIdx = StartIdx + StringLen - 1;
975 
976     // Check that this subsequence does not contain an illegal instruction.
977     bool ContainsIllegal = false;
978     for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
979       unsigned Key = IntegerMapping[CurrIdx];
980       if (Key > Mapper.IllegalInstrNumber) {
981         ContainsIllegal = true;
982         break;
983       }
984     }
985 
986     // If we have an illegal instruction, we should not create an
987     // IRSimilarityCandidate for this region.
988     if (ContainsIllegal)
989       continue;
990 
991     // We are getting iterators to the instructions in this region of code
992     // by advancing the start and end indices from the start of the
993     // InstrList.
994     std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
995     std::advance(StartIt, StartIdx);
996     std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
997     std::advance(EndIt, EndIdx);
998 
999     CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1000   }
1001 }
1002 
createCanonicalRelationFrom(IRSimilarityCandidate & SourceCand,DenseMap<unsigned,DenseSet<unsigned>> & ToSourceMapping,DenseMap<unsigned,DenseSet<unsigned>> & FromSourceMapping)1003 void IRSimilarityCandidate::createCanonicalRelationFrom(
1004     IRSimilarityCandidate &SourceCand,
1005     DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
1006     DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
1007   assert(SourceCand.CanonNumToNumber.size() != 0 &&
1008          "Base canonical relationship is empty!");
1009   assert(SourceCand.NumberToCanonNum.size() != 0 &&
1010          "Base canonical relationship is empty!");
1011 
1012   assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1013   assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1014 
1015   DenseSet<unsigned> UsedGVNs;
1016   // Iterate over the mappings provided from this candidate to SourceCand.  We
1017   // are then able to map the GVN in this candidate to the same canonical number
1018   // given to the corresponding GVN in SourceCand.
1019   for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
1020     unsigned SourceGVN = GVNMapping.first;
1021 
1022     assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1023 
1024     unsigned ResultGVN;
1025     // We need special handling if we have more than one potential value.  This
1026     // means that there are at least two GVNs that could correspond to this GVN.
1027     // This could lead to potential swapping later on, so we make a decision
1028     // here to ensure a one-to-one mapping.
1029     if (GVNMapping.second.size() > 1) {
1030       bool Found = false;
1031       for (unsigned Val : GVNMapping.second) {
1032         // We make sure the target value number hasn't already been reserved.
1033         if (UsedGVNs.contains(Val))
1034           continue;
1035 
1036         // We make sure that the opposite mapping is still consistent.
1037         DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
1038             FromSourceMapping.find(Val);
1039 
1040         if (!It->second.contains(SourceGVN))
1041           continue;
1042 
1043         // We pick the first item that satisfies these conditions.
1044         Found = true;
1045         ResultGVN = Val;
1046         break;
1047       }
1048 
1049       assert(Found && "Could not find matching value for source GVN");
1050       (void)Found;
1051 
1052     } else
1053       ResultGVN = *GVNMapping.second.begin();
1054 
1055     // Whatever GVN is found, we mark it as used.
1056     UsedGVNs.insert(ResultGVN);
1057 
1058     unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1059     CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1060     NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1061   }
1062 
1063   DenseSet<BasicBlock *> BBSet;
1064   getBasicBlocks(BBSet);
1065   // Find canonical numbers for the BasicBlocks in the current candidate.
1066   // This is done by finding the corresponding value for the first instruction
1067   // in the block in the current candidate, finding the matching value in the
1068   // source candidate.  Then by finding the parent of this value, use the
1069   // canonical number of the block in the source candidate for the canonical
1070   // number in the current candidate.
1071   for (BasicBlock *BB : BBSet) {
1072     unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1073 
1074     // We can skip the BasicBlock if the canonical numbering has already been
1075     // found in a separate instruction.
1076     if (NumberToCanonNum.contains(BBGVNForCurrCand))
1077       continue;
1078 
1079     // If the basic block is the starting block, then the shared instruction may
1080     // not be the first instruction in the block, it will be the first
1081     // instruction in the similarity region.
1082     Value *FirstOutlineInst = BB == getStartBB()
1083                                   ? frontInstruction()
1084                                   : &*BB->instructionsWithoutDebug().begin();
1085 
1086     unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1087     unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1088     unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1089     Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1090     BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1091     unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1092     unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1093     CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1094     NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1095   }
1096 }
1097 
createCanonicalRelationFrom(IRSimilarityCandidate & SourceCand,IRSimilarityCandidate & SourceCandLarge,IRSimilarityCandidate & TargetCandLarge)1098 void IRSimilarityCandidate::createCanonicalRelationFrom(
1099     IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
1100     IRSimilarityCandidate &TargetCandLarge) {
1101   assert(!SourceCand.CanonNumToNumber.empty() &&
1102          "Canonical Relationship is non-empty");
1103   assert(!SourceCand.NumberToCanonNum.empty() &&
1104          "Canonical Relationship is non-empty");
1105 
1106   assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1107          "Canonical Relationship is non-empty");
1108   assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1109          "Canonical Relationship is non-empty");
1110 
1111   assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1112          "Canonical Relationship is non-empty");
1113   assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1114          "Canonical Relationship is non-empty");
1115 
1116   assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1117   assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1118 
1119   // We're going to use the larger candidates as a "bridge" to create the
1120   // canonical number for the target candidate since we have idetified two
1121   // candidates as subsequences of larger sequences, and therefore must be
1122   // structurally similar.
1123   for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1124     Value *CurrVal = ValueNumPair.first;
1125     unsigned TargetCandGVN = ValueNumPair.second;
1126 
1127     // Find the numbering in the large candidate that surrounds the
1128     // current candidate.
1129     std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1130     assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1131 
1132     // Get the canonical numbering in the large target candidate.
1133     std::optional<unsigned> OTargetCandCanon =
1134         TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1135     assert(OTargetCandCanon.has_value() &&
1136            "Canononical Number not found for GVN");
1137 
1138     // Get the GVN in the large source candidate from the canonical numbering.
1139     std::optional<unsigned> OLargeSourceGVN =
1140         SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
1141     assert(OLargeSourceGVN.has_value() &&
1142            "GVN Number not found for Canonical Number");
1143 
1144     // Get the Value from the GVN in the large source candidate.
1145     std::optional<Value *> OLargeSourceV =
1146         SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1147     assert(OLargeSourceV.has_value() && "Value not found for GVN");
1148 
1149     // Get the GVN number for the Value in the source candidate.
1150     std::optional<unsigned> OSourceGVN =
1151         SourceCand.getGVN(OLargeSourceV.value());
1152     assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1153 
1154     // Get the canonical numbering from the GVN/
1155     std::optional<unsigned> OSourceCanon =
1156         SourceCand.getCanonicalNum(OSourceGVN.value());
1157     assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1158 
1159     // Insert the canonical numbering and GVN pair into their respective
1160     // mappings.
1161     CanonNumToNumber.insert(
1162         std::make_pair(OSourceCanon.value(), TargetCandGVN));
1163     NumberToCanonNum.insert(
1164         std::make_pair(TargetCandGVN, OSourceCanon.value()));
1165   }
1166 }
1167 
createCanonicalMappingFor(IRSimilarityCandidate & CurrCand)1168 void IRSimilarityCandidate::createCanonicalMappingFor(
1169     IRSimilarityCandidate &CurrCand) {
1170   assert(CurrCand.CanonNumToNumber.size() == 0 &&
1171          "Canonical Relationship is non-empty");
1172   assert(CurrCand.NumberToCanonNum.size() == 0 &&
1173          "Canonical Relationship is non-empty");
1174 
1175   unsigned CanonNum = 0;
1176   // Iterate over the value numbers found, the order does not matter in this
1177   // case.
1178   for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1179     CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1180     CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1181     CanonNum++;
1182   }
1183 }
1184 
1185 /// Look for larger IRSimilarityCandidates From the previously matched
1186 /// IRSimilarityCandidates that fully contain \p CandA or \p CandB.  If there is
1187 /// an overlap, return a pair of structurally similar, larger
1188 /// IRSimilarityCandidates.
1189 ///
1190 /// \param [in] CandA - The first candidate we are trying to determine the
1191 /// structure of.
1192 /// \param [in] CandB - The second candidate we are trying to determine the
1193 /// structure of.
1194 /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1195 /// a circuit to the IRSimilarityCandidates that include this instruction.
1196 /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1197 /// number representing the structural group assigned to it.
1198 static std::optional<
1199     std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
CheckLargerCands(IRSimilarityCandidate & CandA,IRSimilarityCandidate & CandB,DenseMap<unsigned,DenseSet<IRSimilarityCandidate * >> & IndexToIncludedCand,DenseMap<IRSimilarityCandidate *,unsigned> & CandToGroup)1200 CheckLargerCands(
1201     IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
1202     DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1203     DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {
1204   DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
1205   DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
1206   DenseSet<unsigned> IncludedGroupsA;
1207   DenseSet<unsigned> IncludedGroupsB;
1208 
1209   // Find the overall similarity group numbers that fully contain the candidate,
1210   // and record the larger candidate for each group.
1211   auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1212   std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1213       Result;
1214 
1215   unsigned CandAStart = CandA.getStartIdx();
1216   unsigned CandAEnd = CandA.getEndIdx();
1217   unsigned CandBStart = CandB.getStartIdx();
1218   unsigned CandBEnd = CandB.getEndIdx();
1219   if (IdxToCandidateIt == IndexToIncludedCand.end())
1220     return Result;
1221   for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1222     if (MatchedCand->getStartIdx() > CandAStart ||
1223         (MatchedCand->getEndIdx() < CandAEnd))
1224       continue;
1225     unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1226     IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1227     IncludedGroupsA.insert(GroupNum);
1228   }
1229 
1230   // Find the overall similarity group numbers that fully contain the next
1231   // candidate, and record the larger candidate for each group.
1232   IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1233   if (IdxToCandidateIt == IndexToIncludedCand.end())
1234     return Result;
1235   for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1236     if (MatchedCand->getStartIdx() > CandBStart ||
1237         MatchedCand->getEndIdx() < CandBEnd)
1238       continue;
1239     unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1240     IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1241     IncludedGroupsB.insert(GroupNum);
1242   }
1243 
1244   // Find the intersection between the two groups, these are the groups where
1245   // the larger candidates exist.
1246   set_intersect(IncludedGroupsA, IncludedGroupsB);
1247 
1248   // If there is no intersection between the sets, then we cannot determine
1249   // whether or not there is a match.
1250   if (IncludedGroupsA.empty())
1251     return Result;
1252 
1253   // Create a pair that contains the larger candidates.
1254   auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1255   auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1256   Result = std::make_pair(ItA->second, ItB->second);
1257   return Result;
1258 }
1259 
1260 /// From the list of IRSimilarityCandidates, perform a comparison between each
1261 /// IRSimilarityCandidate to determine if there are overlapping
1262 /// IRInstructionData, or if they do not have the same structure.
1263 ///
1264 /// \param [in] CandsForRepSubstring - The vector containing the
1265 /// IRSimilarityCandidates.
1266 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1267 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1268 /// vector are structurally similar to one another.
1269 /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1270 /// a circuit to the IRSimilarityCandidates that include this instruction.
1271 /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1272 /// number representing the structural group assigned to it.
findCandidateStructures(std::vector<IRSimilarityCandidate> & CandsForRepSubstring,DenseMap<unsigned,SimilarityGroup> & StructuralGroups,DenseMap<unsigned,DenseSet<IRSimilarityCandidate * >> & IndexToIncludedCand,DenseMap<IRSimilarityCandidate *,unsigned> & CandToOverallGroup)1273 static void findCandidateStructures(
1274     std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1275     DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
1276     DenseMap<unsigned,  DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1277     DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup
1278     ) {
1279   std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1280       InnerCandEndIt;
1281 
1282   // IRSimilarityCandidates each have a structure for operand use.  It is
1283   // possible that two instances of the same subsequences have different
1284   // structure. Each type of structure found is assigned a number.  This
1285   // DenseMap maps an IRSimilarityCandidate to which type of similarity
1286   // discovered it fits within.
1287   DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1288 
1289   // Find the compatibility from each candidate to the others to determine
1290   // which candidates overlap and which have the same structure by mapping
1291   // each structure to a different group.
1292   bool SameStructure;
1293   bool Inserted;
1294   unsigned CurrentGroupNum = 0;
1295   unsigned OuterGroupNum;
1296   DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
1297   DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
1298   DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
1299 
1300   // Iterate over the candidates to determine its structural and overlapping
1301   // compatibility with other instructions
1302   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1303   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1304   for (CandIt = CandsForRepSubstring.begin(),
1305       CandEndIt = CandsForRepSubstring.end();
1306        CandIt != CandEndIt; CandIt++) {
1307 
1308     // Determine if it has an assigned structural group already.
1309     // If not, we assign it one, and add it to our mapping.
1310     std::tie(CandToGroupIt, Inserted) =
1311         CandToGroup.try_emplace(&*CandIt, CurrentGroupNum);
1312     if (Inserted)
1313       ++CurrentGroupNum;
1314 
1315     // Get the structural group number from the iterator.
1316     OuterGroupNum = CandToGroupIt->second;
1317 
1318     // Check if we already have a list of IRSimilarityCandidates for the current
1319     // structural group.  Create one if one does not exist.
1320     CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1321     if (CurrentGroupPair == StructuralGroups.end()) {
1322       IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
1323       std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1324           std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1325     }
1326 
1327     // Iterate over the IRSimilarityCandidates following the current
1328     // IRSimilarityCandidate in the list to determine whether the two
1329     // IRSimilarityCandidates are compatible.  This is so we do not repeat pairs
1330     // of IRSimilarityCandidates.
1331     for (InnerCandIt = std::next(CandIt),
1332         InnerCandEndIt = CandsForRepSubstring.end();
1333          InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1334 
1335       // We check if the inner item has a group already, if it does, we skip it.
1336       CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1337       if (CandToGroupItInner != CandToGroup.end())
1338         continue;
1339 
1340       // Check if we have found structural similarity between two candidates
1341       // that fully contains the first and second candidates.
1342       std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1343           LargerPair = CheckLargerCands(
1344               *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1345 
1346       // If a pair was found, it means that we can assume that these smaller
1347       // substrings are also structurally similar.  Use the larger candidates to
1348       // determine the canonical mapping between the two sections.
1349       if (LargerPair.has_value()) {
1350         SameStructure = true;
1351         InnerCandIt->createCanonicalRelationFrom(
1352             *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1353         CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1354         CurrentGroupPair->second.push_back(*InnerCandIt);
1355         continue;
1356       }
1357 
1358       // Otherwise we determine if they have the same structure and add it to
1359       // vector if they match.
1360       ValueNumberMappingA.clear();
1361       ValueNumberMappingB.clear();
1362       SameStructure = IRSimilarityCandidate::compareStructure(
1363           *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1364       if (!SameStructure)
1365         continue;
1366 
1367       InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1368                                                ValueNumberMappingB);
1369       CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1370       CurrentGroupPair->second.push_back(*InnerCandIt);
1371     }
1372   }
1373 }
1374 
findCandidates(std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)1375 void IRSimilarityIdentifier::findCandidates(
1376     std::vector<IRInstructionData *> &InstrList,
1377     std::vector<unsigned> &IntegerMapping) {
1378   SuffixTree ST(IntegerMapping);
1379 
1380   std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1381   std::vector<SimilarityGroup> NewCandidateGroups;
1382 
1383   DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1384   DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
1385   DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1386 
1387   // Iterate over the subsequences found by the Suffix Tree to create
1388   // IRSimilarityCandidates for each repeated subsequence and determine which
1389   // instances are structurally similar to one another.
1390 
1391   // Sort the suffix tree from longest substring to shortest.
1392   std::vector<SuffixTree::RepeatedSubstring> RSes;
1393   for (SuffixTree::RepeatedSubstring &RS : ST)
1394     RSes.push_back(RS);
1395 
1396   llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
1397                              const SuffixTree::RepeatedSubstring &RHS) {
1398     return LHS.Length > RHS.Length;
1399   });
1400   for (SuffixTree::RepeatedSubstring &RS : RSes) {
1401     createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1402                                    CandsForRepSubstring);
1403 
1404     if (CandsForRepSubstring.size() < 2)
1405       continue;
1406 
1407     findCandidateStructures(CandsForRepSubstring, StructuralGroups,
1408                             IndexToIncludedCand, CandToGroup);
1409     for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1410       // We only add the group if it contains more than one
1411       // IRSimilarityCandidate.  If there is only one, that means there is no
1412       // other repeated subsequence with the same structure.
1413       if (Group.second.size() > 1) {
1414         SimilarityCandidates->push_back(Group.second);
1415         // Iterate over each candidate in the group, and add an entry for each
1416         // instruction included with a mapping to a set of
1417         // IRSimilarityCandidates that include that instruction.
1418         for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
1419           for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1420                Idx <= Edx; ++Idx)
1421             IndexToIncludedCand[Idx].insert(&IRCand);
1422           // Add mapping of candidate to the overall similarity group number.
1423           CandToGroup.insert(
1424               std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1425         }
1426       }
1427     }
1428 
1429     CandsForRepSubstring.clear();
1430     StructuralGroups.clear();
1431     NewCandidateGroups.clear();
1432   }
1433 }
1434 
findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules)1435 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
1436     ArrayRef<std::unique_ptr<Module>> Modules) {
1437   resetSimilarityCandidates();
1438 
1439   std::vector<IRInstructionData *> InstrList;
1440   std::vector<unsigned> IntegerMapping;
1441   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1442   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1443   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1444   Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1445   Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1446 
1447   populateMapper(Modules, InstrList, IntegerMapping);
1448   findCandidates(InstrList, IntegerMapping);
1449 
1450   return *SimilarityCandidates;
1451 }
1452 
findSimilarity(Module & M)1453 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
1454   resetSimilarityCandidates();
1455   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1456   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1457   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1458   Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1459   Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1460 
1461   std::vector<IRInstructionData *> InstrList;
1462   std::vector<unsigned> IntegerMapping;
1463 
1464   populateMapper(M, InstrList, IntegerMapping);
1465   findCandidates(InstrList, IntegerMapping);
1466 
1467   return *SimilarityCandidates;
1468 }
1469 
1470 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
1471                 "ir-similarity-identifier", false, true)
1472 
IRSimilarityIdentifierWrapperPass()1473 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
1474     : ModulePass(ID) {}
1475 
doInitialization(Module & M)1476 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
1477   IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1478                                         MatchCallsByName, !DisableIntrinsics,
1479                                         false));
1480   return false;
1481 }
1482 
doFinalization(Module & M)1483 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
1484   IRSI.reset();
1485   return false;
1486 }
1487 
runOnModule(Module & M)1488 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
1489   IRSI->findSimilarity(M);
1490   return false;
1491 }
1492 
1493 AnalysisKey IRSimilarityAnalysis::Key;
run(Module & M,ModuleAnalysisManager &)1494 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
1495                                                  ModuleAnalysisManager &) {
1496   auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1497                                      MatchCallsByName, !DisableIntrinsics,
1498                                      false);
1499   IRSI.findSimilarity(M);
1500   return IRSI;
1501 }
1502 
1503 PreservedAnalyses
run(Module & M,ModuleAnalysisManager & AM)1504 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
1505   IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
1506   std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1507       IRSI.getSimilarity();
1508 
1509   for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1510     OS << CandVec.size() << " candidates of length "
1511        << CandVec.begin()->getLength() << ".  Found in: \n";
1512     for (IRSimilarityCandidate &Cand : CandVec) {
1513       OS << "  Function: " << Cand.front()->Inst->getFunction()->getName().str()
1514          << ", Basic Block: ";
1515       if (Cand.front()->Inst->getParent()->getName().str() == "")
1516         OS << "(unnamed)";
1517       else
1518         OS << Cand.front()->Inst->getParent()->getName().str();
1519       OS << "\n    Start Instruction: ";
1520       Cand.frontInstruction()->print(OS);
1521       OS << "\n      End Instruction: ";
1522       Cand.backInstruction()->print(OS);
1523       OS << "\n";
1524     }
1525   }
1526 
1527   return PreservedAnalyses::all();
1528 }
1529 
1530 char IRSimilarityIdentifierWrapperPass::ID = 0;
1531