xref: /freebsd/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp (revision 1fd87a682ad7442327078e1eeb63edc4258f9815)
1e8d8bef9SDimitry Andric //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9e8d8bef9SDimitry Andric // \file
10e8d8bef9SDimitry Andric // Implementation file for the IRSimilarityIdentifier for identifying
11e8d8bef9SDimitry Andric // similarities in IR including the IRInstructionMapper.
12e8d8bef9SDimitry Andric //
13e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
14e8d8bef9SDimitry Andric 
15e8d8bef9SDimitry Andric #include "llvm/Analysis/IRSimilarityIdentifier.h"
16e8d8bef9SDimitry Andric #include "llvm/ADT/DenseMap.h"
17e8d8bef9SDimitry Andric #include "llvm/IR/Intrinsics.h"
18e8d8bef9SDimitry Andric #include "llvm/IR/Operator.h"
19e8d8bef9SDimitry Andric #include "llvm/IR/User.h"
20e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
21e8d8bef9SDimitry Andric #include "llvm/Support/SuffixTree.h"
22e8d8bef9SDimitry Andric 
23e8d8bef9SDimitry Andric using namespace llvm;
24e8d8bef9SDimitry Andric using namespace IRSimilarity;
25e8d8bef9SDimitry Andric 
2604eeddc0SDimitry Andric namespace llvm {
27349cc55cSDimitry Andric cl::opt<bool>
28349cc55cSDimitry Andric     DisableBranches("no-ir-sim-branch-matching", cl::init(false),
29349cc55cSDimitry Andric                     cl::ReallyHidden,
30349cc55cSDimitry Andric                     cl::desc("disable similarity matching, and outlining, "
31349cc55cSDimitry Andric                              "across branches for debugging purposes."));
32349cc55cSDimitry Andric 
3304eeddc0SDimitry Andric cl::opt<bool>
3404eeddc0SDimitry Andric     DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
3504eeddc0SDimitry Andric                          cl::ReallyHidden,
3604eeddc0SDimitry Andric                          cl::desc("disable outlining indirect calls."));
3704eeddc0SDimitry Andric 
3804eeddc0SDimitry Andric cl::opt<bool>
3904eeddc0SDimitry Andric     MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
4004eeddc0SDimitry Andric                      cl::desc("only allow matching call instructions if the "
4104eeddc0SDimitry Andric                               "name and type signature match."));
42*1fd87a68SDimitry Andric 
43*1fd87a68SDimitry Andric cl::opt<bool>
44*1fd87a68SDimitry Andric     DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
45*1fd87a68SDimitry Andric                       cl::desc("Don't match or outline intrinsics"));
4604eeddc0SDimitry Andric } // namespace llvm
4704eeddc0SDimitry Andric 
48e8d8bef9SDimitry Andric IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
49e8d8bef9SDimitry Andric                                      IRInstructionDataList &IDList)
50e8d8bef9SDimitry Andric     : Inst(&I), Legal(Legality), IDL(&IDList) {
51349cc55cSDimitry Andric   initializeInstruction();
52349cc55cSDimitry Andric }
53349cc55cSDimitry Andric 
54349cc55cSDimitry Andric void IRInstructionData::initializeInstruction() {
55e8d8bef9SDimitry Andric   // We check for whether we have a comparison instruction.  If it is, we
56e8d8bef9SDimitry Andric   // find the "less than" version of the predicate for consistency for
57e8d8bef9SDimitry Andric   // comparison instructions throught the program.
58349cc55cSDimitry Andric   if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
59e8d8bef9SDimitry Andric     CmpInst::Predicate Predicate = predicateForConsistency(C);
60e8d8bef9SDimitry Andric     if (Predicate != C->getPredicate())
61e8d8bef9SDimitry Andric       RevisedPredicate = Predicate;
62e8d8bef9SDimitry Andric   }
63e8d8bef9SDimitry Andric 
64e8d8bef9SDimitry Andric   // Here we collect the operands and their types for determining whether
65e8d8bef9SDimitry Andric   // the structure of the operand use matches between two different candidates.
66349cc55cSDimitry Andric   for (Use &OI : Inst->operands()) {
67349cc55cSDimitry Andric     if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) {
68e8d8bef9SDimitry Andric       // If we have a CmpInst where the predicate is reversed, it means the
69e8d8bef9SDimitry Andric       // operands must be reversed as well.
70e8d8bef9SDimitry Andric       OperVals.insert(OperVals.begin(), OI.get());
71e8d8bef9SDimitry Andric       continue;
72e8d8bef9SDimitry Andric     }
73e8d8bef9SDimitry Andric 
74e8d8bef9SDimitry Andric     OperVals.push_back(OI.get());
75e8d8bef9SDimitry Andric   }
7604eeddc0SDimitry Andric 
7704eeddc0SDimitry Andric   // We capture the incoming BasicBlocks as values as well as the incoming
7804eeddc0SDimitry Andric   // Values in order to check for structural similarity.
7904eeddc0SDimitry Andric   if (PHINode *PN = dyn_cast<PHINode>(Inst))
8004eeddc0SDimitry Andric     for (BasicBlock *BB : PN->blocks())
8104eeddc0SDimitry Andric       OperVals.push_back(BB);
82e8d8bef9SDimitry Andric }
83e8d8bef9SDimitry Andric 
84349cc55cSDimitry Andric IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
8504eeddc0SDimitry Andric     : IDL(&IDList) {}
86349cc55cSDimitry Andric 
87349cc55cSDimitry Andric void IRInstructionData::setBranchSuccessors(
88349cc55cSDimitry Andric     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
89349cc55cSDimitry Andric   assert(isa<BranchInst>(Inst) && "Instruction must be branch");
90349cc55cSDimitry Andric 
91349cc55cSDimitry Andric   BranchInst *BI = cast<BranchInst>(Inst);
92349cc55cSDimitry Andric   DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
93349cc55cSDimitry Andric 
94349cc55cSDimitry Andric   BBNumIt = BasicBlockToInteger.find(BI->getParent());
95349cc55cSDimitry Andric   assert(BBNumIt != BasicBlockToInteger.end() &&
96349cc55cSDimitry Andric          "Could not find location for BasicBlock!");
97349cc55cSDimitry Andric 
98349cc55cSDimitry Andric   int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
99349cc55cSDimitry Andric 
100349cc55cSDimitry Andric   for (BasicBlock *Successor : BI->successors()) {
101349cc55cSDimitry Andric     BBNumIt = BasicBlockToInteger.find(Successor);
102349cc55cSDimitry Andric     assert(BBNumIt != BasicBlockToInteger.end() &&
103349cc55cSDimitry Andric            "Could not find number for BasicBlock!");
104349cc55cSDimitry Andric     int OtherBlockNumber = static_cast<int>(BBNumIt->second);
105349cc55cSDimitry Andric 
106349cc55cSDimitry Andric     int Relative = OtherBlockNumber - CurrentBlockNumber;
107349cc55cSDimitry Andric     RelativeBlockLocations.push_back(Relative);
108349cc55cSDimitry Andric   }
109349cc55cSDimitry Andric }
110349cc55cSDimitry Andric 
11104eeddc0SDimitry Andric void IRInstructionData::setCalleeName(bool MatchByName) {
11204eeddc0SDimitry Andric   CallInst *CI = dyn_cast<CallInst>(Inst);
11304eeddc0SDimitry Andric   assert(CI && "Instruction must be call");
11404eeddc0SDimitry Andric 
11504eeddc0SDimitry Andric   CalleeName = "";
116*1fd87a68SDimitry Andric   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
117*1fd87a68SDimitry Andric     // To hash intrinsics, we use the opcode, and types like the other
118*1fd87a68SDimitry Andric     // instructions, but also, the Intrinsic ID, and the Name of the
119*1fd87a68SDimitry Andric     // intrinsic.
120*1fd87a68SDimitry Andric     Intrinsic::ID IntrinsicID = II->getIntrinsicID();
121*1fd87a68SDimitry Andric     FunctionType *FT = II->getFunctionType();
122*1fd87a68SDimitry Andric     // If there is an overloaded name, we have to use the complex version
123*1fd87a68SDimitry Andric     // of getName to get the entire string.
124*1fd87a68SDimitry Andric     if (Intrinsic::isOverloaded(IntrinsicID))
125*1fd87a68SDimitry Andric       CalleeName =
126*1fd87a68SDimitry Andric           Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
127*1fd87a68SDimitry Andric     // If there is not an overloaded name, we only need to use this version.
128*1fd87a68SDimitry Andric     else
129*1fd87a68SDimitry Andric       CalleeName = Intrinsic::getName(IntrinsicID).str();
130*1fd87a68SDimitry Andric 
131*1fd87a68SDimitry Andric     return;
132*1fd87a68SDimitry Andric   }
133*1fd87a68SDimitry Andric 
13404eeddc0SDimitry Andric   if (!CI->isIndirectCall() && MatchByName)
13504eeddc0SDimitry Andric     CalleeName = CI->getCalledFunction()->getName().str();
13604eeddc0SDimitry Andric }
13704eeddc0SDimitry Andric 
13804eeddc0SDimitry Andric void IRInstructionData::setPHIPredecessors(
13904eeddc0SDimitry Andric     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
14004eeddc0SDimitry Andric   assert(isa<PHINode>(Inst) && "Instruction must be phi node");
14104eeddc0SDimitry Andric 
14204eeddc0SDimitry Andric   PHINode *PN = cast<PHINode>(Inst);
14304eeddc0SDimitry Andric   DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
14404eeddc0SDimitry Andric 
14504eeddc0SDimitry Andric   BBNumIt = BasicBlockToInteger.find(PN->getParent());
14604eeddc0SDimitry Andric   assert(BBNumIt != BasicBlockToInteger.end() &&
14704eeddc0SDimitry Andric          "Could not find location for BasicBlock!");
14804eeddc0SDimitry Andric 
14904eeddc0SDimitry Andric   int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
15004eeddc0SDimitry Andric 
15104eeddc0SDimitry Andric   // Convert the incoming blocks of the PHINode to an integer value, based on
15204eeddc0SDimitry Andric   // the relative distances between the current block and the incoming block.
15304eeddc0SDimitry Andric   for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
15404eeddc0SDimitry Andric     BasicBlock *Incoming = PN->getIncomingBlock(Idx);
15504eeddc0SDimitry Andric     BBNumIt = BasicBlockToInteger.find(Incoming);
15604eeddc0SDimitry Andric     assert(BBNumIt != BasicBlockToInteger.end() &&
15704eeddc0SDimitry Andric            "Could not find number for BasicBlock!");
15804eeddc0SDimitry Andric     int OtherBlockNumber = static_cast<int>(BBNumIt->second);
15904eeddc0SDimitry Andric 
16004eeddc0SDimitry Andric     int Relative = OtherBlockNumber - CurrentBlockNumber;
16104eeddc0SDimitry Andric     RelativeBlockLocations.push_back(Relative);
16204eeddc0SDimitry Andric     RelativeBlockLocations.push_back(Relative);
16304eeddc0SDimitry Andric   }
16404eeddc0SDimitry Andric }
16504eeddc0SDimitry Andric 
166e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
167e8d8bef9SDimitry Andric   switch (CI->getPredicate()) {
168e8d8bef9SDimitry Andric   case CmpInst::FCMP_OGT:
169e8d8bef9SDimitry Andric   case CmpInst::FCMP_UGT:
170e8d8bef9SDimitry Andric   case CmpInst::FCMP_OGE:
171e8d8bef9SDimitry Andric   case CmpInst::FCMP_UGE:
172e8d8bef9SDimitry Andric   case CmpInst::ICMP_SGT:
173e8d8bef9SDimitry Andric   case CmpInst::ICMP_UGT:
174e8d8bef9SDimitry Andric   case CmpInst::ICMP_SGE:
175e8d8bef9SDimitry Andric   case CmpInst::ICMP_UGE:
176e8d8bef9SDimitry Andric     return CI->getSwappedPredicate();
177e8d8bef9SDimitry Andric   default:
178e8d8bef9SDimitry Andric     return CI->getPredicate();
179e8d8bef9SDimitry Andric   }
180e8d8bef9SDimitry Andric }
181e8d8bef9SDimitry Andric 
182e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::getPredicate() const {
183e8d8bef9SDimitry Andric   assert(isa<CmpInst>(Inst) &&
184e8d8bef9SDimitry Andric          "Can only get a predicate from a compare instruction");
185e8d8bef9SDimitry Andric 
186e8d8bef9SDimitry Andric   if (RevisedPredicate.hasValue())
187e8d8bef9SDimitry Andric     return RevisedPredicate.getValue();
188e8d8bef9SDimitry Andric 
189e8d8bef9SDimitry Andric   return cast<CmpInst>(Inst)->getPredicate();
190e8d8bef9SDimitry Andric }
191e8d8bef9SDimitry Andric 
19204eeddc0SDimitry Andric StringRef IRInstructionData::getCalleeName() const {
19304eeddc0SDimitry Andric   assert(isa<CallInst>(Inst) &&
19404eeddc0SDimitry Andric          "Can only get a name from a call instruction");
195e8d8bef9SDimitry Andric 
19604eeddc0SDimitry Andric   assert(CalleeName.hasValue() && "CalleeName has not been set");
19704eeddc0SDimitry Andric 
19804eeddc0SDimitry Andric   return *CalleeName;
199e8d8bef9SDimitry Andric }
200e8d8bef9SDimitry Andric 
201e8d8bef9SDimitry Andric bool IRSimilarity::isClose(const IRInstructionData &A,
202e8d8bef9SDimitry Andric                            const IRInstructionData &B) {
203e8d8bef9SDimitry Andric 
204e8d8bef9SDimitry Andric   if (!A.Legal || !B.Legal)
205e8d8bef9SDimitry Andric     return false;
206e8d8bef9SDimitry Andric 
207e8d8bef9SDimitry Andric   // Check if we are performing the same sort of operation on the same types
208e8d8bef9SDimitry Andric   // but not on the same values.
209e8d8bef9SDimitry Andric   if (!A.Inst->isSameOperationAs(B.Inst)) {
210e8d8bef9SDimitry Andric     // If there is a predicate, this means that either there is a swapped
211e8d8bef9SDimitry Andric     // predicate, or that the types are different, we want to make sure that
212e8d8bef9SDimitry Andric     // the predicates are equivalent via swapping.
213e8d8bef9SDimitry Andric     if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
214e8d8bef9SDimitry Andric 
215e8d8bef9SDimitry Andric       if (A.getPredicate() != B.getPredicate())
216e8d8bef9SDimitry Andric         return false;
217e8d8bef9SDimitry Andric 
218e8d8bef9SDimitry Andric       // If the predicates are the same via swap, make sure that the types are
219e8d8bef9SDimitry Andric       // still the same.
220e8d8bef9SDimitry Andric       auto ZippedTypes = zip(A.OperVals, B.OperVals);
221e8d8bef9SDimitry Andric 
222e8d8bef9SDimitry Andric       return all_of(
223e8d8bef9SDimitry Andric           ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
224e8d8bef9SDimitry Andric             return std::get<0>(R)->getType() == std::get<1>(R)->getType();
225e8d8bef9SDimitry Andric           });
226e8d8bef9SDimitry Andric     }
227e8d8bef9SDimitry Andric 
228e8d8bef9SDimitry Andric     return false;
229e8d8bef9SDimitry Andric   }
230e8d8bef9SDimitry Andric 
231e8d8bef9SDimitry Andric   // Since any GEP Instruction operands after the first operand cannot be
232e8d8bef9SDimitry Andric   // defined by a register, we must make sure that the operands after the first
233e8d8bef9SDimitry Andric   // are the same in the two instructions
234e8d8bef9SDimitry Andric   if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
235e8d8bef9SDimitry Andric     auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
236e8d8bef9SDimitry Andric 
237e8d8bef9SDimitry Andric     // If the instructions do not have the same inbounds restrictions, we do
238e8d8bef9SDimitry Andric     // not consider them the same.
239e8d8bef9SDimitry Andric     if (GEP->isInBounds() != OtherGEP->isInBounds())
240e8d8bef9SDimitry Andric       return false;
241e8d8bef9SDimitry Andric 
242e8d8bef9SDimitry Andric     auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
243e8d8bef9SDimitry Andric 
244e8d8bef9SDimitry Andric     // We increment here since we do not care about the first instruction,
245e8d8bef9SDimitry Andric     // we only care about the following operands since they must be the
246e8d8bef9SDimitry Andric     // exact same to be considered similar.
247e8d8bef9SDimitry Andric     return all_of(drop_begin(ZippedOperands),
248e8d8bef9SDimitry Andric                   [](std::tuple<llvm::Use &, llvm::Use &> R) {
249e8d8bef9SDimitry Andric                     return std::get<0>(R) == std::get<1>(R);
250e8d8bef9SDimitry Andric                   });
251e8d8bef9SDimitry Andric   }
252e8d8bef9SDimitry Andric 
25304eeddc0SDimitry Andric   // If the instructions are functions calls, we make sure that the function
25404eeddc0SDimitry Andric   // name is the same.  We already know that the types are since is
25504eeddc0SDimitry Andric   // isSameOperationAs is true.
256e8d8bef9SDimitry Andric   if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
257*1fd87a68SDimitry Andric     if (A.getCalleeName().str() != B.getCalleeName().str())
258e8d8bef9SDimitry Andric       return false;
259e8d8bef9SDimitry Andric   }
260e8d8bef9SDimitry Andric 
261349cc55cSDimitry Andric   if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
262349cc55cSDimitry Andric       A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
263349cc55cSDimitry Andric     return false;
264349cc55cSDimitry Andric 
265e8d8bef9SDimitry Andric   return true;
266e8d8bef9SDimitry Andric }
267e8d8bef9SDimitry Andric 
268e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated
269e8d8bef9SDimitry Andric // into the same interface.
270e8d8bef9SDimitry Andric void IRInstructionMapper::convertToUnsignedVec(
271e8d8bef9SDimitry Andric     BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
272e8d8bef9SDimitry Andric     std::vector<unsigned> &IntegerMapping) {
273e8d8bef9SDimitry Andric   BasicBlock::iterator It = BB.begin();
274e8d8bef9SDimitry Andric 
275e8d8bef9SDimitry Andric   std::vector<unsigned> IntegerMappingForBB;
276e8d8bef9SDimitry Andric   std::vector<IRInstructionData *> InstrListForBB;
277e8d8bef9SDimitry Andric 
278e8d8bef9SDimitry Andric   for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
279e8d8bef9SDimitry Andric     switch (InstClassifier.visit(*It)) {
280e8d8bef9SDimitry Andric     case InstrType::Legal:
281e8d8bef9SDimitry Andric       mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
282e8d8bef9SDimitry Andric       break;
283e8d8bef9SDimitry Andric     case InstrType::Illegal:
284e8d8bef9SDimitry Andric       mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
285e8d8bef9SDimitry Andric       break;
286e8d8bef9SDimitry Andric     case InstrType::Invisible:
287e8d8bef9SDimitry Andric       AddedIllegalLastTime = false;
288e8d8bef9SDimitry Andric       break;
289e8d8bef9SDimitry Andric     }
290e8d8bef9SDimitry Andric   }
291e8d8bef9SDimitry Andric 
292e8d8bef9SDimitry Andric   if (HaveLegalRange) {
293349cc55cSDimitry Andric     if (AddedIllegalLastTime)
294e8d8bef9SDimitry Andric       mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
295fe6060f1SDimitry Andric     for (IRInstructionData *ID : InstrListForBB)
296fe6060f1SDimitry Andric       this->IDL->push_back(*ID);
297e8d8bef9SDimitry Andric     llvm::append_range(InstrList, InstrListForBB);
298e8d8bef9SDimitry Andric     llvm::append_range(IntegerMapping, IntegerMappingForBB);
299e8d8bef9SDimitry Andric   }
300e8d8bef9SDimitry Andric }
301e8d8bef9SDimitry Andric 
302e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated
303e8d8bef9SDimitry Andric // into the same interface.
304e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToLegalUnsigned(
305e8d8bef9SDimitry Andric     BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
306e8d8bef9SDimitry Andric     std::vector<IRInstructionData *> &InstrListForBB) {
307e8d8bef9SDimitry Andric   // We added something legal, so we should unset the AddedLegalLastTime
308e8d8bef9SDimitry Andric   // flag.
309e8d8bef9SDimitry Andric   AddedIllegalLastTime = false;
310e8d8bef9SDimitry Andric 
311e8d8bef9SDimitry Andric   // If we have at least two adjacent legal instructions (which may have
312e8d8bef9SDimitry Andric   // invisible instructions in between), remember that.
313e8d8bef9SDimitry Andric   if (CanCombineWithPrevInstr)
314e8d8bef9SDimitry Andric     HaveLegalRange = true;
315e8d8bef9SDimitry Andric   CanCombineWithPrevInstr = true;
316e8d8bef9SDimitry Andric 
317e8d8bef9SDimitry Andric   // Get the integer for this instruction or give it the current
318e8d8bef9SDimitry Andric   // LegalInstrNumber.
319e8d8bef9SDimitry Andric   IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
320e8d8bef9SDimitry Andric   InstrListForBB.push_back(ID);
321e8d8bef9SDimitry Andric 
322349cc55cSDimitry Andric   if (isa<BranchInst>(*It))
323349cc55cSDimitry Andric     ID->setBranchSuccessors(BasicBlockToInteger);
324349cc55cSDimitry Andric 
32504eeddc0SDimitry Andric   if (isa<CallInst>(*It))
32604eeddc0SDimitry Andric     ID->setCalleeName(EnableMatchCallsByName);
32704eeddc0SDimitry Andric 
32804eeddc0SDimitry Andric   if (isa<PHINode>(*It))
32904eeddc0SDimitry Andric     ID->setPHIPredecessors(BasicBlockToInteger);
33004eeddc0SDimitry Andric 
331e8d8bef9SDimitry Andric   // Add to the instruction list
332e8d8bef9SDimitry Andric   bool WasInserted;
333e8d8bef9SDimitry Andric   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
334e8d8bef9SDimitry Andric       ResultIt;
335e8d8bef9SDimitry Andric   std::tie(ResultIt, WasInserted) =
336e8d8bef9SDimitry Andric       InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
337e8d8bef9SDimitry Andric   unsigned INumber = ResultIt->second;
338e8d8bef9SDimitry Andric 
339e8d8bef9SDimitry Andric   // There was an insertion.
340e8d8bef9SDimitry Andric   if (WasInserted)
341e8d8bef9SDimitry Andric     LegalInstrNumber++;
342e8d8bef9SDimitry Andric 
343e8d8bef9SDimitry Andric   IntegerMappingForBB.push_back(INumber);
344e8d8bef9SDimitry Andric 
345e8d8bef9SDimitry Andric   // Make sure we don't overflow or use any integers reserved by the DenseMap.
346e8d8bef9SDimitry Andric   assert(LegalInstrNumber < IllegalInstrNumber &&
347e8d8bef9SDimitry Andric          "Instruction mapping overflow!");
348e8d8bef9SDimitry Andric 
349e8d8bef9SDimitry Andric   assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
350e8d8bef9SDimitry Andric          "Tried to assign DenseMap tombstone or empty key to instruction.");
351e8d8bef9SDimitry Andric   assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
352e8d8bef9SDimitry Andric          "Tried to assign DenseMap tombstone or empty key to instruction.");
353e8d8bef9SDimitry Andric 
354e8d8bef9SDimitry Andric   return INumber;
355e8d8bef9SDimitry Andric }
356e8d8bef9SDimitry Andric 
357e8d8bef9SDimitry Andric IRInstructionData *
358e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
359e8d8bef9SDimitry Andric                                                IRInstructionDataList &IDL) {
360e8d8bef9SDimitry Andric   return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
361e8d8bef9SDimitry Andric }
362e8d8bef9SDimitry Andric 
363349cc55cSDimitry Andric IRInstructionData *
364349cc55cSDimitry Andric IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
365349cc55cSDimitry Andric   return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
366349cc55cSDimitry Andric }
367349cc55cSDimitry Andric 
368e8d8bef9SDimitry Andric IRInstructionDataList *
369e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionDataList() {
370e8d8bef9SDimitry Andric   return new (IDLAllocator->Allocate()) IRInstructionDataList();
371e8d8bef9SDimitry Andric }
372e8d8bef9SDimitry Andric 
373e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated
374e8d8bef9SDimitry Andric // into the same interface.
375e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToIllegalUnsigned(
376e8d8bef9SDimitry Andric     BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
377e8d8bef9SDimitry Andric     std::vector<IRInstructionData *> &InstrListForBB, bool End) {
378e8d8bef9SDimitry Andric   // Can't combine an illegal instruction. Set the flag.
379e8d8bef9SDimitry Andric   CanCombineWithPrevInstr = false;
380e8d8bef9SDimitry Andric 
381e8d8bef9SDimitry Andric   // Only add one illegal number per range of legal numbers.
382e8d8bef9SDimitry Andric   if (AddedIllegalLastTime)
383e8d8bef9SDimitry Andric     return IllegalInstrNumber;
384e8d8bef9SDimitry Andric 
385e8d8bef9SDimitry Andric   IRInstructionData *ID = nullptr;
386e8d8bef9SDimitry Andric   if (!End)
387e8d8bef9SDimitry Andric     ID = allocateIRInstructionData(*It, false, *IDL);
388349cc55cSDimitry Andric   else
389349cc55cSDimitry Andric     ID = allocateIRInstructionData(*IDL);
390e8d8bef9SDimitry Andric   InstrListForBB.push_back(ID);
391e8d8bef9SDimitry Andric 
392e8d8bef9SDimitry Andric   // Remember that we added an illegal number last time.
393e8d8bef9SDimitry Andric   AddedIllegalLastTime = true;
394e8d8bef9SDimitry Andric   unsigned INumber = IllegalInstrNumber;
395e8d8bef9SDimitry Andric   IntegerMappingForBB.push_back(IllegalInstrNumber--);
396e8d8bef9SDimitry Andric 
397e8d8bef9SDimitry Andric   assert(LegalInstrNumber < IllegalInstrNumber &&
398e8d8bef9SDimitry Andric          "Instruction mapping overflow!");
399e8d8bef9SDimitry Andric 
400e8d8bef9SDimitry Andric   assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
401e8d8bef9SDimitry Andric          "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
402e8d8bef9SDimitry Andric 
403e8d8bef9SDimitry Andric   assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
404e8d8bef9SDimitry Andric          "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
405e8d8bef9SDimitry Andric 
406e8d8bef9SDimitry Andric   return INumber;
407e8d8bef9SDimitry Andric }
408e8d8bef9SDimitry Andric 
409e8d8bef9SDimitry Andric IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
410e8d8bef9SDimitry Andric                                              IRInstructionData *FirstInstIt,
411e8d8bef9SDimitry Andric                                              IRInstructionData *LastInstIt)
412e8d8bef9SDimitry Andric     : StartIdx(StartIdx), Len(Len) {
413e8d8bef9SDimitry Andric 
414e8d8bef9SDimitry Andric   assert(FirstInstIt != nullptr && "Instruction is nullptr!");
415e8d8bef9SDimitry Andric   assert(LastInstIt != nullptr && "Instruction is nullptr!");
416e8d8bef9SDimitry Andric   assert(StartIdx + Len > StartIdx &&
417e8d8bef9SDimitry Andric          "Overflow for IRSimilarityCandidate range?");
418e8d8bef9SDimitry Andric   assert(Len - 1 == static_cast<unsigned>(std::distance(
419e8d8bef9SDimitry Andric                         iterator(FirstInstIt), iterator(LastInstIt))) &&
420e8d8bef9SDimitry Andric          "Length of the first and last IRInstructionData do not match the "
421e8d8bef9SDimitry Andric          "given length");
422e8d8bef9SDimitry Andric 
423e8d8bef9SDimitry Andric   // We iterate over the given instructions, and map each unique value
424e8d8bef9SDimitry Andric   // to a unique number in the IRSimilarityCandidate ValueToNumber and
425e8d8bef9SDimitry Andric   // NumberToValue maps.  A constant get its own value globally, the individual
426e8d8bef9SDimitry Andric   // uses of the constants are not considered to be unique.
427e8d8bef9SDimitry Andric   //
428e8d8bef9SDimitry Andric   // IR:                    Mapping Added:
429e8d8bef9SDimitry Andric   // %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2
430e8d8bef9SDimitry Andric   // %add2 = add i32 %a, %1    %add2 -> 4
431e8d8bef9SDimitry Andric   // %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5
432e8d8bef9SDimitry Andric   //
433e8d8bef9SDimitry Andric   // when replace with global values, starting from 1, would be
434e8d8bef9SDimitry Andric   //
435e8d8bef9SDimitry Andric   // 3 = add i32 1, 2
436e8d8bef9SDimitry Andric   // 4 = add i32 1, 3
437e8d8bef9SDimitry Andric   // 6 = add i32 5, 2
438e8d8bef9SDimitry Andric   unsigned LocalValNumber = 1;
439e8d8bef9SDimitry Andric   IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
440e8d8bef9SDimitry Andric   for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
441e8d8bef9SDimitry Andric     // Map the operand values to an unsigned integer if it does not already
442e8d8bef9SDimitry Andric     // have an unsigned integer assigned to it.
443e8d8bef9SDimitry Andric     for (Value *Arg : ID->OperVals)
444e8d8bef9SDimitry Andric       if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
445e8d8bef9SDimitry Andric         ValueToNumber.try_emplace(Arg, LocalValNumber);
446e8d8bef9SDimitry Andric         NumberToValue.try_emplace(LocalValNumber, Arg);
447e8d8bef9SDimitry Andric         LocalValNumber++;
448e8d8bef9SDimitry Andric       }
449e8d8bef9SDimitry Andric 
450e8d8bef9SDimitry Andric     // Mapping the instructions to an unsigned integer if it is not already
451e8d8bef9SDimitry Andric     // exist in the mapping.
452e8d8bef9SDimitry Andric     if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
453e8d8bef9SDimitry Andric       ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
454e8d8bef9SDimitry Andric       NumberToValue.try_emplace(LocalValNumber, ID->Inst);
455e8d8bef9SDimitry Andric       LocalValNumber++;
456e8d8bef9SDimitry Andric     }
457e8d8bef9SDimitry Andric   }
458e8d8bef9SDimitry Andric 
459e8d8bef9SDimitry Andric   // Setting the first and last instruction data pointers for the candidate.  If
460e8d8bef9SDimitry Andric   // we got through the entire for loop without hitting an assert, we know
461e8d8bef9SDimitry Andric   // that both of these instructions are not nullptrs.
462e8d8bef9SDimitry Andric   FirstInst = FirstInstIt;
463e8d8bef9SDimitry Andric   LastInst = LastInstIt;
464e8d8bef9SDimitry Andric }
465e8d8bef9SDimitry Andric 
466e8d8bef9SDimitry Andric bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
467e8d8bef9SDimitry Andric                                       const IRSimilarityCandidate &B) {
468e8d8bef9SDimitry Andric   if (A.getLength() != B.getLength())
469e8d8bef9SDimitry Andric     return false;
470e8d8bef9SDimitry Andric 
471e8d8bef9SDimitry Andric   auto InstrDataForBoth =
472e8d8bef9SDimitry Andric       zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
473e8d8bef9SDimitry Andric 
474e8d8bef9SDimitry Andric   return all_of(InstrDataForBoth,
475e8d8bef9SDimitry Andric                 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
476e8d8bef9SDimitry Andric                   IRInstructionData &A = std::get<0>(R);
477e8d8bef9SDimitry Andric                   IRInstructionData &B = std::get<1>(R);
478e8d8bef9SDimitry Andric                   if (!A.Legal || !B.Legal)
479e8d8bef9SDimitry Andric                     return false;
480e8d8bef9SDimitry Andric                   return isClose(A, B);
481e8d8bef9SDimitry Andric                 });
482e8d8bef9SDimitry Andric }
483e8d8bef9SDimitry Andric 
484e8d8bef9SDimitry Andric /// Determine if one or more of the assigned global value numbers for the
485e8d8bef9SDimitry Andric /// operands in \p TargetValueNumbers is in the current mapping set for operand
486e8d8bef9SDimitry Andric /// numbers in \p SourceOperands.  The set of possible corresponding global
487e8d8bef9SDimitry Andric /// value numbers are replaced with the most recent version of compatible
488e8d8bef9SDimitry Andric /// values.
489e8d8bef9SDimitry Andric ///
490e8d8bef9SDimitry Andric /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
491e8d8bef9SDimitry Andric /// value number for the source IRInstructionCandidate.
492e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
493e8d8bef9SDimitry Andric /// IRSimilarityCandidate global value numbers to a set of possible numbers in
494e8d8bef9SDimitry Andric /// the target.
495e8d8bef9SDimitry Andric /// \param [in] SourceOperands - The operands in the original
496e8d8bef9SDimitry Andric /// IRSimilarityCandidate in the current instruction.
497e8d8bef9SDimitry Andric /// \param [in] TargetValueNumbers - The global value numbers of the operands in
498e8d8bef9SDimitry Andric /// the corresponding Instruction in the other IRSimilarityCandidate.
499e8d8bef9SDimitry Andric /// \returns true if there exists a possible mapping between the source
500e8d8bef9SDimitry Andric /// Instruction operands and the target Instruction operands, and false if not.
501e8d8bef9SDimitry Andric static bool checkNumberingAndReplaceCommutative(
502e8d8bef9SDimitry Andric   const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
503e8d8bef9SDimitry Andric   DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
504e8d8bef9SDimitry Andric   ArrayRef<Value *> &SourceOperands,
505e8d8bef9SDimitry Andric   DenseSet<unsigned> &TargetValueNumbers){
506e8d8bef9SDimitry Andric 
507e8d8bef9SDimitry Andric   DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
508e8d8bef9SDimitry Andric 
509e8d8bef9SDimitry Andric   unsigned ArgVal;
510e8d8bef9SDimitry Andric   bool WasInserted;
511e8d8bef9SDimitry Andric 
512e8d8bef9SDimitry Andric   // Iterate over the operands in the source IRSimilarityCandidate to determine
513e8d8bef9SDimitry Andric   // whether there exists an operand in the other IRSimilarityCandidate that
514e8d8bef9SDimitry Andric   // creates a valid mapping of Value to Value between the
515e8d8bef9SDimitry Andric   // IRSimilarityCaniddates.
516e8d8bef9SDimitry Andric   for (Value *V : SourceOperands) {
517e8d8bef9SDimitry Andric     ArgVal = SourceValueToNumberMapping.find(V)->second;
518e8d8bef9SDimitry Andric 
519e8d8bef9SDimitry Andric     std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
520e8d8bef9SDimitry Andric         std::make_pair(ArgVal, TargetValueNumbers));
521e8d8bef9SDimitry Andric 
522e8d8bef9SDimitry Andric     // Instead of finding a current mapping, we inserted a set.  This means a
523e8d8bef9SDimitry Andric     // mapping did not exist for the source Instruction operand, it has no
524e8d8bef9SDimitry Andric     // current constraints we need to check.
525e8d8bef9SDimitry Andric     if (WasInserted)
526e8d8bef9SDimitry Andric       continue;
527e8d8bef9SDimitry Andric 
528e8d8bef9SDimitry Andric     // If a mapping already exists for the source operand to the values in the
529e8d8bef9SDimitry Andric     // other IRSimilarityCandidate we need to iterate over the items in other
530e8d8bef9SDimitry Andric     // IRSimilarityCandidate's Instruction to determine whether there is a valid
531e8d8bef9SDimitry Andric     // mapping of Value to Value.
532e8d8bef9SDimitry Andric     DenseSet<unsigned> NewSet;
533e8d8bef9SDimitry Andric     for (unsigned &Curr : ValueMappingIt->second)
534e8d8bef9SDimitry Andric       // If we can find the value in the mapping, we add it to the new set.
535e8d8bef9SDimitry Andric       if (TargetValueNumbers.contains(Curr))
536e8d8bef9SDimitry Andric         NewSet.insert(Curr);
537e8d8bef9SDimitry Andric 
538e8d8bef9SDimitry Andric     // If we could not find a Value, return 0.
539e8d8bef9SDimitry Andric     if (NewSet.empty())
540e8d8bef9SDimitry Andric       return false;
541e8d8bef9SDimitry Andric 
542e8d8bef9SDimitry Andric     // Otherwise replace the old mapping with the newly constructed one.
543e8d8bef9SDimitry Andric     if (NewSet.size() != ValueMappingIt->second.size())
544e8d8bef9SDimitry Andric       ValueMappingIt->second.swap(NewSet);
545e8d8bef9SDimitry Andric 
546e8d8bef9SDimitry Andric     // We have reached no conclusions about the mapping, and cannot remove
547e8d8bef9SDimitry Andric     // any items from the other operands, so we move to check the next operand.
548e8d8bef9SDimitry Andric     if (ValueMappingIt->second.size() != 1)
549e8d8bef9SDimitry Andric       continue;
550e8d8bef9SDimitry Andric 
551e8d8bef9SDimitry Andric 
552e8d8bef9SDimitry Andric     unsigned ValToRemove = *ValueMappingIt->second.begin();
553e8d8bef9SDimitry Andric     // When there is only one item left in the mapping for and operand, remove
554e8d8bef9SDimitry Andric     // the value from the other operands.  If it results in there being no
555e8d8bef9SDimitry Andric     // mapping, return false, it means the mapping is wrong
556e8d8bef9SDimitry Andric     for (Value *InnerV : SourceOperands) {
557e8d8bef9SDimitry Andric       if (V == InnerV)
558e8d8bef9SDimitry Andric         continue;
559e8d8bef9SDimitry Andric 
560e8d8bef9SDimitry Andric       unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
561e8d8bef9SDimitry Andric       ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
562e8d8bef9SDimitry Andric       if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
563e8d8bef9SDimitry Andric         continue;
564e8d8bef9SDimitry Andric 
565e8d8bef9SDimitry Andric       ValueMappingIt->second.erase(ValToRemove);
566e8d8bef9SDimitry Andric       if (ValueMappingIt->second.empty())
567e8d8bef9SDimitry Andric         return false;
568e8d8bef9SDimitry Andric     }
569e8d8bef9SDimitry Andric   }
570e8d8bef9SDimitry Andric 
571e8d8bef9SDimitry Andric   return true;
572e8d8bef9SDimitry Andric }
573e8d8bef9SDimitry Andric 
574e8d8bef9SDimitry Andric /// Determine if operand number \p TargetArgVal is in the current mapping set
575e8d8bef9SDimitry Andric /// for operand number \p SourceArgVal.
576e8d8bef9SDimitry Andric ///
577e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
578e8d8bef9SDimitry Andric /// value numbers from source IRSimilarityCandidate to target
579e8d8bef9SDimitry Andric /// IRSimilarityCandidate.
580e8d8bef9SDimitry Andric /// \param [in] SourceArgVal The global value number for an operand in the
581e8d8bef9SDimitry Andric /// in the original candidate.
582e8d8bef9SDimitry Andric /// \param [in] TargetArgVal The global value number for the corresponding
583e8d8bef9SDimitry Andric /// operand in the other candidate.
584e8d8bef9SDimitry Andric /// \returns True if there exists a mapping and false if not.
585e8d8bef9SDimitry Andric bool checkNumberingAndReplace(
586e8d8bef9SDimitry Andric     DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
587e8d8bef9SDimitry Andric     unsigned SourceArgVal, unsigned TargetArgVal) {
588e8d8bef9SDimitry Andric   // We are given two unsigned integers representing the global values of
589e8d8bef9SDimitry Andric   // the operands in different IRSimilarityCandidates and a current mapping
590e8d8bef9SDimitry Andric   // between the two.
591e8d8bef9SDimitry Andric   //
592e8d8bef9SDimitry Andric   // Source Operand GVN: 1
593e8d8bef9SDimitry Andric   // Target Operand GVN: 2
594e8d8bef9SDimitry Andric   // CurrentMapping: {1: {1, 2}}
595e8d8bef9SDimitry Andric   //
596e8d8bef9SDimitry Andric   // Since we have mapping, and the target operand is contained in the set, we
597e8d8bef9SDimitry Andric   // update it to:
598e8d8bef9SDimitry Andric   // CurrentMapping: {1: {2}}
599e8d8bef9SDimitry Andric   // and can return true. But, if the mapping was
600e8d8bef9SDimitry Andric   // CurrentMapping: {1: {3}}
601e8d8bef9SDimitry Andric   // we would return false.
602e8d8bef9SDimitry Andric 
603e8d8bef9SDimitry Andric   bool WasInserted;
604e8d8bef9SDimitry Andric   DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
605e8d8bef9SDimitry Andric 
606e8d8bef9SDimitry Andric   std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
607e8d8bef9SDimitry Andric       std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
608e8d8bef9SDimitry Andric 
609e8d8bef9SDimitry Andric   // If we created a new mapping, then we are done.
610e8d8bef9SDimitry Andric   if (WasInserted)
611e8d8bef9SDimitry Andric     return true;
612e8d8bef9SDimitry Andric 
613e8d8bef9SDimitry Andric   // If there is more than one option in the mapping set, and the target value
614e8d8bef9SDimitry Andric   // is included in the mapping set replace that set with one that only includes
615e8d8bef9SDimitry Andric   // the target value, as it is the only valid mapping via the non commutative
616e8d8bef9SDimitry Andric   // instruction.
617e8d8bef9SDimitry Andric 
618e8d8bef9SDimitry Andric   DenseSet<unsigned> &TargetSet = Val->second;
619e8d8bef9SDimitry Andric   if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
620e8d8bef9SDimitry Andric     TargetSet.clear();
621e8d8bef9SDimitry Andric     TargetSet.insert(TargetArgVal);
622e8d8bef9SDimitry Andric     return true;
623e8d8bef9SDimitry Andric   }
624e8d8bef9SDimitry Andric 
625e8d8bef9SDimitry Andric   // Return true if we can find the value in the set.
626e8d8bef9SDimitry Andric   return TargetSet.contains(TargetArgVal);
627e8d8bef9SDimitry Andric }
628e8d8bef9SDimitry Andric 
629e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
630e8d8bef9SDimitry Andric     OperandMapping A, OperandMapping B) {
631e8d8bef9SDimitry Andric   // Iterators to keep track of where we are in the operands for each
632e8d8bef9SDimitry Andric   // Instruction.
633e8d8bef9SDimitry Andric   ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
634e8d8bef9SDimitry Andric   ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
635e8d8bef9SDimitry Andric   unsigned OperandLength = A.OperVals.size();
636e8d8bef9SDimitry Andric 
637e8d8bef9SDimitry Andric   // For each operand, get the value numbering and ensure it is consistent.
638e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
639e8d8bef9SDimitry Andric     unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
640e8d8bef9SDimitry Andric     unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
641e8d8bef9SDimitry Andric 
642e8d8bef9SDimitry Andric     // Attempt to add a set with only the target value.  If there is no mapping
643e8d8bef9SDimitry Andric     // we can create it here.
644e8d8bef9SDimitry Andric     //
645e8d8bef9SDimitry Andric     // For an instruction like a subtraction:
646e8d8bef9SDimitry Andric     // IRSimilarityCandidateA:  IRSimilarityCandidateB:
647e8d8bef9SDimitry Andric     // %resultA = sub %a, %b    %resultB = sub %d, %e
648e8d8bef9SDimitry Andric     //
649e8d8bef9SDimitry Andric     // We map %a -> %d and %b -> %e.
650e8d8bef9SDimitry Andric     //
651e8d8bef9SDimitry Andric     // And check to see whether their mapping is consistent in
652e8d8bef9SDimitry Andric     // checkNumberingAndReplace.
653e8d8bef9SDimitry Andric 
654e8d8bef9SDimitry Andric     if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
655e8d8bef9SDimitry Andric       return false;
656e8d8bef9SDimitry Andric 
657e8d8bef9SDimitry Andric     if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
658e8d8bef9SDimitry Andric       return false;
659e8d8bef9SDimitry Andric   }
660e8d8bef9SDimitry Andric   return true;
661e8d8bef9SDimitry Andric }
662e8d8bef9SDimitry Andric 
663e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareCommutativeOperandMapping(
664e8d8bef9SDimitry Andric     OperandMapping A, OperandMapping B) {
665e8d8bef9SDimitry Andric   DenseSet<unsigned> ValueNumbersA;
666e8d8bef9SDimitry Andric   DenseSet<unsigned> ValueNumbersB;
667e8d8bef9SDimitry Andric 
668e8d8bef9SDimitry Andric   ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
669e8d8bef9SDimitry Andric   ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
670e8d8bef9SDimitry Andric   unsigned OperandLength = A.OperVals.size();
671e8d8bef9SDimitry Andric 
672e8d8bef9SDimitry Andric   // Find the value number sets for the operands.
673e8d8bef9SDimitry Andric   for (unsigned Idx = 0; Idx < OperandLength;
674e8d8bef9SDimitry Andric        Idx++, VItA++, VItB++) {
675e8d8bef9SDimitry Andric     ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
676e8d8bef9SDimitry Andric     ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
677e8d8bef9SDimitry Andric   }
678e8d8bef9SDimitry Andric 
679e8d8bef9SDimitry Andric   // Iterate over the operands in the first IRSimilarityCandidate and make sure
680e8d8bef9SDimitry Andric   // there exists a possible mapping with the operands in the second
681e8d8bef9SDimitry Andric   // IRSimilarityCandidate.
682e8d8bef9SDimitry Andric   if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
683e8d8bef9SDimitry Andric                                            A.ValueNumberMapping, A.OperVals,
684e8d8bef9SDimitry Andric                                            ValueNumbersB))
685e8d8bef9SDimitry Andric     return false;
686e8d8bef9SDimitry Andric 
687e8d8bef9SDimitry Andric   // Iterate over the operands in the second IRSimilarityCandidate and make sure
688e8d8bef9SDimitry Andric   // there exists a possible mapping with the operands in the first
689e8d8bef9SDimitry Andric   // IRSimilarityCandidate.
690e8d8bef9SDimitry Andric   if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
691e8d8bef9SDimitry Andric                                            B.ValueNumberMapping, B.OperVals,
692e8d8bef9SDimitry Andric                                            ValueNumbersA))
693e8d8bef9SDimitry Andric     return false;
694e8d8bef9SDimitry Andric 
695e8d8bef9SDimitry Andric   return true;
696e8d8bef9SDimitry Andric }
697e8d8bef9SDimitry Andric 
698349cc55cSDimitry Andric bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
699349cc55cSDimitry Andric                                                    RelativeLocMapping B) {
700349cc55cSDimitry Andric   // Get the basic blocks the label refers to.
701349cc55cSDimitry Andric   BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
702349cc55cSDimitry Andric   BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
703349cc55cSDimitry Andric 
704349cc55cSDimitry Andric   // Get the basic blocks contained in each region.
705349cc55cSDimitry Andric   DenseSet<BasicBlock *> BasicBlockA;
706349cc55cSDimitry Andric   DenseSet<BasicBlock *> BasicBlockB;
707349cc55cSDimitry Andric   A.IRSC.getBasicBlocks(BasicBlockA);
708349cc55cSDimitry Andric   B.IRSC.getBasicBlocks(BasicBlockB);
709349cc55cSDimitry Andric 
710349cc55cSDimitry Andric   // Determine if the block is contained in the region.
711349cc55cSDimitry Andric   bool AContained = BasicBlockA.contains(ABB);
712349cc55cSDimitry Andric   bool BContained = BasicBlockB.contains(BBB);
713349cc55cSDimitry Andric 
714349cc55cSDimitry Andric   // Both blocks need to be contained in the region, or both need to be outside
715349cc55cSDimitry Andric   // the reigon.
716349cc55cSDimitry Andric   if (AContained != BContained)
717349cc55cSDimitry Andric     return false;
718349cc55cSDimitry Andric 
719349cc55cSDimitry Andric   // If both are contained, then we need to make sure that the relative
720349cc55cSDimitry Andric   // distance to the target blocks are the same.
721349cc55cSDimitry Andric   if (AContained)
722349cc55cSDimitry Andric     return A.RelativeLocation == B.RelativeLocation;
723349cc55cSDimitry Andric   return true;
724349cc55cSDimitry Andric }
725349cc55cSDimitry Andric 
726e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
727e8d8bef9SDimitry Andric                                              const IRSimilarityCandidate &B) {
728349cc55cSDimitry Andric   DenseMap<unsigned, DenseSet<unsigned>> MappingA;
729349cc55cSDimitry Andric   DenseMap<unsigned, DenseSet<unsigned>> MappingB;
730349cc55cSDimitry Andric   return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
731349cc55cSDimitry Andric }
732349cc55cSDimitry Andric 
733349cc55cSDimitry Andric typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
734349cc55cSDimitry Andric                       SmallVector<int, 4> &, ArrayRef<Value *> &,
735349cc55cSDimitry Andric                       ArrayRef<Value *> &>
736349cc55cSDimitry Andric     ZippedRelativeLocationsT;
737349cc55cSDimitry Andric 
738349cc55cSDimitry Andric bool IRSimilarityCandidate::compareStructure(
739349cc55cSDimitry Andric     const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
740349cc55cSDimitry Andric     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
741349cc55cSDimitry Andric     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
742e8d8bef9SDimitry Andric   if (A.getLength() != B.getLength())
743e8d8bef9SDimitry Andric     return false;
744e8d8bef9SDimitry Andric 
745e8d8bef9SDimitry Andric   if (A.ValueToNumber.size() != B.ValueToNumber.size())
746e8d8bef9SDimitry Andric     return false;
747e8d8bef9SDimitry Andric 
748e8d8bef9SDimitry Andric   iterator ItA = A.begin();
749e8d8bef9SDimitry Andric   iterator ItB = B.begin();
750e8d8bef9SDimitry Andric 
751349cc55cSDimitry Andric   // These ValueNumber Mapping sets create a create a mapping between the values
752349cc55cSDimitry Andric   // in one candidate to values in the other candidate.  If we create a set with
753349cc55cSDimitry Andric   // one element, and that same element maps to the original element in the
754349cc55cSDimitry Andric   // candidate we have a good mapping.
755e8d8bef9SDimitry Andric   DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
756e8d8bef9SDimitry Andric 
757e8d8bef9SDimitry Andric 
758e8d8bef9SDimitry Andric   // Iterate over the instructions contained in each candidate
759e8d8bef9SDimitry Andric   unsigned SectionLength = A.getStartIdx() + A.getLength();
760e8d8bef9SDimitry Andric   for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
761e8d8bef9SDimitry Andric        ItA++, ItB++, Loc++) {
762e8d8bef9SDimitry Andric     // Make sure the instructions are similar to one another.
763e8d8bef9SDimitry Andric     if (!isClose(*ItA, *ItB))
764e8d8bef9SDimitry Andric       return false;
765e8d8bef9SDimitry Andric 
766e8d8bef9SDimitry Andric     Instruction *IA = ItA->Inst;
767e8d8bef9SDimitry Andric     Instruction *IB = ItB->Inst;
768e8d8bef9SDimitry Andric 
769e8d8bef9SDimitry Andric     if (!ItA->Legal || !ItB->Legal)
770e8d8bef9SDimitry Andric       return false;
771e8d8bef9SDimitry Andric 
772e8d8bef9SDimitry Andric     // Get the operand sets for the instructions.
773e8d8bef9SDimitry Andric     ArrayRef<Value *> OperValsA = ItA->OperVals;
774e8d8bef9SDimitry Andric     ArrayRef<Value *> OperValsB = ItB->OperVals;
775e8d8bef9SDimitry Andric 
776e8d8bef9SDimitry Andric     unsigned InstValA = A.ValueToNumber.find(IA)->second;
777e8d8bef9SDimitry Andric     unsigned InstValB = B.ValueToNumber.find(IB)->second;
778e8d8bef9SDimitry Andric 
779349cc55cSDimitry Andric     bool WasInserted;
780e8d8bef9SDimitry Andric     // Ensure that the mappings for the instructions exists.
781e8d8bef9SDimitry Andric     std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
782e8d8bef9SDimitry Andric         std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
783e8d8bef9SDimitry Andric     if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
784e8d8bef9SDimitry Andric       return false;
785e8d8bef9SDimitry Andric 
786e8d8bef9SDimitry Andric     std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
787e8d8bef9SDimitry Andric         std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
788e8d8bef9SDimitry Andric     if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
789e8d8bef9SDimitry Andric       return false;
790e8d8bef9SDimitry Andric 
791e8d8bef9SDimitry Andric     // We have different paths for commutative instructions and non-commutative
792e8d8bef9SDimitry Andric     // instructions since commutative instructions could allow multiple mappings
793e8d8bef9SDimitry Andric     // to certain values.
794e8d8bef9SDimitry Andric     if (IA->isCommutative() && !isa<FPMathOperator>(IA)) {
795e8d8bef9SDimitry Andric       if (!compareCommutativeOperandMapping(
796e8d8bef9SDimitry Andric               {A, OperValsA, ValueNumberMappingA},
797e8d8bef9SDimitry Andric               {B, OperValsB, ValueNumberMappingB}))
798e8d8bef9SDimitry Andric         return false;
799e8d8bef9SDimitry Andric       continue;
800e8d8bef9SDimitry Andric     }
801e8d8bef9SDimitry Andric 
802e8d8bef9SDimitry Andric     // Handle the non-commutative cases.
803e8d8bef9SDimitry Andric     if (!compareNonCommutativeOperandMapping(
804e8d8bef9SDimitry Andric             {A, OperValsA, ValueNumberMappingA},
805e8d8bef9SDimitry Andric             {B, OperValsB, ValueNumberMappingB}))
806e8d8bef9SDimitry Andric       return false;
807349cc55cSDimitry Andric 
808349cc55cSDimitry Andric     // Here we check that between two corresponding instructions,
809349cc55cSDimitry Andric     // when referring to a basic block in the same region, the
810349cc55cSDimitry Andric     // relative locations are the same. And, that the instructions refer to
811349cc55cSDimitry Andric     // basic blocks outside the region in the same corresponding locations.
812349cc55cSDimitry Andric 
813349cc55cSDimitry Andric     // We are able to make the assumption about blocks outside of the region
814349cc55cSDimitry Andric     // since the target block labels are considered values and will follow the
815349cc55cSDimitry Andric     // same number matching that we defined for the other instructions in the
816349cc55cSDimitry Andric     // region.  So, at this point, in each location we target a specific block
817349cc55cSDimitry Andric     // outside the region, we are targeting a corresponding block in each
818349cc55cSDimitry Andric     // analagous location in the region we are comparing to.
819349cc55cSDimitry Andric     if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
820349cc55cSDimitry Andric         !(isa<PHINode>(IA) && isa<PHINode>(IB)))
821349cc55cSDimitry Andric       continue;
822349cc55cSDimitry Andric 
823349cc55cSDimitry Andric     SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
824349cc55cSDimitry Andric     SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
825349cc55cSDimitry Andric     if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
826349cc55cSDimitry Andric         OperValsA.size() != OperValsB.size())
827349cc55cSDimitry Andric       return false;
828349cc55cSDimitry Andric 
829349cc55cSDimitry Andric     ZippedRelativeLocationsT ZippedRelativeLocations =
830349cc55cSDimitry Andric         zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
831349cc55cSDimitry Andric     if (any_of(ZippedRelativeLocations,
832349cc55cSDimitry Andric                [&A, &B](std::tuple<int, int, Value *, Value *> R) {
833349cc55cSDimitry Andric                  return !checkRelativeLocations(
834349cc55cSDimitry Andric                      {A, std::get<0>(R), std::get<2>(R)},
835349cc55cSDimitry Andric                      {B, std::get<1>(R), std::get<3>(R)});
836349cc55cSDimitry Andric                }))
837349cc55cSDimitry Andric       return false;
838e8d8bef9SDimitry Andric   }
839e8d8bef9SDimitry Andric   return true;
840e8d8bef9SDimitry Andric }
841e8d8bef9SDimitry Andric 
842e8d8bef9SDimitry Andric bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
843e8d8bef9SDimitry Andric                                     const IRSimilarityCandidate &B) {
844e8d8bef9SDimitry Andric   auto DoesOverlap = [](const IRSimilarityCandidate &X,
845e8d8bef9SDimitry Andric                         const IRSimilarityCandidate &Y) {
846e8d8bef9SDimitry Andric     // Check:
847e8d8bef9SDimitry Andric     // XXXXXX        X starts before Y ends
848e8d8bef9SDimitry Andric     //      YYYYYYY  Y starts after X starts
849e8d8bef9SDimitry Andric     return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
850e8d8bef9SDimitry Andric   };
851e8d8bef9SDimitry Andric 
852e8d8bef9SDimitry Andric   return DoesOverlap(A, B) || DoesOverlap(B, A);
853e8d8bef9SDimitry Andric }
854e8d8bef9SDimitry Andric 
855e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper(
856e8d8bef9SDimitry Andric     Module &M, std::vector<IRInstructionData *> &InstrList,
857e8d8bef9SDimitry Andric     std::vector<unsigned> &IntegerMapping) {
858e8d8bef9SDimitry Andric 
859e8d8bef9SDimitry Andric   std::vector<IRInstructionData *> InstrListForModule;
860e8d8bef9SDimitry Andric   std::vector<unsigned> IntegerMappingForModule;
861e8d8bef9SDimitry Andric   // Iterate over the functions in the module to map each Instruction in each
862e8d8bef9SDimitry Andric   // BasicBlock to an unsigned integer.
863349cc55cSDimitry Andric   Mapper.initializeForBBs(M);
864349cc55cSDimitry Andric 
865e8d8bef9SDimitry Andric   for (Function &F : M) {
866e8d8bef9SDimitry Andric 
867e8d8bef9SDimitry Andric     if (F.empty())
868e8d8bef9SDimitry Andric       continue;
869e8d8bef9SDimitry Andric 
870e8d8bef9SDimitry Andric     for (BasicBlock &BB : F) {
871e8d8bef9SDimitry Andric 
872e8d8bef9SDimitry Andric       // BB has potential to have similarity since it has a size greater than 2
873e8d8bef9SDimitry Andric       // and can therefore match other regions greater than 2. Map it to a list
874e8d8bef9SDimitry Andric       // of unsigned integers.
875e8d8bef9SDimitry Andric       Mapper.convertToUnsignedVec(BB, InstrListForModule,
876e8d8bef9SDimitry Andric                                   IntegerMappingForModule);
877e8d8bef9SDimitry Andric     }
878349cc55cSDimitry Andric 
879349cc55cSDimitry Andric     BasicBlock::iterator It = F.begin()->end();
880349cc55cSDimitry Andric     Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
881349cc55cSDimitry Andric                                 true);
882349cc55cSDimitry Andric     if (InstrListForModule.size() > 0)
883349cc55cSDimitry Andric       Mapper.IDL->push_back(*InstrListForModule.back());
884e8d8bef9SDimitry Andric   }
885e8d8bef9SDimitry Andric 
886e8d8bef9SDimitry Andric   // Insert the InstrListForModule at the end of the overall InstrList so that
887e8d8bef9SDimitry Andric   // we can have a long InstrList for the entire set of Modules being analyzed.
888e8d8bef9SDimitry Andric   llvm::append_range(InstrList, InstrListForModule);
889e8d8bef9SDimitry Andric   // Do the same as above, but for IntegerMapping.
890e8d8bef9SDimitry Andric   llvm::append_range(IntegerMapping, IntegerMappingForModule);
891e8d8bef9SDimitry Andric }
892e8d8bef9SDimitry Andric 
893e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper(
894e8d8bef9SDimitry Andric     ArrayRef<std::unique_ptr<Module>> &Modules,
895e8d8bef9SDimitry Andric     std::vector<IRInstructionData *> &InstrList,
896e8d8bef9SDimitry Andric     std::vector<unsigned> &IntegerMapping) {
897e8d8bef9SDimitry Andric 
898e8d8bef9SDimitry Andric   // Iterate over, and map the instructions in each module.
899e8d8bef9SDimitry Andric   for (const std::unique_ptr<Module> &M : Modules)
900e8d8bef9SDimitry Andric     populateMapper(*M, InstrList, IntegerMapping);
901e8d8bef9SDimitry Andric }
902e8d8bef9SDimitry Andric 
903e8d8bef9SDimitry Andric /// From a repeated subsequence, find all the different instances of the
904e8d8bef9SDimitry Andric /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
905e8d8bef9SDimitry Andric /// the IRInstructionData in subsequence.
906e8d8bef9SDimitry Andric ///
9074824e7fdSDimitry Andric /// \param [in] Mapper - The instruction mapper for basic correctness checks.
908e8d8bef9SDimitry Andric /// \param [in] InstrList - The vector that holds the instruction data.
909e8d8bef9SDimitry Andric /// \param [in] IntegerMapping - The vector that holds the mapped integers.
910e8d8bef9SDimitry Andric /// \param [out] CandsForRepSubstring - The vector to store the generated
911e8d8bef9SDimitry Andric /// IRSimilarityCandidates.
912e8d8bef9SDimitry Andric static void createCandidatesFromSuffixTree(
913fe6060f1SDimitry Andric     const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
914e8d8bef9SDimitry Andric     std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
915e8d8bef9SDimitry Andric     std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
916e8d8bef9SDimitry Andric 
917e8d8bef9SDimitry Andric   unsigned StringLen = RS.Length;
918349cc55cSDimitry Andric   if (StringLen < 2)
919349cc55cSDimitry Andric     return;
920e8d8bef9SDimitry Andric 
921e8d8bef9SDimitry Andric   // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
922e8d8bef9SDimitry Andric   for (const unsigned &StartIdx : RS.StartIndices) {
923e8d8bef9SDimitry Andric     unsigned EndIdx = StartIdx + StringLen - 1;
924e8d8bef9SDimitry Andric 
925e8d8bef9SDimitry Andric     // Check that this subsequence does not contain an illegal instruction.
926e8d8bef9SDimitry Andric     bool ContainsIllegal = false;
927e8d8bef9SDimitry Andric     for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
928e8d8bef9SDimitry Andric       unsigned Key = IntegerMapping[CurrIdx];
929e8d8bef9SDimitry Andric       if (Key > Mapper.IllegalInstrNumber) {
930e8d8bef9SDimitry Andric         ContainsIllegal = true;
931e8d8bef9SDimitry Andric         break;
932e8d8bef9SDimitry Andric       }
933e8d8bef9SDimitry Andric     }
934e8d8bef9SDimitry Andric 
935e8d8bef9SDimitry Andric     // If we have an illegal instruction, we should not create an
936e8d8bef9SDimitry Andric     // IRSimilarityCandidate for this region.
937e8d8bef9SDimitry Andric     if (ContainsIllegal)
938e8d8bef9SDimitry Andric       continue;
939e8d8bef9SDimitry Andric 
940e8d8bef9SDimitry Andric     // We are getting iterators to the instructions in this region of code
941e8d8bef9SDimitry Andric     // by advancing the start and end indices from the start of the
942e8d8bef9SDimitry Andric     // InstrList.
943e8d8bef9SDimitry Andric     std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
944e8d8bef9SDimitry Andric     std::advance(StartIt, StartIdx);
945e8d8bef9SDimitry Andric     std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
946e8d8bef9SDimitry Andric     std::advance(EndIt, EndIdx);
947e8d8bef9SDimitry Andric 
948e8d8bef9SDimitry Andric     CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
949e8d8bef9SDimitry Andric   }
950e8d8bef9SDimitry Andric }
951e8d8bef9SDimitry Andric 
952349cc55cSDimitry Andric void IRSimilarityCandidate::createCanonicalRelationFrom(
953349cc55cSDimitry Andric     IRSimilarityCandidate &SourceCand,
954349cc55cSDimitry Andric     DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
955349cc55cSDimitry Andric     DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
956349cc55cSDimitry Andric   assert(SourceCand.CanonNumToNumber.size() != 0 &&
957349cc55cSDimitry Andric          "Base canonical relationship is empty!");
958349cc55cSDimitry Andric   assert(SourceCand.NumberToCanonNum.size() != 0 &&
959349cc55cSDimitry Andric          "Base canonical relationship is empty!");
960349cc55cSDimitry Andric 
961349cc55cSDimitry Andric   assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
962349cc55cSDimitry Andric   assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
963349cc55cSDimitry Andric 
964349cc55cSDimitry Andric   DenseSet<unsigned> UsedGVNs;
965349cc55cSDimitry Andric   // Iterate over the mappings provided from this candidate to SourceCand.  We
966349cc55cSDimitry Andric   // are then able to map the GVN in this candidate to the same canonical number
967349cc55cSDimitry Andric   // given to the corresponding GVN in SourceCand.
968349cc55cSDimitry Andric   for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
969349cc55cSDimitry Andric     unsigned SourceGVN = GVNMapping.first;
970349cc55cSDimitry Andric 
971349cc55cSDimitry Andric     assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
972349cc55cSDimitry Andric 
973349cc55cSDimitry Andric     unsigned ResultGVN;
974349cc55cSDimitry Andric     // We need special handling if we have more than one potential value.  This
975349cc55cSDimitry Andric     // means that there are at least two GVNs that could correspond to this GVN.
976349cc55cSDimitry Andric     // This could lead to potential swapping later on, so we make a decision
977349cc55cSDimitry Andric     // here to ensure a one-to-one mapping.
978349cc55cSDimitry Andric     if (GVNMapping.second.size() > 1) {
979349cc55cSDimitry Andric       bool Found = false;
980349cc55cSDimitry Andric       for (unsigned Val : GVNMapping.second) {
981349cc55cSDimitry Andric         // We make sure the target value number hasn't already been reserved.
982349cc55cSDimitry Andric         if (UsedGVNs.contains(Val))
983349cc55cSDimitry Andric           continue;
984349cc55cSDimitry Andric 
985349cc55cSDimitry Andric         // We make sure that the opposite mapping is still consistent.
986349cc55cSDimitry Andric         DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
987349cc55cSDimitry Andric             FromSourceMapping.find(Val);
988349cc55cSDimitry Andric 
989349cc55cSDimitry Andric         if (!It->second.contains(SourceGVN))
990349cc55cSDimitry Andric           continue;
991349cc55cSDimitry Andric 
992349cc55cSDimitry Andric         // We pick the first item that satisfies these conditions.
993349cc55cSDimitry Andric         Found = true;
994349cc55cSDimitry Andric         ResultGVN = Val;
995349cc55cSDimitry Andric         break;
996349cc55cSDimitry Andric       }
997349cc55cSDimitry Andric 
998349cc55cSDimitry Andric       assert(Found && "Could not find matching value for source GVN");
999349cc55cSDimitry Andric       (void)Found;
1000349cc55cSDimitry Andric 
1001349cc55cSDimitry Andric     } else
1002349cc55cSDimitry Andric       ResultGVN = *GVNMapping.second.begin();
1003349cc55cSDimitry Andric 
1004349cc55cSDimitry Andric     // Whatever GVN is found, we mark it as used.
1005349cc55cSDimitry Andric     UsedGVNs.insert(ResultGVN);
1006349cc55cSDimitry Andric 
1007349cc55cSDimitry Andric     unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1008349cc55cSDimitry Andric     CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1009349cc55cSDimitry Andric     NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1010349cc55cSDimitry Andric   }
1011349cc55cSDimitry Andric }
1012349cc55cSDimitry Andric 
1013349cc55cSDimitry Andric void IRSimilarityCandidate::createCanonicalMappingFor(
1014349cc55cSDimitry Andric     IRSimilarityCandidate &CurrCand) {
1015349cc55cSDimitry Andric   assert(CurrCand.CanonNumToNumber.size() == 0 &&
1016349cc55cSDimitry Andric          "Canonical Relationship is non-empty");
1017349cc55cSDimitry Andric   assert(CurrCand.NumberToCanonNum.size() == 0 &&
1018349cc55cSDimitry Andric          "Canonical Relationship is non-empty");
1019349cc55cSDimitry Andric 
1020349cc55cSDimitry Andric   unsigned CanonNum = 0;
1021349cc55cSDimitry Andric   // Iterate over the value numbers found, the order does not matter in this
1022349cc55cSDimitry Andric   // case.
1023349cc55cSDimitry Andric   for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1024349cc55cSDimitry Andric     CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1025349cc55cSDimitry Andric     CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1026349cc55cSDimitry Andric     CanonNum++;
1027349cc55cSDimitry Andric   }
1028349cc55cSDimitry Andric }
1029349cc55cSDimitry Andric 
1030e8d8bef9SDimitry Andric /// From the list of IRSimilarityCandidates, perform a comparison between each
1031e8d8bef9SDimitry Andric /// IRSimilarityCandidate to determine if there are overlapping
1032e8d8bef9SDimitry Andric /// IRInstructionData, or if they do not have the same structure.
1033e8d8bef9SDimitry Andric ///
1034e8d8bef9SDimitry Andric /// \param [in] CandsForRepSubstring - The vector containing the
1035e8d8bef9SDimitry Andric /// IRSimilarityCandidates.
1036e8d8bef9SDimitry Andric /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1037e8d8bef9SDimitry Andric /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1038e8d8bef9SDimitry Andric /// vector are structurally similar to one another.
1039e8d8bef9SDimitry Andric static void findCandidateStructures(
1040e8d8bef9SDimitry Andric     std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1041e8d8bef9SDimitry Andric     DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
1042e8d8bef9SDimitry Andric   std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1043e8d8bef9SDimitry Andric       InnerCandEndIt;
1044e8d8bef9SDimitry Andric 
1045e8d8bef9SDimitry Andric   // IRSimilarityCandidates each have a structure for operand use.  It is
1046e8d8bef9SDimitry Andric   // possible that two instances of the same subsequences have different
1047e8d8bef9SDimitry Andric   // structure. Each type of structure found is assigned a number.  This
1048e8d8bef9SDimitry Andric   // DenseMap maps an IRSimilarityCandidate to which type of similarity
1049e8d8bef9SDimitry Andric   // discovered it fits within.
1050e8d8bef9SDimitry Andric   DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1051e8d8bef9SDimitry Andric 
1052e8d8bef9SDimitry Andric   // Find the compatibility from each candidate to the others to determine
1053e8d8bef9SDimitry Andric   // which candidates overlap and which have the same structure by mapping
1054e8d8bef9SDimitry Andric   // each structure to a different group.
1055e8d8bef9SDimitry Andric   bool SameStructure;
1056e8d8bef9SDimitry Andric   bool Inserted;
1057e8d8bef9SDimitry Andric   unsigned CurrentGroupNum = 0;
1058e8d8bef9SDimitry Andric   unsigned OuterGroupNum;
1059e8d8bef9SDimitry Andric   DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
1060e8d8bef9SDimitry Andric   DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
1061e8d8bef9SDimitry Andric   DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
1062e8d8bef9SDimitry Andric 
1063e8d8bef9SDimitry Andric   // Iterate over the candidates to determine its structural and overlapping
1064e8d8bef9SDimitry Andric   // compatibility with other instructions
1065349cc55cSDimitry Andric   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1066349cc55cSDimitry Andric   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1067e8d8bef9SDimitry Andric   for (CandIt = CandsForRepSubstring.begin(),
1068e8d8bef9SDimitry Andric       CandEndIt = CandsForRepSubstring.end();
1069e8d8bef9SDimitry Andric        CandIt != CandEndIt; CandIt++) {
1070e8d8bef9SDimitry Andric 
1071e8d8bef9SDimitry Andric     // Determine if it has an assigned structural group already.
1072e8d8bef9SDimitry Andric     CandToGroupIt = CandToGroup.find(&*CandIt);
1073e8d8bef9SDimitry Andric     if (CandToGroupIt == CandToGroup.end()) {
1074e8d8bef9SDimitry Andric       // If not, we assign it one, and add it to our mapping.
1075e8d8bef9SDimitry Andric       std::tie(CandToGroupIt, Inserted) =
1076e8d8bef9SDimitry Andric           CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1077e8d8bef9SDimitry Andric     }
1078e8d8bef9SDimitry Andric 
1079e8d8bef9SDimitry Andric     // Get the structural group number from the iterator.
1080e8d8bef9SDimitry Andric     OuterGroupNum = CandToGroupIt->second;
1081e8d8bef9SDimitry Andric 
1082e8d8bef9SDimitry Andric     // Check if we already have a list of IRSimilarityCandidates for the current
1083e8d8bef9SDimitry Andric     // structural group.  Create one if one does not exist.
1084e8d8bef9SDimitry Andric     CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1085349cc55cSDimitry Andric     if (CurrentGroupPair == StructuralGroups.end()) {
1086349cc55cSDimitry Andric       IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
1087e8d8bef9SDimitry Andric       std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1088e8d8bef9SDimitry Andric           std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1089349cc55cSDimitry Andric     }
1090e8d8bef9SDimitry Andric 
1091e8d8bef9SDimitry Andric     // Iterate over the IRSimilarityCandidates following the current
1092e8d8bef9SDimitry Andric     // IRSimilarityCandidate in the list to determine whether the two
1093e8d8bef9SDimitry Andric     // IRSimilarityCandidates are compatible.  This is so we do not repeat pairs
1094e8d8bef9SDimitry Andric     // of IRSimilarityCandidates.
1095e8d8bef9SDimitry Andric     for (InnerCandIt = std::next(CandIt),
1096e8d8bef9SDimitry Andric         InnerCandEndIt = CandsForRepSubstring.end();
1097e8d8bef9SDimitry Andric          InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1098e8d8bef9SDimitry Andric 
1099e8d8bef9SDimitry Andric       // We check if the inner item has a group already, if it does, we skip it.
1100e8d8bef9SDimitry Andric       CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1101e8d8bef9SDimitry Andric       if (CandToGroupItInner != CandToGroup.end())
1102e8d8bef9SDimitry Andric         continue;
1103e8d8bef9SDimitry Andric 
1104e8d8bef9SDimitry Andric       // Otherwise we determine if they have the same structure and add it to
1105e8d8bef9SDimitry Andric       // vector if they match.
1106349cc55cSDimitry Andric       ValueNumberMappingA.clear();
1107349cc55cSDimitry Andric       ValueNumberMappingB.clear();
1108349cc55cSDimitry Andric       SameStructure = IRSimilarityCandidate::compareStructure(
1109349cc55cSDimitry Andric           *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1110e8d8bef9SDimitry Andric       if (!SameStructure)
1111e8d8bef9SDimitry Andric         continue;
1112e8d8bef9SDimitry Andric 
1113349cc55cSDimitry Andric       InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1114349cc55cSDimitry Andric                                                ValueNumberMappingB);
1115e8d8bef9SDimitry Andric       CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1116e8d8bef9SDimitry Andric       CurrentGroupPair->second.push_back(*InnerCandIt);
1117e8d8bef9SDimitry Andric     }
1118e8d8bef9SDimitry Andric   }
1119e8d8bef9SDimitry Andric }
1120e8d8bef9SDimitry Andric 
1121e8d8bef9SDimitry Andric void IRSimilarityIdentifier::findCandidates(
1122e8d8bef9SDimitry Andric     std::vector<IRInstructionData *> &InstrList,
1123e8d8bef9SDimitry Andric     std::vector<unsigned> &IntegerMapping) {
1124e8d8bef9SDimitry Andric   SuffixTree ST(IntegerMapping);
1125e8d8bef9SDimitry Andric 
1126e8d8bef9SDimitry Andric   std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1127e8d8bef9SDimitry Andric   std::vector<SimilarityGroup> NewCandidateGroups;
1128e8d8bef9SDimitry Andric 
1129e8d8bef9SDimitry Andric   DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1130e8d8bef9SDimitry Andric 
1131e8d8bef9SDimitry Andric   // Iterate over the subsequences found by the Suffix Tree to create
1132e8d8bef9SDimitry Andric   // IRSimilarityCandidates for each repeated subsequence and determine which
1133e8d8bef9SDimitry Andric   // instances are structurally similar to one another.
1134fe6060f1SDimitry Andric   for (SuffixTree::RepeatedSubstring &RS : ST) {
1135fe6060f1SDimitry Andric     createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1136e8d8bef9SDimitry Andric                                    CandsForRepSubstring);
1137e8d8bef9SDimitry Andric 
1138e8d8bef9SDimitry Andric     if (CandsForRepSubstring.size() < 2)
1139e8d8bef9SDimitry Andric       continue;
1140e8d8bef9SDimitry Andric 
1141e8d8bef9SDimitry Andric     findCandidateStructures(CandsForRepSubstring, StructuralGroups);
1142e8d8bef9SDimitry Andric     for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
1143e8d8bef9SDimitry Andric       // We only add the group if it contains more than one
1144e8d8bef9SDimitry Andric       // IRSimilarityCandidate.  If there is only one, that means there is no
1145e8d8bef9SDimitry Andric       // other repeated subsequence with the same structure.
1146e8d8bef9SDimitry Andric       if (Group.second.size() > 1)
1147e8d8bef9SDimitry Andric         SimilarityCandidates->push_back(Group.second);
1148e8d8bef9SDimitry Andric 
1149e8d8bef9SDimitry Andric     CandsForRepSubstring.clear();
1150e8d8bef9SDimitry Andric     StructuralGroups.clear();
1151e8d8bef9SDimitry Andric     NewCandidateGroups.clear();
1152e8d8bef9SDimitry Andric   }
1153e8d8bef9SDimitry Andric }
1154e8d8bef9SDimitry Andric 
1155e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
1156e8d8bef9SDimitry Andric     ArrayRef<std::unique_ptr<Module>> Modules) {
1157e8d8bef9SDimitry Andric   resetSimilarityCandidates();
1158e8d8bef9SDimitry Andric 
1159e8d8bef9SDimitry Andric   std::vector<IRInstructionData *> InstrList;
1160e8d8bef9SDimitry Andric   std::vector<unsigned> IntegerMapping;
1161349cc55cSDimitry Andric   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
116204eeddc0SDimitry Andric   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
116304eeddc0SDimitry Andric   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1164*1fd87a68SDimitry Andric   Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1165e8d8bef9SDimitry Andric 
1166e8d8bef9SDimitry Andric   populateMapper(Modules, InstrList, IntegerMapping);
1167e8d8bef9SDimitry Andric   findCandidates(InstrList, IntegerMapping);
1168e8d8bef9SDimitry Andric 
1169e8d8bef9SDimitry Andric   return SimilarityCandidates.getValue();
1170e8d8bef9SDimitry Andric }
1171e8d8bef9SDimitry Andric 
1172e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
1173e8d8bef9SDimitry Andric   resetSimilarityCandidates();
1174349cc55cSDimitry Andric   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
117504eeddc0SDimitry Andric   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
117604eeddc0SDimitry Andric   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1177*1fd87a68SDimitry Andric   Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1178e8d8bef9SDimitry Andric 
1179e8d8bef9SDimitry Andric   std::vector<IRInstructionData *> InstrList;
1180e8d8bef9SDimitry Andric   std::vector<unsigned> IntegerMapping;
1181e8d8bef9SDimitry Andric 
1182e8d8bef9SDimitry Andric   populateMapper(M, InstrList, IntegerMapping);
1183e8d8bef9SDimitry Andric   findCandidates(InstrList, IntegerMapping);
1184e8d8bef9SDimitry Andric 
1185e8d8bef9SDimitry Andric   return SimilarityCandidates.getValue();
1186e8d8bef9SDimitry Andric }
1187e8d8bef9SDimitry Andric 
// Registration of the wrapper pass.  "ir-similarity-identifier" is supplied as
// both the pass argument and the pass name.  NOTE(review): the trailing
// `false, true` are presumably the macro's CFG-only and is-analysis flags --
// confirm against llvm/PassSupport.h.
INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
                "ir-similarity-identifier", false, true)
1190e8d8bef9SDimitry Andric 
1191e8d8bef9SDimitry Andric IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
1192e8d8bef9SDimitry Andric     : ModulePass(ID) {
1193e8d8bef9SDimitry Andric   initializeIRSimilarityIdentifierWrapperPassPass(
1194e8d8bef9SDimitry Andric       *PassRegistry::getPassRegistry());
1195e8d8bef9SDimitry Andric }
1196e8d8bef9SDimitry Andric 
1197e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
119804eeddc0SDimitry Andric   IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1199*1fd87a68SDimitry Andric                                         MatchCallsByName, !DisableIntrinsics));
1200e8d8bef9SDimitry Andric   return false;
1201e8d8bef9SDimitry Andric }
1202e8d8bef9SDimitry Andric 
1203e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
1204e8d8bef9SDimitry Andric   IRSI.reset();
1205e8d8bef9SDimitry Andric   return false;
1206e8d8bef9SDimitry Andric }
1207e8d8bef9SDimitry Andric 
1208e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
1209fe6060f1SDimitry Andric   IRSI->findSimilarity(M);
1210e8d8bef9SDimitry Andric   return false;
1211e8d8bef9SDimitry Andric }
1212e8d8bef9SDimitry Andric 
// Out-of-line definition of the static AnalysisKey member of
// IRSimilarityAnalysis.
AnalysisKey IRSimilarityAnalysis::Key;
1214e8d8bef9SDimitry Andric IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
1215e8d8bef9SDimitry Andric                                                  ModuleAnalysisManager &) {
121604eeddc0SDimitry Andric   auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1217*1fd87a68SDimitry Andric                                      MatchCallsByName, !DisableIntrinsics);
1218fe6060f1SDimitry Andric   IRSI.findSimilarity(M);
1219fe6060f1SDimitry Andric   return IRSI;
1220e8d8bef9SDimitry Andric }
1221e8d8bef9SDimitry Andric 
1222e8d8bef9SDimitry Andric PreservedAnalyses
1223e8d8bef9SDimitry Andric IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
1224e8d8bef9SDimitry Andric   IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
1225e8d8bef9SDimitry Andric   Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity();
1226e8d8bef9SDimitry Andric 
1227e8d8bef9SDimitry Andric   for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1228e8d8bef9SDimitry Andric     OS << CandVec.size() << " candidates of length "
1229e8d8bef9SDimitry Andric        << CandVec.begin()->getLength() << ".  Found in: \n";
1230e8d8bef9SDimitry Andric     for (IRSimilarityCandidate &Cand : CandVec) {
1231e8d8bef9SDimitry Andric       OS << "  Function: " << Cand.front()->Inst->getFunction()->getName().str()
1232e8d8bef9SDimitry Andric          << ", Basic Block: ";
1233e8d8bef9SDimitry Andric       if (Cand.front()->Inst->getParent()->getName().str() == "")
1234fe6060f1SDimitry Andric         OS << "(unnamed)";
1235e8d8bef9SDimitry Andric       else
1236fe6060f1SDimitry Andric         OS << Cand.front()->Inst->getParent()->getName().str();
1237fe6060f1SDimitry Andric       OS << "\n    Start Instruction: ";
1238fe6060f1SDimitry Andric       Cand.frontInstruction()->print(OS);
1239fe6060f1SDimitry Andric       OS << "\n      End Instruction: ";
1240fe6060f1SDimitry Andric       Cand.backInstruction()->print(OS);
1241fe6060f1SDimitry Andric       OS << "\n";
1242e8d8bef9SDimitry Andric     }
1243e8d8bef9SDimitry Andric   }
1244e8d8bef9SDimitry Andric 
1245e8d8bef9SDimitry Andric   return PreservedAnalyses::all();
1246e8d8bef9SDimitry Andric }
1247e8d8bef9SDimitry Andric 
// Definition of the wrapper pass's static ID member, which the constructor
// passes to ModulePass.
char IRSimilarityIdentifierWrapperPass::ID = 0;
1249