1e8d8bef9SDimitry Andric //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric // \file 10e8d8bef9SDimitry Andric // Implementation file for the IRSimilarityIdentifier for identifying 11e8d8bef9SDimitry Andric // similarities in IR including the IRInstructionMapper. 12e8d8bef9SDimitry Andric // 13e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 14e8d8bef9SDimitry Andric 15e8d8bef9SDimitry Andric #include "llvm/Analysis/IRSimilarityIdentifier.h" 16e8d8bef9SDimitry Andric #include "llvm/ADT/DenseMap.h" 17*06c3fb27SDimitry Andric #include "llvm/ADT/SetOperations.h" 18e8d8bef9SDimitry Andric #include "llvm/IR/Intrinsics.h" 19e8d8bef9SDimitry Andric #include "llvm/IR/Operator.h" 20e8d8bef9SDimitry Andric #include "llvm/IR/User.h" 21e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 22e8d8bef9SDimitry Andric #include "llvm/Support/SuffixTree.h" 23e8d8bef9SDimitry Andric 24e8d8bef9SDimitry Andric using namespace llvm; 25e8d8bef9SDimitry Andric using namespace IRSimilarity; 26e8d8bef9SDimitry Andric 2704eeddc0SDimitry Andric namespace llvm { 28349cc55cSDimitry Andric cl::opt<bool> 29349cc55cSDimitry Andric DisableBranches("no-ir-sim-branch-matching", cl::init(false), 30349cc55cSDimitry Andric cl::ReallyHidden, 31349cc55cSDimitry Andric cl::desc("disable similarity matching, and outlining, " 32349cc55cSDimitry Andric "across branches for debugging purposes.")); 33349cc55cSDimitry Andric 3404eeddc0SDimitry Andric cl::opt<bool> 3504eeddc0SDimitry Andric DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), 3604eeddc0SDimitry Andric cl::ReallyHidden, 3704eeddc0SDimitry Andric cl::desc("disable outlining indirect calls.")); 3804eeddc0SDimitry Andric 3904eeddc0SDimitry Andric cl::opt<bool> 4004eeddc0SDimitry Andric MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, 4104eeddc0SDimitry Andric cl::desc("only allow matching call instructions if the " 4204eeddc0SDimitry Andric "name and type signature match.")); 431fd87a68SDimitry Andric 441fd87a68SDimitry Andric cl::opt<bool> 451fd87a68SDimitry Andric DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, 461fd87a68SDimitry Andric cl::desc("Don't match or outline intrinsics")); 4704eeddc0SDimitry Andric } // namespace llvm 4804eeddc0SDimitry Andric 49e8d8bef9SDimitry Andric IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 50e8d8bef9SDimitry Andric IRInstructionDataList &IDList) 51e8d8bef9SDimitry Andric : Inst(&I), Legal(Legality), IDL(&IDList) { 52349cc55cSDimitry Andric initializeInstruction(); 53349cc55cSDimitry Andric } 54349cc55cSDimitry Andric 55349cc55cSDimitry Andric void IRInstructionData::initializeInstruction() { 56e8d8bef9SDimitry Andric // We check for whether we have a comparison instruction. If it is, we 57e8d8bef9SDimitry Andric // find the "less than" version of the predicate for consistency for 58e8d8bef9SDimitry Andric // comparison instructions throught the program. 59349cc55cSDimitry Andric if (CmpInst *C = dyn_cast<CmpInst>(Inst)) { 60e8d8bef9SDimitry Andric CmpInst::Predicate Predicate = predicateForConsistency(C); 61e8d8bef9SDimitry Andric if (Predicate != C->getPredicate()) 62e8d8bef9SDimitry Andric RevisedPredicate = Predicate; 63e8d8bef9SDimitry Andric } 64e8d8bef9SDimitry Andric 65e8d8bef9SDimitry Andric // Here we collect the operands and their types for determining whether 66e8d8bef9SDimitry Andric // the structure of the operand use matches between two different candidates. 67349cc55cSDimitry Andric for (Use &OI : Inst->operands()) { 6881ad6265SDimitry Andric if (isa<CmpInst>(Inst) && RevisedPredicate) { 69e8d8bef9SDimitry Andric // If we have a CmpInst where the predicate is reversed, it means the 70e8d8bef9SDimitry Andric // operands must be reversed as well. 71e8d8bef9SDimitry Andric OperVals.insert(OperVals.begin(), OI.get()); 72e8d8bef9SDimitry Andric continue; 73e8d8bef9SDimitry Andric } 74e8d8bef9SDimitry Andric 75e8d8bef9SDimitry Andric OperVals.push_back(OI.get()); 76e8d8bef9SDimitry Andric } 7704eeddc0SDimitry Andric 7804eeddc0SDimitry Andric // We capture the incoming BasicBlocks as values as well as the incoming 7904eeddc0SDimitry Andric // Values in order to check for structural similarity. 8004eeddc0SDimitry Andric if (PHINode *PN = dyn_cast<PHINode>(Inst)) 8104eeddc0SDimitry Andric for (BasicBlock *BB : PN->blocks()) 8204eeddc0SDimitry Andric OperVals.push_back(BB); 83e8d8bef9SDimitry Andric } 84e8d8bef9SDimitry Andric 85349cc55cSDimitry Andric IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) 8604eeddc0SDimitry Andric : IDL(&IDList) {} 87349cc55cSDimitry Andric 88349cc55cSDimitry Andric void IRInstructionData::setBranchSuccessors( 89349cc55cSDimitry Andric DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 90349cc55cSDimitry Andric assert(isa<BranchInst>(Inst) && "Instruction must be branch"); 91349cc55cSDimitry Andric 92349cc55cSDimitry Andric BranchInst *BI = cast<BranchInst>(Inst); 93349cc55cSDimitry Andric DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 94349cc55cSDimitry Andric 95349cc55cSDimitry Andric BBNumIt = BasicBlockToInteger.find(BI->getParent()); 96349cc55cSDimitry Andric assert(BBNumIt != BasicBlockToInteger.end() && 97349cc55cSDimitry Andric "Could not find location for BasicBlock!"); 98349cc55cSDimitry Andric 99349cc55cSDimitry Andric int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 100349cc55cSDimitry Andric 101*06c3fb27SDimitry Andric for (Value *V : getBlockOperVals()) { 102*06c3fb27SDimitry Andric BasicBlock *Successor = cast<BasicBlock>(V); 103349cc55cSDimitry Andric BBNumIt = BasicBlockToInteger.find(Successor); 104349cc55cSDimitry Andric assert(BBNumIt != BasicBlockToInteger.end() && 105349cc55cSDimitry Andric "Could not find number for BasicBlock!"); 106349cc55cSDimitry Andric int OtherBlockNumber = static_cast<int>(BBNumIt->second); 107349cc55cSDimitry Andric 108349cc55cSDimitry Andric int Relative = OtherBlockNumber - CurrentBlockNumber; 109349cc55cSDimitry Andric RelativeBlockLocations.push_back(Relative); 110349cc55cSDimitry Andric } 111349cc55cSDimitry Andric } 112349cc55cSDimitry Andric 113*06c3fb27SDimitry Andric ArrayRef<Value *> IRInstructionData::getBlockOperVals() { 114*06c3fb27SDimitry Andric assert((isa<BranchInst>(Inst) || 115*06c3fb27SDimitry Andric isa<PHINode>(Inst)) && "Instruction must be branch or PHINode"); 116*06c3fb27SDimitry Andric 117*06c3fb27SDimitry Andric if (BranchInst *BI = dyn_cast<BranchInst>(Inst)) 118*06c3fb27SDimitry Andric return ArrayRef<Value *>( 119*06c3fb27SDimitry Andric std::next(OperVals.begin(), BI->isConditional() ? 1 : 0), 120*06c3fb27SDimitry Andric OperVals.end() 121*06c3fb27SDimitry Andric ); 122*06c3fb27SDimitry Andric 123*06c3fb27SDimitry Andric if (PHINode *PN = dyn_cast<PHINode>(Inst)) 124*06c3fb27SDimitry Andric return ArrayRef<Value *>( 125*06c3fb27SDimitry Andric std::next(OperVals.begin(), PN->getNumIncomingValues()), 126*06c3fb27SDimitry Andric OperVals.end() 127*06c3fb27SDimitry Andric ); 128*06c3fb27SDimitry Andric 129*06c3fb27SDimitry Andric return ArrayRef<Value *>(); 130*06c3fb27SDimitry Andric } 131*06c3fb27SDimitry Andric 13204eeddc0SDimitry Andric void IRInstructionData::setCalleeName(bool MatchByName) { 13304eeddc0SDimitry Andric CallInst *CI = dyn_cast<CallInst>(Inst); 13404eeddc0SDimitry Andric assert(CI && "Instruction must be call"); 13504eeddc0SDimitry Andric 13604eeddc0SDimitry Andric CalleeName = ""; 1371fd87a68SDimitry Andric if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 1381fd87a68SDimitry Andric // To hash intrinsics, we use the opcode, and types like the other 1391fd87a68SDimitry Andric // instructions, but also, the Intrinsic ID, and the Name of the 1401fd87a68SDimitry Andric // intrinsic. 1411fd87a68SDimitry Andric Intrinsic::ID IntrinsicID = II->getIntrinsicID(); 1421fd87a68SDimitry Andric FunctionType *FT = II->getFunctionType(); 1431fd87a68SDimitry Andric // If there is an overloaded name, we have to use the complex version 1441fd87a68SDimitry Andric // of getName to get the entire string. 1451fd87a68SDimitry Andric if (Intrinsic::isOverloaded(IntrinsicID)) 1461fd87a68SDimitry Andric CalleeName = 1471fd87a68SDimitry Andric Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT); 1481fd87a68SDimitry Andric // If there is not an overloaded name, we only need to use this version. 1491fd87a68SDimitry Andric else 1501fd87a68SDimitry Andric CalleeName = Intrinsic::getName(IntrinsicID).str(); 1511fd87a68SDimitry Andric 1521fd87a68SDimitry Andric return; 1531fd87a68SDimitry Andric } 1541fd87a68SDimitry Andric 15504eeddc0SDimitry Andric if (!CI->isIndirectCall() && MatchByName) 15604eeddc0SDimitry Andric CalleeName = CI->getCalledFunction()->getName().str(); 15704eeddc0SDimitry Andric } 15804eeddc0SDimitry Andric 15904eeddc0SDimitry Andric void IRInstructionData::setPHIPredecessors( 16004eeddc0SDimitry Andric DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 16104eeddc0SDimitry Andric assert(isa<PHINode>(Inst) && "Instruction must be phi node"); 16204eeddc0SDimitry Andric 16304eeddc0SDimitry Andric PHINode *PN = cast<PHINode>(Inst); 16404eeddc0SDimitry Andric DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 16504eeddc0SDimitry Andric 16604eeddc0SDimitry Andric BBNumIt = BasicBlockToInteger.find(PN->getParent()); 16704eeddc0SDimitry Andric assert(BBNumIt != BasicBlockToInteger.end() && 16804eeddc0SDimitry Andric "Could not find location for BasicBlock!"); 16904eeddc0SDimitry Andric 17004eeddc0SDimitry Andric int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 17104eeddc0SDimitry Andric 17204eeddc0SDimitry Andric // Convert the incoming blocks of the PHINode to an integer value, based on 17304eeddc0SDimitry Andric // the relative distances between the current block and the incoming block. 17404eeddc0SDimitry Andric for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) { 17504eeddc0SDimitry Andric BasicBlock *Incoming = PN->getIncomingBlock(Idx); 17604eeddc0SDimitry Andric BBNumIt = BasicBlockToInteger.find(Incoming); 17704eeddc0SDimitry Andric assert(BBNumIt != BasicBlockToInteger.end() && 17804eeddc0SDimitry Andric "Could not find number for BasicBlock!"); 17904eeddc0SDimitry Andric int OtherBlockNumber = static_cast<int>(BBNumIt->second); 18004eeddc0SDimitry Andric 18104eeddc0SDimitry Andric int Relative = OtherBlockNumber - CurrentBlockNumber; 18204eeddc0SDimitry Andric RelativeBlockLocations.push_back(Relative); 18304eeddc0SDimitry Andric } 18404eeddc0SDimitry Andric } 18504eeddc0SDimitry Andric 186e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 187e8d8bef9SDimitry Andric switch (CI->getPredicate()) { 188e8d8bef9SDimitry Andric case CmpInst::FCMP_OGT: 189e8d8bef9SDimitry Andric case CmpInst::FCMP_UGT: 190e8d8bef9SDimitry Andric case CmpInst::FCMP_OGE: 191e8d8bef9SDimitry Andric case CmpInst::FCMP_UGE: 192e8d8bef9SDimitry Andric case CmpInst::ICMP_SGT: 193e8d8bef9SDimitry Andric case CmpInst::ICMP_UGT: 194e8d8bef9SDimitry Andric case CmpInst::ICMP_SGE: 195e8d8bef9SDimitry Andric case CmpInst::ICMP_UGE: 196e8d8bef9SDimitry Andric return CI->getSwappedPredicate(); 197e8d8bef9SDimitry Andric default: 198e8d8bef9SDimitry Andric return CI->getPredicate(); 199e8d8bef9SDimitry Andric } 200e8d8bef9SDimitry Andric } 201e8d8bef9SDimitry Andric 202e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::getPredicate() const { 203e8d8bef9SDimitry Andric assert(isa<CmpInst>(Inst) && 204e8d8bef9SDimitry Andric "Can only get a predicate from a compare instruction"); 205e8d8bef9SDimitry Andric 20681ad6265SDimitry Andric if (RevisedPredicate) 207bdd1243dSDimitry Andric return *RevisedPredicate; 208e8d8bef9SDimitry Andric 209e8d8bef9SDimitry Andric return cast<CmpInst>(Inst)->getPredicate(); 210e8d8bef9SDimitry Andric } 211e8d8bef9SDimitry Andric 21204eeddc0SDimitry Andric StringRef IRInstructionData::getCalleeName() const { 21304eeddc0SDimitry Andric assert(isa<CallInst>(Inst) && 21404eeddc0SDimitry Andric "Can only get a name from a call instruction"); 215e8d8bef9SDimitry Andric 21681ad6265SDimitry Andric assert(CalleeName && "CalleeName has not been set"); 21704eeddc0SDimitry Andric 21804eeddc0SDimitry Andric return *CalleeName; 219e8d8bef9SDimitry Andric } 220e8d8bef9SDimitry Andric 221e8d8bef9SDimitry Andric bool IRSimilarity::isClose(const IRInstructionData &A, 222e8d8bef9SDimitry Andric const IRInstructionData &B) { 223e8d8bef9SDimitry Andric 224e8d8bef9SDimitry Andric if (!A.Legal || !B.Legal) 225e8d8bef9SDimitry Andric return false; 226e8d8bef9SDimitry Andric 227e8d8bef9SDimitry Andric // Check if we are performing the same sort of operation on the same types 228e8d8bef9SDimitry Andric // but not on the same values. 229e8d8bef9SDimitry Andric if (!A.Inst->isSameOperationAs(B.Inst)) { 230e8d8bef9SDimitry Andric // If there is a predicate, this means that either there is a swapped 231e8d8bef9SDimitry Andric // predicate, or that the types are different, we want to make sure that 232e8d8bef9SDimitry Andric // the predicates are equivalent via swapping. 233e8d8bef9SDimitry Andric if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 234e8d8bef9SDimitry Andric 235e8d8bef9SDimitry Andric if (A.getPredicate() != B.getPredicate()) 236e8d8bef9SDimitry Andric return false; 237e8d8bef9SDimitry Andric 238e8d8bef9SDimitry Andric // If the predicates are the same via swap, make sure that the types are 239e8d8bef9SDimitry Andric // still the same. 240e8d8bef9SDimitry Andric auto ZippedTypes = zip(A.OperVals, B.OperVals); 241e8d8bef9SDimitry Andric 242e8d8bef9SDimitry Andric return all_of( 243e8d8bef9SDimitry Andric ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 244e8d8bef9SDimitry Andric return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 245e8d8bef9SDimitry Andric }); 246e8d8bef9SDimitry Andric } 247e8d8bef9SDimitry Andric 248e8d8bef9SDimitry Andric return false; 249e8d8bef9SDimitry Andric } 250e8d8bef9SDimitry Andric 251e8d8bef9SDimitry Andric // Since any GEP Instruction operands after the first operand cannot be 252e8d8bef9SDimitry Andric // defined by a register, we must make sure that the operands after the first 253e8d8bef9SDimitry Andric // are the same in the two instructions 254e8d8bef9SDimitry Andric if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { 255e8d8bef9SDimitry Andric auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); 256e8d8bef9SDimitry Andric 257e8d8bef9SDimitry Andric // If the instructions do not have the same inbounds restrictions, we do 258e8d8bef9SDimitry Andric // not consider them the same. 259e8d8bef9SDimitry Andric if (GEP->isInBounds() != OtherGEP->isInBounds()) 260e8d8bef9SDimitry Andric return false; 261e8d8bef9SDimitry Andric 262e8d8bef9SDimitry Andric auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); 263e8d8bef9SDimitry Andric 264e8d8bef9SDimitry Andric // We increment here since we do not care about the first instruction, 265e8d8bef9SDimitry Andric // we only care about the following operands since they must be the 266e8d8bef9SDimitry Andric // exact same to be considered similar. 267e8d8bef9SDimitry Andric return all_of(drop_begin(ZippedOperands), 268e8d8bef9SDimitry Andric [](std::tuple<llvm::Use &, llvm::Use &> R) { 269e8d8bef9SDimitry Andric return std::get<0>(R) == std::get<1>(R); 270e8d8bef9SDimitry Andric }); 271e8d8bef9SDimitry Andric } 272e8d8bef9SDimitry Andric 27304eeddc0SDimitry Andric // If the instructions are functions calls, we make sure that the function 27404eeddc0SDimitry Andric // name is the same. We already know that the types are since is 27504eeddc0SDimitry Andric // isSameOperationAs is true. 276e8d8bef9SDimitry Andric if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { 2771fd87a68SDimitry Andric if (A.getCalleeName().str() != B.getCalleeName().str()) 278e8d8bef9SDimitry Andric return false; 279e8d8bef9SDimitry Andric } 280e8d8bef9SDimitry Andric 281349cc55cSDimitry Andric if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) && 282349cc55cSDimitry Andric A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) 283349cc55cSDimitry Andric return false; 284349cc55cSDimitry Andric 285e8d8bef9SDimitry Andric return true; 286e8d8bef9SDimitry Andric } 287e8d8bef9SDimitry Andric 288e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 289e8d8bef9SDimitry Andric // into the same interface. 290e8d8bef9SDimitry Andric void IRInstructionMapper::convertToUnsignedVec( 291e8d8bef9SDimitry Andric BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 292e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 293e8d8bef9SDimitry Andric BasicBlock::iterator It = BB.begin(); 294e8d8bef9SDimitry Andric 295e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMappingForBB; 296e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrListForBB; 297e8d8bef9SDimitry Andric 298e8d8bef9SDimitry Andric for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 299e8d8bef9SDimitry Andric switch (InstClassifier.visit(*It)) { 300e8d8bef9SDimitry Andric case InstrType::Legal: 301e8d8bef9SDimitry Andric mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 302e8d8bef9SDimitry Andric break; 303e8d8bef9SDimitry Andric case InstrType::Illegal: 304e8d8bef9SDimitry Andric mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 305e8d8bef9SDimitry Andric break; 306e8d8bef9SDimitry Andric case InstrType::Invisible: 307e8d8bef9SDimitry Andric AddedIllegalLastTime = false; 308e8d8bef9SDimitry Andric break; 309e8d8bef9SDimitry Andric } 310e8d8bef9SDimitry Andric } 311e8d8bef9SDimitry Andric 312349cc55cSDimitry Andric if (AddedIllegalLastTime) 313e8d8bef9SDimitry Andric mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 314fe6060f1SDimitry Andric for (IRInstructionData *ID : InstrListForBB) 315fe6060f1SDimitry Andric this->IDL->push_back(*ID); 316e8d8bef9SDimitry Andric llvm::append_range(InstrList, InstrListForBB); 317e8d8bef9SDimitry Andric llvm::append_range(IntegerMapping, IntegerMappingForBB); 318e8d8bef9SDimitry Andric } 319e8d8bef9SDimitry Andric 320e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 321e8d8bef9SDimitry Andric // into the same interface. 322e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToLegalUnsigned( 323e8d8bef9SDimitry Andric BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 324e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrListForBB) { 325e8d8bef9SDimitry Andric // We added something legal, so we should unset the AddedLegalLastTime 326e8d8bef9SDimitry Andric // flag. 327e8d8bef9SDimitry Andric AddedIllegalLastTime = false; 328e8d8bef9SDimitry Andric 329e8d8bef9SDimitry Andric // If we have at least two adjacent legal instructions (which may have 330e8d8bef9SDimitry Andric // invisible instructions in between), remember that. 331e8d8bef9SDimitry Andric if (CanCombineWithPrevInstr) 332e8d8bef9SDimitry Andric HaveLegalRange = true; 333e8d8bef9SDimitry Andric CanCombineWithPrevInstr = true; 334e8d8bef9SDimitry Andric 335e8d8bef9SDimitry Andric // Get the integer for this instruction or give it the current 336e8d8bef9SDimitry Andric // LegalInstrNumber. 337e8d8bef9SDimitry Andric IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 338e8d8bef9SDimitry Andric InstrListForBB.push_back(ID); 339e8d8bef9SDimitry Andric 340349cc55cSDimitry Andric if (isa<BranchInst>(*It)) 341349cc55cSDimitry Andric ID->setBranchSuccessors(BasicBlockToInteger); 342349cc55cSDimitry Andric 34304eeddc0SDimitry Andric if (isa<CallInst>(*It)) 34404eeddc0SDimitry Andric ID->setCalleeName(EnableMatchCallsByName); 34504eeddc0SDimitry Andric 34604eeddc0SDimitry Andric if (isa<PHINode>(*It)) 34704eeddc0SDimitry Andric ID->setPHIPredecessors(BasicBlockToInteger); 34804eeddc0SDimitry Andric 349e8d8bef9SDimitry Andric // Add to the instruction list 350e8d8bef9SDimitry Andric bool WasInserted; 351e8d8bef9SDimitry Andric DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 352e8d8bef9SDimitry Andric ResultIt; 353e8d8bef9SDimitry Andric std::tie(ResultIt, WasInserted) = 354e8d8bef9SDimitry Andric InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 355e8d8bef9SDimitry Andric unsigned INumber = ResultIt->second; 356e8d8bef9SDimitry Andric 357e8d8bef9SDimitry Andric // There was an insertion. 358e8d8bef9SDimitry Andric if (WasInserted) 359e8d8bef9SDimitry Andric LegalInstrNumber++; 360e8d8bef9SDimitry Andric 361e8d8bef9SDimitry Andric IntegerMappingForBB.push_back(INumber); 362e8d8bef9SDimitry Andric 363e8d8bef9SDimitry Andric // Make sure we don't overflow or use any integers reserved by the DenseMap. 364e8d8bef9SDimitry Andric assert(LegalInstrNumber < IllegalInstrNumber && 365e8d8bef9SDimitry Andric "Instruction mapping overflow!"); 366e8d8bef9SDimitry Andric 367e8d8bef9SDimitry Andric assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 368e8d8bef9SDimitry Andric "Tried to assign DenseMap tombstone or empty key to instruction."); 369e8d8bef9SDimitry Andric assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 370e8d8bef9SDimitry Andric "Tried to assign DenseMap tombstone or empty key to instruction."); 371e8d8bef9SDimitry Andric 372e8d8bef9SDimitry Andric return INumber; 373e8d8bef9SDimitry Andric } 374e8d8bef9SDimitry Andric 375e8d8bef9SDimitry Andric IRInstructionData * 376e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 377e8d8bef9SDimitry Andric IRInstructionDataList &IDL) { 378e8d8bef9SDimitry Andric return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 379e8d8bef9SDimitry Andric } 380e8d8bef9SDimitry Andric 381349cc55cSDimitry Andric IRInstructionData * 382349cc55cSDimitry Andric IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) { 383349cc55cSDimitry Andric return new (InstDataAllocator->Allocate()) IRInstructionData(IDL); 384349cc55cSDimitry Andric } 385349cc55cSDimitry Andric 386e8d8bef9SDimitry Andric IRInstructionDataList * 387e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionDataList() { 388e8d8bef9SDimitry Andric return new (IDLAllocator->Allocate()) IRInstructionDataList(); 389e8d8bef9SDimitry Andric } 390e8d8bef9SDimitry Andric 391e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 392e8d8bef9SDimitry Andric // into the same interface. 393e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToIllegalUnsigned( 394e8d8bef9SDimitry Andric BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 395e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrListForBB, bool End) { 396e8d8bef9SDimitry Andric // Can't combine an illegal instruction. Set the flag. 397e8d8bef9SDimitry Andric CanCombineWithPrevInstr = false; 398e8d8bef9SDimitry Andric 399e8d8bef9SDimitry Andric // Only add one illegal number per range of legal numbers. 400e8d8bef9SDimitry Andric if (AddedIllegalLastTime) 401e8d8bef9SDimitry Andric return IllegalInstrNumber; 402e8d8bef9SDimitry Andric 403e8d8bef9SDimitry Andric IRInstructionData *ID = nullptr; 404e8d8bef9SDimitry Andric if (!End) 405e8d8bef9SDimitry Andric ID = allocateIRInstructionData(*It, false, *IDL); 406349cc55cSDimitry Andric else 407349cc55cSDimitry Andric ID = allocateIRInstructionData(*IDL); 408e8d8bef9SDimitry Andric InstrListForBB.push_back(ID); 409e8d8bef9SDimitry Andric 410e8d8bef9SDimitry Andric // Remember that we added an illegal number last time. 411e8d8bef9SDimitry Andric AddedIllegalLastTime = true; 412e8d8bef9SDimitry Andric unsigned INumber = IllegalInstrNumber; 413e8d8bef9SDimitry Andric IntegerMappingForBB.push_back(IllegalInstrNumber--); 414e8d8bef9SDimitry Andric 415e8d8bef9SDimitry Andric assert(LegalInstrNumber < IllegalInstrNumber && 416e8d8bef9SDimitry Andric "Instruction mapping overflow!"); 417e8d8bef9SDimitry Andric 418e8d8bef9SDimitry Andric assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 419e8d8bef9SDimitry Andric "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 420e8d8bef9SDimitry Andric 421e8d8bef9SDimitry Andric assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 422e8d8bef9SDimitry Andric "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 423e8d8bef9SDimitry Andric 424e8d8bef9SDimitry Andric return INumber; 425e8d8bef9SDimitry Andric } 426e8d8bef9SDimitry Andric 427e8d8bef9SDimitry Andric IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 428e8d8bef9SDimitry Andric IRInstructionData *FirstInstIt, 429e8d8bef9SDimitry Andric IRInstructionData *LastInstIt) 430e8d8bef9SDimitry Andric : StartIdx(StartIdx), Len(Len) { 431e8d8bef9SDimitry Andric 432e8d8bef9SDimitry Andric assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 433e8d8bef9SDimitry Andric assert(LastInstIt != nullptr && "Instruction is nullptr!"); 434e8d8bef9SDimitry Andric assert(StartIdx + Len > StartIdx && 435e8d8bef9SDimitry Andric "Overflow for IRSimilarityCandidate range?"); 436e8d8bef9SDimitry Andric assert(Len - 1 == static_cast<unsigned>(std::distance( 437e8d8bef9SDimitry Andric iterator(FirstInstIt), iterator(LastInstIt))) && 438e8d8bef9SDimitry Andric "Length of the first and last IRInstructionData do not match the " 439e8d8bef9SDimitry Andric "given length"); 440e8d8bef9SDimitry Andric 441e8d8bef9SDimitry Andric // We iterate over the given instructions, and map each unique value 442e8d8bef9SDimitry Andric // to a unique number in the IRSimilarityCandidate ValueToNumber and 443e8d8bef9SDimitry Andric // NumberToValue maps. A constant get its own value globally, the individual 444e8d8bef9SDimitry Andric // uses of the constants are not considered to be unique. 445e8d8bef9SDimitry Andric // 446e8d8bef9SDimitry Andric // IR: Mapping Added: 447e8d8bef9SDimitry Andric // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 448e8d8bef9SDimitry Andric // %add2 = add i32 %a, %1 %add2 -> 4 449e8d8bef9SDimitry Andric // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 450e8d8bef9SDimitry Andric // 451e8d8bef9SDimitry Andric // when replace with global values, starting from 1, would be 452e8d8bef9SDimitry Andric // 453e8d8bef9SDimitry Andric // 3 = add i32 1, 2 454e8d8bef9SDimitry Andric // 4 = add i32 1, 3 455e8d8bef9SDimitry Andric // 6 = add i32 5, 2 456e8d8bef9SDimitry Andric unsigned LocalValNumber = 1; 457e8d8bef9SDimitry Andric IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 458e8d8bef9SDimitry Andric for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 459e8d8bef9SDimitry Andric // Map the operand values to an unsigned integer if it does not already 460e8d8bef9SDimitry Andric // have an unsigned integer assigned to it. 461e8d8bef9SDimitry Andric for (Value *Arg : ID->OperVals) 462*06c3fb27SDimitry Andric if (!ValueToNumber.contains(Arg)) { 463e8d8bef9SDimitry Andric ValueToNumber.try_emplace(Arg, LocalValNumber); 464e8d8bef9SDimitry Andric NumberToValue.try_emplace(LocalValNumber, Arg); 465e8d8bef9SDimitry Andric LocalValNumber++; 466e8d8bef9SDimitry Andric } 467e8d8bef9SDimitry Andric 468e8d8bef9SDimitry Andric // Mapping the instructions to an unsigned integer if it is not already 469e8d8bef9SDimitry Andric // exist in the mapping. 470*06c3fb27SDimitry Andric if (!ValueToNumber.contains(ID->Inst)) { 471e8d8bef9SDimitry Andric ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 472e8d8bef9SDimitry Andric NumberToValue.try_emplace(LocalValNumber, ID->Inst); 473e8d8bef9SDimitry Andric LocalValNumber++; 474e8d8bef9SDimitry Andric } 475e8d8bef9SDimitry Andric } 476e8d8bef9SDimitry Andric 477e8d8bef9SDimitry Andric // Setting the first and last instruction data pointers for the candidate. If 478e8d8bef9SDimitry Andric // we got through the entire for loop without hitting an assert, we know 479e8d8bef9SDimitry Andric // that both of these instructions are not nullptrs. 480e8d8bef9SDimitry Andric FirstInst = FirstInstIt; 481e8d8bef9SDimitry Andric LastInst = LastInstIt; 48281ad6265SDimitry Andric 48381ad6265SDimitry Andric // Add the basic blocks contained in the set into the global value numbering. 48481ad6265SDimitry Andric DenseSet<BasicBlock *> BBSet; 48581ad6265SDimitry Andric getBasicBlocks(BBSet); 48681ad6265SDimitry Andric for (BasicBlock *BB : BBSet) { 487*06c3fb27SDimitry Andric if (ValueToNumber.contains(BB)) 48881ad6265SDimitry Andric continue; 48981ad6265SDimitry Andric 49081ad6265SDimitry Andric ValueToNumber.try_emplace(BB, LocalValNumber); 49181ad6265SDimitry Andric NumberToValue.try_emplace(LocalValNumber, BB); 49281ad6265SDimitry Andric LocalValNumber++; 49381ad6265SDimitry Andric } 494e8d8bef9SDimitry Andric } 495e8d8bef9SDimitry Andric 496e8d8bef9SDimitry Andric bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 497e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 498e8d8bef9SDimitry Andric if (A.getLength() != B.getLength()) 499e8d8bef9SDimitry Andric return false; 500e8d8bef9SDimitry Andric 501e8d8bef9SDimitry Andric auto InstrDataForBoth = 502e8d8bef9SDimitry Andric zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 503e8d8bef9SDimitry Andric 504e8d8bef9SDimitry Andric return all_of(InstrDataForBoth, 505e8d8bef9SDimitry Andric [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 506e8d8bef9SDimitry Andric IRInstructionData &A = std::get<0>(R); 507e8d8bef9SDimitry Andric IRInstructionData &B = std::get<1>(R); 508e8d8bef9SDimitry Andric if (!A.Legal || !B.Legal) 509e8d8bef9SDimitry Andric return false; 510e8d8bef9SDimitry Andric return isClose(A, B); 511e8d8bef9SDimitry Andric }); 512e8d8bef9SDimitry Andric } 513e8d8bef9SDimitry Andric 514e8d8bef9SDimitry Andric /// Determine if one or more of the assigned global value numbers for the 515e8d8bef9SDimitry Andric /// operands in \p TargetValueNumbers is in the current mapping set for operand 516e8d8bef9SDimitry Andric /// numbers in \p SourceOperands. The set of possible corresponding global 517e8d8bef9SDimitry Andric /// value numbers are replaced with the most recent version of compatible 518e8d8bef9SDimitry Andric /// values. 519e8d8bef9SDimitry Andric /// 520e8d8bef9SDimitry Andric /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 521e8d8bef9SDimitry Andric /// value number for the source IRInstructionCandidate. 522e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 523e8d8bef9SDimitry Andric /// IRSimilarityCandidate global value numbers to a set of possible numbers in 524e8d8bef9SDimitry Andric /// the target. 525e8d8bef9SDimitry Andric /// \param [in] SourceOperands - The operands in the original 526e8d8bef9SDimitry Andric /// IRSimilarityCandidate in the current instruction. 527e8d8bef9SDimitry Andric /// \param [in] TargetValueNumbers - The global value numbers of the operands in 528e8d8bef9SDimitry Andric /// the corresponding Instruction in the other IRSimilarityCandidate. 529e8d8bef9SDimitry Andric /// \returns true if there exists a possible mapping between the source 530e8d8bef9SDimitry Andric /// Instruction operands and the target Instruction operands, and false if not. 531e8d8bef9SDimitry Andric static bool checkNumberingAndReplaceCommutative( 532e8d8bef9SDimitry Andric const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 533e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 534e8d8bef9SDimitry Andric ArrayRef<Value *> &SourceOperands, 535e8d8bef9SDimitry Andric DenseSet<unsigned> &TargetValueNumbers){ 536e8d8bef9SDimitry Andric 537e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 538e8d8bef9SDimitry Andric 539e8d8bef9SDimitry Andric unsigned ArgVal; 540e8d8bef9SDimitry Andric bool WasInserted; 541e8d8bef9SDimitry Andric 542e8d8bef9SDimitry Andric // Iterate over the operands in the source IRSimilarityCandidate to determine 543e8d8bef9SDimitry Andric // whether there exists an operand in the other IRSimilarityCandidate that 544e8d8bef9SDimitry Andric // creates a valid mapping of Value to Value between the 545e8d8bef9SDimitry Andric // IRSimilarityCaniddates. 546e8d8bef9SDimitry Andric for (Value *V : SourceOperands) { 547e8d8bef9SDimitry Andric ArgVal = SourceValueToNumberMapping.find(V)->second; 548e8d8bef9SDimitry Andric 54981ad6265SDimitry Andric // Instead of finding a current mapping, we attempt to insert a set. 550e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 551e8d8bef9SDimitry Andric std::make_pair(ArgVal, TargetValueNumbers)); 552e8d8bef9SDimitry Andric 55381ad6265SDimitry Andric // We need to iterate over the items in other IRSimilarityCandidate's 55481ad6265SDimitry Andric // Instruction to determine whether there is a valid mapping of 55581ad6265SDimitry Andric // Value to Value. 556e8d8bef9SDimitry Andric DenseSet<unsigned> NewSet; 557e8d8bef9SDimitry Andric for (unsigned &Curr : ValueMappingIt->second) 558e8d8bef9SDimitry Andric // If we can find the value in the mapping, we add it to the new set. 559e8d8bef9SDimitry Andric if (TargetValueNumbers.contains(Curr)) 560e8d8bef9SDimitry Andric NewSet.insert(Curr); 561e8d8bef9SDimitry Andric 562e8d8bef9SDimitry Andric // If we could not find a Value, return 0. 563e8d8bef9SDimitry Andric if (NewSet.empty()) 564e8d8bef9SDimitry Andric return false; 565e8d8bef9SDimitry Andric 566e8d8bef9SDimitry Andric // Otherwise replace the old mapping with the newly constructed one. 567e8d8bef9SDimitry Andric if (NewSet.size() != ValueMappingIt->second.size()) 568e8d8bef9SDimitry Andric ValueMappingIt->second.swap(NewSet); 569e8d8bef9SDimitry Andric 570e8d8bef9SDimitry Andric // We have reached no conclusions about the mapping, and cannot remove 571e8d8bef9SDimitry Andric // any items from the other operands, so we move to check the next operand. 572e8d8bef9SDimitry Andric if (ValueMappingIt->second.size() != 1) 573e8d8bef9SDimitry Andric continue; 574e8d8bef9SDimitry Andric 575e8d8bef9SDimitry Andric unsigned ValToRemove = *ValueMappingIt->second.begin(); 576e8d8bef9SDimitry Andric // When there is only one item left in the mapping for and operand, remove 577e8d8bef9SDimitry Andric // the value from the other operands. If it results in there being no 578e8d8bef9SDimitry Andric // mapping, return false, it means the mapping is wrong 579e8d8bef9SDimitry Andric for (Value *InnerV : SourceOperands) { 580e8d8bef9SDimitry Andric if (V == InnerV) 581e8d8bef9SDimitry Andric continue; 582e8d8bef9SDimitry Andric 583e8d8bef9SDimitry Andric unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 584e8d8bef9SDimitry Andric ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 585e8d8bef9SDimitry Andric if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 586e8d8bef9SDimitry Andric continue; 587e8d8bef9SDimitry Andric 588e8d8bef9SDimitry Andric ValueMappingIt->second.erase(ValToRemove); 589e8d8bef9SDimitry Andric if (ValueMappingIt->second.empty()) 590e8d8bef9SDimitry Andric return false; 591e8d8bef9SDimitry Andric } 592e8d8bef9SDimitry Andric } 593e8d8bef9SDimitry Andric 594e8d8bef9SDimitry Andric return true; 595e8d8bef9SDimitry Andric } 596e8d8bef9SDimitry Andric 597e8d8bef9SDimitry Andric /// Determine if operand number \p TargetArgVal is in the current mapping set 598e8d8bef9SDimitry Andric /// for operand number \p SourceArgVal. 599e8d8bef9SDimitry Andric /// 600e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 601e8d8bef9SDimitry Andric /// value numbers from source IRSimilarityCandidate to target 602e8d8bef9SDimitry Andric /// IRSimilarityCandidate. 603e8d8bef9SDimitry Andric /// \param [in] SourceArgVal The global value number for an operand in the 604e8d8bef9SDimitry Andric /// in the original candidate. 605e8d8bef9SDimitry Andric /// \param [in] TargetArgVal The global value number for the corresponding 606e8d8bef9SDimitry Andric /// operand in the other candidate. 607e8d8bef9SDimitry Andric /// \returns True if there exists a mapping and false if not. 608e8d8bef9SDimitry Andric bool checkNumberingAndReplace( 609e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 610e8d8bef9SDimitry Andric unsigned SourceArgVal, unsigned TargetArgVal) { 611e8d8bef9SDimitry Andric // We are given two unsigned integers representing the global values of 612e8d8bef9SDimitry Andric // the operands in different IRSimilarityCandidates and a current mapping 613e8d8bef9SDimitry Andric // between the two. 614e8d8bef9SDimitry Andric // 615e8d8bef9SDimitry Andric // Source Operand GVN: 1 616e8d8bef9SDimitry Andric // Target Operand GVN: 2 617e8d8bef9SDimitry Andric // CurrentMapping: {1: {1, 2}} 618e8d8bef9SDimitry Andric // 619e8d8bef9SDimitry Andric // Since we have mapping, and the target operand is contained in the set, we 620e8d8bef9SDimitry Andric // update it to: 621e8d8bef9SDimitry Andric // CurrentMapping: {1: {2}} 622e8d8bef9SDimitry Andric // and can return true. But, if the mapping was 623e8d8bef9SDimitry Andric // CurrentMapping: {1: {3}} 624e8d8bef9SDimitry Andric // we would return false. 625e8d8bef9SDimitry Andric 626e8d8bef9SDimitry Andric bool WasInserted; 627e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 628e8d8bef9SDimitry Andric 629e8d8bef9SDimitry Andric std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 630e8d8bef9SDimitry Andric std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 631e8d8bef9SDimitry Andric 632e8d8bef9SDimitry Andric // If we created a new mapping, then we are done. 633e8d8bef9SDimitry Andric if (WasInserted) 634e8d8bef9SDimitry Andric return true; 635e8d8bef9SDimitry Andric 636e8d8bef9SDimitry Andric // If there is more than one option in the mapping set, and the target value 637e8d8bef9SDimitry Andric // is included in the mapping set replace that set with one that only includes 638e8d8bef9SDimitry Andric // the target value, as it is the only valid mapping via the non commutative 639e8d8bef9SDimitry Andric // instruction. 640e8d8bef9SDimitry Andric 641e8d8bef9SDimitry Andric DenseSet<unsigned> &TargetSet = Val->second; 642e8d8bef9SDimitry Andric if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 643e8d8bef9SDimitry Andric TargetSet.clear(); 644e8d8bef9SDimitry Andric TargetSet.insert(TargetArgVal); 645e8d8bef9SDimitry Andric return true; 646e8d8bef9SDimitry Andric } 647e8d8bef9SDimitry Andric 648e8d8bef9SDimitry Andric // Return true if we can find the value in the set. 649e8d8bef9SDimitry Andric return TargetSet.contains(TargetArgVal); 650e8d8bef9SDimitry Andric } 651e8d8bef9SDimitry Andric 652e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 653e8d8bef9SDimitry Andric OperandMapping A, OperandMapping B) { 654e8d8bef9SDimitry Andric // Iterators to keep track of where we are in the operands for each 655e8d8bef9SDimitry Andric // Instruction. 656e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 657e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 658e8d8bef9SDimitry Andric unsigned OperandLength = A.OperVals.size(); 659e8d8bef9SDimitry Andric 660e8d8bef9SDimitry Andric // For each operand, get the value numbering and ensure it is consistent. 661e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 662e8d8bef9SDimitry Andric unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 663e8d8bef9SDimitry Andric unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 664e8d8bef9SDimitry Andric 665e8d8bef9SDimitry Andric // Attempt to add a set with only the target value. If there is no mapping 666e8d8bef9SDimitry Andric // we can create it here. 667e8d8bef9SDimitry Andric // 668e8d8bef9SDimitry Andric // For an instruction like a subtraction: 669e8d8bef9SDimitry Andric // IRSimilarityCandidateA: IRSimilarityCandidateB: 670e8d8bef9SDimitry Andric // %resultA = sub %a, %b %resultB = sub %d, %e 671e8d8bef9SDimitry Andric // 672e8d8bef9SDimitry Andric // We map %a -> %d and %b -> %e. 673e8d8bef9SDimitry Andric // 674e8d8bef9SDimitry Andric // And check to see whether their mapping is consistent in 675e8d8bef9SDimitry Andric // checkNumberingAndReplace. 676e8d8bef9SDimitry Andric 677e8d8bef9SDimitry Andric if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 678e8d8bef9SDimitry Andric return false; 679e8d8bef9SDimitry Andric 680e8d8bef9SDimitry Andric if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 681e8d8bef9SDimitry Andric return false; 682e8d8bef9SDimitry Andric } 683e8d8bef9SDimitry Andric return true; 684e8d8bef9SDimitry Andric } 685e8d8bef9SDimitry Andric 686e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareCommutativeOperandMapping( 687e8d8bef9SDimitry Andric OperandMapping A, OperandMapping B) { 688e8d8bef9SDimitry Andric DenseSet<unsigned> ValueNumbersA; 689e8d8bef9SDimitry Andric DenseSet<unsigned> ValueNumbersB; 690e8d8bef9SDimitry Andric 691e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 692e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 693e8d8bef9SDimitry Andric unsigned OperandLength = A.OperVals.size(); 694e8d8bef9SDimitry Andric 695e8d8bef9SDimitry Andric // Find the value number sets for the operands. 696e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < OperandLength; 697e8d8bef9SDimitry Andric Idx++, VItA++, VItB++) { 698e8d8bef9SDimitry Andric ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 699e8d8bef9SDimitry Andric ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 700e8d8bef9SDimitry Andric } 701e8d8bef9SDimitry Andric 702e8d8bef9SDimitry Andric // Iterate over the operands in the first IRSimilarityCandidate and make sure 703e8d8bef9SDimitry Andric // there exists a possible mapping with the operands in the second 704e8d8bef9SDimitry Andric // IRSimilarityCandidate. 705e8d8bef9SDimitry Andric if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 706e8d8bef9SDimitry Andric A.ValueNumberMapping, A.OperVals, 707e8d8bef9SDimitry Andric ValueNumbersB)) 708e8d8bef9SDimitry Andric return false; 709e8d8bef9SDimitry Andric 710e8d8bef9SDimitry Andric // Iterate over the operands in the second IRSimilarityCandidate and make sure 711e8d8bef9SDimitry Andric // there exists a possible mapping with the operands in the first 712e8d8bef9SDimitry Andric // IRSimilarityCandidate. 713e8d8bef9SDimitry Andric if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 714e8d8bef9SDimitry Andric B.ValueNumberMapping, B.OperVals, 715e8d8bef9SDimitry Andric ValueNumbersA)) 716e8d8bef9SDimitry Andric return false; 717e8d8bef9SDimitry Andric 718e8d8bef9SDimitry Andric return true; 719e8d8bef9SDimitry Andric } 720e8d8bef9SDimitry Andric 721*06c3fb27SDimitry Andric bool IRSimilarityCandidate::compareAssignmentMapping( 722*06c3fb27SDimitry Andric const unsigned InstValA, const unsigned &InstValB, 723*06c3fb27SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, 724*06c3fb27SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { 725*06c3fb27SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 726*06c3fb27SDimitry Andric bool WasInserted; 727*06c3fb27SDimitry Andric std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 728*06c3fb27SDimitry Andric std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 729*06c3fb27SDimitry Andric if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 730*06c3fb27SDimitry Andric return false; 731*06c3fb27SDimitry Andric else if (ValueMappingIt->second.size() != 1) { 732*06c3fb27SDimitry Andric for (unsigned OtherVal : ValueMappingIt->second) { 733*06c3fb27SDimitry Andric if (OtherVal == InstValB) 734*06c3fb27SDimitry Andric continue; 735*06c3fb27SDimitry Andric if (!ValueNumberMappingA.contains(OtherVal)) 736*06c3fb27SDimitry Andric continue; 737*06c3fb27SDimitry Andric if (!ValueNumberMappingA[OtherVal].contains(InstValA)) 738*06c3fb27SDimitry Andric continue; 739*06c3fb27SDimitry Andric ValueNumberMappingA[OtherVal].erase(InstValA); 740*06c3fb27SDimitry Andric } 741*06c3fb27SDimitry Andric ValueNumberMappingA.erase(ValueMappingIt); 742*06c3fb27SDimitry Andric std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 743*06c3fb27SDimitry Andric std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 744*06c3fb27SDimitry Andric } 745*06c3fb27SDimitry Andric 746*06c3fb27SDimitry Andric return true; 747*06c3fb27SDimitry Andric } 748*06c3fb27SDimitry Andric 749349cc55cSDimitry Andric bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, 750349cc55cSDimitry Andric RelativeLocMapping B) { 751349cc55cSDimitry Andric // Get the basic blocks the label refers to. 752*06c3fb27SDimitry Andric BasicBlock *ABB = cast<BasicBlock>(A.OperVal); 753*06c3fb27SDimitry Andric BasicBlock *BBB = cast<BasicBlock>(B.OperVal); 754349cc55cSDimitry Andric 755349cc55cSDimitry Andric // Get the basic blocks contained in each region. 756349cc55cSDimitry Andric DenseSet<BasicBlock *> BasicBlockA; 757349cc55cSDimitry Andric DenseSet<BasicBlock *> BasicBlockB; 758349cc55cSDimitry Andric A.IRSC.getBasicBlocks(BasicBlockA); 759349cc55cSDimitry Andric B.IRSC.getBasicBlocks(BasicBlockB); 760349cc55cSDimitry Andric 761349cc55cSDimitry Andric // Determine if the block is contained in the region. 762349cc55cSDimitry Andric bool AContained = BasicBlockA.contains(ABB); 763349cc55cSDimitry Andric bool BContained = BasicBlockB.contains(BBB); 764349cc55cSDimitry Andric 765349cc55cSDimitry Andric // Both blocks need to be contained in the region, or both need to be outside 766*06c3fb27SDimitry Andric // the region. 767349cc55cSDimitry Andric if (AContained != BContained) 768349cc55cSDimitry Andric return false; 769349cc55cSDimitry Andric 770349cc55cSDimitry Andric // If both are contained, then we need to make sure that the relative 771349cc55cSDimitry Andric // distance to the target blocks are the same. 772349cc55cSDimitry Andric if (AContained) 773349cc55cSDimitry Andric return A.RelativeLocation == B.RelativeLocation; 774349cc55cSDimitry Andric return true; 775349cc55cSDimitry Andric } 776349cc55cSDimitry Andric 777e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 778e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 779349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> MappingA; 780349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> MappingB; 781349cc55cSDimitry Andric return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB); 782349cc55cSDimitry Andric } 783349cc55cSDimitry Andric 784349cc55cSDimitry Andric typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &, 785349cc55cSDimitry Andric SmallVector<int, 4> &, ArrayRef<Value *> &, 786349cc55cSDimitry Andric ArrayRef<Value *> &> 787349cc55cSDimitry Andric ZippedRelativeLocationsT; 788349cc55cSDimitry Andric 789349cc55cSDimitry Andric bool IRSimilarityCandidate::compareStructure( 790349cc55cSDimitry Andric const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, 791349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, 792349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { 793e8d8bef9SDimitry Andric if (A.getLength() != B.getLength()) 794e8d8bef9SDimitry Andric return false; 795e8d8bef9SDimitry Andric 796e8d8bef9SDimitry Andric if (A.ValueToNumber.size() != B.ValueToNumber.size()) 797e8d8bef9SDimitry Andric return false; 798e8d8bef9SDimitry Andric 799e8d8bef9SDimitry Andric iterator ItA = A.begin(); 800e8d8bef9SDimitry Andric iterator ItB = B.begin(); 801e8d8bef9SDimitry Andric 802349cc55cSDimitry Andric // These ValueNumber Mapping sets create a create a mapping between the values 803349cc55cSDimitry Andric // in one candidate to values in the other candidate. If we create a set with 804349cc55cSDimitry Andric // one element, and that same element maps to the original element in the 805349cc55cSDimitry Andric // candidate we have a good mapping. 806e8d8bef9SDimitry Andric 807e8d8bef9SDimitry Andric // Iterate over the instructions contained in each candidate 808e8d8bef9SDimitry Andric unsigned SectionLength = A.getStartIdx() + A.getLength(); 809e8d8bef9SDimitry Andric for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 810e8d8bef9SDimitry Andric ItA++, ItB++, Loc++) { 811e8d8bef9SDimitry Andric // Make sure the instructions are similar to one another. 812e8d8bef9SDimitry Andric if (!isClose(*ItA, *ItB)) 813e8d8bef9SDimitry Andric return false; 814e8d8bef9SDimitry Andric 815e8d8bef9SDimitry Andric Instruction *IA = ItA->Inst; 816e8d8bef9SDimitry Andric Instruction *IB = ItB->Inst; 817e8d8bef9SDimitry Andric 818e8d8bef9SDimitry Andric if (!ItA->Legal || !ItB->Legal) 819e8d8bef9SDimitry Andric return false; 820e8d8bef9SDimitry Andric 821e8d8bef9SDimitry Andric // Get the operand sets for the instructions. 822e8d8bef9SDimitry Andric ArrayRef<Value *> OperValsA = ItA->OperVals; 823e8d8bef9SDimitry Andric ArrayRef<Value *> OperValsB = ItB->OperVals; 824e8d8bef9SDimitry Andric 825e8d8bef9SDimitry Andric unsigned InstValA = A.ValueToNumber.find(IA)->second; 826e8d8bef9SDimitry Andric unsigned InstValB = B.ValueToNumber.find(IB)->second; 827e8d8bef9SDimitry Andric 828e8d8bef9SDimitry Andric // Ensure that the mappings for the instructions exists. 829*06c3fb27SDimitry Andric if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA, 830*06c3fb27SDimitry Andric ValueNumberMappingB)) 831e8d8bef9SDimitry Andric return false; 832e8d8bef9SDimitry Andric 833*06c3fb27SDimitry Andric if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB, 834*06c3fb27SDimitry Andric ValueNumberMappingA)) 835e8d8bef9SDimitry Andric return false; 836e8d8bef9SDimitry Andric 837e8d8bef9SDimitry Andric // We have different paths for commutative instructions and non-commutative 838e8d8bef9SDimitry Andric // instructions since commutative instructions could allow multiple mappings 839e8d8bef9SDimitry Andric // to certain values. 84081ad6265SDimitry Andric if (IA->isCommutative() && !isa<FPMathOperator>(IA) && 84181ad6265SDimitry Andric !isa<IntrinsicInst>(IA)) { 842e8d8bef9SDimitry Andric if (!compareCommutativeOperandMapping( 843e8d8bef9SDimitry Andric {A, OperValsA, ValueNumberMappingA}, 844e8d8bef9SDimitry Andric {B, OperValsB, ValueNumberMappingB})) 845e8d8bef9SDimitry Andric return false; 846e8d8bef9SDimitry Andric continue; 847e8d8bef9SDimitry Andric } 848e8d8bef9SDimitry Andric 849e8d8bef9SDimitry Andric // Handle the non-commutative cases. 850e8d8bef9SDimitry Andric if (!compareNonCommutativeOperandMapping( 851e8d8bef9SDimitry Andric {A, OperValsA, ValueNumberMappingA}, 852e8d8bef9SDimitry Andric {B, OperValsB, ValueNumberMappingB})) 853e8d8bef9SDimitry Andric return false; 854349cc55cSDimitry Andric 855349cc55cSDimitry Andric // Here we check that between two corresponding instructions, 856349cc55cSDimitry Andric // when referring to a basic block in the same region, the 857349cc55cSDimitry Andric // relative locations are the same. And, that the instructions refer to 858349cc55cSDimitry Andric // basic blocks outside the region in the same corresponding locations. 859349cc55cSDimitry Andric 860349cc55cSDimitry Andric // We are able to make the assumption about blocks outside of the region 861349cc55cSDimitry Andric // since the target block labels are considered values and will follow the 862349cc55cSDimitry Andric // same number matching that we defined for the other instructions in the 863349cc55cSDimitry Andric // region. So, at this point, in each location we target a specific block 864349cc55cSDimitry Andric // outside the region, we are targeting a corresponding block in each 865349cc55cSDimitry Andric // analagous location in the region we are comparing to. 866349cc55cSDimitry Andric if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) && 867349cc55cSDimitry Andric !(isa<PHINode>(IA) && isa<PHINode>(IB))) 868349cc55cSDimitry Andric continue; 869349cc55cSDimitry Andric 870349cc55cSDimitry Andric SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations; 871349cc55cSDimitry Andric SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations; 872*06c3fb27SDimitry Andric ArrayRef<Value *> ABL = ItA->getBlockOperVals(); 873*06c3fb27SDimitry Andric ArrayRef<Value *> BBL = ItB->getBlockOperVals(); 874*06c3fb27SDimitry Andric 875*06c3fb27SDimitry Andric // Check to make sure that the number of operands, and branching locations 876*06c3fb27SDimitry Andric // between BranchInsts is the same. 877349cc55cSDimitry Andric if (RelBlockLocsA.size() != RelBlockLocsB.size() && 878*06c3fb27SDimitry Andric ABL.size() != BBL.size()) 879349cc55cSDimitry Andric return false; 880349cc55cSDimitry Andric 881*06c3fb27SDimitry Andric assert(RelBlockLocsA.size() == ABL.size() && 882*06c3fb27SDimitry Andric "Block information vectors not the same size."); 883*06c3fb27SDimitry Andric assert(RelBlockLocsB.size() == BBL.size() && 884*06c3fb27SDimitry Andric "Block information vectors not the same size."); 885*06c3fb27SDimitry Andric 886349cc55cSDimitry Andric ZippedRelativeLocationsT ZippedRelativeLocations = 887*06c3fb27SDimitry Andric zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL); 888349cc55cSDimitry Andric if (any_of(ZippedRelativeLocations, 889349cc55cSDimitry Andric [&A, &B](std::tuple<int, int, Value *, Value *> R) { 890349cc55cSDimitry Andric return !checkRelativeLocations( 891349cc55cSDimitry Andric {A, std::get<0>(R), std::get<2>(R)}, 892349cc55cSDimitry Andric {B, std::get<1>(R), std::get<3>(R)}); 893349cc55cSDimitry Andric })) 894349cc55cSDimitry Andric return false; 895e8d8bef9SDimitry Andric } 896e8d8bef9SDimitry Andric return true; 897e8d8bef9SDimitry Andric } 898e8d8bef9SDimitry Andric 899e8d8bef9SDimitry Andric bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 900e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 901e8d8bef9SDimitry Andric auto DoesOverlap = [](const IRSimilarityCandidate &X, 902e8d8bef9SDimitry Andric const IRSimilarityCandidate &Y) { 903e8d8bef9SDimitry Andric // Check: 904e8d8bef9SDimitry Andric // XXXXXX X starts before Y ends 905e8d8bef9SDimitry Andric // YYYYYYY Y starts after X starts 906e8d8bef9SDimitry Andric return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 907e8d8bef9SDimitry Andric }; 908e8d8bef9SDimitry Andric 909e8d8bef9SDimitry Andric return DoesOverlap(A, B) || DoesOverlap(B, A); 910e8d8bef9SDimitry Andric } 911e8d8bef9SDimitry Andric 912e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper( 913e8d8bef9SDimitry Andric Module &M, std::vector<IRInstructionData *> &InstrList, 914e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 915e8d8bef9SDimitry Andric 916e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrListForModule; 917e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMappingForModule; 918e8d8bef9SDimitry Andric // Iterate over the functions in the module to map each Instruction in each 919e8d8bef9SDimitry Andric // BasicBlock to an unsigned integer. 920349cc55cSDimitry Andric Mapper.initializeForBBs(M); 921349cc55cSDimitry Andric 922e8d8bef9SDimitry Andric for (Function &F : M) { 923e8d8bef9SDimitry Andric 924e8d8bef9SDimitry Andric if (F.empty()) 925e8d8bef9SDimitry Andric continue; 926e8d8bef9SDimitry Andric 927e8d8bef9SDimitry Andric for (BasicBlock &BB : F) { 928e8d8bef9SDimitry Andric 929e8d8bef9SDimitry Andric // BB has potential to have similarity since it has a size greater than 2 930e8d8bef9SDimitry Andric // and can therefore match other regions greater than 2. Map it to a list 931e8d8bef9SDimitry Andric // of unsigned integers. 932e8d8bef9SDimitry Andric Mapper.convertToUnsignedVec(BB, InstrListForModule, 933e8d8bef9SDimitry Andric IntegerMappingForModule); 934e8d8bef9SDimitry Andric } 935349cc55cSDimitry Andric 936349cc55cSDimitry Andric BasicBlock::iterator It = F.begin()->end(); 937349cc55cSDimitry Andric Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule, 938349cc55cSDimitry Andric true); 939349cc55cSDimitry Andric if (InstrListForModule.size() > 0) 940349cc55cSDimitry Andric Mapper.IDL->push_back(*InstrListForModule.back()); 941e8d8bef9SDimitry Andric } 942e8d8bef9SDimitry Andric 943e8d8bef9SDimitry Andric // Insert the InstrListForModule at the end of the overall InstrList so that 944e8d8bef9SDimitry Andric // we can have a long InstrList for the entire set of Modules being analyzed. 945e8d8bef9SDimitry Andric llvm::append_range(InstrList, InstrListForModule); 946e8d8bef9SDimitry Andric // Do the same as above, but for IntegerMapping. 947e8d8bef9SDimitry Andric llvm::append_range(IntegerMapping, IntegerMappingForModule); 948e8d8bef9SDimitry Andric } 949e8d8bef9SDimitry Andric 950e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper( 951e8d8bef9SDimitry Andric ArrayRef<std::unique_ptr<Module>> &Modules, 952e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrList, 953e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 954e8d8bef9SDimitry Andric 955e8d8bef9SDimitry Andric // Iterate over, and map the instructions in each module. 956e8d8bef9SDimitry Andric for (const std::unique_ptr<Module> &M : Modules) 957e8d8bef9SDimitry Andric populateMapper(*M, InstrList, IntegerMapping); 958e8d8bef9SDimitry Andric } 959e8d8bef9SDimitry Andric 960e8d8bef9SDimitry Andric /// From a repeated subsequence, find all the different instances of the 961e8d8bef9SDimitry Andric /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 962e8d8bef9SDimitry Andric /// the IRInstructionData in subsequence. 963e8d8bef9SDimitry Andric /// 9644824e7fdSDimitry Andric /// \param [in] Mapper - The instruction mapper for basic correctness checks. 965e8d8bef9SDimitry Andric /// \param [in] InstrList - The vector that holds the instruction data. 966e8d8bef9SDimitry Andric /// \param [in] IntegerMapping - The vector that holds the mapped integers. 967e8d8bef9SDimitry Andric /// \param [out] CandsForRepSubstring - The vector to store the generated 968e8d8bef9SDimitry Andric /// IRSimilarityCandidates. 969e8d8bef9SDimitry Andric static void createCandidatesFromSuffixTree( 970fe6060f1SDimitry Andric const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, 971e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 972e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 973e8d8bef9SDimitry Andric 974e8d8bef9SDimitry Andric unsigned StringLen = RS.Length; 975349cc55cSDimitry Andric if (StringLen < 2) 976349cc55cSDimitry Andric return; 977e8d8bef9SDimitry Andric 978e8d8bef9SDimitry Andric // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 979e8d8bef9SDimitry Andric for (const unsigned &StartIdx : RS.StartIndices) { 980e8d8bef9SDimitry Andric unsigned EndIdx = StartIdx + StringLen - 1; 981e8d8bef9SDimitry Andric 982e8d8bef9SDimitry Andric // Check that this subsequence does not contain an illegal instruction. 983e8d8bef9SDimitry Andric bool ContainsIllegal = false; 984e8d8bef9SDimitry Andric for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 985e8d8bef9SDimitry Andric unsigned Key = IntegerMapping[CurrIdx]; 986e8d8bef9SDimitry Andric if (Key > Mapper.IllegalInstrNumber) { 987e8d8bef9SDimitry Andric ContainsIllegal = true; 988e8d8bef9SDimitry Andric break; 989e8d8bef9SDimitry Andric } 990e8d8bef9SDimitry Andric } 991e8d8bef9SDimitry Andric 992e8d8bef9SDimitry Andric // If we have an illegal instruction, we should not create an 993e8d8bef9SDimitry Andric // IRSimilarityCandidate for this region. 994e8d8bef9SDimitry Andric if (ContainsIllegal) 995e8d8bef9SDimitry Andric continue; 996e8d8bef9SDimitry Andric 997e8d8bef9SDimitry Andric // We are getting iterators to the instructions in this region of code 998e8d8bef9SDimitry Andric // by advancing the start and end indices from the start of the 999e8d8bef9SDimitry Andric // InstrList. 1000e8d8bef9SDimitry Andric std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 1001e8d8bef9SDimitry Andric std::advance(StartIt, StartIdx); 1002e8d8bef9SDimitry Andric std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 1003e8d8bef9SDimitry Andric std::advance(EndIt, EndIdx); 1004e8d8bef9SDimitry Andric 1005e8d8bef9SDimitry Andric CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 1006e8d8bef9SDimitry Andric } 1007e8d8bef9SDimitry Andric } 1008e8d8bef9SDimitry Andric 1009349cc55cSDimitry Andric void IRSimilarityCandidate::createCanonicalRelationFrom( 1010349cc55cSDimitry Andric IRSimilarityCandidate &SourceCand, 1011349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, 1012349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) { 1013349cc55cSDimitry Andric assert(SourceCand.CanonNumToNumber.size() != 0 && 1014349cc55cSDimitry Andric "Base canonical relationship is empty!"); 1015349cc55cSDimitry Andric assert(SourceCand.NumberToCanonNum.size() != 0 && 1016349cc55cSDimitry Andric "Base canonical relationship is empty!"); 1017349cc55cSDimitry Andric 1018349cc55cSDimitry Andric assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty"); 1019349cc55cSDimitry Andric assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty"); 1020349cc55cSDimitry Andric 1021349cc55cSDimitry Andric DenseSet<unsigned> UsedGVNs; 1022349cc55cSDimitry Andric // Iterate over the mappings provided from this candidate to SourceCand. We 1023349cc55cSDimitry Andric // are then able to map the GVN in this candidate to the same canonical number 1024349cc55cSDimitry Andric // given to the corresponding GVN in SourceCand. 1025349cc55cSDimitry Andric for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) { 1026349cc55cSDimitry Andric unsigned SourceGVN = GVNMapping.first; 1027349cc55cSDimitry Andric 1028349cc55cSDimitry Andric assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!"); 1029349cc55cSDimitry Andric 1030349cc55cSDimitry Andric unsigned ResultGVN; 1031349cc55cSDimitry Andric // We need special handling if we have more than one potential value. This 1032349cc55cSDimitry Andric // means that there are at least two GVNs that could correspond to this GVN. 1033349cc55cSDimitry Andric // This could lead to potential swapping later on, so we make a decision 1034349cc55cSDimitry Andric // here to ensure a one-to-one mapping. 1035349cc55cSDimitry Andric if (GVNMapping.second.size() > 1) { 1036349cc55cSDimitry Andric bool Found = false; 1037349cc55cSDimitry Andric for (unsigned Val : GVNMapping.second) { 1038349cc55cSDimitry Andric // We make sure the target value number hasn't already been reserved. 1039349cc55cSDimitry Andric if (UsedGVNs.contains(Val)) 1040349cc55cSDimitry Andric continue; 1041349cc55cSDimitry Andric 1042349cc55cSDimitry Andric // We make sure that the opposite mapping is still consistent. 1043349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator It = 1044349cc55cSDimitry Andric FromSourceMapping.find(Val); 1045349cc55cSDimitry Andric 1046349cc55cSDimitry Andric if (!It->second.contains(SourceGVN)) 1047349cc55cSDimitry Andric continue; 1048349cc55cSDimitry Andric 1049349cc55cSDimitry Andric // We pick the first item that satisfies these conditions. 1050349cc55cSDimitry Andric Found = true; 1051349cc55cSDimitry Andric ResultGVN = Val; 1052349cc55cSDimitry Andric break; 1053349cc55cSDimitry Andric } 1054349cc55cSDimitry Andric 1055349cc55cSDimitry Andric assert(Found && "Could not find matching value for source GVN"); 1056349cc55cSDimitry Andric (void)Found; 1057349cc55cSDimitry Andric 1058349cc55cSDimitry Andric } else 1059349cc55cSDimitry Andric ResultGVN = *GVNMapping.second.begin(); 1060349cc55cSDimitry Andric 1061349cc55cSDimitry Andric // Whatever GVN is found, we mark it as used. 1062349cc55cSDimitry Andric UsedGVNs.insert(ResultGVN); 1063349cc55cSDimitry Andric 1064349cc55cSDimitry Andric unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN); 1065349cc55cSDimitry Andric CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN)); 1066349cc55cSDimitry Andric NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum)); 1067349cc55cSDimitry Andric } 106881ad6265SDimitry Andric 106981ad6265SDimitry Andric DenseSet<BasicBlock *> BBSet; 107081ad6265SDimitry Andric getBasicBlocks(BBSet); 107181ad6265SDimitry Andric // Find canonical numbers for the BasicBlocks in the current candidate. 107281ad6265SDimitry Andric // This is done by finding the corresponding value for the first instruction 107381ad6265SDimitry Andric // in the block in the current candidate, finding the matching value in the 107481ad6265SDimitry Andric // source candidate. Then by finding the parent of this value, use the 107581ad6265SDimitry Andric // canonical number of the block in the source candidate for the canonical 107681ad6265SDimitry Andric // number in the current candidate. 107781ad6265SDimitry Andric for (BasicBlock *BB : BBSet) { 107881ad6265SDimitry Andric unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second; 107981ad6265SDimitry Andric 108081ad6265SDimitry Andric // We can skip the BasicBlock if the canonical numbering has already been 108181ad6265SDimitry Andric // found in a separate instruction. 1082*06c3fb27SDimitry Andric if (NumberToCanonNum.contains(BBGVNForCurrCand)) 108381ad6265SDimitry Andric continue; 108481ad6265SDimitry Andric 108581ad6265SDimitry Andric // If the basic block is the starting block, then the shared instruction may 108681ad6265SDimitry Andric // not be the first instruction in the block, it will be the first 108781ad6265SDimitry Andric // instruction in the similarity region. 108881ad6265SDimitry Andric Value *FirstOutlineInst = BB == getStartBB() 108981ad6265SDimitry Andric ? frontInstruction() 109081ad6265SDimitry Andric : &*BB->instructionsWithoutDebug().begin(); 109181ad6265SDimitry Andric 109281ad6265SDimitry Andric unsigned FirstInstGVN = *getGVN(FirstOutlineInst); 109381ad6265SDimitry Andric unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN); 109481ad6265SDimitry Andric unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum); 109581ad6265SDimitry Andric Value *SourceV = *SourceCand.fromGVN(SourceGVN); 109681ad6265SDimitry Andric BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent(); 109781ad6265SDimitry Andric unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB); 109881ad6265SDimitry Andric unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN); 109981ad6265SDimitry Andric CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand)); 110081ad6265SDimitry Andric NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN)); 110181ad6265SDimitry Andric } 1102349cc55cSDimitry Andric } 1103349cc55cSDimitry Andric 1104*06c3fb27SDimitry Andric void IRSimilarityCandidate::createCanonicalRelationFrom( 1105*06c3fb27SDimitry Andric IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge, 1106*06c3fb27SDimitry Andric IRSimilarityCandidate &TargetCandLarge) { 1107*06c3fb27SDimitry Andric assert(!SourceCand.CanonNumToNumber.empty() && 1108*06c3fb27SDimitry Andric "Canonical Relationship is non-empty"); 1109*06c3fb27SDimitry Andric assert(!SourceCand.NumberToCanonNum.empty() && 1110*06c3fb27SDimitry Andric "Canonical Relationship is non-empty"); 1111*06c3fb27SDimitry Andric 1112*06c3fb27SDimitry Andric assert(!SourceCandLarge.CanonNumToNumber.empty() && 1113*06c3fb27SDimitry Andric "Canonical Relationship is non-empty"); 1114*06c3fb27SDimitry Andric assert(!SourceCandLarge.NumberToCanonNum.empty() && 1115*06c3fb27SDimitry Andric "Canonical Relationship is non-empty"); 1116*06c3fb27SDimitry Andric 1117*06c3fb27SDimitry Andric assert(!TargetCandLarge.CanonNumToNumber.empty() && 1118*06c3fb27SDimitry Andric "Canonical Relationship is non-empty"); 1119*06c3fb27SDimitry Andric assert(!TargetCandLarge.NumberToCanonNum.empty() && 1120*06c3fb27SDimitry Andric "Canonical Relationship is non-empty"); 1121*06c3fb27SDimitry Andric 1122*06c3fb27SDimitry Andric assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty"); 1123*06c3fb27SDimitry Andric assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty"); 1124*06c3fb27SDimitry Andric 1125*06c3fb27SDimitry Andric // We're going to use the larger candidates as a "bridge" to create the 1126*06c3fb27SDimitry Andric // canonical number for the target candidate since we have idetified two 1127*06c3fb27SDimitry Andric // candidates as subsequences of larger sequences, and therefore must be 1128*06c3fb27SDimitry Andric // structurally similar. 1129*06c3fb27SDimitry Andric for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) { 1130*06c3fb27SDimitry Andric Value *CurrVal = ValueNumPair.first; 1131*06c3fb27SDimitry Andric unsigned TargetCandGVN = ValueNumPair.second; 1132*06c3fb27SDimitry Andric 1133*06c3fb27SDimitry Andric // Find the numbering in the large candidate that surrounds the 1134*06c3fb27SDimitry Andric // current candidate. 1135*06c3fb27SDimitry Andric std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal); 1136*06c3fb27SDimitry Andric assert(OLargeTargetGVN.has_value() && "GVN not found for Value"); 1137*06c3fb27SDimitry Andric 1138*06c3fb27SDimitry Andric // Get the canonical numbering in the large target candidate. 1139*06c3fb27SDimitry Andric std::optional<unsigned> OTargetCandCanon = 1140*06c3fb27SDimitry Andric TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value()); 1141*06c3fb27SDimitry Andric assert(OTargetCandCanon.has_value() && 1142*06c3fb27SDimitry Andric "Canononical Number not found for GVN"); 1143*06c3fb27SDimitry Andric 1144*06c3fb27SDimitry Andric // Get the GVN in the large source candidate from the canonical numbering. 1145*06c3fb27SDimitry Andric std::optional<unsigned> OLargeSourceGVN = 1146*06c3fb27SDimitry Andric SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value()); 1147*06c3fb27SDimitry Andric assert(OLargeSourceGVN.has_value() && 1148*06c3fb27SDimitry Andric "GVN Number not found for Canonical Number"); 1149*06c3fb27SDimitry Andric 1150*06c3fb27SDimitry Andric // Get the Value from the GVN in the large source candidate. 1151*06c3fb27SDimitry Andric std::optional<Value *> OLargeSourceV = 1152*06c3fb27SDimitry Andric SourceCandLarge.fromGVN(OLargeSourceGVN.value()); 1153*06c3fb27SDimitry Andric assert(OLargeSourceV.has_value() && "Value not found for GVN"); 1154*06c3fb27SDimitry Andric 1155*06c3fb27SDimitry Andric // Get the GVN number for the Value in the source candidate. 1156*06c3fb27SDimitry Andric std::optional<unsigned> OSourceGVN = 1157*06c3fb27SDimitry Andric SourceCand.getGVN(OLargeSourceV.value()); 1158*06c3fb27SDimitry Andric assert(OSourceGVN.has_value() && "GVN Number not found for Value"); 1159*06c3fb27SDimitry Andric 1160*06c3fb27SDimitry Andric // Get the canonical numbering from the GVN/ 1161*06c3fb27SDimitry Andric std::optional<unsigned> OSourceCanon = 1162*06c3fb27SDimitry Andric SourceCand.getCanonicalNum(OSourceGVN.value()); 1163*06c3fb27SDimitry Andric assert(OSourceCanon.has_value() && "Canon Number not found for GVN"); 1164*06c3fb27SDimitry Andric 1165*06c3fb27SDimitry Andric // Insert the canonical numbering and GVN pair into their respective 1166*06c3fb27SDimitry Andric // mappings. 1167*06c3fb27SDimitry Andric CanonNumToNumber.insert( 1168*06c3fb27SDimitry Andric std::make_pair(OSourceCanon.value(), TargetCandGVN)); 1169*06c3fb27SDimitry Andric NumberToCanonNum.insert( 1170*06c3fb27SDimitry Andric std::make_pair(TargetCandGVN, OSourceCanon.value())); 1171*06c3fb27SDimitry Andric } 1172*06c3fb27SDimitry Andric } 1173*06c3fb27SDimitry Andric 1174349cc55cSDimitry Andric void IRSimilarityCandidate::createCanonicalMappingFor( 1175349cc55cSDimitry Andric IRSimilarityCandidate &CurrCand) { 1176349cc55cSDimitry Andric assert(CurrCand.CanonNumToNumber.size() == 0 && 1177349cc55cSDimitry Andric "Canonical Relationship is non-empty"); 1178349cc55cSDimitry Andric assert(CurrCand.NumberToCanonNum.size() == 0 && 1179349cc55cSDimitry Andric "Canonical Relationship is non-empty"); 1180349cc55cSDimitry Andric 1181349cc55cSDimitry Andric unsigned CanonNum = 0; 1182349cc55cSDimitry Andric // Iterate over the value numbers found, the order does not matter in this 1183349cc55cSDimitry Andric // case. 1184349cc55cSDimitry Andric for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) { 1185349cc55cSDimitry Andric CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum)); 1186349cc55cSDimitry Andric CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first)); 1187349cc55cSDimitry Andric CanonNum++; 1188349cc55cSDimitry Andric } 1189349cc55cSDimitry Andric } 1190349cc55cSDimitry Andric 1191*06c3fb27SDimitry Andric /// Look for larger IRSimilarityCandidates From the previously matched 1192*06c3fb27SDimitry Andric /// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is 1193*06c3fb27SDimitry Andric /// an overlap, return a pair of structurally similar, larger 1194*06c3fb27SDimitry Andric /// IRSimilarityCandidates. 1195*06c3fb27SDimitry Andric /// 1196*06c3fb27SDimitry Andric /// \param [in] CandA - The first candidate we are trying to determine the 1197*06c3fb27SDimitry Andric /// structure of. 1198*06c3fb27SDimitry Andric /// \param [in] CandB - The second candidate we are trying to determine the 1199*06c3fb27SDimitry Andric /// structure of. 1200*06c3fb27SDimitry Andric /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in 1201*06c3fb27SDimitry Andric /// a circuit to the IRSimilarityCandidates that include this instruction. 1202*06c3fb27SDimitry Andric /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a 1203*06c3fb27SDimitry Andric /// number representing the structural group assigned to it. 1204*06c3fb27SDimitry Andric static std::optional< 1205*06c3fb27SDimitry Andric std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> 1206*06c3fb27SDimitry Andric CheckLargerCands( 1207*06c3fb27SDimitry Andric IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, 1208*06c3fb27SDimitry Andric DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, 1209*06c3fb27SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) { 1210*06c3fb27SDimitry Andric DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA; 1211*06c3fb27SDimitry Andric DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB; 1212*06c3fb27SDimitry Andric DenseSet<unsigned> IncludedGroupsA; 1213*06c3fb27SDimitry Andric DenseSet<unsigned> IncludedGroupsB; 1214*06c3fb27SDimitry Andric 1215*06c3fb27SDimitry Andric // Find the overall similarity group numbers that fully contain the candidate, 1216*06c3fb27SDimitry Andric // and record the larger candidate for each group. 1217*06c3fb27SDimitry Andric auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx()); 1218*06c3fb27SDimitry Andric std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> 1219*06c3fb27SDimitry Andric Result; 1220*06c3fb27SDimitry Andric 1221*06c3fb27SDimitry Andric unsigned CandAStart = CandA.getStartIdx(); 1222*06c3fb27SDimitry Andric unsigned CandAEnd = CandA.getEndIdx(); 1223*06c3fb27SDimitry Andric unsigned CandBStart = CandB.getStartIdx(); 1224*06c3fb27SDimitry Andric unsigned CandBEnd = CandB.getEndIdx(); 1225*06c3fb27SDimitry Andric if (IdxToCandidateIt == IndexToIncludedCand.end()) 1226*06c3fb27SDimitry Andric return Result; 1227*06c3fb27SDimitry Andric for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { 1228*06c3fb27SDimitry Andric if (MatchedCand->getStartIdx() > CandAStart || 1229*06c3fb27SDimitry Andric (MatchedCand->getEndIdx() < CandAEnd)) 1230*06c3fb27SDimitry Andric continue; 1231*06c3fb27SDimitry Andric unsigned GroupNum = CandToGroup.find(MatchedCand)->second; 1232*06c3fb27SDimitry Andric IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand)); 1233*06c3fb27SDimitry Andric IncludedGroupsA.insert(GroupNum); 1234*06c3fb27SDimitry Andric } 1235*06c3fb27SDimitry Andric 1236*06c3fb27SDimitry Andric // Find the overall similarity group numbers that fully contain the next 1237*06c3fb27SDimitry Andric // candidate, and record the larger candidate for each group. 1238*06c3fb27SDimitry Andric IdxToCandidateIt = IndexToIncludedCand.find(CandBStart); 1239*06c3fb27SDimitry Andric if (IdxToCandidateIt == IndexToIncludedCand.end()) 1240*06c3fb27SDimitry Andric return Result; 1241*06c3fb27SDimitry Andric for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { 1242*06c3fb27SDimitry Andric if (MatchedCand->getStartIdx() > CandBStart || 1243*06c3fb27SDimitry Andric MatchedCand->getEndIdx() < CandBEnd) 1244*06c3fb27SDimitry Andric continue; 1245*06c3fb27SDimitry Andric unsigned GroupNum = CandToGroup.find(MatchedCand)->second; 1246*06c3fb27SDimitry Andric IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand)); 1247*06c3fb27SDimitry Andric IncludedGroupsB.insert(GroupNum); 1248*06c3fb27SDimitry Andric } 1249*06c3fb27SDimitry Andric 1250*06c3fb27SDimitry Andric // Find the intersection between the two groups, these are the groups where 1251*06c3fb27SDimitry Andric // the larger candidates exist. 1252*06c3fb27SDimitry Andric set_intersect(IncludedGroupsA, IncludedGroupsB); 1253*06c3fb27SDimitry Andric 1254*06c3fb27SDimitry Andric // If there is no intersection between the sets, then we cannot determine 1255*06c3fb27SDimitry Andric // whether or not there is a match. 1256*06c3fb27SDimitry Andric if (IncludedGroupsA.empty()) 1257*06c3fb27SDimitry Andric return Result; 1258*06c3fb27SDimitry Andric 1259*06c3fb27SDimitry Andric // Create a pair that contains the larger candidates. 1260*06c3fb27SDimitry Andric auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin()); 1261*06c3fb27SDimitry Andric auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin()); 1262*06c3fb27SDimitry Andric Result = std::make_pair(ItA->second, ItB->second); 1263*06c3fb27SDimitry Andric return Result; 1264*06c3fb27SDimitry Andric } 1265*06c3fb27SDimitry Andric 1266e8d8bef9SDimitry Andric /// From the list of IRSimilarityCandidates, perform a comparison between each 1267e8d8bef9SDimitry Andric /// IRSimilarityCandidate to determine if there are overlapping 1268e8d8bef9SDimitry Andric /// IRInstructionData, or if they do not have the same structure. 1269e8d8bef9SDimitry Andric /// 1270e8d8bef9SDimitry Andric /// \param [in] CandsForRepSubstring - The vector containing the 1271e8d8bef9SDimitry Andric /// IRSimilarityCandidates. 1272e8d8bef9SDimitry Andric /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 1273e8d8bef9SDimitry Andric /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 1274e8d8bef9SDimitry Andric /// vector are structurally similar to one another. 1275*06c3fb27SDimitry Andric /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in 1276*06c3fb27SDimitry Andric /// a circuit to the IRSimilarityCandidates that include this instruction. 1277*06c3fb27SDimitry Andric /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a 1278*06c3fb27SDimitry Andric /// number representing the structural group assigned to it. 1279e8d8bef9SDimitry Andric static void findCandidateStructures( 1280e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 1281*06c3fb27SDimitry Andric DenseMap<unsigned, SimilarityGroup> &StructuralGroups, 1282*06c3fb27SDimitry Andric DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, 1283*06c3fb27SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup 1284*06c3fb27SDimitry Andric ) { 1285e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 1286e8d8bef9SDimitry Andric InnerCandEndIt; 1287e8d8bef9SDimitry Andric 1288e8d8bef9SDimitry Andric // IRSimilarityCandidates each have a structure for operand use. It is 1289e8d8bef9SDimitry Andric // possible that two instances of the same subsequences have different 1290e8d8bef9SDimitry Andric // structure. Each type of structure found is assigned a number. This 1291e8d8bef9SDimitry Andric // DenseMap maps an IRSimilarityCandidate to which type of similarity 1292e8d8bef9SDimitry Andric // discovered it fits within. 1293e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 1294e8d8bef9SDimitry Andric 1295e8d8bef9SDimitry Andric // Find the compatibility from each candidate to the others to determine 1296e8d8bef9SDimitry Andric // which candidates overlap and which have the same structure by mapping 1297e8d8bef9SDimitry Andric // each structure to a different group. 1298e8d8bef9SDimitry Andric bool SameStructure; 1299e8d8bef9SDimitry Andric bool Inserted; 1300e8d8bef9SDimitry Andric unsigned CurrentGroupNum = 0; 1301e8d8bef9SDimitry Andric unsigned OuterGroupNum; 1302e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 1303e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 1304e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 1305e8d8bef9SDimitry Andric 1306e8d8bef9SDimitry Andric // Iterate over the candidates to determine its structural and overlapping 1307e8d8bef9SDimitry Andric // compatibility with other instructions 1308349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 1309349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 1310e8d8bef9SDimitry Andric for (CandIt = CandsForRepSubstring.begin(), 1311e8d8bef9SDimitry Andric CandEndIt = CandsForRepSubstring.end(); 1312e8d8bef9SDimitry Andric CandIt != CandEndIt; CandIt++) { 1313e8d8bef9SDimitry Andric 1314e8d8bef9SDimitry Andric // Determine if it has an assigned structural group already. 1315e8d8bef9SDimitry Andric CandToGroupIt = CandToGroup.find(&*CandIt); 1316e8d8bef9SDimitry Andric if (CandToGroupIt == CandToGroup.end()) { 1317e8d8bef9SDimitry Andric // If not, we assign it one, and add it to our mapping. 1318e8d8bef9SDimitry Andric std::tie(CandToGroupIt, Inserted) = 1319e8d8bef9SDimitry Andric CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 1320e8d8bef9SDimitry Andric } 1321e8d8bef9SDimitry Andric 1322e8d8bef9SDimitry Andric // Get the structural group number from the iterator. 1323e8d8bef9SDimitry Andric OuterGroupNum = CandToGroupIt->second; 1324e8d8bef9SDimitry Andric 1325e8d8bef9SDimitry Andric // Check if we already have a list of IRSimilarityCandidates for the current 1326e8d8bef9SDimitry Andric // structural group. Create one if one does not exist. 1327e8d8bef9SDimitry Andric CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 1328349cc55cSDimitry Andric if (CurrentGroupPair == StructuralGroups.end()) { 1329349cc55cSDimitry Andric IRSimilarityCandidate::createCanonicalMappingFor(*CandIt); 1330e8d8bef9SDimitry Andric std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 1331e8d8bef9SDimitry Andric std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 1332349cc55cSDimitry Andric } 1333e8d8bef9SDimitry Andric 1334e8d8bef9SDimitry Andric // Iterate over the IRSimilarityCandidates following the current 1335e8d8bef9SDimitry Andric // IRSimilarityCandidate in the list to determine whether the two 1336e8d8bef9SDimitry Andric // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 1337e8d8bef9SDimitry Andric // of IRSimilarityCandidates. 1338e8d8bef9SDimitry Andric for (InnerCandIt = std::next(CandIt), 1339e8d8bef9SDimitry Andric InnerCandEndIt = CandsForRepSubstring.end(); 1340e8d8bef9SDimitry Andric InnerCandIt != InnerCandEndIt; InnerCandIt++) { 1341e8d8bef9SDimitry Andric 1342e8d8bef9SDimitry Andric // We check if the inner item has a group already, if it does, we skip it. 1343e8d8bef9SDimitry Andric CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 1344e8d8bef9SDimitry Andric if (CandToGroupItInner != CandToGroup.end()) 1345e8d8bef9SDimitry Andric continue; 1346e8d8bef9SDimitry Andric 1347*06c3fb27SDimitry Andric // Check if we have found structural similarity between two candidates 1348*06c3fb27SDimitry Andric // that fully contains the first and second candidates. 1349*06c3fb27SDimitry Andric std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> 1350*06c3fb27SDimitry Andric LargerPair = CheckLargerCands( 1351*06c3fb27SDimitry Andric *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup); 1352*06c3fb27SDimitry Andric 1353*06c3fb27SDimitry Andric // If a pair was found, it means that we can assume that these smaller 1354*06c3fb27SDimitry Andric // substrings are also structurally similar. Use the larger candidates to 1355*06c3fb27SDimitry Andric // determine the canonical mapping between the two sections. 1356*06c3fb27SDimitry Andric if (LargerPair.has_value()) { 1357*06c3fb27SDimitry Andric SameStructure = true; 1358*06c3fb27SDimitry Andric InnerCandIt->createCanonicalRelationFrom( 1359*06c3fb27SDimitry Andric *CandIt, *LargerPair.value().first, *LargerPair.value().second); 1360*06c3fb27SDimitry Andric CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 1361*06c3fb27SDimitry Andric CurrentGroupPair->second.push_back(*InnerCandIt); 1362*06c3fb27SDimitry Andric continue; 1363*06c3fb27SDimitry Andric } 1364*06c3fb27SDimitry Andric 1365e8d8bef9SDimitry Andric // Otherwise we determine if they have the same structure and add it to 1366e8d8bef9SDimitry Andric // vector if they match. 1367349cc55cSDimitry Andric ValueNumberMappingA.clear(); 1368349cc55cSDimitry Andric ValueNumberMappingB.clear(); 1369349cc55cSDimitry Andric SameStructure = IRSimilarityCandidate::compareStructure( 1370349cc55cSDimitry Andric *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB); 1371e8d8bef9SDimitry Andric if (!SameStructure) 1372e8d8bef9SDimitry Andric continue; 1373e8d8bef9SDimitry Andric 1374349cc55cSDimitry Andric InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA, 1375349cc55cSDimitry Andric ValueNumberMappingB); 1376e8d8bef9SDimitry Andric CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 1377e8d8bef9SDimitry Andric CurrentGroupPair->second.push_back(*InnerCandIt); 1378e8d8bef9SDimitry Andric } 1379e8d8bef9SDimitry Andric } 1380e8d8bef9SDimitry Andric } 1381e8d8bef9SDimitry Andric 1382e8d8bef9SDimitry Andric void IRSimilarityIdentifier::findCandidates( 1383e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrList, 1384e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 1385e8d8bef9SDimitry Andric SuffixTree ST(IntegerMapping); 1386e8d8bef9SDimitry Andric 1387e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> CandsForRepSubstring; 1388e8d8bef9SDimitry Andric std::vector<SimilarityGroup> NewCandidateGroups; 1389e8d8bef9SDimitry Andric 1390e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup> StructuralGroups; 1391*06c3fb27SDimitry Andric DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand; 1392*06c3fb27SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 1393e8d8bef9SDimitry Andric 1394e8d8bef9SDimitry Andric // Iterate over the subsequences found by the Suffix Tree to create 1395e8d8bef9SDimitry Andric // IRSimilarityCandidates for each repeated subsequence and determine which 1396e8d8bef9SDimitry Andric // instances are structurally similar to one another. 1397*06c3fb27SDimitry Andric 1398*06c3fb27SDimitry Andric // Sort the suffix tree from longest substring to shortest. 1399*06c3fb27SDimitry Andric std::vector<SuffixTree::RepeatedSubstring> RSes; 1400*06c3fb27SDimitry Andric for (SuffixTree::RepeatedSubstring &RS : ST) 1401*06c3fb27SDimitry Andric RSes.push_back(RS); 1402*06c3fb27SDimitry Andric 1403*06c3fb27SDimitry Andric llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS, 1404*06c3fb27SDimitry Andric const SuffixTree::RepeatedSubstring &RHS) { 1405*06c3fb27SDimitry Andric return LHS.Length > RHS.Length; 1406*06c3fb27SDimitry Andric }); 1407*06c3fb27SDimitry Andric for (SuffixTree::RepeatedSubstring &RS : RSes) { 1408fe6060f1SDimitry Andric createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, 1409e8d8bef9SDimitry Andric CandsForRepSubstring); 1410e8d8bef9SDimitry Andric 1411e8d8bef9SDimitry Andric if (CandsForRepSubstring.size() < 2) 1412e8d8bef9SDimitry Andric continue; 1413e8d8bef9SDimitry Andric 1414*06c3fb27SDimitry Andric findCandidateStructures(CandsForRepSubstring, StructuralGroups, 1415*06c3fb27SDimitry Andric IndexToIncludedCand, CandToGroup); 1416*06c3fb27SDimitry Andric for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) { 1417e8d8bef9SDimitry Andric // We only add the group if it contains more than one 1418e8d8bef9SDimitry Andric // IRSimilarityCandidate. If there is only one, that means there is no 1419e8d8bef9SDimitry Andric // other repeated subsequence with the same structure. 1420*06c3fb27SDimitry Andric if (Group.second.size() > 1) { 1421e8d8bef9SDimitry Andric SimilarityCandidates->push_back(Group.second); 1422*06c3fb27SDimitry Andric // Iterate over each candidate in the group, and add an entry for each 1423*06c3fb27SDimitry Andric // instruction included with a mapping to a set of 1424*06c3fb27SDimitry Andric // IRSimilarityCandidates that include that instruction. 1425*06c3fb27SDimitry Andric for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) { 1426*06c3fb27SDimitry Andric for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx(); 1427*06c3fb27SDimitry Andric Idx <= Edx; ++Idx) { 1428*06c3fb27SDimitry Andric DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator 1429*06c3fb27SDimitry Andric IdIt; 1430*06c3fb27SDimitry Andric IdIt = IndexToIncludedCand.find(Idx); 1431*06c3fb27SDimitry Andric bool Inserted = false; 1432*06c3fb27SDimitry Andric if (IdIt == IndexToIncludedCand.end()) 1433*06c3fb27SDimitry Andric std::tie(IdIt, Inserted) = IndexToIncludedCand.insert( 1434*06c3fb27SDimitry Andric std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>())); 1435*06c3fb27SDimitry Andric IdIt->second.insert(&IRCand); 1436*06c3fb27SDimitry Andric } 1437*06c3fb27SDimitry Andric // Add mapping of candidate to the overall similarity group number. 1438*06c3fb27SDimitry Andric CandToGroup.insert( 1439*06c3fb27SDimitry Andric std::make_pair(&IRCand, SimilarityCandidates->size() - 1)); 1440*06c3fb27SDimitry Andric } 1441*06c3fb27SDimitry Andric } 1442*06c3fb27SDimitry Andric } 1443e8d8bef9SDimitry Andric 1444e8d8bef9SDimitry Andric CandsForRepSubstring.clear(); 1445e8d8bef9SDimitry Andric StructuralGroups.clear(); 1446e8d8bef9SDimitry Andric NewCandidateGroups.clear(); 1447e8d8bef9SDimitry Andric } 1448e8d8bef9SDimitry Andric } 1449e8d8bef9SDimitry Andric 1450e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 1451e8d8bef9SDimitry Andric ArrayRef<std::unique_ptr<Module>> Modules) { 1452e8d8bef9SDimitry Andric resetSimilarityCandidates(); 1453e8d8bef9SDimitry Andric 1454e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrList; 1455e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMapping; 1456349cc55cSDimitry Andric Mapper.InstClassifier.EnableBranches = this->EnableBranches; 145704eeddc0SDimitry Andric Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; 145804eeddc0SDimitry Andric Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; 14591fd87a68SDimitry Andric Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; 146081ad6265SDimitry Andric Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; 1461e8d8bef9SDimitry Andric 1462e8d8bef9SDimitry Andric populateMapper(Modules, InstrList, IntegerMapping); 1463e8d8bef9SDimitry Andric findCandidates(InstrList, IntegerMapping); 1464e8d8bef9SDimitry Andric 146581ad6265SDimitry Andric return *SimilarityCandidates; 1466e8d8bef9SDimitry Andric } 1467e8d8bef9SDimitry Andric 1468e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 1469e8d8bef9SDimitry Andric resetSimilarityCandidates(); 1470349cc55cSDimitry Andric Mapper.InstClassifier.EnableBranches = this->EnableBranches; 147104eeddc0SDimitry Andric Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; 147204eeddc0SDimitry Andric Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; 14731fd87a68SDimitry Andric Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; 147481ad6265SDimitry Andric Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; 1475e8d8bef9SDimitry Andric 1476e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrList; 1477e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMapping; 1478e8d8bef9SDimitry Andric 1479e8d8bef9SDimitry Andric populateMapper(M, InstrList, IntegerMapping); 1480e8d8bef9SDimitry Andric findCandidates(InstrList, IntegerMapping); 1481e8d8bef9SDimitry Andric 148281ad6265SDimitry Andric return *SimilarityCandidates; 1483e8d8bef9SDimitry Andric } 1484e8d8bef9SDimitry Andric 1485e8d8bef9SDimitry Andric INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 1486e8d8bef9SDimitry Andric "ir-similarity-identifier", false, true) 1487e8d8bef9SDimitry Andric 1488e8d8bef9SDimitry Andric IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 1489e8d8bef9SDimitry Andric : ModulePass(ID) { 1490e8d8bef9SDimitry Andric initializeIRSimilarityIdentifierWrapperPassPass( 1491e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 1492e8d8bef9SDimitry Andric } 1493e8d8bef9SDimitry Andric 1494e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 149504eeddc0SDimitry Andric IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, 149681ad6265SDimitry Andric MatchCallsByName, !DisableIntrinsics, 149781ad6265SDimitry Andric false)); 1498e8d8bef9SDimitry Andric return false; 1499e8d8bef9SDimitry Andric } 1500e8d8bef9SDimitry Andric 1501e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 1502e8d8bef9SDimitry Andric IRSI.reset(); 1503e8d8bef9SDimitry Andric return false; 1504e8d8bef9SDimitry Andric } 1505e8d8bef9SDimitry Andric 1506e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 1507fe6060f1SDimitry Andric IRSI->findSimilarity(M); 1508e8d8bef9SDimitry Andric return false; 1509e8d8bef9SDimitry Andric } 1510e8d8bef9SDimitry Andric 1511e8d8bef9SDimitry Andric AnalysisKey IRSimilarityAnalysis::Key; 1512e8d8bef9SDimitry Andric IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 1513e8d8bef9SDimitry Andric ModuleAnalysisManager &) { 151404eeddc0SDimitry Andric auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, 151581ad6265SDimitry Andric MatchCallsByName, !DisableIntrinsics, 151681ad6265SDimitry Andric false); 1517fe6060f1SDimitry Andric IRSI.findSimilarity(M); 1518fe6060f1SDimitry Andric return IRSI; 1519e8d8bef9SDimitry Andric } 1520e8d8bef9SDimitry Andric 1521e8d8bef9SDimitry Andric PreservedAnalyses 1522e8d8bef9SDimitry Andric IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 1523e8d8bef9SDimitry Andric IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 1524bdd1243dSDimitry Andric std::optional<SimilarityGroupList> &SimilarityCandidatesOpt = 1525bdd1243dSDimitry Andric IRSI.getSimilarity(); 1526e8d8bef9SDimitry Andric 1527e8d8bef9SDimitry Andric for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 1528e8d8bef9SDimitry Andric OS << CandVec.size() << " candidates of length " 1529e8d8bef9SDimitry Andric << CandVec.begin()->getLength() << ". Found in: \n"; 1530e8d8bef9SDimitry Andric for (IRSimilarityCandidate &Cand : CandVec) { 1531e8d8bef9SDimitry Andric OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 1532e8d8bef9SDimitry Andric << ", Basic Block: "; 1533e8d8bef9SDimitry Andric if (Cand.front()->Inst->getParent()->getName().str() == "") 1534fe6060f1SDimitry Andric OS << "(unnamed)"; 1535e8d8bef9SDimitry Andric else 1536fe6060f1SDimitry Andric OS << Cand.front()->Inst->getParent()->getName().str(); 1537fe6060f1SDimitry Andric OS << "\n Start Instruction: "; 1538fe6060f1SDimitry Andric Cand.frontInstruction()->print(OS); 1539fe6060f1SDimitry Andric OS << "\n End Instruction: "; 1540fe6060f1SDimitry Andric Cand.backInstruction()->print(OS); 1541fe6060f1SDimitry Andric OS << "\n"; 1542e8d8bef9SDimitry Andric } 1543e8d8bef9SDimitry Andric } 1544e8d8bef9SDimitry Andric 1545e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 1546e8d8bef9SDimitry Andric } 1547e8d8bef9SDimitry Andric 1548e8d8bef9SDimitry Andric char IRSimilarityIdentifierWrapperPass::ID = 0; 1549