1e8d8bef9SDimitry Andric //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric // \file 10e8d8bef9SDimitry Andric // Implementation file for the IRSimilarityIdentifier for identifying 11e8d8bef9SDimitry Andric // similarities in IR including the IRInstructionMapper. 12e8d8bef9SDimitry Andric // 13e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 14e8d8bef9SDimitry Andric 15e8d8bef9SDimitry Andric #include "llvm/Analysis/IRSimilarityIdentifier.h" 16e8d8bef9SDimitry Andric #include "llvm/ADT/DenseMap.h" 17e8d8bef9SDimitry Andric #include "llvm/IR/Intrinsics.h" 18e8d8bef9SDimitry Andric #include "llvm/IR/Operator.h" 19e8d8bef9SDimitry Andric #include "llvm/IR/User.h" 20e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 21e8d8bef9SDimitry Andric #include "llvm/Support/SuffixTree.h" 22e8d8bef9SDimitry Andric 23e8d8bef9SDimitry Andric using namespace llvm; 24e8d8bef9SDimitry Andric using namespace IRSimilarity; 25e8d8bef9SDimitry Andric 26*349cc55cSDimitry Andric cl::opt<bool> 27*349cc55cSDimitry Andric DisableBranches("no-ir-sim-branch-matching", cl::init(false), 28*349cc55cSDimitry Andric cl::ReallyHidden, 29*349cc55cSDimitry Andric cl::desc("disable similarity matching, and outlining, " 30*349cc55cSDimitry Andric "across branches for debugging purposes.")); 31*349cc55cSDimitry Andric 32e8d8bef9SDimitry Andric IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 33e8d8bef9SDimitry Andric IRInstructionDataList &IDList) 34e8d8bef9SDimitry Andric : Inst(&I), Legal(Legality), IDL(&IDList) { 35*349cc55cSDimitry Andric initializeInstruction(); 36*349cc55cSDimitry Andric } 37*349cc55cSDimitry Andric 38*349cc55cSDimitry Andric void IRInstructionData::initializeInstruction() { 39e8d8bef9SDimitry Andric // We check for whether we have a comparison instruction. If it is, we 40e8d8bef9SDimitry Andric // find the "less than" version of the predicate for consistency for 41e8d8bef9SDimitry Andric // comparison instructions throught the program. 42*349cc55cSDimitry Andric if (CmpInst *C = dyn_cast<CmpInst>(Inst)) { 43e8d8bef9SDimitry Andric CmpInst::Predicate Predicate = predicateForConsistency(C); 44e8d8bef9SDimitry Andric if (Predicate != C->getPredicate()) 45e8d8bef9SDimitry Andric RevisedPredicate = Predicate; 46e8d8bef9SDimitry Andric } 47e8d8bef9SDimitry Andric 48e8d8bef9SDimitry Andric // Here we collect the operands and their types for determining whether 49e8d8bef9SDimitry Andric // the structure of the operand use matches between two different candidates. 50*349cc55cSDimitry Andric for (Use &OI : Inst->operands()) { 51*349cc55cSDimitry Andric if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) { 52e8d8bef9SDimitry Andric // If we have a CmpInst where the predicate is reversed, it means the 53e8d8bef9SDimitry Andric // operands must be reversed as well. 54e8d8bef9SDimitry Andric OperVals.insert(OperVals.begin(), OI.get()); 55e8d8bef9SDimitry Andric continue; 56e8d8bef9SDimitry Andric } 57e8d8bef9SDimitry Andric 58e8d8bef9SDimitry Andric OperVals.push_back(OI.get()); 59e8d8bef9SDimitry Andric } 60e8d8bef9SDimitry Andric } 61e8d8bef9SDimitry Andric 62*349cc55cSDimitry Andric IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) 63*349cc55cSDimitry Andric : Inst(nullptr), Legal(false), IDL(&IDList) {} 64*349cc55cSDimitry Andric 65*349cc55cSDimitry Andric void IRInstructionData::setBranchSuccessors( 66*349cc55cSDimitry Andric DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 67*349cc55cSDimitry Andric assert(isa<BranchInst>(Inst) && "Instruction must be branch"); 68*349cc55cSDimitry Andric 69*349cc55cSDimitry Andric BranchInst *BI = cast<BranchInst>(Inst); 70*349cc55cSDimitry Andric DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 71*349cc55cSDimitry Andric 72*349cc55cSDimitry Andric BBNumIt = BasicBlockToInteger.find(BI->getParent()); 73*349cc55cSDimitry Andric assert(BBNumIt != BasicBlockToInteger.end() && 74*349cc55cSDimitry Andric "Could not find location for BasicBlock!"); 75*349cc55cSDimitry Andric 76*349cc55cSDimitry Andric int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 77*349cc55cSDimitry Andric 78*349cc55cSDimitry Andric for (BasicBlock *Successor : BI->successors()) { 79*349cc55cSDimitry Andric BBNumIt = BasicBlockToInteger.find(Successor); 80*349cc55cSDimitry Andric assert(BBNumIt != BasicBlockToInteger.end() && 81*349cc55cSDimitry Andric "Could not find number for BasicBlock!"); 82*349cc55cSDimitry Andric int OtherBlockNumber = static_cast<int>(BBNumIt->second); 83*349cc55cSDimitry Andric 84*349cc55cSDimitry Andric int Relative = OtherBlockNumber - CurrentBlockNumber; 85*349cc55cSDimitry Andric RelativeBlockLocations.push_back(Relative); 86*349cc55cSDimitry Andric } 87*349cc55cSDimitry Andric } 88*349cc55cSDimitry Andric 89e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 90e8d8bef9SDimitry Andric switch (CI->getPredicate()) { 91e8d8bef9SDimitry Andric case CmpInst::FCMP_OGT: 92e8d8bef9SDimitry Andric case CmpInst::FCMP_UGT: 93e8d8bef9SDimitry Andric case CmpInst::FCMP_OGE: 94e8d8bef9SDimitry Andric case CmpInst::FCMP_UGE: 95e8d8bef9SDimitry Andric case CmpInst::ICMP_SGT: 96e8d8bef9SDimitry Andric case CmpInst::ICMP_UGT: 97e8d8bef9SDimitry Andric case CmpInst::ICMP_SGE: 98e8d8bef9SDimitry Andric case CmpInst::ICMP_UGE: 99e8d8bef9SDimitry Andric return CI->getSwappedPredicate(); 100e8d8bef9SDimitry Andric default: 101e8d8bef9SDimitry Andric return CI->getPredicate(); 102e8d8bef9SDimitry Andric } 103e8d8bef9SDimitry Andric } 104e8d8bef9SDimitry Andric 105e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::getPredicate() const { 106e8d8bef9SDimitry Andric assert(isa<CmpInst>(Inst) && 107e8d8bef9SDimitry Andric "Can only get a predicate from a compare instruction"); 108e8d8bef9SDimitry Andric 109e8d8bef9SDimitry Andric if (RevisedPredicate.hasValue()) 110e8d8bef9SDimitry Andric return RevisedPredicate.getValue(); 111e8d8bef9SDimitry Andric 112e8d8bef9SDimitry Andric return cast<CmpInst>(Inst)->getPredicate(); 113e8d8bef9SDimitry Andric } 114e8d8bef9SDimitry Andric 115e8d8bef9SDimitry Andric static StringRef getCalledFunctionName(CallInst &CI) { 116e8d8bef9SDimitry Andric assert(CI.getCalledFunction() != nullptr && "Called Function is nullptr?"); 117e8d8bef9SDimitry Andric 118e8d8bef9SDimitry Andric return CI.getCalledFunction()->getName(); 119e8d8bef9SDimitry Andric } 120e8d8bef9SDimitry Andric 121e8d8bef9SDimitry Andric bool IRSimilarity::isClose(const IRInstructionData &A, 122e8d8bef9SDimitry Andric const IRInstructionData &B) { 123e8d8bef9SDimitry Andric 124e8d8bef9SDimitry Andric if (!A.Legal || !B.Legal) 125e8d8bef9SDimitry Andric return false; 126e8d8bef9SDimitry Andric 127e8d8bef9SDimitry Andric // Check if we are performing the same sort of operation on the same types 128e8d8bef9SDimitry Andric // but not on the same values. 129e8d8bef9SDimitry Andric if (!A.Inst->isSameOperationAs(B.Inst)) { 130e8d8bef9SDimitry Andric // If there is a predicate, this means that either there is a swapped 131e8d8bef9SDimitry Andric // predicate, or that the types are different, we want to make sure that 132e8d8bef9SDimitry Andric // the predicates are equivalent via swapping. 133e8d8bef9SDimitry Andric if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 134e8d8bef9SDimitry Andric 135e8d8bef9SDimitry Andric if (A.getPredicate() != B.getPredicate()) 136e8d8bef9SDimitry Andric return false; 137e8d8bef9SDimitry Andric 138e8d8bef9SDimitry Andric // If the predicates are the same via swap, make sure that the types are 139e8d8bef9SDimitry Andric // still the same. 140e8d8bef9SDimitry Andric auto ZippedTypes = zip(A.OperVals, B.OperVals); 141e8d8bef9SDimitry Andric 142e8d8bef9SDimitry Andric return all_of( 143e8d8bef9SDimitry Andric ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 144e8d8bef9SDimitry Andric return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 145e8d8bef9SDimitry Andric }); 146e8d8bef9SDimitry Andric } 147e8d8bef9SDimitry Andric 148e8d8bef9SDimitry Andric return false; 149e8d8bef9SDimitry Andric } 150e8d8bef9SDimitry Andric 151e8d8bef9SDimitry Andric // Since any GEP Instruction operands after the first operand cannot be 152e8d8bef9SDimitry Andric // defined by a register, we must make sure that the operands after the first 153e8d8bef9SDimitry Andric // are the same in the two instructions 154e8d8bef9SDimitry Andric if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { 155e8d8bef9SDimitry Andric auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); 156e8d8bef9SDimitry Andric 157e8d8bef9SDimitry Andric // If the instructions do not have the same inbounds restrictions, we do 158e8d8bef9SDimitry Andric // not consider them the same. 159e8d8bef9SDimitry Andric if (GEP->isInBounds() != OtherGEP->isInBounds()) 160e8d8bef9SDimitry Andric return false; 161e8d8bef9SDimitry Andric 162e8d8bef9SDimitry Andric auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); 163e8d8bef9SDimitry Andric 164e8d8bef9SDimitry Andric // We increment here since we do not care about the first instruction, 165e8d8bef9SDimitry Andric // we only care about the following operands since they must be the 166e8d8bef9SDimitry Andric // exact same to be considered similar. 167e8d8bef9SDimitry Andric return all_of(drop_begin(ZippedOperands), 168e8d8bef9SDimitry Andric [](std::tuple<llvm::Use &, llvm::Use &> R) { 169e8d8bef9SDimitry Andric return std::get<0>(R) == std::get<1>(R); 170e8d8bef9SDimitry Andric }); 171e8d8bef9SDimitry Andric } 172e8d8bef9SDimitry Andric 173e8d8bef9SDimitry Andric // If the instructions are functions, we make sure that the function name is 174e8d8bef9SDimitry Andric // the same. We already know that the types are since is isSameOperationAs is 175e8d8bef9SDimitry Andric // true. 176e8d8bef9SDimitry Andric if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { 177e8d8bef9SDimitry Andric CallInst *CIA = cast<CallInst>(A.Inst); 178e8d8bef9SDimitry Andric CallInst *CIB = cast<CallInst>(B.Inst); 179e8d8bef9SDimitry Andric if (getCalledFunctionName(*CIA).compare(getCalledFunctionName(*CIB)) != 0) 180e8d8bef9SDimitry Andric return false; 181e8d8bef9SDimitry Andric } 182e8d8bef9SDimitry Andric 183*349cc55cSDimitry Andric if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) && 184*349cc55cSDimitry Andric A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) 185*349cc55cSDimitry Andric return false; 186*349cc55cSDimitry Andric 187e8d8bef9SDimitry Andric return true; 188e8d8bef9SDimitry Andric } 189e8d8bef9SDimitry Andric 190e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 191e8d8bef9SDimitry Andric // into the same interface. 192e8d8bef9SDimitry Andric void IRInstructionMapper::convertToUnsignedVec( 193e8d8bef9SDimitry Andric BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 194e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 195e8d8bef9SDimitry Andric BasicBlock::iterator It = BB.begin(); 196e8d8bef9SDimitry Andric 197e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMappingForBB; 198e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrListForBB; 199e8d8bef9SDimitry Andric 200e8d8bef9SDimitry Andric for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 201e8d8bef9SDimitry Andric switch (InstClassifier.visit(*It)) { 202e8d8bef9SDimitry Andric case InstrType::Legal: 203e8d8bef9SDimitry Andric mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 204e8d8bef9SDimitry Andric break; 205e8d8bef9SDimitry Andric case InstrType::Illegal: 206e8d8bef9SDimitry Andric mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 207e8d8bef9SDimitry Andric break; 208e8d8bef9SDimitry Andric case InstrType::Invisible: 209e8d8bef9SDimitry Andric AddedIllegalLastTime = false; 210e8d8bef9SDimitry Andric break; 211e8d8bef9SDimitry Andric } 212e8d8bef9SDimitry Andric } 213e8d8bef9SDimitry Andric 214e8d8bef9SDimitry Andric if (HaveLegalRange) { 215*349cc55cSDimitry Andric if (AddedIllegalLastTime) 216e8d8bef9SDimitry Andric mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 217fe6060f1SDimitry Andric for (IRInstructionData *ID : InstrListForBB) 218fe6060f1SDimitry Andric this->IDL->push_back(*ID); 219e8d8bef9SDimitry Andric llvm::append_range(InstrList, InstrListForBB); 220e8d8bef9SDimitry Andric llvm::append_range(IntegerMapping, IntegerMappingForBB); 221e8d8bef9SDimitry Andric } 222e8d8bef9SDimitry Andric } 223e8d8bef9SDimitry Andric 224e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 225e8d8bef9SDimitry Andric // into the same interface. 226e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToLegalUnsigned( 227e8d8bef9SDimitry Andric BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 228e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrListForBB) { 229e8d8bef9SDimitry Andric // We added something legal, so we should unset the AddedLegalLastTime 230e8d8bef9SDimitry Andric // flag. 231e8d8bef9SDimitry Andric AddedIllegalLastTime = false; 232e8d8bef9SDimitry Andric 233e8d8bef9SDimitry Andric // If we have at least two adjacent legal instructions (which may have 234e8d8bef9SDimitry Andric // invisible instructions in between), remember that. 235e8d8bef9SDimitry Andric if (CanCombineWithPrevInstr) 236e8d8bef9SDimitry Andric HaveLegalRange = true; 237e8d8bef9SDimitry Andric CanCombineWithPrevInstr = true; 238e8d8bef9SDimitry Andric 239e8d8bef9SDimitry Andric // Get the integer for this instruction or give it the current 240e8d8bef9SDimitry Andric // LegalInstrNumber. 241e8d8bef9SDimitry Andric IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 242e8d8bef9SDimitry Andric InstrListForBB.push_back(ID); 243e8d8bef9SDimitry Andric 244*349cc55cSDimitry Andric if (isa<BranchInst>(*It)) 245*349cc55cSDimitry Andric ID->setBranchSuccessors(BasicBlockToInteger); 246*349cc55cSDimitry Andric 247e8d8bef9SDimitry Andric // Add to the instruction list 248e8d8bef9SDimitry Andric bool WasInserted; 249e8d8bef9SDimitry Andric DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 250e8d8bef9SDimitry Andric ResultIt; 251e8d8bef9SDimitry Andric std::tie(ResultIt, WasInserted) = 252e8d8bef9SDimitry Andric InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 253e8d8bef9SDimitry Andric unsigned INumber = ResultIt->second; 254e8d8bef9SDimitry Andric 255e8d8bef9SDimitry Andric // There was an insertion. 256e8d8bef9SDimitry Andric if (WasInserted) 257e8d8bef9SDimitry Andric LegalInstrNumber++; 258e8d8bef9SDimitry Andric 259e8d8bef9SDimitry Andric IntegerMappingForBB.push_back(INumber); 260e8d8bef9SDimitry Andric 261e8d8bef9SDimitry Andric // Make sure we don't overflow or use any integers reserved by the DenseMap. 262e8d8bef9SDimitry Andric assert(LegalInstrNumber < IllegalInstrNumber && 263e8d8bef9SDimitry Andric "Instruction mapping overflow!"); 264e8d8bef9SDimitry Andric 265e8d8bef9SDimitry Andric assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 266e8d8bef9SDimitry Andric "Tried to assign DenseMap tombstone or empty key to instruction."); 267e8d8bef9SDimitry Andric assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 268e8d8bef9SDimitry Andric "Tried to assign DenseMap tombstone or empty key to instruction."); 269e8d8bef9SDimitry Andric 270e8d8bef9SDimitry Andric return INumber; 271e8d8bef9SDimitry Andric } 272e8d8bef9SDimitry Andric 273e8d8bef9SDimitry Andric IRInstructionData * 274e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 275e8d8bef9SDimitry Andric IRInstructionDataList &IDL) { 276e8d8bef9SDimitry Andric return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 277e8d8bef9SDimitry Andric } 278e8d8bef9SDimitry Andric 279*349cc55cSDimitry Andric IRInstructionData * 280*349cc55cSDimitry Andric IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) { 281*349cc55cSDimitry Andric return new (InstDataAllocator->Allocate()) IRInstructionData(IDL); 282*349cc55cSDimitry Andric } 283*349cc55cSDimitry Andric 284e8d8bef9SDimitry Andric IRInstructionDataList * 285e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionDataList() { 286e8d8bef9SDimitry Andric return new (IDLAllocator->Allocate()) IRInstructionDataList(); 287e8d8bef9SDimitry Andric } 288e8d8bef9SDimitry Andric 289e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 290e8d8bef9SDimitry Andric // into the same interface. 291e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToIllegalUnsigned( 292e8d8bef9SDimitry Andric BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 293e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrListForBB, bool End) { 294e8d8bef9SDimitry Andric // Can't combine an illegal instruction. Set the flag. 295e8d8bef9SDimitry Andric CanCombineWithPrevInstr = false; 296e8d8bef9SDimitry Andric 297e8d8bef9SDimitry Andric // Only add one illegal number per range of legal numbers. 298e8d8bef9SDimitry Andric if (AddedIllegalLastTime) 299e8d8bef9SDimitry Andric return IllegalInstrNumber; 300e8d8bef9SDimitry Andric 301e8d8bef9SDimitry Andric IRInstructionData *ID = nullptr; 302e8d8bef9SDimitry Andric if (!End) 303e8d8bef9SDimitry Andric ID = allocateIRInstructionData(*It, false, *IDL); 304*349cc55cSDimitry Andric else 305*349cc55cSDimitry Andric ID = allocateIRInstructionData(*IDL); 306e8d8bef9SDimitry Andric InstrListForBB.push_back(ID); 307e8d8bef9SDimitry Andric 308e8d8bef9SDimitry Andric // Remember that we added an illegal number last time. 309e8d8bef9SDimitry Andric AddedIllegalLastTime = true; 310e8d8bef9SDimitry Andric unsigned INumber = IllegalInstrNumber; 311e8d8bef9SDimitry Andric IntegerMappingForBB.push_back(IllegalInstrNumber--); 312e8d8bef9SDimitry Andric 313e8d8bef9SDimitry Andric assert(LegalInstrNumber < IllegalInstrNumber && 314e8d8bef9SDimitry Andric "Instruction mapping overflow!"); 315e8d8bef9SDimitry Andric 316e8d8bef9SDimitry Andric assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 317e8d8bef9SDimitry Andric "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 318e8d8bef9SDimitry Andric 319e8d8bef9SDimitry Andric assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 320e8d8bef9SDimitry Andric "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 321e8d8bef9SDimitry Andric 322e8d8bef9SDimitry Andric return INumber; 323e8d8bef9SDimitry Andric } 324e8d8bef9SDimitry Andric 325e8d8bef9SDimitry Andric IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 326e8d8bef9SDimitry Andric IRInstructionData *FirstInstIt, 327e8d8bef9SDimitry Andric IRInstructionData *LastInstIt) 328e8d8bef9SDimitry Andric : StartIdx(StartIdx), Len(Len) { 329e8d8bef9SDimitry Andric 330e8d8bef9SDimitry Andric assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 331e8d8bef9SDimitry Andric assert(LastInstIt != nullptr && "Instruction is nullptr!"); 332e8d8bef9SDimitry Andric assert(StartIdx + Len > StartIdx && 333e8d8bef9SDimitry Andric "Overflow for IRSimilarityCandidate range?"); 334e8d8bef9SDimitry Andric assert(Len - 1 == static_cast<unsigned>(std::distance( 335e8d8bef9SDimitry Andric iterator(FirstInstIt), iterator(LastInstIt))) && 336e8d8bef9SDimitry Andric "Length of the first and last IRInstructionData do not match the " 337e8d8bef9SDimitry Andric "given length"); 338e8d8bef9SDimitry Andric 339e8d8bef9SDimitry Andric // We iterate over the given instructions, and map each unique value 340e8d8bef9SDimitry Andric // to a unique number in the IRSimilarityCandidate ValueToNumber and 341e8d8bef9SDimitry Andric // NumberToValue maps. A constant get its own value globally, the individual 342e8d8bef9SDimitry Andric // uses of the constants are not considered to be unique. 343e8d8bef9SDimitry Andric // 344e8d8bef9SDimitry Andric // IR: Mapping Added: 345e8d8bef9SDimitry Andric // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 346e8d8bef9SDimitry Andric // %add2 = add i32 %a, %1 %add2 -> 4 347e8d8bef9SDimitry Andric // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 348e8d8bef9SDimitry Andric // 349e8d8bef9SDimitry Andric // when replace with global values, starting from 1, would be 350e8d8bef9SDimitry Andric // 351e8d8bef9SDimitry Andric // 3 = add i32 1, 2 352e8d8bef9SDimitry Andric // 4 = add i32 1, 3 353e8d8bef9SDimitry Andric // 6 = add i32 5, 2 354e8d8bef9SDimitry Andric unsigned LocalValNumber = 1; 355e8d8bef9SDimitry Andric IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 356e8d8bef9SDimitry Andric for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 357e8d8bef9SDimitry Andric // Map the operand values to an unsigned integer if it does not already 358e8d8bef9SDimitry Andric // have an unsigned integer assigned to it. 359e8d8bef9SDimitry Andric for (Value *Arg : ID->OperVals) 360e8d8bef9SDimitry Andric if (ValueToNumber.find(Arg) == ValueToNumber.end()) { 361e8d8bef9SDimitry Andric ValueToNumber.try_emplace(Arg, LocalValNumber); 362e8d8bef9SDimitry Andric NumberToValue.try_emplace(LocalValNumber, Arg); 363e8d8bef9SDimitry Andric LocalValNumber++; 364e8d8bef9SDimitry Andric } 365e8d8bef9SDimitry Andric 366e8d8bef9SDimitry Andric // Mapping the instructions to an unsigned integer if it is not already 367e8d8bef9SDimitry Andric // exist in the mapping. 368e8d8bef9SDimitry Andric if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { 369e8d8bef9SDimitry Andric ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 370e8d8bef9SDimitry Andric NumberToValue.try_emplace(LocalValNumber, ID->Inst); 371e8d8bef9SDimitry Andric LocalValNumber++; 372e8d8bef9SDimitry Andric } 373e8d8bef9SDimitry Andric } 374e8d8bef9SDimitry Andric 375e8d8bef9SDimitry Andric // Setting the first and last instruction data pointers for the candidate. If 376e8d8bef9SDimitry Andric // we got through the entire for loop without hitting an assert, we know 377e8d8bef9SDimitry Andric // that both of these instructions are not nullptrs. 378e8d8bef9SDimitry Andric FirstInst = FirstInstIt; 379e8d8bef9SDimitry Andric LastInst = LastInstIt; 380e8d8bef9SDimitry Andric } 381e8d8bef9SDimitry Andric 382e8d8bef9SDimitry Andric bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 383e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 384e8d8bef9SDimitry Andric if (A.getLength() != B.getLength()) 385e8d8bef9SDimitry Andric return false; 386e8d8bef9SDimitry Andric 387e8d8bef9SDimitry Andric auto InstrDataForBoth = 388e8d8bef9SDimitry Andric zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 389e8d8bef9SDimitry Andric 390e8d8bef9SDimitry Andric return all_of(InstrDataForBoth, 391e8d8bef9SDimitry Andric [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 392e8d8bef9SDimitry Andric IRInstructionData &A = std::get<0>(R); 393e8d8bef9SDimitry Andric IRInstructionData &B = std::get<1>(R); 394e8d8bef9SDimitry Andric if (!A.Legal || !B.Legal) 395e8d8bef9SDimitry Andric return false; 396e8d8bef9SDimitry Andric return isClose(A, B); 397e8d8bef9SDimitry Andric }); 398e8d8bef9SDimitry Andric } 399e8d8bef9SDimitry Andric 400e8d8bef9SDimitry Andric /// Determine if one or more of the assigned global value numbers for the 401e8d8bef9SDimitry Andric /// operands in \p TargetValueNumbers is in the current mapping set for operand 402e8d8bef9SDimitry Andric /// numbers in \p SourceOperands. The set of possible corresponding global 403e8d8bef9SDimitry Andric /// value numbers are replaced with the most recent version of compatible 404e8d8bef9SDimitry Andric /// values. 405e8d8bef9SDimitry Andric /// 406e8d8bef9SDimitry Andric /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 407e8d8bef9SDimitry Andric /// value number for the source IRInstructionCandidate. 408e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 409e8d8bef9SDimitry Andric /// IRSimilarityCandidate global value numbers to a set of possible numbers in 410e8d8bef9SDimitry Andric /// the target. 411e8d8bef9SDimitry Andric /// \param [in] SourceOperands - The operands in the original 412e8d8bef9SDimitry Andric /// IRSimilarityCandidate in the current instruction. 413e8d8bef9SDimitry Andric /// \param [in] TargetValueNumbers - The global value numbers of the operands in 414e8d8bef9SDimitry Andric /// the corresponding Instruction in the other IRSimilarityCandidate. 415e8d8bef9SDimitry Andric /// \returns true if there exists a possible mapping between the source 416e8d8bef9SDimitry Andric /// Instruction operands and the target Instruction operands, and false if not. 417e8d8bef9SDimitry Andric static bool checkNumberingAndReplaceCommutative( 418e8d8bef9SDimitry Andric const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 419e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 420e8d8bef9SDimitry Andric ArrayRef<Value *> &SourceOperands, 421e8d8bef9SDimitry Andric DenseSet<unsigned> &TargetValueNumbers){ 422e8d8bef9SDimitry Andric 423e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 424e8d8bef9SDimitry Andric 425e8d8bef9SDimitry Andric unsigned ArgVal; 426e8d8bef9SDimitry Andric bool WasInserted; 427e8d8bef9SDimitry Andric 428e8d8bef9SDimitry Andric // Iterate over the operands in the source IRSimilarityCandidate to determine 429e8d8bef9SDimitry Andric // whether there exists an operand in the other IRSimilarityCandidate that 430e8d8bef9SDimitry Andric // creates a valid mapping of Value to Value between the 431e8d8bef9SDimitry Andric // IRSimilarityCaniddates. 432e8d8bef9SDimitry Andric for (Value *V : SourceOperands) { 433e8d8bef9SDimitry Andric ArgVal = SourceValueToNumberMapping.find(V)->second; 434e8d8bef9SDimitry Andric 435e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 436e8d8bef9SDimitry Andric std::make_pair(ArgVal, TargetValueNumbers)); 437e8d8bef9SDimitry Andric 438e8d8bef9SDimitry Andric // Instead of finding a current mapping, we inserted a set. This means a 439e8d8bef9SDimitry Andric // mapping did not exist for the source Instruction operand, it has no 440e8d8bef9SDimitry Andric // current constraints we need to check. 441e8d8bef9SDimitry Andric if (WasInserted) 442e8d8bef9SDimitry Andric continue; 443e8d8bef9SDimitry Andric 444e8d8bef9SDimitry Andric // If a mapping already exists for the source operand to the values in the 445e8d8bef9SDimitry Andric // other IRSimilarityCandidate we need to iterate over the items in other 446e8d8bef9SDimitry Andric // IRSimilarityCandidate's Instruction to determine whether there is a valid 447e8d8bef9SDimitry Andric // mapping of Value to Value. 448e8d8bef9SDimitry Andric DenseSet<unsigned> NewSet; 449e8d8bef9SDimitry Andric for (unsigned &Curr : ValueMappingIt->second) 450e8d8bef9SDimitry Andric // If we can find the value in the mapping, we add it to the new set. 451e8d8bef9SDimitry Andric if (TargetValueNumbers.contains(Curr)) 452e8d8bef9SDimitry Andric NewSet.insert(Curr); 453e8d8bef9SDimitry Andric 454e8d8bef9SDimitry Andric // If we could not find a Value, return 0. 455e8d8bef9SDimitry Andric if (NewSet.empty()) 456e8d8bef9SDimitry Andric return false; 457e8d8bef9SDimitry Andric 458e8d8bef9SDimitry Andric // Otherwise replace the old mapping with the newly constructed one. 459e8d8bef9SDimitry Andric if (NewSet.size() != ValueMappingIt->second.size()) 460e8d8bef9SDimitry Andric ValueMappingIt->second.swap(NewSet); 461e8d8bef9SDimitry Andric 462e8d8bef9SDimitry Andric // We have reached no conclusions about the mapping, and cannot remove 463e8d8bef9SDimitry Andric // any items from the other operands, so we move to check the next operand. 464e8d8bef9SDimitry Andric if (ValueMappingIt->second.size() != 1) 465e8d8bef9SDimitry Andric continue; 466e8d8bef9SDimitry Andric 467e8d8bef9SDimitry Andric 468e8d8bef9SDimitry Andric unsigned ValToRemove = *ValueMappingIt->second.begin(); 469e8d8bef9SDimitry Andric // When there is only one item left in the mapping for and operand, remove 470e8d8bef9SDimitry Andric // the value from the other operands. If it results in there being no 471e8d8bef9SDimitry Andric // mapping, return false, it means the mapping is wrong 472e8d8bef9SDimitry Andric for (Value *InnerV : SourceOperands) { 473e8d8bef9SDimitry Andric if (V == InnerV) 474e8d8bef9SDimitry Andric continue; 475e8d8bef9SDimitry Andric 476e8d8bef9SDimitry Andric unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 477e8d8bef9SDimitry Andric ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 478e8d8bef9SDimitry Andric if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 479e8d8bef9SDimitry Andric continue; 480e8d8bef9SDimitry Andric 481e8d8bef9SDimitry Andric ValueMappingIt->second.erase(ValToRemove); 482e8d8bef9SDimitry Andric if (ValueMappingIt->second.empty()) 483e8d8bef9SDimitry Andric return false; 484e8d8bef9SDimitry Andric } 485e8d8bef9SDimitry Andric } 486e8d8bef9SDimitry Andric 487e8d8bef9SDimitry Andric return true; 488e8d8bef9SDimitry Andric } 489e8d8bef9SDimitry Andric 490e8d8bef9SDimitry Andric /// Determine if operand number \p TargetArgVal is in the current mapping set 491e8d8bef9SDimitry Andric /// for operand number \p SourceArgVal. 492e8d8bef9SDimitry Andric /// 493e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 494e8d8bef9SDimitry Andric /// value numbers from source IRSimilarityCandidate to target 495e8d8bef9SDimitry Andric /// IRSimilarityCandidate. 496e8d8bef9SDimitry Andric /// \param [in] SourceArgVal The global value number for an operand in the 497e8d8bef9SDimitry Andric /// in the original candidate. 498e8d8bef9SDimitry Andric /// \param [in] TargetArgVal The global value number for the corresponding 499e8d8bef9SDimitry Andric /// operand in the other candidate. 500e8d8bef9SDimitry Andric /// \returns True if there exists a mapping and false if not. 501e8d8bef9SDimitry Andric bool checkNumberingAndReplace( 502e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 503e8d8bef9SDimitry Andric unsigned SourceArgVal, unsigned TargetArgVal) { 504e8d8bef9SDimitry Andric // We are given two unsigned integers representing the global values of 505e8d8bef9SDimitry Andric // the operands in different IRSimilarityCandidates and a current mapping 506e8d8bef9SDimitry Andric // between the two. 507e8d8bef9SDimitry Andric // 508e8d8bef9SDimitry Andric // Source Operand GVN: 1 509e8d8bef9SDimitry Andric // Target Operand GVN: 2 510e8d8bef9SDimitry Andric // CurrentMapping: {1: {1, 2}} 511e8d8bef9SDimitry Andric // 512e8d8bef9SDimitry Andric // Since we have mapping, and the target operand is contained in the set, we 513e8d8bef9SDimitry Andric // update it to: 514e8d8bef9SDimitry Andric // CurrentMapping: {1: {2}} 515e8d8bef9SDimitry Andric // and can return true. But, if the mapping was 516e8d8bef9SDimitry Andric // CurrentMapping: {1: {3}} 517e8d8bef9SDimitry Andric // we would return false. 518e8d8bef9SDimitry Andric 519e8d8bef9SDimitry Andric bool WasInserted; 520e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 521e8d8bef9SDimitry Andric 522e8d8bef9SDimitry Andric std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 523e8d8bef9SDimitry Andric std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 524e8d8bef9SDimitry Andric 525e8d8bef9SDimitry Andric // If we created a new mapping, then we are done. 526e8d8bef9SDimitry Andric if (WasInserted) 527e8d8bef9SDimitry Andric return true; 528e8d8bef9SDimitry Andric 529e8d8bef9SDimitry Andric // If there is more than one option in the mapping set, and the target value 530e8d8bef9SDimitry Andric // is included in the mapping set replace that set with one that only includes 531e8d8bef9SDimitry Andric // the target value, as it is the only valid mapping via the non commutative 532e8d8bef9SDimitry Andric // instruction. 533e8d8bef9SDimitry Andric 534e8d8bef9SDimitry Andric DenseSet<unsigned> &TargetSet = Val->second; 535e8d8bef9SDimitry Andric if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 536e8d8bef9SDimitry Andric TargetSet.clear(); 537e8d8bef9SDimitry Andric TargetSet.insert(TargetArgVal); 538e8d8bef9SDimitry Andric return true; 539e8d8bef9SDimitry Andric } 540e8d8bef9SDimitry Andric 541e8d8bef9SDimitry Andric // Return true if we can find the value in the set. 542e8d8bef9SDimitry Andric return TargetSet.contains(TargetArgVal); 543e8d8bef9SDimitry Andric } 544e8d8bef9SDimitry Andric 545e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 546e8d8bef9SDimitry Andric OperandMapping A, OperandMapping B) { 547e8d8bef9SDimitry Andric // Iterators to keep track of where we are in the operands for each 548e8d8bef9SDimitry Andric // Instruction. 549e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 550e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 551e8d8bef9SDimitry Andric unsigned OperandLength = A.OperVals.size(); 552e8d8bef9SDimitry Andric 553e8d8bef9SDimitry Andric // For each operand, get the value numbering and ensure it is consistent. 554e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 555e8d8bef9SDimitry Andric unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 556e8d8bef9SDimitry Andric unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 557e8d8bef9SDimitry Andric 558e8d8bef9SDimitry Andric // Attempt to add a set with only the target value. If there is no mapping 559e8d8bef9SDimitry Andric // we can create it here. 560e8d8bef9SDimitry Andric // 561e8d8bef9SDimitry Andric // For an instruction like a subtraction: 562e8d8bef9SDimitry Andric // IRSimilarityCandidateA: IRSimilarityCandidateB: 563e8d8bef9SDimitry Andric // %resultA = sub %a, %b %resultB = sub %d, %e 564e8d8bef9SDimitry Andric // 565e8d8bef9SDimitry Andric // We map %a -> %d and %b -> %e. 566e8d8bef9SDimitry Andric // 567e8d8bef9SDimitry Andric // And check to see whether their mapping is consistent in 568e8d8bef9SDimitry Andric // checkNumberingAndReplace. 569e8d8bef9SDimitry Andric 570e8d8bef9SDimitry Andric if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 571e8d8bef9SDimitry Andric return false; 572e8d8bef9SDimitry Andric 573e8d8bef9SDimitry Andric if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 574e8d8bef9SDimitry Andric return false; 575e8d8bef9SDimitry Andric } 576e8d8bef9SDimitry Andric return true; 577e8d8bef9SDimitry Andric } 578e8d8bef9SDimitry Andric 579e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareCommutativeOperandMapping( 580e8d8bef9SDimitry Andric OperandMapping A, OperandMapping B) { 581e8d8bef9SDimitry Andric DenseSet<unsigned> ValueNumbersA; 582e8d8bef9SDimitry Andric DenseSet<unsigned> ValueNumbersB; 583e8d8bef9SDimitry Andric 584e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 585e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 586e8d8bef9SDimitry Andric unsigned OperandLength = A.OperVals.size(); 587e8d8bef9SDimitry Andric 588e8d8bef9SDimitry Andric // Find the value number sets for the operands. 589e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < OperandLength; 590e8d8bef9SDimitry Andric Idx++, VItA++, VItB++) { 591e8d8bef9SDimitry Andric ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 592e8d8bef9SDimitry Andric ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 593e8d8bef9SDimitry Andric } 594e8d8bef9SDimitry Andric 595e8d8bef9SDimitry Andric // Iterate over the operands in the first IRSimilarityCandidate and make sure 596e8d8bef9SDimitry Andric // there exists a possible mapping with the operands in the second 597e8d8bef9SDimitry Andric // IRSimilarityCandidate. 598e8d8bef9SDimitry Andric if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 599e8d8bef9SDimitry Andric A.ValueNumberMapping, A.OperVals, 600e8d8bef9SDimitry Andric ValueNumbersB)) 601e8d8bef9SDimitry Andric return false; 602e8d8bef9SDimitry Andric 603e8d8bef9SDimitry Andric // Iterate over the operands in the second IRSimilarityCandidate and make sure 604e8d8bef9SDimitry Andric // there exists a possible mapping with the operands in the first 605e8d8bef9SDimitry Andric // IRSimilarityCandidate. 606e8d8bef9SDimitry Andric if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 607e8d8bef9SDimitry Andric B.ValueNumberMapping, B.OperVals, 608e8d8bef9SDimitry Andric ValueNumbersA)) 609e8d8bef9SDimitry Andric return false; 610e8d8bef9SDimitry Andric 611e8d8bef9SDimitry Andric return true; 612e8d8bef9SDimitry Andric } 613e8d8bef9SDimitry Andric 614*349cc55cSDimitry Andric bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, 615*349cc55cSDimitry Andric RelativeLocMapping B) { 616*349cc55cSDimitry Andric // Get the basic blocks the label refers to. 617*349cc55cSDimitry Andric BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal); 618*349cc55cSDimitry Andric BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal); 619*349cc55cSDimitry Andric 620*349cc55cSDimitry Andric // Get the basic blocks contained in each region. 621*349cc55cSDimitry Andric DenseSet<BasicBlock *> BasicBlockA; 622*349cc55cSDimitry Andric DenseSet<BasicBlock *> BasicBlockB; 623*349cc55cSDimitry Andric A.IRSC.getBasicBlocks(BasicBlockA); 624*349cc55cSDimitry Andric B.IRSC.getBasicBlocks(BasicBlockB); 625*349cc55cSDimitry Andric 626*349cc55cSDimitry Andric // Determine if the block is contained in the region. 627*349cc55cSDimitry Andric bool AContained = BasicBlockA.contains(ABB); 628*349cc55cSDimitry Andric bool BContained = BasicBlockB.contains(BBB); 629*349cc55cSDimitry Andric 630*349cc55cSDimitry Andric // Both blocks need to be contained in the region, or both need to be outside 631*349cc55cSDimitry Andric // the reigon. 632*349cc55cSDimitry Andric if (AContained != BContained) 633*349cc55cSDimitry Andric return false; 634*349cc55cSDimitry Andric 635*349cc55cSDimitry Andric // If both are contained, then we need to make sure that the relative 636*349cc55cSDimitry Andric // distance to the target blocks are the same. 637*349cc55cSDimitry Andric if (AContained) 638*349cc55cSDimitry Andric return A.RelativeLocation == B.RelativeLocation; 639*349cc55cSDimitry Andric return true; 640*349cc55cSDimitry Andric } 641*349cc55cSDimitry Andric 642e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 643e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 644*349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> MappingA; 645*349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> MappingB; 646*349cc55cSDimitry Andric return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB); 647*349cc55cSDimitry Andric } 648*349cc55cSDimitry Andric 649*349cc55cSDimitry Andric typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &, 650*349cc55cSDimitry Andric SmallVector<int, 4> &, ArrayRef<Value *> &, 651*349cc55cSDimitry Andric ArrayRef<Value *> &> 652*349cc55cSDimitry Andric ZippedRelativeLocationsT; 653*349cc55cSDimitry Andric 654*349cc55cSDimitry Andric bool IRSimilarityCandidate::compareStructure( 655*349cc55cSDimitry Andric const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, 656*349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, 657*349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { 658e8d8bef9SDimitry Andric if (A.getLength() != B.getLength()) 659e8d8bef9SDimitry Andric return false; 660e8d8bef9SDimitry Andric 661e8d8bef9SDimitry Andric if (A.ValueToNumber.size() != B.ValueToNumber.size()) 662e8d8bef9SDimitry Andric return false; 663e8d8bef9SDimitry Andric 664e8d8bef9SDimitry Andric iterator ItA = A.begin(); 665e8d8bef9SDimitry Andric iterator ItB = B.begin(); 666e8d8bef9SDimitry Andric 667*349cc55cSDimitry Andric // These ValueNumber Mapping sets create a create a mapping between the values 668*349cc55cSDimitry Andric // in one candidate to values in the other candidate. If we create a set with 669*349cc55cSDimitry Andric // one element, and that same element maps to the original element in the 670*349cc55cSDimitry Andric // candidate we have a good mapping. 671e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 672e8d8bef9SDimitry Andric 673e8d8bef9SDimitry Andric 674e8d8bef9SDimitry Andric // Iterate over the instructions contained in each candidate 675e8d8bef9SDimitry Andric unsigned SectionLength = A.getStartIdx() + A.getLength(); 676e8d8bef9SDimitry Andric for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 677e8d8bef9SDimitry Andric ItA++, ItB++, Loc++) { 678e8d8bef9SDimitry Andric // Make sure the instructions are similar to one another. 679e8d8bef9SDimitry Andric if (!isClose(*ItA, *ItB)) 680e8d8bef9SDimitry Andric return false; 681e8d8bef9SDimitry Andric 682e8d8bef9SDimitry Andric Instruction *IA = ItA->Inst; 683e8d8bef9SDimitry Andric Instruction *IB = ItB->Inst; 684e8d8bef9SDimitry Andric 685e8d8bef9SDimitry Andric if (!ItA->Legal || !ItB->Legal) 686e8d8bef9SDimitry Andric return false; 687e8d8bef9SDimitry Andric 688e8d8bef9SDimitry Andric // Get the operand sets for the instructions. 689e8d8bef9SDimitry Andric ArrayRef<Value *> OperValsA = ItA->OperVals; 690e8d8bef9SDimitry Andric ArrayRef<Value *> OperValsB = ItB->OperVals; 691e8d8bef9SDimitry Andric 692e8d8bef9SDimitry Andric unsigned InstValA = A.ValueToNumber.find(IA)->second; 693e8d8bef9SDimitry Andric unsigned InstValB = B.ValueToNumber.find(IB)->second; 694e8d8bef9SDimitry Andric 695*349cc55cSDimitry Andric bool WasInserted; 696e8d8bef9SDimitry Andric // Ensure that the mappings for the instructions exists. 697e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 698e8d8bef9SDimitry Andric std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 699e8d8bef9SDimitry Andric if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 700e8d8bef9SDimitry Andric return false; 701e8d8bef9SDimitry Andric 702e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( 703e8d8bef9SDimitry Andric std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); 704e8d8bef9SDimitry Andric if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) 705e8d8bef9SDimitry Andric return false; 706e8d8bef9SDimitry Andric 707e8d8bef9SDimitry Andric // We have different paths for commutative instructions and non-commutative 708e8d8bef9SDimitry Andric // instructions since commutative instructions could allow multiple mappings 709e8d8bef9SDimitry Andric // to certain values. 710e8d8bef9SDimitry Andric if (IA->isCommutative() && !isa<FPMathOperator>(IA)) { 711e8d8bef9SDimitry Andric if (!compareCommutativeOperandMapping( 712e8d8bef9SDimitry Andric {A, OperValsA, ValueNumberMappingA}, 713e8d8bef9SDimitry Andric {B, OperValsB, ValueNumberMappingB})) 714e8d8bef9SDimitry Andric return false; 715e8d8bef9SDimitry Andric continue; 716e8d8bef9SDimitry Andric } 717e8d8bef9SDimitry Andric 718e8d8bef9SDimitry Andric // Handle the non-commutative cases. 719e8d8bef9SDimitry Andric if (!compareNonCommutativeOperandMapping( 720e8d8bef9SDimitry Andric {A, OperValsA, ValueNumberMappingA}, 721e8d8bef9SDimitry Andric {B, OperValsB, ValueNumberMappingB})) 722e8d8bef9SDimitry Andric return false; 723*349cc55cSDimitry Andric 724*349cc55cSDimitry Andric // Here we check that between two corresponding instructions, 725*349cc55cSDimitry Andric // when referring to a basic block in the same region, the 726*349cc55cSDimitry Andric // relative locations are the same. And, that the instructions refer to 727*349cc55cSDimitry Andric // basic blocks outside the region in the same corresponding locations. 728*349cc55cSDimitry Andric 729*349cc55cSDimitry Andric // We are able to make the assumption about blocks outside of the region 730*349cc55cSDimitry Andric // since the target block labels are considered values and will follow the 731*349cc55cSDimitry Andric // same number matching that we defined for the other instructions in the 732*349cc55cSDimitry Andric // region. So, at this point, in each location we target a specific block 733*349cc55cSDimitry Andric // outside the region, we are targeting a corresponding block in each 734*349cc55cSDimitry Andric // analagous location in the region we are comparing to. 735*349cc55cSDimitry Andric if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) && 736*349cc55cSDimitry Andric !(isa<PHINode>(IA) && isa<PHINode>(IB))) 737*349cc55cSDimitry Andric continue; 738*349cc55cSDimitry Andric 739*349cc55cSDimitry Andric SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations; 740*349cc55cSDimitry Andric SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations; 741*349cc55cSDimitry Andric if (RelBlockLocsA.size() != RelBlockLocsB.size() && 742*349cc55cSDimitry Andric OperValsA.size() != OperValsB.size()) 743*349cc55cSDimitry Andric return false; 744*349cc55cSDimitry Andric 745*349cc55cSDimitry Andric ZippedRelativeLocationsT ZippedRelativeLocations = 746*349cc55cSDimitry Andric zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB); 747*349cc55cSDimitry Andric if (any_of(ZippedRelativeLocations, 748*349cc55cSDimitry Andric [&A, &B](std::tuple<int, int, Value *, Value *> R) { 749*349cc55cSDimitry Andric return !checkRelativeLocations( 750*349cc55cSDimitry Andric {A, std::get<0>(R), std::get<2>(R)}, 751*349cc55cSDimitry Andric {B, std::get<1>(R), std::get<3>(R)}); 752*349cc55cSDimitry Andric })) 753*349cc55cSDimitry Andric return false; 754e8d8bef9SDimitry Andric } 755e8d8bef9SDimitry Andric return true; 756e8d8bef9SDimitry Andric } 757e8d8bef9SDimitry Andric 758e8d8bef9SDimitry Andric bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 759e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 760e8d8bef9SDimitry Andric auto DoesOverlap = [](const IRSimilarityCandidate &X, 761e8d8bef9SDimitry Andric const IRSimilarityCandidate &Y) { 762e8d8bef9SDimitry Andric // Check: 763e8d8bef9SDimitry Andric // XXXXXX X starts before Y ends 764e8d8bef9SDimitry Andric // YYYYYYY Y starts after X starts 765e8d8bef9SDimitry Andric return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 766e8d8bef9SDimitry Andric }; 767e8d8bef9SDimitry Andric 768e8d8bef9SDimitry Andric return DoesOverlap(A, B) || DoesOverlap(B, A); 769e8d8bef9SDimitry Andric } 770e8d8bef9SDimitry Andric 771e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper( 772e8d8bef9SDimitry Andric Module &M, std::vector<IRInstructionData *> &InstrList, 773e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 774e8d8bef9SDimitry Andric 775e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrListForModule; 776e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMappingForModule; 777e8d8bef9SDimitry Andric // Iterate over the functions in the module to map each Instruction in each 778e8d8bef9SDimitry Andric // BasicBlock to an unsigned integer. 779*349cc55cSDimitry Andric Mapper.initializeForBBs(M); 780*349cc55cSDimitry Andric 781e8d8bef9SDimitry Andric for (Function &F : M) { 782e8d8bef9SDimitry Andric 783e8d8bef9SDimitry Andric if (F.empty()) 784e8d8bef9SDimitry Andric continue; 785e8d8bef9SDimitry Andric 786e8d8bef9SDimitry Andric for (BasicBlock &BB : F) { 787e8d8bef9SDimitry Andric 788e8d8bef9SDimitry Andric // BB has potential to have similarity since it has a size greater than 2 789e8d8bef9SDimitry Andric // and can therefore match other regions greater than 2. Map it to a list 790e8d8bef9SDimitry Andric // of unsigned integers. 791e8d8bef9SDimitry Andric Mapper.convertToUnsignedVec(BB, InstrListForModule, 792e8d8bef9SDimitry Andric IntegerMappingForModule); 793e8d8bef9SDimitry Andric } 794*349cc55cSDimitry Andric 795*349cc55cSDimitry Andric BasicBlock::iterator It = F.begin()->end(); 796*349cc55cSDimitry Andric Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule, 797*349cc55cSDimitry Andric true); 798*349cc55cSDimitry Andric if (InstrListForModule.size() > 0) 799*349cc55cSDimitry Andric Mapper.IDL->push_back(*InstrListForModule.back()); 800e8d8bef9SDimitry Andric } 801e8d8bef9SDimitry Andric 802e8d8bef9SDimitry Andric // Insert the InstrListForModule at the end of the overall InstrList so that 803e8d8bef9SDimitry Andric // we can have a long InstrList for the entire set of Modules being analyzed. 804e8d8bef9SDimitry Andric llvm::append_range(InstrList, InstrListForModule); 805e8d8bef9SDimitry Andric // Do the same as above, but for IntegerMapping. 806e8d8bef9SDimitry Andric llvm::append_range(IntegerMapping, IntegerMappingForModule); 807e8d8bef9SDimitry Andric } 808e8d8bef9SDimitry Andric 809e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper( 810e8d8bef9SDimitry Andric ArrayRef<std::unique_ptr<Module>> &Modules, 811e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrList, 812e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 813e8d8bef9SDimitry Andric 814e8d8bef9SDimitry Andric // Iterate over, and map the instructions in each module. 815e8d8bef9SDimitry Andric for (const std::unique_ptr<Module> &M : Modules) 816e8d8bef9SDimitry Andric populateMapper(*M, InstrList, IntegerMapping); 817e8d8bef9SDimitry Andric } 818e8d8bef9SDimitry Andric 819e8d8bef9SDimitry Andric /// From a repeated subsequence, find all the different instances of the 820e8d8bef9SDimitry Andric /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 821e8d8bef9SDimitry Andric /// the IRInstructionData in subsequence. 822e8d8bef9SDimitry Andric /// 823e8d8bef9SDimitry Andric /// \param [in] Mapper - The instruction mapper for sanity checks. 824e8d8bef9SDimitry Andric /// \param [in] InstrList - The vector that holds the instruction data. 825e8d8bef9SDimitry Andric /// \param [in] IntegerMapping - The vector that holds the mapped integers. 826e8d8bef9SDimitry Andric /// \param [out] CandsForRepSubstring - The vector to store the generated 827e8d8bef9SDimitry Andric /// IRSimilarityCandidates. 828e8d8bef9SDimitry Andric static void createCandidatesFromSuffixTree( 829fe6060f1SDimitry Andric const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, 830e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 831e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 832e8d8bef9SDimitry Andric 833e8d8bef9SDimitry Andric unsigned StringLen = RS.Length; 834*349cc55cSDimitry Andric if (StringLen < 2) 835*349cc55cSDimitry Andric return; 836e8d8bef9SDimitry Andric 837e8d8bef9SDimitry Andric // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 838e8d8bef9SDimitry Andric for (const unsigned &StartIdx : RS.StartIndices) { 839e8d8bef9SDimitry Andric unsigned EndIdx = StartIdx + StringLen - 1; 840e8d8bef9SDimitry Andric 841e8d8bef9SDimitry Andric // Check that this subsequence does not contain an illegal instruction. 842e8d8bef9SDimitry Andric bool ContainsIllegal = false; 843e8d8bef9SDimitry Andric for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 844e8d8bef9SDimitry Andric unsigned Key = IntegerMapping[CurrIdx]; 845e8d8bef9SDimitry Andric if (Key > Mapper.IllegalInstrNumber) { 846e8d8bef9SDimitry Andric ContainsIllegal = true; 847e8d8bef9SDimitry Andric break; 848e8d8bef9SDimitry Andric } 849e8d8bef9SDimitry Andric } 850e8d8bef9SDimitry Andric 851e8d8bef9SDimitry Andric // If we have an illegal instruction, we should not create an 852e8d8bef9SDimitry Andric // IRSimilarityCandidate for this region. 853e8d8bef9SDimitry Andric if (ContainsIllegal) 854e8d8bef9SDimitry Andric continue; 855e8d8bef9SDimitry Andric 856e8d8bef9SDimitry Andric // We are getting iterators to the instructions in this region of code 857e8d8bef9SDimitry Andric // by advancing the start and end indices from the start of the 858e8d8bef9SDimitry Andric // InstrList. 859e8d8bef9SDimitry Andric std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 860e8d8bef9SDimitry Andric std::advance(StartIt, StartIdx); 861e8d8bef9SDimitry Andric std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 862e8d8bef9SDimitry Andric std::advance(EndIt, EndIdx); 863e8d8bef9SDimitry Andric 864e8d8bef9SDimitry Andric CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 865e8d8bef9SDimitry Andric } 866e8d8bef9SDimitry Andric } 867e8d8bef9SDimitry Andric 868*349cc55cSDimitry Andric void IRSimilarityCandidate::createCanonicalRelationFrom( 869*349cc55cSDimitry Andric IRSimilarityCandidate &SourceCand, 870*349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, 871*349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) { 872*349cc55cSDimitry Andric assert(SourceCand.CanonNumToNumber.size() != 0 && 873*349cc55cSDimitry Andric "Base canonical relationship is empty!"); 874*349cc55cSDimitry Andric assert(SourceCand.NumberToCanonNum.size() != 0 && 875*349cc55cSDimitry Andric "Base canonical relationship is empty!"); 876*349cc55cSDimitry Andric 877*349cc55cSDimitry Andric assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty"); 878*349cc55cSDimitry Andric assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty"); 879*349cc55cSDimitry Andric 880*349cc55cSDimitry Andric DenseSet<unsigned> UsedGVNs; 881*349cc55cSDimitry Andric // Iterate over the mappings provided from this candidate to SourceCand. We 882*349cc55cSDimitry Andric // are then able to map the GVN in this candidate to the same canonical number 883*349cc55cSDimitry Andric // given to the corresponding GVN in SourceCand. 884*349cc55cSDimitry Andric for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) { 885*349cc55cSDimitry Andric unsigned SourceGVN = GVNMapping.first; 886*349cc55cSDimitry Andric 887*349cc55cSDimitry Andric assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!"); 888*349cc55cSDimitry Andric 889*349cc55cSDimitry Andric unsigned ResultGVN; 890*349cc55cSDimitry Andric // We need special handling if we have more than one potential value. This 891*349cc55cSDimitry Andric // means that there are at least two GVNs that could correspond to this GVN. 892*349cc55cSDimitry Andric // This could lead to potential swapping later on, so we make a decision 893*349cc55cSDimitry Andric // here to ensure a one-to-one mapping. 894*349cc55cSDimitry Andric if (GVNMapping.second.size() > 1) { 895*349cc55cSDimitry Andric bool Found = false; 896*349cc55cSDimitry Andric for (unsigned Val : GVNMapping.second) { 897*349cc55cSDimitry Andric // We make sure the target value number hasn't already been reserved. 898*349cc55cSDimitry Andric if (UsedGVNs.contains(Val)) 899*349cc55cSDimitry Andric continue; 900*349cc55cSDimitry Andric 901*349cc55cSDimitry Andric // We make sure that the opposite mapping is still consistent. 902*349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator It = 903*349cc55cSDimitry Andric FromSourceMapping.find(Val); 904*349cc55cSDimitry Andric 905*349cc55cSDimitry Andric if (!It->second.contains(SourceGVN)) 906*349cc55cSDimitry Andric continue; 907*349cc55cSDimitry Andric 908*349cc55cSDimitry Andric // We pick the first item that satisfies these conditions. 909*349cc55cSDimitry Andric Found = true; 910*349cc55cSDimitry Andric ResultGVN = Val; 911*349cc55cSDimitry Andric break; 912*349cc55cSDimitry Andric } 913*349cc55cSDimitry Andric 914*349cc55cSDimitry Andric assert(Found && "Could not find matching value for source GVN"); 915*349cc55cSDimitry Andric (void)Found; 916*349cc55cSDimitry Andric 917*349cc55cSDimitry Andric } else 918*349cc55cSDimitry Andric ResultGVN = *GVNMapping.second.begin(); 919*349cc55cSDimitry Andric 920*349cc55cSDimitry Andric // Whatever GVN is found, we mark it as used. 921*349cc55cSDimitry Andric UsedGVNs.insert(ResultGVN); 922*349cc55cSDimitry Andric 923*349cc55cSDimitry Andric unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN); 924*349cc55cSDimitry Andric CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN)); 925*349cc55cSDimitry Andric NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum)); 926*349cc55cSDimitry Andric } 927*349cc55cSDimitry Andric } 928*349cc55cSDimitry Andric 929*349cc55cSDimitry Andric void IRSimilarityCandidate::createCanonicalMappingFor( 930*349cc55cSDimitry Andric IRSimilarityCandidate &CurrCand) { 931*349cc55cSDimitry Andric assert(CurrCand.CanonNumToNumber.size() == 0 && 932*349cc55cSDimitry Andric "Canonical Relationship is non-empty"); 933*349cc55cSDimitry Andric assert(CurrCand.NumberToCanonNum.size() == 0 && 934*349cc55cSDimitry Andric "Canonical Relationship is non-empty"); 935*349cc55cSDimitry Andric 936*349cc55cSDimitry Andric unsigned CanonNum = 0; 937*349cc55cSDimitry Andric // Iterate over the value numbers found, the order does not matter in this 938*349cc55cSDimitry Andric // case. 939*349cc55cSDimitry Andric for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) { 940*349cc55cSDimitry Andric CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum)); 941*349cc55cSDimitry Andric CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first)); 942*349cc55cSDimitry Andric CanonNum++; 943*349cc55cSDimitry Andric } 944*349cc55cSDimitry Andric } 945*349cc55cSDimitry Andric 946e8d8bef9SDimitry Andric /// From the list of IRSimilarityCandidates, perform a comparison between each 947e8d8bef9SDimitry Andric /// IRSimilarityCandidate to determine if there are overlapping 948e8d8bef9SDimitry Andric /// IRInstructionData, or if they do not have the same structure. 949e8d8bef9SDimitry Andric /// 950e8d8bef9SDimitry Andric /// \param [in] CandsForRepSubstring - The vector containing the 951e8d8bef9SDimitry Andric /// IRSimilarityCandidates. 952e8d8bef9SDimitry Andric /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 953e8d8bef9SDimitry Andric /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 954e8d8bef9SDimitry Andric /// vector are structurally similar to one another. 955e8d8bef9SDimitry Andric static void findCandidateStructures( 956e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 957e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { 958e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 959e8d8bef9SDimitry Andric InnerCandEndIt; 960e8d8bef9SDimitry Andric 961e8d8bef9SDimitry Andric // IRSimilarityCandidates each have a structure for operand use. It is 962e8d8bef9SDimitry Andric // possible that two instances of the same subsequences have different 963e8d8bef9SDimitry Andric // structure. Each type of structure found is assigned a number. This 964e8d8bef9SDimitry Andric // DenseMap maps an IRSimilarityCandidate to which type of similarity 965e8d8bef9SDimitry Andric // discovered it fits within. 966e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 967e8d8bef9SDimitry Andric 968e8d8bef9SDimitry Andric // Find the compatibility from each candidate to the others to determine 969e8d8bef9SDimitry Andric // which candidates overlap and which have the same structure by mapping 970e8d8bef9SDimitry Andric // each structure to a different group. 971e8d8bef9SDimitry Andric bool SameStructure; 972e8d8bef9SDimitry Andric bool Inserted; 973e8d8bef9SDimitry Andric unsigned CurrentGroupNum = 0; 974e8d8bef9SDimitry Andric unsigned OuterGroupNum; 975e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 976e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 977e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 978e8d8bef9SDimitry Andric 979e8d8bef9SDimitry Andric // Iterate over the candidates to determine its structural and overlapping 980e8d8bef9SDimitry Andric // compatibility with other instructions 981*349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 982*349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 983e8d8bef9SDimitry Andric for (CandIt = CandsForRepSubstring.begin(), 984e8d8bef9SDimitry Andric CandEndIt = CandsForRepSubstring.end(); 985e8d8bef9SDimitry Andric CandIt != CandEndIt; CandIt++) { 986e8d8bef9SDimitry Andric 987e8d8bef9SDimitry Andric // Determine if it has an assigned structural group already. 988e8d8bef9SDimitry Andric CandToGroupIt = CandToGroup.find(&*CandIt); 989e8d8bef9SDimitry Andric if (CandToGroupIt == CandToGroup.end()) { 990e8d8bef9SDimitry Andric // If not, we assign it one, and add it to our mapping. 991e8d8bef9SDimitry Andric std::tie(CandToGroupIt, Inserted) = 992e8d8bef9SDimitry Andric CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 993e8d8bef9SDimitry Andric } 994e8d8bef9SDimitry Andric 995e8d8bef9SDimitry Andric // Get the structural group number from the iterator. 996e8d8bef9SDimitry Andric OuterGroupNum = CandToGroupIt->second; 997e8d8bef9SDimitry Andric 998e8d8bef9SDimitry Andric // Check if we already have a list of IRSimilarityCandidates for the current 999e8d8bef9SDimitry Andric // structural group. Create one if one does not exist. 1000e8d8bef9SDimitry Andric CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 1001*349cc55cSDimitry Andric if (CurrentGroupPair == StructuralGroups.end()) { 1002*349cc55cSDimitry Andric IRSimilarityCandidate::createCanonicalMappingFor(*CandIt); 1003e8d8bef9SDimitry Andric std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 1004e8d8bef9SDimitry Andric std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 1005*349cc55cSDimitry Andric } 1006e8d8bef9SDimitry Andric 1007e8d8bef9SDimitry Andric // Iterate over the IRSimilarityCandidates following the current 1008e8d8bef9SDimitry Andric // IRSimilarityCandidate in the list to determine whether the two 1009e8d8bef9SDimitry Andric // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 1010e8d8bef9SDimitry Andric // of IRSimilarityCandidates. 1011e8d8bef9SDimitry Andric for (InnerCandIt = std::next(CandIt), 1012e8d8bef9SDimitry Andric InnerCandEndIt = CandsForRepSubstring.end(); 1013e8d8bef9SDimitry Andric InnerCandIt != InnerCandEndIt; InnerCandIt++) { 1014e8d8bef9SDimitry Andric 1015e8d8bef9SDimitry Andric // We check if the inner item has a group already, if it does, we skip it. 1016e8d8bef9SDimitry Andric CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 1017e8d8bef9SDimitry Andric if (CandToGroupItInner != CandToGroup.end()) 1018e8d8bef9SDimitry Andric continue; 1019e8d8bef9SDimitry Andric 1020e8d8bef9SDimitry Andric // Otherwise we determine if they have the same structure and add it to 1021e8d8bef9SDimitry Andric // vector if they match. 1022*349cc55cSDimitry Andric ValueNumberMappingA.clear(); 1023*349cc55cSDimitry Andric ValueNumberMappingB.clear(); 1024*349cc55cSDimitry Andric SameStructure = IRSimilarityCandidate::compareStructure( 1025*349cc55cSDimitry Andric *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB); 1026e8d8bef9SDimitry Andric if (!SameStructure) 1027e8d8bef9SDimitry Andric continue; 1028e8d8bef9SDimitry Andric 1029*349cc55cSDimitry Andric InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA, 1030*349cc55cSDimitry Andric ValueNumberMappingB); 1031e8d8bef9SDimitry Andric CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 1032e8d8bef9SDimitry Andric CurrentGroupPair->second.push_back(*InnerCandIt); 1033e8d8bef9SDimitry Andric } 1034e8d8bef9SDimitry Andric } 1035e8d8bef9SDimitry Andric } 1036e8d8bef9SDimitry Andric 1037e8d8bef9SDimitry Andric void IRSimilarityIdentifier::findCandidates( 1038e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrList, 1039e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 1040e8d8bef9SDimitry Andric SuffixTree ST(IntegerMapping); 1041e8d8bef9SDimitry Andric 1042e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> CandsForRepSubstring; 1043e8d8bef9SDimitry Andric std::vector<SimilarityGroup> NewCandidateGroups; 1044e8d8bef9SDimitry Andric 1045e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup> StructuralGroups; 1046e8d8bef9SDimitry Andric 1047e8d8bef9SDimitry Andric // Iterate over the subsequences found by the Suffix Tree to create 1048e8d8bef9SDimitry Andric // IRSimilarityCandidates for each repeated subsequence and determine which 1049e8d8bef9SDimitry Andric // instances are structurally similar to one another. 1050fe6060f1SDimitry Andric for (SuffixTree::RepeatedSubstring &RS : ST) { 1051fe6060f1SDimitry Andric createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, 1052e8d8bef9SDimitry Andric CandsForRepSubstring); 1053e8d8bef9SDimitry Andric 1054e8d8bef9SDimitry Andric if (CandsForRepSubstring.size() < 2) 1055e8d8bef9SDimitry Andric continue; 1056e8d8bef9SDimitry Andric 1057e8d8bef9SDimitry Andric findCandidateStructures(CandsForRepSubstring, StructuralGroups); 1058e8d8bef9SDimitry Andric for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) 1059e8d8bef9SDimitry Andric // We only add the group if it contains more than one 1060e8d8bef9SDimitry Andric // IRSimilarityCandidate. If there is only one, that means there is no 1061e8d8bef9SDimitry Andric // other repeated subsequence with the same structure. 1062e8d8bef9SDimitry Andric if (Group.second.size() > 1) 1063e8d8bef9SDimitry Andric SimilarityCandidates->push_back(Group.second); 1064e8d8bef9SDimitry Andric 1065e8d8bef9SDimitry Andric CandsForRepSubstring.clear(); 1066e8d8bef9SDimitry Andric StructuralGroups.clear(); 1067e8d8bef9SDimitry Andric NewCandidateGroups.clear(); 1068e8d8bef9SDimitry Andric } 1069e8d8bef9SDimitry Andric } 1070e8d8bef9SDimitry Andric 1071e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 1072e8d8bef9SDimitry Andric ArrayRef<std::unique_ptr<Module>> Modules) { 1073e8d8bef9SDimitry Andric resetSimilarityCandidates(); 1074e8d8bef9SDimitry Andric 1075e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrList; 1076e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMapping; 1077*349cc55cSDimitry Andric Mapper.InstClassifier.EnableBranches = this->EnableBranches; 1078e8d8bef9SDimitry Andric 1079e8d8bef9SDimitry Andric populateMapper(Modules, InstrList, IntegerMapping); 1080e8d8bef9SDimitry Andric findCandidates(InstrList, IntegerMapping); 1081e8d8bef9SDimitry Andric 1082e8d8bef9SDimitry Andric return SimilarityCandidates.getValue(); 1083e8d8bef9SDimitry Andric } 1084e8d8bef9SDimitry Andric 1085e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 1086e8d8bef9SDimitry Andric resetSimilarityCandidates(); 1087*349cc55cSDimitry Andric Mapper.InstClassifier.EnableBranches = this->EnableBranches; 1088e8d8bef9SDimitry Andric 1089e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrList; 1090e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMapping; 1091e8d8bef9SDimitry Andric 1092e8d8bef9SDimitry Andric populateMapper(M, InstrList, IntegerMapping); 1093e8d8bef9SDimitry Andric findCandidates(InstrList, IntegerMapping); 1094e8d8bef9SDimitry Andric 1095e8d8bef9SDimitry Andric return SimilarityCandidates.getValue(); 1096e8d8bef9SDimitry Andric } 1097e8d8bef9SDimitry Andric 1098e8d8bef9SDimitry Andric INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 1099e8d8bef9SDimitry Andric "ir-similarity-identifier", false, true) 1100e8d8bef9SDimitry Andric 1101e8d8bef9SDimitry Andric IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 1102e8d8bef9SDimitry Andric : ModulePass(ID) { 1103e8d8bef9SDimitry Andric initializeIRSimilarityIdentifierWrapperPassPass( 1104e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 1105e8d8bef9SDimitry Andric } 1106e8d8bef9SDimitry Andric 1107e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 1108*349cc55cSDimitry Andric IRSI.reset(new IRSimilarityIdentifier(!DisableBranches)); 1109e8d8bef9SDimitry Andric return false; 1110e8d8bef9SDimitry Andric } 1111e8d8bef9SDimitry Andric 1112e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 1113e8d8bef9SDimitry Andric IRSI.reset(); 1114e8d8bef9SDimitry Andric return false; 1115e8d8bef9SDimitry Andric } 1116e8d8bef9SDimitry Andric 1117e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 1118fe6060f1SDimitry Andric IRSI->findSimilarity(M); 1119e8d8bef9SDimitry Andric return false; 1120e8d8bef9SDimitry Andric } 1121e8d8bef9SDimitry Andric 1122e8d8bef9SDimitry Andric AnalysisKey IRSimilarityAnalysis::Key; 1123e8d8bef9SDimitry Andric IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 1124e8d8bef9SDimitry Andric ModuleAnalysisManager &) { 1125e8d8bef9SDimitry Andric 1126*349cc55cSDimitry Andric auto IRSI = IRSimilarityIdentifier(!DisableBranches); 1127fe6060f1SDimitry Andric IRSI.findSimilarity(M); 1128fe6060f1SDimitry Andric return IRSI; 1129e8d8bef9SDimitry Andric } 1130e8d8bef9SDimitry Andric 1131e8d8bef9SDimitry Andric PreservedAnalyses 1132e8d8bef9SDimitry Andric IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 1133e8d8bef9SDimitry Andric IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 1134e8d8bef9SDimitry Andric Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); 1135e8d8bef9SDimitry Andric 1136e8d8bef9SDimitry Andric for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 1137e8d8bef9SDimitry Andric OS << CandVec.size() << " candidates of length " 1138e8d8bef9SDimitry Andric << CandVec.begin()->getLength() << ". Found in: \n"; 1139e8d8bef9SDimitry Andric for (IRSimilarityCandidate &Cand : CandVec) { 1140e8d8bef9SDimitry Andric OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 1141e8d8bef9SDimitry Andric << ", Basic Block: "; 1142e8d8bef9SDimitry Andric if (Cand.front()->Inst->getParent()->getName().str() == "") 1143fe6060f1SDimitry Andric OS << "(unnamed)"; 1144e8d8bef9SDimitry Andric else 1145fe6060f1SDimitry Andric OS << Cand.front()->Inst->getParent()->getName().str(); 1146fe6060f1SDimitry Andric OS << "\n Start Instruction: "; 1147fe6060f1SDimitry Andric Cand.frontInstruction()->print(OS); 1148fe6060f1SDimitry Andric OS << "\n End Instruction: "; 1149fe6060f1SDimitry Andric Cand.backInstruction()->print(OS); 1150fe6060f1SDimitry Andric OS << "\n"; 1151e8d8bef9SDimitry Andric } 1152e8d8bef9SDimitry Andric } 1153e8d8bef9SDimitry Andric 1154e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 1155e8d8bef9SDimitry Andric } 1156e8d8bef9SDimitry Andric 1157e8d8bef9SDimitry Andric char IRSimilarityIdentifierWrapperPass::ID = 0; 1158