1e8d8bef9SDimitry Andric //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric // \file 10e8d8bef9SDimitry Andric // Implementation file for the IRSimilarityIdentifier for identifying 11e8d8bef9SDimitry Andric // similarities in IR including the IRInstructionMapper. 12e8d8bef9SDimitry Andric // 13e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 14e8d8bef9SDimitry Andric 15e8d8bef9SDimitry Andric #include "llvm/Analysis/IRSimilarityIdentifier.h" 16e8d8bef9SDimitry Andric #include "llvm/ADT/DenseMap.h" 17e8d8bef9SDimitry Andric #include "llvm/IR/Intrinsics.h" 18e8d8bef9SDimitry Andric #include "llvm/IR/Operator.h" 19e8d8bef9SDimitry Andric #include "llvm/IR/User.h" 20e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 21e8d8bef9SDimitry Andric #include "llvm/Support/SuffixTree.h" 22e8d8bef9SDimitry Andric 23e8d8bef9SDimitry Andric using namespace llvm; 24e8d8bef9SDimitry Andric using namespace IRSimilarity; 25e8d8bef9SDimitry Andric 26*04eeddc0SDimitry Andric namespace llvm { 27349cc55cSDimitry Andric cl::opt<bool> 28349cc55cSDimitry Andric DisableBranches("no-ir-sim-branch-matching", cl::init(false), 29349cc55cSDimitry Andric cl::ReallyHidden, 30349cc55cSDimitry Andric cl::desc("disable similarity matching, and outlining, " 31349cc55cSDimitry Andric "across branches for debugging purposes.")); 32349cc55cSDimitry Andric 33*04eeddc0SDimitry Andric cl::opt<bool> 34*04eeddc0SDimitry Andric DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), 35*04eeddc0SDimitry Andric cl::ReallyHidden, 36*04eeddc0SDimitry Andric cl::desc("disable outlining indirect calls.")); 37*04eeddc0SDimitry Andric 38*04eeddc0SDimitry Andric cl::opt<bool> 39*04eeddc0SDimitry Andric MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, 40*04eeddc0SDimitry Andric cl::desc("only allow matching call instructions if the " 41*04eeddc0SDimitry Andric "name and type signature match.")); 42*04eeddc0SDimitry Andric } // namespace llvm 43*04eeddc0SDimitry Andric 44e8d8bef9SDimitry Andric IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 45e8d8bef9SDimitry Andric IRInstructionDataList &IDList) 46e8d8bef9SDimitry Andric : Inst(&I), Legal(Legality), IDL(&IDList) { 47349cc55cSDimitry Andric initializeInstruction(); 48349cc55cSDimitry Andric } 49349cc55cSDimitry Andric 50349cc55cSDimitry Andric void IRInstructionData::initializeInstruction() { 51e8d8bef9SDimitry Andric // We check for whether we have a comparison instruction. If it is, we 52e8d8bef9SDimitry Andric // find the "less than" version of the predicate for consistency for 53e8d8bef9SDimitry Andric // comparison instructions throught the program. 54349cc55cSDimitry Andric if (CmpInst *C = dyn_cast<CmpInst>(Inst)) { 55e8d8bef9SDimitry Andric CmpInst::Predicate Predicate = predicateForConsistency(C); 56e8d8bef9SDimitry Andric if (Predicate != C->getPredicate()) 57e8d8bef9SDimitry Andric RevisedPredicate = Predicate; 58e8d8bef9SDimitry Andric } 59e8d8bef9SDimitry Andric 60e8d8bef9SDimitry Andric // Here we collect the operands and their types for determining whether 61e8d8bef9SDimitry Andric // the structure of the operand use matches between two different candidates. 62349cc55cSDimitry Andric for (Use &OI : Inst->operands()) { 63349cc55cSDimitry Andric if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) { 64e8d8bef9SDimitry Andric // If we have a CmpInst where the predicate is reversed, it means the 65e8d8bef9SDimitry Andric // operands must be reversed as well. 66e8d8bef9SDimitry Andric OperVals.insert(OperVals.begin(), OI.get()); 67e8d8bef9SDimitry Andric continue; 68e8d8bef9SDimitry Andric } 69e8d8bef9SDimitry Andric 70e8d8bef9SDimitry Andric OperVals.push_back(OI.get()); 71e8d8bef9SDimitry Andric } 72*04eeddc0SDimitry Andric 73*04eeddc0SDimitry Andric // We capture the incoming BasicBlocks as values as well as the incoming 74*04eeddc0SDimitry Andric // Values in order to check for structural similarity. 75*04eeddc0SDimitry Andric if (PHINode *PN = dyn_cast<PHINode>(Inst)) 76*04eeddc0SDimitry Andric for (BasicBlock *BB : PN->blocks()) 77*04eeddc0SDimitry Andric OperVals.push_back(BB); 78e8d8bef9SDimitry Andric } 79e8d8bef9SDimitry Andric 80349cc55cSDimitry Andric IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) 81*04eeddc0SDimitry Andric : IDL(&IDList) {} 82349cc55cSDimitry Andric 83349cc55cSDimitry Andric void IRInstructionData::setBranchSuccessors( 84349cc55cSDimitry Andric DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 85349cc55cSDimitry Andric assert(isa<BranchInst>(Inst) && "Instruction must be branch"); 86349cc55cSDimitry Andric 87349cc55cSDimitry Andric BranchInst *BI = cast<BranchInst>(Inst); 88349cc55cSDimitry Andric DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 89349cc55cSDimitry Andric 90349cc55cSDimitry Andric BBNumIt = BasicBlockToInteger.find(BI->getParent()); 91349cc55cSDimitry Andric assert(BBNumIt != BasicBlockToInteger.end() && 92349cc55cSDimitry Andric "Could not find location for BasicBlock!"); 93349cc55cSDimitry Andric 94349cc55cSDimitry Andric int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 95349cc55cSDimitry Andric 96349cc55cSDimitry Andric for (BasicBlock *Successor : BI->successors()) { 97349cc55cSDimitry Andric BBNumIt = BasicBlockToInteger.find(Successor); 98349cc55cSDimitry Andric assert(BBNumIt != BasicBlockToInteger.end() && 99349cc55cSDimitry Andric "Could not find number for BasicBlock!"); 100349cc55cSDimitry Andric int OtherBlockNumber = static_cast<int>(BBNumIt->second); 101349cc55cSDimitry Andric 102349cc55cSDimitry Andric int Relative = OtherBlockNumber - CurrentBlockNumber; 103349cc55cSDimitry Andric RelativeBlockLocations.push_back(Relative); 104349cc55cSDimitry Andric } 105349cc55cSDimitry Andric } 106349cc55cSDimitry Andric 107*04eeddc0SDimitry Andric void IRInstructionData::setCalleeName(bool MatchByName) { 108*04eeddc0SDimitry Andric CallInst *CI = dyn_cast<CallInst>(Inst); 109*04eeddc0SDimitry Andric assert(CI && "Instruction must be call"); 110*04eeddc0SDimitry Andric 111*04eeddc0SDimitry Andric CalleeName = ""; 112*04eeddc0SDimitry Andric if (!CI->isIndirectCall() && MatchByName) 113*04eeddc0SDimitry Andric CalleeName = CI->getCalledFunction()->getName().str(); 114*04eeddc0SDimitry Andric } 115*04eeddc0SDimitry Andric 116*04eeddc0SDimitry Andric void IRInstructionData::setPHIPredecessors( 117*04eeddc0SDimitry Andric DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 118*04eeddc0SDimitry Andric assert(isa<PHINode>(Inst) && "Instruction must be phi node"); 119*04eeddc0SDimitry Andric 120*04eeddc0SDimitry Andric PHINode *PN = cast<PHINode>(Inst); 121*04eeddc0SDimitry Andric DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 122*04eeddc0SDimitry Andric 123*04eeddc0SDimitry Andric BBNumIt = BasicBlockToInteger.find(PN->getParent()); 124*04eeddc0SDimitry Andric assert(BBNumIt != BasicBlockToInteger.end() && 125*04eeddc0SDimitry Andric "Could not find location for BasicBlock!"); 126*04eeddc0SDimitry Andric 127*04eeddc0SDimitry Andric int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 128*04eeddc0SDimitry Andric 129*04eeddc0SDimitry Andric // Convert the incoming blocks of the PHINode to an integer value, based on 130*04eeddc0SDimitry Andric // the relative distances between the current block and the incoming block. 131*04eeddc0SDimitry Andric for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) { 132*04eeddc0SDimitry Andric BasicBlock *Incoming = PN->getIncomingBlock(Idx); 133*04eeddc0SDimitry Andric BBNumIt = BasicBlockToInteger.find(Incoming); 134*04eeddc0SDimitry Andric assert(BBNumIt != BasicBlockToInteger.end() && 135*04eeddc0SDimitry Andric "Could not find number for BasicBlock!"); 136*04eeddc0SDimitry Andric int OtherBlockNumber = static_cast<int>(BBNumIt->second); 137*04eeddc0SDimitry Andric 138*04eeddc0SDimitry Andric int Relative = OtherBlockNumber - CurrentBlockNumber; 139*04eeddc0SDimitry Andric RelativeBlockLocations.push_back(Relative); 140*04eeddc0SDimitry Andric RelativeBlockLocations.push_back(Relative); 141*04eeddc0SDimitry Andric } 142*04eeddc0SDimitry Andric } 143*04eeddc0SDimitry Andric 144e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 145e8d8bef9SDimitry Andric switch (CI->getPredicate()) { 146e8d8bef9SDimitry Andric case CmpInst::FCMP_OGT: 147e8d8bef9SDimitry Andric case CmpInst::FCMP_UGT: 148e8d8bef9SDimitry Andric case CmpInst::FCMP_OGE: 149e8d8bef9SDimitry Andric case CmpInst::FCMP_UGE: 150e8d8bef9SDimitry Andric case CmpInst::ICMP_SGT: 151e8d8bef9SDimitry Andric case CmpInst::ICMP_UGT: 152e8d8bef9SDimitry Andric case CmpInst::ICMP_SGE: 153e8d8bef9SDimitry Andric case CmpInst::ICMP_UGE: 154e8d8bef9SDimitry Andric return CI->getSwappedPredicate(); 155e8d8bef9SDimitry Andric default: 156e8d8bef9SDimitry Andric return CI->getPredicate(); 157e8d8bef9SDimitry Andric } 158e8d8bef9SDimitry Andric } 159e8d8bef9SDimitry Andric 160e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::getPredicate() const { 161e8d8bef9SDimitry Andric assert(isa<CmpInst>(Inst) && 162e8d8bef9SDimitry Andric "Can only get a predicate from a compare instruction"); 163e8d8bef9SDimitry Andric 164e8d8bef9SDimitry Andric if (RevisedPredicate.hasValue()) 165e8d8bef9SDimitry Andric return RevisedPredicate.getValue(); 166e8d8bef9SDimitry Andric 167e8d8bef9SDimitry Andric return cast<CmpInst>(Inst)->getPredicate(); 168e8d8bef9SDimitry Andric } 169e8d8bef9SDimitry Andric 170*04eeddc0SDimitry Andric StringRef IRInstructionData::getCalleeName() const { 171*04eeddc0SDimitry Andric assert(isa<CallInst>(Inst) && 172*04eeddc0SDimitry Andric "Can only get a name from a call instruction"); 173e8d8bef9SDimitry Andric 174*04eeddc0SDimitry Andric assert(CalleeName.hasValue() && "CalleeName has not been set"); 175*04eeddc0SDimitry Andric 176*04eeddc0SDimitry Andric return *CalleeName; 177e8d8bef9SDimitry Andric } 178e8d8bef9SDimitry Andric 179e8d8bef9SDimitry Andric bool IRSimilarity::isClose(const IRInstructionData &A, 180e8d8bef9SDimitry Andric const IRInstructionData &B) { 181e8d8bef9SDimitry Andric 182e8d8bef9SDimitry Andric if (!A.Legal || !B.Legal) 183e8d8bef9SDimitry Andric return false; 184e8d8bef9SDimitry Andric 185e8d8bef9SDimitry Andric // Check if we are performing the same sort of operation on the same types 186e8d8bef9SDimitry Andric // but not on the same values. 187e8d8bef9SDimitry Andric if (!A.Inst->isSameOperationAs(B.Inst)) { 188e8d8bef9SDimitry Andric // If there is a predicate, this means that either there is a swapped 189e8d8bef9SDimitry Andric // predicate, or that the types are different, we want to make sure that 190e8d8bef9SDimitry Andric // the predicates are equivalent via swapping. 191e8d8bef9SDimitry Andric if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 192e8d8bef9SDimitry Andric 193e8d8bef9SDimitry Andric if (A.getPredicate() != B.getPredicate()) 194e8d8bef9SDimitry Andric return false; 195e8d8bef9SDimitry Andric 196e8d8bef9SDimitry Andric // If the predicates are the same via swap, make sure that the types are 197e8d8bef9SDimitry Andric // still the same. 198e8d8bef9SDimitry Andric auto ZippedTypes = zip(A.OperVals, B.OperVals); 199e8d8bef9SDimitry Andric 200e8d8bef9SDimitry Andric return all_of( 201e8d8bef9SDimitry Andric ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 202e8d8bef9SDimitry Andric return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 203e8d8bef9SDimitry Andric }); 204e8d8bef9SDimitry Andric } 205e8d8bef9SDimitry Andric 206e8d8bef9SDimitry Andric return false; 207e8d8bef9SDimitry Andric } 208e8d8bef9SDimitry Andric 209e8d8bef9SDimitry Andric // Since any GEP Instruction operands after the first operand cannot be 210e8d8bef9SDimitry Andric // defined by a register, we must make sure that the operands after the first 211e8d8bef9SDimitry Andric // are the same in the two instructions 212e8d8bef9SDimitry Andric if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { 213e8d8bef9SDimitry Andric auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); 214e8d8bef9SDimitry Andric 215e8d8bef9SDimitry Andric // If the instructions do not have the same inbounds restrictions, we do 216e8d8bef9SDimitry Andric // not consider them the same. 217e8d8bef9SDimitry Andric if (GEP->isInBounds() != OtherGEP->isInBounds()) 218e8d8bef9SDimitry Andric return false; 219e8d8bef9SDimitry Andric 220e8d8bef9SDimitry Andric auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); 221e8d8bef9SDimitry Andric 222e8d8bef9SDimitry Andric // We increment here since we do not care about the first instruction, 223e8d8bef9SDimitry Andric // we only care about the following operands since they must be the 224e8d8bef9SDimitry Andric // exact same to be considered similar. 225e8d8bef9SDimitry Andric return all_of(drop_begin(ZippedOperands), 226e8d8bef9SDimitry Andric [](std::tuple<llvm::Use &, llvm::Use &> R) { 227e8d8bef9SDimitry Andric return std::get<0>(R) == std::get<1>(R); 228e8d8bef9SDimitry Andric }); 229e8d8bef9SDimitry Andric } 230e8d8bef9SDimitry Andric 231*04eeddc0SDimitry Andric // If the instructions are functions calls, we make sure that the function 232*04eeddc0SDimitry Andric // name is the same. We already know that the types are since is 233*04eeddc0SDimitry Andric // isSameOperationAs is true. 234e8d8bef9SDimitry Andric if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { 235*04eeddc0SDimitry Andric if (A.getCalleeName().str().compare(B.getCalleeName().str()) != 0) 236e8d8bef9SDimitry Andric return false; 237e8d8bef9SDimitry Andric } 238e8d8bef9SDimitry Andric 239349cc55cSDimitry Andric if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) && 240349cc55cSDimitry Andric A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) 241349cc55cSDimitry Andric return false; 242349cc55cSDimitry Andric 243e8d8bef9SDimitry Andric return true; 244e8d8bef9SDimitry Andric } 245e8d8bef9SDimitry Andric 246e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 247e8d8bef9SDimitry Andric // into the same interface. 248e8d8bef9SDimitry Andric void IRInstructionMapper::convertToUnsignedVec( 249e8d8bef9SDimitry Andric BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 250e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 251e8d8bef9SDimitry Andric BasicBlock::iterator It = BB.begin(); 252e8d8bef9SDimitry Andric 253e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMappingForBB; 254e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrListForBB; 255e8d8bef9SDimitry Andric 256e8d8bef9SDimitry Andric for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 257e8d8bef9SDimitry Andric switch (InstClassifier.visit(*It)) { 258e8d8bef9SDimitry Andric case InstrType::Legal: 259e8d8bef9SDimitry Andric mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 260e8d8bef9SDimitry Andric break; 261e8d8bef9SDimitry Andric case InstrType::Illegal: 262e8d8bef9SDimitry Andric mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 263e8d8bef9SDimitry Andric break; 264e8d8bef9SDimitry Andric case InstrType::Invisible: 265e8d8bef9SDimitry Andric AddedIllegalLastTime = false; 266e8d8bef9SDimitry Andric break; 267e8d8bef9SDimitry Andric } 268e8d8bef9SDimitry Andric } 269e8d8bef9SDimitry Andric 270e8d8bef9SDimitry Andric if (HaveLegalRange) { 271349cc55cSDimitry Andric if (AddedIllegalLastTime) 272e8d8bef9SDimitry Andric mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 273fe6060f1SDimitry Andric for (IRInstructionData *ID : InstrListForBB) 274fe6060f1SDimitry Andric this->IDL->push_back(*ID); 275e8d8bef9SDimitry Andric llvm::append_range(InstrList, InstrListForBB); 276e8d8bef9SDimitry Andric llvm::append_range(IntegerMapping, IntegerMappingForBB); 277e8d8bef9SDimitry Andric } 278e8d8bef9SDimitry Andric } 279e8d8bef9SDimitry Andric 280e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 281e8d8bef9SDimitry Andric // into the same interface. 282e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToLegalUnsigned( 283e8d8bef9SDimitry Andric BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 284e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrListForBB) { 285e8d8bef9SDimitry Andric // We added something legal, so we should unset the AddedLegalLastTime 286e8d8bef9SDimitry Andric // flag. 287e8d8bef9SDimitry Andric AddedIllegalLastTime = false; 288e8d8bef9SDimitry Andric 289e8d8bef9SDimitry Andric // If we have at least two adjacent legal instructions (which may have 290e8d8bef9SDimitry Andric // invisible instructions in between), remember that. 291e8d8bef9SDimitry Andric if (CanCombineWithPrevInstr) 292e8d8bef9SDimitry Andric HaveLegalRange = true; 293e8d8bef9SDimitry Andric CanCombineWithPrevInstr = true; 294e8d8bef9SDimitry Andric 295e8d8bef9SDimitry Andric // Get the integer for this instruction or give it the current 296e8d8bef9SDimitry Andric // LegalInstrNumber. 297e8d8bef9SDimitry Andric IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 298e8d8bef9SDimitry Andric InstrListForBB.push_back(ID); 299e8d8bef9SDimitry Andric 300349cc55cSDimitry Andric if (isa<BranchInst>(*It)) 301349cc55cSDimitry Andric ID->setBranchSuccessors(BasicBlockToInteger); 302349cc55cSDimitry Andric 303*04eeddc0SDimitry Andric if (isa<CallInst>(*It)) 304*04eeddc0SDimitry Andric ID->setCalleeName(EnableMatchCallsByName); 305*04eeddc0SDimitry Andric 306*04eeddc0SDimitry Andric if (isa<PHINode>(*It)) 307*04eeddc0SDimitry Andric ID->setPHIPredecessors(BasicBlockToInteger); 308*04eeddc0SDimitry Andric 309e8d8bef9SDimitry Andric // Add to the instruction list 310e8d8bef9SDimitry Andric bool WasInserted; 311e8d8bef9SDimitry Andric DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 312e8d8bef9SDimitry Andric ResultIt; 313e8d8bef9SDimitry Andric std::tie(ResultIt, WasInserted) = 314e8d8bef9SDimitry Andric InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 315e8d8bef9SDimitry Andric unsigned INumber = ResultIt->second; 316e8d8bef9SDimitry Andric 317e8d8bef9SDimitry Andric // There was an insertion. 318e8d8bef9SDimitry Andric if (WasInserted) 319e8d8bef9SDimitry Andric LegalInstrNumber++; 320e8d8bef9SDimitry Andric 321e8d8bef9SDimitry Andric IntegerMappingForBB.push_back(INumber); 322e8d8bef9SDimitry Andric 323e8d8bef9SDimitry Andric // Make sure we don't overflow or use any integers reserved by the DenseMap. 324e8d8bef9SDimitry Andric assert(LegalInstrNumber < IllegalInstrNumber && 325e8d8bef9SDimitry Andric "Instruction mapping overflow!"); 326e8d8bef9SDimitry Andric 327e8d8bef9SDimitry Andric assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 328e8d8bef9SDimitry Andric "Tried to assign DenseMap tombstone or empty key to instruction."); 329e8d8bef9SDimitry Andric assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 330e8d8bef9SDimitry Andric "Tried to assign DenseMap tombstone or empty key to instruction."); 331e8d8bef9SDimitry Andric 332e8d8bef9SDimitry Andric return INumber; 333e8d8bef9SDimitry Andric } 334e8d8bef9SDimitry Andric 335e8d8bef9SDimitry Andric IRInstructionData * 336e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 337e8d8bef9SDimitry Andric IRInstructionDataList &IDL) { 338e8d8bef9SDimitry Andric return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 339e8d8bef9SDimitry Andric } 340e8d8bef9SDimitry Andric 341349cc55cSDimitry Andric IRInstructionData * 342349cc55cSDimitry Andric IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) { 343349cc55cSDimitry Andric return new (InstDataAllocator->Allocate()) IRInstructionData(IDL); 344349cc55cSDimitry Andric } 345349cc55cSDimitry Andric 346e8d8bef9SDimitry Andric IRInstructionDataList * 347e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionDataList() { 348e8d8bef9SDimitry Andric return new (IDLAllocator->Allocate()) IRInstructionDataList(); 349e8d8bef9SDimitry Andric } 350e8d8bef9SDimitry Andric 351e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 352e8d8bef9SDimitry Andric // into the same interface. 353e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToIllegalUnsigned( 354e8d8bef9SDimitry Andric BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 355e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrListForBB, bool End) { 356e8d8bef9SDimitry Andric // Can't combine an illegal instruction. Set the flag. 357e8d8bef9SDimitry Andric CanCombineWithPrevInstr = false; 358e8d8bef9SDimitry Andric 359e8d8bef9SDimitry Andric // Only add one illegal number per range of legal numbers. 360e8d8bef9SDimitry Andric if (AddedIllegalLastTime) 361e8d8bef9SDimitry Andric return IllegalInstrNumber; 362e8d8bef9SDimitry Andric 363e8d8bef9SDimitry Andric IRInstructionData *ID = nullptr; 364e8d8bef9SDimitry Andric if (!End) 365e8d8bef9SDimitry Andric ID = allocateIRInstructionData(*It, false, *IDL); 366349cc55cSDimitry Andric else 367349cc55cSDimitry Andric ID = allocateIRInstructionData(*IDL); 368e8d8bef9SDimitry Andric InstrListForBB.push_back(ID); 369e8d8bef9SDimitry Andric 370e8d8bef9SDimitry Andric // Remember that we added an illegal number last time. 371e8d8bef9SDimitry Andric AddedIllegalLastTime = true; 372e8d8bef9SDimitry Andric unsigned INumber = IllegalInstrNumber; 373e8d8bef9SDimitry Andric IntegerMappingForBB.push_back(IllegalInstrNumber--); 374e8d8bef9SDimitry Andric 375e8d8bef9SDimitry Andric assert(LegalInstrNumber < IllegalInstrNumber && 376e8d8bef9SDimitry Andric "Instruction mapping overflow!"); 377e8d8bef9SDimitry Andric 378e8d8bef9SDimitry Andric assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 379e8d8bef9SDimitry Andric "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 380e8d8bef9SDimitry Andric 381e8d8bef9SDimitry Andric assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 382e8d8bef9SDimitry Andric "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 383e8d8bef9SDimitry Andric 384e8d8bef9SDimitry Andric return INumber; 385e8d8bef9SDimitry Andric } 386e8d8bef9SDimitry Andric 387e8d8bef9SDimitry Andric IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 388e8d8bef9SDimitry Andric IRInstructionData *FirstInstIt, 389e8d8bef9SDimitry Andric IRInstructionData *LastInstIt) 390e8d8bef9SDimitry Andric : StartIdx(StartIdx), Len(Len) { 391e8d8bef9SDimitry Andric 392e8d8bef9SDimitry Andric assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 393e8d8bef9SDimitry Andric assert(LastInstIt != nullptr && "Instruction is nullptr!"); 394e8d8bef9SDimitry Andric assert(StartIdx + Len > StartIdx && 395e8d8bef9SDimitry Andric "Overflow for IRSimilarityCandidate range?"); 396e8d8bef9SDimitry Andric assert(Len - 1 == static_cast<unsigned>(std::distance( 397e8d8bef9SDimitry Andric iterator(FirstInstIt), iterator(LastInstIt))) && 398e8d8bef9SDimitry Andric "Length of the first and last IRInstructionData do not match the " 399e8d8bef9SDimitry Andric "given length"); 400e8d8bef9SDimitry Andric 401e8d8bef9SDimitry Andric // We iterate over the given instructions, and map each unique value 402e8d8bef9SDimitry Andric // to a unique number in the IRSimilarityCandidate ValueToNumber and 403e8d8bef9SDimitry Andric // NumberToValue maps. A constant get its own value globally, the individual 404e8d8bef9SDimitry Andric // uses of the constants are not considered to be unique. 405e8d8bef9SDimitry Andric // 406e8d8bef9SDimitry Andric // IR: Mapping Added: 407e8d8bef9SDimitry Andric // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 408e8d8bef9SDimitry Andric // %add2 = add i32 %a, %1 %add2 -> 4 409e8d8bef9SDimitry Andric // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 410e8d8bef9SDimitry Andric // 411e8d8bef9SDimitry Andric // when replace with global values, starting from 1, would be 412e8d8bef9SDimitry Andric // 413e8d8bef9SDimitry Andric // 3 = add i32 1, 2 414e8d8bef9SDimitry Andric // 4 = add i32 1, 3 415e8d8bef9SDimitry Andric // 6 = add i32 5, 2 416e8d8bef9SDimitry Andric unsigned LocalValNumber = 1; 417e8d8bef9SDimitry Andric IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 418e8d8bef9SDimitry Andric for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 419e8d8bef9SDimitry Andric // Map the operand values to an unsigned integer if it does not already 420e8d8bef9SDimitry Andric // have an unsigned integer assigned to it. 421e8d8bef9SDimitry Andric for (Value *Arg : ID->OperVals) 422e8d8bef9SDimitry Andric if (ValueToNumber.find(Arg) == ValueToNumber.end()) { 423e8d8bef9SDimitry Andric ValueToNumber.try_emplace(Arg, LocalValNumber); 424e8d8bef9SDimitry Andric NumberToValue.try_emplace(LocalValNumber, Arg); 425e8d8bef9SDimitry Andric LocalValNumber++; 426e8d8bef9SDimitry Andric } 427e8d8bef9SDimitry Andric 428e8d8bef9SDimitry Andric // Mapping the instructions to an unsigned integer if it is not already 429e8d8bef9SDimitry Andric // exist in the mapping. 430e8d8bef9SDimitry Andric if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { 431e8d8bef9SDimitry Andric ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 432e8d8bef9SDimitry Andric NumberToValue.try_emplace(LocalValNumber, ID->Inst); 433e8d8bef9SDimitry Andric LocalValNumber++; 434e8d8bef9SDimitry Andric } 435e8d8bef9SDimitry Andric } 436e8d8bef9SDimitry Andric 437e8d8bef9SDimitry Andric // Setting the first and last instruction data pointers for the candidate. If 438e8d8bef9SDimitry Andric // we got through the entire for loop without hitting an assert, we know 439e8d8bef9SDimitry Andric // that both of these instructions are not nullptrs. 440e8d8bef9SDimitry Andric FirstInst = FirstInstIt; 441e8d8bef9SDimitry Andric LastInst = LastInstIt; 442e8d8bef9SDimitry Andric } 443e8d8bef9SDimitry Andric 444e8d8bef9SDimitry Andric bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 445e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 446e8d8bef9SDimitry Andric if (A.getLength() != B.getLength()) 447e8d8bef9SDimitry Andric return false; 448e8d8bef9SDimitry Andric 449e8d8bef9SDimitry Andric auto InstrDataForBoth = 450e8d8bef9SDimitry Andric zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 451e8d8bef9SDimitry Andric 452e8d8bef9SDimitry Andric return all_of(InstrDataForBoth, 453e8d8bef9SDimitry Andric [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 454e8d8bef9SDimitry Andric IRInstructionData &A = std::get<0>(R); 455e8d8bef9SDimitry Andric IRInstructionData &B = std::get<1>(R); 456e8d8bef9SDimitry Andric if (!A.Legal || !B.Legal) 457e8d8bef9SDimitry Andric return false; 458e8d8bef9SDimitry Andric return isClose(A, B); 459e8d8bef9SDimitry Andric }); 460e8d8bef9SDimitry Andric } 461e8d8bef9SDimitry Andric 462e8d8bef9SDimitry Andric /// Determine if one or more of the assigned global value numbers for the 463e8d8bef9SDimitry Andric /// operands in \p TargetValueNumbers is in the current mapping set for operand 464e8d8bef9SDimitry Andric /// numbers in \p SourceOperands. The set of possible corresponding global 465e8d8bef9SDimitry Andric /// value numbers are replaced with the most recent version of compatible 466e8d8bef9SDimitry Andric /// values. 467e8d8bef9SDimitry Andric /// 468e8d8bef9SDimitry Andric /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 469e8d8bef9SDimitry Andric /// value number for the source IRInstructionCandidate. 470e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 471e8d8bef9SDimitry Andric /// IRSimilarityCandidate global value numbers to a set of possible numbers in 472e8d8bef9SDimitry Andric /// the target. 473e8d8bef9SDimitry Andric /// \param [in] SourceOperands - The operands in the original 474e8d8bef9SDimitry Andric /// IRSimilarityCandidate in the current instruction. 475e8d8bef9SDimitry Andric /// \param [in] TargetValueNumbers - The global value numbers of the operands in 476e8d8bef9SDimitry Andric /// the corresponding Instruction in the other IRSimilarityCandidate. 477e8d8bef9SDimitry Andric /// \returns true if there exists a possible mapping between the source 478e8d8bef9SDimitry Andric /// Instruction operands and the target Instruction operands, and false if not. 479e8d8bef9SDimitry Andric static bool checkNumberingAndReplaceCommutative( 480e8d8bef9SDimitry Andric const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 481e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 482e8d8bef9SDimitry Andric ArrayRef<Value *> &SourceOperands, 483e8d8bef9SDimitry Andric DenseSet<unsigned> &TargetValueNumbers){ 484e8d8bef9SDimitry Andric 485e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 486e8d8bef9SDimitry Andric 487e8d8bef9SDimitry Andric unsigned ArgVal; 488e8d8bef9SDimitry Andric bool WasInserted; 489e8d8bef9SDimitry Andric 490e8d8bef9SDimitry Andric // Iterate over the operands in the source IRSimilarityCandidate to determine 491e8d8bef9SDimitry Andric // whether there exists an operand in the other IRSimilarityCandidate that 492e8d8bef9SDimitry Andric // creates a valid mapping of Value to Value between the 493e8d8bef9SDimitry Andric // IRSimilarityCaniddates. 494e8d8bef9SDimitry Andric for (Value *V : SourceOperands) { 495e8d8bef9SDimitry Andric ArgVal = SourceValueToNumberMapping.find(V)->second; 496e8d8bef9SDimitry Andric 497e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 498e8d8bef9SDimitry Andric std::make_pair(ArgVal, TargetValueNumbers)); 499e8d8bef9SDimitry Andric 500e8d8bef9SDimitry Andric // Instead of finding a current mapping, we inserted a set. This means a 501e8d8bef9SDimitry Andric // mapping did not exist for the source Instruction operand, it has no 502e8d8bef9SDimitry Andric // current constraints we need to check. 503e8d8bef9SDimitry Andric if (WasInserted) 504e8d8bef9SDimitry Andric continue; 505e8d8bef9SDimitry Andric 506e8d8bef9SDimitry Andric // If a mapping already exists for the source operand to the values in the 507e8d8bef9SDimitry Andric // other IRSimilarityCandidate we need to iterate over the items in other 508e8d8bef9SDimitry Andric // IRSimilarityCandidate's Instruction to determine whether there is a valid 509e8d8bef9SDimitry Andric // mapping of Value to Value. 510e8d8bef9SDimitry Andric DenseSet<unsigned> NewSet; 511e8d8bef9SDimitry Andric for (unsigned &Curr : ValueMappingIt->second) 512e8d8bef9SDimitry Andric // If we can find the value in the mapping, we add it to the new set. 513e8d8bef9SDimitry Andric if (TargetValueNumbers.contains(Curr)) 514e8d8bef9SDimitry Andric NewSet.insert(Curr); 515e8d8bef9SDimitry Andric 516e8d8bef9SDimitry Andric // If we could not find a Value, return 0. 517e8d8bef9SDimitry Andric if (NewSet.empty()) 518e8d8bef9SDimitry Andric return false; 519e8d8bef9SDimitry Andric 520e8d8bef9SDimitry Andric // Otherwise replace the old mapping with the newly constructed one. 521e8d8bef9SDimitry Andric if (NewSet.size() != ValueMappingIt->second.size()) 522e8d8bef9SDimitry Andric ValueMappingIt->second.swap(NewSet); 523e8d8bef9SDimitry Andric 524e8d8bef9SDimitry Andric // We have reached no conclusions about the mapping, and cannot remove 525e8d8bef9SDimitry Andric // any items from the other operands, so we move to check the next operand. 526e8d8bef9SDimitry Andric if (ValueMappingIt->second.size() != 1) 527e8d8bef9SDimitry Andric continue; 528e8d8bef9SDimitry Andric 529e8d8bef9SDimitry Andric 530e8d8bef9SDimitry Andric unsigned ValToRemove = *ValueMappingIt->second.begin(); 531e8d8bef9SDimitry Andric // When there is only one item left in the mapping for and operand, remove 532e8d8bef9SDimitry Andric // the value from the other operands. If it results in there being no 533e8d8bef9SDimitry Andric // mapping, return false, it means the mapping is wrong 534e8d8bef9SDimitry Andric for (Value *InnerV : SourceOperands) { 535e8d8bef9SDimitry Andric if (V == InnerV) 536e8d8bef9SDimitry Andric continue; 537e8d8bef9SDimitry Andric 538e8d8bef9SDimitry Andric unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 539e8d8bef9SDimitry Andric ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 540e8d8bef9SDimitry Andric if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 541e8d8bef9SDimitry Andric continue; 542e8d8bef9SDimitry Andric 543e8d8bef9SDimitry Andric ValueMappingIt->second.erase(ValToRemove); 544e8d8bef9SDimitry Andric if (ValueMappingIt->second.empty()) 545e8d8bef9SDimitry Andric return false; 546e8d8bef9SDimitry Andric } 547e8d8bef9SDimitry Andric } 548e8d8bef9SDimitry Andric 549e8d8bef9SDimitry Andric return true; 550e8d8bef9SDimitry Andric } 551e8d8bef9SDimitry Andric 552e8d8bef9SDimitry Andric /// Determine if operand number \p TargetArgVal is in the current mapping set 553e8d8bef9SDimitry Andric /// for operand number \p SourceArgVal. 554e8d8bef9SDimitry Andric /// 555e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 556e8d8bef9SDimitry Andric /// value numbers from source IRSimilarityCandidate to target 557e8d8bef9SDimitry Andric /// IRSimilarityCandidate. 558e8d8bef9SDimitry Andric /// \param [in] SourceArgVal The global value number for an operand in the 559e8d8bef9SDimitry Andric /// in the original candidate. 560e8d8bef9SDimitry Andric /// \param [in] TargetArgVal The global value number for the corresponding 561e8d8bef9SDimitry Andric /// operand in the other candidate. 562e8d8bef9SDimitry Andric /// \returns True if there exists a mapping and false if not. 563e8d8bef9SDimitry Andric bool checkNumberingAndReplace( 564e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 565e8d8bef9SDimitry Andric unsigned SourceArgVal, unsigned TargetArgVal) { 566e8d8bef9SDimitry Andric // We are given two unsigned integers representing the global values of 567e8d8bef9SDimitry Andric // the operands in different IRSimilarityCandidates and a current mapping 568e8d8bef9SDimitry Andric // between the two. 569e8d8bef9SDimitry Andric // 570e8d8bef9SDimitry Andric // Source Operand GVN: 1 571e8d8bef9SDimitry Andric // Target Operand GVN: 2 572e8d8bef9SDimitry Andric // CurrentMapping: {1: {1, 2}} 573e8d8bef9SDimitry Andric // 574e8d8bef9SDimitry Andric // Since we have mapping, and the target operand is contained in the set, we 575e8d8bef9SDimitry Andric // update it to: 576e8d8bef9SDimitry Andric // CurrentMapping: {1: {2}} 577e8d8bef9SDimitry Andric // and can return true. But, if the mapping was 578e8d8bef9SDimitry Andric // CurrentMapping: {1: {3}} 579e8d8bef9SDimitry Andric // we would return false. 580e8d8bef9SDimitry Andric 581e8d8bef9SDimitry Andric bool WasInserted; 582e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 583e8d8bef9SDimitry Andric 584e8d8bef9SDimitry Andric std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 585e8d8bef9SDimitry Andric std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 586e8d8bef9SDimitry Andric 587e8d8bef9SDimitry Andric // If we created a new mapping, then we are done. 588e8d8bef9SDimitry Andric if (WasInserted) 589e8d8bef9SDimitry Andric return true; 590e8d8bef9SDimitry Andric 591e8d8bef9SDimitry Andric // If there is more than one option in the mapping set, and the target value 592e8d8bef9SDimitry Andric // is included in the mapping set replace that set with one that only includes 593e8d8bef9SDimitry Andric // the target value, as it is the only valid mapping via the non commutative 594e8d8bef9SDimitry Andric // instruction. 595e8d8bef9SDimitry Andric 596e8d8bef9SDimitry Andric DenseSet<unsigned> &TargetSet = Val->second; 597e8d8bef9SDimitry Andric if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 598e8d8bef9SDimitry Andric TargetSet.clear(); 599e8d8bef9SDimitry Andric TargetSet.insert(TargetArgVal); 600e8d8bef9SDimitry Andric return true; 601e8d8bef9SDimitry Andric } 602e8d8bef9SDimitry Andric 603e8d8bef9SDimitry Andric // Return true if we can find the value in the set. 604e8d8bef9SDimitry Andric return TargetSet.contains(TargetArgVal); 605e8d8bef9SDimitry Andric } 606e8d8bef9SDimitry Andric 607e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 608e8d8bef9SDimitry Andric OperandMapping A, OperandMapping B) { 609e8d8bef9SDimitry Andric // Iterators to keep track of where we are in the operands for each 610e8d8bef9SDimitry Andric // Instruction. 611e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 612e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 613e8d8bef9SDimitry Andric unsigned OperandLength = A.OperVals.size(); 614e8d8bef9SDimitry Andric 615e8d8bef9SDimitry Andric // For each operand, get the value numbering and ensure it is consistent. 616e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 617e8d8bef9SDimitry Andric unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 618e8d8bef9SDimitry Andric unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 619e8d8bef9SDimitry Andric 620e8d8bef9SDimitry Andric // Attempt to add a set with only the target value. If there is no mapping 621e8d8bef9SDimitry Andric // we can create it here. 622e8d8bef9SDimitry Andric // 623e8d8bef9SDimitry Andric // For an instruction like a subtraction: 624e8d8bef9SDimitry Andric // IRSimilarityCandidateA: IRSimilarityCandidateB: 625e8d8bef9SDimitry Andric // %resultA = sub %a, %b %resultB = sub %d, %e 626e8d8bef9SDimitry Andric // 627e8d8bef9SDimitry Andric // We map %a -> %d and %b -> %e. 628e8d8bef9SDimitry Andric // 629e8d8bef9SDimitry Andric // And check to see whether their mapping is consistent in 630e8d8bef9SDimitry Andric // checkNumberingAndReplace. 631e8d8bef9SDimitry Andric 632e8d8bef9SDimitry Andric if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 633e8d8bef9SDimitry Andric return false; 634e8d8bef9SDimitry Andric 635e8d8bef9SDimitry Andric if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 636e8d8bef9SDimitry Andric return false; 637e8d8bef9SDimitry Andric } 638e8d8bef9SDimitry Andric return true; 639e8d8bef9SDimitry Andric } 640e8d8bef9SDimitry Andric 641e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareCommutativeOperandMapping( 642e8d8bef9SDimitry Andric OperandMapping A, OperandMapping B) { 643e8d8bef9SDimitry Andric DenseSet<unsigned> ValueNumbersA; 644e8d8bef9SDimitry Andric DenseSet<unsigned> ValueNumbersB; 645e8d8bef9SDimitry Andric 646e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 647e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 648e8d8bef9SDimitry Andric unsigned OperandLength = A.OperVals.size(); 649e8d8bef9SDimitry Andric 650e8d8bef9SDimitry Andric // Find the value number sets for the operands. 651e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < OperandLength; 652e8d8bef9SDimitry Andric Idx++, VItA++, VItB++) { 653e8d8bef9SDimitry Andric ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 654e8d8bef9SDimitry Andric ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 655e8d8bef9SDimitry Andric } 656e8d8bef9SDimitry Andric 657e8d8bef9SDimitry Andric // Iterate over the operands in the first IRSimilarityCandidate and make sure 658e8d8bef9SDimitry Andric // there exists a possible mapping with the operands in the second 659e8d8bef9SDimitry Andric // IRSimilarityCandidate. 660e8d8bef9SDimitry Andric if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 661e8d8bef9SDimitry Andric A.ValueNumberMapping, A.OperVals, 662e8d8bef9SDimitry Andric ValueNumbersB)) 663e8d8bef9SDimitry Andric return false; 664e8d8bef9SDimitry Andric 665e8d8bef9SDimitry Andric // Iterate over the operands in the second IRSimilarityCandidate and make sure 666e8d8bef9SDimitry Andric // there exists a possible mapping with the operands in the first 667e8d8bef9SDimitry Andric // IRSimilarityCandidate. 668e8d8bef9SDimitry Andric if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 669e8d8bef9SDimitry Andric B.ValueNumberMapping, B.OperVals, 670e8d8bef9SDimitry Andric ValueNumbersA)) 671e8d8bef9SDimitry Andric return false; 672e8d8bef9SDimitry Andric 673e8d8bef9SDimitry Andric return true; 674e8d8bef9SDimitry Andric } 675e8d8bef9SDimitry Andric 676349cc55cSDimitry Andric bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, 677349cc55cSDimitry Andric RelativeLocMapping B) { 678349cc55cSDimitry Andric // Get the basic blocks the label refers to. 679349cc55cSDimitry Andric BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal); 680349cc55cSDimitry Andric BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal); 681349cc55cSDimitry Andric 682349cc55cSDimitry Andric // Get the basic blocks contained in each region. 683349cc55cSDimitry Andric DenseSet<BasicBlock *> BasicBlockA; 684349cc55cSDimitry Andric DenseSet<BasicBlock *> BasicBlockB; 685349cc55cSDimitry Andric A.IRSC.getBasicBlocks(BasicBlockA); 686349cc55cSDimitry Andric B.IRSC.getBasicBlocks(BasicBlockB); 687349cc55cSDimitry Andric 688349cc55cSDimitry Andric // Determine if the block is contained in the region. 689349cc55cSDimitry Andric bool AContained = BasicBlockA.contains(ABB); 690349cc55cSDimitry Andric bool BContained = BasicBlockB.contains(BBB); 691349cc55cSDimitry Andric 692349cc55cSDimitry Andric // Both blocks need to be contained in the region, or both need to be outside 693349cc55cSDimitry Andric // the reigon. 694349cc55cSDimitry Andric if (AContained != BContained) 695349cc55cSDimitry Andric return false; 696349cc55cSDimitry Andric 697349cc55cSDimitry Andric // If both are contained, then we need to make sure that the relative 698349cc55cSDimitry Andric // distance to the target blocks are the same. 699349cc55cSDimitry Andric if (AContained) 700349cc55cSDimitry Andric return A.RelativeLocation == B.RelativeLocation; 701349cc55cSDimitry Andric return true; 702349cc55cSDimitry Andric } 703349cc55cSDimitry Andric 704e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 705e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 706349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> MappingA; 707349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> MappingB; 708349cc55cSDimitry Andric return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB); 709349cc55cSDimitry Andric } 710349cc55cSDimitry Andric 711349cc55cSDimitry Andric typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &, 712349cc55cSDimitry Andric SmallVector<int, 4> &, ArrayRef<Value *> &, 713349cc55cSDimitry Andric ArrayRef<Value *> &> 714349cc55cSDimitry Andric ZippedRelativeLocationsT; 715349cc55cSDimitry Andric 716349cc55cSDimitry Andric bool IRSimilarityCandidate::compareStructure( 717349cc55cSDimitry Andric const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, 718349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, 719349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { 720e8d8bef9SDimitry Andric if (A.getLength() != B.getLength()) 721e8d8bef9SDimitry Andric return false; 722e8d8bef9SDimitry Andric 723e8d8bef9SDimitry Andric if (A.ValueToNumber.size() != B.ValueToNumber.size()) 724e8d8bef9SDimitry Andric return false; 725e8d8bef9SDimitry Andric 726e8d8bef9SDimitry Andric iterator ItA = A.begin(); 727e8d8bef9SDimitry Andric iterator ItB = B.begin(); 728e8d8bef9SDimitry Andric 729349cc55cSDimitry Andric // These ValueNumber Mapping sets create a create a mapping between the values 730349cc55cSDimitry Andric // in one candidate to values in the other candidate. If we create a set with 731349cc55cSDimitry Andric // one element, and that same element maps to the original element in the 732349cc55cSDimitry Andric // candidate we have a good mapping. 733e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 734e8d8bef9SDimitry Andric 735e8d8bef9SDimitry Andric 736e8d8bef9SDimitry Andric // Iterate over the instructions contained in each candidate 737e8d8bef9SDimitry Andric unsigned SectionLength = A.getStartIdx() + A.getLength(); 738e8d8bef9SDimitry Andric for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 739e8d8bef9SDimitry Andric ItA++, ItB++, Loc++) { 740e8d8bef9SDimitry Andric // Make sure the instructions are similar to one another. 741e8d8bef9SDimitry Andric if (!isClose(*ItA, *ItB)) 742e8d8bef9SDimitry Andric return false; 743e8d8bef9SDimitry Andric 744e8d8bef9SDimitry Andric Instruction *IA = ItA->Inst; 745e8d8bef9SDimitry Andric Instruction *IB = ItB->Inst; 746e8d8bef9SDimitry Andric 747e8d8bef9SDimitry Andric if (!ItA->Legal || !ItB->Legal) 748e8d8bef9SDimitry Andric return false; 749e8d8bef9SDimitry Andric 750e8d8bef9SDimitry Andric // Get the operand sets for the instructions. 751e8d8bef9SDimitry Andric ArrayRef<Value *> OperValsA = ItA->OperVals; 752e8d8bef9SDimitry Andric ArrayRef<Value *> OperValsB = ItB->OperVals; 753e8d8bef9SDimitry Andric 754e8d8bef9SDimitry Andric unsigned InstValA = A.ValueToNumber.find(IA)->second; 755e8d8bef9SDimitry Andric unsigned InstValB = B.ValueToNumber.find(IB)->second; 756e8d8bef9SDimitry Andric 757349cc55cSDimitry Andric bool WasInserted; 758e8d8bef9SDimitry Andric // Ensure that the mappings for the instructions exists. 759e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 760e8d8bef9SDimitry Andric std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 761e8d8bef9SDimitry Andric if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 762e8d8bef9SDimitry Andric return false; 763e8d8bef9SDimitry Andric 764e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( 765e8d8bef9SDimitry Andric std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); 766e8d8bef9SDimitry Andric if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) 767e8d8bef9SDimitry Andric return false; 768e8d8bef9SDimitry Andric 769e8d8bef9SDimitry Andric // We have different paths for commutative instructions and non-commutative 770e8d8bef9SDimitry Andric // instructions since commutative instructions could allow multiple mappings 771e8d8bef9SDimitry Andric // to certain values. 772e8d8bef9SDimitry Andric if (IA->isCommutative() && !isa<FPMathOperator>(IA)) { 773e8d8bef9SDimitry Andric if (!compareCommutativeOperandMapping( 774e8d8bef9SDimitry Andric {A, OperValsA, ValueNumberMappingA}, 775e8d8bef9SDimitry Andric {B, OperValsB, ValueNumberMappingB})) 776e8d8bef9SDimitry Andric return false; 777e8d8bef9SDimitry Andric continue; 778e8d8bef9SDimitry Andric } 779e8d8bef9SDimitry Andric 780e8d8bef9SDimitry Andric // Handle the non-commutative cases. 781e8d8bef9SDimitry Andric if (!compareNonCommutativeOperandMapping( 782e8d8bef9SDimitry Andric {A, OperValsA, ValueNumberMappingA}, 783e8d8bef9SDimitry Andric {B, OperValsB, ValueNumberMappingB})) 784e8d8bef9SDimitry Andric return false; 785349cc55cSDimitry Andric 786349cc55cSDimitry Andric // Here we check that between two corresponding instructions, 787349cc55cSDimitry Andric // when referring to a basic block in the same region, the 788349cc55cSDimitry Andric // relative locations are the same. And, that the instructions refer to 789349cc55cSDimitry Andric // basic blocks outside the region in the same corresponding locations. 790349cc55cSDimitry Andric 791349cc55cSDimitry Andric // We are able to make the assumption about blocks outside of the region 792349cc55cSDimitry Andric // since the target block labels are considered values and will follow the 793349cc55cSDimitry Andric // same number matching that we defined for the other instructions in the 794349cc55cSDimitry Andric // region. So, at this point, in each location we target a specific block 795349cc55cSDimitry Andric // outside the region, we are targeting a corresponding block in each 796349cc55cSDimitry Andric // analagous location in the region we are comparing to. 797349cc55cSDimitry Andric if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) && 798349cc55cSDimitry Andric !(isa<PHINode>(IA) && isa<PHINode>(IB))) 799349cc55cSDimitry Andric continue; 800349cc55cSDimitry Andric 801349cc55cSDimitry Andric SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations; 802349cc55cSDimitry Andric SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations; 803349cc55cSDimitry Andric if (RelBlockLocsA.size() != RelBlockLocsB.size() && 804349cc55cSDimitry Andric OperValsA.size() != OperValsB.size()) 805349cc55cSDimitry Andric return false; 806349cc55cSDimitry Andric 807349cc55cSDimitry Andric ZippedRelativeLocationsT ZippedRelativeLocations = 808349cc55cSDimitry Andric zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB); 809349cc55cSDimitry Andric if (any_of(ZippedRelativeLocations, 810349cc55cSDimitry Andric [&A, &B](std::tuple<int, int, Value *, Value *> R) { 811349cc55cSDimitry Andric return !checkRelativeLocations( 812349cc55cSDimitry Andric {A, std::get<0>(R), std::get<2>(R)}, 813349cc55cSDimitry Andric {B, std::get<1>(R), std::get<3>(R)}); 814349cc55cSDimitry Andric })) 815349cc55cSDimitry Andric return false; 816e8d8bef9SDimitry Andric } 817e8d8bef9SDimitry Andric return true; 818e8d8bef9SDimitry Andric } 819e8d8bef9SDimitry Andric 820e8d8bef9SDimitry Andric bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 821e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 822e8d8bef9SDimitry Andric auto DoesOverlap = [](const IRSimilarityCandidate &X, 823e8d8bef9SDimitry Andric const IRSimilarityCandidate &Y) { 824e8d8bef9SDimitry Andric // Check: 825e8d8bef9SDimitry Andric // XXXXXX X starts before Y ends 826e8d8bef9SDimitry Andric // YYYYYYY Y starts after X starts 827e8d8bef9SDimitry Andric return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 828e8d8bef9SDimitry Andric }; 829e8d8bef9SDimitry Andric 830e8d8bef9SDimitry Andric return DoesOverlap(A, B) || DoesOverlap(B, A); 831e8d8bef9SDimitry Andric } 832e8d8bef9SDimitry Andric 833e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper( 834e8d8bef9SDimitry Andric Module &M, std::vector<IRInstructionData *> &InstrList, 835e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 836e8d8bef9SDimitry Andric 837e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrListForModule; 838e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMappingForModule; 839e8d8bef9SDimitry Andric // Iterate over the functions in the module to map each Instruction in each 840e8d8bef9SDimitry Andric // BasicBlock to an unsigned integer. 841349cc55cSDimitry Andric Mapper.initializeForBBs(M); 842349cc55cSDimitry Andric 843e8d8bef9SDimitry Andric for (Function &F : M) { 844e8d8bef9SDimitry Andric 845e8d8bef9SDimitry Andric if (F.empty()) 846e8d8bef9SDimitry Andric continue; 847e8d8bef9SDimitry Andric 848e8d8bef9SDimitry Andric for (BasicBlock &BB : F) { 849e8d8bef9SDimitry Andric 850e8d8bef9SDimitry Andric // BB has potential to have similarity since it has a size greater than 2 851e8d8bef9SDimitry Andric // and can therefore match other regions greater than 2. Map it to a list 852e8d8bef9SDimitry Andric // of unsigned integers. 853e8d8bef9SDimitry Andric Mapper.convertToUnsignedVec(BB, InstrListForModule, 854e8d8bef9SDimitry Andric IntegerMappingForModule); 855e8d8bef9SDimitry Andric } 856349cc55cSDimitry Andric 857349cc55cSDimitry Andric BasicBlock::iterator It = F.begin()->end(); 858349cc55cSDimitry Andric Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule, 859349cc55cSDimitry Andric true); 860349cc55cSDimitry Andric if (InstrListForModule.size() > 0) 861349cc55cSDimitry Andric Mapper.IDL->push_back(*InstrListForModule.back()); 862e8d8bef9SDimitry Andric } 863e8d8bef9SDimitry Andric 864e8d8bef9SDimitry Andric // Insert the InstrListForModule at the end of the overall InstrList so that 865e8d8bef9SDimitry Andric // we can have a long InstrList for the entire set of Modules being analyzed. 866e8d8bef9SDimitry Andric llvm::append_range(InstrList, InstrListForModule); 867e8d8bef9SDimitry Andric // Do the same as above, but for IntegerMapping. 868e8d8bef9SDimitry Andric llvm::append_range(IntegerMapping, IntegerMappingForModule); 869e8d8bef9SDimitry Andric } 870e8d8bef9SDimitry Andric 871e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper( 872e8d8bef9SDimitry Andric ArrayRef<std::unique_ptr<Module>> &Modules, 873e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrList, 874e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 875e8d8bef9SDimitry Andric 876e8d8bef9SDimitry Andric // Iterate over, and map the instructions in each module. 877e8d8bef9SDimitry Andric for (const std::unique_ptr<Module> &M : Modules) 878e8d8bef9SDimitry Andric populateMapper(*M, InstrList, IntegerMapping); 879e8d8bef9SDimitry Andric } 880e8d8bef9SDimitry Andric 881e8d8bef9SDimitry Andric /// From a repeated subsequence, find all the different instances of the 882e8d8bef9SDimitry Andric /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 883e8d8bef9SDimitry Andric /// the IRInstructionData in subsequence. 884e8d8bef9SDimitry Andric /// 8854824e7fdSDimitry Andric /// \param [in] Mapper - The instruction mapper for basic correctness checks. 886e8d8bef9SDimitry Andric /// \param [in] InstrList - The vector that holds the instruction data. 887e8d8bef9SDimitry Andric /// \param [in] IntegerMapping - The vector that holds the mapped integers. 888e8d8bef9SDimitry Andric /// \param [out] CandsForRepSubstring - The vector to store the generated 889e8d8bef9SDimitry Andric /// IRSimilarityCandidates. 890e8d8bef9SDimitry Andric static void createCandidatesFromSuffixTree( 891fe6060f1SDimitry Andric const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, 892e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 893e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 894e8d8bef9SDimitry Andric 895e8d8bef9SDimitry Andric unsigned StringLen = RS.Length; 896349cc55cSDimitry Andric if (StringLen < 2) 897349cc55cSDimitry Andric return; 898e8d8bef9SDimitry Andric 899e8d8bef9SDimitry Andric // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 900e8d8bef9SDimitry Andric for (const unsigned &StartIdx : RS.StartIndices) { 901e8d8bef9SDimitry Andric unsigned EndIdx = StartIdx + StringLen - 1; 902e8d8bef9SDimitry Andric 903e8d8bef9SDimitry Andric // Check that this subsequence does not contain an illegal instruction. 904e8d8bef9SDimitry Andric bool ContainsIllegal = false; 905e8d8bef9SDimitry Andric for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 906e8d8bef9SDimitry Andric unsigned Key = IntegerMapping[CurrIdx]; 907e8d8bef9SDimitry Andric if (Key > Mapper.IllegalInstrNumber) { 908e8d8bef9SDimitry Andric ContainsIllegal = true; 909e8d8bef9SDimitry Andric break; 910e8d8bef9SDimitry Andric } 911e8d8bef9SDimitry Andric } 912e8d8bef9SDimitry Andric 913e8d8bef9SDimitry Andric // If we have an illegal instruction, we should not create an 914e8d8bef9SDimitry Andric // IRSimilarityCandidate for this region. 915e8d8bef9SDimitry Andric if (ContainsIllegal) 916e8d8bef9SDimitry Andric continue; 917e8d8bef9SDimitry Andric 918e8d8bef9SDimitry Andric // We are getting iterators to the instructions in this region of code 919e8d8bef9SDimitry Andric // by advancing the start and end indices from the start of the 920e8d8bef9SDimitry Andric // InstrList. 921e8d8bef9SDimitry Andric std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 922e8d8bef9SDimitry Andric std::advance(StartIt, StartIdx); 923e8d8bef9SDimitry Andric std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 924e8d8bef9SDimitry Andric std::advance(EndIt, EndIdx); 925e8d8bef9SDimitry Andric 926e8d8bef9SDimitry Andric CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 927e8d8bef9SDimitry Andric } 928e8d8bef9SDimitry Andric } 929e8d8bef9SDimitry Andric 930349cc55cSDimitry Andric void IRSimilarityCandidate::createCanonicalRelationFrom( 931349cc55cSDimitry Andric IRSimilarityCandidate &SourceCand, 932349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, 933349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) { 934349cc55cSDimitry Andric assert(SourceCand.CanonNumToNumber.size() != 0 && 935349cc55cSDimitry Andric "Base canonical relationship is empty!"); 936349cc55cSDimitry Andric assert(SourceCand.NumberToCanonNum.size() != 0 && 937349cc55cSDimitry Andric "Base canonical relationship is empty!"); 938349cc55cSDimitry Andric 939349cc55cSDimitry Andric assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty"); 940349cc55cSDimitry Andric assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty"); 941349cc55cSDimitry Andric 942349cc55cSDimitry Andric DenseSet<unsigned> UsedGVNs; 943349cc55cSDimitry Andric // Iterate over the mappings provided from this candidate to SourceCand. We 944349cc55cSDimitry Andric // are then able to map the GVN in this candidate to the same canonical number 945349cc55cSDimitry Andric // given to the corresponding GVN in SourceCand. 946349cc55cSDimitry Andric for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) { 947349cc55cSDimitry Andric unsigned SourceGVN = GVNMapping.first; 948349cc55cSDimitry Andric 949349cc55cSDimitry Andric assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!"); 950349cc55cSDimitry Andric 951349cc55cSDimitry Andric unsigned ResultGVN; 952349cc55cSDimitry Andric // We need special handling if we have more than one potential value. This 953349cc55cSDimitry Andric // means that there are at least two GVNs that could correspond to this GVN. 954349cc55cSDimitry Andric // This could lead to potential swapping later on, so we make a decision 955349cc55cSDimitry Andric // here to ensure a one-to-one mapping. 956349cc55cSDimitry Andric if (GVNMapping.second.size() > 1) { 957349cc55cSDimitry Andric bool Found = false; 958349cc55cSDimitry Andric for (unsigned Val : GVNMapping.second) { 959349cc55cSDimitry Andric // We make sure the target value number hasn't already been reserved. 960349cc55cSDimitry Andric if (UsedGVNs.contains(Val)) 961349cc55cSDimitry Andric continue; 962349cc55cSDimitry Andric 963349cc55cSDimitry Andric // We make sure that the opposite mapping is still consistent. 964349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator It = 965349cc55cSDimitry Andric FromSourceMapping.find(Val); 966349cc55cSDimitry Andric 967349cc55cSDimitry Andric if (!It->second.contains(SourceGVN)) 968349cc55cSDimitry Andric continue; 969349cc55cSDimitry Andric 970349cc55cSDimitry Andric // We pick the first item that satisfies these conditions. 971349cc55cSDimitry Andric Found = true; 972349cc55cSDimitry Andric ResultGVN = Val; 973349cc55cSDimitry Andric break; 974349cc55cSDimitry Andric } 975349cc55cSDimitry Andric 976349cc55cSDimitry Andric assert(Found && "Could not find matching value for source GVN"); 977349cc55cSDimitry Andric (void)Found; 978349cc55cSDimitry Andric 979349cc55cSDimitry Andric } else 980349cc55cSDimitry Andric ResultGVN = *GVNMapping.second.begin(); 981349cc55cSDimitry Andric 982349cc55cSDimitry Andric // Whatever GVN is found, we mark it as used. 983349cc55cSDimitry Andric UsedGVNs.insert(ResultGVN); 984349cc55cSDimitry Andric 985349cc55cSDimitry Andric unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN); 986349cc55cSDimitry Andric CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN)); 987349cc55cSDimitry Andric NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum)); 988349cc55cSDimitry Andric } 989349cc55cSDimitry Andric } 990349cc55cSDimitry Andric 991349cc55cSDimitry Andric void IRSimilarityCandidate::createCanonicalMappingFor( 992349cc55cSDimitry Andric IRSimilarityCandidate &CurrCand) { 993349cc55cSDimitry Andric assert(CurrCand.CanonNumToNumber.size() == 0 && 994349cc55cSDimitry Andric "Canonical Relationship is non-empty"); 995349cc55cSDimitry Andric assert(CurrCand.NumberToCanonNum.size() == 0 && 996349cc55cSDimitry Andric "Canonical Relationship is non-empty"); 997349cc55cSDimitry Andric 998349cc55cSDimitry Andric unsigned CanonNum = 0; 999349cc55cSDimitry Andric // Iterate over the value numbers found, the order does not matter in this 1000349cc55cSDimitry Andric // case. 1001349cc55cSDimitry Andric for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) { 1002349cc55cSDimitry Andric CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum)); 1003349cc55cSDimitry Andric CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first)); 1004349cc55cSDimitry Andric CanonNum++; 1005349cc55cSDimitry Andric } 1006349cc55cSDimitry Andric } 1007349cc55cSDimitry Andric 1008e8d8bef9SDimitry Andric /// From the list of IRSimilarityCandidates, perform a comparison between each 1009e8d8bef9SDimitry Andric /// IRSimilarityCandidate to determine if there are overlapping 1010e8d8bef9SDimitry Andric /// IRInstructionData, or if they do not have the same structure. 1011e8d8bef9SDimitry Andric /// 1012e8d8bef9SDimitry Andric /// \param [in] CandsForRepSubstring - The vector containing the 1013e8d8bef9SDimitry Andric /// IRSimilarityCandidates. 1014e8d8bef9SDimitry Andric /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 1015e8d8bef9SDimitry Andric /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 1016e8d8bef9SDimitry Andric /// vector are structurally similar to one another. 1017e8d8bef9SDimitry Andric static void findCandidateStructures( 1018e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 1019e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { 1020e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 1021e8d8bef9SDimitry Andric InnerCandEndIt; 1022e8d8bef9SDimitry Andric 1023e8d8bef9SDimitry Andric // IRSimilarityCandidates each have a structure for operand use. It is 1024e8d8bef9SDimitry Andric // possible that two instances of the same subsequences have different 1025e8d8bef9SDimitry Andric // structure. Each type of structure found is assigned a number. This 1026e8d8bef9SDimitry Andric // DenseMap maps an IRSimilarityCandidate to which type of similarity 1027e8d8bef9SDimitry Andric // discovered it fits within. 1028e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 1029e8d8bef9SDimitry Andric 1030e8d8bef9SDimitry Andric // Find the compatibility from each candidate to the others to determine 1031e8d8bef9SDimitry Andric // which candidates overlap and which have the same structure by mapping 1032e8d8bef9SDimitry Andric // each structure to a different group. 1033e8d8bef9SDimitry Andric bool SameStructure; 1034e8d8bef9SDimitry Andric bool Inserted; 1035e8d8bef9SDimitry Andric unsigned CurrentGroupNum = 0; 1036e8d8bef9SDimitry Andric unsigned OuterGroupNum; 1037e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 1038e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 1039e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 1040e8d8bef9SDimitry Andric 1041e8d8bef9SDimitry Andric // Iterate over the candidates to determine its structural and overlapping 1042e8d8bef9SDimitry Andric // compatibility with other instructions 1043349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 1044349cc55cSDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 1045e8d8bef9SDimitry Andric for (CandIt = CandsForRepSubstring.begin(), 1046e8d8bef9SDimitry Andric CandEndIt = CandsForRepSubstring.end(); 1047e8d8bef9SDimitry Andric CandIt != CandEndIt; CandIt++) { 1048e8d8bef9SDimitry Andric 1049e8d8bef9SDimitry Andric // Determine if it has an assigned structural group already. 1050e8d8bef9SDimitry Andric CandToGroupIt = CandToGroup.find(&*CandIt); 1051e8d8bef9SDimitry Andric if (CandToGroupIt == CandToGroup.end()) { 1052e8d8bef9SDimitry Andric // If not, we assign it one, and add it to our mapping. 1053e8d8bef9SDimitry Andric std::tie(CandToGroupIt, Inserted) = 1054e8d8bef9SDimitry Andric CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 1055e8d8bef9SDimitry Andric } 1056e8d8bef9SDimitry Andric 1057e8d8bef9SDimitry Andric // Get the structural group number from the iterator. 1058e8d8bef9SDimitry Andric OuterGroupNum = CandToGroupIt->second; 1059e8d8bef9SDimitry Andric 1060e8d8bef9SDimitry Andric // Check if we already have a list of IRSimilarityCandidates for the current 1061e8d8bef9SDimitry Andric // structural group. Create one if one does not exist. 1062e8d8bef9SDimitry Andric CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 1063349cc55cSDimitry Andric if (CurrentGroupPair == StructuralGroups.end()) { 1064349cc55cSDimitry Andric IRSimilarityCandidate::createCanonicalMappingFor(*CandIt); 1065e8d8bef9SDimitry Andric std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 1066e8d8bef9SDimitry Andric std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 1067349cc55cSDimitry Andric } 1068e8d8bef9SDimitry Andric 1069e8d8bef9SDimitry Andric // Iterate over the IRSimilarityCandidates following the current 1070e8d8bef9SDimitry Andric // IRSimilarityCandidate in the list to determine whether the two 1071e8d8bef9SDimitry Andric // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 1072e8d8bef9SDimitry Andric // of IRSimilarityCandidates. 1073e8d8bef9SDimitry Andric for (InnerCandIt = std::next(CandIt), 1074e8d8bef9SDimitry Andric InnerCandEndIt = CandsForRepSubstring.end(); 1075e8d8bef9SDimitry Andric InnerCandIt != InnerCandEndIt; InnerCandIt++) { 1076e8d8bef9SDimitry Andric 1077e8d8bef9SDimitry Andric // We check if the inner item has a group already, if it does, we skip it. 1078e8d8bef9SDimitry Andric CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 1079e8d8bef9SDimitry Andric if (CandToGroupItInner != CandToGroup.end()) 1080e8d8bef9SDimitry Andric continue; 1081e8d8bef9SDimitry Andric 1082e8d8bef9SDimitry Andric // Otherwise we determine if they have the same structure and add it to 1083e8d8bef9SDimitry Andric // vector if they match. 1084349cc55cSDimitry Andric ValueNumberMappingA.clear(); 1085349cc55cSDimitry Andric ValueNumberMappingB.clear(); 1086349cc55cSDimitry Andric SameStructure = IRSimilarityCandidate::compareStructure( 1087349cc55cSDimitry Andric *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB); 1088e8d8bef9SDimitry Andric if (!SameStructure) 1089e8d8bef9SDimitry Andric continue; 1090e8d8bef9SDimitry Andric 1091349cc55cSDimitry Andric InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA, 1092349cc55cSDimitry Andric ValueNumberMappingB); 1093e8d8bef9SDimitry Andric CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 1094e8d8bef9SDimitry Andric CurrentGroupPair->second.push_back(*InnerCandIt); 1095e8d8bef9SDimitry Andric } 1096e8d8bef9SDimitry Andric } 1097e8d8bef9SDimitry Andric } 1098e8d8bef9SDimitry Andric 1099e8d8bef9SDimitry Andric void IRSimilarityIdentifier::findCandidates( 1100e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrList, 1101e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 1102e8d8bef9SDimitry Andric SuffixTree ST(IntegerMapping); 1103e8d8bef9SDimitry Andric 1104e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> CandsForRepSubstring; 1105e8d8bef9SDimitry Andric std::vector<SimilarityGroup> NewCandidateGroups; 1106e8d8bef9SDimitry Andric 1107e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup> StructuralGroups; 1108e8d8bef9SDimitry Andric 1109e8d8bef9SDimitry Andric // Iterate over the subsequences found by the Suffix Tree to create 1110e8d8bef9SDimitry Andric // IRSimilarityCandidates for each repeated subsequence and determine which 1111e8d8bef9SDimitry Andric // instances are structurally similar to one another. 1112fe6060f1SDimitry Andric for (SuffixTree::RepeatedSubstring &RS : ST) { 1113fe6060f1SDimitry Andric createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, 1114e8d8bef9SDimitry Andric CandsForRepSubstring); 1115e8d8bef9SDimitry Andric 1116e8d8bef9SDimitry Andric if (CandsForRepSubstring.size() < 2) 1117e8d8bef9SDimitry Andric continue; 1118e8d8bef9SDimitry Andric 1119e8d8bef9SDimitry Andric findCandidateStructures(CandsForRepSubstring, StructuralGroups); 1120e8d8bef9SDimitry Andric for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) 1121e8d8bef9SDimitry Andric // We only add the group if it contains more than one 1122e8d8bef9SDimitry Andric // IRSimilarityCandidate. If there is only one, that means there is no 1123e8d8bef9SDimitry Andric // other repeated subsequence with the same structure. 1124e8d8bef9SDimitry Andric if (Group.second.size() > 1) 1125e8d8bef9SDimitry Andric SimilarityCandidates->push_back(Group.second); 1126e8d8bef9SDimitry Andric 1127e8d8bef9SDimitry Andric CandsForRepSubstring.clear(); 1128e8d8bef9SDimitry Andric StructuralGroups.clear(); 1129e8d8bef9SDimitry Andric NewCandidateGroups.clear(); 1130e8d8bef9SDimitry Andric } 1131e8d8bef9SDimitry Andric } 1132e8d8bef9SDimitry Andric 1133e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 1134e8d8bef9SDimitry Andric ArrayRef<std::unique_ptr<Module>> Modules) { 1135e8d8bef9SDimitry Andric resetSimilarityCandidates(); 1136e8d8bef9SDimitry Andric 1137e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrList; 1138e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMapping; 1139349cc55cSDimitry Andric Mapper.InstClassifier.EnableBranches = this->EnableBranches; 1140*04eeddc0SDimitry Andric Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; 1141*04eeddc0SDimitry Andric Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; 1142e8d8bef9SDimitry Andric 1143e8d8bef9SDimitry Andric populateMapper(Modules, InstrList, IntegerMapping); 1144e8d8bef9SDimitry Andric findCandidates(InstrList, IntegerMapping); 1145e8d8bef9SDimitry Andric 1146e8d8bef9SDimitry Andric return SimilarityCandidates.getValue(); 1147e8d8bef9SDimitry Andric } 1148e8d8bef9SDimitry Andric 1149e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 1150e8d8bef9SDimitry Andric resetSimilarityCandidates(); 1151349cc55cSDimitry Andric Mapper.InstClassifier.EnableBranches = this->EnableBranches; 1152*04eeddc0SDimitry Andric Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; 1153*04eeddc0SDimitry Andric Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; 1154e8d8bef9SDimitry Andric 1155e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrList; 1156e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMapping; 1157e8d8bef9SDimitry Andric 1158e8d8bef9SDimitry Andric populateMapper(M, InstrList, IntegerMapping); 1159e8d8bef9SDimitry Andric findCandidates(InstrList, IntegerMapping); 1160e8d8bef9SDimitry Andric 1161e8d8bef9SDimitry Andric return SimilarityCandidates.getValue(); 1162e8d8bef9SDimitry Andric } 1163e8d8bef9SDimitry Andric 1164e8d8bef9SDimitry Andric INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 1165e8d8bef9SDimitry Andric "ir-similarity-identifier", false, true) 1166e8d8bef9SDimitry Andric 1167e8d8bef9SDimitry Andric IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 1168e8d8bef9SDimitry Andric : ModulePass(ID) { 1169e8d8bef9SDimitry Andric initializeIRSimilarityIdentifierWrapperPassPass( 1170e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 1171e8d8bef9SDimitry Andric } 1172e8d8bef9SDimitry Andric 1173e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 1174*04eeddc0SDimitry Andric IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, 1175*04eeddc0SDimitry Andric MatchCallsByName)); 1176e8d8bef9SDimitry Andric return false; 1177e8d8bef9SDimitry Andric } 1178e8d8bef9SDimitry Andric 1179e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 1180e8d8bef9SDimitry Andric IRSI.reset(); 1181e8d8bef9SDimitry Andric return false; 1182e8d8bef9SDimitry Andric } 1183e8d8bef9SDimitry Andric 1184e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 1185fe6060f1SDimitry Andric IRSI->findSimilarity(M); 1186e8d8bef9SDimitry Andric return false; 1187e8d8bef9SDimitry Andric } 1188e8d8bef9SDimitry Andric 1189e8d8bef9SDimitry Andric AnalysisKey IRSimilarityAnalysis::Key; 1190e8d8bef9SDimitry Andric IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 1191e8d8bef9SDimitry Andric ModuleAnalysisManager &) { 1192e8d8bef9SDimitry Andric 1193*04eeddc0SDimitry Andric auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, 1194*04eeddc0SDimitry Andric MatchCallsByName); 1195fe6060f1SDimitry Andric IRSI.findSimilarity(M); 1196fe6060f1SDimitry Andric return IRSI; 1197e8d8bef9SDimitry Andric } 1198e8d8bef9SDimitry Andric 1199e8d8bef9SDimitry Andric PreservedAnalyses 1200e8d8bef9SDimitry Andric IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 1201e8d8bef9SDimitry Andric IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 1202e8d8bef9SDimitry Andric Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); 1203e8d8bef9SDimitry Andric 1204e8d8bef9SDimitry Andric for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 1205e8d8bef9SDimitry Andric OS << CandVec.size() << " candidates of length " 1206e8d8bef9SDimitry Andric << CandVec.begin()->getLength() << ". Found in: \n"; 1207e8d8bef9SDimitry Andric for (IRSimilarityCandidate &Cand : CandVec) { 1208e8d8bef9SDimitry Andric OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 1209e8d8bef9SDimitry Andric << ", Basic Block: "; 1210e8d8bef9SDimitry Andric if (Cand.front()->Inst->getParent()->getName().str() == "") 1211fe6060f1SDimitry Andric OS << "(unnamed)"; 1212e8d8bef9SDimitry Andric else 1213fe6060f1SDimitry Andric OS << Cand.front()->Inst->getParent()->getName().str(); 1214fe6060f1SDimitry Andric OS << "\n Start Instruction: "; 1215fe6060f1SDimitry Andric Cand.frontInstruction()->print(OS); 1216fe6060f1SDimitry Andric OS << "\n End Instruction: "; 1217fe6060f1SDimitry Andric Cand.backInstruction()->print(OS); 1218fe6060f1SDimitry Andric OS << "\n"; 1219e8d8bef9SDimitry Andric } 1220e8d8bef9SDimitry Andric } 1221e8d8bef9SDimitry Andric 1222e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 1223e8d8bef9SDimitry Andric } 1224e8d8bef9SDimitry Andric 1225e8d8bef9SDimitry Andric char IRSimilarityIdentifierWrapperPass::ID = 0; 1226