1*e8d8bef9SDimitry Andric //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2*e8d8bef9SDimitry Andric // 3*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*e8d8bef9SDimitry Andric // 7*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8*e8d8bef9SDimitry Andric // 9*e8d8bef9SDimitry Andric // \file 10*e8d8bef9SDimitry Andric // Implementation file for the IRSimilarityIdentifier for identifying 11*e8d8bef9SDimitry Andric // similarities in IR including the IRInstructionMapper. 12*e8d8bef9SDimitry Andric // 13*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 14*e8d8bef9SDimitry Andric 15*e8d8bef9SDimitry Andric #include "llvm/Analysis/IRSimilarityIdentifier.h" 16*e8d8bef9SDimitry Andric #include "llvm/ADT/DenseMap.h" 17*e8d8bef9SDimitry Andric #include "llvm/IR/Intrinsics.h" 18*e8d8bef9SDimitry Andric #include "llvm/IR/Operator.h" 19*e8d8bef9SDimitry Andric #include "llvm/IR/User.h" 20*e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 21*e8d8bef9SDimitry Andric #include "llvm/Support/SuffixTree.h" 22*e8d8bef9SDimitry Andric 23*e8d8bef9SDimitry Andric using namespace llvm; 24*e8d8bef9SDimitry Andric using namespace IRSimilarity; 25*e8d8bef9SDimitry Andric 26*e8d8bef9SDimitry Andric IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 27*e8d8bef9SDimitry Andric IRInstructionDataList &IDList) 28*e8d8bef9SDimitry Andric : Inst(&I), Legal(Legality), IDL(&IDList) { 29*e8d8bef9SDimitry Andric // We check for whether we have a comparison instruction. If it is, we 30*e8d8bef9SDimitry Andric // find the "less than" version of the predicate for consistency for 31*e8d8bef9SDimitry Andric // comparison instructions throught the program. 32*e8d8bef9SDimitry Andric if (CmpInst *C = dyn_cast<CmpInst>(&I)) { 33*e8d8bef9SDimitry Andric CmpInst::Predicate Predicate = predicateForConsistency(C); 34*e8d8bef9SDimitry Andric if (Predicate != C->getPredicate()) 35*e8d8bef9SDimitry Andric RevisedPredicate = Predicate; 36*e8d8bef9SDimitry Andric } 37*e8d8bef9SDimitry Andric 38*e8d8bef9SDimitry Andric // Here we collect the operands and their types for determining whether 39*e8d8bef9SDimitry Andric // the structure of the operand use matches between two different candidates. 40*e8d8bef9SDimitry Andric for (Use &OI : I.operands()) { 41*e8d8bef9SDimitry Andric if (isa<CmpInst>(I) && RevisedPredicate.hasValue()) { 42*e8d8bef9SDimitry Andric // If we have a CmpInst where the predicate is reversed, it means the 43*e8d8bef9SDimitry Andric // operands must be reversed as well. 44*e8d8bef9SDimitry Andric OperVals.insert(OperVals.begin(), OI.get()); 45*e8d8bef9SDimitry Andric continue; 46*e8d8bef9SDimitry Andric } 47*e8d8bef9SDimitry Andric 48*e8d8bef9SDimitry Andric OperVals.push_back(OI.get()); 49*e8d8bef9SDimitry Andric } 50*e8d8bef9SDimitry Andric } 51*e8d8bef9SDimitry Andric 52*e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 53*e8d8bef9SDimitry Andric switch (CI->getPredicate()) { 54*e8d8bef9SDimitry Andric case CmpInst::FCMP_OGT: 55*e8d8bef9SDimitry Andric case CmpInst::FCMP_UGT: 56*e8d8bef9SDimitry Andric case CmpInst::FCMP_OGE: 57*e8d8bef9SDimitry Andric case CmpInst::FCMP_UGE: 58*e8d8bef9SDimitry Andric case CmpInst::ICMP_SGT: 59*e8d8bef9SDimitry Andric case CmpInst::ICMP_UGT: 60*e8d8bef9SDimitry Andric case CmpInst::ICMP_SGE: 61*e8d8bef9SDimitry Andric case CmpInst::ICMP_UGE: 62*e8d8bef9SDimitry Andric return CI->getSwappedPredicate(); 63*e8d8bef9SDimitry Andric default: 64*e8d8bef9SDimitry Andric return CI->getPredicate(); 65*e8d8bef9SDimitry Andric } 66*e8d8bef9SDimitry Andric } 67*e8d8bef9SDimitry Andric 68*e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::getPredicate() const { 69*e8d8bef9SDimitry Andric assert(isa<CmpInst>(Inst) && 70*e8d8bef9SDimitry Andric "Can only get a predicate from a compare instruction"); 71*e8d8bef9SDimitry Andric 72*e8d8bef9SDimitry Andric if (RevisedPredicate.hasValue()) 73*e8d8bef9SDimitry Andric return RevisedPredicate.getValue(); 74*e8d8bef9SDimitry Andric 75*e8d8bef9SDimitry Andric return cast<CmpInst>(Inst)->getPredicate(); 76*e8d8bef9SDimitry Andric } 77*e8d8bef9SDimitry Andric 78*e8d8bef9SDimitry Andric static StringRef getCalledFunctionName(CallInst &CI) { 79*e8d8bef9SDimitry Andric assert(CI.getCalledFunction() != nullptr && "Called Function is nullptr?"); 80*e8d8bef9SDimitry Andric 81*e8d8bef9SDimitry Andric return CI.getCalledFunction()->getName(); 82*e8d8bef9SDimitry Andric } 83*e8d8bef9SDimitry Andric 84*e8d8bef9SDimitry Andric bool IRSimilarity::isClose(const IRInstructionData &A, 85*e8d8bef9SDimitry Andric const IRInstructionData &B) { 86*e8d8bef9SDimitry Andric 87*e8d8bef9SDimitry Andric if (!A.Legal || !B.Legal) 88*e8d8bef9SDimitry Andric return false; 89*e8d8bef9SDimitry Andric 90*e8d8bef9SDimitry Andric // Check if we are performing the same sort of operation on the same types 91*e8d8bef9SDimitry Andric // but not on the same values. 92*e8d8bef9SDimitry Andric if (!A.Inst->isSameOperationAs(B.Inst)) { 93*e8d8bef9SDimitry Andric // If there is a predicate, this means that either there is a swapped 94*e8d8bef9SDimitry Andric // predicate, or that the types are different, we want to make sure that 95*e8d8bef9SDimitry Andric // the predicates are equivalent via swapping. 96*e8d8bef9SDimitry Andric if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 97*e8d8bef9SDimitry Andric 98*e8d8bef9SDimitry Andric if (A.getPredicate() != B.getPredicate()) 99*e8d8bef9SDimitry Andric return false; 100*e8d8bef9SDimitry Andric 101*e8d8bef9SDimitry Andric // If the predicates are the same via swap, make sure that the types are 102*e8d8bef9SDimitry Andric // still the same. 103*e8d8bef9SDimitry Andric auto ZippedTypes = zip(A.OperVals, B.OperVals); 104*e8d8bef9SDimitry Andric 105*e8d8bef9SDimitry Andric return all_of( 106*e8d8bef9SDimitry Andric ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 107*e8d8bef9SDimitry Andric return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 108*e8d8bef9SDimitry Andric }); 109*e8d8bef9SDimitry Andric } 110*e8d8bef9SDimitry Andric 111*e8d8bef9SDimitry Andric return false; 112*e8d8bef9SDimitry Andric } 113*e8d8bef9SDimitry Andric 114*e8d8bef9SDimitry Andric // Since any GEP Instruction operands after the first operand cannot be 115*e8d8bef9SDimitry Andric // defined by a register, we must make sure that the operands after the first 116*e8d8bef9SDimitry Andric // are the same in the two instructions 117*e8d8bef9SDimitry Andric if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { 118*e8d8bef9SDimitry Andric auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); 119*e8d8bef9SDimitry Andric 120*e8d8bef9SDimitry Andric // If the instructions do not have the same inbounds restrictions, we do 121*e8d8bef9SDimitry Andric // not consider them the same. 122*e8d8bef9SDimitry Andric if (GEP->isInBounds() != OtherGEP->isInBounds()) 123*e8d8bef9SDimitry Andric return false; 124*e8d8bef9SDimitry Andric 125*e8d8bef9SDimitry Andric auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); 126*e8d8bef9SDimitry Andric 127*e8d8bef9SDimitry Andric // We increment here since we do not care about the first instruction, 128*e8d8bef9SDimitry Andric // we only care about the following operands since they must be the 129*e8d8bef9SDimitry Andric // exact same to be considered similar. 130*e8d8bef9SDimitry Andric return all_of(drop_begin(ZippedOperands), 131*e8d8bef9SDimitry Andric [](std::tuple<llvm::Use &, llvm::Use &> R) { 132*e8d8bef9SDimitry Andric return std::get<0>(R) == std::get<1>(R); 133*e8d8bef9SDimitry Andric }); 134*e8d8bef9SDimitry Andric } 135*e8d8bef9SDimitry Andric 136*e8d8bef9SDimitry Andric // If the instructions are functions, we make sure that the function name is 137*e8d8bef9SDimitry Andric // the same. We already know that the types are since is isSameOperationAs is 138*e8d8bef9SDimitry Andric // true. 139*e8d8bef9SDimitry Andric if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { 140*e8d8bef9SDimitry Andric CallInst *CIA = cast<CallInst>(A.Inst); 141*e8d8bef9SDimitry Andric CallInst *CIB = cast<CallInst>(B.Inst); 142*e8d8bef9SDimitry Andric if (getCalledFunctionName(*CIA).compare(getCalledFunctionName(*CIB)) != 0) 143*e8d8bef9SDimitry Andric return false; 144*e8d8bef9SDimitry Andric } 145*e8d8bef9SDimitry Andric 146*e8d8bef9SDimitry Andric return true; 147*e8d8bef9SDimitry Andric } 148*e8d8bef9SDimitry Andric 149*e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 150*e8d8bef9SDimitry Andric // into the same interface. 151*e8d8bef9SDimitry Andric void IRInstructionMapper::convertToUnsignedVec( 152*e8d8bef9SDimitry Andric BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 153*e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 154*e8d8bef9SDimitry Andric BasicBlock::iterator It = BB.begin(); 155*e8d8bef9SDimitry Andric 156*e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMappingForBB; 157*e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrListForBB; 158*e8d8bef9SDimitry Andric 159*e8d8bef9SDimitry Andric HaveLegalRange = false; 160*e8d8bef9SDimitry Andric CanCombineWithPrevInstr = false; 161*e8d8bef9SDimitry Andric AddedIllegalLastTime = true; 162*e8d8bef9SDimitry Andric 163*e8d8bef9SDimitry Andric for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 164*e8d8bef9SDimitry Andric switch (InstClassifier.visit(*It)) { 165*e8d8bef9SDimitry Andric case InstrType::Legal: 166*e8d8bef9SDimitry Andric mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 167*e8d8bef9SDimitry Andric break; 168*e8d8bef9SDimitry Andric case InstrType::Illegal: 169*e8d8bef9SDimitry Andric mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 170*e8d8bef9SDimitry Andric break; 171*e8d8bef9SDimitry Andric case InstrType::Invisible: 172*e8d8bef9SDimitry Andric AddedIllegalLastTime = false; 173*e8d8bef9SDimitry Andric break; 174*e8d8bef9SDimitry Andric } 175*e8d8bef9SDimitry Andric } 176*e8d8bef9SDimitry Andric 177*e8d8bef9SDimitry Andric if (HaveLegalRange) { 178*e8d8bef9SDimitry Andric mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 179*e8d8bef9SDimitry Andric for_each(InstrListForBB, 180*e8d8bef9SDimitry Andric [this](IRInstructionData *ID) { this->IDL->push_back(*ID); }); 181*e8d8bef9SDimitry Andric llvm::append_range(InstrList, InstrListForBB); 182*e8d8bef9SDimitry Andric llvm::append_range(IntegerMapping, IntegerMappingForBB); 183*e8d8bef9SDimitry Andric } 184*e8d8bef9SDimitry Andric } 185*e8d8bef9SDimitry Andric 186*e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 187*e8d8bef9SDimitry Andric // into the same interface. 188*e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToLegalUnsigned( 189*e8d8bef9SDimitry Andric BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 190*e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrListForBB) { 191*e8d8bef9SDimitry Andric // We added something legal, so we should unset the AddedLegalLastTime 192*e8d8bef9SDimitry Andric // flag. 193*e8d8bef9SDimitry Andric AddedIllegalLastTime = false; 194*e8d8bef9SDimitry Andric 195*e8d8bef9SDimitry Andric // If we have at least two adjacent legal instructions (which may have 196*e8d8bef9SDimitry Andric // invisible instructions in between), remember that. 197*e8d8bef9SDimitry Andric if (CanCombineWithPrevInstr) 198*e8d8bef9SDimitry Andric HaveLegalRange = true; 199*e8d8bef9SDimitry Andric CanCombineWithPrevInstr = true; 200*e8d8bef9SDimitry Andric 201*e8d8bef9SDimitry Andric // Get the integer for this instruction or give it the current 202*e8d8bef9SDimitry Andric // LegalInstrNumber. 203*e8d8bef9SDimitry Andric IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 204*e8d8bef9SDimitry Andric InstrListForBB.push_back(ID); 205*e8d8bef9SDimitry Andric 206*e8d8bef9SDimitry Andric // Add to the instruction list 207*e8d8bef9SDimitry Andric bool WasInserted; 208*e8d8bef9SDimitry Andric DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 209*e8d8bef9SDimitry Andric ResultIt; 210*e8d8bef9SDimitry Andric std::tie(ResultIt, WasInserted) = 211*e8d8bef9SDimitry Andric InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 212*e8d8bef9SDimitry Andric unsigned INumber = ResultIt->second; 213*e8d8bef9SDimitry Andric 214*e8d8bef9SDimitry Andric // There was an insertion. 215*e8d8bef9SDimitry Andric if (WasInserted) 216*e8d8bef9SDimitry Andric LegalInstrNumber++; 217*e8d8bef9SDimitry Andric 218*e8d8bef9SDimitry Andric IntegerMappingForBB.push_back(INumber); 219*e8d8bef9SDimitry Andric 220*e8d8bef9SDimitry Andric // Make sure we don't overflow or use any integers reserved by the DenseMap. 221*e8d8bef9SDimitry Andric assert(LegalInstrNumber < IllegalInstrNumber && 222*e8d8bef9SDimitry Andric "Instruction mapping overflow!"); 223*e8d8bef9SDimitry Andric 224*e8d8bef9SDimitry Andric assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 225*e8d8bef9SDimitry Andric "Tried to assign DenseMap tombstone or empty key to instruction."); 226*e8d8bef9SDimitry Andric assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 227*e8d8bef9SDimitry Andric "Tried to assign DenseMap tombstone or empty key to instruction."); 228*e8d8bef9SDimitry Andric 229*e8d8bef9SDimitry Andric return INumber; 230*e8d8bef9SDimitry Andric } 231*e8d8bef9SDimitry Andric 232*e8d8bef9SDimitry Andric IRInstructionData * 233*e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 234*e8d8bef9SDimitry Andric IRInstructionDataList &IDL) { 235*e8d8bef9SDimitry Andric return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 236*e8d8bef9SDimitry Andric } 237*e8d8bef9SDimitry Andric 238*e8d8bef9SDimitry Andric IRInstructionDataList * 239*e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionDataList() { 240*e8d8bef9SDimitry Andric return new (IDLAllocator->Allocate()) IRInstructionDataList(); 241*e8d8bef9SDimitry Andric } 242*e8d8bef9SDimitry Andric 243*e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 244*e8d8bef9SDimitry Andric // into the same interface. 245*e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToIllegalUnsigned( 246*e8d8bef9SDimitry Andric BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 247*e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrListForBB, bool End) { 248*e8d8bef9SDimitry Andric // Can't combine an illegal instruction. Set the flag. 249*e8d8bef9SDimitry Andric CanCombineWithPrevInstr = false; 250*e8d8bef9SDimitry Andric 251*e8d8bef9SDimitry Andric // Only add one illegal number per range of legal numbers. 252*e8d8bef9SDimitry Andric if (AddedIllegalLastTime) 253*e8d8bef9SDimitry Andric return IllegalInstrNumber; 254*e8d8bef9SDimitry Andric 255*e8d8bef9SDimitry Andric IRInstructionData *ID = nullptr; 256*e8d8bef9SDimitry Andric if (!End) 257*e8d8bef9SDimitry Andric ID = allocateIRInstructionData(*It, false, *IDL); 258*e8d8bef9SDimitry Andric InstrListForBB.push_back(ID); 259*e8d8bef9SDimitry Andric 260*e8d8bef9SDimitry Andric // Remember that we added an illegal number last time. 261*e8d8bef9SDimitry Andric AddedIllegalLastTime = true; 262*e8d8bef9SDimitry Andric unsigned INumber = IllegalInstrNumber; 263*e8d8bef9SDimitry Andric IntegerMappingForBB.push_back(IllegalInstrNumber--); 264*e8d8bef9SDimitry Andric 265*e8d8bef9SDimitry Andric assert(LegalInstrNumber < IllegalInstrNumber && 266*e8d8bef9SDimitry Andric "Instruction mapping overflow!"); 267*e8d8bef9SDimitry Andric 268*e8d8bef9SDimitry Andric assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 269*e8d8bef9SDimitry Andric "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 270*e8d8bef9SDimitry Andric 271*e8d8bef9SDimitry Andric assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 272*e8d8bef9SDimitry Andric "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 273*e8d8bef9SDimitry Andric 274*e8d8bef9SDimitry Andric return INumber; 275*e8d8bef9SDimitry Andric } 276*e8d8bef9SDimitry Andric 277*e8d8bef9SDimitry Andric IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 278*e8d8bef9SDimitry Andric IRInstructionData *FirstInstIt, 279*e8d8bef9SDimitry Andric IRInstructionData *LastInstIt) 280*e8d8bef9SDimitry Andric : StartIdx(StartIdx), Len(Len) { 281*e8d8bef9SDimitry Andric 282*e8d8bef9SDimitry Andric assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 283*e8d8bef9SDimitry Andric assert(LastInstIt != nullptr && "Instruction is nullptr!"); 284*e8d8bef9SDimitry Andric assert(StartIdx + Len > StartIdx && 285*e8d8bef9SDimitry Andric "Overflow for IRSimilarityCandidate range?"); 286*e8d8bef9SDimitry Andric assert(Len - 1 == static_cast<unsigned>(std::distance( 287*e8d8bef9SDimitry Andric iterator(FirstInstIt), iterator(LastInstIt))) && 288*e8d8bef9SDimitry Andric "Length of the first and last IRInstructionData do not match the " 289*e8d8bef9SDimitry Andric "given length"); 290*e8d8bef9SDimitry Andric 291*e8d8bef9SDimitry Andric // We iterate over the given instructions, and map each unique value 292*e8d8bef9SDimitry Andric // to a unique number in the IRSimilarityCandidate ValueToNumber and 293*e8d8bef9SDimitry Andric // NumberToValue maps. A constant get its own value globally, the individual 294*e8d8bef9SDimitry Andric // uses of the constants are not considered to be unique. 295*e8d8bef9SDimitry Andric // 296*e8d8bef9SDimitry Andric // IR: Mapping Added: 297*e8d8bef9SDimitry Andric // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 298*e8d8bef9SDimitry Andric // %add2 = add i32 %a, %1 %add2 -> 4 299*e8d8bef9SDimitry Andric // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 300*e8d8bef9SDimitry Andric // 301*e8d8bef9SDimitry Andric // when replace with global values, starting from 1, would be 302*e8d8bef9SDimitry Andric // 303*e8d8bef9SDimitry Andric // 3 = add i32 1, 2 304*e8d8bef9SDimitry Andric // 4 = add i32 1, 3 305*e8d8bef9SDimitry Andric // 6 = add i32 5, 2 306*e8d8bef9SDimitry Andric unsigned LocalValNumber = 1; 307*e8d8bef9SDimitry Andric IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 308*e8d8bef9SDimitry Andric for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 309*e8d8bef9SDimitry Andric // Map the operand values to an unsigned integer if it does not already 310*e8d8bef9SDimitry Andric // have an unsigned integer assigned to it. 311*e8d8bef9SDimitry Andric for (Value *Arg : ID->OperVals) 312*e8d8bef9SDimitry Andric if (ValueToNumber.find(Arg) == ValueToNumber.end()) { 313*e8d8bef9SDimitry Andric ValueToNumber.try_emplace(Arg, LocalValNumber); 314*e8d8bef9SDimitry Andric NumberToValue.try_emplace(LocalValNumber, Arg); 315*e8d8bef9SDimitry Andric LocalValNumber++; 316*e8d8bef9SDimitry Andric } 317*e8d8bef9SDimitry Andric 318*e8d8bef9SDimitry Andric // Mapping the instructions to an unsigned integer if it is not already 319*e8d8bef9SDimitry Andric // exist in the mapping. 320*e8d8bef9SDimitry Andric if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { 321*e8d8bef9SDimitry Andric ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 322*e8d8bef9SDimitry Andric NumberToValue.try_emplace(LocalValNumber, ID->Inst); 323*e8d8bef9SDimitry Andric LocalValNumber++; 324*e8d8bef9SDimitry Andric } 325*e8d8bef9SDimitry Andric } 326*e8d8bef9SDimitry Andric 327*e8d8bef9SDimitry Andric // Setting the first and last instruction data pointers for the candidate. If 328*e8d8bef9SDimitry Andric // we got through the entire for loop without hitting an assert, we know 329*e8d8bef9SDimitry Andric // that both of these instructions are not nullptrs. 330*e8d8bef9SDimitry Andric FirstInst = FirstInstIt; 331*e8d8bef9SDimitry Andric LastInst = LastInstIt; 332*e8d8bef9SDimitry Andric } 333*e8d8bef9SDimitry Andric 334*e8d8bef9SDimitry Andric bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 335*e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 336*e8d8bef9SDimitry Andric if (A.getLength() != B.getLength()) 337*e8d8bef9SDimitry Andric return false; 338*e8d8bef9SDimitry Andric 339*e8d8bef9SDimitry Andric auto InstrDataForBoth = 340*e8d8bef9SDimitry Andric zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 341*e8d8bef9SDimitry Andric 342*e8d8bef9SDimitry Andric return all_of(InstrDataForBoth, 343*e8d8bef9SDimitry Andric [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 344*e8d8bef9SDimitry Andric IRInstructionData &A = std::get<0>(R); 345*e8d8bef9SDimitry Andric IRInstructionData &B = std::get<1>(R); 346*e8d8bef9SDimitry Andric if (!A.Legal || !B.Legal) 347*e8d8bef9SDimitry Andric return false; 348*e8d8bef9SDimitry Andric return isClose(A, B); 349*e8d8bef9SDimitry Andric }); 350*e8d8bef9SDimitry Andric } 351*e8d8bef9SDimitry Andric 352*e8d8bef9SDimitry Andric /// Determine if one or more of the assigned global value numbers for the 353*e8d8bef9SDimitry Andric /// operands in \p TargetValueNumbers is in the current mapping set for operand 354*e8d8bef9SDimitry Andric /// numbers in \p SourceOperands. The set of possible corresponding global 355*e8d8bef9SDimitry Andric /// value numbers are replaced with the most recent version of compatible 356*e8d8bef9SDimitry Andric /// values. 357*e8d8bef9SDimitry Andric /// 358*e8d8bef9SDimitry Andric /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 359*e8d8bef9SDimitry Andric /// value number for the source IRInstructionCandidate. 360*e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 361*e8d8bef9SDimitry Andric /// IRSimilarityCandidate global value numbers to a set of possible numbers in 362*e8d8bef9SDimitry Andric /// the target. 363*e8d8bef9SDimitry Andric /// \param [in] SourceOperands - The operands in the original 364*e8d8bef9SDimitry Andric /// IRSimilarityCandidate in the current instruction. 365*e8d8bef9SDimitry Andric /// \param [in] TargetValueNumbers - The global value numbers of the operands in 366*e8d8bef9SDimitry Andric /// the corresponding Instruction in the other IRSimilarityCandidate. 367*e8d8bef9SDimitry Andric /// \returns true if there exists a possible mapping between the source 368*e8d8bef9SDimitry Andric /// Instruction operands and the target Instruction operands, and false if not. 369*e8d8bef9SDimitry Andric static bool checkNumberingAndReplaceCommutative( 370*e8d8bef9SDimitry Andric const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 371*e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 372*e8d8bef9SDimitry Andric ArrayRef<Value *> &SourceOperands, 373*e8d8bef9SDimitry Andric DenseSet<unsigned> &TargetValueNumbers){ 374*e8d8bef9SDimitry Andric 375*e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 376*e8d8bef9SDimitry Andric 377*e8d8bef9SDimitry Andric unsigned ArgVal; 378*e8d8bef9SDimitry Andric bool WasInserted; 379*e8d8bef9SDimitry Andric 380*e8d8bef9SDimitry Andric // Iterate over the operands in the source IRSimilarityCandidate to determine 381*e8d8bef9SDimitry Andric // whether there exists an operand in the other IRSimilarityCandidate that 382*e8d8bef9SDimitry Andric // creates a valid mapping of Value to Value between the 383*e8d8bef9SDimitry Andric // IRSimilarityCaniddates. 384*e8d8bef9SDimitry Andric for (Value *V : SourceOperands) { 385*e8d8bef9SDimitry Andric ArgVal = SourceValueToNumberMapping.find(V)->second; 386*e8d8bef9SDimitry Andric 387*e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 388*e8d8bef9SDimitry Andric std::make_pair(ArgVal, TargetValueNumbers)); 389*e8d8bef9SDimitry Andric 390*e8d8bef9SDimitry Andric // Instead of finding a current mapping, we inserted a set. This means a 391*e8d8bef9SDimitry Andric // mapping did not exist for the source Instruction operand, it has no 392*e8d8bef9SDimitry Andric // current constraints we need to check. 393*e8d8bef9SDimitry Andric if (WasInserted) 394*e8d8bef9SDimitry Andric continue; 395*e8d8bef9SDimitry Andric 396*e8d8bef9SDimitry Andric // If a mapping already exists for the source operand to the values in the 397*e8d8bef9SDimitry Andric // other IRSimilarityCandidate we need to iterate over the items in other 398*e8d8bef9SDimitry Andric // IRSimilarityCandidate's Instruction to determine whether there is a valid 399*e8d8bef9SDimitry Andric // mapping of Value to Value. 400*e8d8bef9SDimitry Andric DenseSet<unsigned> NewSet; 401*e8d8bef9SDimitry Andric for (unsigned &Curr : ValueMappingIt->second) 402*e8d8bef9SDimitry Andric // If we can find the value in the mapping, we add it to the new set. 403*e8d8bef9SDimitry Andric if (TargetValueNumbers.contains(Curr)) 404*e8d8bef9SDimitry Andric NewSet.insert(Curr); 405*e8d8bef9SDimitry Andric 406*e8d8bef9SDimitry Andric // If we could not find a Value, return 0. 407*e8d8bef9SDimitry Andric if (NewSet.empty()) 408*e8d8bef9SDimitry Andric return false; 409*e8d8bef9SDimitry Andric 410*e8d8bef9SDimitry Andric // Otherwise replace the old mapping with the newly constructed one. 411*e8d8bef9SDimitry Andric if (NewSet.size() != ValueMappingIt->second.size()) 412*e8d8bef9SDimitry Andric ValueMappingIt->second.swap(NewSet); 413*e8d8bef9SDimitry Andric 414*e8d8bef9SDimitry Andric // We have reached no conclusions about the mapping, and cannot remove 415*e8d8bef9SDimitry Andric // any items from the other operands, so we move to check the next operand. 416*e8d8bef9SDimitry Andric if (ValueMappingIt->second.size() != 1) 417*e8d8bef9SDimitry Andric continue; 418*e8d8bef9SDimitry Andric 419*e8d8bef9SDimitry Andric 420*e8d8bef9SDimitry Andric unsigned ValToRemove = *ValueMappingIt->second.begin(); 421*e8d8bef9SDimitry Andric // When there is only one item left in the mapping for and operand, remove 422*e8d8bef9SDimitry Andric // the value from the other operands. If it results in there being no 423*e8d8bef9SDimitry Andric // mapping, return false, it means the mapping is wrong 424*e8d8bef9SDimitry Andric for (Value *InnerV : SourceOperands) { 425*e8d8bef9SDimitry Andric if (V == InnerV) 426*e8d8bef9SDimitry Andric continue; 427*e8d8bef9SDimitry Andric 428*e8d8bef9SDimitry Andric unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 429*e8d8bef9SDimitry Andric ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 430*e8d8bef9SDimitry Andric if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 431*e8d8bef9SDimitry Andric continue; 432*e8d8bef9SDimitry Andric 433*e8d8bef9SDimitry Andric ValueMappingIt->second.erase(ValToRemove); 434*e8d8bef9SDimitry Andric if (ValueMappingIt->second.empty()) 435*e8d8bef9SDimitry Andric return false; 436*e8d8bef9SDimitry Andric } 437*e8d8bef9SDimitry Andric } 438*e8d8bef9SDimitry Andric 439*e8d8bef9SDimitry Andric return true; 440*e8d8bef9SDimitry Andric } 441*e8d8bef9SDimitry Andric 442*e8d8bef9SDimitry Andric /// Determine if operand number \p TargetArgVal is in the current mapping set 443*e8d8bef9SDimitry Andric /// for operand number \p SourceArgVal. 444*e8d8bef9SDimitry Andric /// 445*e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 446*e8d8bef9SDimitry Andric /// value numbers from source IRSimilarityCandidate to target 447*e8d8bef9SDimitry Andric /// IRSimilarityCandidate. 448*e8d8bef9SDimitry Andric /// \param [in] SourceArgVal The global value number for an operand in the 449*e8d8bef9SDimitry Andric /// in the original candidate. 450*e8d8bef9SDimitry Andric /// \param [in] TargetArgVal The global value number for the corresponding 451*e8d8bef9SDimitry Andric /// operand in the other candidate. 452*e8d8bef9SDimitry Andric /// \returns True if there exists a mapping and false if not. 453*e8d8bef9SDimitry Andric bool checkNumberingAndReplace( 454*e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 455*e8d8bef9SDimitry Andric unsigned SourceArgVal, unsigned TargetArgVal) { 456*e8d8bef9SDimitry Andric // We are given two unsigned integers representing the global values of 457*e8d8bef9SDimitry Andric // the operands in different IRSimilarityCandidates and a current mapping 458*e8d8bef9SDimitry Andric // between the two. 459*e8d8bef9SDimitry Andric // 460*e8d8bef9SDimitry Andric // Source Operand GVN: 1 461*e8d8bef9SDimitry Andric // Target Operand GVN: 2 462*e8d8bef9SDimitry Andric // CurrentMapping: {1: {1, 2}} 463*e8d8bef9SDimitry Andric // 464*e8d8bef9SDimitry Andric // Since we have mapping, and the target operand is contained in the set, we 465*e8d8bef9SDimitry Andric // update it to: 466*e8d8bef9SDimitry Andric // CurrentMapping: {1: {2}} 467*e8d8bef9SDimitry Andric // and can return true. But, if the mapping was 468*e8d8bef9SDimitry Andric // CurrentMapping: {1: {3}} 469*e8d8bef9SDimitry Andric // we would return false. 470*e8d8bef9SDimitry Andric 471*e8d8bef9SDimitry Andric bool WasInserted; 472*e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 473*e8d8bef9SDimitry Andric 474*e8d8bef9SDimitry Andric std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 475*e8d8bef9SDimitry Andric std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 476*e8d8bef9SDimitry Andric 477*e8d8bef9SDimitry Andric // If we created a new mapping, then we are done. 478*e8d8bef9SDimitry Andric if (WasInserted) 479*e8d8bef9SDimitry Andric return true; 480*e8d8bef9SDimitry Andric 481*e8d8bef9SDimitry Andric // If there is more than one option in the mapping set, and the target value 482*e8d8bef9SDimitry Andric // is included in the mapping set replace that set with one that only includes 483*e8d8bef9SDimitry Andric // the target value, as it is the only valid mapping via the non commutative 484*e8d8bef9SDimitry Andric // instruction. 485*e8d8bef9SDimitry Andric 486*e8d8bef9SDimitry Andric DenseSet<unsigned> &TargetSet = Val->second; 487*e8d8bef9SDimitry Andric if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 488*e8d8bef9SDimitry Andric TargetSet.clear(); 489*e8d8bef9SDimitry Andric TargetSet.insert(TargetArgVal); 490*e8d8bef9SDimitry Andric return true; 491*e8d8bef9SDimitry Andric } 492*e8d8bef9SDimitry Andric 493*e8d8bef9SDimitry Andric // Return true if we can find the value in the set. 494*e8d8bef9SDimitry Andric return TargetSet.contains(TargetArgVal); 495*e8d8bef9SDimitry Andric } 496*e8d8bef9SDimitry Andric 497*e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 498*e8d8bef9SDimitry Andric OperandMapping A, OperandMapping B) { 499*e8d8bef9SDimitry Andric // Iterators to keep track of where we are in the operands for each 500*e8d8bef9SDimitry Andric // Instruction. 501*e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 502*e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 503*e8d8bef9SDimitry Andric unsigned OperandLength = A.OperVals.size(); 504*e8d8bef9SDimitry Andric 505*e8d8bef9SDimitry Andric // For each operand, get the value numbering and ensure it is consistent. 506*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 507*e8d8bef9SDimitry Andric unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 508*e8d8bef9SDimitry Andric unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 509*e8d8bef9SDimitry Andric 510*e8d8bef9SDimitry Andric // Attempt to add a set with only the target value. If there is no mapping 511*e8d8bef9SDimitry Andric // we can create it here. 512*e8d8bef9SDimitry Andric // 513*e8d8bef9SDimitry Andric // For an instruction like a subtraction: 514*e8d8bef9SDimitry Andric // IRSimilarityCandidateA: IRSimilarityCandidateB: 515*e8d8bef9SDimitry Andric // %resultA = sub %a, %b %resultB = sub %d, %e 516*e8d8bef9SDimitry Andric // 517*e8d8bef9SDimitry Andric // We map %a -> %d and %b -> %e. 518*e8d8bef9SDimitry Andric // 519*e8d8bef9SDimitry Andric // And check to see whether their mapping is consistent in 520*e8d8bef9SDimitry Andric // checkNumberingAndReplace. 521*e8d8bef9SDimitry Andric 522*e8d8bef9SDimitry Andric if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 523*e8d8bef9SDimitry Andric return false; 524*e8d8bef9SDimitry Andric 525*e8d8bef9SDimitry Andric if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 526*e8d8bef9SDimitry Andric return false; 527*e8d8bef9SDimitry Andric } 528*e8d8bef9SDimitry Andric return true; 529*e8d8bef9SDimitry Andric } 530*e8d8bef9SDimitry Andric 531*e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareCommutativeOperandMapping( 532*e8d8bef9SDimitry Andric OperandMapping A, OperandMapping B) { 533*e8d8bef9SDimitry Andric DenseSet<unsigned> ValueNumbersA; 534*e8d8bef9SDimitry Andric DenseSet<unsigned> ValueNumbersB; 535*e8d8bef9SDimitry Andric 536*e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 537*e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 538*e8d8bef9SDimitry Andric unsigned OperandLength = A.OperVals.size(); 539*e8d8bef9SDimitry Andric 540*e8d8bef9SDimitry Andric // Find the value number sets for the operands. 541*e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < OperandLength; 542*e8d8bef9SDimitry Andric Idx++, VItA++, VItB++) { 543*e8d8bef9SDimitry Andric ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 544*e8d8bef9SDimitry Andric ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 545*e8d8bef9SDimitry Andric } 546*e8d8bef9SDimitry Andric 547*e8d8bef9SDimitry Andric // Iterate over the operands in the first IRSimilarityCandidate and make sure 548*e8d8bef9SDimitry Andric // there exists a possible mapping with the operands in the second 549*e8d8bef9SDimitry Andric // IRSimilarityCandidate. 550*e8d8bef9SDimitry Andric if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 551*e8d8bef9SDimitry Andric A.ValueNumberMapping, A.OperVals, 552*e8d8bef9SDimitry Andric ValueNumbersB)) 553*e8d8bef9SDimitry Andric return false; 554*e8d8bef9SDimitry Andric 555*e8d8bef9SDimitry Andric // Iterate over the operands in the second IRSimilarityCandidate and make sure 556*e8d8bef9SDimitry Andric // there exists a possible mapping with the operands in the first 557*e8d8bef9SDimitry Andric // IRSimilarityCandidate. 558*e8d8bef9SDimitry Andric if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 559*e8d8bef9SDimitry Andric B.ValueNumberMapping, B.OperVals, 560*e8d8bef9SDimitry Andric ValueNumbersA)) 561*e8d8bef9SDimitry Andric return false; 562*e8d8bef9SDimitry Andric 563*e8d8bef9SDimitry Andric return true; 564*e8d8bef9SDimitry Andric } 565*e8d8bef9SDimitry Andric 566*e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 567*e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 568*e8d8bef9SDimitry Andric if (A.getLength() != B.getLength()) 569*e8d8bef9SDimitry Andric return false; 570*e8d8bef9SDimitry Andric 571*e8d8bef9SDimitry Andric if (A.ValueToNumber.size() != B.ValueToNumber.size()) 572*e8d8bef9SDimitry Andric return false; 573*e8d8bef9SDimitry Andric 574*e8d8bef9SDimitry Andric iterator ItA = A.begin(); 575*e8d8bef9SDimitry Andric iterator ItB = B.begin(); 576*e8d8bef9SDimitry Andric 577*e8d8bef9SDimitry Andric // These sets create a create a mapping between the values in one candidate 578*e8d8bef9SDimitry Andric // to values in the other candidate. If we create a set with one element, 579*e8d8bef9SDimitry Andric // and that same element maps to the original element in the candidate 580*e8d8bef9SDimitry Andric // we have a good mapping. 581*e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 582*e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 583*e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 584*e8d8bef9SDimitry Andric 585*e8d8bef9SDimitry Andric bool WasInserted; 586*e8d8bef9SDimitry Andric 587*e8d8bef9SDimitry Andric // Iterate over the instructions contained in each candidate 588*e8d8bef9SDimitry Andric unsigned SectionLength = A.getStartIdx() + A.getLength(); 589*e8d8bef9SDimitry Andric for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 590*e8d8bef9SDimitry Andric ItA++, ItB++, Loc++) { 591*e8d8bef9SDimitry Andric // Make sure the instructions are similar to one another. 592*e8d8bef9SDimitry Andric if (!isClose(*ItA, *ItB)) 593*e8d8bef9SDimitry Andric return false; 594*e8d8bef9SDimitry Andric 595*e8d8bef9SDimitry Andric Instruction *IA = ItA->Inst; 596*e8d8bef9SDimitry Andric Instruction *IB = ItB->Inst; 597*e8d8bef9SDimitry Andric 598*e8d8bef9SDimitry Andric if (!ItA->Legal || !ItB->Legal) 599*e8d8bef9SDimitry Andric return false; 600*e8d8bef9SDimitry Andric 601*e8d8bef9SDimitry Andric // Get the operand sets for the instructions. 602*e8d8bef9SDimitry Andric ArrayRef<Value *> OperValsA = ItA->OperVals; 603*e8d8bef9SDimitry Andric ArrayRef<Value *> OperValsB = ItB->OperVals; 604*e8d8bef9SDimitry Andric 605*e8d8bef9SDimitry Andric unsigned InstValA = A.ValueToNumber.find(IA)->second; 606*e8d8bef9SDimitry Andric unsigned InstValB = B.ValueToNumber.find(IB)->second; 607*e8d8bef9SDimitry Andric 608*e8d8bef9SDimitry Andric // Ensure that the mappings for the instructions exists. 609*e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 610*e8d8bef9SDimitry Andric std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 611*e8d8bef9SDimitry Andric if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 612*e8d8bef9SDimitry Andric return false; 613*e8d8bef9SDimitry Andric 614*e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( 615*e8d8bef9SDimitry Andric std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); 616*e8d8bef9SDimitry Andric if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) 617*e8d8bef9SDimitry Andric return false; 618*e8d8bef9SDimitry Andric 619*e8d8bef9SDimitry Andric // We have different paths for commutative instructions and non-commutative 620*e8d8bef9SDimitry Andric // instructions since commutative instructions could allow multiple mappings 621*e8d8bef9SDimitry Andric // to certain values. 622*e8d8bef9SDimitry Andric if (IA->isCommutative() && !isa<FPMathOperator>(IA)) { 623*e8d8bef9SDimitry Andric if (!compareCommutativeOperandMapping( 624*e8d8bef9SDimitry Andric {A, OperValsA, ValueNumberMappingA}, 625*e8d8bef9SDimitry Andric {B, OperValsB, ValueNumberMappingB})) 626*e8d8bef9SDimitry Andric return false; 627*e8d8bef9SDimitry Andric continue; 628*e8d8bef9SDimitry Andric } 629*e8d8bef9SDimitry Andric 630*e8d8bef9SDimitry Andric // Handle the non-commutative cases. 631*e8d8bef9SDimitry Andric if (!compareNonCommutativeOperandMapping( 632*e8d8bef9SDimitry Andric {A, OperValsA, ValueNumberMappingA}, 633*e8d8bef9SDimitry Andric {B, OperValsB, ValueNumberMappingB})) 634*e8d8bef9SDimitry Andric return false; 635*e8d8bef9SDimitry Andric } 636*e8d8bef9SDimitry Andric return true; 637*e8d8bef9SDimitry Andric } 638*e8d8bef9SDimitry Andric 639*e8d8bef9SDimitry Andric bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 640*e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 641*e8d8bef9SDimitry Andric auto DoesOverlap = [](const IRSimilarityCandidate &X, 642*e8d8bef9SDimitry Andric const IRSimilarityCandidate &Y) { 643*e8d8bef9SDimitry Andric // Check: 644*e8d8bef9SDimitry Andric // XXXXXX X starts before Y ends 645*e8d8bef9SDimitry Andric // YYYYYYY Y starts after X starts 646*e8d8bef9SDimitry Andric return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 647*e8d8bef9SDimitry Andric }; 648*e8d8bef9SDimitry Andric 649*e8d8bef9SDimitry Andric return DoesOverlap(A, B) || DoesOverlap(B, A); 650*e8d8bef9SDimitry Andric } 651*e8d8bef9SDimitry Andric 652*e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper( 653*e8d8bef9SDimitry Andric Module &M, std::vector<IRInstructionData *> &InstrList, 654*e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 655*e8d8bef9SDimitry Andric 656*e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrListForModule; 657*e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMappingForModule; 658*e8d8bef9SDimitry Andric // Iterate over the functions in the module to map each Instruction in each 659*e8d8bef9SDimitry Andric // BasicBlock to an unsigned integer. 660*e8d8bef9SDimitry Andric for (Function &F : M) { 661*e8d8bef9SDimitry Andric 662*e8d8bef9SDimitry Andric if (F.empty()) 663*e8d8bef9SDimitry Andric continue; 664*e8d8bef9SDimitry Andric 665*e8d8bef9SDimitry Andric for (BasicBlock &BB : F) { 666*e8d8bef9SDimitry Andric 667*e8d8bef9SDimitry Andric if (BB.sizeWithoutDebug() < 2) 668*e8d8bef9SDimitry Andric continue; 669*e8d8bef9SDimitry Andric 670*e8d8bef9SDimitry Andric // BB has potential to have similarity since it has a size greater than 2 671*e8d8bef9SDimitry Andric // and can therefore match other regions greater than 2. Map it to a list 672*e8d8bef9SDimitry Andric // of unsigned integers. 673*e8d8bef9SDimitry Andric Mapper.convertToUnsignedVec(BB, InstrListForModule, 674*e8d8bef9SDimitry Andric IntegerMappingForModule); 675*e8d8bef9SDimitry Andric } 676*e8d8bef9SDimitry Andric } 677*e8d8bef9SDimitry Andric 678*e8d8bef9SDimitry Andric // Insert the InstrListForModule at the end of the overall InstrList so that 679*e8d8bef9SDimitry Andric // we can have a long InstrList for the entire set of Modules being analyzed. 680*e8d8bef9SDimitry Andric llvm::append_range(InstrList, InstrListForModule); 681*e8d8bef9SDimitry Andric // Do the same as above, but for IntegerMapping. 682*e8d8bef9SDimitry Andric llvm::append_range(IntegerMapping, IntegerMappingForModule); 683*e8d8bef9SDimitry Andric } 684*e8d8bef9SDimitry Andric 685*e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper( 686*e8d8bef9SDimitry Andric ArrayRef<std::unique_ptr<Module>> &Modules, 687*e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrList, 688*e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 689*e8d8bef9SDimitry Andric 690*e8d8bef9SDimitry Andric // Iterate over, and map the instructions in each module. 691*e8d8bef9SDimitry Andric for (const std::unique_ptr<Module> &M : Modules) 692*e8d8bef9SDimitry Andric populateMapper(*M, InstrList, IntegerMapping); 693*e8d8bef9SDimitry Andric } 694*e8d8bef9SDimitry Andric 695*e8d8bef9SDimitry Andric /// From a repeated subsequence, find all the different instances of the 696*e8d8bef9SDimitry Andric /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 697*e8d8bef9SDimitry Andric /// the IRInstructionData in subsequence. 698*e8d8bef9SDimitry Andric /// 699*e8d8bef9SDimitry Andric /// \param [in] Mapper - The instruction mapper for sanity checks. 700*e8d8bef9SDimitry Andric /// \param [in] InstrList - The vector that holds the instruction data. 701*e8d8bef9SDimitry Andric /// \param [in] IntegerMapping - The vector that holds the mapped integers. 702*e8d8bef9SDimitry Andric /// \param [out] CandsForRepSubstring - The vector to store the generated 703*e8d8bef9SDimitry Andric /// IRSimilarityCandidates. 704*e8d8bef9SDimitry Andric static void createCandidatesFromSuffixTree( 705*e8d8bef9SDimitry Andric IRInstructionMapper Mapper, std::vector<IRInstructionData *> &InstrList, 706*e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 707*e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 708*e8d8bef9SDimitry Andric 709*e8d8bef9SDimitry Andric unsigned StringLen = RS.Length; 710*e8d8bef9SDimitry Andric 711*e8d8bef9SDimitry Andric // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 712*e8d8bef9SDimitry Andric for (const unsigned &StartIdx : RS.StartIndices) { 713*e8d8bef9SDimitry Andric unsigned EndIdx = StartIdx + StringLen - 1; 714*e8d8bef9SDimitry Andric 715*e8d8bef9SDimitry Andric // Check that this subsequence does not contain an illegal instruction. 716*e8d8bef9SDimitry Andric bool ContainsIllegal = false; 717*e8d8bef9SDimitry Andric for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 718*e8d8bef9SDimitry Andric unsigned Key = IntegerMapping[CurrIdx]; 719*e8d8bef9SDimitry Andric if (Key > Mapper.IllegalInstrNumber) { 720*e8d8bef9SDimitry Andric ContainsIllegal = true; 721*e8d8bef9SDimitry Andric break; 722*e8d8bef9SDimitry Andric } 723*e8d8bef9SDimitry Andric } 724*e8d8bef9SDimitry Andric 725*e8d8bef9SDimitry Andric // If we have an illegal instruction, we should not create an 726*e8d8bef9SDimitry Andric // IRSimilarityCandidate for this region. 727*e8d8bef9SDimitry Andric if (ContainsIllegal) 728*e8d8bef9SDimitry Andric continue; 729*e8d8bef9SDimitry Andric 730*e8d8bef9SDimitry Andric // We are getting iterators to the instructions in this region of code 731*e8d8bef9SDimitry Andric // by advancing the start and end indices from the start of the 732*e8d8bef9SDimitry Andric // InstrList. 733*e8d8bef9SDimitry Andric std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 734*e8d8bef9SDimitry Andric std::advance(StartIt, StartIdx); 735*e8d8bef9SDimitry Andric std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 736*e8d8bef9SDimitry Andric std::advance(EndIt, EndIdx); 737*e8d8bef9SDimitry Andric 738*e8d8bef9SDimitry Andric CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 739*e8d8bef9SDimitry Andric } 740*e8d8bef9SDimitry Andric } 741*e8d8bef9SDimitry Andric 742*e8d8bef9SDimitry Andric /// From the list of IRSimilarityCandidates, perform a comparison between each 743*e8d8bef9SDimitry Andric /// IRSimilarityCandidate to determine if there are overlapping 744*e8d8bef9SDimitry Andric /// IRInstructionData, or if they do not have the same structure. 745*e8d8bef9SDimitry Andric /// 746*e8d8bef9SDimitry Andric /// \param [in] CandsForRepSubstring - The vector containing the 747*e8d8bef9SDimitry Andric /// IRSimilarityCandidates. 748*e8d8bef9SDimitry Andric /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 749*e8d8bef9SDimitry Andric /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 750*e8d8bef9SDimitry Andric /// vector are structurally similar to one another. 751*e8d8bef9SDimitry Andric static void findCandidateStructures( 752*e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 753*e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { 754*e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 755*e8d8bef9SDimitry Andric InnerCandEndIt; 756*e8d8bef9SDimitry Andric 757*e8d8bef9SDimitry Andric // IRSimilarityCandidates each have a structure for operand use. It is 758*e8d8bef9SDimitry Andric // possible that two instances of the same subsequences have different 759*e8d8bef9SDimitry Andric // structure. Each type of structure found is assigned a number. This 760*e8d8bef9SDimitry Andric // DenseMap maps an IRSimilarityCandidate to which type of similarity 761*e8d8bef9SDimitry Andric // discovered it fits within. 762*e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 763*e8d8bef9SDimitry Andric 764*e8d8bef9SDimitry Andric // Find the compatibility from each candidate to the others to determine 765*e8d8bef9SDimitry Andric // which candidates overlap and which have the same structure by mapping 766*e8d8bef9SDimitry Andric // each structure to a different group. 767*e8d8bef9SDimitry Andric bool SameStructure; 768*e8d8bef9SDimitry Andric bool Inserted; 769*e8d8bef9SDimitry Andric unsigned CurrentGroupNum = 0; 770*e8d8bef9SDimitry Andric unsigned OuterGroupNum; 771*e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 772*e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 773*e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 774*e8d8bef9SDimitry Andric 775*e8d8bef9SDimitry Andric // Iterate over the candidates to determine its structural and overlapping 776*e8d8bef9SDimitry Andric // compatibility with other instructions 777*e8d8bef9SDimitry Andric for (CandIt = CandsForRepSubstring.begin(), 778*e8d8bef9SDimitry Andric CandEndIt = CandsForRepSubstring.end(); 779*e8d8bef9SDimitry Andric CandIt != CandEndIt; CandIt++) { 780*e8d8bef9SDimitry Andric 781*e8d8bef9SDimitry Andric // Determine if it has an assigned structural group already. 782*e8d8bef9SDimitry Andric CandToGroupIt = CandToGroup.find(&*CandIt); 783*e8d8bef9SDimitry Andric if (CandToGroupIt == CandToGroup.end()) { 784*e8d8bef9SDimitry Andric // If not, we assign it one, and add it to our mapping. 785*e8d8bef9SDimitry Andric std::tie(CandToGroupIt, Inserted) = 786*e8d8bef9SDimitry Andric CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 787*e8d8bef9SDimitry Andric } 788*e8d8bef9SDimitry Andric 789*e8d8bef9SDimitry Andric // Get the structural group number from the iterator. 790*e8d8bef9SDimitry Andric OuterGroupNum = CandToGroupIt->second; 791*e8d8bef9SDimitry Andric 792*e8d8bef9SDimitry Andric // Check if we already have a list of IRSimilarityCandidates for the current 793*e8d8bef9SDimitry Andric // structural group. Create one if one does not exist. 794*e8d8bef9SDimitry Andric CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 795*e8d8bef9SDimitry Andric if (CurrentGroupPair == StructuralGroups.end()) 796*e8d8bef9SDimitry Andric std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 797*e8d8bef9SDimitry Andric std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 798*e8d8bef9SDimitry Andric 799*e8d8bef9SDimitry Andric // Iterate over the IRSimilarityCandidates following the current 800*e8d8bef9SDimitry Andric // IRSimilarityCandidate in the list to determine whether the two 801*e8d8bef9SDimitry Andric // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 802*e8d8bef9SDimitry Andric // of IRSimilarityCandidates. 803*e8d8bef9SDimitry Andric for (InnerCandIt = std::next(CandIt), 804*e8d8bef9SDimitry Andric InnerCandEndIt = CandsForRepSubstring.end(); 805*e8d8bef9SDimitry Andric InnerCandIt != InnerCandEndIt; InnerCandIt++) { 806*e8d8bef9SDimitry Andric 807*e8d8bef9SDimitry Andric // We check if the inner item has a group already, if it does, we skip it. 808*e8d8bef9SDimitry Andric CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 809*e8d8bef9SDimitry Andric if (CandToGroupItInner != CandToGroup.end()) 810*e8d8bef9SDimitry Andric continue; 811*e8d8bef9SDimitry Andric 812*e8d8bef9SDimitry Andric // Otherwise we determine if they have the same structure and add it to 813*e8d8bef9SDimitry Andric // vector if they match. 814*e8d8bef9SDimitry Andric SameStructure = 815*e8d8bef9SDimitry Andric IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt); 816*e8d8bef9SDimitry Andric if (!SameStructure) 817*e8d8bef9SDimitry Andric continue; 818*e8d8bef9SDimitry Andric 819*e8d8bef9SDimitry Andric CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 820*e8d8bef9SDimitry Andric CurrentGroupPair->second.push_back(*InnerCandIt); 821*e8d8bef9SDimitry Andric } 822*e8d8bef9SDimitry Andric } 823*e8d8bef9SDimitry Andric } 824*e8d8bef9SDimitry Andric 825*e8d8bef9SDimitry Andric void IRSimilarityIdentifier::findCandidates( 826*e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrList, 827*e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 828*e8d8bef9SDimitry Andric SuffixTree ST(IntegerMapping); 829*e8d8bef9SDimitry Andric 830*e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> CandsForRepSubstring; 831*e8d8bef9SDimitry Andric std::vector<SimilarityGroup> NewCandidateGroups; 832*e8d8bef9SDimitry Andric 833*e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup> StructuralGroups; 834*e8d8bef9SDimitry Andric 835*e8d8bef9SDimitry Andric // Iterate over the subsequences found by the Suffix Tree to create 836*e8d8bef9SDimitry Andric // IRSimilarityCandidates for each repeated subsequence and determine which 837*e8d8bef9SDimitry Andric // instances are structurally similar to one another. 838*e8d8bef9SDimitry Andric for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) { 839*e8d8bef9SDimitry Andric createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, *It, 840*e8d8bef9SDimitry Andric CandsForRepSubstring); 841*e8d8bef9SDimitry Andric 842*e8d8bef9SDimitry Andric if (CandsForRepSubstring.size() < 2) 843*e8d8bef9SDimitry Andric continue; 844*e8d8bef9SDimitry Andric 845*e8d8bef9SDimitry Andric findCandidateStructures(CandsForRepSubstring, StructuralGroups); 846*e8d8bef9SDimitry Andric for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) 847*e8d8bef9SDimitry Andric // We only add the group if it contains more than one 848*e8d8bef9SDimitry Andric // IRSimilarityCandidate. If there is only one, that means there is no 849*e8d8bef9SDimitry Andric // other repeated subsequence with the same structure. 850*e8d8bef9SDimitry Andric if (Group.second.size() > 1) 851*e8d8bef9SDimitry Andric SimilarityCandidates->push_back(Group.second); 852*e8d8bef9SDimitry Andric 853*e8d8bef9SDimitry Andric CandsForRepSubstring.clear(); 854*e8d8bef9SDimitry Andric StructuralGroups.clear(); 855*e8d8bef9SDimitry Andric NewCandidateGroups.clear(); 856*e8d8bef9SDimitry Andric } 857*e8d8bef9SDimitry Andric } 858*e8d8bef9SDimitry Andric 859*e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 860*e8d8bef9SDimitry Andric ArrayRef<std::unique_ptr<Module>> Modules) { 861*e8d8bef9SDimitry Andric resetSimilarityCandidates(); 862*e8d8bef9SDimitry Andric 863*e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrList; 864*e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMapping; 865*e8d8bef9SDimitry Andric 866*e8d8bef9SDimitry Andric populateMapper(Modules, InstrList, IntegerMapping); 867*e8d8bef9SDimitry Andric findCandidates(InstrList, IntegerMapping); 868*e8d8bef9SDimitry Andric 869*e8d8bef9SDimitry Andric return SimilarityCandidates.getValue(); 870*e8d8bef9SDimitry Andric } 871*e8d8bef9SDimitry Andric 872*e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 873*e8d8bef9SDimitry Andric resetSimilarityCandidates(); 874*e8d8bef9SDimitry Andric 875*e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrList; 876*e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMapping; 877*e8d8bef9SDimitry Andric 878*e8d8bef9SDimitry Andric populateMapper(M, InstrList, IntegerMapping); 879*e8d8bef9SDimitry Andric findCandidates(InstrList, IntegerMapping); 880*e8d8bef9SDimitry Andric 881*e8d8bef9SDimitry Andric return SimilarityCandidates.getValue(); 882*e8d8bef9SDimitry Andric } 883*e8d8bef9SDimitry Andric 884*e8d8bef9SDimitry Andric INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 885*e8d8bef9SDimitry Andric "ir-similarity-identifier", false, true) 886*e8d8bef9SDimitry Andric 887*e8d8bef9SDimitry Andric IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 888*e8d8bef9SDimitry Andric : ModulePass(ID) { 889*e8d8bef9SDimitry Andric initializeIRSimilarityIdentifierWrapperPassPass( 890*e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 891*e8d8bef9SDimitry Andric } 892*e8d8bef9SDimitry Andric 893*e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 894*e8d8bef9SDimitry Andric IRSI.reset(new IRSimilarityIdentifier(M)); 895*e8d8bef9SDimitry Andric return false; 896*e8d8bef9SDimitry Andric } 897*e8d8bef9SDimitry Andric 898*e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 899*e8d8bef9SDimitry Andric IRSI.reset(); 900*e8d8bef9SDimitry Andric return false; 901*e8d8bef9SDimitry Andric } 902*e8d8bef9SDimitry Andric 903*e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 904*e8d8bef9SDimitry Andric // All the real work is done in the constructor for the pass. 905*e8d8bef9SDimitry Andric IRSI.reset(new IRSimilarityIdentifier(M)); 906*e8d8bef9SDimitry Andric return false; 907*e8d8bef9SDimitry Andric } 908*e8d8bef9SDimitry Andric 909*e8d8bef9SDimitry Andric AnalysisKey IRSimilarityAnalysis::Key; 910*e8d8bef9SDimitry Andric IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 911*e8d8bef9SDimitry Andric ModuleAnalysisManager &) { 912*e8d8bef9SDimitry Andric 913*e8d8bef9SDimitry Andric return IRSimilarityIdentifier(M); 914*e8d8bef9SDimitry Andric } 915*e8d8bef9SDimitry Andric 916*e8d8bef9SDimitry Andric PreservedAnalyses 917*e8d8bef9SDimitry Andric IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 918*e8d8bef9SDimitry Andric IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 919*e8d8bef9SDimitry Andric Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); 920*e8d8bef9SDimitry Andric 921*e8d8bef9SDimitry Andric for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 922*e8d8bef9SDimitry Andric OS << CandVec.size() << " candidates of length " 923*e8d8bef9SDimitry Andric << CandVec.begin()->getLength() << ". Found in: \n"; 924*e8d8bef9SDimitry Andric for (IRSimilarityCandidate &Cand : CandVec) { 925*e8d8bef9SDimitry Andric OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 926*e8d8bef9SDimitry Andric << ", Basic Block: "; 927*e8d8bef9SDimitry Andric if (Cand.front()->Inst->getParent()->getName().str() == "") 928*e8d8bef9SDimitry Andric OS << "(unnamed)\n"; 929*e8d8bef9SDimitry Andric else 930*e8d8bef9SDimitry Andric OS << Cand.front()->Inst->getParent()->getName().str() << "\n"; 931*e8d8bef9SDimitry Andric } 932*e8d8bef9SDimitry Andric } 933*e8d8bef9SDimitry Andric 934*e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 935*e8d8bef9SDimitry Andric } 936*e8d8bef9SDimitry Andric 937*e8d8bef9SDimitry Andric char IRSimilarityIdentifierWrapperPass::ID = 0; 938