1e8d8bef9SDimitry Andric //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric // \file 10e8d8bef9SDimitry Andric // Implementation file for the IRSimilarityIdentifier for identifying 11e8d8bef9SDimitry Andric // similarities in IR including the IRInstructionMapper. 12e8d8bef9SDimitry Andric // 13e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 14e8d8bef9SDimitry Andric 15e8d8bef9SDimitry Andric #include "llvm/Analysis/IRSimilarityIdentifier.h" 16e8d8bef9SDimitry Andric #include "llvm/ADT/DenseMap.h" 17e8d8bef9SDimitry Andric #include "llvm/IR/Intrinsics.h" 18e8d8bef9SDimitry Andric #include "llvm/IR/Operator.h" 19e8d8bef9SDimitry Andric #include "llvm/IR/User.h" 20e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 21e8d8bef9SDimitry Andric #include "llvm/Support/SuffixTree.h" 22e8d8bef9SDimitry Andric 23e8d8bef9SDimitry Andric using namespace llvm; 24e8d8bef9SDimitry Andric using namespace IRSimilarity; 25e8d8bef9SDimitry Andric 26e8d8bef9SDimitry Andric IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 27e8d8bef9SDimitry Andric IRInstructionDataList &IDList) 28e8d8bef9SDimitry Andric : Inst(&I), Legal(Legality), IDL(&IDList) { 29e8d8bef9SDimitry Andric // We check for whether we have a comparison instruction. If it is, we 30e8d8bef9SDimitry Andric // find the "less than" version of the predicate for consistency for 31e8d8bef9SDimitry Andric // comparison instructions throught the program. 32e8d8bef9SDimitry Andric if (CmpInst *C = dyn_cast<CmpInst>(&I)) { 33e8d8bef9SDimitry Andric CmpInst::Predicate Predicate = predicateForConsistency(C); 34e8d8bef9SDimitry Andric if (Predicate != C->getPredicate()) 35e8d8bef9SDimitry Andric RevisedPredicate = Predicate; 36e8d8bef9SDimitry Andric } 37e8d8bef9SDimitry Andric 38e8d8bef9SDimitry Andric // Here we collect the operands and their types for determining whether 39e8d8bef9SDimitry Andric // the structure of the operand use matches between two different candidates. 40e8d8bef9SDimitry Andric for (Use &OI : I.operands()) { 41e8d8bef9SDimitry Andric if (isa<CmpInst>(I) && RevisedPredicate.hasValue()) { 42e8d8bef9SDimitry Andric // If we have a CmpInst where the predicate is reversed, it means the 43e8d8bef9SDimitry Andric // operands must be reversed as well. 44e8d8bef9SDimitry Andric OperVals.insert(OperVals.begin(), OI.get()); 45e8d8bef9SDimitry Andric continue; 46e8d8bef9SDimitry Andric } 47e8d8bef9SDimitry Andric 48e8d8bef9SDimitry Andric OperVals.push_back(OI.get()); 49e8d8bef9SDimitry Andric } 50e8d8bef9SDimitry Andric } 51e8d8bef9SDimitry Andric 52e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 53e8d8bef9SDimitry Andric switch (CI->getPredicate()) { 54e8d8bef9SDimitry Andric case CmpInst::FCMP_OGT: 55e8d8bef9SDimitry Andric case CmpInst::FCMP_UGT: 56e8d8bef9SDimitry Andric case CmpInst::FCMP_OGE: 57e8d8bef9SDimitry Andric case CmpInst::FCMP_UGE: 58e8d8bef9SDimitry Andric case CmpInst::ICMP_SGT: 59e8d8bef9SDimitry Andric case CmpInst::ICMP_UGT: 60e8d8bef9SDimitry Andric case CmpInst::ICMP_SGE: 61e8d8bef9SDimitry Andric case CmpInst::ICMP_UGE: 62e8d8bef9SDimitry Andric return CI->getSwappedPredicate(); 63e8d8bef9SDimitry Andric default: 64e8d8bef9SDimitry Andric return CI->getPredicate(); 65e8d8bef9SDimitry Andric } 66e8d8bef9SDimitry Andric } 67e8d8bef9SDimitry Andric 68e8d8bef9SDimitry Andric CmpInst::Predicate IRInstructionData::getPredicate() const { 69e8d8bef9SDimitry Andric assert(isa<CmpInst>(Inst) && 70e8d8bef9SDimitry Andric "Can only get a predicate from a compare instruction"); 71e8d8bef9SDimitry Andric 72e8d8bef9SDimitry Andric if (RevisedPredicate.hasValue()) 73e8d8bef9SDimitry Andric return RevisedPredicate.getValue(); 74e8d8bef9SDimitry Andric 75e8d8bef9SDimitry Andric return cast<CmpInst>(Inst)->getPredicate(); 76e8d8bef9SDimitry Andric } 77e8d8bef9SDimitry Andric 78e8d8bef9SDimitry Andric static StringRef getCalledFunctionName(CallInst &CI) { 79e8d8bef9SDimitry Andric assert(CI.getCalledFunction() != nullptr && "Called Function is nullptr?"); 80e8d8bef9SDimitry Andric 81e8d8bef9SDimitry Andric return CI.getCalledFunction()->getName(); 82e8d8bef9SDimitry Andric } 83e8d8bef9SDimitry Andric 84e8d8bef9SDimitry Andric bool IRSimilarity::isClose(const IRInstructionData &A, 85e8d8bef9SDimitry Andric const IRInstructionData &B) { 86e8d8bef9SDimitry Andric 87e8d8bef9SDimitry Andric if (!A.Legal || !B.Legal) 88e8d8bef9SDimitry Andric return false; 89e8d8bef9SDimitry Andric 90e8d8bef9SDimitry Andric // Check if we are performing the same sort of operation on the same types 91e8d8bef9SDimitry Andric // but not on the same values. 92e8d8bef9SDimitry Andric if (!A.Inst->isSameOperationAs(B.Inst)) { 93e8d8bef9SDimitry Andric // If there is a predicate, this means that either there is a swapped 94e8d8bef9SDimitry Andric // predicate, or that the types are different, we want to make sure that 95e8d8bef9SDimitry Andric // the predicates are equivalent via swapping. 96e8d8bef9SDimitry Andric if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 97e8d8bef9SDimitry Andric 98e8d8bef9SDimitry Andric if (A.getPredicate() != B.getPredicate()) 99e8d8bef9SDimitry Andric return false; 100e8d8bef9SDimitry Andric 101e8d8bef9SDimitry Andric // If the predicates are the same via swap, make sure that the types are 102e8d8bef9SDimitry Andric // still the same. 103e8d8bef9SDimitry Andric auto ZippedTypes = zip(A.OperVals, B.OperVals); 104e8d8bef9SDimitry Andric 105e8d8bef9SDimitry Andric return all_of( 106e8d8bef9SDimitry Andric ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 107e8d8bef9SDimitry Andric return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 108e8d8bef9SDimitry Andric }); 109e8d8bef9SDimitry Andric } 110e8d8bef9SDimitry Andric 111e8d8bef9SDimitry Andric return false; 112e8d8bef9SDimitry Andric } 113e8d8bef9SDimitry Andric 114e8d8bef9SDimitry Andric // Since any GEP Instruction operands after the first operand cannot be 115e8d8bef9SDimitry Andric // defined by a register, we must make sure that the operands after the first 116e8d8bef9SDimitry Andric // are the same in the two instructions 117e8d8bef9SDimitry Andric if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { 118e8d8bef9SDimitry Andric auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); 119e8d8bef9SDimitry Andric 120e8d8bef9SDimitry Andric // If the instructions do not have the same inbounds restrictions, we do 121e8d8bef9SDimitry Andric // not consider them the same. 122e8d8bef9SDimitry Andric if (GEP->isInBounds() != OtherGEP->isInBounds()) 123e8d8bef9SDimitry Andric return false; 124e8d8bef9SDimitry Andric 125e8d8bef9SDimitry Andric auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); 126e8d8bef9SDimitry Andric 127e8d8bef9SDimitry Andric // We increment here since we do not care about the first instruction, 128e8d8bef9SDimitry Andric // we only care about the following operands since they must be the 129e8d8bef9SDimitry Andric // exact same to be considered similar. 130e8d8bef9SDimitry Andric return all_of(drop_begin(ZippedOperands), 131e8d8bef9SDimitry Andric [](std::tuple<llvm::Use &, llvm::Use &> R) { 132e8d8bef9SDimitry Andric return std::get<0>(R) == std::get<1>(R); 133e8d8bef9SDimitry Andric }); 134e8d8bef9SDimitry Andric } 135e8d8bef9SDimitry Andric 136e8d8bef9SDimitry Andric // If the instructions are functions, we make sure that the function name is 137e8d8bef9SDimitry Andric // the same. We already know that the types are since is isSameOperationAs is 138e8d8bef9SDimitry Andric // true. 139e8d8bef9SDimitry Andric if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { 140e8d8bef9SDimitry Andric CallInst *CIA = cast<CallInst>(A.Inst); 141e8d8bef9SDimitry Andric CallInst *CIB = cast<CallInst>(B.Inst); 142e8d8bef9SDimitry Andric if (getCalledFunctionName(*CIA).compare(getCalledFunctionName(*CIB)) != 0) 143e8d8bef9SDimitry Andric return false; 144e8d8bef9SDimitry Andric } 145e8d8bef9SDimitry Andric 146e8d8bef9SDimitry Andric return true; 147e8d8bef9SDimitry Andric } 148e8d8bef9SDimitry Andric 149e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 150e8d8bef9SDimitry Andric // into the same interface. 151e8d8bef9SDimitry Andric void IRInstructionMapper::convertToUnsignedVec( 152e8d8bef9SDimitry Andric BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 153e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 154e8d8bef9SDimitry Andric BasicBlock::iterator It = BB.begin(); 155e8d8bef9SDimitry Andric 156e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMappingForBB; 157e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrListForBB; 158e8d8bef9SDimitry Andric 159e8d8bef9SDimitry Andric HaveLegalRange = false; 160e8d8bef9SDimitry Andric CanCombineWithPrevInstr = false; 161e8d8bef9SDimitry Andric AddedIllegalLastTime = true; 162e8d8bef9SDimitry Andric 163e8d8bef9SDimitry Andric for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 164e8d8bef9SDimitry Andric switch (InstClassifier.visit(*It)) { 165e8d8bef9SDimitry Andric case InstrType::Legal: 166e8d8bef9SDimitry Andric mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 167e8d8bef9SDimitry Andric break; 168e8d8bef9SDimitry Andric case InstrType::Illegal: 169e8d8bef9SDimitry Andric mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 170e8d8bef9SDimitry Andric break; 171e8d8bef9SDimitry Andric case InstrType::Invisible: 172e8d8bef9SDimitry Andric AddedIllegalLastTime = false; 173e8d8bef9SDimitry Andric break; 174e8d8bef9SDimitry Andric } 175e8d8bef9SDimitry Andric } 176e8d8bef9SDimitry Andric 177e8d8bef9SDimitry Andric if (HaveLegalRange) { 178e8d8bef9SDimitry Andric mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 179*fe6060f1SDimitry Andric for (IRInstructionData *ID : InstrListForBB) 180*fe6060f1SDimitry Andric this->IDL->push_back(*ID); 181e8d8bef9SDimitry Andric llvm::append_range(InstrList, InstrListForBB); 182e8d8bef9SDimitry Andric llvm::append_range(IntegerMapping, IntegerMappingForBB); 183e8d8bef9SDimitry Andric } 184e8d8bef9SDimitry Andric } 185e8d8bef9SDimitry Andric 186e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 187e8d8bef9SDimitry Andric // into the same interface. 188e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToLegalUnsigned( 189e8d8bef9SDimitry Andric BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 190e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrListForBB) { 191e8d8bef9SDimitry Andric // We added something legal, so we should unset the AddedLegalLastTime 192e8d8bef9SDimitry Andric // flag. 193e8d8bef9SDimitry Andric AddedIllegalLastTime = false; 194e8d8bef9SDimitry Andric 195e8d8bef9SDimitry Andric // If we have at least two adjacent legal instructions (which may have 196e8d8bef9SDimitry Andric // invisible instructions in between), remember that. 197e8d8bef9SDimitry Andric if (CanCombineWithPrevInstr) 198e8d8bef9SDimitry Andric HaveLegalRange = true; 199e8d8bef9SDimitry Andric CanCombineWithPrevInstr = true; 200e8d8bef9SDimitry Andric 201e8d8bef9SDimitry Andric // Get the integer for this instruction or give it the current 202e8d8bef9SDimitry Andric // LegalInstrNumber. 203e8d8bef9SDimitry Andric IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 204e8d8bef9SDimitry Andric InstrListForBB.push_back(ID); 205e8d8bef9SDimitry Andric 206e8d8bef9SDimitry Andric // Add to the instruction list 207e8d8bef9SDimitry Andric bool WasInserted; 208e8d8bef9SDimitry Andric DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 209e8d8bef9SDimitry Andric ResultIt; 210e8d8bef9SDimitry Andric std::tie(ResultIt, WasInserted) = 211e8d8bef9SDimitry Andric InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 212e8d8bef9SDimitry Andric unsigned INumber = ResultIt->second; 213e8d8bef9SDimitry Andric 214e8d8bef9SDimitry Andric // There was an insertion. 215e8d8bef9SDimitry Andric if (WasInserted) 216e8d8bef9SDimitry Andric LegalInstrNumber++; 217e8d8bef9SDimitry Andric 218e8d8bef9SDimitry Andric IntegerMappingForBB.push_back(INumber); 219e8d8bef9SDimitry Andric 220e8d8bef9SDimitry Andric // Make sure we don't overflow or use any integers reserved by the DenseMap. 221e8d8bef9SDimitry Andric assert(LegalInstrNumber < IllegalInstrNumber && 222e8d8bef9SDimitry Andric "Instruction mapping overflow!"); 223e8d8bef9SDimitry Andric 224e8d8bef9SDimitry Andric assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 225e8d8bef9SDimitry Andric "Tried to assign DenseMap tombstone or empty key to instruction."); 226e8d8bef9SDimitry Andric assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 227e8d8bef9SDimitry Andric "Tried to assign DenseMap tombstone or empty key to instruction."); 228e8d8bef9SDimitry Andric 229e8d8bef9SDimitry Andric return INumber; 230e8d8bef9SDimitry Andric } 231e8d8bef9SDimitry Andric 232e8d8bef9SDimitry Andric IRInstructionData * 233e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 234e8d8bef9SDimitry Andric IRInstructionDataList &IDL) { 235e8d8bef9SDimitry Andric return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 236e8d8bef9SDimitry Andric } 237e8d8bef9SDimitry Andric 238e8d8bef9SDimitry Andric IRInstructionDataList * 239e8d8bef9SDimitry Andric IRInstructionMapper::allocateIRInstructionDataList() { 240e8d8bef9SDimitry Andric return new (IDLAllocator->Allocate()) IRInstructionDataList(); 241e8d8bef9SDimitry Andric } 242e8d8bef9SDimitry Andric 243e8d8bef9SDimitry Andric // TODO: This is the same as the MachineOutliner, and should be consolidated 244e8d8bef9SDimitry Andric // into the same interface. 245e8d8bef9SDimitry Andric unsigned IRInstructionMapper::mapToIllegalUnsigned( 246e8d8bef9SDimitry Andric BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 247e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrListForBB, bool End) { 248e8d8bef9SDimitry Andric // Can't combine an illegal instruction. Set the flag. 249e8d8bef9SDimitry Andric CanCombineWithPrevInstr = false; 250e8d8bef9SDimitry Andric 251e8d8bef9SDimitry Andric // Only add one illegal number per range of legal numbers. 252e8d8bef9SDimitry Andric if (AddedIllegalLastTime) 253e8d8bef9SDimitry Andric return IllegalInstrNumber; 254e8d8bef9SDimitry Andric 255e8d8bef9SDimitry Andric IRInstructionData *ID = nullptr; 256e8d8bef9SDimitry Andric if (!End) 257e8d8bef9SDimitry Andric ID = allocateIRInstructionData(*It, false, *IDL); 258e8d8bef9SDimitry Andric InstrListForBB.push_back(ID); 259e8d8bef9SDimitry Andric 260e8d8bef9SDimitry Andric // Remember that we added an illegal number last time. 261e8d8bef9SDimitry Andric AddedIllegalLastTime = true; 262e8d8bef9SDimitry Andric unsigned INumber = IllegalInstrNumber; 263e8d8bef9SDimitry Andric IntegerMappingForBB.push_back(IllegalInstrNumber--); 264e8d8bef9SDimitry Andric 265e8d8bef9SDimitry Andric assert(LegalInstrNumber < IllegalInstrNumber && 266e8d8bef9SDimitry Andric "Instruction mapping overflow!"); 267e8d8bef9SDimitry Andric 268e8d8bef9SDimitry Andric assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 269e8d8bef9SDimitry Andric "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 270e8d8bef9SDimitry Andric 271e8d8bef9SDimitry Andric assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 272e8d8bef9SDimitry Andric "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 273e8d8bef9SDimitry Andric 274e8d8bef9SDimitry Andric return INumber; 275e8d8bef9SDimitry Andric } 276e8d8bef9SDimitry Andric 277e8d8bef9SDimitry Andric IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 278e8d8bef9SDimitry Andric IRInstructionData *FirstInstIt, 279e8d8bef9SDimitry Andric IRInstructionData *LastInstIt) 280e8d8bef9SDimitry Andric : StartIdx(StartIdx), Len(Len) { 281e8d8bef9SDimitry Andric 282e8d8bef9SDimitry Andric assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 283e8d8bef9SDimitry Andric assert(LastInstIt != nullptr && "Instruction is nullptr!"); 284e8d8bef9SDimitry Andric assert(StartIdx + Len > StartIdx && 285e8d8bef9SDimitry Andric "Overflow for IRSimilarityCandidate range?"); 286e8d8bef9SDimitry Andric assert(Len - 1 == static_cast<unsigned>(std::distance( 287e8d8bef9SDimitry Andric iterator(FirstInstIt), iterator(LastInstIt))) && 288e8d8bef9SDimitry Andric "Length of the first and last IRInstructionData do not match the " 289e8d8bef9SDimitry Andric "given length"); 290e8d8bef9SDimitry Andric 291e8d8bef9SDimitry Andric // We iterate over the given instructions, and map each unique value 292e8d8bef9SDimitry Andric // to a unique number in the IRSimilarityCandidate ValueToNumber and 293e8d8bef9SDimitry Andric // NumberToValue maps. A constant get its own value globally, the individual 294e8d8bef9SDimitry Andric // uses of the constants are not considered to be unique. 295e8d8bef9SDimitry Andric // 296e8d8bef9SDimitry Andric // IR: Mapping Added: 297e8d8bef9SDimitry Andric // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 298e8d8bef9SDimitry Andric // %add2 = add i32 %a, %1 %add2 -> 4 299e8d8bef9SDimitry Andric // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 300e8d8bef9SDimitry Andric // 301e8d8bef9SDimitry Andric // when replace with global values, starting from 1, would be 302e8d8bef9SDimitry Andric // 303e8d8bef9SDimitry Andric // 3 = add i32 1, 2 304e8d8bef9SDimitry Andric // 4 = add i32 1, 3 305e8d8bef9SDimitry Andric // 6 = add i32 5, 2 306e8d8bef9SDimitry Andric unsigned LocalValNumber = 1; 307e8d8bef9SDimitry Andric IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 308e8d8bef9SDimitry Andric for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 309e8d8bef9SDimitry Andric // Map the operand values to an unsigned integer if it does not already 310e8d8bef9SDimitry Andric // have an unsigned integer assigned to it. 311e8d8bef9SDimitry Andric for (Value *Arg : ID->OperVals) 312e8d8bef9SDimitry Andric if (ValueToNumber.find(Arg) == ValueToNumber.end()) { 313e8d8bef9SDimitry Andric ValueToNumber.try_emplace(Arg, LocalValNumber); 314e8d8bef9SDimitry Andric NumberToValue.try_emplace(LocalValNumber, Arg); 315e8d8bef9SDimitry Andric LocalValNumber++; 316e8d8bef9SDimitry Andric } 317e8d8bef9SDimitry Andric 318e8d8bef9SDimitry Andric // Mapping the instructions to an unsigned integer if it is not already 319e8d8bef9SDimitry Andric // exist in the mapping. 320e8d8bef9SDimitry Andric if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { 321e8d8bef9SDimitry Andric ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 322e8d8bef9SDimitry Andric NumberToValue.try_emplace(LocalValNumber, ID->Inst); 323e8d8bef9SDimitry Andric LocalValNumber++; 324e8d8bef9SDimitry Andric } 325e8d8bef9SDimitry Andric } 326e8d8bef9SDimitry Andric 327e8d8bef9SDimitry Andric // Setting the first and last instruction data pointers for the candidate. If 328e8d8bef9SDimitry Andric // we got through the entire for loop without hitting an assert, we know 329e8d8bef9SDimitry Andric // that both of these instructions are not nullptrs. 330e8d8bef9SDimitry Andric FirstInst = FirstInstIt; 331e8d8bef9SDimitry Andric LastInst = LastInstIt; 332e8d8bef9SDimitry Andric } 333e8d8bef9SDimitry Andric 334e8d8bef9SDimitry Andric bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 335e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 336e8d8bef9SDimitry Andric if (A.getLength() != B.getLength()) 337e8d8bef9SDimitry Andric return false; 338e8d8bef9SDimitry Andric 339e8d8bef9SDimitry Andric auto InstrDataForBoth = 340e8d8bef9SDimitry Andric zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 341e8d8bef9SDimitry Andric 342e8d8bef9SDimitry Andric return all_of(InstrDataForBoth, 343e8d8bef9SDimitry Andric [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 344e8d8bef9SDimitry Andric IRInstructionData &A = std::get<0>(R); 345e8d8bef9SDimitry Andric IRInstructionData &B = std::get<1>(R); 346e8d8bef9SDimitry Andric if (!A.Legal || !B.Legal) 347e8d8bef9SDimitry Andric return false; 348e8d8bef9SDimitry Andric return isClose(A, B); 349e8d8bef9SDimitry Andric }); 350e8d8bef9SDimitry Andric } 351e8d8bef9SDimitry Andric 352e8d8bef9SDimitry Andric /// Determine if one or more of the assigned global value numbers for the 353e8d8bef9SDimitry Andric /// operands in \p TargetValueNumbers is in the current mapping set for operand 354e8d8bef9SDimitry Andric /// numbers in \p SourceOperands. The set of possible corresponding global 355e8d8bef9SDimitry Andric /// value numbers are replaced with the most recent version of compatible 356e8d8bef9SDimitry Andric /// values. 357e8d8bef9SDimitry Andric /// 358e8d8bef9SDimitry Andric /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 359e8d8bef9SDimitry Andric /// value number for the source IRInstructionCandidate. 360e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 361e8d8bef9SDimitry Andric /// IRSimilarityCandidate global value numbers to a set of possible numbers in 362e8d8bef9SDimitry Andric /// the target. 363e8d8bef9SDimitry Andric /// \param [in] SourceOperands - The operands in the original 364e8d8bef9SDimitry Andric /// IRSimilarityCandidate in the current instruction. 365e8d8bef9SDimitry Andric /// \param [in] TargetValueNumbers - The global value numbers of the operands in 366e8d8bef9SDimitry Andric /// the corresponding Instruction in the other IRSimilarityCandidate. 367e8d8bef9SDimitry Andric /// \returns true if there exists a possible mapping between the source 368e8d8bef9SDimitry Andric /// Instruction operands and the target Instruction operands, and false if not. 369e8d8bef9SDimitry Andric static bool checkNumberingAndReplaceCommutative( 370e8d8bef9SDimitry Andric const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 371e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 372e8d8bef9SDimitry Andric ArrayRef<Value *> &SourceOperands, 373e8d8bef9SDimitry Andric DenseSet<unsigned> &TargetValueNumbers){ 374e8d8bef9SDimitry Andric 375e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 376e8d8bef9SDimitry Andric 377e8d8bef9SDimitry Andric unsigned ArgVal; 378e8d8bef9SDimitry Andric bool WasInserted; 379e8d8bef9SDimitry Andric 380e8d8bef9SDimitry Andric // Iterate over the operands in the source IRSimilarityCandidate to determine 381e8d8bef9SDimitry Andric // whether there exists an operand in the other IRSimilarityCandidate that 382e8d8bef9SDimitry Andric // creates a valid mapping of Value to Value between the 383e8d8bef9SDimitry Andric // IRSimilarityCaniddates. 384e8d8bef9SDimitry Andric for (Value *V : SourceOperands) { 385e8d8bef9SDimitry Andric ArgVal = SourceValueToNumberMapping.find(V)->second; 386e8d8bef9SDimitry Andric 387e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 388e8d8bef9SDimitry Andric std::make_pair(ArgVal, TargetValueNumbers)); 389e8d8bef9SDimitry Andric 390e8d8bef9SDimitry Andric // Instead of finding a current mapping, we inserted a set. This means a 391e8d8bef9SDimitry Andric // mapping did not exist for the source Instruction operand, it has no 392e8d8bef9SDimitry Andric // current constraints we need to check. 393e8d8bef9SDimitry Andric if (WasInserted) 394e8d8bef9SDimitry Andric continue; 395e8d8bef9SDimitry Andric 396e8d8bef9SDimitry Andric // If a mapping already exists for the source operand to the values in the 397e8d8bef9SDimitry Andric // other IRSimilarityCandidate we need to iterate over the items in other 398e8d8bef9SDimitry Andric // IRSimilarityCandidate's Instruction to determine whether there is a valid 399e8d8bef9SDimitry Andric // mapping of Value to Value. 400e8d8bef9SDimitry Andric DenseSet<unsigned> NewSet; 401e8d8bef9SDimitry Andric for (unsigned &Curr : ValueMappingIt->second) 402e8d8bef9SDimitry Andric // If we can find the value in the mapping, we add it to the new set. 403e8d8bef9SDimitry Andric if (TargetValueNumbers.contains(Curr)) 404e8d8bef9SDimitry Andric NewSet.insert(Curr); 405e8d8bef9SDimitry Andric 406e8d8bef9SDimitry Andric // If we could not find a Value, return 0. 407e8d8bef9SDimitry Andric if (NewSet.empty()) 408e8d8bef9SDimitry Andric return false; 409e8d8bef9SDimitry Andric 410e8d8bef9SDimitry Andric // Otherwise replace the old mapping with the newly constructed one. 411e8d8bef9SDimitry Andric if (NewSet.size() != ValueMappingIt->second.size()) 412e8d8bef9SDimitry Andric ValueMappingIt->second.swap(NewSet); 413e8d8bef9SDimitry Andric 414e8d8bef9SDimitry Andric // We have reached no conclusions about the mapping, and cannot remove 415e8d8bef9SDimitry Andric // any items from the other operands, so we move to check the next operand. 416e8d8bef9SDimitry Andric if (ValueMappingIt->second.size() != 1) 417e8d8bef9SDimitry Andric continue; 418e8d8bef9SDimitry Andric 419e8d8bef9SDimitry Andric 420e8d8bef9SDimitry Andric unsigned ValToRemove = *ValueMappingIt->second.begin(); 421e8d8bef9SDimitry Andric // When there is only one item left in the mapping for and operand, remove 422e8d8bef9SDimitry Andric // the value from the other operands. If it results in there being no 423e8d8bef9SDimitry Andric // mapping, return false, it means the mapping is wrong 424e8d8bef9SDimitry Andric for (Value *InnerV : SourceOperands) { 425e8d8bef9SDimitry Andric if (V == InnerV) 426e8d8bef9SDimitry Andric continue; 427e8d8bef9SDimitry Andric 428e8d8bef9SDimitry Andric unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 429e8d8bef9SDimitry Andric ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 430e8d8bef9SDimitry Andric if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 431e8d8bef9SDimitry Andric continue; 432e8d8bef9SDimitry Andric 433e8d8bef9SDimitry Andric ValueMappingIt->second.erase(ValToRemove); 434e8d8bef9SDimitry Andric if (ValueMappingIt->second.empty()) 435e8d8bef9SDimitry Andric return false; 436e8d8bef9SDimitry Andric } 437e8d8bef9SDimitry Andric } 438e8d8bef9SDimitry Andric 439e8d8bef9SDimitry Andric return true; 440e8d8bef9SDimitry Andric } 441e8d8bef9SDimitry Andric 442e8d8bef9SDimitry Andric /// Determine if operand number \p TargetArgVal is in the current mapping set 443e8d8bef9SDimitry Andric /// for operand number \p SourceArgVal. 444e8d8bef9SDimitry Andric /// 445e8d8bef9SDimitry Andric /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 446e8d8bef9SDimitry Andric /// value numbers from source IRSimilarityCandidate to target 447e8d8bef9SDimitry Andric /// IRSimilarityCandidate. 448e8d8bef9SDimitry Andric /// \param [in] SourceArgVal The global value number for an operand in the 449e8d8bef9SDimitry Andric /// in the original candidate. 450e8d8bef9SDimitry Andric /// \param [in] TargetArgVal The global value number for the corresponding 451e8d8bef9SDimitry Andric /// operand in the other candidate. 452e8d8bef9SDimitry Andric /// \returns True if there exists a mapping and false if not. 453e8d8bef9SDimitry Andric bool checkNumberingAndReplace( 454e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 455e8d8bef9SDimitry Andric unsigned SourceArgVal, unsigned TargetArgVal) { 456e8d8bef9SDimitry Andric // We are given two unsigned integers representing the global values of 457e8d8bef9SDimitry Andric // the operands in different IRSimilarityCandidates and a current mapping 458e8d8bef9SDimitry Andric // between the two. 459e8d8bef9SDimitry Andric // 460e8d8bef9SDimitry Andric // Source Operand GVN: 1 461e8d8bef9SDimitry Andric // Target Operand GVN: 2 462e8d8bef9SDimitry Andric // CurrentMapping: {1: {1, 2}} 463e8d8bef9SDimitry Andric // 464e8d8bef9SDimitry Andric // Since we have mapping, and the target operand is contained in the set, we 465e8d8bef9SDimitry Andric // update it to: 466e8d8bef9SDimitry Andric // CurrentMapping: {1: {2}} 467e8d8bef9SDimitry Andric // and can return true. But, if the mapping was 468e8d8bef9SDimitry Andric // CurrentMapping: {1: {3}} 469e8d8bef9SDimitry Andric // we would return false. 470e8d8bef9SDimitry Andric 471e8d8bef9SDimitry Andric bool WasInserted; 472e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 473e8d8bef9SDimitry Andric 474e8d8bef9SDimitry Andric std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 475e8d8bef9SDimitry Andric std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 476e8d8bef9SDimitry Andric 477e8d8bef9SDimitry Andric // If we created a new mapping, then we are done. 478e8d8bef9SDimitry Andric if (WasInserted) 479e8d8bef9SDimitry Andric return true; 480e8d8bef9SDimitry Andric 481e8d8bef9SDimitry Andric // If there is more than one option in the mapping set, and the target value 482e8d8bef9SDimitry Andric // is included in the mapping set replace that set with one that only includes 483e8d8bef9SDimitry Andric // the target value, as it is the only valid mapping via the non commutative 484e8d8bef9SDimitry Andric // instruction. 485e8d8bef9SDimitry Andric 486e8d8bef9SDimitry Andric DenseSet<unsigned> &TargetSet = Val->second; 487e8d8bef9SDimitry Andric if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 488e8d8bef9SDimitry Andric TargetSet.clear(); 489e8d8bef9SDimitry Andric TargetSet.insert(TargetArgVal); 490e8d8bef9SDimitry Andric return true; 491e8d8bef9SDimitry Andric } 492e8d8bef9SDimitry Andric 493e8d8bef9SDimitry Andric // Return true if we can find the value in the set. 494e8d8bef9SDimitry Andric return TargetSet.contains(TargetArgVal); 495e8d8bef9SDimitry Andric } 496e8d8bef9SDimitry Andric 497e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 498e8d8bef9SDimitry Andric OperandMapping A, OperandMapping B) { 499e8d8bef9SDimitry Andric // Iterators to keep track of where we are in the operands for each 500e8d8bef9SDimitry Andric // Instruction. 501e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 502e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 503e8d8bef9SDimitry Andric unsigned OperandLength = A.OperVals.size(); 504e8d8bef9SDimitry Andric 505e8d8bef9SDimitry Andric // For each operand, get the value numbering and ensure it is consistent. 506e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 507e8d8bef9SDimitry Andric unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 508e8d8bef9SDimitry Andric unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 509e8d8bef9SDimitry Andric 510e8d8bef9SDimitry Andric // Attempt to add a set with only the target value. If there is no mapping 511e8d8bef9SDimitry Andric // we can create it here. 512e8d8bef9SDimitry Andric // 513e8d8bef9SDimitry Andric // For an instruction like a subtraction: 514e8d8bef9SDimitry Andric // IRSimilarityCandidateA: IRSimilarityCandidateB: 515e8d8bef9SDimitry Andric // %resultA = sub %a, %b %resultB = sub %d, %e 516e8d8bef9SDimitry Andric // 517e8d8bef9SDimitry Andric // We map %a -> %d and %b -> %e. 518e8d8bef9SDimitry Andric // 519e8d8bef9SDimitry Andric // And check to see whether their mapping is consistent in 520e8d8bef9SDimitry Andric // checkNumberingAndReplace. 521e8d8bef9SDimitry Andric 522e8d8bef9SDimitry Andric if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 523e8d8bef9SDimitry Andric return false; 524e8d8bef9SDimitry Andric 525e8d8bef9SDimitry Andric if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 526e8d8bef9SDimitry Andric return false; 527e8d8bef9SDimitry Andric } 528e8d8bef9SDimitry Andric return true; 529e8d8bef9SDimitry Andric } 530e8d8bef9SDimitry Andric 531e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareCommutativeOperandMapping( 532e8d8bef9SDimitry Andric OperandMapping A, OperandMapping B) { 533e8d8bef9SDimitry Andric DenseSet<unsigned> ValueNumbersA; 534e8d8bef9SDimitry Andric DenseSet<unsigned> ValueNumbersB; 535e8d8bef9SDimitry Andric 536e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 537e8d8bef9SDimitry Andric ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 538e8d8bef9SDimitry Andric unsigned OperandLength = A.OperVals.size(); 539e8d8bef9SDimitry Andric 540e8d8bef9SDimitry Andric // Find the value number sets for the operands. 541e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < OperandLength; 542e8d8bef9SDimitry Andric Idx++, VItA++, VItB++) { 543e8d8bef9SDimitry Andric ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 544e8d8bef9SDimitry Andric ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 545e8d8bef9SDimitry Andric } 546e8d8bef9SDimitry Andric 547e8d8bef9SDimitry Andric // Iterate over the operands in the first IRSimilarityCandidate and make sure 548e8d8bef9SDimitry Andric // there exists a possible mapping with the operands in the second 549e8d8bef9SDimitry Andric // IRSimilarityCandidate. 550e8d8bef9SDimitry Andric if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 551e8d8bef9SDimitry Andric A.ValueNumberMapping, A.OperVals, 552e8d8bef9SDimitry Andric ValueNumbersB)) 553e8d8bef9SDimitry Andric return false; 554e8d8bef9SDimitry Andric 555e8d8bef9SDimitry Andric // Iterate over the operands in the second IRSimilarityCandidate and make sure 556e8d8bef9SDimitry Andric // there exists a possible mapping with the operands in the first 557e8d8bef9SDimitry Andric // IRSimilarityCandidate. 558e8d8bef9SDimitry Andric if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 559e8d8bef9SDimitry Andric B.ValueNumberMapping, B.OperVals, 560e8d8bef9SDimitry Andric ValueNumbersA)) 561e8d8bef9SDimitry Andric return false; 562e8d8bef9SDimitry Andric 563e8d8bef9SDimitry Andric return true; 564e8d8bef9SDimitry Andric } 565e8d8bef9SDimitry Andric 566e8d8bef9SDimitry Andric bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 567e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 568e8d8bef9SDimitry Andric if (A.getLength() != B.getLength()) 569e8d8bef9SDimitry Andric return false; 570e8d8bef9SDimitry Andric 571e8d8bef9SDimitry Andric if (A.ValueToNumber.size() != B.ValueToNumber.size()) 572e8d8bef9SDimitry Andric return false; 573e8d8bef9SDimitry Andric 574e8d8bef9SDimitry Andric iterator ItA = A.begin(); 575e8d8bef9SDimitry Andric iterator ItB = B.begin(); 576e8d8bef9SDimitry Andric 577e8d8bef9SDimitry Andric // These sets create a create a mapping between the values in one candidate 578e8d8bef9SDimitry Andric // to values in the other candidate. If we create a set with one element, 579e8d8bef9SDimitry Andric // and that same element maps to the original element in the candidate 580e8d8bef9SDimitry Andric // we have a good mapping. 581e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 582e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 583e8d8bef9SDimitry Andric DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 584e8d8bef9SDimitry Andric 585e8d8bef9SDimitry Andric bool WasInserted; 586e8d8bef9SDimitry Andric 587e8d8bef9SDimitry Andric // Iterate over the instructions contained in each candidate 588e8d8bef9SDimitry Andric unsigned SectionLength = A.getStartIdx() + A.getLength(); 589e8d8bef9SDimitry Andric for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 590e8d8bef9SDimitry Andric ItA++, ItB++, Loc++) { 591e8d8bef9SDimitry Andric // Make sure the instructions are similar to one another. 592e8d8bef9SDimitry Andric if (!isClose(*ItA, *ItB)) 593e8d8bef9SDimitry Andric return false; 594e8d8bef9SDimitry Andric 595e8d8bef9SDimitry Andric Instruction *IA = ItA->Inst; 596e8d8bef9SDimitry Andric Instruction *IB = ItB->Inst; 597e8d8bef9SDimitry Andric 598e8d8bef9SDimitry Andric if (!ItA->Legal || !ItB->Legal) 599e8d8bef9SDimitry Andric return false; 600e8d8bef9SDimitry Andric 601e8d8bef9SDimitry Andric // Get the operand sets for the instructions. 602e8d8bef9SDimitry Andric ArrayRef<Value *> OperValsA = ItA->OperVals; 603e8d8bef9SDimitry Andric ArrayRef<Value *> OperValsB = ItB->OperVals; 604e8d8bef9SDimitry Andric 605e8d8bef9SDimitry Andric unsigned InstValA = A.ValueToNumber.find(IA)->second; 606e8d8bef9SDimitry Andric unsigned InstValB = B.ValueToNumber.find(IB)->second; 607e8d8bef9SDimitry Andric 608e8d8bef9SDimitry Andric // Ensure that the mappings for the instructions exists. 609e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 610e8d8bef9SDimitry Andric std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 611e8d8bef9SDimitry Andric if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 612e8d8bef9SDimitry Andric return false; 613e8d8bef9SDimitry Andric 614e8d8bef9SDimitry Andric std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( 615e8d8bef9SDimitry Andric std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); 616e8d8bef9SDimitry Andric if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) 617e8d8bef9SDimitry Andric return false; 618e8d8bef9SDimitry Andric 619e8d8bef9SDimitry Andric // We have different paths for commutative instructions and non-commutative 620e8d8bef9SDimitry Andric // instructions since commutative instructions could allow multiple mappings 621e8d8bef9SDimitry Andric // to certain values. 622e8d8bef9SDimitry Andric if (IA->isCommutative() && !isa<FPMathOperator>(IA)) { 623e8d8bef9SDimitry Andric if (!compareCommutativeOperandMapping( 624e8d8bef9SDimitry Andric {A, OperValsA, ValueNumberMappingA}, 625e8d8bef9SDimitry Andric {B, OperValsB, ValueNumberMappingB})) 626e8d8bef9SDimitry Andric return false; 627e8d8bef9SDimitry Andric continue; 628e8d8bef9SDimitry Andric } 629e8d8bef9SDimitry Andric 630e8d8bef9SDimitry Andric // Handle the non-commutative cases. 631e8d8bef9SDimitry Andric if (!compareNonCommutativeOperandMapping( 632e8d8bef9SDimitry Andric {A, OperValsA, ValueNumberMappingA}, 633e8d8bef9SDimitry Andric {B, OperValsB, ValueNumberMappingB})) 634e8d8bef9SDimitry Andric return false; 635e8d8bef9SDimitry Andric } 636e8d8bef9SDimitry Andric return true; 637e8d8bef9SDimitry Andric } 638e8d8bef9SDimitry Andric 639e8d8bef9SDimitry Andric bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 640e8d8bef9SDimitry Andric const IRSimilarityCandidate &B) { 641e8d8bef9SDimitry Andric auto DoesOverlap = [](const IRSimilarityCandidate &X, 642e8d8bef9SDimitry Andric const IRSimilarityCandidate &Y) { 643e8d8bef9SDimitry Andric // Check: 644e8d8bef9SDimitry Andric // XXXXXX X starts before Y ends 645e8d8bef9SDimitry Andric // YYYYYYY Y starts after X starts 646e8d8bef9SDimitry Andric return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 647e8d8bef9SDimitry Andric }; 648e8d8bef9SDimitry Andric 649e8d8bef9SDimitry Andric return DoesOverlap(A, B) || DoesOverlap(B, A); 650e8d8bef9SDimitry Andric } 651e8d8bef9SDimitry Andric 652e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper( 653e8d8bef9SDimitry Andric Module &M, std::vector<IRInstructionData *> &InstrList, 654e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 655e8d8bef9SDimitry Andric 656e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrListForModule; 657e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMappingForModule; 658e8d8bef9SDimitry Andric // Iterate over the functions in the module to map each Instruction in each 659e8d8bef9SDimitry Andric // BasicBlock to an unsigned integer. 660e8d8bef9SDimitry Andric for (Function &F : M) { 661e8d8bef9SDimitry Andric 662e8d8bef9SDimitry Andric if (F.empty()) 663e8d8bef9SDimitry Andric continue; 664e8d8bef9SDimitry Andric 665e8d8bef9SDimitry Andric for (BasicBlock &BB : F) { 666e8d8bef9SDimitry Andric 667e8d8bef9SDimitry Andric if (BB.sizeWithoutDebug() < 2) 668e8d8bef9SDimitry Andric continue; 669e8d8bef9SDimitry Andric 670e8d8bef9SDimitry Andric // BB has potential to have similarity since it has a size greater than 2 671e8d8bef9SDimitry Andric // and can therefore match other regions greater than 2. Map it to a list 672e8d8bef9SDimitry Andric // of unsigned integers. 673e8d8bef9SDimitry Andric Mapper.convertToUnsignedVec(BB, InstrListForModule, 674e8d8bef9SDimitry Andric IntegerMappingForModule); 675e8d8bef9SDimitry Andric } 676e8d8bef9SDimitry Andric } 677e8d8bef9SDimitry Andric 678e8d8bef9SDimitry Andric // Insert the InstrListForModule at the end of the overall InstrList so that 679e8d8bef9SDimitry Andric // we can have a long InstrList for the entire set of Modules being analyzed. 680e8d8bef9SDimitry Andric llvm::append_range(InstrList, InstrListForModule); 681e8d8bef9SDimitry Andric // Do the same as above, but for IntegerMapping. 682e8d8bef9SDimitry Andric llvm::append_range(IntegerMapping, IntegerMappingForModule); 683e8d8bef9SDimitry Andric } 684e8d8bef9SDimitry Andric 685e8d8bef9SDimitry Andric void IRSimilarityIdentifier::populateMapper( 686e8d8bef9SDimitry Andric ArrayRef<std::unique_ptr<Module>> &Modules, 687e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrList, 688e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 689e8d8bef9SDimitry Andric 690e8d8bef9SDimitry Andric // Iterate over, and map the instructions in each module. 691e8d8bef9SDimitry Andric for (const std::unique_ptr<Module> &M : Modules) 692e8d8bef9SDimitry Andric populateMapper(*M, InstrList, IntegerMapping); 693e8d8bef9SDimitry Andric } 694e8d8bef9SDimitry Andric 695e8d8bef9SDimitry Andric /// From a repeated subsequence, find all the different instances of the 696e8d8bef9SDimitry Andric /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 697e8d8bef9SDimitry Andric /// the IRInstructionData in subsequence. 698e8d8bef9SDimitry Andric /// 699e8d8bef9SDimitry Andric /// \param [in] Mapper - The instruction mapper for sanity checks. 700e8d8bef9SDimitry Andric /// \param [in] InstrList - The vector that holds the instruction data. 701e8d8bef9SDimitry Andric /// \param [in] IntegerMapping - The vector that holds the mapped integers. 702e8d8bef9SDimitry Andric /// \param [out] CandsForRepSubstring - The vector to store the generated 703e8d8bef9SDimitry Andric /// IRSimilarityCandidates. 704e8d8bef9SDimitry Andric static void createCandidatesFromSuffixTree( 705*fe6060f1SDimitry Andric const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, 706e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 707e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 708e8d8bef9SDimitry Andric 709e8d8bef9SDimitry Andric unsigned StringLen = RS.Length; 710e8d8bef9SDimitry Andric 711e8d8bef9SDimitry Andric // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 712e8d8bef9SDimitry Andric for (const unsigned &StartIdx : RS.StartIndices) { 713e8d8bef9SDimitry Andric unsigned EndIdx = StartIdx + StringLen - 1; 714e8d8bef9SDimitry Andric 715e8d8bef9SDimitry Andric // Check that this subsequence does not contain an illegal instruction. 716e8d8bef9SDimitry Andric bool ContainsIllegal = false; 717e8d8bef9SDimitry Andric for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 718e8d8bef9SDimitry Andric unsigned Key = IntegerMapping[CurrIdx]; 719e8d8bef9SDimitry Andric if (Key > Mapper.IllegalInstrNumber) { 720e8d8bef9SDimitry Andric ContainsIllegal = true; 721e8d8bef9SDimitry Andric break; 722e8d8bef9SDimitry Andric } 723e8d8bef9SDimitry Andric } 724e8d8bef9SDimitry Andric 725e8d8bef9SDimitry Andric // If we have an illegal instruction, we should not create an 726e8d8bef9SDimitry Andric // IRSimilarityCandidate for this region. 727e8d8bef9SDimitry Andric if (ContainsIllegal) 728e8d8bef9SDimitry Andric continue; 729e8d8bef9SDimitry Andric 730e8d8bef9SDimitry Andric // We are getting iterators to the instructions in this region of code 731e8d8bef9SDimitry Andric // by advancing the start and end indices from the start of the 732e8d8bef9SDimitry Andric // InstrList. 733e8d8bef9SDimitry Andric std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 734e8d8bef9SDimitry Andric std::advance(StartIt, StartIdx); 735e8d8bef9SDimitry Andric std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 736e8d8bef9SDimitry Andric std::advance(EndIt, EndIdx); 737e8d8bef9SDimitry Andric 738e8d8bef9SDimitry Andric CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 739e8d8bef9SDimitry Andric } 740e8d8bef9SDimitry Andric } 741e8d8bef9SDimitry Andric 742e8d8bef9SDimitry Andric /// From the list of IRSimilarityCandidates, perform a comparison between each 743e8d8bef9SDimitry Andric /// IRSimilarityCandidate to determine if there are overlapping 744e8d8bef9SDimitry Andric /// IRInstructionData, or if they do not have the same structure. 745e8d8bef9SDimitry Andric /// 746e8d8bef9SDimitry Andric /// \param [in] CandsForRepSubstring - The vector containing the 747e8d8bef9SDimitry Andric /// IRSimilarityCandidates. 748e8d8bef9SDimitry Andric /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 749e8d8bef9SDimitry Andric /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 750e8d8bef9SDimitry Andric /// vector are structurally similar to one another. 751e8d8bef9SDimitry Andric static void findCandidateStructures( 752e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 753e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { 754e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 755e8d8bef9SDimitry Andric InnerCandEndIt; 756e8d8bef9SDimitry Andric 757e8d8bef9SDimitry Andric // IRSimilarityCandidates each have a structure for operand use. It is 758e8d8bef9SDimitry Andric // possible that two instances of the same subsequences have different 759e8d8bef9SDimitry Andric // structure. Each type of structure found is assigned a number. This 760e8d8bef9SDimitry Andric // DenseMap maps an IRSimilarityCandidate to which type of similarity 761e8d8bef9SDimitry Andric // discovered it fits within. 762e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 763e8d8bef9SDimitry Andric 764e8d8bef9SDimitry Andric // Find the compatibility from each candidate to the others to determine 765e8d8bef9SDimitry Andric // which candidates overlap and which have the same structure by mapping 766e8d8bef9SDimitry Andric // each structure to a different group. 767e8d8bef9SDimitry Andric bool SameStructure; 768e8d8bef9SDimitry Andric bool Inserted; 769e8d8bef9SDimitry Andric unsigned CurrentGroupNum = 0; 770e8d8bef9SDimitry Andric unsigned OuterGroupNum; 771e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 772e8d8bef9SDimitry Andric DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 773e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 774e8d8bef9SDimitry Andric 775e8d8bef9SDimitry Andric // Iterate over the candidates to determine its structural and overlapping 776e8d8bef9SDimitry Andric // compatibility with other instructions 777e8d8bef9SDimitry Andric for (CandIt = CandsForRepSubstring.begin(), 778e8d8bef9SDimitry Andric CandEndIt = CandsForRepSubstring.end(); 779e8d8bef9SDimitry Andric CandIt != CandEndIt; CandIt++) { 780e8d8bef9SDimitry Andric 781e8d8bef9SDimitry Andric // Determine if it has an assigned structural group already. 782e8d8bef9SDimitry Andric CandToGroupIt = CandToGroup.find(&*CandIt); 783e8d8bef9SDimitry Andric if (CandToGroupIt == CandToGroup.end()) { 784e8d8bef9SDimitry Andric // If not, we assign it one, and add it to our mapping. 785e8d8bef9SDimitry Andric std::tie(CandToGroupIt, Inserted) = 786e8d8bef9SDimitry Andric CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 787e8d8bef9SDimitry Andric } 788e8d8bef9SDimitry Andric 789e8d8bef9SDimitry Andric // Get the structural group number from the iterator. 790e8d8bef9SDimitry Andric OuterGroupNum = CandToGroupIt->second; 791e8d8bef9SDimitry Andric 792e8d8bef9SDimitry Andric // Check if we already have a list of IRSimilarityCandidates for the current 793e8d8bef9SDimitry Andric // structural group. Create one if one does not exist. 794e8d8bef9SDimitry Andric CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 795e8d8bef9SDimitry Andric if (CurrentGroupPair == StructuralGroups.end()) 796e8d8bef9SDimitry Andric std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 797e8d8bef9SDimitry Andric std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 798e8d8bef9SDimitry Andric 799e8d8bef9SDimitry Andric // Iterate over the IRSimilarityCandidates following the current 800e8d8bef9SDimitry Andric // IRSimilarityCandidate in the list to determine whether the two 801e8d8bef9SDimitry Andric // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 802e8d8bef9SDimitry Andric // of IRSimilarityCandidates. 803e8d8bef9SDimitry Andric for (InnerCandIt = std::next(CandIt), 804e8d8bef9SDimitry Andric InnerCandEndIt = CandsForRepSubstring.end(); 805e8d8bef9SDimitry Andric InnerCandIt != InnerCandEndIt; InnerCandIt++) { 806e8d8bef9SDimitry Andric 807e8d8bef9SDimitry Andric // We check if the inner item has a group already, if it does, we skip it. 808e8d8bef9SDimitry Andric CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 809e8d8bef9SDimitry Andric if (CandToGroupItInner != CandToGroup.end()) 810e8d8bef9SDimitry Andric continue; 811e8d8bef9SDimitry Andric 812e8d8bef9SDimitry Andric // Otherwise we determine if they have the same structure and add it to 813e8d8bef9SDimitry Andric // vector if they match. 814e8d8bef9SDimitry Andric SameStructure = 815e8d8bef9SDimitry Andric IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt); 816e8d8bef9SDimitry Andric if (!SameStructure) 817e8d8bef9SDimitry Andric continue; 818e8d8bef9SDimitry Andric 819e8d8bef9SDimitry Andric CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 820e8d8bef9SDimitry Andric CurrentGroupPair->second.push_back(*InnerCandIt); 821e8d8bef9SDimitry Andric } 822e8d8bef9SDimitry Andric } 823e8d8bef9SDimitry Andric } 824e8d8bef9SDimitry Andric 825e8d8bef9SDimitry Andric void IRSimilarityIdentifier::findCandidates( 826e8d8bef9SDimitry Andric std::vector<IRInstructionData *> &InstrList, 827e8d8bef9SDimitry Andric std::vector<unsigned> &IntegerMapping) { 828e8d8bef9SDimitry Andric SuffixTree ST(IntegerMapping); 829e8d8bef9SDimitry Andric 830e8d8bef9SDimitry Andric std::vector<IRSimilarityCandidate> CandsForRepSubstring; 831e8d8bef9SDimitry Andric std::vector<SimilarityGroup> NewCandidateGroups; 832e8d8bef9SDimitry Andric 833e8d8bef9SDimitry Andric DenseMap<unsigned, SimilarityGroup> StructuralGroups; 834e8d8bef9SDimitry Andric 835e8d8bef9SDimitry Andric // Iterate over the subsequences found by the Suffix Tree to create 836e8d8bef9SDimitry Andric // IRSimilarityCandidates for each repeated subsequence and determine which 837e8d8bef9SDimitry Andric // instances are structurally similar to one another. 838*fe6060f1SDimitry Andric for (SuffixTree::RepeatedSubstring &RS : ST) { 839*fe6060f1SDimitry Andric createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, 840e8d8bef9SDimitry Andric CandsForRepSubstring); 841e8d8bef9SDimitry Andric 842e8d8bef9SDimitry Andric if (CandsForRepSubstring.size() < 2) 843e8d8bef9SDimitry Andric continue; 844e8d8bef9SDimitry Andric 845e8d8bef9SDimitry Andric findCandidateStructures(CandsForRepSubstring, StructuralGroups); 846e8d8bef9SDimitry Andric for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) 847e8d8bef9SDimitry Andric // We only add the group if it contains more than one 848e8d8bef9SDimitry Andric // IRSimilarityCandidate. If there is only one, that means there is no 849e8d8bef9SDimitry Andric // other repeated subsequence with the same structure. 850e8d8bef9SDimitry Andric if (Group.second.size() > 1) 851e8d8bef9SDimitry Andric SimilarityCandidates->push_back(Group.second); 852e8d8bef9SDimitry Andric 853e8d8bef9SDimitry Andric CandsForRepSubstring.clear(); 854e8d8bef9SDimitry Andric StructuralGroups.clear(); 855e8d8bef9SDimitry Andric NewCandidateGroups.clear(); 856e8d8bef9SDimitry Andric } 857e8d8bef9SDimitry Andric } 858e8d8bef9SDimitry Andric 859e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 860e8d8bef9SDimitry Andric ArrayRef<std::unique_ptr<Module>> Modules) { 861e8d8bef9SDimitry Andric resetSimilarityCandidates(); 862e8d8bef9SDimitry Andric 863e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrList; 864e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMapping; 865e8d8bef9SDimitry Andric 866e8d8bef9SDimitry Andric populateMapper(Modules, InstrList, IntegerMapping); 867e8d8bef9SDimitry Andric findCandidates(InstrList, IntegerMapping); 868e8d8bef9SDimitry Andric 869e8d8bef9SDimitry Andric return SimilarityCandidates.getValue(); 870e8d8bef9SDimitry Andric } 871e8d8bef9SDimitry Andric 872e8d8bef9SDimitry Andric SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 873e8d8bef9SDimitry Andric resetSimilarityCandidates(); 874e8d8bef9SDimitry Andric 875e8d8bef9SDimitry Andric std::vector<IRInstructionData *> InstrList; 876e8d8bef9SDimitry Andric std::vector<unsigned> IntegerMapping; 877e8d8bef9SDimitry Andric 878e8d8bef9SDimitry Andric populateMapper(M, InstrList, IntegerMapping); 879e8d8bef9SDimitry Andric findCandidates(InstrList, IntegerMapping); 880e8d8bef9SDimitry Andric 881e8d8bef9SDimitry Andric return SimilarityCandidates.getValue(); 882e8d8bef9SDimitry Andric } 883e8d8bef9SDimitry Andric 884e8d8bef9SDimitry Andric INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 885e8d8bef9SDimitry Andric "ir-similarity-identifier", false, true) 886e8d8bef9SDimitry Andric 887e8d8bef9SDimitry Andric IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 888e8d8bef9SDimitry Andric : ModulePass(ID) { 889e8d8bef9SDimitry Andric initializeIRSimilarityIdentifierWrapperPassPass( 890e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 891e8d8bef9SDimitry Andric } 892e8d8bef9SDimitry Andric 893e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 894*fe6060f1SDimitry Andric IRSI.reset(new IRSimilarityIdentifier()); 895e8d8bef9SDimitry Andric return false; 896e8d8bef9SDimitry Andric } 897e8d8bef9SDimitry Andric 898e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 899e8d8bef9SDimitry Andric IRSI.reset(); 900e8d8bef9SDimitry Andric return false; 901e8d8bef9SDimitry Andric } 902e8d8bef9SDimitry Andric 903e8d8bef9SDimitry Andric bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 904*fe6060f1SDimitry Andric IRSI->findSimilarity(M); 905e8d8bef9SDimitry Andric return false; 906e8d8bef9SDimitry Andric } 907e8d8bef9SDimitry Andric 908e8d8bef9SDimitry Andric AnalysisKey IRSimilarityAnalysis::Key; 909e8d8bef9SDimitry Andric IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 910e8d8bef9SDimitry Andric ModuleAnalysisManager &) { 911e8d8bef9SDimitry Andric 912*fe6060f1SDimitry Andric auto IRSI = IRSimilarityIdentifier(); 913*fe6060f1SDimitry Andric IRSI.findSimilarity(M); 914*fe6060f1SDimitry Andric return IRSI; 915e8d8bef9SDimitry Andric } 916e8d8bef9SDimitry Andric 917e8d8bef9SDimitry Andric PreservedAnalyses 918e8d8bef9SDimitry Andric IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 919e8d8bef9SDimitry Andric IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 920e8d8bef9SDimitry Andric Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); 921e8d8bef9SDimitry Andric 922e8d8bef9SDimitry Andric for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 923e8d8bef9SDimitry Andric OS << CandVec.size() << " candidates of length " 924e8d8bef9SDimitry Andric << CandVec.begin()->getLength() << ". Found in: \n"; 925e8d8bef9SDimitry Andric for (IRSimilarityCandidate &Cand : CandVec) { 926e8d8bef9SDimitry Andric OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 927e8d8bef9SDimitry Andric << ", Basic Block: "; 928e8d8bef9SDimitry Andric if (Cand.front()->Inst->getParent()->getName().str() == "") 929*fe6060f1SDimitry Andric OS << "(unnamed)"; 930e8d8bef9SDimitry Andric else 931*fe6060f1SDimitry Andric OS << Cand.front()->Inst->getParent()->getName().str(); 932*fe6060f1SDimitry Andric OS << "\n Start Instruction: "; 933*fe6060f1SDimitry Andric Cand.frontInstruction()->print(OS); 934*fe6060f1SDimitry Andric OS << "\n End Instruction: "; 935*fe6060f1SDimitry Andric Cand.backInstruction()->print(OS); 936*fe6060f1SDimitry Andric OS << "\n"; 937e8d8bef9SDimitry Andric } 938e8d8bef9SDimitry Andric } 939e8d8bef9SDimitry Andric 940e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 941e8d8bef9SDimitry Andric } 942e8d8bef9SDimitry Andric 943e8d8bef9SDimitry Andric char IRSimilarityIdentifierWrapperPass::ID = 0; 944