1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // Implementation file for the IRSimilarityIdentifier for identifying 11 // similarities in IR including the IRInstructionMapper. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Analysis/IRSimilarityIdentifier.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/SetOperations.h" 18 #include "llvm/IR/Intrinsics.h" 19 #include "llvm/IR/Operator.h" 20 #include "llvm/IR/User.h" 21 #include "llvm/InitializePasses.h" 22 #include "llvm/Support/SuffixTree.h" 23 24 using namespace llvm; 25 using namespace IRSimilarity; 26 27 namespace llvm { 28 cl::opt<bool> 29 DisableBranches("no-ir-sim-branch-matching", cl::init(false), 30 cl::ReallyHidden, 31 cl::desc("disable similarity matching, and outlining, " 32 "across branches for debugging purposes.")); 33 34 cl::opt<bool> 35 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), 36 cl::ReallyHidden, 37 cl::desc("disable outlining indirect calls.")); 38 39 cl::opt<bool> 40 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, 41 cl::desc("only allow matching call instructions if the " 42 "name and type signature match.")); 43 44 cl::opt<bool> 45 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, 46 cl::desc("Don't match or outline intrinsics")); 47 } // namespace llvm 48 49 IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 50 IRInstructionDataList &IDList) 51 : Inst(&I), Legal(Legality), IDL(&IDList) { 52 initializeInstruction(); 53 } 54 55 void IRInstructionData::initializeInstruction() { 56 // We check for whether we have a comparison instruction. If it is, we 57 // find the "less than" version of the predicate for consistency for 58 // comparison instructions throught the program. 59 if (CmpInst *C = dyn_cast<CmpInst>(Inst)) { 60 CmpInst::Predicate Predicate = predicateForConsistency(C); 61 if (Predicate != C->getPredicate()) 62 RevisedPredicate = Predicate; 63 } 64 65 // Here we collect the operands and their types for determining whether 66 // the structure of the operand use matches between two different candidates. 67 for (Use &OI : Inst->operands()) { 68 if (isa<CmpInst>(Inst) && RevisedPredicate) { 69 // If we have a CmpInst where the predicate is reversed, it means the 70 // operands must be reversed as well. 71 OperVals.insert(OperVals.begin(), OI.get()); 72 continue; 73 } 74 75 OperVals.push_back(OI.get()); 76 } 77 78 // We capture the incoming BasicBlocks as values as well as the incoming 79 // Values in order to check for structural similarity. 80 if (PHINode *PN = dyn_cast<PHINode>(Inst)) 81 for (BasicBlock *BB : PN->blocks()) 82 OperVals.push_back(BB); 83 } 84 85 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) 86 : IDL(&IDList) {} 87 88 void IRInstructionData::setBranchSuccessors( 89 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 90 assert(isa<BranchInst>(Inst) && "Instruction must be branch"); 91 92 BranchInst *BI = cast<BranchInst>(Inst); 93 DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 94 95 BBNumIt = BasicBlockToInteger.find(BI->getParent()); 96 assert(BBNumIt != BasicBlockToInteger.end() && 97 "Could not find location for BasicBlock!"); 98 99 int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 100 101 for (Value *V : getBlockOperVals()) { 102 BasicBlock *Successor = cast<BasicBlock>(V); 103 BBNumIt = BasicBlockToInteger.find(Successor); 104 assert(BBNumIt != BasicBlockToInteger.end() && 105 "Could not find number for BasicBlock!"); 106 int OtherBlockNumber = static_cast<int>(BBNumIt->second); 107 108 int Relative = OtherBlockNumber - CurrentBlockNumber; 109 RelativeBlockLocations.push_back(Relative); 110 } 111 } 112 113 ArrayRef<Value *> IRInstructionData::getBlockOperVals() { 114 assert((isa<BranchInst>(Inst) || 115 isa<PHINode>(Inst)) && "Instruction must be branch or PHINode"); 116 117 if (BranchInst *BI = dyn_cast<BranchInst>(Inst)) 118 return ArrayRef<Value *>( 119 std::next(OperVals.begin(), BI->isConditional() ? 1 : 0), 120 OperVals.end() 121 ); 122 123 if (PHINode *PN = dyn_cast<PHINode>(Inst)) 124 return ArrayRef<Value *>( 125 std::next(OperVals.begin(), PN->getNumIncomingValues()), 126 OperVals.end() 127 ); 128 129 return ArrayRef<Value *>(); 130 } 131 132 void IRInstructionData::setCalleeName(bool MatchByName) { 133 CallInst *CI = dyn_cast<CallInst>(Inst); 134 assert(CI && "Instruction must be call"); 135 136 CalleeName = ""; 137 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 138 // To hash intrinsics, we use the opcode, and types like the other 139 // instructions, but also, the Intrinsic ID, and the Name of the 140 // intrinsic. 141 Intrinsic::ID IntrinsicID = II->getIntrinsicID(); 142 FunctionType *FT = II->getFunctionType(); 143 // If there is an overloaded name, we have to use the complex version 144 // of getName to get the entire string. 145 if (Intrinsic::isOverloaded(IntrinsicID)) 146 CalleeName = 147 Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT); 148 // If there is not an overloaded name, we only need to use this version. 149 else 150 CalleeName = Intrinsic::getName(IntrinsicID).str(); 151 152 return; 153 } 154 155 if (!CI->isIndirectCall() && MatchByName) 156 CalleeName = CI->getCalledFunction()->getName().str(); 157 } 158 159 void IRInstructionData::setPHIPredecessors( 160 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 161 assert(isa<PHINode>(Inst) && "Instruction must be phi node"); 162 163 PHINode *PN = cast<PHINode>(Inst); 164 DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 165 166 BBNumIt = BasicBlockToInteger.find(PN->getParent()); 167 assert(BBNumIt != BasicBlockToInteger.end() && 168 "Could not find location for BasicBlock!"); 169 170 int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 171 172 // Convert the incoming blocks of the PHINode to an integer value, based on 173 // the relative distances between the current block and the incoming block. 174 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) { 175 BasicBlock *Incoming = PN->getIncomingBlock(Idx); 176 BBNumIt = BasicBlockToInteger.find(Incoming); 177 assert(BBNumIt != BasicBlockToInteger.end() && 178 "Could not find number for BasicBlock!"); 179 int OtherBlockNumber = static_cast<int>(BBNumIt->second); 180 181 int Relative = OtherBlockNumber - CurrentBlockNumber; 182 RelativeBlockLocations.push_back(Relative); 183 } 184 } 185 186 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 187 switch (CI->getPredicate()) { 188 case CmpInst::FCMP_OGT: 189 case CmpInst::FCMP_UGT: 190 case CmpInst::FCMP_OGE: 191 case CmpInst::FCMP_UGE: 192 case CmpInst::ICMP_SGT: 193 case CmpInst::ICMP_UGT: 194 case CmpInst::ICMP_SGE: 195 case CmpInst::ICMP_UGE: 196 return CI->getSwappedPredicate(); 197 default: 198 return CI->getPredicate(); 199 } 200 } 201 202 CmpInst::Predicate IRInstructionData::getPredicate() const { 203 assert(isa<CmpInst>(Inst) && 204 "Can only get a predicate from a compare instruction"); 205 206 if (RevisedPredicate) 207 return *RevisedPredicate; 208 209 return cast<CmpInst>(Inst)->getPredicate(); 210 } 211 212 StringRef IRInstructionData::getCalleeName() const { 213 assert(isa<CallInst>(Inst) && 214 "Can only get a name from a call instruction"); 215 216 assert(CalleeName && "CalleeName has not been set"); 217 218 return *CalleeName; 219 } 220 221 bool IRSimilarity::isClose(const IRInstructionData &A, 222 const IRInstructionData &B) { 223 224 if (!A.Legal || !B.Legal) 225 return false; 226 227 // Check if we are performing the same sort of operation on the same types 228 // but not on the same values. 229 if (!A.Inst->isSameOperationAs(B.Inst)) { 230 // If there is a predicate, this means that either there is a swapped 231 // predicate, or that the types are different, we want to make sure that 232 // the predicates are equivalent via swapping. 233 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 234 235 if (A.getPredicate() != B.getPredicate()) 236 return false; 237 238 // If the predicates are the same via swap, make sure that the types are 239 // still the same. 240 auto ZippedTypes = zip(A.OperVals, B.OperVals); 241 242 return all_of( 243 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 244 return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 245 }); 246 } 247 248 return false; 249 } 250 251 // Since any GEP Instruction operands after the first operand cannot be 252 // defined by a register, we must make sure that the operands after the first 253 // are the same in the two instructions 254 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { 255 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); 256 257 // If the instructions do not have the same inbounds restrictions, we do 258 // not consider them the same. 259 if (GEP->isInBounds() != OtherGEP->isInBounds()) 260 return false; 261 262 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); 263 264 // We increment here since we do not care about the first instruction, 265 // we only care about the following operands since they must be the 266 // exact same to be considered similar. 267 return all_of(drop_begin(ZippedOperands), 268 [](std::tuple<llvm::Use &, llvm::Use &> R) { 269 return std::get<0>(R) == std::get<1>(R); 270 }); 271 } 272 273 // If the instructions are functions calls, we make sure that the function 274 // name is the same. We already know that the types are since is 275 // isSameOperationAs is true. 276 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { 277 if (A.getCalleeName() != B.getCalleeName()) 278 return false; 279 } 280 281 if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) && 282 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) 283 return false; 284 285 return true; 286 } 287 288 // TODO: This is the same as the MachineOutliner, and should be consolidated 289 // into the same interface. 290 void IRInstructionMapper::convertToUnsignedVec( 291 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 292 std::vector<unsigned> &IntegerMapping) { 293 BasicBlock::iterator It = BB.begin(); 294 295 std::vector<unsigned> IntegerMappingForBB; 296 std::vector<IRInstructionData *> InstrListForBB; 297 298 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 299 switch (InstClassifier.visit(*It)) { 300 case InstrType::Legal: 301 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 302 break; 303 case InstrType::Illegal: 304 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 305 break; 306 case InstrType::Invisible: 307 AddedIllegalLastTime = false; 308 break; 309 } 310 } 311 312 if (AddedIllegalLastTime) 313 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 314 for (IRInstructionData *ID : InstrListForBB) 315 this->IDL->push_back(*ID); 316 llvm::append_range(InstrList, InstrListForBB); 317 llvm::append_range(IntegerMapping, IntegerMappingForBB); 318 } 319 320 // TODO: This is the same as the MachineOutliner, and should be consolidated 321 // into the same interface. 322 unsigned IRInstructionMapper::mapToLegalUnsigned( 323 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 324 std::vector<IRInstructionData *> &InstrListForBB) { 325 // We added something legal, so we should unset the AddedLegalLastTime 326 // flag. 327 AddedIllegalLastTime = false; 328 329 // If we have at least two adjacent legal instructions (which may have 330 // invisible instructions in between), remember that. 331 if (CanCombineWithPrevInstr) 332 HaveLegalRange = true; 333 CanCombineWithPrevInstr = true; 334 335 // Get the integer for this instruction or give it the current 336 // LegalInstrNumber. 337 IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 338 InstrListForBB.push_back(ID); 339 340 if (isa<BranchInst>(*It)) 341 ID->setBranchSuccessors(BasicBlockToInteger); 342 343 if (isa<CallInst>(*It)) 344 ID->setCalleeName(EnableMatchCallsByName); 345 346 if (isa<PHINode>(*It)) 347 ID->setPHIPredecessors(BasicBlockToInteger); 348 349 // Add to the instruction list 350 bool WasInserted; 351 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 352 ResultIt; 353 std::tie(ResultIt, WasInserted) = 354 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 355 unsigned INumber = ResultIt->second; 356 357 // There was an insertion. 358 if (WasInserted) 359 LegalInstrNumber++; 360 361 IntegerMappingForBB.push_back(INumber); 362 363 // Make sure we don't overflow or use any integers reserved by the DenseMap. 364 assert(LegalInstrNumber < IllegalInstrNumber && 365 "Instruction mapping overflow!"); 366 367 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 368 "Tried to assign DenseMap tombstone or empty key to instruction."); 369 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 370 "Tried to assign DenseMap tombstone or empty key to instruction."); 371 372 return INumber; 373 } 374 375 IRInstructionData * 376 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 377 IRInstructionDataList &IDL) { 378 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 379 } 380 381 IRInstructionData * 382 IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) { 383 return new (InstDataAllocator->Allocate()) IRInstructionData(IDL); 384 } 385 386 IRInstructionDataList * 387 IRInstructionMapper::allocateIRInstructionDataList() { 388 return new (IDLAllocator->Allocate()) IRInstructionDataList(); 389 } 390 391 // TODO: This is the same as the MachineOutliner, and should be consolidated 392 // into the same interface. 393 unsigned IRInstructionMapper::mapToIllegalUnsigned( 394 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 395 std::vector<IRInstructionData *> &InstrListForBB, bool End) { 396 // Can't combine an illegal instruction. Set the flag. 397 CanCombineWithPrevInstr = false; 398 399 // Only add one illegal number per range of legal numbers. 400 if (AddedIllegalLastTime) 401 return IllegalInstrNumber; 402 403 IRInstructionData *ID = nullptr; 404 if (!End) 405 ID = allocateIRInstructionData(*It, false, *IDL); 406 else 407 ID = allocateIRInstructionData(*IDL); 408 InstrListForBB.push_back(ID); 409 410 // Remember that we added an illegal number last time. 411 AddedIllegalLastTime = true; 412 unsigned INumber = IllegalInstrNumber; 413 IntegerMappingForBB.push_back(IllegalInstrNumber--); 414 415 assert(LegalInstrNumber < IllegalInstrNumber && 416 "Instruction mapping overflow!"); 417 418 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 419 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 420 421 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 422 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 423 424 return INumber; 425 } 426 427 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 428 IRInstructionData *FirstInstIt, 429 IRInstructionData *LastInstIt) 430 : StartIdx(StartIdx), Len(Len) { 431 432 assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 433 assert(LastInstIt != nullptr && "Instruction is nullptr!"); 434 assert(StartIdx + Len > StartIdx && 435 "Overflow for IRSimilarityCandidate range?"); 436 assert(Len - 1 == static_cast<unsigned>(std::distance( 437 iterator(FirstInstIt), iterator(LastInstIt))) && 438 "Length of the first and last IRInstructionData do not match the " 439 "given length"); 440 441 // We iterate over the given instructions, and map each unique value 442 // to a unique number in the IRSimilarityCandidate ValueToNumber and 443 // NumberToValue maps. A constant get its own value globally, the individual 444 // uses of the constants are not considered to be unique. 445 // 446 // IR: Mapping Added: 447 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 448 // %add2 = add i32 %a, %1 %add2 -> 4 449 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 450 // 451 // when replace with global values, starting from 1, would be 452 // 453 // 3 = add i32 1, 2 454 // 4 = add i32 1, 3 455 // 6 = add i32 5, 2 456 unsigned LocalValNumber = 1; 457 IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 458 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 459 // Map the operand values to an unsigned integer if it does not already 460 // have an unsigned integer assigned to it. 461 for (Value *Arg : ID->OperVals) 462 if (!ValueToNumber.contains(Arg)) { 463 ValueToNumber.try_emplace(Arg, LocalValNumber); 464 NumberToValue.try_emplace(LocalValNumber, Arg); 465 LocalValNumber++; 466 } 467 468 // Mapping the instructions to an unsigned integer if it is not already 469 // exist in the mapping. 470 if (!ValueToNumber.contains(ID->Inst)) { 471 ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 472 NumberToValue.try_emplace(LocalValNumber, ID->Inst); 473 LocalValNumber++; 474 } 475 } 476 477 // Setting the first and last instruction data pointers for the candidate. If 478 // we got through the entire for loop without hitting an assert, we know 479 // that both of these instructions are not nullptrs. 480 FirstInst = FirstInstIt; 481 LastInst = LastInstIt; 482 483 // Add the basic blocks contained in the set into the global value numbering. 484 DenseSet<BasicBlock *> BBSet; 485 getBasicBlocks(BBSet); 486 for (BasicBlock *BB : BBSet) { 487 if (ValueToNumber.contains(BB)) 488 continue; 489 490 ValueToNumber.try_emplace(BB, LocalValNumber); 491 NumberToValue.try_emplace(LocalValNumber, BB); 492 LocalValNumber++; 493 } 494 } 495 496 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 497 const IRSimilarityCandidate &B) { 498 if (A.getLength() != B.getLength()) 499 return false; 500 501 auto InstrDataForBoth = 502 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 503 504 return all_of(InstrDataForBoth, 505 [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 506 IRInstructionData &A = std::get<0>(R); 507 IRInstructionData &B = std::get<1>(R); 508 if (!A.Legal || !B.Legal) 509 return false; 510 return isClose(A, B); 511 }); 512 } 513 514 /// Determine if one or more of the assigned global value numbers for the 515 /// operands in \p TargetValueNumbers is in the current mapping set for operand 516 /// numbers in \p SourceOperands. The set of possible corresponding global 517 /// value numbers are replaced with the most recent version of compatible 518 /// values. 519 /// 520 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 521 /// value number for the source IRInstructionCandidate. 522 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 523 /// IRSimilarityCandidate global value numbers to a set of possible numbers in 524 /// the target. 525 /// \param [in] SourceOperands - The operands in the original 526 /// IRSimilarityCandidate in the current instruction. 527 /// \param [in] TargetValueNumbers - The global value numbers of the operands in 528 /// the corresponding Instruction in the other IRSimilarityCandidate. 529 /// \returns true if there exists a possible mapping between the source 530 /// Instruction operands and the target Instruction operands, and false if not. 531 static bool checkNumberingAndReplaceCommutative( 532 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 533 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 534 ArrayRef<Value *> &SourceOperands, 535 DenseSet<unsigned> &TargetValueNumbers){ 536 537 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 538 539 unsigned ArgVal; 540 bool WasInserted; 541 542 // Iterate over the operands in the source IRSimilarityCandidate to determine 543 // whether there exists an operand in the other IRSimilarityCandidate that 544 // creates a valid mapping of Value to Value between the 545 // IRSimilarityCaniddates. 546 for (Value *V : SourceOperands) { 547 ArgVal = SourceValueToNumberMapping.find(V)->second; 548 549 // Instead of finding a current mapping, we attempt to insert a set. 550 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 551 std::make_pair(ArgVal, TargetValueNumbers)); 552 553 // We need to iterate over the items in other IRSimilarityCandidate's 554 // Instruction to determine whether there is a valid mapping of 555 // Value to Value. 556 DenseSet<unsigned> NewSet; 557 for (unsigned &Curr : ValueMappingIt->second) 558 // If we can find the value in the mapping, we add it to the new set. 559 if (TargetValueNumbers.contains(Curr)) 560 NewSet.insert(Curr); 561 562 // If we could not find a Value, return 0. 563 if (NewSet.empty()) 564 return false; 565 566 // Otherwise replace the old mapping with the newly constructed one. 567 if (NewSet.size() != ValueMappingIt->second.size()) 568 ValueMappingIt->second.swap(NewSet); 569 570 // We have reached no conclusions about the mapping, and cannot remove 571 // any items from the other operands, so we move to check the next operand. 572 if (ValueMappingIt->second.size() != 1) 573 continue; 574 575 unsigned ValToRemove = *ValueMappingIt->second.begin(); 576 // When there is only one item left in the mapping for and operand, remove 577 // the value from the other operands. If it results in there being no 578 // mapping, return false, it means the mapping is wrong 579 for (Value *InnerV : SourceOperands) { 580 if (V == InnerV) 581 continue; 582 583 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 584 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 585 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 586 continue; 587 588 ValueMappingIt->second.erase(ValToRemove); 589 if (ValueMappingIt->second.empty()) 590 return false; 591 } 592 } 593 594 return true; 595 } 596 597 /// Determine if operand number \p TargetArgVal is in the current mapping set 598 /// for operand number \p SourceArgVal. 599 /// 600 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 601 /// value numbers from source IRSimilarityCandidate to target 602 /// IRSimilarityCandidate. 603 /// \param [in] SourceArgVal The global value number for an operand in the 604 /// in the original candidate. 605 /// \param [in] TargetArgVal The global value number for the corresponding 606 /// operand in the other candidate. 607 /// \returns True if there exists a mapping and false if not. 608 bool checkNumberingAndReplace( 609 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 610 unsigned SourceArgVal, unsigned TargetArgVal) { 611 // We are given two unsigned integers representing the global values of 612 // the operands in different IRSimilarityCandidates and a current mapping 613 // between the two. 614 // 615 // Source Operand GVN: 1 616 // Target Operand GVN: 2 617 // CurrentMapping: {1: {1, 2}} 618 // 619 // Since we have mapping, and the target operand is contained in the set, we 620 // update it to: 621 // CurrentMapping: {1: {2}} 622 // and can return true. But, if the mapping was 623 // CurrentMapping: {1: {3}} 624 // we would return false. 625 626 bool WasInserted; 627 DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 628 629 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 630 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 631 632 // If we created a new mapping, then we are done. 633 if (WasInserted) 634 return true; 635 636 // If there is more than one option in the mapping set, and the target value 637 // is included in the mapping set replace that set with one that only includes 638 // the target value, as it is the only valid mapping via the non commutative 639 // instruction. 640 641 DenseSet<unsigned> &TargetSet = Val->second; 642 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 643 TargetSet.clear(); 644 TargetSet.insert(TargetArgVal); 645 return true; 646 } 647 648 // Return true if we can find the value in the set. 649 return TargetSet.contains(TargetArgVal); 650 } 651 652 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 653 OperandMapping A, OperandMapping B) { 654 // Iterators to keep track of where we are in the operands for each 655 // Instruction. 656 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 657 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 658 unsigned OperandLength = A.OperVals.size(); 659 660 // For each operand, get the value numbering and ensure it is consistent. 661 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 662 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 663 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 664 665 // Attempt to add a set with only the target value. If there is no mapping 666 // we can create it here. 667 // 668 // For an instruction like a subtraction: 669 // IRSimilarityCandidateA: IRSimilarityCandidateB: 670 // %resultA = sub %a, %b %resultB = sub %d, %e 671 // 672 // We map %a -> %d and %b -> %e. 673 // 674 // And check to see whether their mapping is consistent in 675 // checkNumberingAndReplace. 676 677 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 678 return false; 679 680 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 681 return false; 682 } 683 return true; 684 } 685 686 bool IRSimilarityCandidate::compareCommutativeOperandMapping( 687 OperandMapping A, OperandMapping B) { 688 DenseSet<unsigned> ValueNumbersA; 689 DenseSet<unsigned> ValueNumbersB; 690 691 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 692 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 693 unsigned OperandLength = A.OperVals.size(); 694 695 // Find the value number sets for the operands. 696 for (unsigned Idx = 0; Idx < OperandLength; 697 Idx++, VItA++, VItB++) { 698 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 699 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 700 } 701 702 // Iterate over the operands in the first IRSimilarityCandidate and make sure 703 // there exists a possible mapping with the operands in the second 704 // IRSimilarityCandidate. 705 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 706 A.ValueNumberMapping, A.OperVals, 707 ValueNumbersB)) 708 return false; 709 710 // Iterate over the operands in the second IRSimilarityCandidate and make sure 711 // there exists a possible mapping with the operands in the first 712 // IRSimilarityCandidate. 713 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 714 B.ValueNumberMapping, B.OperVals, 715 ValueNumbersA)) 716 return false; 717 718 return true; 719 } 720 721 bool IRSimilarityCandidate::compareAssignmentMapping( 722 const unsigned InstValA, const unsigned &InstValB, 723 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, 724 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { 725 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 726 bool WasInserted; 727 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 728 std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 729 if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 730 return false; 731 else if (ValueMappingIt->second.size() != 1) { 732 for (unsigned OtherVal : ValueMappingIt->second) { 733 if (OtherVal == InstValB) 734 continue; 735 if (!ValueNumberMappingA.contains(OtherVal)) 736 continue; 737 if (!ValueNumberMappingA[OtherVal].contains(InstValA)) 738 continue; 739 ValueNumberMappingA[OtherVal].erase(InstValA); 740 } 741 ValueNumberMappingA.erase(ValueMappingIt); 742 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 743 std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 744 } 745 746 return true; 747 } 748 749 bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, 750 RelativeLocMapping B) { 751 // Get the basic blocks the label refers to. 752 BasicBlock *ABB = cast<BasicBlock>(A.OperVal); 753 BasicBlock *BBB = cast<BasicBlock>(B.OperVal); 754 755 // Get the basic blocks contained in each region. 756 DenseSet<BasicBlock *> BasicBlockA; 757 DenseSet<BasicBlock *> BasicBlockB; 758 A.IRSC.getBasicBlocks(BasicBlockA); 759 B.IRSC.getBasicBlocks(BasicBlockB); 760 761 // Determine if the block is contained in the region. 762 bool AContained = BasicBlockA.contains(ABB); 763 bool BContained = BasicBlockB.contains(BBB); 764 765 // Both blocks need to be contained in the region, or both need to be outside 766 // the region. 767 if (AContained != BContained) 768 return false; 769 770 // If both are contained, then we need to make sure that the relative 771 // distance to the target blocks are the same. 772 if (AContained) 773 return A.RelativeLocation == B.RelativeLocation; 774 return true; 775 } 776 777 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 778 const IRSimilarityCandidate &B) { 779 DenseMap<unsigned, DenseSet<unsigned>> MappingA; 780 DenseMap<unsigned, DenseSet<unsigned>> MappingB; 781 return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB); 782 } 783 784 typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &, 785 SmallVector<int, 4> &, ArrayRef<Value *> &, 786 ArrayRef<Value *> &> 787 ZippedRelativeLocationsT; 788 789 bool IRSimilarityCandidate::compareStructure( 790 const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, 791 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, 792 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { 793 if (A.getLength() != B.getLength()) 794 return false; 795 796 if (A.ValueToNumber.size() != B.ValueToNumber.size()) 797 return false; 798 799 iterator ItA = A.begin(); 800 iterator ItB = B.begin(); 801 802 // These ValueNumber Mapping sets create a create a mapping between the values 803 // in one candidate to values in the other candidate. If we create a set with 804 // one element, and that same element maps to the original element in the 805 // candidate we have a good mapping. 806 807 // Iterate over the instructions contained in each candidate 808 unsigned SectionLength = A.getStartIdx() + A.getLength(); 809 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 810 ItA++, ItB++, Loc++) { 811 // Make sure the instructions are similar to one another. 812 if (!isClose(*ItA, *ItB)) 813 return false; 814 815 Instruction *IA = ItA->Inst; 816 Instruction *IB = ItB->Inst; 817 818 if (!ItA->Legal || !ItB->Legal) 819 return false; 820 821 // Get the operand sets for the instructions. 822 ArrayRef<Value *> OperValsA = ItA->OperVals; 823 ArrayRef<Value *> OperValsB = ItB->OperVals; 824 825 unsigned InstValA = A.ValueToNumber.find(IA)->second; 826 unsigned InstValB = B.ValueToNumber.find(IB)->second; 827 828 // Ensure that the mappings for the instructions exists. 829 if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA, 830 ValueNumberMappingB)) 831 return false; 832 833 if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB, 834 ValueNumberMappingA)) 835 return false; 836 837 // We have different paths for commutative instructions and non-commutative 838 // instructions since commutative instructions could allow multiple mappings 839 // to certain values. 840 if (IA->isCommutative() && !isa<FPMathOperator>(IA) && 841 !isa<IntrinsicInst>(IA)) { 842 if (!compareCommutativeOperandMapping( 843 {A, OperValsA, ValueNumberMappingA}, 844 {B, OperValsB, ValueNumberMappingB})) 845 return false; 846 continue; 847 } 848 849 // Handle the non-commutative cases. 850 if (!compareNonCommutativeOperandMapping( 851 {A, OperValsA, ValueNumberMappingA}, 852 {B, OperValsB, ValueNumberMappingB})) 853 return false; 854 855 // Here we check that between two corresponding instructions, 856 // when referring to a basic block in the same region, the 857 // relative locations are the same. And, that the instructions refer to 858 // basic blocks outside the region in the same corresponding locations. 859 860 // We are able to make the assumption about blocks outside of the region 861 // since the target block labels are considered values and will follow the 862 // same number matching that we defined for the other instructions in the 863 // region. So, at this point, in each location we target a specific block 864 // outside the region, we are targeting a corresponding block in each 865 // analagous location in the region we are comparing to. 866 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) && 867 !(isa<PHINode>(IA) && isa<PHINode>(IB))) 868 continue; 869 870 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations; 871 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations; 872 ArrayRef<Value *> ABL = ItA->getBlockOperVals(); 873 ArrayRef<Value *> BBL = ItB->getBlockOperVals(); 874 875 // Check to make sure that the number of operands, and branching locations 876 // between BranchInsts is the same. 877 if (RelBlockLocsA.size() != RelBlockLocsB.size() && 878 ABL.size() != BBL.size()) 879 return false; 880 881 assert(RelBlockLocsA.size() == ABL.size() && 882 "Block information vectors not the same size."); 883 assert(RelBlockLocsB.size() == BBL.size() && 884 "Block information vectors not the same size."); 885 886 ZippedRelativeLocationsT ZippedRelativeLocations = 887 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL); 888 if (any_of(ZippedRelativeLocations, 889 [&A, &B](std::tuple<int, int, Value *, Value *> R) { 890 return !checkRelativeLocations( 891 {A, std::get<0>(R), std::get<2>(R)}, 892 {B, std::get<1>(R), std::get<3>(R)}); 893 })) 894 return false; 895 } 896 return true; 897 } 898 899 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 900 const IRSimilarityCandidate &B) { 901 auto DoesOverlap = [](const IRSimilarityCandidate &X, 902 const IRSimilarityCandidate &Y) { 903 // Check: 904 // XXXXXX X starts before Y ends 905 // YYYYYYY Y starts after X starts 906 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 907 }; 908 909 return DoesOverlap(A, B) || DoesOverlap(B, A); 910 } 911 912 void IRSimilarityIdentifier::populateMapper( 913 Module &M, std::vector<IRInstructionData *> &InstrList, 914 std::vector<unsigned> &IntegerMapping) { 915 916 std::vector<IRInstructionData *> InstrListForModule; 917 std::vector<unsigned> IntegerMappingForModule; 918 // Iterate over the functions in the module to map each Instruction in each 919 // BasicBlock to an unsigned integer. 920 Mapper.initializeForBBs(M); 921 922 for (Function &F : M) { 923 924 if (F.empty()) 925 continue; 926 927 for (BasicBlock &BB : F) { 928 929 // BB has potential to have similarity since it has a size greater than 2 930 // and can therefore match other regions greater than 2. Map it to a list 931 // of unsigned integers. 932 Mapper.convertToUnsignedVec(BB, InstrListForModule, 933 IntegerMappingForModule); 934 } 935 936 BasicBlock::iterator It = F.begin()->end(); 937 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule, 938 true); 939 if (InstrListForModule.size() > 0) 940 Mapper.IDL->push_back(*InstrListForModule.back()); 941 } 942 943 // Insert the InstrListForModule at the end of the overall InstrList so that 944 // we can have a long InstrList for the entire set of Modules being analyzed. 945 llvm::append_range(InstrList, InstrListForModule); 946 // Do the same as above, but for IntegerMapping. 947 llvm::append_range(IntegerMapping, IntegerMappingForModule); 948 } 949 950 void IRSimilarityIdentifier::populateMapper( 951 ArrayRef<std::unique_ptr<Module>> &Modules, 952 std::vector<IRInstructionData *> &InstrList, 953 std::vector<unsigned> &IntegerMapping) { 954 955 // Iterate over, and map the instructions in each module. 956 for (const std::unique_ptr<Module> &M : Modules) 957 populateMapper(*M, InstrList, IntegerMapping); 958 } 959 960 /// From a repeated subsequence, find all the different instances of the 961 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 962 /// the IRInstructionData in subsequence. 963 /// 964 /// \param [in] Mapper - The instruction mapper for basic correctness checks. 965 /// \param [in] InstrList - The vector that holds the instruction data. 966 /// \param [in] IntegerMapping - The vector that holds the mapped integers. 967 /// \param [out] CandsForRepSubstring - The vector to store the generated 968 /// IRSimilarityCandidates. 969 static void createCandidatesFromSuffixTree( 970 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, 971 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 972 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 973 974 unsigned StringLen = RS.Length; 975 if (StringLen < 2) 976 return; 977 978 // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 979 for (const unsigned &StartIdx : RS.StartIndices) { 980 unsigned EndIdx = StartIdx + StringLen - 1; 981 982 // Check that this subsequence does not contain an illegal instruction. 983 bool ContainsIllegal = false; 984 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 985 unsigned Key = IntegerMapping[CurrIdx]; 986 if (Key > Mapper.IllegalInstrNumber) { 987 ContainsIllegal = true; 988 break; 989 } 990 } 991 992 // If we have an illegal instruction, we should not create an 993 // IRSimilarityCandidate for this region. 994 if (ContainsIllegal) 995 continue; 996 997 // We are getting iterators to the instructions in this region of code 998 // by advancing the start and end indices from the start of the 999 // InstrList. 1000 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 1001 std::advance(StartIt, StartIdx); 1002 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 1003 std::advance(EndIt, EndIdx); 1004 1005 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 1006 } 1007 } 1008 1009 void IRSimilarityCandidate::createCanonicalRelationFrom( 1010 IRSimilarityCandidate &SourceCand, 1011 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, 1012 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) { 1013 assert(SourceCand.CanonNumToNumber.size() != 0 && 1014 "Base canonical relationship is empty!"); 1015 assert(SourceCand.NumberToCanonNum.size() != 0 && 1016 "Base canonical relationship is empty!"); 1017 1018 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty"); 1019 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty"); 1020 1021 DenseSet<unsigned> UsedGVNs; 1022 // Iterate over the mappings provided from this candidate to SourceCand. We 1023 // are then able to map the GVN in this candidate to the same canonical number 1024 // given to the corresponding GVN in SourceCand. 1025 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) { 1026 unsigned SourceGVN = GVNMapping.first; 1027 1028 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!"); 1029 1030 unsigned ResultGVN; 1031 // We need special handling if we have more than one potential value. This 1032 // means that there are at least two GVNs that could correspond to this GVN. 1033 // This could lead to potential swapping later on, so we make a decision 1034 // here to ensure a one-to-one mapping. 1035 if (GVNMapping.second.size() > 1) { 1036 bool Found = false; 1037 for (unsigned Val : GVNMapping.second) { 1038 // We make sure the target value number hasn't already been reserved. 1039 if (UsedGVNs.contains(Val)) 1040 continue; 1041 1042 // We make sure that the opposite mapping is still consistent. 1043 DenseMap<unsigned, DenseSet<unsigned>>::iterator It = 1044 FromSourceMapping.find(Val); 1045 1046 if (!It->second.contains(SourceGVN)) 1047 continue; 1048 1049 // We pick the first item that satisfies these conditions. 1050 Found = true; 1051 ResultGVN = Val; 1052 break; 1053 } 1054 1055 assert(Found && "Could not find matching value for source GVN"); 1056 (void)Found; 1057 1058 } else 1059 ResultGVN = *GVNMapping.second.begin(); 1060 1061 // Whatever GVN is found, we mark it as used. 1062 UsedGVNs.insert(ResultGVN); 1063 1064 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN); 1065 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN)); 1066 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum)); 1067 } 1068 1069 DenseSet<BasicBlock *> BBSet; 1070 getBasicBlocks(BBSet); 1071 // Find canonical numbers for the BasicBlocks in the current candidate. 1072 // This is done by finding the corresponding value for the first instruction 1073 // in the block in the current candidate, finding the matching value in the 1074 // source candidate. Then by finding the parent of this value, use the 1075 // canonical number of the block in the source candidate for the canonical 1076 // number in the current candidate. 1077 for (BasicBlock *BB : BBSet) { 1078 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second; 1079 1080 // We can skip the BasicBlock if the canonical numbering has already been 1081 // found in a separate instruction. 1082 if (NumberToCanonNum.contains(BBGVNForCurrCand)) 1083 continue; 1084 1085 // If the basic block is the starting block, then the shared instruction may 1086 // not be the first instruction in the block, it will be the first 1087 // instruction in the similarity region. 1088 Value *FirstOutlineInst = BB == getStartBB() 1089 ? frontInstruction() 1090 : &*BB->instructionsWithoutDebug().begin(); 1091 1092 unsigned FirstInstGVN = *getGVN(FirstOutlineInst); 1093 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN); 1094 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum); 1095 Value *SourceV = *SourceCand.fromGVN(SourceGVN); 1096 BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent(); 1097 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB); 1098 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN); 1099 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand)); 1100 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN)); 1101 } 1102 } 1103 1104 void IRSimilarityCandidate::createCanonicalRelationFrom( 1105 IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge, 1106 IRSimilarityCandidate &TargetCandLarge) { 1107 assert(!SourceCand.CanonNumToNumber.empty() && 1108 "Canonical Relationship is non-empty"); 1109 assert(!SourceCand.NumberToCanonNum.empty() && 1110 "Canonical Relationship is non-empty"); 1111 1112 assert(!SourceCandLarge.CanonNumToNumber.empty() && 1113 "Canonical Relationship is non-empty"); 1114 assert(!SourceCandLarge.NumberToCanonNum.empty() && 1115 "Canonical Relationship is non-empty"); 1116 1117 assert(!TargetCandLarge.CanonNumToNumber.empty() && 1118 "Canonical Relationship is non-empty"); 1119 assert(!TargetCandLarge.NumberToCanonNum.empty() && 1120 "Canonical Relationship is non-empty"); 1121 1122 assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty"); 1123 assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty"); 1124 1125 // We're going to use the larger candidates as a "bridge" to create the 1126 // canonical number for the target candidate since we have idetified two 1127 // candidates as subsequences of larger sequences, and therefore must be 1128 // structurally similar. 1129 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) { 1130 Value *CurrVal = ValueNumPair.first; 1131 unsigned TargetCandGVN = ValueNumPair.second; 1132 1133 // Find the numbering in the large candidate that surrounds the 1134 // current candidate. 1135 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal); 1136 assert(OLargeTargetGVN.has_value() && "GVN not found for Value"); 1137 1138 // Get the canonical numbering in the large target candidate. 1139 std::optional<unsigned> OTargetCandCanon = 1140 TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value()); 1141 assert(OTargetCandCanon.has_value() && 1142 "Canononical Number not found for GVN"); 1143 1144 // Get the GVN in the large source candidate from the canonical numbering. 1145 std::optional<unsigned> OLargeSourceGVN = 1146 SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value()); 1147 assert(OLargeSourceGVN.has_value() && 1148 "GVN Number not found for Canonical Number"); 1149 1150 // Get the Value from the GVN in the large source candidate. 1151 std::optional<Value *> OLargeSourceV = 1152 SourceCandLarge.fromGVN(OLargeSourceGVN.value()); 1153 assert(OLargeSourceV.has_value() && "Value not found for GVN"); 1154 1155 // Get the GVN number for the Value in the source candidate. 1156 std::optional<unsigned> OSourceGVN = 1157 SourceCand.getGVN(OLargeSourceV.value()); 1158 assert(OSourceGVN.has_value() && "GVN Number not found for Value"); 1159 1160 // Get the canonical numbering from the GVN/ 1161 std::optional<unsigned> OSourceCanon = 1162 SourceCand.getCanonicalNum(OSourceGVN.value()); 1163 assert(OSourceCanon.has_value() && "Canon Number not found for GVN"); 1164 1165 // Insert the canonical numbering and GVN pair into their respective 1166 // mappings. 1167 CanonNumToNumber.insert( 1168 std::make_pair(OSourceCanon.value(), TargetCandGVN)); 1169 NumberToCanonNum.insert( 1170 std::make_pair(TargetCandGVN, OSourceCanon.value())); 1171 } 1172 } 1173 1174 void IRSimilarityCandidate::createCanonicalMappingFor( 1175 IRSimilarityCandidate &CurrCand) { 1176 assert(CurrCand.CanonNumToNumber.size() == 0 && 1177 "Canonical Relationship is non-empty"); 1178 assert(CurrCand.NumberToCanonNum.size() == 0 && 1179 "Canonical Relationship is non-empty"); 1180 1181 unsigned CanonNum = 0; 1182 // Iterate over the value numbers found, the order does not matter in this 1183 // case. 1184 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) { 1185 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum)); 1186 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first)); 1187 CanonNum++; 1188 } 1189 } 1190 1191 /// Look for larger IRSimilarityCandidates From the previously matched 1192 /// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is 1193 /// an overlap, return a pair of structurally similar, larger 1194 /// IRSimilarityCandidates. 1195 /// 1196 /// \param [in] CandA - The first candidate we are trying to determine the 1197 /// structure of. 1198 /// \param [in] CandB - The second candidate we are trying to determine the 1199 /// structure of. 1200 /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in 1201 /// a circuit to the IRSimilarityCandidates that include this instruction. 1202 /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a 1203 /// number representing the structural group assigned to it. 1204 static std::optional< 1205 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> 1206 CheckLargerCands( 1207 IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, 1208 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, 1209 DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) { 1210 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA; 1211 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB; 1212 DenseSet<unsigned> IncludedGroupsA; 1213 DenseSet<unsigned> IncludedGroupsB; 1214 1215 // Find the overall similarity group numbers that fully contain the candidate, 1216 // and record the larger candidate for each group. 1217 auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx()); 1218 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> 1219 Result; 1220 1221 unsigned CandAStart = CandA.getStartIdx(); 1222 unsigned CandAEnd = CandA.getEndIdx(); 1223 unsigned CandBStart = CandB.getStartIdx(); 1224 unsigned CandBEnd = CandB.getEndIdx(); 1225 if (IdxToCandidateIt == IndexToIncludedCand.end()) 1226 return Result; 1227 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { 1228 if (MatchedCand->getStartIdx() > CandAStart || 1229 (MatchedCand->getEndIdx() < CandAEnd)) 1230 continue; 1231 unsigned GroupNum = CandToGroup.find(MatchedCand)->second; 1232 IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand)); 1233 IncludedGroupsA.insert(GroupNum); 1234 } 1235 1236 // Find the overall similarity group numbers that fully contain the next 1237 // candidate, and record the larger candidate for each group. 1238 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart); 1239 if (IdxToCandidateIt == IndexToIncludedCand.end()) 1240 return Result; 1241 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { 1242 if (MatchedCand->getStartIdx() > CandBStart || 1243 MatchedCand->getEndIdx() < CandBEnd) 1244 continue; 1245 unsigned GroupNum = CandToGroup.find(MatchedCand)->second; 1246 IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand)); 1247 IncludedGroupsB.insert(GroupNum); 1248 } 1249 1250 // Find the intersection between the two groups, these are the groups where 1251 // the larger candidates exist. 1252 set_intersect(IncludedGroupsA, IncludedGroupsB); 1253 1254 // If there is no intersection between the sets, then we cannot determine 1255 // whether or not there is a match. 1256 if (IncludedGroupsA.empty()) 1257 return Result; 1258 1259 // Create a pair that contains the larger candidates. 1260 auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin()); 1261 auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin()); 1262 Result = std::make_pair(ItA->second, ItB->second); 1263 return Result; 1264 } 1265 1266 /// From the list of IRSimilarityCandidates, perform a comparison between each 1267 /// IRSimilarityCandidate to determine if there are overlapping 1268 /// IRInstructionData, or if they do not have the same structure. 1269 /// 1270 /// \param [in] CandsForRepSubstring - The vector containing the 1271 /// IRSimilarityCandidates. 1272 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 1273 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 1274 /// vector are structurally similar to one another. 1275 /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in 1276 /// a circuit to the IRSimilarityCandidates that include this instruction. 1277 /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a 1278 /// number representing the structural group assigned to it. 1279 static void findCandidateStructures( 1280 std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 1281 DenseMap<unsigned, SimilarityGroup> &StructuralGroups, 1282 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, 1283 DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup 1284 ) { 1285 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 1286 InnerCandEndIt; 1287 1288 // IRSimilarityCandidates each have a structure for operand use. It is 1289 // possible that two instances of the same subsequences have different 1290 // structure. Each type of structure found is assigned a number. This 1291 // DenseMap maps an IRSimilarityCandidate to which type of similarity 1292 // discovered it fits within. 1293 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 1294 1295 // Find the compatibility from each candidate to the others to determine 1296 // which candidates overlap and which have the same structure by mapping 1297 // each structure to a different group. 1298 bool SameStructure; 1299 bool Inserted; 1300 unsigned CurrentGroupNum = 0; 1301 unsigned OuterGroupNum; 1302 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 1303 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 1304 DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 1305 1306 // Iterate over the candidates to determine its structural and overlapping 1307 // compatibility with other instructions 1308 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 1309 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 1310 for (CandIt = CandsForRepSubstring.begin(), 1311 CandEndIt = CandsForRepSubstring.end(); 1312 CandIt != CandEndIt; CandIt++) { 1313 1314 // Determine if it has an assigned structural group already. 1315 CandToGroupIt = CandToGroup.find(&*CandIt); 1316 if (CandToGroupIt == CandToGroup.end()) { 1317 // If not, we assign it one, and add it to our mapping. 1318 std::tie(CandToGroupIt, Inserted) = 1319 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 1320 } 1321 1322 // Get the structural group number from the iterator. 1323 OuterGroupNum = CandToGroupIt->second; 1324 1325 // Check if we already have a list of IRSimilarityCandidates for the current 1326 // structural group. Create one if one does not exist. 1327 CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 1328 if (CurrentGroupPair == StructuralGroups.end()) { 1329 IRSimilarityCandidate::createCanonicalMappingFor(*CandIt); 1330 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 1331 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 1332 } 1333 1334 // Iterate over the IRSimilarityCandidates following the current 1335 // IRSimilarityCandidate in the list to determine whether the two 1336 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 1337 // of IRSimilarityCandidates. 1338 for (InnerCandIt = std::next(CandIt), 1339 InnerCandEndIt = CandsForRepSubstring.end(); 1340 InnerCandIt != InnerCandEndIt; InnerCandIt++) { 1341 1342 // We check if the inner item has a group already, if it does, we skip it. 1343 CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 1344 if (CandToGroupItInner != CandToGroup.end()) 1345 continue; 1346 1347 // Check if we have found structural similarity between two candidates 1348 // that fully contains the first and second candidates. 1349 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> 1350 LargerPair = CheckLargerCands( 1351 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup); 1352 1353 // If a pair was found, it means that we can assume that these smaller 1354 // substrings are also structurally similar. Use the larger candidates to 1355 // determine the canonical mapping between the two sections. 1356 if (LargerPair.has_value()) { 1357 SameStructure = true; 1358 InnerCandIt->createCanonicalRelationFrom( 1359 *CandIt, *LargerPair.value().first, *LargerPair.value().second); 1360 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 1361 CurrentGroupPair->second.push_back(*InnerCandIt); 1362 continue; 1363 } 1364 1365 // Otherwise we determine if they have the same structure and add it to 1366 // vector if they match. 1367 ValueNumberMappingA.clear(); 1368 ValueNumberMappingB.clear(); 1369 SameStructure = IRSimilarityCandidate::compareStructure( 1370 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB); 1371 if (!SameStructure) 1372 continue; 1373 1374 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA, 1375 ValueNumberMappingB); 1376 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 1377 CurrentGroupPair->second.push_back(*InnerCandIt); 1378 } 1379 } 1380 } 1381 1382 void IRSimilarityIdentifier::findCandidates( 1383 std::vector<IRInstructionData *> &InstrList, 1384 std::vector<unsigned> &IntegerMapping) { 1385 SuffixTree ST(IntegerMapping); 1386 1387 std::vector<IRSimilarityCandidate> CandsForRepSubstring; 1388 std::vector<SimilarityGroup> NewCandidateGroups; 1389 1390 DenseMap<unsigned, SimilarityGroup> StructuralGroups; 1391 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand; 1392 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 1393 1394 // Iterate over the subsequences found by the Suffix Tree to create 1395 // IRSimilarityCandidates for each repeated subsequence and determine which 1396 // instances are structurally similar to one another. 1397 1398 // Sort the suffix tree from longest substring to shortest. 1399 std::vector<SuffixTree::RepeatedSubstring> RSes; 1400 for (SuffixTree::RepeatedSubstring &RS : ST) 1401 RSes.push_back(RS); 1402 1403 llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS, 1404 const SuffixTree::RepeatedSubstring &RHS) { 1405 return LHS.Length > RHS.Length; 1406 }); 1407 for (SuffixTree::RepeatedSubstring &RS : RSes) { 1408 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, 1409 CandsForRepSubstring); 1410 1411 if (CandsForRepSubstring.size() < 2) 1412 continue; 1413 1414 findCandidateStructures(CandsForRepSubstring, StructuralGroups, 1415 IndexToIncludedCand, CandToGroup); 1416 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) { 1417 // We only add the group if it contains more than one 1418 // IRSimilarityCandidate. If there is only one, that means there is no 1419 // other repeated subsequence with the same structure. 1420 if (Group.second.size() > 1) { 1421 SimilarityCandidates->push_back(Group.second); 1422 // Iterate over each candidate in the group, and add an entry for each 1423 // instruction included with a mapping to a set of 1424 // IRSimilarityCandidates that include that instruction. 1425 for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) { 1426 for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx(); 1427 Idx <= Edx; ++Idx) { 1428 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator 1429 IdIt; 1430 IdIt = IndexToIncludedCand.find(Idx); 1431 bool Inserted = false; 1432 if (IdIt == IndexToIncludedCand.end()) 1433 std::tie(IdIt, Inserted) = IndexToIncludedCand.insert( 1434 std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>())); 1435 IdIt->second.insert(&IRCand); 1436 } 1437 // Add mapping of candidate to the overall similarity group number. 1438 CandToGroup.insert( 1439 std::make_pair(&IRCand, SimilarityCandidates->size() - 1)); 1440 } 1441 } 1442 } 1443 1444 CandsForRepSubstring.clear(); 1445 StructuralGroups.clear(); 1446 NewCandidateGroups.clear(); 1447 } 1448 } 1449 1450 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 1451 ArrayRef<std::unique_ptr<Module>> Modules) { 1452 resetSimilarityCandidates(); 1453 1454 std::vector<IRInstructionData *> InstrList; 1455 std::vector<unsigned> IntegerMapping; 1456 Mapper.InstClassifier.EnableBranches = this->EnableBranches; 1457 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; 1458 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; 1459 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; 1460 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; 1461 1462 populateMapper(Modules, InstrList, IntegerMapping); 1463 findCandidates(InstrList, IntegerMapping); 1464 1465 return *SimilarityCandidates; 1466 } 1467 1468 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 1469 resetSimilarityCandidates(); 1470 Mapper.InstClassifier.EnableBranches = this->EnableBranches; 1471 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; 1472 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; 1473 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; 1474 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; 1475 1476 std::vector<IRInstructionData *> InstrList; 1477 std::vector<unsigned> IntegerMapping; 1478 1479 populateMapper(M, InstrList, IntegerMapping); 1480 findCandidates(InstrList, IntegerMapping); 1481 1482 return *SimilarityCandidates; 1483 } 1484 1485 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 1486 "ir-similarity-identifier", false, true) 1487 1488 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 1489 : ModulePass(ID) { 1490 initializeIRSimilarityIdentifierWrapperPassPass( 1491 *PassRegistry::getPassRegistry()); 1492 } 1493 1494 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 1495 IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, 1496 MatchCallsByName, !DisableIntrinsics, 1497 false)); 1498 return false; 1499 } 1500 1501 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 1502 IRSI.reset(); 1503 return false; 1504 } 1505 1506 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 1507 IRSI->findSimilarity(M); 1508 return false; 1509 } 1510 1511 AnalysisKey IRSimilarityAnalysis::Key; 1512 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 1513 ModuleAnalysisManager &) { 1514 auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, 1515 MatchCallsByName, !DisableIntrinsics, 1516 false); 1517 IRSI.findSimilarity(M); 1518 return IRSI; 1519 } 1520 1521 PreservedAnalyses 1522 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 1523 IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 1524 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt = 1525 IRSI.getSimilarity(); 1526 1527 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 1528 OS << CandVec.size() << " candidates of length " 1529 << CandVec.begin()->getLength() << ". Found in: \n"; 1530 for (IRSimilarityCandidate &Cand : CandVec) { 1531 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 1532 << ", Basic Block: "; 1533 if (Cand.front()->Inst->getParent()->getName().str() == "") 1534 OS << "(unnamed)"; 1535 else 1536 OS << Cand.front()->Inst->getParent()->getName().str(); 1537 OS << "\n Start Instruction: "; 1538 Cand.frontInstruction()->print(OS); 1539 OS << "\n End Instruction: "; 1540 Cand.backInstruction()->print(OS); 1541 OS << "\n"; 1542 } 1543 } 1544 1545 return PreservedAnalyses::all(); 1546 } 1547 1548 char IRSimilarityIdentifierWrapperPass::ID = 0; 1549