1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // Implementation file for the IRSimilarityIdentifier for identifying 11 // similarities in IR including the IRInstructionMapper. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Analysis/IRSimilarityIdentifier.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/IR/Intrinsics.h" 18 #include "llvm/IR/Operator.h" 19 #include "llvm/IR/User.h" 20 #include "llvm/InitializePasses.h" 21 #include "llvm/Support/SuffixTree.h" 22 23 using namespace llvm; 24 using namespace IRSimilarity; 25 26 IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 27 IRInstructionDataList &IDList) 28 : Inst(&I), Legal(Legality), IDL(&IDList) { 29 // We check for whether we have a comparison instruction. If it is, we 30 // find the "less than" version of the predicate for consistency for 31 // comparison instructions throught the program. 32 if (CmpInst *C = dyn_cast<CmpInst>(&I)) { 33 CmpInst::Predicate Predicate = predicateForConsistency(C); 34 if (Predicate != C->getPredicate()) 35 RevisedPredicate = Predicate; 36 } 37 38 // Here we collect the operands and their types for determining whether 39 // the structure of the operand use matches between two different candidates. 40 for (Use &OI : I.operands()) { 41 if (isa<CmpInst>(I) && RevisedPredicate.hasValue()) { 42 // If we have a CmpInst where the predicate is reversed, it means the 43 // operands must be reversed as well. 44 OperVals.insert(OperVals.begin(), OI.get()); 45 continue; 46 } 47 48 OperVals.push_back(OI.get()); 49 } 50 } 51 52 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 53 switch (CI->getPredicate()) { 54 case CmpInst::FCMP_OGT: 55 case CmpInst::FCMP_UGT: 56 case CmpInst::FCMP_OGE: 57 case CmpInst::FCMP_UGE: 58 case CmpInst::ICMP_SGT: 59 case CmpInst::ICMP_UGT: 60 case CmpInst::ICMP_SGE: 61 case CmpInst::ICMP_UGE: 62 return CI->getSwappedPredicate(); 63 default: 64 return CI->getPredicate(); 65 } 66 } 67 68 CmpInst::Predicate IRInstructionData::getPredicate() const { 69 assert(isa<CmpInst>(Inst) && 70 "Can only get a predicate from a compare instruction"); 71 72 if (RevisedPredicate.hasValue()) 73 return RevisedPredicate.getValue(); 74 75 return cast<CmpInst>(Inst)->getPredicate(); 76 } 77 78 static StringRef getCalledFunctionName(CallInst &CI) { 79 assert(CI.getCalledFunction() != nullptr && "Called Function is nullptr?"); 80 81 return CI.getCalledFunction()->getName(); 82 } 83 84 bool IRSimilarity::isClose(const IRInstructionData &A, 85 const IRInstructionData &B) { 86 87 if (!A.Legal || !B.Legal) 88 return false; 89 90 // Check if we are performing the same sort of operation on the same types 91 // but not on the same values. 92 if (!A.Inst->isSameOperationAs(B.Inst)) { 93 // If there is a predicate, this means that either there is a swapped 94 // predicate, or that the types are different, we want to make sure that 95 // the predicates are equivalent via swapping. 96 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 97 98 if (A.getPredicate() != B.getPredicate()) 99 return false; 100 101 // If the predicates are the same via swap, make sure that the types are 102 // still the same. 103 auto ZippedTypes = zip(A.OperVals, B.OperVals); 104 105 return all_of( 106 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 107 return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 108 }); 109 } 110 111 return false; 112 } 113 114 // Since any GEP Instruction operands after the first operand cannot be 115 // defined by a register, we must make sure that the operands after the first 116 // are the same in the two instructions 117 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { 118 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); 119 120 // If the instructions do not have the same inbounds restrictions, we do 121 // not consider them the same. 122 if (GEP->isInBounds() != OtherGEP->isInBounds()) 123 return false; 124 125 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); 126 127 // We increment here since we do not care about the first instruction, 128 // we only care about the following operands since they must be the 129 // exact same to be considered similar. 130 return all_of(drop_begin(ZippedOperands), 131 [](std::tuple<llvm::Use &, llvm::Use &> R) { 132 return std::get<0>(R) == std::get<1>(R); 133 }); 134 } 135 136 // If the instructions are functions, we make sure that the function name is 137 // the same. We already know that the types are since is isSameOperationAs is 138 // true. 139 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { 140 CallInst *CIA = cast<CallInst>(A.Inst); 141 CallInst *CIB = cast<CallInst>(B.Inst); 142 if (getCalledFunctionName(*CIA).compare(getCalledFunctionName(*CIB)) != 0) 143 return false; 144 } 145 146 return true; 147 } 148 149 // TODO: This is the same as the MachineOutliner, and should be consolidated 150 // into the same interface. 151 void IRInstructionMapper::convertToUnsignedVec( 152 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 153 std::vector<unsigned> &IntegerMapping) { 154 BasicBlock::iterator It = BB.begin(); 155 156 std::vector<unsigned> IntegerMappingForBB; 157 std::vector<IRInstructionData *> InstrListForBB; 158 159 HaveLegalRange = false; 160 CanCombineWithPrevInstr = false; 161 AddedIllegalLastTime = true; 162 163 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 164 switch (InstClassifier.visit(*It)) { 165 case InstrType::Legal: 166 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 167 break; 168 case InstrType::Illegal: 169 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 170 break; 171 case InstrType::Invisible: 172 AddedIllegalLastTime = false; 173 break; 174 } 175 } 176 177 if (HaveLegalRange) { 178 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 179 for (IRInstructionData *ID : InstrListForBB) 180 this->IDL->push_back(*ID); 181 llvm::append_range(InstrList, InstrListForBB); 182 llvm::append_range(IntegerMapping, IntegerMappingForBB); 183 } 184 } 185 186 // TODO: This is the same as the MachineOutliner, and should be consolidated 187 // into the same interface. 188 unsigned IRInstructionMapper::mapToLegalUnsigned( 189 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 190 std::vector<IRInstructionData *> &InstrListForBB) { 191 // We added something legal, so we should unset the AddedLegalLastTime 192 // flag. 193 AddedIllegalLastTime = false; 194 195 // If we have at least two adjacent legal instructions (which may have 196 // invisible instructions in between), remember that. 197 if (CanCombineWithPrevInstr) 198 HaveLegalRange = true; 199 CanCombineWithPrevInstr = true; 200 201 // Get the integer for this instruction or give it the current 202 // LegalInstrNumber. 203 IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 204 InstrListForBB.push_back(ID); 205 206 // Add to the instruction list 207 bool WasInserted; 208 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 209 ResultIt; 210 std::tie(ResultIt, WasInserted) = 211 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 212 unsigned INumber = ResultIt->second; 213 214 // There was an insertion. 215 if (WasInserted) 216 LegalInstrNumber++; 217 218 IntegerMappingForBB.push_back(INumber); 219 220 // Make sure we don't overflow or use any integers reserved by the DenseMap. 221 assert(LegalInstrNumber < IllegalInstrNumber && 222 "Instruction mapping overflow!"); 223 224 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 225 "Tried to assign DenseMap tombstone or empty key to instruction."); 226 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 227 "Tried to assign DenseMap tombstone or empty key to instruction."); 228 229 return INumber; 230 } 231 232 IRInstructionData * 233 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 234 IRInstructionDataList &IDL) { 235 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 236 } 237 238 IRInstructionDataList * 239 IRInstructionMapper::allocateIRInstructionDataList() { 240 return new (IDLAllocator->Allocate()) IRInstructionDataList(); 241 } 242 243 // TODO: This is the same as the MachineOutliner, and should be consolidated 244 // into the same interface. 245 unsigned IRInstructionMapper::mapToIllegalUnsigned( 246 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 247 std::vector<IRInstructionData *> &InstrListForBB, bool End) { 248 // Can't combine an illegal instruction. Set the flag. 249 CanCombineWithPrevInstr = false; 250 251 // Only add one illegal number per range of legal numbers. 252 if (AddedIllegalLastTime) 253 return IllegalInstrNumber; 254 255 IRInstructionData *ID = nullptr; 256 if (!End) 257 ID = allocateIRInstructionData(*It, false, *IDL); 258 InstrListForBB.push_back(ID); 259 260 // Remember that we added an illegal number last time. 261 AddedIllegalLastTime = true; 262 unsigned INumber = IllegalInstrNumber; 263 IntegerMappingForBB.push_back(IllegalInstrNumber--); 264 265 assert(LegalInstrNumber < IllegalInstrNumber && 266 "Instruction mapping overflow!"); 267 268 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 269 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 270 271 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 272 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 273 274 return INumber; 275 } 276 277 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 278 IRInstructionData *FirstInstIt, 279 IRInstructionData *LastInstIt) 280 : StartIdx(StartIdx), Len(Len) { 281 282 assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 283 assert(LastInstIt != nullptr && "Instruction is nullptr!"); 284 assert(StartIdx + Len > StartIdx && 285 "Overflow for IRSimilarityCandidate range?"); 286 assert(Len - 1 == static_cast<unsigned>(std::distance( 287 iterator(FirstInstIt), iterator(LastInstIt))) && 288 "Length of the first and last IRInstructionData do not match the " 289 "given length"); 290 291 // We iterate over the given instructions, and map each unique value 292 // to a unique number in the IRSimilarityCandidate ValueToNumber and 293 // NumberToValue maps. A constant get its own value globally, the individual 294 // uses of the constants are not considered to be unique. 295 // 296 // IR: Mapping Added: 297 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 298 // %add2 = add i32 %a, %1 %add2 -> 4 299 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 300 // 301 // when replace with global values, starting from 1, would be 302 // 303 // 3 = add i32 1, 2 304 // 4 = add i32 1, 3 305 // 6 = add i32 5, 2 306 unsigned LocalValNumber = 1; 307 IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 308 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 309 // Map the operand values to an unsigned integer if it does not already 310 // have an unsigned integer assigned to it. 311 for (Value *Arg : ID->OperVals) 312 if (ValueToNumber.find(Arg) == ValueToNumber.end()) { 313 ValueToNumber.try_emplace(Arg, LocalValNumber); 314 NumberToValue.try_emplace(LocalValNumber, Arg); 315 LocalValNumber++; 316 } 317 318 // Mapping the instructions to an unsigned integer if it is not already 319 // exist in the mapping. 320 if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { 321 ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 322 NumberToValue.try_emplace(LocalValNumber, ID->Inst); 323 LocalValNumber++; 324 } 325 } 326 327 // Setting the first and last instruction data pointers for the candidate. If 328 // we got through the entire for loop without hitting an assert, we know 329 // that both of these instructions are not nullptrs. 330 FirstInst = FirstInstIt; 331 LastInst = LastInstIt; 332 } 333 334 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 335 const IRSimilarityCandidate &B) { 336 if (A.getLength() != B.getLength()) 337 return false; 338 339 auto InstrDataForBoth = 340 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 341 342 return all_of(InstrDataForBoth, 343 [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 344 IRInstructionData &A = std::get<0>(R); 345 IRInstructionData &B = std::get<1>(R); 346 if (!A.Legal || !B.Legal) 347 return false; 348 return isClose(A, B); 349 }); 350 } 351 352 /// Determine if one or more of the assigned global value numbers for the 353 /// operands in \p TargetValueNumbers is in the current mapping set for operand 354 /// numbers in \p SourceOperands. The set of possible corresponding global 355 /// value numbers are replaced with the most recent version of compatible 356 /// values. 357 /// 358 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 359 /// value number for the source IRInstructionCandidate. 360 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 361 /// IRSimilarityCandidate global value numbers to a set of possible numbers in 362 /// the target. 363 /// \param [in] SourceOperands - The operands in the original 364 /// IRSimilarityCandidate in the current instruction. 365 /// \param [in] TargetValueNumbers - The global value numbers of the operands in 366 /// the corresponding Instruction in the other IRSimilarityCandidate. 367 /// \returns true if there exists a possible mapping between the source 368 /// Instruction operands and the target Instruction operands, and false if not. 369 static bool checkNumberingAndReplaceCommutative( 370 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 371 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 372 ArrayRef<Value *> &SourceOperands, 373 DenseSet<unsigned> &TargetValueNumbers){ 374 375 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 376 377 unsigned ArgVal; 378 bool WasInserted; 379 380 // Iterate over the operands in the source IRSimilarityCandidate to determine 381 // whether there exists an operand in the other IRSimilarityCandidate that 382 // creates a valid mapping of Value to Value between the 383 // IRSimilarityCaniddates. 384 for (Value *V : SourceOperands) { 385 ArgVal = SourceValueToNumberMapping.find(V)->second; 386 387 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 388 std::make_pair(ArgVal, TargetValueNumbers)); 389 390 // Instead of finding a current mapping, we inserted a set. This means a 391 // mapping did not exist for the source Instruction operand, it has no 392 // current constraints we need to check. 393 if (WasInserted) 394 continue; 395 396 // If a mapping already exists for the source operand to the values in the 397 // other IRSimilarityCandidate we need to iterate over the items in other 398 // IRSimilarityCandidate's Instruction to determine whether there is a valid 399 // mapping of Value to Value. 400 DenseSet<unsigned> NewSet; 401 for (unsigned &Curr : ValueMappingIt->second) 402 // If we can find the value in the mapping, we add it to the new set. 403 if (TargetValueNumbers.contains(Curr)) 404 NewSet.insert(Curr); 405 406 // If we could not find a Value, return 0. 407 if (NewSet.empty()) 408 return false; 409 410 // Otherwise replace the old mapping with the newly constructed one. 411 if (NewSet.size() != ValueMappingIt->second.size()) 412 ValueMappingIt->second.swap(NewSet); 413 414 // We have reached no conclusions about the mapping, and cannot remove 415 // any items from the other operands, so we move to check the next operand. 416 if (ValueMappingIt->second.size() != 1) 417 continue; 418 419 420 unsigned ValToRemove = *ValueMappingIt->second.begin(); 421 // When there is only one item left in the mapping for and operand, remove 422 // the value from the other operands. If it results in there being no 423 // mapping, return false, it means the mapping is wrong 424 for (Value *InnerV : SourceOperands) { 425 if (V == InnerV) 426 continue; 427 428 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 429 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 430 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 431 continue; 432 433 ValueMappingIt->second.erase(ValToRemove); 434 if (ValueMappingIt->second.empty()) 435 return false; 436 } 437 } 438 439 return true; 440 } 441 442 /// Determine if operand number \p TargetArgVal is in the current mapping set 443 /// for operand number \p SourceArgVal. 444 /// 445 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 446 /// value numbers from source IRSimilarityCandidate to target 447 /// IRSimilarityCandidate. 448 /// \param [in] SourceArgVal The global value number for an operand in the 449 /// in the original candidate. 450 /// \param [in] TargetArgVal The global value number for the corresponding 451 /// operand in the other candidate. 452 /// \returns True if there exists a mapping and false if not. 453 bool checkNumberingAndReplace( 454 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 455 unsigned SourceArgVal, unsigned TargetArgVal) { 456 // We are given two unsigned integers representing the global values of 457 // the operands in different IRSimilarityCandidates and a current mapping 458 // between the two. 459 // 460 // Source Operand GVN: 1 461 // Target Operand GVN: 2 462 // CurrentMapping: {1: {1, 2}} 463 // 464 // Since we have mapping, and the target operand is contained in the set, we 465 // update it to: 466 // CurrentMapping: {1: {2}} 467 // and can return true. But, if the mapping was 468 // CurrentMapping: {1: {3}} 469 // we would return false. 470 471 bool WasInserted; 472 DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 473 474 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 475 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 476 477 // If we created a new mapping, then we are done. 478 if (WasInserted) 479 return true; 480 481 // If there is more than one option in the mapping set, and the target value 482 // is included in the mapping set replace that set with one that only includes 483 // the target value, as it is the only valid mapping via the non commutative 484 // instruction. 485 486 DenseSet<unsigned> &TargetSet = Val->second; 487 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 488 TargetSet.clear(); 489 TargetSet.insert(TargetArgVal); 490 return true; 491 } 492 493 // Return true if we can find the value in the set. 494 return TargetSet.contains(TargetArgVal); 495 } 496 497 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 498 OperandMapping A, OperandMapping B) { 499 // Iterators to keep track of where we are in the operands for each 500 // Instruction. 501 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 502 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 503 unsigned OperandLength = A.OperVals.size(); 504 505 // For each operand, get the value numbering and ensure it is consistent. 506 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 507 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 508 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 509 510 // Attempt to add a set with only the target value. If there is no mapping 511 // we can create it here. 512 // 513 // For an instruction like a subtraction: 514 // IRSimilarityCandidateA: IRSimilarityCandidateB: 515 // %resultA = sub %a, %b %resultB = sub %d, %e 516 // 517 // We map %a -> %d and %b -> %e. 518 // 519 // And check to see whether their mapping is consistent in 520 // checkNumberingAndReplace. 521 522 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 523 return false; 524 525 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 526 return false; 527 } 528 return true; 529 } 530 531 bool IRSimilarityCandidate::compareCommutativeOperandMapping( 532 OperandMapping A, OperandMapping B) { 533 DenseSet<unsigned> ValueNumbersA; 534 DenseSet<unsigned> ValueNumbersB; 535 536 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 537 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 538 unsigned OperandLength = A.OperVals.size(); 539 540 // Find the value number sets for the operands. 541 for (unsigned Idx = 0; Idx < OperandLength; 542 Idx++, VItA++, VItB++) { 543 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 544 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 545 } 546 547 // Iterate over the operands in the first IRSimilarityCandidate and make sure 548 // there exists a possible mapping with the operands in the second 549 // IRSimilarityCandidate. 550 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 551 A.ValueNumberMapping, A.OperVals, 552 ValueNumbersB)) 553 return false; 554 555 // Iterate over the operands in the second IRSimilarityCandidate and make sure 556 // there exists a possible mapping with the operands in the first 557 // IRSimilarityCandidate. 558 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 559 B.ValueNumberMapping, B.OperVals, 560 ValueNumbersA)) 561 return false; 562 563 return true; 564 } 565 566 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 567 const IRSimilarityCandidate &B) { 568 if (A.getLength() != B.getLength()) 569 return false; 570 571 if (A.ValueToNumber.size() != B.ValueToNumber.size()) 572 return false; 573 574 iterator ItA = A.begin(); 575 iterator ItB = B.begin(); 576 577 // These sets create a create a mapping between the values in one candidate 578 // to values in the other candidate. If we create a set with one element, 579 // and that same element maps to the original element in the candidate 580 // we have a good mapping. 581 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 582 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 583 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 584 585 bool WasInserted; 586 587 // Iterate over the instructions contained in each candidate 588 unsigned SectionLength = A.getStartIdx() + A.getLength(); 589 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 590 ItA++, ItB++, Loc++) { 591 // Make sure the instructions are similar to one another. 592 if (!isClose(*ItA, *ItB)) 593 return false; 594 595 Instruction *IA = ItA->Inst; 596 Instruction *IB = ItB->Inst; 597 598 if (!ItA->Legal || !ItB->Legal) 599 return false; 600 601 // Get the operand sets for the instructions. 602 ArrayRef<Value *> OperValsA = ItA->OperVals; 603 ArrayRef<Value *> OperValsB = ItB->OperVals; 604 605 unsigned InstValA = A.ValueToNumber.find(IA)->second; 606 unsigned InstValB = B.ValueToNumber.find(IB)->second; 607 608 // Ensure that the mappings for the instructions exists. 609 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 610 std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 611 if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 612 return false; 613 614 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( 615 std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); 616 if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) 617 return false; 618 619 // We have different paths for commutative instructions and non-commutative 620 // instructions since commutative instructions could allow multiple mappings 621 // to certain values. 622 if (IA->isCommutative() && !isa<FPMathOperator>(IA)) { 623 if (!compareCommutativeOperandMapping( 624 {A, OperValsA, ValueNumberMappingA}, 625 {B, OperValsB, ValueNumberMappingB})) 626 return false; 627 continue; 628 } 629 630 // Handle the non-commutative cases. 631 if (!compareNonCommutativeOperandMapping( 632 {A, OperValsA, ValueNumberMappingA}, 633 {B, OperValsB, ValueNumberMappingB})) 634 return false; 635 } 636 return true; 637 } 638 639 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 640 const IRSimilarityCandidate &B) { 641 auto DoesOverlap = [](const IRSimilarityCandidate &X, 642 const IRSimilarityCandidate &Y) { 643 // Check: 644 // XXXXXX X starts before Y ends 645 // YYYYYYY Y starts after X starts 646 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 647 }; 648 649 return DoesOverlap(A, B) || DoesOverlap(B, A); 650 } 651 652 void IRSimilarityIdentifier::populateMapper( 653 Module &M, std::vector<IRInstructionData *> &InstrList, 654 std::vector<unsigned> &IntegerMapping) { 655 656 std::vector<IRInstructionData *> InstrListForModule; 657 std::vector<unsigned> IntegerMappingForModule; 658 // Iterate over the functions in the module to map each Instruction in each 659 // BasicBlock to an unsigned integer. 660 for (Function &F : M) { 661 662 if (F.empty()) 663 continue; 664 665 for (BasicBlock &BB : F) { 666 667 if (BB.sizeWithoutDebug() < 2) 668 continue; 669 670 // BB has potential to have similarity since it has a size greater than 2 671 // and can therefore match other regions greater than 2. Map it to a list 672 // of unsigned integers. 673 Mapper.convertToUnsignedVec(BB, InstrListForModule, 674 IntegerMappingForModule); 675 } 676 } 677 678 // Insert the InstrListForModule at the end of the overall InstrList so that 679 // we can have a long InstrList for the entire set of Modules being analyzed. 680 llvm::append_range(InstrList, InstrListForModule); 681 // Do the same as above, but for IntegerMapping. 682 llvm::append_range(IntegerMapping, IntegerMappingForModule); 683 } 684 685 void IRSimilarityIdentifier::populateMapper( 686 ArrayRef<std::unique_ptr<Module>> &Modules, 687 std::vector<IRInstructionData *> &InstrList, 688 std::vector<unsigned> &IntegerMapping) { 689 690 // Iterate over, and map the instructions in each module. 691 for (const std::unique_ptr<Module> &M : Modules) 692 populateMapper(*M, InstrList, IntegerMapping); 693 } 694 695 /// From a repeated subsequence, find all the different instances of the 696 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 697 /// the IRInstructionData in subsequence. 698 /// 699 /// \param [in] Mapper - The instruction mapper for sanity checks. 700 /// \param [in] InstrList - The vector that holds the instruction data. 701 /// \param [in] IntegerMapping - The vector that holds the mapped integers. 702 /// \param [out] CandsForRepSubstring - The vector to store the generated 703 /// IRSimilarityCandidates. 704 static void createCandidatesFromSuffixTree( 705 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, 706 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 707 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 708 709 unsigned StringLen = RS.Length; 710 711 // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 712 for (const unsigned &StartIdx : RS.StartIndices) { 713 unsigned EndIdx = StartIdx + StringLen - 1; 714 715 // Check that this subsequence does not contain an illegal instruction. 716 bool ContainsIllegal = false; 717 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 718 unsigned Key = IntegerMapping[CurrIdx]; 719 if (Key > Mapper.IllegalInstrNumber) { 720 ContainsIllegal = true; 721 break; 722 } 723 } 724 725 // If we have an illegal instruction, we should not create an 726 // IRSimilarityCandidate for this region. 727 if (ContainsIllegal) 728 continue; 729 730 // We are getting iterators to the instructions in this region of code 731 // by advancing the start and end indices from the start of the 732 // InstrList. 733 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 734 std::advance(StartIt, StartIdx); 735 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 736 std::advance(EndIt, EndIdx); 737 738 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 739 } 740 } 741 742 /// From the list of IRSimilarityCandidates, perform a comparison between each 743 /// IRSimilarityCandidate to determine if there are overlapping 744 /// IRInstructionData, or if they do not have the same structure. 745 /// 746 /// \param [in] CandsForRepSubstring - The vector containing the 747 /// IRSimilarityCandidates. 748 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 749 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 750 /// vector are structurally similar to one another. 751 static void findCandidateStructures( 752 std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 753 DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { 754 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 755 InnerCandEndIt; 756 757 // IRSimilarityCandidates each have a structure for operand use. It is 758 // possible that two instances of the same subsequences have different 759 // structure. Each type of structure found is assigned a number. This 760 // DenseMap maps an IRSimilarityCandidate to which type of similarity 761 // discovered it fits within. 762 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 763 764 // Find the compatibility from each candidate to the others to determine 765 // which candidates overlap and which have the same structure by mapping 766 // each structure to a different group. 767 bool SameStructure; 768 bool Inserted; 769 unsigned CurrentGroupNum = 0; 770 unsigned OuterGroupNum; 771 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 772 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 773 DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 774 775 // Iterate over the candidates to determine its structural and overlapping 776 // compatibility with other instructions 777 for (CandIt = CandsForRepSubstring.begin(), 778 CandEndIt = CandsForRepSubstring.end(); 779 CandIt != CandEndIt; CandIt++) { 780 781 // Determine if it has an assigned structural group already. 782 CandToGroupIt = CandToGroup.find(&*CandIt); 783 if (CandToGroupIt == CandToGroup.end()) { 784 // If not, we assign it one, and add it to our mapping. 785 std::tie(CandToGroupIt, Inserted) = 786 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 787 } 788 789 // Get the structural group number from the iterator. 790 OuterGroupNum = CandToGroupIt->second; 791 792 // Check if we already have a list of IRSimilarityCandidates for the current 793 // structural group. Create one if one does not exist. 794 CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 795 if (CurrentGroupPair == StructuralGroups.end()) 796 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 797 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 798 799 // Iterate over the IRSimilarityCandidates following the current 800 // IRSimilarityCandidate in the list to determine whether the two 801 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 802 // of IRSimilarityCandidates. 803 for (InnerCandIt = std::next(CandIt), 804 InnerCandEndIt = CandsForRepSubstring.end(); 805 InnerCandIt != InnerCandEndIt; InnerCandIt++) { 806 807 // We check if the inner item has a group already, if it does, we skip it. 808 CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 809 if (CandToGroupItInner != CandToGroup.end()) 810 continue; 811 812 // Otherwise we determine if they have the same structure and add it to 813 // vector if they match. 814 SameStructure = 815 IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt); 816 if (!SameStructure) 817 continue; 818 819 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 820 CurrentGroupPair->second.push_back(*InnerCandIt); 821 } 822 } 823 } 824 825 void IRSimilarityIdentifier::findCandidates( 826 std::vector<IRInstructionData *> &InstrList, 827 std::vector<unsigned> &IntegerMapping) { 828 SuffixTree ST(IntegerMapping); 829 830 std::vector<IRSimilarityCandidate> CandsForRepSubstring; 831 std::vector<SimilarityGroup> NewCandidateGroups; 832 833 DenseMap<unsigned, SimilarityGroup> StructuralGroups; 834 835 // Iterate over the subsequences found by the Suffix Tree to create 836 // IRSimilarityCandidates for each repeated subsequence and determine which 837 // instances are structurally similar to one another. 838 for (SuffixTree::RepeatedSubstring &RS : ST) { 839 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, 840 CandsForRepSubstring); 841 842 if (CandsForRepSubstring.size() < 2) 843 continue; 844 845 findCandidateStructures(CandsForRepSubstring, StructuralGroups); 846 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) 847 // We only add the group if it contains more than one 848 // IRSimilarityCandidate. If there is only one, that means there is no 849 // other repeated subsequence with the same structure. 850 if (Group.second.size() > 1) 851 SimilarityCandidates->push_back(Group.second); 852 853 CandsForRepSubstring.clear(); 854 StructuralGroups.clear(); 855 NewCandidateGroups.clear(); 856 } 857 } 858 859 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 860 ArrayRef<std::unique_ptr<Module>> Modules) { 861 resetSimilarityCandidates(); 862 863 std::vector<IRInstructionData *> InstrList; 864 std::vector<unsigned> IntegerMapping; 865 866 populateMapper(Modules, InstrList, IntegerMapping); 867 findCandidates(InstrList, IntegerMapping); 868 869 return SimilarityCandidates.getValue(); 870 } 871 872 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 873 resetSimilarityCandidates(); 874 875 std::vector<IRInstructionData *> InstrList; 876 std::vector<unsigned> IntegerMapping; 877 878 populateMapper(M, InstrList, IntegerMapping); 879 findCandidates(InstrList, IntegerMapping); 880 881 return SimilarityCandidates.getValue(); 882 } 883 884 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 885 "ir-similarity-identifier", false, true) 886 887 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 888 : ModulePass(ID) { 889 initializeIRSimilarityIdentifierWrapperPassPass( 890 *PassRegistry::getPassRegistry()); 891 } 892 893 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 894 IRSI.reset(new IRSimilarityIdentifier()); 895 return false; 896 } 897 898 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 899 IRSI.reset(); 900 return false; 901 } 902 903 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 904 IRSI->findSimilarity(M); 905 return false; 906 } 907 908 AnalysisKey IRSimilarityAnalysis::Key; 909 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 910 ModuleAnalysisManager &) { 911 912 auto IRSI = IRSimilarityIdentifier(); 913 IRSI.findSimilarity(M); 914 return IRSI; 915 } 916 917 PreservedAnalyses 918 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 919 IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 920 Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); 921 922 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 923 OS << CandVec.size() << " candidates of length " 924 << CandVec.begin()->getLength() << ". Found in: \n"; 925 for (IRSimilarityCandidate &Cand : CandVec) { 926 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 927 << ", Basic Block: "; 928 if (Cand.front()->Inst->getParent()->getName().str() == "") 929 OS << "(unnamed)"; 930 else 931 OS << Cand.front()->Inst->getParent()->getName().str(); 932 OS << "\n Start Instruction: "; 933 Cand.frontInstruction()->print(OS); 934 OS << "\n End Instruction: "; 935 Cand.backInstruction()->print(OS); 936 OS << "\n"; 937 } 938 } 939 940 return PreservedAnalyses::all(); 941 } 942 943 char IRSimilarityIdentifierWrapperPass::ID = 0; 944