1 //===-- DifferenceEngine.cpp - Structural function/module comparison ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header defines the implementation of the LLVM difference 10 // engine, which structurally compares global values within a module. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "DifferenceEngine.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallString.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/IR/BasicBlock.h" 21 #include "llvm/IR/CFG.h" 22 #include "llvm/IR/Constants.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/Instructions.h" 25 #include "llvm/IR/Module.h" 26 #include "llvm/Support/ErrorHandling.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include "llvm/Support/type_traits.h" 29 #include <utility> 30 31 using namespace llvm; 32 33 namespace { 34 35 /// A priority queue, implemented as a heap. 36 template <class T, class Sorter, unsigned InlineCapacity> 37 class PriorityQueue { 38 Sorter Precedes; 39 llvm::SmallVector<T, InlineCapacity> Storage; 40 41 public: 42 PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {} 43 44 /// Checks whether the heap is empty. 45 bool empty() const { return Storage.empty(); } 46 47 /// Insert a new value on the heap. 48 void insert(const T &V) { 49 unsigned Index = Storage.size(); 50 Storage.push_back(V); 51 if (Index == 0) return; 52 53 T *data = Storage.data(); 54 while (true) { 55 unsigned Target = (Index + 1) / 2 - 1; 56 if (!Precedes(data[Index], data[Target])) return; 57 std::swap(data[Index], data[Target]); 58 if (Target == 0) return; 59 Index = Target; 60 } 61 } 62 63 /// Remove the minimum value in the heap. Only valid on a non-empty heap. 64 T remove_min() { 65 assert(!empty()); 66 T tmp = Storage[0]; 67 68 unsigned NewSize = Storage.size() - 1; 69 if (NewSize) { 70 // Move the slot at the end to the beginning. 71 if (std::is_trivially_copyable<T>::value) 72 Storage[0] = Storage[NewSize]; 73 else 74 std::swap(Storage[0], Storage[NewSize]); 75 76 // Bubble the root up as necessary. 77 unsigned Index = 0; 78 while (true) { 79 // With a 1-based index, the children would be Index*2 and Index*2+1. 80 unsigned R = (Index + 1) * 2; 81 unsigned L = R - 1; 82 83 // If R is out of bounds, we're done after this in any case. 84 if (R >= NewSize) { 85 // If L is also out of bounds, we're done immediately. 86 if (L >= NewSize) break; 87 88 // Otherwise, test whether we should swap L and Index. 89 if (Precedes(Storage[L], Storage[Index])) 90 std::swap(Storage[L], Storage[Index]); 91 break; 92 } 93 94 // Otherwise, we need to compare with the smaller of L and R. 95 // Prefer R because it's closer to the end of the array. 96 unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R); 97 98 // If Index is >= the min of L and R, then heap ordering is restored. 99 if (!Precedes(Storage[IndexToTest], Storage[Index])) 100 break; 101 102 // Otherwise, keep bubbling up. 103 std::swap(Storage[IndexToTest], Storage[Index]); 104 Index = IndexToTest; 105 } 106 } 107 Storage.pop_back(); 108 109 return tmp; 110 } 111 }; 112 113 /// A function-scope difference engine. 114 class FunctionDifferenceEngine { 115 DifferenceEngine &Engine; 116 117 // Some initializers may reference the variable we're currently checking. This 118 // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent 119 // recursing. 120 const Value *SavedLHS; 121 const Value *SavedRHS; 122 123 // The current mapping from old local values to new local values. 124 DenseMap<const Value *, const Value *> Values; 125 126 // The current mapping from old blocks to new blocks. 127 DenseMap<const BasicBlock *, const BasicBlock *> Blocks; 128 129 // The tentative mapping from old local values while comparing a pair of 130 // basic blocks. Once the pair has been processed, the tentative mapping is 131 // committed to the Values map. 132 DenseSet<std::pair<const Value *, const Value *>> TentativeValues; 133 134 // Equivalence Assumptions 135 // 136 // For basic blocks in loops, some values in phi nodes may depend on 137 // values from not yet processed basic blocks in the loop. When encountering 138 // such values, we optimistically asssume their equivalence and store this 139 // assumption in a BlockDiffCandidate for the pair of compared BBs. 140 // 141 // Once we have diffed all BBs, for every BlockDiffCandidate, we check all 142 // stored assumptions using the Values map that stores proven equivalences 143 // between the old and new values, and report a diff if an assumption cannot 144 // be proven to be true. 145 // 146 // Note that after having made an assumption, all further determined 147 // equivalences implicitly depend on that assumption. These will not be 148 // reverted or reported if the assumption proves to be false, because these 149 // are considered indirect diffs caused by earlier direct diffs. 150 // 151 // We aim to avoid false negatives in llvm-diff, that is, ensure that 152 // whenever no diff is reported, the functions are indeed equal. If 153 // assumptions were made, this is not entirely clear, because in principle we 154 // could end up with a circular proof where the proof of equivalence of two 155 // nodes is depending on the assumption of their equivalence. 156 // 157 // To see that assumptions do not add false negatives, note that if we do not 158 // report a diff, this means that there is an equivalence mapping between old 159 // and new values that is consistent with all assumptions made. The circular 160 // dependency that exists on an IR value level does not exist at run time, 161 // because the values selected by the phi nodes must always already have been 162 // computed. Hence, we can prove equivalence of the old and new functions by 163 // considering step-wise parallel execution, and incrementally proving 164 // equivalence of every new computed value. Another way to think about it is 165 // to imagine cloning the loop BBs for every iteration, turning the loops 166 // into (possibly infinite) DAGs, and proving equivalence by induction on the 167 // iteration, using the computed value mapping. 168 169 // The class BlockDiffCandidate stores pairs which either have already been 170 // proven to differ, or pairs whose equivalence depends on assumptions to be 171 // verified later. 172 struct BlockDiffCandidate { 173 const BasicBlock *LBB; 174 const BasicBlock *RBB; 175 // Maps old values to assumed-to-be-equivalent new values 176 SmallDenseMap<const Value *, const Value *> EquivalenceAssumptions; 177 // If set, we already know the blocks differ. 178 bool KnownToDiffer; 179 }; 180 181 // List of block diff candidates in the order found by processing. 182 // We generate reports in this order. 183 // For every LBB, there may only be one corresponding RBB. 184 SmallVector<BlockDiffCandidate> BlockDiffCandidates; 185 // Maps LBB to the index of its BlockDiffCandidate, if existing. 186 DenseMap<const BasicBlock *, uint64_t> BlockDiffCandidateIndices; 187 188 // Note: Every LBB must always be queried together with the same RBB. 189 // The returned reference is not permanently valid and should not be stored. 190 BlockDiffCandidate &getOrCreateBlockDiffCandidate(const BasicBlock *LBB, 191 const BasicBlock *RBB) { 192 auto It = BlockDiffCandidateIndices.find(LBB); 193 // Check if LBB already has a diff candidate 194 if (It == BlockDiffCandidateIndices.end()) { 195 // Add new one 196 BlockDiffCandidateIndices[LBB] = BlockDiffCandidates.size(); 197 BlockDiffCandidates.push_back( 198 {LBB, RBB, SmallDenseMap<const Value *, const Value *>(), false}); 199 return BlockDiffCandidates.back(); 200 } 201 // Use existing one 202 BlockDiffCandidate &Result = BlockDiffCandidates[It->second]; 203 assert(Result.RBB == RBB && "Inconsistent basic block pairing!"); 204 return Result; 205 } 206 207 // Optionally passed to equivalence checker functions, so these can add 208 // assumptions in BlockDiffCandidates. Its presence controls whether 209 // assumptions are generated. 210 struct AssumptionContext { 211 // The two basic blocks that need the two compared values to be equivalent. 212 const BasicBlock *LBB; 213 const BasicBlock *RBB; 214 }; 215 216 unsigned getUnprocPredCount(const BasicBlock *Block) const { 217 return llvm::count_if(predecessors(Block), [&](const BasicBlock *Pred) { 218 return !Blocks.contains(Pred); 219 }); 220 } 221 222 typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair; 223 224 /// A type which sorts a priority queue by the number of unprocessed 225 /// predecessor blocks it has remaining. 226 /// 227 /// This is actually really expensive to calculate. 228 struct QueueSorter { 229 const FunctionDifferenceEngine &fde; 230 explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {} 231 232 bool operator()(BlockPair &Old, BlockPair &New) { 233 return fde.getUnprocPredCount(Old.first) 234 < fde.getUnprocPredCount(New.first); 235 } 236 }; 237 238 /// A queue of unified blocks to process. 239 PriorityQueue<BlockPair, QueueSorter, 20> Queue; 240 241 /// Try to unify the given two blocks. Enqueues them for processing 242 /// if they haven't already been processed. 243 /// 244 /// Returns true if there was a problem unifying them. 245 bool tryUnify(const BasicBlock *L, const BasicBlock *R) { 246 const BasicBlock *&Ref = Blocks[L]; 247 248 if (Ref) { 249 if (Ref == R) return false; 250 251 Engine.logf("successor %l cannot be equivalent to %r; " 252 "it's already equivalent to %r") 253 << L << R << Ref; 254 return true; 255 } 256 257 Ref = R; 258 Queue.insert(BlockPair(L, R)); 259 return false; 260 } 261 262 /// Unifies two instructions, given that they're known not to have 263 /// structural differences. 264 void unify(const Instruction *L, const Instruction *R) { 265 DifferenceEngine::Context C(Engine, L, R); 266 267 bool Result = diff(L, R, true, true, true); 268 assert(!Result && "structural differences second time around?"); 269 (void) Result; 270 if (!L->use_empty()) 271 Values[L] = R; 272 } 273 274 void processQueue() { 275 while (!Queue.empty()) { 276 BlockPair Pair = Queue.remove_min(); 277 diff(Pair.first, Pair.second); 278 } 279 } 280 281 void checkAndReportDiffCandidates() { 282 for (BlockDiffCandidate &BDC : BlockDiffCandidates) { 283 284 // Check assumptions 285 for (const auto &[L, R] : BDC.EquivalenceAssumptions) { 286 auto It = Values.find(L); 287 if (It == Values.end() || It->second != R) { 288 BDC.KnownToDiffer = true; 289 break; 290 } 291 } 292 293 // Run block diff if the BBs differ 294 if (BDC.KnownToDiffer) { 295 DifferenceEngine::Context C(Engine, BDC.LBB, BDC.RBB); 296 runBlockDiff(BDC.LBB->begin(), BDC.RBB->begin()); 297 } 298 } 299 } 300 301 void diff(const BasicBlock *L, const BasicBlock *R) { 302 DifferenceEngine::Context C(Engine, L, R); 303 304 BasicBlock::const_iterator LI = L->begin(), LE = L->end(); 305 BasicBlock::const_iterator RI = R->begin(); 306 307 do { 308 assert(LI != LE && RI != R->end()); 309 const Instruction *LeftI = &*LI, *RightI = &*RI; 310 311 // If the instructions differ, start the more sophisticated diff 312 // algorithm at the start of the block. 313 if (diff(LeftI, RightI, false, false, true)) { 314 TentativeValues.clear(); 315 // Register (L, R) as diffing pair. Note that we could directly emit a 316 // block diff here, but this way we ensure all diffs are emitted in one 317 // consistent order, independent of whether the diffs were detected 318 // immediately or via invalid assumptions. 319 getOrCreateBlockDiffCandidate(L, R).KnownToDiffer = true; 320 return; 321 } 322 323 // Otherwise, tentatively unify them. 324 if (!LeftI->use_empty()) 325 TentativeValues.insert(std::make_pair(LeftI, RightI)); 326 327 ++LI; 328 ++RI; 329 } while (LI != LE); // This is sufficient: we can't get equality of 330 // terminators if there are residual instructions. 331 332 // Unify everything in the block, non-tentatively this time. 333 TentativeValues.clear(); 334 for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI) 335 unify(&*LI, &*RI); 336 } 337 338 bool matchForBlockDiff(const Instruction *L, const Instruction *R); 339 void runBlockDiff(BasicBlock::const_iterator LI, 340 BasicBlock::const_iterator RI); 341 342 bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) { 343 // FIXME: call attributes 344 AssumptionContext AC = {L.getParent(), R.getParent()}; 345 if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand(), 346 &AC)) { 347 if (Complain) Engine.log("called functions differ"); 348 return true; 349 } 350 if (L.arg_size() != R.arg_size()) { 351 if (Complain) Engine.log("argument counts differ"); 352 return true; 353 } 354 for (unsigned I = 0, E = L.arg_size(); I != E; ++I) 355 if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I), &AC)) { 356 if (Complain) 357 Engine.logf("arguments %l and %r differ") 358 << L.getArgOperand(I) << R.getArgOperand(I); 359 return true; 360 } 361 return false; 362 } 363 364 // If AllowAssumptions is enabled, whenever we encounter a pair of values 365 // that we cannot prove to be equivalent, we assume equivalence and store that 366 // assumption to be checked later in BlockDiffCandidates. 367 bool diff(const Instruction *L, const Instruction *R, bool Complain, 368 bool TryUnify, bool AllowAssumptions) { 369 // FIXME: metadata (if Complain is set) 370 AssumptionContext ACValue = {L->getParent(), R->getParent()}; 371 // nullptr AssumptionContext disables assumption generation. 372 const AssumptionContext *AC = AllowAssumptions ? &ACValue : nullptr; 373 374 // Different opcodes always imply different operations. 375 if (L->getOpcode() != R->getOpcode()) { 376 if (Complain) Engine.log("different instruction types"); 377 return true; 378 } 379 380 if (isa<CmpInst>(L)) { 381 if (cast<CmpInst>(L)->getPredicate() 382 != cast<CmpInst>(R)->getPredicate()) { 383 if (Complain) Engine.log("different predicates"); 384 return true; 385 } 386 } else if (isa<CallInst>(L)) { 387 return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain); 388 } else if (isa<PHINode>(L)) { 389 const PHINode &LI = cast<PHINode>(*L); 390 const PHINode &RI = cast<PHINode>(*R); 391 392 // This is really weird; type uniquing is broken? 393 if (LI.getType() != RI.getType()) { 394 if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) { 395 if (Complain) Engine.log("different phi types"); 396 return true; 397 } 398 } 399 400 if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) { 401 if (Complain) 402 Engine.log("PHI node # of incoming values differ"); 403 return true; 404 } 405 406 for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) { 407 if (TryUnify) 408 tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I)); 409 410 if (!equivalentAsOperands(LI.getIncomingValue(I), 411 RI.getIncomingValue(I), AC)) { 412 if (Complain) 413 Engine.log("PHI node incoming values differ"); 414 return true; 415 } 416 } 417 418 return false; 419 420 // Terminators. 421 } else if (isa<InvokeInst>(L)) { 422 const InvokeInst &LI = cast<InvokeInst>(*L); 423 const InvokeInst &RI = cast<InvokeInst>(*R); 424 if (diffCallSites(LI, RI, Complain)) 425 return true; 426 427 if (TryUnify) { 428 tryUnify(LI.getNormalDest(), RI.getNormalDest()); 429 tryUnify(LI.getUnwindDest(), RI.getUnwindDest()); 430 } 431 return false; 432 433 } else if (isa<CallBrInst>(L)) { 434 const CallBrInst &LI = cast<CallBrInst>(*L); 435 const CallBrInst &RI = cast<CallBrInst>(*R); 436 if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) { 437 if (Complain) 438 Engine.log("callbr # of indirect destinations differ"); 439 return true; 440 } 441 442 // Perform the "try unify" step so that we can equate the indirect 443 // destinations before checking the call site. 444 for (unsigned I = 0; I < LI.getNumIndirectDests(); I++) 445 tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I)); 446 447 if (diffCallSites(LI, RI, Complain)) 448 return true; 449 450 if (TryUnify) 451 tryUnify(LI.getDefaultDest(), RI.getDefaultDest()); 452 return false; 453 454 } else if (isa<BranchInst>(L)) { 455 const BranchInst *LI = cast<BranchInst>(L); 456 const BranchInst *RI = cast<BranchInst>(R); 457 if (LI->isConditional() != RI->isConditional()) { 458 if (Complain) Engine.log("branch conditionality differs"); 459 return true; 460 } 461 462 if (LI->isConditional()) { 463 if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) { 464 if (Complain) Engine.log("branch conditions differ"); 465 return true; 466 } 467 if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1)); 468 } 469 if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0)); 470 return false; 471 472 } else if (isa<IndirectBrInst>(L)) { 473 const IndirectBrInst *LI = cast<IndirectBrInst>(L); 474 const IndirectBrInst *RI = cast<IndirectBrInst>(R); 475 if (LI->getNumDestinations() != RI->getNumDestinations()) { 476 if (Complain) Engine.log("indirectbr # of destinations differ"); 477 return true; 478 } 479 480 if (!equivalentAsOperands(LI->getAddress(), RI->getAddress(), AC)) { 481 if (Complain) Engine.log("indirectbr addresses differ"); 482 return true; 483 } 484 485 if (TryUnify) { 486 for (unsigned i = 0; i < LI->getNumDestinations(); i++) { 487 tryUnify(LI->getDestination(i), RI->getDestination(i)); 488 } 489 } 490 return false; 491 492 } else if (isa<SwitchInst>(L)) { 493 const SwitchInst *LI = cast<SwitchInst>(L); 494 const SwitchInst *RI = cast<SwitchInst>(R); 495 if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) { 496 if (Complain) Engine.log("switch conditions differ"); 497 return true; 498 } 499 if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest()); 500 501 bool Difference = false; 502 503 DenseMap<const ConstantInt *, const BasicBlock *> LCases; 504 for (auto Case : LI->cases()) 505 LCases[Case.getCaseValue()] = Case.getCaseSuccessor(); 506 507 for (auto Case : RI->cases()) { 508 const ConstantInt *CaseValue = Case.getCaseValue(); 509 const BasicBlock *LCase = LCases[CaseValue]; 510 if (LCase) { 511 if (TryUnify) 512 tryUnify(LCase, Case.getCaseSuccessor()); 513 LCases.erase(CaseValue); 514 } else if (Complain || !Difference) { 515 if (Complain) 516 Engine.logf("right switch has extra case %r") << CaseValue; 517 Difference = true; 518 } 519 } 520 if (!Difference) 521 for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator 522 I = LCases.begin(), 523 E = LCases.end(); 524 I != E; ++I) { 525 if (Complain) 526 Engine.logf("left switch has extra case %l") << I->first; 527 Difference = true; 528 } 529 return Difference; 530 } else if (isa<UnreachableInst>(L)) { 531 return false; 532 } 533 534 if (L->getNumOperands() != R->getNumOperands()) { 535 if (Complain) Engine.log("instructions have different operand counts"); 536 return true; 537 } 538 539 for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { 540 Value *LO = L->getOperand(I), *RO = R->getOperand(I); 541 if (!equivalentAsOperands(LO, RO, AC)) { 542 if (Complain) Engine.logf("operands %l and %r differ") << LO << RO; 543 return true; 544 } 545 } 546 547 return false; 548 } 549 550 public: 551 bool equivalentAsOperands(const Constant *L, const Constant *R, 552 const AssumptionContext *AC) { 553 // Use equality as a preliminary filter. 554 if (L == R) 555 return true; 556 557 if (L->getValueID() != R->getValueID()) 558 return false; 559 560 // Ask the engine about global values. 561 if (isa<GlobalValue>(L)) 562 return Engine.equivalentAsOperands(cast<GlobalValue>(L), 563 cast<GlobalValue>(R)); 564 565 // Compare constant expressions structurally. 566 if (isa<ConstantExpr>(L)) 567 return equivalentAsOperands(cast<ConstantExpr>(L), cast<ConstantExpr>(R), 568 AC); 569 570 // Constants of the "same type" don't always actually have the same 571 // type; I don't know why. Just white-list them. 572 if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L)) 573 return true; 574 575 // Block addresses only match if we've already encountered the 576 // block. FIXME: tentative matches? 577 if (isa<BlockAddress>(L)) 578 return Blocks[cast<BlockAddress>(L)->getBasicBlock()] 579 == cast<BlockAddress>(R)->getBasicBlock(); 580 581 // If L and R are ConstantVectors, compare each element 582 if (isa<ConstantVector>(L)) { 583 const ConstantVector *CVL = cast<ConstantVector>(L); 584 const ConstantVector *CVR = cast<ConstantVector>(R); 585 if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements()) 586 return false; 587 for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) { 588 if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i), AC)) 589 return false; 590 } 591 return true; 592 } 593 594 // If L and R are ConstantArrays, compare the element count and types. 595 if (isa<ConstantArray>(L)) { 596 const ConstantArray *CAL = cast<ConstantArray>(L); 597 const ConstantArray *CAR = cast<ConstantArray>(R); 598 // Sometimes a type may be equivalent, but not uniquified---e.g. it may 599 // contain a GEP instruction. Do a deeper comparison of the types. 600 if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements()) 601 return false; 602 603 for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) { 604 if (!equivalentAsOperands(CAL->getAggregateElement(I), 605 CAR->getAggregateElement(I), AC)) 606 return false; 607 } 608 609 return true; 610 } 611 612 // If L and R are ConstantStructs, compare each field and type. 613 if (isa<ConstantStruct>(L)) { 614 const ConstantStruct *CSL = cast<ConstantStruct>(L); 615 const ConstantStruct *CSR = cast<ConstantStruct>(R); 616 617 const StructType *LTy = cast<StructType>(CSL->getType()); 618 const StructType *RTy = cast<StructType>(CSR->getType()); 619 620 // The StructTypes should have the same attributes. Don't use 621 // isLayoutIdentical(), because that just checks the element pointers, 622 // which may not work here. 623 if (LTy->getNumElements() != RTy->getNumElements() || 624 LTy->isPacked() != RTy->isPacked()) 625 return false; 626 627 for (unsigned I = 0; I < LTy->getNumElements(); I++) { 628 const Value *LAgg = CSL->getAggregateElement(I); 629 const Value *RAgg = CSR->getAggregateElement(I); 630 631 if (LAgg == SavedLHS || RAgg == SavedRHS) { 632 if (LAgg != SavedLHS || RAgg != SavedRHS) 633 // If the left and right operands aren't both re-analyzing the 634 // variable, then the initialiers don't match, so report "false". 635 // Otherwise, we skip these operands.. 636 return false; 637 638 continue; 639 } 640 641 if (!equivalentAsOperands(LAgg, RAgg, AC)) { 642 return false; 643 } 644 } 645 646 return true; 647 } 648 649 return false; 650 } 651 652 bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R, 653 const AssumptionContext *AC) { 654 if (L == R) 655 return true; 656 657 if (L->getOpcode() != R->getOpcode()) 658 return false; 659 660 switch (L->getOpcode()) { 661 case Instruction::GetElementPtr: 662 // FIXME: inbounds? 663 break; 664 665 default: 666 break; 667 } 668 669 if (L->getNumOperands() != R->getNumOperands()) 670 return false; 671 672 for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { 673 const auto *LOp = L->getOperand(I); 674 const auto *ROp = R->getOperand(I); 675 676 if (LOp == SavedLHS || ROp == SavedRHS) { 677 if (LOp != SavedLHS || ROp != SavedRHS) 678 // If the left and right operands aren't both re-analyzing the 679 // variable, then the initialiers don't match, so report "false". 680 // Otherwise, we skip these operands.. 681 return false; 682 683 continue; 684 } 685 686 if (!equivalentAsOperands(LOp, ROp, AC)) 687 return false; 688 } 689 690 return true; 691 } 692 693 // There are cases where we cannot determine whether two values are 694 // equivalent, because it depends on not yet processed basic blocks -- see the 695 // documentation on assumptions. 696 // 697 // AC is the context in which we are currently performing a diff. 698 // When we encounter a pair of values for which we can neither prove 699 // equivalence nor the opposite, we do the following: 700 // * If AC is nullptr, we treat the pair as non-equivalent. 701 // * If AC is set, we add an assumption for the basic blocks given by AC, 702 // and treat the pair as equivalent. The assumption is checked later. 703 bool equivalentAsOperands(const Value *L, const Value *R, 704 const AssumptionContext *AC) { 705 // Fall out if the values have different kind. 706 // This possibly shouldn't take priority over oracles. 707 if (L->getValueID() != R->getValueID()) 708 return false; 709 710 // Value subtypes: Argument, Constant, Instruction, BasicBlock, 711 // InlineAsm, MDNode, MDString, PseudoSourceValue 712 713 if (isa<Constant>(L)) 714 return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R), AC); 715 716 if (isa<Instruction>(L)) { 717 auto It = Values.find(L); 718 if (It != Values.end()) 719 return It->second == R; 720 721 if (TentativeValues.count(std::make_pair(L, R))) 722 return true; 723 724 // L and R might be equivalent, this could depend on not yet processed 725 // basic blocks, so we cannot decide here. 726 if (AC) { 727 // Add an assumption, unless there is a conflict with an existing one 728 BlockDiffCandidate &BDC = 729 getOrCreateBlockDiffCandidate(AC->LBB, AC->RBB); 730 auto InsertionResult = BDC.EquivalenceAssumptions.insert({L, R}); 731 if (!InsertionResult.second && InsertionResult.first->second != R) { 732 // We already have a conflicting equivalence assumption for L, so at 733 // least one must be wrong, and we know that there is a diff. 734 BDC.KnownToDiffer = true; 735 BDC.EquivalenceAssumptions.clear(); 736 return false; 737 } 738 // Optimistically assume equivalence, and check later once all BBs 739 // have been processed. 740 return true; 741 } 742 743 // Assumptions disabled, so pessimistically assume non-equivalence. 744 return false; 745 } 746 747 if (isa<Argument>(L)) 748 return Values[L] == R; 749 750 if (isa<BasicBlock>(L)) 751 return Blocks[cast<BasicBlock>(L)] != R; 752 753 // Pretend everything else is identical. 754 return true; 755 } 756 757 // Avoid a gcc warning about accessing 'this' in an initializer. 758 FunctionDifferenceEngine *this_() { return this; } 759 760 public: 761 FunctionDifferenceEngine(DifferenceEngine &Engine, 762 const Value *SavedLHS = nullptr, 763 const Value *SavedRHS = nullptr) 764 : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS), 765 Queue(QueueSorter(*this_())) {} 766 767 void diff(const Function *L, const Function *R) { 768 assert(Values.empty() && "Multiple diffs per engine are not supported!"); 769 770 if (L->arg_size() != R->arg_size()) 771 Engine.log("different argument counts"); 772 773 // Map the arguments. 774 for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(), 775 RI = R->arg_begin(), RE = R->arg_end(); 776 LI != LE && RI != RE; ++LI, ++RI) 777 Values[&*LI] = &*RI; 778 779 tryUnify(&*L->begin(), &*R->begin()); 780 processQueue(); 781 checkAndReportDiffCandidates(); 782 } 783 }; 784 785 struct DiffEntry { 786 DiffEntry() = default; 787 788 unsigned Cost = 0; 789 llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange 790 }; 791 792 bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L, 793 const Instruction *R) { 794 return !diff(L, R, false, false, false); 795 } 796 797 void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart, 798 BasicBlock::const_iterator RStart) { 799 BasicBlock::const_iterator LE = LStart->getParent()->end(); 800 BasicBlock::const_iterator RE = RStart->getParent()->end(); 801 802 unsigned NL = std::distance(LStart, LE); 803 804 SmallVector<DiffEntry, 20> Paths1(NL+1); 805 SmallVector<DiffEntry, 20> Paths2(NL+1); 806 807 DiffEntry *Cur = Paths1.data(); 808 DiffEntry *Next = Paths2.data(); 809 810 const unsigned LeftCost = 2; 811 const unsigned RightCost = 2; 812 const unsigned MatchCost = 0; 813 814 assert(TentativeValues.empty()); 815 816 // Initialize the first column. 817 for (unsigned I = 0; I != NL+1; ++I) { 818 Cur[I].Cost = I * LeftCost; 819 for (unsigned J = 0; J != I; ++J) 820 Cur[I].Path.push_back(DC_left); 821 } 822 823 for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) { 824 // Initialize the first row. 825 Next[0] = Cur[0]; 826 Next[0].Cost += RightCost; 827 Next[0].Path.push_back(DC_right); 828 829 unsigned Index = 1; 830 for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) { 831 if (matchForBlockDiff(&*LI, &*RI)) { 832 Next[Index] = Cur[Index-1]; 833 Next[Index].Cost += MatchCost; 834 Next[Index].Path.push_back(DC_match); 835 TentativeValues.insert(std::make_pair(&*LI, &*RI)); 836 } else if (Next[Index-1].Cost <= Cur[Index].Cost) { 837 Next[Index] = Next[Index-1]; 838 Next[Index].Cost += LeftCost; 839 Next[Index].Path.push_back(DC_left); 840 } else { 841 Next[Index] = Cur[Index]; 842 Next[Index].Cost += RightCost; 843 Next[Index].Path.push_back(DC_right); 844 } 845 } 846 847 std::swap(Cur, Next); 848 } 849 850 // We don't need the tentative values anymore; everything from here 851 // on out should be non-tentative. 852 TentativeValues.clear(); 853 854 SmallVectorImpl<char> &Path = Cur[NL].Path; 855 BasicBlock::const_iterator LI = LStart, RI = RStart; 856 857 DiffLogBuilder Diff(Engine.getConsumer()); 858 859 // Drop trailing matches. 860 while (Path.size() && Path.back() == DC_match) 861 Path.pop_back(); 862 863 // Skip leading matches. 864 SmallVectorImpl<char>::iterator 865 PI = Path.begin(), PE = Path.end(); 866 while (PI != PE && *PI == DC_match) { 867 unify(&*LI, &*RI); 868 ++PI; 869 ++LI; 870 ++RI; 871 } 872 873 for (; PI != PE; ++PI) { 874 switch (static_cast<DiffChange>(*PI)) { 875 case DC_match: 876 assert(LI != LE && RI != RE); 877 { 878 const Instruction *L = &*LI, *R = &*RI; 879 unify(L, R); 880 Diff.addMatch(L, R); 881 } 882 ++LI; ++RI; 883 break; 884 885 case DC_left: 886 assert(LI != LE); 887 Diff.addLeft(&*LI); 888 ++LI; 889 break; 890 891 case DC_right: 892 assert(RI != RE); 893 Diff.addRight(&*RI); 894 ++RI; 895 break; 896 } 897 } 898 899 // Finishing unifying and complaining about the tails of the block, 900 // which should be matches all the way through. 901 while (LI != LE) { 902 assert(RI != RE); 903 unify(&*LI, &*RI); 904 ++LI; 905 ++RI; 906 } 907 908 // If the terminators have different kinds, but one is an invoke and the 909 // other is an unconditional branch immediately following a call, unify 910 // the results and the destinations. 911 const Instruction *LTerm = LStart->getParent()->getTerminator(); 912 const Instruction *RTerm = RStart->getParent()->getTerminator(); 913 if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) { 914 if (cast<BranchInst>(LTerm)->isConditional()) return; 915 BasicBlock::const_iterator I = LTerm->getIterator(); 916 if (I == LStart->getParent()->begin()) return; 917 --I; 918 if (!isa<CallInst>(*I)) return; 919 const CallInst *LCall = cast<CallInst>(&*I); 920 const InvokeInst *RInvoke = cast<InvokeInst>(RTerm); 921 if (!equivalentAsOperands(LCall->getCalledOperand(), 922 RInvoke->getCalledOperand(), nullptr)) 923 return; 924 if (!LCall->use_empty()) 925 Values[LCall] = RInvoke; 926 tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest()); 927 } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) { 928 if (cast<BranchInst>(RTerm)->isConditional()) return; 929 BasicBlock::const_iterator I = RTerm->getIterator(); 930 if (I == RStart->getParent()->begin()) return; 931 --I; 932 if (!isa<CallInst>(*I)) return; 933 const CallInst *RCall = cast<CallInst>(I); 934 const InvokeInst *LInvoke = cast<InvokeInst>(LTerm); 935 if (!equivalentAsOperands(LInvoke->getCalledOperand(), 936 RCall->getCalledOperand(), nullptr)) 937 return; 938 if (!LInvoke->use_empty()) 939 Values[LInvoke] = RCall; 940 tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0)); 941 } 942 } 943 } 944 945 void DifferenceEngine::Oracle::anchor() { } 946 947 void DifferenceEngine::diff(const Function *L, const Function *R) { 948 Context C(*this, L, R); 949 950 // FIXME: types 951 // FIXME: attributes and CC 952 // FIXME: parameter attributes 953 954 // If both are declarations, we're done. 955 if (L->empty() && R->empty()) 956 return; 957 else if (L->empty()) 958 log("left function is declaration, right function is definition"); 959 else if (R->empty()) 960 log("right function is declaration, left function is definition"); 961 else 962 FunctionDifferenceEngine(*this).diff(L, R); 963 } 964 965 void DifferenceEngine::diff(const Module *L, const Module *R) { 966 StringSet<> LNames; 967 SmallVector<std::pair<const Function *, const Function *>, 20> Queue; 968 969 unsigned LeftAnonCount = 0; 970 unsigned RightAnonCount = 0; 971 972 for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) { 973 const Function *LFn = &*I; 974 StringRef Name = LFn->getName(); 975 if (Name.empty()) { 976 ++LeftAnonCount; 977 continue; 978 } 979 980 LNames.insert(Name); 981 982 if (Function *RFn = R->getFunction(LFn->getName())) 983 Queue.push_back(std::make_pair(LFn, RFn)); 984 else 985 logf("function %l exists only in left module") << LFn; 986 } 987 988 for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) { 989 const Function *RFn = &*I; 990 StringRef Name = RFn->getName(); 991 if (Name.empty()) { 992 ++RightAnonCount; 993 continue; 994 } 995 996 if (!LNames.count(Name)) 997 logf("function %r exists only in right module") << RFn; 998 } 999 1000 if (LeftAnonCount != 0 || RightAnonCount != 0) { 1001 SmallString<32> Tmp; 1002 logf(("not comparing " + Twine(LeftAnonCount) + 1003 " anonymous functions in the left module and " + 1004 Twine(RightAnonCount) + " in the right module") 1005 .toStringRef(Tmp)); 1006 } 1007 1008 for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator 1009 I = Queue.begin(), 1010 E = Queue.end(); 1011 I != E; ++I) 1012 diff(I->first, I->second); 1013 } 1014 1015 bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L, 1016 const GlobalValue *R) { 1017 if (globalValueOracle) return (*globalValueOracle)(L, R); 1018 1019 if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) { 1020 const GlobalVariable *GVL = cast<GlobalVariable>(L); 1021 const GlobalVariable *GVR = cast<GlobalVariable>(R); 1022 if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() && 1023 GVR->hasLocalLinkage() && GVR->hasUniqueInitializer()) 1024 return FunctionDifferenceEngine(*this, GVL, GVR) 1025 .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer(), 1026 nullptr); 1027 } 1028 1029 return L->getName() == R->getName(); 1030 } 1031