1 //===-- DifferenceEngine.cpp - Structural function/module comparison ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header defines the implementation of the LLVM difference 10 // engine, which structurally compares global values within a module. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "DifferenceEngine.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallString.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/IR/BasicBlock.h" 21 #include "llvm/IR/CFG.h" 22 #include "llvm/IR/Constants.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/Instructions.h" 25 #include "llvm/IR/Module.h" 26 #include "llvm/Support/ErrorHandling.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include "llvm/Support/type_traits.h" 29 #include <utility> 30 31 using namespace llvm; 32 33 namespace { 34 35 /// A priority queue, implemented as a heap. 36 template <class T, class Sorter, unsigned InlineCapacity> 37 class PriorityQueue { 38 Sorter Precedes; 39 llvm::SmallVector<T, InlineCapacity> Storage; 40 41 public: 42 PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {} 43 44 /// Checks whether the heap is empty. 45 bool empty() const { return Storage.empty(); } 46 47 /// Insert a new value on the heap. 48 void insert(const T &V) { 49 unsigned Index = Storage.size(); 50 Storage.push_back(V); 51 if (Index == 0) return; 52 53 T *data = Storage.data(); 54 while (true) { 55 unsigned Target = (Index + 1) / 2 - 1; 56 if (!Precedes(data[Index], data[Target])) return; 57 std::swap(data[Index], data[Target]); 58 if (Target == 0) return; 59 Index = Target; 60 } 61 } 62 63 /// Remove the minimum value in the heap. Only valid on a non-empty heap. 64 T remove_min() { 65 assert(!empty()); 66 T tmp = Storage[0]; 67 68 unsigned NewSize = Storage.size() - 1; 69 if (NewSize) { 70 // Move the slot at the end to the beginning. 71 if (std::is_trivially_copyable<T>::value) 72 Storage[0] = Storage[NewSize]; 73 else 74 std::swap(Storage[0], Storage[NewSize]); 75 76 // Bubble the root up as necessary. 77 unsigned Index = 0; 78 while (true) { 79 // With a 1-based index, the children would be Index*2 and Index*2+1. 80 unsigned R = (Index + 1) * 2; 81 unsigned L = R - 1; 82 83 // If R is out of bounds, we're done after this in any case. 84 if (R >= NewSize) { 85 // If L is also out of bounds, we're done immediately. 86 if (L >= NewSize) break; 87 88 // Otherwise, test whether we should swap L and Index. 89 if (Precedes(Storage[L], Storage[Index])) 90 std::swap(Storage[L], Storage[Index]); 91 break; 92 } 93 94 // Otherwise, we need to compare with the smaller of L and R. 95 // Prefer R because it's closer to the end of the array. 96 unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R); 97 98 // If Index is >= the min of L and R, then heap ordering is restored. 99 if (!Precedes(Storage[IndexToTest], Storage[Index])) 100 break; 101 102 // Otherwise, keep bubbling up. 103 std::swap(Storage[IndexToTest], Storage[Index]); 104 Index = IndexToTest; 105 } 106 } 107 Storage.pop_back(); 108 109 return tmp; 110 } 111 }; 112 113 /// A function-scope difference engine. 114 class FunctionDifferenceEngine { 115 DifferenceEngine &Engine; 116 117 // Some initializers may reference the variable we're currently checking. This 118 // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent 119 // recursing. 120 const Value *SavedLHS; 121 const Value *SavedRHS; 122 123 // The current mapping from old local values to new local values. 124 DenseMap<const Value *, const Value *> Values; 125 126 // The current mapping from old blocks to new blocks. 127 DenseMap<const BasicBlock *, const BasicBlock *> Blocks; 128 129 // The tentative mapping from old local values while comparing a pair of 130 // basic blocks. Once the pair has been processed, the tentative mapping is 131 // committed to the Values map. 132 DenseSet<std::pair<const Value *, const Value *>> TentativeValues; 133 134 // Equivalence Assumptions 135 // 136 // For basic blocks in loops, some values in phi nodes may depend on 137 // values from not yet processed basic blocks in the loop. When encountering 138 // such values, we optimistically asssume their equivalence and store this 139 // assumption in a BlockDiffCandidate for the pair of compared BBs. 140 // 141 // Once we have diffed all BBs, for every BlockDiffCandidate, we check all 142 // stored assumptions using the Values map that stores proven equivalences 143 // between the old and new values, and report a diff if an assumption cannot 144 // be proven to be true. 145 // 146 // Note that after having made an assumption, all further determined 147 // equivalences implicitly depend on that assumption. These will not be 148 // reverted or reported if the assumption proves to be false, because these 149 // are considered indirect diffs caused by earlier direct diffs. 150 // 151 // We aim to avoid false negatives in llvm-diff, that is, ensure that 152 // whenever no diff is reported, the functions are indeed equal. If 153 // assumptions were made, this is not entirely clear, because in principle we 154 // could end up with a circular proof where the proof of equivalence of two 155 // nodes is depending on the assumption of their equivalence. 156 // 157 // To see that assumptions do not add false negatives, note that if we do not 158 // report a diff, this means that there is an equivalence mapping between old 159 // and new values that is consistent with all assumptions made. The circular 160 // dependency that exists on an IR value level does not exist at run time, 161 // because the values selected by the phi nodes must always already have been 162 // computed. Hence, we can prove equivalence of the old and new functions by 163 // considering step-wise parallel execution, and incrementally proving 164 // equivalence of every new computed value. Another way to think about it is 165 // to imagine cloning the loop BBs for every iteration, turning the loops 166 // into (possibly infinite) DAGs, and proving equivalence by induction on the 167 // iteration, using the computed value mapping. 168 169 // The class BlockDiffCandidate stores pairs which either have already been 170 // proven to differ, or pairs whose equivalence depends on assumptions to be 171 // verified later. 172 struct BlockDiffCandidate { 173 const BasicBlock *LBB; 174 const BasicBlock *RBB; 175 // Maps old values to assumed-to-be-equivalent new values 176 SmallDenseMap<const Value *, const Value *> EquivalenceAssumptions; 177 // If set, we already know the blocks differ. 178 bool KnownToDiffer; 179 }; 180 181 // List of block diff candidates in the order found by processing. 182 // We generate reports in this order. 183 // For every LBB, there may only be one corresponding RBB. 184 SmallVector<BlockDiffCandidate> BlockDiffCandidates; 185 // Maps LBB to the index of its BlockDiffCandidate, if existing. 186 DenseMap<const BasicBlock *, uint64_t> BlockDiffCandidateIndices; 187 188 // Note: Every LBB must always be queried together with the same RBB. 189 // The returned reference is not permanently valid and should not be stored. 190 BlockDiffCandidate &getOrCreateBlockDiffCandidate(const BasicBlock *LBB, 191 const BasicBlock *RBB) { 192 auto It = BlockDiffCandidateIndices.find(LBB); 193 // Check if LBB already has a diff candidate 194 if (It == BlockDiffCandidateIndices.end()) { 195 // Add new one 196 BlockDiffCandidateIndices[LBB] = BlockDiffCandidates.size(); 197 BlockDiffCandidates.push_back( 198 {LBB, RBB, SmallDenseMap<const Value *, const Value *>(), false}); 199 return BlockDiffCandidates.back(); 200 } 201 // Use existing one 202 BlockDiffCandidate &Result = BlockDiffCandidates[It->second]; 203 assert(Result.RBB == RBB && "Inconsistent basic block pairing!"); 204 return Result; 205 } 206 207 // Optionally passed to equivalence checker functions, so these can add 208 // assumptions in BlockDiffCandidates. Its presence controls whether 209 // assumptions are generated. 210 struct AssumptionContext { 211 // The two basic blocks that need the two compared values to be equivalent. 212 const BasicBlock *LBB; 213 const BasicBlock *RBB; 214 }; 215 216 unsigned getUnprocPredCount(const BasicBlock *Block) const { 217 unsigned Count = 0; 218 for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E; 219 ++I) 220 if (!Blocks.count(*I)) Count++; 221 return Count; 222 } 223 224 typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair; 225 226 /// A type which sorts a priority queue by the number of unprocessed 227 /// predecessor blocks it has remaining. 228 /// 229 /// This is actually really expensive to calculate. 230 struct QueueSorter { 231 const FunctionDifferenceEngine &fde; 232 explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {} 233 234 bool operator()(BlockPair &Old, BlockPair &New) { 235 return fde.getUnprocPredCount(Old.first) 236 < fde.getUnprocPredCount(New.first); 237 } 238 }; 239 240 /// A queue of unified blocks to process. 241 PriorityQueue<BlockPair, QueueSorter, 20> Queue; 242 243 /// Try to unify the given two blocks. Enqueues them for processing 244 /// if they haven't already been processed. 245 /// 246 /// Returns true if there was a problem unifying them. 247 bool tryUnify(const BasicBlock *L, const BasicBlock *R) { 248 const BasicBlock *&Ref = Blocks[L]; 249 250 if (Ref) { 251 if (Ref == R) return false; 252 253 Engine.logf("successor %l cannot be equivalent to %r; " 254 "it's already equivalent to %r") 255 << L << R << Ref; 256 return true; 257 } 258 259 Ref = R; 260 Queue.insert(BlockPair(L, R)); 261 return false; 262 } 263 264 /// Unifies two instructions, given that they're known not to have 265 /// structural differences. 266 void unify(const Instruction *L, const Instruction *R) { 267 DifferenceEngine::Context C(Engine, L, R); 268 269 bool Result = diff(L, R, true, true, true); 270 assert(!Result && "structural differences second time around?"); 271 (void) Result; 272 if (!L->use_empty()) 273 Values[L] = R; 274 } 275 276 void processQueue() { 277 while (!Queue.empty()) { 278 BlockPair Pair = Queue.remove_min(); 279 diff(Pair.first, Pair.second); 280 } 281 } 282 283 void checkAndReportDiffCandidates() { 284 for (BlockDiffCandidate &BDC : BlockDiffCandidates) { 285 286 // Check assumptions 287 for (const auto &[L, R] : BDC.EquivalenceAssumptions) { 288 auto It = Values.find(L); 289 if (It == Values.end() || It->second != R) { 290 BDC.KnownToDiffer = true; 291 break; 292 } 293 } 294 295 // Run block diff if the BBs differ 296 if (BDC.KnownToDiffer) { 297 DifferenceEngine::Context C(Engine, BDC.LBB, BDC.RBB); 298 runBlockDiff(BDC.LBB->begin(), BDC.RBB->begin()); 299 } 300 } 301 } 302 303 void diff(const BasicBlock *L, const BasicBlock *R) { 304 DifferenceEngine::Context C(Engine, L, R); 305 306 BasicBlock::const_iterator LI = L->begin(), LE = L->end(); 307 BasicBlock::const_iterator RI = R->begin(); 308 309 do { 310 assert(LI != LE && RI != R->end()); 311 const Instruction *LeftI = &*LI, *RightI = &*RI; 312 313 // If the instructions differ, start the more sophisticated diff 314 // algorithm at the start of the block. 315 if (diff(LeftI, RightI, false, false, true)) { 316 TentativeValues.clear(); 317 // Register (L, R) as diffing pair. Note that we could directly emit a 318 // block diff here, but this way we ensure all diffs are emitted in one 319 // consistent order, independent of whether the diffs were detected 320 // immediately or via invalid assumptions. 321 getOrCreateBlockDiffCandidate(L, R).KnownToDiffer = true; 322 return; 323 } 324 325 // Otherwise, tentatively unify them. 326 if (!LeftI->use_empty()) 327 TentativeValues.insert(std::make_pair(LeftI, RightI)); 328 329 ++LI; 330 ++RI; 331 } while (LI != LE); // This is sufficient: we can't get equality of 332 // terminators if there are residual instructions. 333 334 // Unify everything in the block, non-tentatively this time. 335 TentativeValues.clear(); 336 for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI) 337 unify(&*LI, &*RI); 338 } 339 340 bool matchForBlockDiff(const Instruction *L, const Instruction *R); 341 void runBlockDiff(BasicBlock::const_iterator LI, 342 BasicBlock::const_iterator RI); 343 344 bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) { 345 // FIXME: call attributes 346 AssumptionContext AC = {L.getParent(), R.getParent()}; 347 if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand(), 348 &AC)) { 349 if (Complain) Engine.log("called functions differ"); 350 return true; 351 } 352 if (L.arg_size() != R.arg_size()) { 353 if (Complain) Engine.log("argument counts differ"); 354 return true; 355 } 356 for (unsigned I = 0, E = L.arg_size(); I != E; ++I) 357 if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I), &AC)) { 358 if (Complain) 359 Engine.logf("arguments %l and %r differ") 360 << L.getArgOperand(I) << R.getArgOperand(I); 361 return true; 362 } 363 return false; 364 } 365 366 // If AllowAssumptions is enabled, whenever we encounter a pair of values 367 // that we cannot prove to be equivalent, we assume equivalence and store that 368 // assumption to be checked later in BlockDiffCandidates. 369 bool diff(const Instruction *L, const Instruction *R, bool Complain, 370 bool TryUnify, bool AllowAssumptions) { 371 // FIXME: metadata (if Complain is set) 372 AssumptionContext ACValue = {L->getParent(), R->getParent()}; 373 // nullptr AssumptionContext disables assumption generation. 374 const AssumptionContext *AC = AllowAssumptions ? &ACValue : nullptr; 375 376 // Different opcodes always imply different operations. 377 if (L->getOpcode() != R->getOpcode()) { 378 if (Complain) Engine.log("different instruction types"); 379 return true; 380 } 381 382 if (isa<CmpInst>(L)) { 383 if (cast<CmpInst>(L)->getPredicate() 384 != cast<CmpInst>(R)->getPredicate()) { 385 if (Complain) Engine.log("different predicates"); 386 return true; 387 } 388 } else if (isa<CallInst>(L)) { 389 return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain); 390 } else if (isa<PHINode>(L)) { 391 const PHINode &LI = cast<PHINode>(*L); 392 const PHINode &RI = cast<PHINode>(*R); 393 394 // This is really weird; type uniquing is broken? 395 if (LI.getType() != RI.getType()) { 396 if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) { 397 if (Complain) Engine.log("different phi types"); 398 return true; 399 } 400 } 401 402 if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) { 403 if (Complain) 404 Engine.log("PHI node # of incoming values differ"); 405 return true; 406 } 407 408 for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) { 409 if (TryUnify) 410 tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I)); 411 412 if (!equivalentAsOperands(LI.getIncomingValue(I), 413 RI.getIncomingValue(I), AC)) { 414 if (Complain) 415 Engine.log("PHI node incoming values differ"); 416 return true; 417 } 418 } 419 420 return false; 421 422 // Terminators. 423 } else if (isa<InvokeInst>(L)) { 424 const InvokeInst &LI = cast<InvokeInst>(*L); 425 const InvokeInst &RI = cast<InvokeInst>(*R); 426 if (diffCallSites(LI, RI, Complain)) 427 return true; 428 429 if (TryUnify) { 430 tryUnify(LI.getNormalDest(), RI.getNormalDest()); 431 tryUnify(LI.getUnwindDest(), RI.getUnwindDest()); 432 } 433 return false; 434 435 } else if (isa<CallBrInst>(L)) { 436 const CallBrInst &LI = cast<CallBrInst>(*L); 437 const CallBrInst &RI = cast<CallBrInst>(*R); 438 if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) { 439 if (Complain) 440 Engine.log("callbr # of indirect destinations differ"); 441 return true; 442 } 443 444 // Perform the "try unify" step so that we can equate the indirect 445 // destinations before checking the call site. 446 for (unsigned I = 0; I < LI.getNumIndirectDests(); I++) 447 tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I)); 448 449 if (diffCallSites(LI, RI, Complain)) 450 return true; 451 452 if (TryUnify) 453 tryUnify(LI.getDefaultDest(), RI.getDefaultDest()); 454 return false; 455 456 } else if (isa<BranchInst>(L)) { 457 const BranchInst *LI = cast<BranchInst>(L); 458 const BranchInst *RI = cast<BranchInst>(R); 459 if (LI->isConditional() != RI->isConditional()) { 460 if (Complain) Engine.log("branch conditionality differs"); 461 return true; 462 } 463 464 if (LI->isConditional()) { 465 if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) { 466 if (Complain) Engine.log("branch conditions differ"); 467 return true; 468 } 469 if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1)); 470 } 471 if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0)); 472 return false; 473 474 } else if (isa<IndirectBrInst>(L)) { 475 const IndirectBrInst *LI = cast<IndirectBrInst>(L); 476 const IndirectBrInst *RI = cast<IndirectBrInst>(R); 477 if (LI->getNumDestinations() != RI->getNumDestinations()) { 478 if (Complain) Engine.log("indirectbr # of destinations differ"); 479 return true; 480 } 481 482 if (!equivalentAsOperands(LI->getAddress(), RI->getAddress(), AC)) { 483 if (Complain) Engine.log("indirectbr addresses differ"); 484 return true; 485 } 486 487 if (TryUnify) { 488 for (unsigned i = 0; i < LI->getNumDestinations(); i++) { 489 tryUnify(LI->getDestination(i), RI->getDestination(i)); 490 } 491 } 492 return false; 493 494 } else if (isa<SwitchInst>(L)) { 495 const SwitchInst *LI = cast<SwitchInst>(L); 496 const SwitchInst *RI = cast<SwitchInst>(R); 497 if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) { 498 if (Complain) Engine.log("switch conditions differ"); 499 return true; 500 } 501 if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest()); 502 503 bool Difference = false; 504 505 DenseMap<const ConstantInt *, const BasicBlock *> LCases; 506 for (auto Case : LI->cases()) 507 LCases[Case.getCaseValue()] = Case.getCaseSuccessor(); 508 509 for (auto Case : RI->cases()) { 510 const ConstantInt *CaseValue = Case.getCaseValue(); 511 const BasicBlock *LCase = LCases[CaseValue]; 512 if (LCase) { 513 if (TryUnify) 514 tryUnify(LCase, Case.getCaseSuccessor()); 515 LCases.erase(CaseValue); 516 } else if (Complain || !Difference) { 517 if (Complain) 518 Engine.logf("right switch has extra case %r") << CaseValue; 519 Difference = true; 520 } 521 } 522 if (!Difference) 523 for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator 524 I = LCases.begin(), 525 E = LCases.end(); 526 I != E; ++I) { 527 if (Complain) 528 Engine.logf("left switch has extra case %l") << I->first; 529 Difference = true; 530 } 531 return Difference; 532 } else if (isa<UnreachableInst>(L)) { 533 return false; 534 } 535 536 if (L->getNumOperands() != R->getNumOperands()) { 537 if (Complain) Engine.log("instructions have different operand counts"); 538 return true; 539 } 540 541 for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { 542 Value *LO = L->getOperand(I), *RO = R->getOperand(I); 543 if (!equivalentAsOperands(LO, RO, AC)) { 544 if (Complain) Engine.logf("operands %l and %r differ") << LO << RO; 545 return true; 546 } 547 } 548 549 return false; 550 } 551 552 public: 553 bool equivalentAsOperands(const Constant *L, const Constant *R, 554 const AssumptionContext *AC) { 555 // Use equality as a preliminary filter. 556 if (L == R) 557 return true; 558 559 if (L->getValueID() != R->getValueID()) 560 return false; 561 562 // Ask the engine about global values. 563 if (isa<GlobalValue>(L)) 564 return Engine.equivalentAsOperands(cast<GlobalValue>(L), 565 cast<GlobalValue>(R)); 566 567 // Compare constant expressions structurally. 568 if (isa<ConstantExpr>(L)) 569 return equivalentAsOperands(cast<ConstantExpr>(L), cast<ConstantExpr>(R), 570 AC); 571 572 // Constants of the "same type" don't always actually have the same 573 // type; I don't know why. Just white-list them. 574 if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L)) 575 return true; 576 577 // Block addresses only match if we've already encountered the 578 // block. FIXME: tentative matches? 579 if (isa<BlockAddress>(L)) 580 return Blocks[cast<BlockAddress>(L)->getBasicBlock()] 581 == cast<BlockAddress>(R)->getBasicBlock(); 582 583 // If L and R are ConstantVectors, compare each element 584 if (isa<ConstantVector>(L)) { 585 const ConstantVector *CVL = cast<ConstantVector>(L); 586 const ConstantVector *CVR = cast<ConstantVector>(R); 587 if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements()) 588 return false; 589 for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) { 590 if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i), AC)) 591 return false; 592 } 593 return true; 594 } 595 596 // If L and R are ConstantArrays, compare the element count and types. 597 if (isa<ConstantArray>(L)) { 598 const ConstantArray *CAL = cast<ConstantArray>(L); 599 const ConstantArray *CAR = cast<ConstantArray>(R); 600 // Sometimes a type may be equivalent, but not uniquified---e.g. it may 601 // contain a GEP instruction. Do a deeper comparison of the types. 602 if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements()) 603 return false; 604 605 for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) { 606 if (!equivalentAsOperands(CAL->getAggregateElement(I), 607 CAR->getAggregateElement(I), AC)) 608 return false; 609 } 610 611 return true; 612 } 613 614 // If L and R are ConstantStructs, compare each field and type. 615 if (isa<ConstantStruct>(L)) { 616 const ConstantStruct *CSL = cast<ConstantStruct>(L); 617 const ConstantStruct *CSR = cast<ConstantStruct>(R); 618 619 const StructType *LTy = cast<StructType>(CSL->getType()); 620 const StructType *RTy = cast<StructType>(CSR->getType()); 621 622 // The StructTypes should have the same attributes. Don't use 623 // isLayoutIdentical(), because that just checks the element pointers, 624 // which may not work here. 625 if (LTy->getNumElements() != RTy->getNumElements() || 626 LTy->isPacked() != RTy->isPacked()) 627 return false; 628 629 for (unsigned I = 0; I < LTy->getNumElements(); I++) { 630 const Value *LAgg = CSL->getAggregateElement(I); 631 const Value *RAgg = CSR->getAggregateElement(I); 632 633 if (LAgg == SavedLHS || RAgg == SavedRHS) { 634 if (LAgg != SavedLHS || RAgg != SavedRHS) 635 // If the left and right operands aren't both re-analyzing the 636 // variable, then the initialiers don't match, so report "false". 637 // Otherwise, we skip these operands.. 638 return false; 639 640 continue; 641 } 642 643 if (!equivalentAsOperands(LAgg, RAgg, AC)) { 644 return false; 645 } 646 } 647 648 return true; 649 } 650 651 return false; 652 } 653 654 bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R, 655 const AssumptionContext *AC) { 656 if (L == R) 657 return true; 658 659 if (L->getOpcode() != R->getOpcode()) 660 return false; 661 662 switch (L->getOpcode()) { 663 case Instruction::ICmp: 664 case Instruction::FCmp: 665 if (L->getPredicate() != R->getPredicate()) 666 return false; 667 break; 668 669 case Instruction::GetElementPtr: 670 // FIXME: inbounds? 671 break; 672 673 default: 674 break; 675 } 676 677 if (L->getNumOperands() != R->getNumOperands()) 678 return false; 679 680 for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { 681 const auto *LOp = L->getOperand(I); 682 const auto *ROp = R->getOperand(I); 683 684 if (LOp == SavedLHS || ROp == SavedRHS) { 685 if (LOp != SavedLHS || ROp != SavedRHS) 686 // If the left and right operands aren't both re-analyzing the 687 // variable, then the initialiers don't match, so report "false". 688 // Otherwise, we skip these operands.. 689 return false; 690 691 continue; 692 } 693 694 if (!equivalentAsOperands(LOp, ROp, AC)) 695 return false; 696 } 697 698 return true; 699 } 700 701 // There are cases where we cannot determine whether two values are 702 // equivalent, because it depends on not yet processed basic blocks -- see the 703 // documentation on assumptions. 704 // 705 // AC is the context in which we are currently performing a diff. 706 // When we encounter a pair of values for which we can neither prove 707 // equivalence nor the opposite, we do the following: 708 // * If AC is nullptr, we treat the pair as non-equivalent. 709 // * If AC is set, we add an assumption for the basic blocks given by AC, 710 // and treat the pair as equivalent. The assumption is checked later. 711 bool equivalentAsOperands(const Value *L, const Value *R, 712 const AssumptionContext *AC) { 713 // Fall out if the values have different kind. 714 // This possibly shouldn't take priority over oracles. 715 if (L->getValueID() != R->getValueID()) 716 return false; 717 718 // Value subtypes: Argument, Constant, Instruction, BasicBlock, 719 // InlineAsm, MDNode, MDString, PseudoSourceValue 720 721 if (isa<Constant>(L)) 722 return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R), AC); 723 724 if (isa<Instruction>(L)) { 725 auto It = Values.find(L); 726 if (It != Values.end()) 727 return It->second == R; 728 729 if (TentativeValues.count(std::make_pair(L, R))) 730 return true; 731 732 // L and R might be equivalent, this could depend on not yet processed 733 // basic blocks, so we cannot decide here. 734 if (AC) { 735 // Add an assumption, unless there is a conflict with an existing one 736 BlockDiffCandidate &BDC = 737 getOrCreateBlockDiffCandidate(AC->LBB, AC->RBB); 738 auto InsertionResult = BDC.EquivalenceAssumptions.insert({L, R}); 739 if (!InsertionResult.second && InsertionResult.first->second != R) { 740 // We already have a conflicting equivalence assumption for L, so at 741 // least one must be wrong, and we know that there is a diff. 742 BDC.KnownToDiffer = true; 743 BDC.EquivalenceAssumptions.clear(); 744 return false; 745 } 746 // Optimistically assume equivalence, and check later once all BBs 747 // have been processed. 748 return true; 749 } 750 751 // Assumptions disabled, so pessimistically assume non-equivalence. 752 return false; 753 } 754 755 if (isa<Argument>(L)) 756 return Values[L] == R; 757 758 if (isa<BasicBlock>(L)) 759 return Blocks[cast<BasicBlock>(L)] != R; 760 761 // Pretend everything else is identical. 762 return true; 763 } 764 765 // Avoid a gcc warning about accessing 'this' in an initializer. 766 FunctionDifferenceEngine *this_() { return this; } 767 768 public: 769 FunctionDifferenceEngine(DifferenceEngine &Engine, 770 const Value *SavedLHS = nullptr, 771 const Value *SavedRHS = nullptr) 772 : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS), 773 Queue(QueueSorter(*this_())) {} 774 775 void diff(const Function *L, const Function *R) { 776 assert(Values.empty() && "Multiple diffs per engine are not supported!"); 777 778 if (L->arg_size() != R->arg_size()) 779 Engine.log("different argument counts"); 780 781 // Map the arguments. 782 for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(), 783 RI = R->arg_begin(), RE = R->arg_end(); 784 LI != LE && RI != RE; ++LI, ++RI) 785 Values[&*LI] = &*RI; 786 787 tryUnify(&*L->begin(), &*R->begin()); 788 processQueue(); 789 checkAndReportDiffCandidates(); 790 } 791 }; 792 793 struct DiffEntry { 794 DiffEntry() : Cost(0) {} 795 796 unsigned Cost; 797 llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange 798 }; 799 800 bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L, 801 const Instruction *R) { 802 return !diff(L, R, false, false, false); 803 } 804 805 void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart, 806 BasicBlock::const_iterator RStart) { 807 BasicBlock::const_iterator LE = LStart->getParent()->end(); 808 BasicBlock::const_iterator RE = RStart->getParent()->end(); 809 810 unsigned NL = std::distance(LStart, LE); 811 812 SmallVector<DiffEntry, 20> Paths1(NL+1); 813 SmallVector<DiffEntry, 20> Paths2(NL+1); 814 815 DiffEntry *Cur = Paths1.data(); 816 DiffEntry *Next = Paths2.data(); 817 818 const unsigned LeftCost = 2; 819 const unsigned RightCost = 2; 820 const unsigned MatchCost = 0; 821 822 assert(TentativeValues.empty()); 823 824 // Initialize the first column. 825 for (unsigned I = 0; I != NL+1; ++I) { 826 Cur[I].Cost = I * LeftCost; 827 for (unsigned J = 0; J != I; ++J) 828 Cur[I].Path.push_back(DC_left); 829 } 830 831 for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) { 832 // Initialize the first row. 833 Next[0] = Cur[0]; 834 Next[0].Cost += RightCost; 835 Next[0].Path.push_back(DC_right); 836 837 unsigned Index = 1; 838 for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) { 839 if (matchForBlockDiff(&*LI, &*RI)) { 840 Next[Index] = Cur[Index-1]; 841 Next[Index].Cost += MatchCost; 842 Next[Index].Path.push_back(DC_match); 843 TentativeValues.insert(std::make_pair(&*LI, &*RI)); 844 } else if (Next[Index-1].Cost <= Cur[Index].Cost) { 845 Next[Index] = Next[Index-1]; 846 Next[Index].Cost += LeftCost; 847 Next[Index].Path.push_back(DC_left); 848 } else { 849 Next[Index] = Cur[Index]; 850 Next[Index].Cost += RightCost; 851 Next[Index].Path.push_back(DC_right); 852 } 853 } 854 855 std::swap(Cur, Next); 856 } 857 858 // We don't need the tentative values anymore; everything from here 859 // on out should be non-tentative. 860 TentativeValues.clear(); 861 862 SmallVectorImpl<char> &Path = Cur[NL].Path; 863 BasicBlock::const_iterator LI = LStart, RI = RStart; 864 865 DiffLogBuilder Diff(Engine.getConsumer()); 866 867 // Drop trailing matches. 868 while (Path.size() && Path.back() == DC_match) 869 Path.pop_back(); 870 871 // Skip leading matches. 872 SmallVectorImpl<char>::iterator 873 PI = Path.begin(), PE = Path.end(); 874 while (PI != PE && *PI == DC_match) { 875 unify(&*LI, &*RI); 876 ++PI; 877 ++LI; 878 ++RI; 879 } 880 881 for (; PI != PE; ++PI) { 882 switch (static_cast<DiffChange>(*PI)) { 883 case DC_match: 884 assert(LI != LE && RI != RE); 885 { 886 const Instruction *L = &*LI, *R = &*RI; 887 unify(L, R); 888 Diff.addMatch(L, R); 889 } 890 ++LI; ++RI; 891 break; 892 893 case DC_left: 894 assert(LI != LE); 895 Diff.addLeft(&*LI); 896 ++LI; 897 break; 898 899 case DC_right: 900 assert(RI != RE); 901 Diff.addRight(&*RI); 902 ++RI; 903 break; 904 } 905 } 906 907 // Finishing unifying and complaining about the tails of the block, 908 // which should be matches all the way through. 909 while (LI != LE) { 910 assert(RI != RE); 911 unify(&*LI, &*RI); 912 ++LI; 913 ++RI; 914 } 915 916 // If the terminators have different kinds, but one is an invoke and the 917 // other is an unconditional branch immediately following a call, unify 918 // the results and the destinations. 919 const Instruction *LTerm = LStart->getParent()->getTerminator(); 920 const Instruction *RTerm = RStart->getParent()->getTerminator(); 921 if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) { 922 if (cast<BranchInst>(LTerm)->isConditional()) return; 923 BasicBlock::const_iterator I = LTerm->getIterator(); 924 if (I == LStart->getParent()->begin()) return; 925 --I; 926 if (!isa<CallInst>(*I)) return; 927 const CallInst *LCall = cast<CallInst>(&*I); 928 const InvokeInst *RInvoke = cast<InvokeInst>(RTerm); 929 if (!equivalentAsOperands(LCall->getCalledOperand(), 930 RInvoke->getCalledOperand(), nullptr)) 931 return; 932 if (!LCall->use_empty()) 933 Values[LCall] = RInvoke; 934 tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest()); 935 } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) { 936 if (cast<BranchInst>(RTerm)->isConditional()) return; 937 BasicBlock::const_iterator I = RTerm->getIterator(); 938 if (I == RStart->getParent()->begin()) return; 939 --I; 940 if (!isa<CallInst>(*I)) return; 941 const CallInst *RCall = cast<CallInst>(I); 942 const InvokeInst *LInvoke = cast<InvokeInst>(LTerm); 943 if (!equivalentAsOperands(LInvoke->getCalledOperand(), 944 RCall->getCalledOperand(), nullptr)) 945 return; 946 if (!LInvoke->use_empty()) 947 Values[LInvoke] = RCall; 948 tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0)); 949 } 950 } 951 } 952 953 void DifferenceEngine::Oracle::anchor() { } 954 955 void DifferenceEngine::diff(const Function *L, const Function *R) { 956 Context C(*this, L, R); 957 958 // FIXME: types 959 // FIXME: attributes and CC 960 // FIXME: parameter attributes 961 962 // If both are declarations, we're done. 963 if (L->empty() && R->empty()) 964 return; 965 else if (L->empty()) 966 log("left function is declaration, right function is definition"); 967 else if (R->empty()) 968 log("right function is declaration, left function is definition"); 969 else 970 FunctionDifferenceEngine(*this).diff(L, R); 971 } 972 973 void DifferenceEngine::diff(const Module *L, const Module *R) { 974 StringSet<> LNames; 975 SmallVector<std::pair<const Function *, const Function *>, 20> Queue; 976 977 unsigned LeftAnonCount = 0; 978 unsigned RightAnonCount = 0; 979 980 for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) { 981 const Function *LFn = &*I; 982 StringRef Name = LFn->getName(); 983 if (Name.empty()) { 984 ++LeftAnonCount; 985 continue; 986 } 987 988 LNames.insert(Name); 989 990 if (Function *RFn = R->getFunction(LFn->getName())) 991 Queue.push_back(std::make_pair(LFn, RFn)); 992 else 993 logf("function %l exists only in left module") << LFn; 994 } 995 996 for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) { 997 const Function *RFn = &*I; 998 StringRef Name = RFn->getName(); 999 if (Name.empty()) { 1000 ++RightAnonCount; 1001 continue; 1002 } 1003 1004 if (!LNames.count(Name)) 1005 logf("function %r exists only in right module") << RFn; 1006 } 1007 1008 if (LeftAnonCount != 0 || RightAnonCount != 0) { 1009 SmallString<32> Tmp; 1010 logf(("not comparing " + Twine(LeftAnonCount) + 1011 " anonymous functions in the left module and " + 1012 Twine(RightAnonCount) + " in the right module") 1013 .toStringRef(Tmp)); 1014 } 1015 1016 for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator 1017 I = Queue.begin(), 1018 E = Queue.end(); 1019 I != E; ++I) 1020 diff(I->first, I->second); 1021 } 1022 1023 bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L, 1024 const GlobalValue *R) { 1025 if (globalValueOracle) return (*globalValueOracle)(L, R); 1026 1027 if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) { 1028 const GlobalVariable *GVL = cast<GlobalVariable>(L); 1029 const GlobalVariable *GVR = cast<GlobalVariable>(R); 1030 if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() && 1031 GVR->hasLocalLinkage() && GVR->hasUniqueInitializer()) 1032 return FunctionDifferenceEngine(*this, GVL, GVR) 1033 .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer(), 1034 nullptr); 1035 } 1036 1037 return L->getName() == R->getName(); 1038 } 1039