xref: /freebsd/contrib/llvm-project/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp (revision 5ff13fbc199bdf5f0572845351c68ee5ca828e71)
1  //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This header defines the implementation of the LLVM difference
10  // engine, which structurally compares global values within a module.
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "DifferenceEngine.h"
15  #include "llvm/ADT/DenseMap.h"
16  #include "llvm/ADT/DenseSet.h"
17  #include "llvm/ADT/SmallString.h"
18  #include "llvm/ADT/SmallVector.h"
19  #include "llvm/ADT/StringSet.h"
20  #include "llvm/IR/CFG.h"
21  #include "llvm/IR/Constants.h"
22  #include "llvm/IR/Function.h"
23  #include "llvm/IR/Instructions.h"
24  #include "llvm/IR/Module.h"
25  #include "llvm/Support/ErrorHandling.h"
26  #include "llvm/Support/raw_ostream.h"
27  #include "llvm/Support/type_traits.h"
28  #include <utility>
29  
30  using namespace llvm;
31  
32  namespace {
33  
34  /// A priority queue, implemented as a heap.
35  template <class T, class Sorter, unsigned InlineCapacity>
36  class PriorityQueue {
37    Sorter Precedes;
38    llvm::SmallVector<T, InlineCapacity> Storage;
39  
40  public:
41    PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {}
42  
43    /// Checks whether the heap is empty.
44    bool empty() const { return Storage.empty(); }
45  
46    /// Insert a new value on the heap.
47    void insert(const T &V) {
48      unsigned Index = Storage.size();
49      Storage.push_back(V);
50      if (Index == 0) return;
51  
52      T *data = Storage.data();
53      while (true) {
54        unsigned Target = (Index + 1) / 2 - 1;
55        if (!Precedes(data[Index], data[Target])) return;
56        std::swap(data[Index], data[Target]);
57        if (Target == 0) return;
58        Index = Target;
59      }
60    }
61  
62    /// Remove the minimum value in the heap.  Only valid on a non-empty heap.
63    T remove_min() {
64      assert(!empty());
65      T tmp = Storage[0];
66  
67      unsigned NewSize = Storage.size() - 1;
68      if (NewSize) {
69        // Move the slot at the end to the beginning.
70        if (std::is_trivially_copyable<T>::value)
71          Storage[0] = Storage[NewSize];
72        else
73          std::swap(Storage[0], Storage[NewSize]);
74  
75        // Bubble the root up as necessary.
76        unsigned Index = 0;
77        while (true) {
78          // With a 1-based index, the children would be Index*2 and Index*2+1.
79          unsigned R = (Index + 1) * 2;
80          unsigned L = R - 1;
81  
82          // If R is out of bounds, we're done after this in any case.
83          if (R >= NewSize) {
84            // If L is also out of bounds, we're done immediately.
85            if (L >= NewSize) break;
86  
87            // Otherwise, test whether we should swap L and Index.
88            if (Precedes(Storage[L], Storage[Index]))
89              std::swap(Storage[L], Storage[Index]);
90            break;
91          }
92  
93          // Otherwise, we need to compare with the smaller of L and R.
94          // Prefer R because it's closer to the end of the array.
95          unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R);
96  
97          // If Index is >= the min of L and R, then heap ordering is restored.
98          if (!Precedes(Storage[IndexToTest], Storage[Index]))
99            break;
100  
101          // Otherwise, keep bubbling up.
102          std::swap(Storage[IndexToTest], Storage[Index]);
103          Index = IndexToTest;
104        }
105      }
106      Storage.pop_back();
107  
108      return tmp;
109    }
110  };
111  
112  /// A function-scope difference engine.
113  class FunctionDifferenceEngine {
114    DifferenceEngine &Engine;
115  
116    // Some initializers may reference the variable we're currently checking. This
117    // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent
118    // recursing.
119    const Value *SavedLHS;
120    const Value *SavedRHS;
121  
122    /// The current mapping from old local values to new local values.
123    DenseMap<const Value *, const Value *> Values;
124  
125    /// The current mapping from old blocks to new blocks.
126    DenseMap<const BasicBlock *, const BasicBlock *> Blocks;
127  
128    DenseSet<std::pair<const Value *, const Value *>> TentativeValues;
129  
130    unsigned getUnprocPredCount(const BasicBlock *Block) const {
131      unsigned Count = 0;
132      for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E;
133           ++I)
134        if (!Blocks.count(*I)) Count++;
135      return Count;
136    }
137  
138    typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair;
139  
140    /// A type which sorts a priority queue by the number of unprocessed
141    /// predecessor blocks it has remaining.
142    ///
143    /// This is actually really expensive to calculate.
144    struct QueueSorter {
145      const FunctionDifferenceEngine &fde;
146      explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {}
147  
148      bool operator()(BlockPair &Old, BlockPair &New) {
149        return fde.getUnprocPredCount(Old.first)
150             < fde.getUnprocPredCount(New.first);
151      }
152    };
153  
154    /// A queue of unified blocks to process.
155    PriorityQueue<BlockPair, QueueSorter, 20> Queue;
156  
157    /// Try to unify the given two blocks.  Enqueues them for processing
158    /// if they haven't already been processed.
159    ///
160    /// Returns true if there was a problem unifying them.
161    bool tryUnify(const BasicBlock *L, const BasicBlock *R) {
162      const BasicBlock *&Ref = Blocks[L];
163  
164      if (Ref) {
165        if (Ref == R) return false;
166  
167        Engine.logf("successor %l cannot be equivalent to %r; "
168                    "it's already equivalent to %r")
169          << L << R << Ref;
170        return true;
171      }
172  
173      Ref = R;
174      Queue.insert(BlockPair(L, R));
175      return false;
176    }
177  
178    /// Unifies two instructions, given that they're known not to have
179    /// structural differences.
180    void unify(const Instruction *L, const Instruction *R) {
181      DifferenceEngine::Context C(Engine, L, R);
182  
183      bool Result = diff(L, R, true, true);
184      assert(!Result && "structural differences second time around?");
185      (void) Result;
186      if (!L->use_empty())
187        Values[L] = R;
188    }
189  
190    void processQueue() {
191      while (!Queue.empty()) {
192        BlockPair Pair = Queue.remove_min();
193        diff(Pair.first, Pair.second);
194      }
195    }
196  
197    void diff(const BasicBlock *L, const BasicBlock *R) {
198      DifferenceEngine::Context C(Engine, L, R);
199  
200      BasicBlock::const_iterator LI = L->begin(), LE = L->end();
201      BasicBlock::const_iterator RI = R->begin();
202  
203      do {
204        assert(LI != LE && RI != R->end());
205        const Instruction *LeftI = &*LI, *RightI = &*RI;
206  
207        // If the instructions differ, start the more sophisticated diff
208        // algorithm at the start of the block.
209        if (diff(LeftI, RightI, false, false)) {
210          TentativeValues.clear();
211          return runBlockDiff(L->begin(), R->begin());
212        }
213  
214        // Otherwise, tentatively unify them.
215        if (!LeftI->use_empty())
216          TentativeValues.insert(std::make_pair(LeftI, RightI));
217  
218        ++LI;
219        ++RI;
220      } while (LI != LE); // This is sufficient: we can't get equality of
221                          // terminators if there are residual instructions.
222  
223      // Unify everything in the block, non-tentatively this time.
224      TentativeValues.clear();
225      for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI)
226        unify(&*LI, &*RI);
227    }
228  
229    bool matchForBlockDiff(const Instruction *L, const Instruction *R);
230    void runBlockDiff(BasicBlock::const_iterator LI,
231                      BasicBlock::const_iterator RI);
232  
233    bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) {
234      // FIXME: call attributes
235      if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand())) {
236        if (Complain) Engine.log("called functions differ");
237        return true;
238      }
239      if (L.arg_size() != R.arg_size()) {
240        if (Complain) Engine.log("argument counts differ");
241        return true;
242      }
243      for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
244        if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I))) {
245          if (Complain)
246            Engine.logf("arguments %l and %r differ")
247                << L.getArgOperand(I) << R.getArgOperand(I);
248          return true;
249        }
250      return false;
251    }
252  
253    bool diff(const Instruction *L, const Instruction *R, bool Complain,
254              bool TryUnify) {
255      // FIXME: metadata (if Complain is set)
256  
257      // Different opcodes always imply different operations.
258      if (L->getOpcode() != R->getOpcode()) {
259        if (Complain) Engine.log("different instruction types");
260        return true;
261      }
262  
263      if (isa<CmpInst>(L)) {
264        if (cast<CmpInst>(L)->getPredicate()
265              != cast<CmpInst>(R)->getPredicate()) {
266          if (Complain) Engine.log("different predicates");
267          return true;
268        }
269      } else if (isa<CallInst>(L)) {
270        return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain);
271      } else if (isa<PHINode>(L)) {
272        const PHINode &LI = cast<PHINode>(*L);
273        const PHINode &RI = cast<PHINode>(*R);
274  
275        // This is really weird;  type uniquing is broken?
276        if (LI.getType() != RI.getType()) {
277          if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) {
278            if (Complain) Engine.log("different phi types");
279            return true;
280          }
281        }
282  
283        if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) {
284          if (Complain)
285            Engine.log("PHI node # of incoming values differ");
286          return true;
287        }
288  
289        for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) {
290          if (TryUnify)
291            tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I));
292  
293          if (!equivalentAsOperands(LI.getIncomingValue(I),
294                                    RI.getIncomingValue(I))) {
295            if (Complain)
296              Engine.log("PHI node incoming values differ");
297            return true;
298          }
299        }
300  
301        return false;
302  
303      // Terminators.
304      } else if (isa<InvokeInst>(L)) {
305        const InvokeInst &LI = cast<InvokeInst>(*L);
306        const InvokeInst &RI = cast<InvokeInst>(*R);
307        if (diffCallSites(LI, RI, Complain))
308          return true;
309  
310        if (TryUnify) {
311          tryUnify(LI.getNormalDest(), RI.getNormalDest());
312          tryUnify(LI.getUnwindDest(), RI.getUnwindDest());
313        }
314        return false;
315  
316      } else if (isa<CallBrInst>(L)) {
317        const CallBrInst &LI = cast<CallBrInst>(*L);
318        const CallBrInst &RI = cast<CallBrInst>(*R);
319        if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) {
320          if (Complain)
321            Engine.log("callbr # of indirect destinations differ");
322          return true;
323        }
324  
325        // Perform the "try unify" step so that we can equate the indirect
326        // destinations before checking the call site.
327        for (unsigned I = 0; I < LI.getNumIndirectDests(); I++)
328          tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I));
329  
330        if (diffCallSites(LI, RI, Complain))
331          return true;
332  
333        if (TryUnify)
334          tryUnify(LI.getDefaultDest(), RI.getDefaultDest());
335        return false;
336  
337      } else if (isa<BranchInst>(L)) {
338        const BranchInst *LI = cast<BranchInst>(L);
339        const BranchInst *RI = cast<BranchInst>(R);
340        if (LI->isConditional() != RI->isConditional()) {
341          if (Complain) Engine.log("branch conditionality differs");
342          return true;
343        }
344  
345        if (LI->isConditional()) {
346          if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
347            if (Complain) Engine.log("branch conditions differ");
348            return true;
349          }
350          if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1));
351        }
352        if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0));
353        return false;
354  
355      } else if (isa<IndirectBrInst>(L)) {
356        const IndirectBrInst *LI = cast<IndirectBrInst>(L);
357        const IndirectBrInst *RI = cast<IndirectBrInst>(R);
358        if (LI->getNumDestinations() != RI->getNumDestinations()) {
359          if (Complain) Engine.log("indirectbr # of destinations differ");
360          return true;
361        }
362  
363        if (!equivalentAsOperands(LI->getAddress(), RI->getAddress())) {
364          if (Complain) Engine.log("indirectbr addresses differ");
365          return true;
366        }
367  
368        if (TryUnify) {
369          for (unsigned i = 0; i < LI->getNumDestinations(); i++) {
370            tryUnify(LI->getDestination(i), RI->getDestination(i));
371          }
372        }
373        return false;
374  
375      } else if (isa<SwitchInst>(L)) {
376        const SwitchInst *LI = cast<SwitchInst>(L);
377        const SwitchInst *RI = cast<SwitchInst>(R);
378        if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
379          if (Complain) Engine.log("switch conditions differ");
380          return true;
381        }
382        if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest());
383  
384        bool Difference = false;
385  
386        DenseMap<const ConstantInt *, const BasicBlock *> LCases;
387        for (auto Case : LI->cases())
388          LCases[Case.getCaseValue()] = Case.getCaseSuccessor();
389  
390        for (auto Case : RI->cases()) {
391          const ConstantInt *CaseValue = Case.getCaseValue();
392          const BasicBlock *LCase = LCases[CaseValue];
393          if (LCase) {
394            if (TryUnify)
395              tryUnify(LCase, Case.getCaseSuccessor());
396            LCases.erase(CaseValue);
397          } else if (Complain || !Difference) {
398            if (Complain)
399              Engine.logf("right switch has extra case %r") << CaseValue;
400            Difference = true;
401          }
402        }
403        if (!Difference)
404          for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator
405                   I = LCases.begin(),
406                   E = LCases.end();
407               I != E; ++I) {
408            if (Complain)
409              Engine.logf("left switch has extra case %l") << I->first;
410            Difference = true;
411          }
412        return Difference;
413      } else if (isa<UnreachableInst>(L)) {
414        return false;
415      }
416  
417      if (L->getNumOperands() != R->getNumOperands()) {
418        if (Complain) Engine.log("instructions have different operand counts");
419        return true;
420      }
421  
422      for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
423        Value *LO = L->getOperand(I), *RO = R->getOperand(I);
424        if (!equivalentAsOperands(LO, RO)) {
425          if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;
426          return true;
427        }
428      }
429  
430      return false;
431    }
432  
433  public:
434    bool equivalentAsOperands(const Constant *L, const Constant *R) {
435      // Use equality as a preliminary filter.
436      if (L == R)
437        return true;
438  
439      if (L->getValueID() != R->getValueID())
440        return false;
441  
442      // Ask the engine about global values.
443      if (isa<GlobalValue>(L))
444        return Engine.equivalentAsOperands(cast<GlobalValue>(L),
445                                           cast<GlobalValue>(R));
446  
447      // Compare constant expressions structurally.
448      if (isa<ConstantExpr>(L))
449        return equivalentAsOperands(cast<ConstantExpr>(L),
450                                    cast<ConstantExpr>(R));
451  
452      // Constants of the "same type" don't always actually have the same
453      // type; I don't know why.  Just white-list them.
454      if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L))
455        return true;
456  
457      // Block addresses only match if we've already encountered the
458      // block.  FIXME: tentative matches?
459      if (isa<BlockAddress>(L))
460        return Blocks[cast<BlockAddress>(L)->getBasicBlock()]
461                   == cast<BlockAddress>(R)->getBasicBlock();
462  
463      // If L and R are ConstantVectors, compare each element
464      if (isa<ConstantVector>(L)) {
465        const ConstantVector *CVL = cast<ConstantVector>(L);
466        const ConstantVector *CVR = cast<ConstantVector>(R);
467        if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements())
468          return false;
469        for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) {
470          if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i)))
471            return false;
472        }
473        return true;
474      }
475  
476      // If L and R are ConstantArrays, compare the element count and types.
477      if (isa<ConstantArray>(L)) {
478        const ConstantArray *CAL = cast<ConstantArray>(L);
479        const ConstantArray *CAR = cast<ConstantArray>(R);
480        // Sometimes a type may be equivalent, but not uniquified---e.g. it may
481        // contain a GEP instruction. Do a deeper comparison of the types.
482        if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements())
483          return false;
484  
485        for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) {
486          if (!equivalentAsOperands(CAL->getAggregateElement(I),
487                                    CAR->getAggregateElement(I)))
488            return false;
489        }
490  
491        return true;
492      }
493  
494      // If L and R are ConstantStructs, compare each field and type.
495      if (isa<ConstantStruct>(L)) {
496        const ConstantStruct *CSL = cast<ConstantStruct>(L);
497        const ConstantStruct *CSR = cast<ConstantStruct>(R);
498  
499        const StructType *LTy = cast<StructType>(CSL->getType());
500        const StructType *RTy = cast<StructType>(CSR->getType());
501  
502        // The StructTypes should have the same attributes. Don't use
503        // isLayoutIdentical(), because that just checks the element pointers,
504        // which may not work here.
505        if (LTy->getNumElements() != RTy->getNumElements() ||
506            LTy->isPacked() != RTy->isPacked())
507          return false;
508  
509        for (unsigned I = 0; I < LTy->getNumElements(); I++) {
510          const Value *LAgg = CSL->getAggregateElement(I);
511          const Value *RAgg = CSR->getAggregateElement(I);
512  
513          if (LAgg == SavedLHS || RAgg == SavedRHS) {
514            if (LAgg != SavedLHS || RAgg != SavedRHS)
515              // If the left and right operands aren't both re-analyzing the
516              // variable, then the initialiers don't match, so report "false".
517              // Otherwise, we skip these operands..
518              return false;
519  
520            continue;
521          }
522  
523          if (!equivalentAsOperands(LAgg, RAgg)) {
524            return false;
525          }
526        }
527  
528        return true;
529      }
530  
531      return false;
532    }
533  
534    bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R) {
535      if (L == R)
536        return true;
537  
538      if (L->getOpcode() != R->getOpcode())
539        return false;
540  
541      switch (L->getOpcode()) {
542      case Instruction::ICmp:
543      case Instruction::FCmp:
544        if (L->getPredicate() != R->getPredicate())
545          return false;
546        break;
547  
548      case Instruction::GetElementPtr:
549        // FIXME: inbounds?
550        break;
551  
552      default:
553        break;
554      }
555  
556      if (L->getNumOperands() != R->getNumOperands())
557        return false;
558  
559      for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
560        const auto *LOp = L->getOperand(I);
561        const auto *ROp = R->getOperand(I);
562  
563        if (LOp == SavedLHS || ROp == SavedRHS) {
564          if (LOp != SavedLHS || ROp != SavedRHS)
565            // If the left and right operands aren't both re-analyzing the
566            // variable, then the initialiers don't match, so report "false".
567            // Otherwise, we skip these operands..
568            return false;
569  
570          continue;
571        }
572  
573        if (!equivalentAsOperands(LOp, ROp))
574          return false;
575      }
576  
577      return true;
578    }
579  
580    bool equivalentAsOperands(const Value *L, const Value *R) {
581      // Fall out if the values have different kind.
582      // This possibly shouldn't take priority over oracles.
583      if (L->getValueID() != R->getValueID())
584        return false;
585  
586      // Value subtypes:  Argument, Constant, Instruction, BasicBlock,
587      //                  InlineAsm, MDNode, MDString, PseudoSourceValue
588  
589      if (isa<Constant>(L))
590        return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R));
591  
592      if (isa<Instruction>(L))
593        return Values[L] == R || TentativeValues.count(std::make_pair(L, R));
594  
595      if (isa<Argument>(L))
596        return Values[L] == R;
597  
598      if (isa<BasicBlock>(L))
599        return Blocks[cast<BasicBlock>(L)] != R;
600  
601      // Pretend everything else is identical.
602      return true;
603    }
604  
605    // Avoid a gcc warning about accessing 'this' in an initializer.
606    FunctionDifferenceEngine *this_() { return this; }
607  
608  public:
609    FunctionDifferenceEngine(DifferenceEngine &Engine,
610                             const Value *SavedLHS = nullptr,
611                             const Value *SavedRHS = nullptr)
612        : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS),
613          Queue(QueueSorter(*this_())) {}
614  
615    void diff(const Function *L, const Function *R) {
616      if (L->arg_size() != R->arg_size())
617        Engine.log("different argument counts");
618  
619      // Map the arguments.
620      for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(),
621                                        RI = R->arg_begin(), RE = R->arg_end();
622           LI != LE && RI != RE; ++LI, ++RI)
623        Values[&*LI] = &*RI;
624  
625      tryUnify(&*L->begin(), &*R->begin());
626      processQueue();
627    }
628  };
629  
630  struct DiffEntry {
631    DiffEntry() : Cost(0) {}
632  
633    unsigned Cost;
634    llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange
635  };
636  
637  bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L,
638                                                   const Instruction *R) {
639    return !diff(L, R, false, false);
640  }
641  
642  void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart,
643                                              BasicBlock::const_iterator RStart) {
644    BasicBlock::const_iterator LE = LStart->getParent()->end();
645    BasicBlock::const_iterator RE = RStart->getParent()->end();
646  
647    unsigned NL = std::distance(LStart, LE);
648  
649    SmallVector<DiffEntry, 20> Paths1(NL+1);
650    SmallVector<DiffEntry, 20> Paths2(NL+1);
651  
652    DiffEntry *Cur = Paths1.data();
653    DiffEntry *Next = Paths2.data();
654  
655    const unsigned LeftCost = 2;
656    const unsigned RightCost = 2;
657    const unsigned MatchCost = 0;
658  
659    assert(TentativeValues.empty());
660  
661    // Initialize the first column.
662    for (unsigned I = 0; I != NL+1; ++I) {
663      Cur[I].Cost = I * LeftCost;
664      for (unsigned J = 0; J != I; ++J)
665        Cur[I].Path.push_back(DC_left);
666    }
667  
668    for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) {
669      // Initialize the first row.
670      Next[0] = Cur[0];
671      Next[0].Cost += RightCost;
672      Next[0].Path.push_back(DC_right);
673  
674      unsigned Index = 1;
675      for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) {
676        if (matchForBlockDiff(&*LI, &*RI)) {
677          Next[Index] = Cur[Index-1];
678          Next[Index].Cost += MatchCost;
679          Next[Index].Path.push_back(DC_match);
680          TentativeValues.insert(std::make_pair(&*LI, &*RI));
681        } else if (Next[Index-1].Cost <= Cur[Index].Cost) {
682          Next[Index] = Next[Index-1];
683          Next[Index].Cost += LeftCost;
684          Next[Index].Path.push_back(DC_left);
685        } else {
686          Next[Index] = Cur[Index];
687          Next[Index].Cost += RightCost;
688          Next[Index].Path.push_back(DC_right);
689        }
690      }
691  
692      std::swap(Cur, Next);
693    }
694  
695    // We don't need the tentative values anymore; everything from here
696    // on out should be non-tentative.
697    TentativeValues.clear();
698  
699    SmallVectorImpl<char> &Path = Cur[NL].Path;
700    BasicBlock::const_iterator LI = LStart, RI = RStart;
701  
702    DiffLogBuilder Diff(Engine.getConsumer());
703  
704    // Drop trailing matches.
705    while (Path.size() && Path.back() == DC_match)
706      Path.pop_back();
707  
708    // Skip leading matches.
709    SmallVectorImpl<char>::iterator
710      PI = Path.begin(), PE = Path.end();
711    while (PI != PE && *PI == DC_match) {
712      unify(&*LI, &*RI);
713      ++PI;
714      ++LI;
715      ++RI;
716    }
717  
718    for (; PI != PE; ++PI) {
719      switch (static_cast<DiffChange>(*PI)) {
720      case DC_match:
721        assert(LI != LE && RI != RE);
722        {
723          const Instruction *L = &*LI, *R = &*RI;
724          unify(L, R);
725          Diff.addMatch(L, R);
726        }
727        ++LI; ++RI;
728        break;
729  
730      case DC_left:
731        assert(LI != LE);
732        Diff.addLeft(&*LI);
733        ++LI;
734        break;
735  
736      case DC_right:
737        assert(RI != RE);
738        Diff.addRight(&*RI);
739        ++RI;
740        break;
741      }
742    }
743  
744    // Finishing unifying and complaining about the tails of the block,
745    // which should be matches all the way through.
746    while (LI != LE) {
747      assert(RI != RE);
748      unify(&*LI, &*RI);
749      ++LI;
750      ++RI;
751    }
752  
753    // If the terminators have different kinds, but one is an invoke and the
754    // other is an unconditional branch immediately following a call, unify
755    // the results and the destinations.
756    const Instruction *LTerm = LStart->getParent()->getTerminator();
757    const Instruction *RTerm = RStart->getParent()->getTerminator();
758    if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
759      if (cast<BranchInst>(LTerm)->isConditional()) return;
760      BasicBlock::const_iterator I = LTerm->getIterator();
761      if (I == LStart->getParent()->begin()) return;
762      --I;
763      if (!isa<CallInst>(*I)) return;
764      const CallInst *LCall = cast<CallInst>(&*I);
765      const InvokeInst *RInvoke = cast<InvokeInst>(RTerm);
766      if (!equivalentAsOperands(LCall->getCalledOperand(),
767                                RInvoke->getCalledOperand()))
768        return;
769      if (!LCall->use_empty())
770        Values[LCall] = RInvoke;
771      tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest());
772    } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) {
773      if (cast<BranchInst>(RTerm)->isConditional()) return;
774      BasicBlock::const_iterator I = RTerm->getIterator();
775      if (I == RStart->getParent()->begin()) return;
776      --I;
777      if (!isa<CallInst>(*I)) return;
778      const CallInst *RCall = cast<CallInst>(I);
779      const InvokeInst *LInvoke = cast<InvokeInst>(LTerm);
780      if (!equivalentAsOperands(LInvoke->getCalledOperand(),
781                                RCall->getCalledOperand()))
782        return;
783      if (!LInvoke->use_empty())
784        Values[LInvoke] = RCall;
785      tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0));
786    }
787  }
788  }
789  
790  void DifferenceEngine::Oracle::anchor() { }
791  
792  void DifferenceEngine::diff(const Function *L, const Function *R) {
793    Context C(*this, L, R);
794  
795    // FIXME: types
796    // FIXME: attributes and CC
797    // FIXME: parameter attributes
798  
799    // If both are declarations, we're done.
800    if (L->empty() && R->empty())
801      return;
802    else if (L->empty())
803      log("left function is declaration, right function is definition");
804    else if (R->empty())
805      log("right function is declaration, left function is definition");
806    else
807      FunctionDifferenceEngine(*this).diff(L, R);
808  }
809  
810  void DifferenceEngine::diff(const Module *L, const Module *R) {
811    StringSet<> LNames;
812    SmallVector<std::pair<const Function *, const Function *>, 20> Queue;
813  
814    unsigned LeftAnonCount = 0;
815    unsigned RightAnonCount = 0;
816  
817    for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) {
818      const Function *LFn = &*I;
819      StringRef Name = LFn->getName();
820      if (Name.empty()) {
821        ++LeftAnonCount;
822        continue;
823      }
824  
825      LNames.insert(Name);
826  
827      if (Function *RFn = R->getFunction(LFn->getName()))
828        Queue.push_back(std::make_pair(LFn, RFn));
829      else
830        logf("function %l exists only in left module") << LFn;
831    }
832  
833    for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) {
834      const Function *RFn = &*I;
835      StringRef Name = RFn->getName();
836      if (Name.empty()) {
837        ++RightAnonCount;
838        continue;
839      }
840  
841      if (!LNames.count(Name))
842        logf("function %r exists only in right module") << RFn;
843    }
844  
845    if (LeftAnonCount != 0 || RightAnonCount != 0) {
846      SmallString<32> Tmp;
847      logf(("not comparing " + Twine(LeftAnonCount) +
848            " anonymous functions in the left module and " +
849            Twine(RightAnonCount) + " in the right module")
850               .toStringRef(Tmp));
851    }
852  
853    for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator
854             I = Queue.begin(),
855             E = Queue.end();
856         I != E; ++I)
857      diff(I->first, I->second);
858  }
859  
860  bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L,
861                                              const GlobalValue *R) {
862    if (globalValueOracle) return (*globalValueOracle)(L, R);
863  
864    if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) {
865      const GlobalVariable *GVL = cast<GlobalVariable>(L);
866      const GlobalVariable *GVR = cast<GlobalVariable>(R);
867      if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&
868          GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())
869        return FunctionDifferenceEngine(*this, GVL, GVR)
870            .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer());
871    }
872  
873    return L->getName() == R->getName();
874  }
875