xref: /freebsd/contrib/llvm-project/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp (revision 04eeddc0aa8e0a417a16eaf9d7d095207f4a8623)
1 //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines the implementation of the LLVM difference
10 // engine, which structurally compares global values within a module.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "DifferenceEngine.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/IR/CFG.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include "llvm/Support/type_traits.h"
28 #include <utility>
29 
30 using namespace llvm;
31 
32 namespace {
33 
34 /// A priority queue, implemented as a heap.
35 template <class T, class Sorter, unsigned InlineCapacity>
36 class PriorityQueue {
37   Sorter Precedes;
38   llvm::SmallVector<T, InlineCapacity> Storage;
39 
40 public:
41   PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {}
42 
43   /// Checks whether the heap is empty.
44   bool empty() const { return Storage.empty(); }
45 
46   /// Insert a new value on the heap.
47   void insert(const T &V) {
48     unsigned Index = Storage.size();
49     Storage.push_back(V);
50     if (Index == 0) return;
51 
52     T *data = Storage.data();
53     while (true) {
54       unsigned Target = (Index + 1) / 2 - 1;
55       if (!Precedes(data[Index], data[Target])) return;
56       std::swap(data[Index], data[Target]);
57       if (Target == 0) return;
58       Index = Target;
59     }
60   }
61 
62   /// Remove the minimum value in the heap.  Only valid on a non-empty heap.
63   T remove_min() {
64     assert(!empty());
65     T tmp = Storage[0];
66 
67     unsigned NewSize = Storage.size() - 1;
68     if (NewSize) {
69       // Move the slot at the end to the beginning.
70       if (std::is_trivially_copyable<T>::value)
71         Storage[0] = Storage[NewSize];
72       else
73         std::swap(Storage[0], Storage[NewSize]);
74 
75       // Bubble the root up as necessary.
76       unsigned Index = 0;
77       while (true) {
78         // With a 1-based index, the children would be Index*2 and Index*2+1.
79         unsigned R = (Index + 1) * 2;
80         unsigned L = R - 1;
81 
82         // If R is out of bounds, we're done after this in any case.
83         if (R >= NewSize) {
84           // If L is also out of bounds, we're done immediately.
85           if (L >= NewSize) break;
86 
87           // Otherwise, test whether we should swap L and Index.
88           if (Precedes(Storage[L], Storage[Index]))
89             std::swap(Storage[L], Storage[Index]);
90           break;
91         }
92 
93         // Otherwise, we need to compare with the smaller of L and R.
94         // Prefer R because it's closer to the end of the array.
95         unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R);
96 
97         // If Index is >= the min of L and R, then heap ordering is restored.
98         if (!Precedes(Storage[IndexToTest], Storage[Index]))
99           break;
100 
101         // Otherwise, keep bubbling up.
102         std::swap(Storage[IndexToTest], Storage[Index]);
103         Index = IndexToTest;
104       }
105     }
106     Storage.pop_back();
107 
108     return tmp;
109   }
110 };
111 
112 /// A function-scope difference engine.
113 class FunctionDifferenceEngine {
114   DifferenceEngine &Engine;
115 
116   // Some initializers may reference the variable we're currently checking. This
117   // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent
118   // recursing.
119   const Value *SavedLHS;
120   const Value *SavedRHS;
121 
122   /// The current mapping from old local values to new local values.
123   DenseMap<const Value *, const Value *> Values;
124 
125   /// The current mapping from old blocks to new blocks.
126   DenseMap<const BasicBlock *, const BasicBlock *> Blocks;
127 
128   DenseSet<std::pair<const Value *, const Value *>> TentativeValues;
129 
130   unsigned getUnprocPredCount(const BasicBlock *Block) const {
131     unsigned Count = 0;
132     for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E;
133          ++I)
134       if (!Blocks.count(*I)) Count++;
135     return Count;
136   }
137 
138   typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair;
139 
140   /// A type which sorts a priority queue by the number of unprocessed
141   /// predecessor blocks it has remaining.
142   ///
143   /// This is actually really expensive to calculate.
144   struct QueueSorter {
145     const FunctionDifferenceEngine &fde;
146     explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {}
147 
148     bool operator()(BlockPair &Old, BlockPair &New) {
149       return fde.getUnprocPredCount(Old.first)
150            < fde.getUnprocPredCount(New.first);
151     }
152   };
153 
154   /// A queue of unified blocks to process.
155   PriorityQueue<BlockPair, QueueSorter, 20> Queue;
156 
157   /// Try to unify the given two blocks.  Enqueues them for processing
158   /// if they haven't already been processed.
159   ///
160   /// Returns true if there was a problem unifying them.
161   bool tryUnify(const BasicBlock *L, const BasicBlock *R) {
162     const BasicBlock *&Ref = Blocks[L];
163 
164     if (Ref) {
165       if (Ref == R) return false;
166 
167       Engine.logf("successor %l cannot be equivalent to %r; "
168                   "it's already equivalent to %r")
169         << L << R << Ref;
170       return true;
171     }
172 
173     Ref = R;
174     Queue.insert(BlockPair(L, R));
175     return false;
176   }
177 
178   /// Unifies two instructions, given that they're known not to have
179   /// structural differences.
180   void unify(const Instruction *L, const Instruction *R) {
181     DifferenceEngine::Context C(Engine, L, R);
182 
183     bool Result = diff(L, R, true, true);
184     assert(!Result && "structural differences second time around?");
185     (void) Result;
186     if (!L->use_empty())
187       Values[L] = R;
188   }
189 
190   void processQueue() {
191     while (!Queue.empty()) {
192       BlockPair Pair = Queue.remove_min();
193       diff(Pair.first, Pair.second);
194     }
195   }
196 
197   void diff(const BasicBlock *L, const BasicBlock *R) {
198     DifferenceEngine::Context C(Engine, L, R);
199 
200     BasicBlock::const_iterator LI = L->begin(), LE = L->end();
201     BasicBlock::const_iterator RI = R->begin();
202 
203     do {
204       assert(LI != LE && RI != R->end());
205       const Instruction *LeftI = &*LI, *RightI = &*RI;
206 
207       // If the instructions differ, start the more sophisticated diff
208       // algorithm at the start of the block.
209       if (diff(LeftI, RightI, false, false)) {
210         TentativeValues.clear();
211         return runBlockDiff(L->begin(), R->begin());
212       }
213 
214       // Otherwise, tentatively unify them.
215       if (!LeftI->use_empty())
216         TentativeValues.insert(std::make_pair(LeftI, RightI));
217 
218       ++LI;
219       ++RI;
220     } while (LI != LE); // This is sufficient: we can't get equality of
221                         // terminators if there are residual instructions.
222 
223     // Unify everything in the block, non-tentatively this time.
224     TentativeValues.clear();
225     for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI)
226       unify(&*LI, &*RI);
227   }
228 
229   bool matchForBlockDiff(const Instruction *L, const Instruction *R);
230   void runBlockDiff(BasicBlock::const_iterator LI,
231                     BasicBlock::const_iterator RI);
232 
233   bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) {
234     // FIXME: call attributes
235     if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand())) {
236       if (Complain) Engine.log("called functions differ");
237       return true;
238     }
239     if (L.arg_size() != R.arg_size()) {
240       if (Complain) Engine.log("argument counts differ");
241       return true;
242     }
243     for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
244       if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I))) {
245         if (Complain)
246           Engine.logf("arguments %l and %r differ")
247               << L.getArgOperand(I) << R.getArgOperand(I);
248         return true;
249       }
250     return false;
251   }
252 
253   bool diff(const Instruction *L, const Instruction *R, bool Complain,
254             bool TryUnify) {
255     // FIXME: metadata (if Complain is set)
256 
257     // Different opcodes always imply different operations.
258     if (L->getOpcode() != R->getOpcode()) {
259       if (Complain) Engine.log("different instruction types");
260       return true;
261     }
262 
263     if (isa<CmpInst>(L)) {
264       if (cast<CmpInst>(L)->getPredicate()
265             != cast<CmpInst>(R)->getPredicate()) {
266         if (Complain) Engine.log("different predicates");
267         return true;
268       }
269     } else if (isa<CallInst>(L)) {
270       return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain);
271     } else if (isa<PHINode>(L)) {
272       const PHINode &LI = cast<PHINode>(*L);
273       const PHINode &RI = cast<PHINode>(*R);
274 
275       // This is really weird;  type uniquing is broken?
276       if (LI.getType() != RI.getType()) {
277         if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) {
278           if (Complain) Engine.log("different phi types");
279           return true;
280         }
281       }
282 
283       if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) {
284         if (Complain)
285           Engine.log("PHI node # of incoming values differ");
286         return true;
287       }
288 
289       for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) {
290         if (TryUnify)
291           tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I));
292 
293         if (!equivalentAsOperands(LI.getIncomingValue(I),
294                                   RI.getIncomingValue(I))) {
295           if (Complain)
296             Engine.log("PHI node incoming values differ");
297           return true;
298         }
299       }
300 
301       return false;
302 
303     // Terminators.
304     } else if (isa<InvokeInst>(L)) {
305       const InvokeInst &LI = cast<InvokeInst>(*L);
306       const InvokeInst &RI = cast<InvokeInst>(*R);
307       if (diffCallSites(LI, RI, Complain))
308         return true;
309 
310       if (TryUnify) {
311         tryUnify(LI.getNormalDest(), RI.getNormalDest());
312         tryUnify(LI.getUnwindDest(), RI.getUnwindDest());
313       }
314       return false;
315 
316     } else if (isa<CallBrInst>(L)) {
317       const CallBrInst &LI = cast<CallBrInst>(*L);
318       const CallBrInst &RI = cast<CallBrInst>(*R);
319       if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) {
320         if (Complain)
321           Engine.log("callbr # of indirect destinations differ");
322         return true;
323       }
324 
325       // Perform the "try unify" step so that we can equate the indirect
326       // destinations before checking the call site.
327       for (unsigned I = 0; I < LI.getNumIndirectDests(); I++)
328         tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I));
329 
330       if (diffCallSites(LI, RI, Complain))
331         return true;
332 
333       if (TryUnify)
334         tryUnify(LI.getDefaultDest(), RI.getDefaultDest());
335       return false;
336 
337     } else if (isa<BranchInst>(L)) {
338       const BranchInst *LI = cast<BranchInst>(L);
339       const BranchInst *RI = cast<BranchInst>(R);
340       if (LI->isConditional() != RI->isConditional()) {
341         if (Complain) Engine.log("branch conditionality differs");
342         return true;
343       }
344 
345       if (LI->isConditional()) {
346         if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
347           if (Complain) Engine.log("branch conditions differ");
348           return true;
349         }
350         if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1));
351       }
352       if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0));
353       return false;
354 
355     } else if (isa<IndirectBrInst>(L)) {
356       const IndirectBrInst *LI = cast<IndirectBrInst>(L);
357       const IndirectBrInst *RI = cast<IndirectBrInst>(R);
358       if (LI->getNumDestinations() != RI->getNumDestinations()) {
359         if (Complain) Engine.log("indirectbr # of destinations differ");
360         return true;
361       }
362 
363       if (!equivalentAsOperands(LI->getAddress(), RI->getAddress())) {
364         if (Complain) Engine.log("indirectbr addresses differ");
365         return true;
366       }
367 
368       if (TryUnify) {
369         for (unsigned i = 0; i < LI->getNumDestinations(); i++) {
370           tryUnify(LI->getDestination(i), RI->getDestination(i));
371         }
372       }
373       return false;
374 
375     } else if (isa<SwitchInst>(L)) {
376       const SwitchInst *LI = cast<SwitchInst>(L);
377       const SwitchInst *RI = cast<SwitchInst>(R);
378       if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
379         if (Complain) Engine.log("switch conditions differ");
380         return true;
381       }
382       if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest());
383 
384       bool Difference = false;
385 
386       DenseMap<const ConstantInt *, const BasicBlock *> LCases;
387       for (auto Case : LI->cases())
388         LCases[Case.getCaseValue()] = Case.getCaseSuccessor();
389 
390       for (auto Case : RI->cases()) {
391         const ConstantInt *CaseValue = Case.getCaseValue();
392         const BasicBlock *LCase = LCases[CaseValue];
393         if (LCase) {
394           if (TryUnify)
395             tryUnify(LCase, Case.getCaseSuccessor());
396           LCases.erase(CaseValue);
397         } else if (Complain || !Difference) {
398           if (Complain)
399             Engine.logf("right switch has extra case %r") << CaseValue;
400           Difference = true;
401         }
402       }
403       if (!Difference)
404         for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator
405                  I = LCases.begin(),
406                  E = LCases.end();
407              I != E; ++I) {
408           if (Complain)
409             Engine.logf("left switch has extra case %l") << I->first;
410           Difference = true;
411         }
412       return Difference;
413     } else if (isa<UnreachableInst>(L)) {
414       return false;
415     }
416 
417     if (L->getNumOperands() != R->getNumOperands()) {
418       if (Complain) Engine.log("instructions have different operand counts");
419       return true;
420     }
421 
422     for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
423       Value *LO = L->getOperand(I), *RO = R->getOperand(I);
424       if (!equivalentAsOperands(LO, RO)) {
425         if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;
426         return true;
427       }
428     }
429 
430     return false;
431   }
432 
433 public:
434   bool equivalentAsOperands(const Constant *L, const Constant *R) {
435     // Use equality as a preliminary filter.
436     if (L == R)
437       return true;
438 
439     if (L->getValueID() != R->getValueID())
440       return false;
441 
442     // Ask the engine about global values.
443     if (isa<GlobalValue>(L))
444       return Engine.equivalentAsOperands(cast<GlobalValue>(L),
445                                          cast<GlobalValue>(R));
446 
447     // Compare constant expressions structurally.
448     if (isa<ConstantExpr>(L))
449       return equivalentAsOperands(cast<ConstantExpr>(L),
450                                   cast<ConstantExpr>(R));
451 
452     // Constants of the "same type" don't always actually have the same
453     // type; I don't know why.  Just white-list them.
454     if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L))
455       return true;
456 
457     // Block addresses only match if we've already encountered the
458     // block.  FIXME: tentative matches?
459     if (isa<BlockAddress>(L))
460       return Blocks[cast<BlockAddress>(L)->getBasicBlock()]
461                  == cast<BlockAddress>(R)->getBasicBlock();
462 
463     // If L and R are ConstantVectors, compare each element
464     if (isa<ConstantVector>(L)) {
465       const ConstantVector *CVL = cast<ConstantVector>(L);
466       const ConstantVector *CVR = cast<ConstantVector>(R);
467       if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements())
468         return false;
469       for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) {
470         if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i)))
471           return false;
472       }
473       return true;
474     }
475 
476     // If L and R are ConstantArrays, compare the element count and types.
477     if (isa<ConstantArray>(L)) {
478       const ConstantArray *CAL = cast<ConstantArray>(L);
479       const ConstantArray *CAR = cast<ConstantArray>(R);
480       // Sometimes a type may be equivalent, but not uniquified---e.g. it may
481       // contain a GEP instruction. Do a deeper comparison of the types.
482       if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements())
483         return false;
484 
485       for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) {
486         if (!equivalentAsOperands(CAL->getAggregateElement(I),
487                                   CAR->getAggregateElement(I)))
488           return false;
489       }
490 
491       return true;
492     }
493 
494     // If L and R are ConstantStructs, compare each field and type.
495     if (isa<ConstantStruct>(L)) {
496       const ConstantStruct *CSL = cast<ConstantStruct>(L);
497       const ConstantStruct *CSR = cast<ConstantStruct>(R);
498 
499       const StructType *LTy = cast<StructType>(CSL->getType());
500       const StructType *RTy = cast<StructType>(CSR->getType());
501 
502       // The StructTypes should have the same attributes. Don't use
503       // isLayoutIdentical(), because that just checks the element pointers,
504       // which may not work here.
505       if (LTy->getNumElements() != RTy->getNumElements() ||
506           LTy->isPacked() != RTy->isPacked())
507         return false;
508 
509       for (unsigned I = 0; I < LTy->getNumElements(); I++) {
510         const Value *LAgg = CSL->getAggregateElement(I);
511         const Value *RAgg = CSR->getAggregateElement(I);
512 
513         if (LAgg == SavedLHS || RAgg == SavedRHS) {
514           if (LAgg != SavedLHS || RAgg != SavedRHS)
515             // If the left and right operands aren't both re-analyzing the
516             // variable, then the initialiers don't match, so report "false".
517             // Otherwise, we skip these operands..
518             return false;
519 
520           continue;
521         }
522 
523         if (!equivalentAsOperands(LAgg, RAgg)) {
524           return false;
525         }
526       }
527 
528       return true;
529     }
530 
531     return false;
532   }
533 
534   bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R) {
535     if (L == R)
536       return true;
537 
538     if (L->getOpcode() != R->getOpcode())
539       return false;
540 
541     switch (L->getOpcode()) {
542     case Instruction::ICmp:
543     case Instruction::FCmp:
544       if (L->getPredicate() != R->getPredicate())
545         return false;
546       break;
547 
548     case Instruction::GetElementPtr:
549       // FIXME: inbounds?
550       break;
551 
552     default:
553       break;
554     }
555 
556     if (L->getNumOperands() != R->getNumOperands())
557       return false;
558 
559     for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
560       const auto *LOp = L->getOperand(I);
561       const auto *ROp = R->getOperand(I);
562 
563       if (LOp == SavedLHS || ROp == SavedRHS) {
564         if (LOp != SavedLHS || ROp != SavedRHS)
565           // If the left and right operands aren't both re-analyzing the
566           // variable, then the initialiers don't match, so report "false".
567           // Otherwise, we skip these operands..
568           return false;
569 
570         continue;
571       }
572 
573       if (!equivalentAsOperands(LOp, ROp))
574         return false;
575     }
576 
577     return true;
578   }
579 
580   bool equivalentAsOperands(const Value *L, const Value *R) {
581     // Fall out if the values have different kind.
582     // This possibly shouldn't take priority over oracles.
583     if (L->getValueID() != R->getValueID())
584       return false;
585 
586     // Value subtypes:  Argument, Constant, Instruction, BasicBlock,
587     //                  InlineAsm, MDNode, MDString, PseudoSourceValue
588 
589     if (isa<Constant>(L))
590       return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R));
591 
592     if (isa<Instruction>(L))
593       return Values[L] == R || TentativeValues.count(std::make_pair(L, R));
594 
595     if (isa<Argument>(L))
596       return Values[L] == R;
597 
598     if (isa<BasicBlock>(L))
599       return Blocks[cast<BasicBlock>(L)] != R;
600 
601     // Pretend everything else is identical.
602     return true;
603   }
604 
605   // Avoid a gcc warning about accessing 'this' in an initializer.
606   FunctionDifferenceEngine *this_() { return this; }
607 
608 public:
609   FunctionDifferenceEngine(DifferenceEngine &Engine,
610                            const Value *SavedLHS = nullptr,
611                            const Value *SavedRHS = nullptr)
612       : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS),
613         Queue(QueueSorter(*this_())) {}
614 
615   void diff(const Function *L, const Function *R) {
616     if (L->arg_size() != R->arg_size())
617       Engine.log("different argument counts");
618 
619     // Map the arguments.
620     for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(),
621                                       RI = R->arg_begin(), RE = R->arg_end();
622          LI != LE && RI != RE; ++LI, ++RI)
623       Values[&*LI] = &*RI;
624 
625     tryUnify(&*L->begin(), &*R->begin());
626     processQueue();
627   }
628 };
629 
630 struct DiffEntry {
631   DiffEntry() : Cost(0) {}
632 
633   unsigned Cost;
634   llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange
635 };
636 
637 bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L,
638                                                  const Instruction *R) {
639   return !diff(L, R, false, false);
640 }
641 
642 void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart,
643                                             BasicBlock::const_iterator RStart) {
644   BasicBlock::const_iterator LE = LStart->getParent()->end();
645   BasicBlock::const_iterator RE = RStart->getParent()->end();
646 
647   unsigned NL = std::distance(LStart, LE);
648 
649   SmallVector<DiffEntry, 20> Paths1(NL+1);
650   SmallVector<DiffEntry, 20> Paths2(NL+1);
651 
652   DiffEntry *Cur = Paths1.data();
653   DiffEntry *Next = Paths2.data();
654 
655   const unsigned LeftCost = 2;
656   const unsigned RightCost = 2;
657   const unsigned MatchCost = 0;
658 
659   assert(TentativeValues.empty());
660 
661   // Initialize the first column.
662   for (unsigned I = 0; I != NL+1; ++I) {
663     Cur[I].Cost = I * LeftCost;
664     for (unsigned J = 0; J != I; ++J)
665       Cur[I].Path.push_back(DC_left);
666   }
667 
668   for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) {
669     // Initialize the first row.
670     Next[0] = Cur[0];
671     Next[0].Cost += RightCost;
672     Next[0].Path.push_back(DC_right);
673 
674     unsigned Index = 1;
675     for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) {
676       if (matchForBlockDiff(&*LI, &*RI)) {
677         Next[Index] = Cur[Index-1];
678         Next[Index].Cost += MatchCost;
679         Next[Index].Path.push_back(DC_match);
680         TentativeValues.insert(std::make_pair(&*LI, &*RI));
681       } else if (Next[Index-1].Cost <= Cur[Index].Cost) {
682         Next[Index] = Next[Index-1];
683         Next[Index].Cost += LeftCost;
684         Next[Index].Path.push_back(DC_left);
685       } else {
686         Next[Index] = Cur[Index];
687         Next[Index].Cost += RightCost;
688         Next[Index].Path.push_back(DC_right);
689       }
690     }
691 
692     std::swap(Cur, Next);
693   }
694 
695   // We don't need the tentative values anymore; everything from here
696   // on out should be non-tentative.
697   TentativeValues.clear();
698 
699   SmallVectorImpl<char> &Path = Cur[NL].Path;
700   BasicBlock::const_iterator LI = LStart, RI = RStart;
701 
702   DiffLogBuilder Diff(Engine.getConsumer());
703 
704   // Drop trailing matches.
705   while (Path.size() && Path.back() == DC_match)
706     Path.pop_back();
707 
708   // Skip leading matches.
709   SmallVectorImpl<char>::iterator
710     PI = Path.begin(), PE = Path.end();
711   while (PI != PE && *PI == DC_match) {
712     unify(&*LI, &*RI);
713     ++PI;
714     ++LI;
715     ++RI;
716   }
717 
718   for (; PI != PE; ++PI) {
719     switch (static_cast<DiffChange>(*PI)) {
720     case DC_match:
721       assert(LI != LE && RI != RE);
722       {
723         const Instruction *L = &*LI, *R = &*RI;
724         unify(L, R);
725         Diff.addMatch(L, R);
726       }
727       ++LI; ++RI;
728       break;
729 
730     case DC_left:
731       assert(LI != LE);
732       Diff.addLeft(&*LI);
733       ++LI;
734       break;
735 
736     case DC_right:
737       assert(RI != RE);
738       Diff.addRight(&*RI);
739       ++RI;
740       break;
741     }
742   }
743 
744   // Finishing unifying and complaining about the tails of the block,
745   // which should be matches all the way through.
746   while (LI != LE) {
747     assert(RI != RE);
748     unify(&*LI, &*RI);
749     ++LI;
750     ++RI;
751   }
752 
753   // If the terminators have different kinds, but one is an invoke and the
754   // other is an unconditional branch immediately following a call, unify
755   // the results and the destinations.
756   const Instruction *LTerm = LStart->getParent()->getTerminator();
757   const Instruction *RTerm = RStart->getParent()->getTerminator();
758   if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
759     if (cast<BranchInst>(LTerm)->isConditional()) return;
760     BasicBlock::const_iterator I = LTerm->getIterator();
761     if (I == LStart->getParent()->begin()) return;
762     --I;
763     if (!isa<CallInst>(*I)) return;
764     const CallInst *LCall = cast<CallInst>(&*I);
765     const InvokeInst *RInvoke = cast<InvokeInst>(RTerm);
766     if (!equivalentAsOperands(LCall->getCalledOperand(),
767                               RInvoke->getCalledOperand()))
768       return;
769     if (!LCall->use_empty())
770       Values[LCall] = RInvoke;
771     tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest());
772   } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) {
773     if (cast<BranchInst>(RTerm)->isConditional()) return;
774     BasicBlock::const_iterator I = RTerm->getIterator();
775     if (I == RStart->getParent()->begin()) return;
776     --I;
777     if (!isa<CallInst>(*I)) return;
778     const CallInst *RCall = cast<CallInst>(I);
779     const InvokeInst *LInvoke = cast<InvokeInst>(LTerm);
780     if (!equivalentAsOperands(LInvoke->getCalledOperand(),
781                               RCall->getCalledOperand()))
782       return;
783     if (!LInvoke->use_empty())
784       Values[LInvoke] = RCall;
785     tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0));
786   }
787 }
788 }
789 
790 void DifferenceEngine::Oracle::anchor() { }
791 
792 void DifferenceEngine::diff(const Function *L, const Function *R) {
793   Context C(*this, L, R);
794 
795   // FIXME: types
796   // FIXME: attributes and CC
797   // FIXME: parameter attributes
798 
799   // If both are declarations, we're done.
800   if (L->empty() && R->empty())
801     return;
802   else if (L->empty())
803     log("left function is declaration, right function is definition");
804   else if (R->empty())
805     log("right function is declaration, left function is definition");
806   else
807     FunctionDifferenceEngine(*this).diff(L, R);
808 }
809 
810 void DifferenceEngine::diff(const Module *L, const Module *R) {
811   StringSet<> LNames;
812   SmallVector<std::pair<const Function *, const Function *>, 20> Queue;
813 
814   unsigned LeftAnonCount = 0;
815   unsigned RightAnonCount = 0;
816 
817   for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) {
818     const Function *LFn = &*I;
819     StringRef Name = LFn->getName();
820     if (Name.empty()) {
821       ++LeftAnonCount;
822       continue;
823     }
824 
825     LNames.insert(Name);
826 
827     if (Function *RFn = R->getFunction(LFn->getName()))
828       Queue.push_back(std::make_pair(LFn, RFn));
829     else
830       logf("function %l exists only in left module") << LFn;
831   }
832 
833   for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) {
834     const Function *RFn = &*I;
835     StringRef Name = RFn->getName();
836     if (Name.empty()) {
837       ++RightAnonCount;
838       continue;
839     }
840 
841     if (!LNames.count(Name))
842       logf("function %r exists only in right module") << RFn;
843   }
844 
845   if (LeftAnonCount != 0 || RightAnonCount != 0) {
846     SmallString<32> Tmp;
847     logf(("not comparing " + Twine(LeftAnonCount) +
848           " anonymous functions in the left module and " +
849           Twine(RightAnonCount) + " in the right module")
850              .toStringRef(Tmp));
851   }
852 
853   for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator
854            I = Queue.begin(),
855            E = Queue.end();
856        I != E; ++I)
857     diff(I->first, I->second);
858 }
859 
860 bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L,
861                                             const GlobalValue *R) {
862   if (globalValueOracle) return (*globalValueOracle)(L, R);
863 
864   if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) {
865     const GlobalVariable *GVL = cast<GlobalVariable>(L);
866     const GlobalVariable *GVR = cast<GlobalVariable>(R);
867     if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&
868         GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())
869       return FunctionDifferenceEngine(*this, GVL, GVR)
870           .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer());
871   }
872 
873   return L->getName() == R->getName();
874 }
875