1 //===- ASTDiff.cpp - AST differencing implementation-----------*- C++ -*- -===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains definitons for the AST differencing interface. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/Tooling/ASTDiff/ASTDiff.h" 14 #include "clang/AST/ParentMapContext.h" 15 #include "clang/AST/RecursiveASTVisitor.h" 16 #include "clang/Basic/SourceManager.h" 17 #include "clang/Lex/Lexer.h" 18 #include "llvm/ADT/PriorityQueue.h" 19 20 #include <limits> 21 #include <memory> 22 #include <unordered_set> 23 24 using namespace llvm; 25 using namespace clang; 26 27 namespace clang { 28 namespace diff { 29 30 namespace { 31 /// Maps nodes of the left tree to ones on the right, and vice versa. 32 class Mapping { 33 public: 34 Mapping() = default; 35 Mapping(Mapping &&Other) = default; 36 Mapping &operator=(Mapping &&Other) = default; 37 38 Mapping(size_t Size) { 39 SrcToDst = std::make_unique<NodeId[]>(Size); 40 DstToSrc = std::make_unique<NodeId[]>(Size); 41 } 42 43 void link(NodeId Src, NodeId Dst) { 44 SrcToDst[Src] = Dst, DstToSrc[Dst] = Src; 45 } 46 47 NodeId getDst(NodeId Src) const { return SrcToDst[Src]; } 48 NodeId getSrc(NodeId Dst) const { return DstToSrc[Dst]; } 49 bool hasSrc(NodeId Src) const { return getDst(Src).isValid(); } 50 bool hasDst(NodeId Dst) const { return getSrc(Dst).isValid(); } 51 52 private: 53 std::unique_ptr<NodeId[]> SrcToDst, DstToSrc; 54 }; 55 } // end anonymous namespace 56 57 class ASTDiff::Impl { 58 public: 59 SyntaxTree::Impl &T1, &T2; 60 Mapping TheMapping; 61 62 Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2, 63 const ComparisonOptions &Options); 64 65 /// Matches nodes one-by-one based on their similarity. 66 void computeMapping(); 67 68 // Compute Change for each node based on similarity. 69 void computeChangeKinds(Mapping &M); 70 71 NodeId getMapped(const std::unique_ptr<SyntaxTree::Impl> &Tree, 72 NodeId Id) const { 73 if (&*Tree == &T1) 74 return TheMapping.getDst(Id); 75 assert(&*Tree == &T2 && "Invalid tree."); 76 return TheMapping.getSrc(Id); 77 } 78 79 private: 80 // Returns true if the two subtrees are identical. 81 bool identical(NodeId Id1, NodeId Id2) const; 82 83 // Returns false if the nodes must not be mached. 84 bool isMatchingPossible(NodeId Id1, NodeId Id2) const; 85 86 // Returns true if the nodes' parents are matched. 87 bool haveSameParents(const Mapping &M, NodeId Id1, NodeId Id2) const; 88 89 // Uses an optimal albeit slow algorithm to compute a mapping between two 90 // subtrees, but only if both have fewer nodes than MaxSize. 91 void addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const; 92 93 // Computes the ratio of common descendants between the two nodes. 94 // Descendants are only considered to be equal when they are mapped in M. 95 double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; 96 97 // Returns the node that has the highest degree of similarity. 98 NodeId findCandidate(const Mapping &M, NodeId Id1) const; 99 100 // Returns a mapping of identical subtrees. 101 Mapping matchTopDown() const; 102 103 // Tries to match any yet unmapped nodes, in a bottom-up fashion. 104 void matchBottomUp(Mapping &M) const; 105 106 const ComparisonOptions &Options; 107 108 friend class ZhangShashaMatcher; 109 }; 110 111 /// Represents the AST of a TranslationUnit. 112 class SyntaxTree::Impl { 113 public: 114 Impl(SyntaxTree *Parent, ASTContext &AST); 115 /// Constructs a tree from an AST node. 116 Impl(SyntaxTree *Parent, Decl *N, ASTContext &AST); 117 Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST); 118 template <class T> 119 Impl(SyntaxTree *Parent, 120 std::enable_if_t<std::is_base_of<Stmt, T>::value, T> *Node, 121 ASTContext &AST) 122 : Impl(Parent, dyn_cast<Stmt>(Node), AST) {} 123 template <class T> 124 Impl(SyntaxTree *Parent, 125 std::enable_if_t<std::is_base_of<Decl, T>::value, T> *Node, 126 ASTContext &AST) 127 : Impl(Parent, dyn_cast<Decl>(Node), AST) {} 128 129 SyntaxTree *Parent; 130 ASTContext &AST; 131 PrintingPolicy TypePP; 132 /// Nodes in preorder. 133 std::vector<Node> Nodes; 134 std::vector<NodeId> Leaves; 135 // Maps preorder indices to postorder ones. 136 std::vector<int> PostorderIds; 137 std::vector<NodeId> NodesBfs; 138 139 int getSize() const { return Nodes.size(); } 140 NodeId getRootId() const { return 0; } 141 PreorderIterator begin() const { return getRootId(); } 142 PreorderIterator end() const { return getSize(); } 143 144 const Node &getNode(NodeId Id) const { return Nodes[Id]; } 145 Node &getMutableNode(NodeId Id) { return Nodes[Id]; } 146 bool isValidNodeId(NodeId Id) const { return Id >= 0 && Id < getSize(); } 147 void addNode(Node &N) { Nodes.push_back(N); } 148 int getNumberOfDescendants(NodeId Id) const; 149 bool isInSubtree(NodeId Id, NodeId SubtreeRoot) const; 150 int findPositionInParent(NodeId Id, bool Shifted = false) const; 151 152 std::string getRelativeName(const NamedDecl *ND, 153 const DeclContext *Context) const; 154 std::string getRelativeName(const NamedDecl *ND) const; 155 156 std::string getNodeValue(NodeId Id) const; 157 std::string getNodeValue(const Node &Node) const; 158 std::string getDeclValue(const Decl *D) const; 159 std::string getStmtValue(const Stmt *S) const; 160 161 private: 162 void initTree(); 163 void setLeftMostDescendants(); 164 }; 165 166 static bool isSpecializedNodeExcluded(const Decl *D) { return D->isImplicit(); } 167 static bool isSpecializedNodeExcluded(const Stmt *S) { return false; } 168 static bool isSpecializedNodeExcluded(CXXCtorInitializer *I) { 169 return !I->isWritten(); 170 } 171 172 template <class T> 173 static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) { 174 if (!N) 175 return true; 176 SourceLocation SLoc = N->getSourceRange().getBegin(); 177 if (SLoc.isValid()) { 178 // Ignore everything from other files. 179 if (!SrcMgr.isInMainFile(SLoc)) 180 return true; 181 // Ignore macros. 182 if (SLoc != SrcMgr.getSpellingLoc(SLoc)) 183 return true; 184 } 185 return isSpecializedNodeExcluded(N); 186 } 187 188 namespace { 189 // Sets Height, Parent and Children for each node. 190 struct PreorderVisitor : public RecursiveASTVisitor<PreorderVisitor> { 191 int Id = 0, Depth = 0; 192 NodeId Parent; 193 SyntaxTree::Impl &Tree; 194 195 PreorderVisitor(SyntaxTree::Impl &Tree) : Tree(Tree) {} 196 197 template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) { 198 NodeId MyId = Id; 199 Tree.Nodes.emplace_back(); 200 Node &N = Tree.getMutableNode(MyId); 201 N.Parent = Parent; 202 N.Depth = Depth; 203 N.ASTNode = DynTypedNode::create(*ASTNode); 204 assert(!N.ASTNode.getNodeKind().isNone() && 205 "Expected nodes to have a valid kind."); 206 if (Parent.isValid()) { 207 Node &P = Tree.getMutableNode(Parent); 208 P.Children.push_back(MyId); 209 } 210 Parent = MyId; 211 ++Id; 212 ++Depth; 213 return std::make_tuple(MyId, Tree.getNode(MyId).Parent); 214 } 215 void PostTraverse(std::tuple<NodeId, NodeId> State) { 216 NodeId MyId, PreviousParent; 217 std::tie(MyId, PreviousParent) = State; 218 assert(MyId.isValid() && "Expecting to only traverse valid nodes."); 219 Parent = PreviousParent; 220 --Depth; 221 Node &N = Tree.getMutableNode(MyId); 222 N.RightMostDescendant = Id - 1; 223 assert(N.RightMostDescendant >= 0 && 224 N.RightMostDescendant < Tree.getSize() && 225 "Rightmost descendant must be a valid tree node."); 226 if (N.isLeaf()) 227 Tree.Leaves.push_back(MyId); 228 N.Height = 1; 229 for (NodeId Child : N.Children) 230 N.Height = std::max(N.Height, 1 + Tree.getNode(Child).Height); 231 } 232 bool TraverseDecl(Decl *D) { 233 if (isNodeExcluded(Tree.AST.getSourceManager(), D)) 234 return true; 235 auto SavedState = PreTraverse(D); 236 RecursiveASTVisitor<PreorderVisitor>::TraverseDecl(D); 237 PostTraverse(SavedState); 238 return true; 239 } 240 bool TraverseStmt(Stmt *S) { 241 if (auto *E = dyn_cast_or_null<Expr>(S)) 242 S = E->IgnoreImplicit(); 243 if (isNodeExcluded(Tree.AST.getSourceManager(), S)) 244 return true; 245 auto SavedState = PreTraverse(S); 246 RecursiveASTVisitor<PreorderVisitor>::TraverseStmt(S); 247 PostTraverse(SavedState); 248 return true; 249 } 250 bool TraverseType(QualType T) { return true; } 251 bool TraverseConstructorInitializer(CXXCtorInitializer *Init) { 252 if (isNodeExcluded(Tree.AST.getSourceManager(), Init)) 253 return true; 254 auto SavedState = PreTraverse(Init); 255 RecursiveASTVisitor<PreorderVisitor>::TraverseConstructorInitializer(Init); 256 PostTraverse(SavedState); 257 return true; 258 } 259 }; 260 } // end anonymous namespace 261 262 SyntaxTree::Impl::Impl(SyntaxTree *Parent, ASTContext &AST) 263 : Parent(Parent), AST(AST), TypePP(AST.getLangOpts()) { 264 TypePP.AnonymousTagLocations = false; 265 } 266 267 SyntaxTree::Impl::Impl(SyntaxTree *Parent, Decl *N, ASTContext &AST) 268 : Impl(Parent, AST) { 269 PreorderVisitor PreorderWalker(*this); 270 PreorderWalker.TraverseDecl(N); 271 initTree(); 272 } 273 274 SyntaxTree::Impl::Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST) 275 : Impl(Parent, AST) { 276 PreorderVisitor PreorderWalker(*this); 277 PreorderWalker.TraverseStmt(N); 278 initTree(); 279 } 280 281 static std::vector<NodeId> getSubtreePostorder(const SyntaxTree::Impl &Tree, 282 NodeId Root) { 283 std::vector<NodeId> Postorder; 284 std::function<void(NodeId)> Traverse = [&](NodeId Id) { 285 const Node &N = Tree.getNode(Id); 286 for (NodeId Child : N.Children) 287 Traverse(Child); 288 Postorder.push_back(Id); 289 }; 290 Traverse(Root); 291 return Postorder; 292 } 293 294 static std::vector<NodeId> getSubtreeBfs(const SyntaxTree::Impl &Tree, 295 NodeId Root) { 296 std::vector<NodeId> Ids; 297 size_t Expanded = 0; 298 Ids.push_back(Root); 299 while (Expanded < Ids.size()) 300 for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children) 301 Ids.push_back(Child); 302 return Ids; 303 } 304 305 void SyntaxTree::Impl::initTree() { 306 setLeftMostDescendants(); 307 int PostorderId = 0; 308 PostorderIds.resize(getSize()); 309 std::function<void(NodeId)> PostorderTraverse = [&](NodeId Id) { 310 for (NodeId Child : getNode(Id).Children) 311 PostorderTraverse(Child); 312 PostorderIds[Id] = PostorderId; 313 ++PostorderId; 314 }; 315 PostorderTraverse(getRootId()); 316 NodesBfs = getSubtreeBfs(*this, getRootId()); 317 } 318 319 void SyntaxTree::Impl::setLeftMostDescendants() { 320 for (NodeId Leaf : Leaves) { 321 getMutableNode(Leaf).LeftMostDescendant = Leaf; 322 NodeId Parent, Cur = Leaf; 323 while ((Parent = getNode(Cur).Parent).isValid() && 324 getNode(Parent).Children[0] == Cur) { 325 Cur = Parent; 326 getMutableNode(Cur).LeftMostDescendant = Leaf; 327 } 328 } 329 } 330 331 int SyntaxTree::Impl::getNumberOfDescendants(NodeId Id) const { 332 return getNode(Id).RightMostDescendant - Id + 1; 333 } 334 335 bool SyntaxTree::Impl::isInSubtree(NodeId Id, NodeId SubtreeRoot) const { 336 return Id >= SubtreeRoot && Id <= getNode(SubtreeRoot).RightMostDescendant; 337 } 338 339 int SyntaxTree::Impl::findPositionInParent(NodeId Id, bool Shifted) const { 340 NodeId Parent = getNode(Id).Parent; 341 if (Parent.isInvalid()) 342 return 0; 343 const auto &Siblings = getNode(Parent).Children; 344 int Position = 0; 345 for (size_t I = 0, E = Siblings.size(); I < E; ++I) { 346 if (Shifted) 347 Position += getNode(Siblings[I]).Shift; 348 if (Siblings[I] == Id) { 349 Position += I; 350 return Position; 351 } 352 } 353 llvm_unreachable("Node not found in parent's children."); 354 } 355 356 // Returns the qualified name of ND. If it is subordinate to Context, 357 // then the prefix of the latter is removed from the returned value. 358 std::string 359 SyntaxTree::Impl::getRelativeName(const NamedDecl *ND, 360 const DeclContext *Context) const { 361 std::string Val = ND->getQualifiedNameAsString(); 362 std::string ContextPrefix; 363 if (!Context) 364 return Val; 365 if (auto *Namespace = dyn_cast<NamespaceDecl>(Context)) 366 ContextPrefix = Namespace->getQualifiedNameAsString(); 367 else if (auto *Record = dyn_cast<RecordDecl>(Context)) 368 ContextPrefix = Record->getQualifiedNameAsString(); 369 else if (AST.getLangOpts().CPlusPlus11) 370 if (auto *Tag = dyn_cast<TagDecl>(Context)) 371 ContextPrefix = Tag->getQualifiedNameAsString(); 372 // Strip the qualifier, if Val refers to something in the current scope. 373 // But leave one leading ':' in place, so that we know that this is a 374 // relative path. 375 if (!ContextPrefix.empty() && StringRef(Val).startswith(ContextPrefix)) 376 Val = Val.substr(ContextPrefix.size() + 1); 377 return Val; 378 } 379 380 std::string SyntaxTree::Impl::getRelativeName(const NamedDecl *ND) const { 381 return getRelativeName(ND, ND->getDeclContext()); 382 } 383 384 static const DeclContext *getEnclosingDeclContext(ASTContext &AST, 385 const Stmt *S) { 386 while (S) { 387 const auto &Parents = AST.getParents(*S); 388 if (Parents.empty()) 389 return nullptr; 390 const auto &P = Parents[0]; 391 if (const auto *D = P.get<Decl>()) 392 return D->getDeclContext(); 393 S = P.get<Stmt>(); 394 } 395 return nullptr; 396 } 397 398 static std::string getInitializerValue(const CXXCtorInitializer *Init, 399 const PrintingPolicy &TypePP) { 400 if (Init->isAnyMemberInitializer()) 401 return std::string(Init->getAnyMember()->getName()); 402 if (Init->isBaseInitializer()) 403 return QualType(Init->getBaseClass(), 0).getAsString(TypePP); 404 if (Init->isDelegatingInitializer()) 405 return Init->getTypeSourceInfo()->getType().getAsString(TypePP); 406 llvm_unreachable("Unknown initializer type"); 407 } 408 409 std::string SyntaxTree::Impl::getNodeValue(NodeId Id) const { 410 return getNodeValue(getNode(Id)); 411 } 412 413 std::string SyntaxTree::Impl::getNodeValue(const Node &N) const { 414 const DynTypedNode &DTN = N.ASTNode; 415 if (auto *S = DTN.get<Stmt>()) 416 return getStmtValue(S); 417 if (auto *D = DTN.get<Decl>()) 418 return getDeclValue(D); 419 if (auto *Init = DTN.get<CXXCtorInitializer>()) 420 return getInitializerValue(Init, TypePP); 421 llvm_unreachable("Fatal: unhandled AST node.\n"); 422 } 423 424 std::string SyntaxTree::Impl::getDeclValue(const Decl *D) const { 425 std::string Value; 426 if (auto *V = dyn_cast<ValueDecl>(D)) 427 return getRelativeName(V) + "(" + V->getType().getAsString(TypePP) + ")"; 428 if (auto *N = dyn_cast<NamedDecl>(D)) 429 Value += getRelativeName(N) + ";"; 430 if (auto *T = dyn_cast<TypedefNameDecl>(D)) 431 return Value + T->getUnderlyingType().getAsString(TypePP) + ";"; 432 if (auto *T = dyn_cast<TypeDecl>(D)) 433 if (T->getTypeForDecl()) 434 Value += 435 T->getTypeForDecl()->getCanonicalTypeInternal().getAsString(TypePP) + 436 ";"; 437 if (auto *U = dyn_cast<UsingDirectiveDecl>(D)) 438 return std::string(U->getNominatedNamespace()->getName()); 439 if (auto *A = dyn_cast<AccessSpecDecl>(D)) { 440 CharSourceRange Range(A->getSourceRange(), false); 441 return std::string( 442 Lexer::getSourceText(Range, AST.getSourceManager(), AST.getLangOpts())); 443 } 444 return Value; 445 } 446 447 std::string SyntaxTree::Impl::getStmtValue(const Stmt *S) const { 448 if (auto *U = dyn_cast<UnaryOperator>(S)) 449 return std::string(UnaryOperator::getOpcodeStr(U->getOpcode())); 450 if (auto *B = dyn_cast<BinaryOperator>(S)) 451 return std::string(B->getOpcodeStr()); 452 if (auto *M = dyn_cast<MemberExpr>(S)) 453 return getRelativeName(M->getMemberDecl()); 454 if (auto *I = dyn_cast<IntegerLiteral>(S)) { 455 SmallString<256> Str; 456 I->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false); 457 return std::string(Str.str()); 458 } 459 if (auto *F = dyn_cast<FloatingLiteral>(S)) { 460 SmallString<256> Str; 461 F->getValue().toString(Str); 462 return std::string(Str.str()); 463 } 464 if (auto *D = dyn_cast<DeclRefExpr>(S)) 465 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S)); 466 if (auto *String = dyn_cast<StringLiteral>(S)) 467 return std::string(String->getString()); 468 if (auto *B = dyn_cast<CXXBoolLiteralExpr>(S)) 469 return B->getValue() ? "true" : "false"; 470 return ""; 471 } 472 473 /// Identifies a node in a subtree by its postorder offset, starting at 1. 474 struct SNodeId { 475 int Id = 0; 476 477 explicit SNodeId(int Id) : Id(Id) {} 478 explicit SNodeId() = default; 479 480 operator int() const { return Id; } 481 SNodeId &operator++() { return ++Id, *this; } 482 SNodeId &operator--() { return --Id, *this; } 483 SNodeId operator+(int Other) const { return SNodeId(Id + Other); } 484 }; 485 486 class Subtree { 487 private: 488 /// The parent tree. 489 const SyntaxTree::Impl &Tree; 490 /// Maps SNodeIds to original ids. 491 std::vector<NodeId> RootIds; 492 /// Maps subtree nodes to their leftmost descendants wtihin the subtree. 493 std::vector<SNodeId> LeftMostDescendants; 494 495 public: 496 std::vector<SNodeId> KeyRoots; 497 498 Subtree(const SyntaxTree::Impl &Tree, NodeId SubtreeRoot) : Tree(Tree) { 499 RootIds = getSubtreePostorder(Tree, SubtreeRoot); 500 int NumLeaves = setLeftMostDescendants(); 501 computeKeyRoots(NumLeaves); 502 } 503 int getSize() const { return RootIds.size(); } 504 NodeId getIdInRoot(SNodeId Id) const { 505 assert(Id > 0 && Id <= getSize() && "Invalid subtree node index."); 506 return RootIds[Id - 1]; 507 } 508 const Node &getNode(SNodeId Id) const { 509 return Tree.getNode(getIdInRoot(Id)); 510 } 511 SNodeId getLeftMostDescendant(SNodeId Id) const { 512 assert(Id > 0 && Id <= getSize() && "Invalid subtree node index."); 513 return LeftMostDescendants[Id - 1]; 514 } 515 /// Returns the postorder index of the leftmost descendant in the subtree. 516 NodeId getPostorderOffset() const { 517 return Tree.PostorderIds[getIdInRoot(SNodeId(1))]; 518 } 519 std::string getNodeValue(SNodeId Id) const { 520 return Tree.getNodeValue(getIdInRoot(Id)); 521 } 522 523 private: 524 /// Returns the number of leafs in the subtree. 525 int setLeftMostDescendants() { 526 int NumLeaves = 0; 527 LeftMostDescendants.resize(getSize()); 528 for (int I = 0; I < getSize(); ++I) { 529 SNodeId SI(I + 1); 530 const Node &N = getNode(SI); 531 NumLeaves += N.isLeaf(); 532 assert(I == Tree.PostorderIds[getIdInRoot(SI)] - getPostorderOffset() && 533 "Postorder traversal in subtree should correspond to traversal in " 534 "the root tree by a constant offset."); 535 LeftMostDescendants[I] = SNodeId(Tree.PostorderIds[N.LeftMostDescendant] - 536 getPostorderOffset()); 537 } 538 return NumLeaves; 539 } 540 void computeKeyRoots(int Leaves) { 541 KeyRoots.resize(Leaves); 542 std::unordered_set<int> Visited; 543 int K = Leaves - 1; 544 for (SNodeId I(getSize()); I > 0; --I) { 545 SNodeId LeftDesc = getLeftMostDescendant(I); 546 if (Visited.count(LeftDesc)) 547 continue; 548 assert(K >= 0 && "K should be non-negative"); 549 KeyRoots[K] = I; 550 Visited.insert(LeftDesc); 551 --K; 552 } 553 } 554 }; 555 556 /// Implementation of Zhang and Shasha's Algorithm for tree edit distance. 557 /// Computes an optimal mapping between two trees using only insertion, 558 /// deletion and update as edit actions (similar to the Levenshtein distance). 559 class ZhangShashaMatcher { 560 const ASTDiff::Impl &DiffImpl; 561 Subtree S1; 562 Subtree S2; 563 std::unique_ptr<std::unique_ptr<double[]>[]> TreeDist, ForestDist; 564 565 public: 566 ZhangShashaMatcher(const ASTDiff::Impl &DiffImpl, const SyntaxTree::Impl &T1, 567 const SyntaxTree::Impl &T2, NodeId Id1, NodeId Id2) 568 : DiffImpl(DiffImpl), S1(T1, Id1), S2(T2, Id2) { 569 TreeDist = std::make_unique<std::unique_ptr<double[]>[]>( 570 size_t(S1.getSize()) + 1); 571 ForestDist = std::make_unique<std::unique_ptr<double[]>[]>( 572 size_t(S1.getSize()) + 1); 573 for (int I = 0, E = S1.getSize() + 1; I < E; ++I) { 574 TreeDist[I] = std::make_unique<double[]>(size_t(S2.getSize()) + 1); 575 ForestDist[I] = std::make_unique<double[]>(size_t(S2.getSize()) + 1); 576 } 577 } 578 579 std::vector<std::pair<NodeId, NodeId>> getMatchingNodes() { 580 std::vector<std::pair<NodeId, NodeId>> Matches; 581 std::vector<std::pair<SNodeId, SNodeId>> TreePairs; 582 583 computeTreeDist(); 584 585 bool RootNodePair = true; 586 587 TreePairs.emplace_back(SNodeId(S1.getSize()), SNodeId(S2.getSize())); 588 589 while (!TreePairs.empty()) { 590 SNodeId LastRow, LastCol, FirstRow, FirstCol, Row, Col; 591 std::tie(LastRow, LastCol) = TreePairs.back(); 592 TreePairs.pop_back(); 593 594 if (!RootNodePair) { 595 computeForestDist(LastRow, LastCol); 596 } 597 598 RootNodePair = false; 599 600 FirstRow = S1.getLeftMostDescendant(LastRow); 601 FirstCol = S2.getLeftMostDescendant(LastCol); 602 603 Row = LastRow; 604 Col = LastCol; 605 606 while (Row > FirstRow || Col > FirstCol) { 607 if (Row > FirstRow && 608 ForestDist[Row - 1][Col] + 1 == ForestDist[Row][Col]) { 609 --Row; 610 } else if (Col > FirstCol && 611 ForestDist[Row][Col - 1] + 1 == ForestDist[Row][Col]) { 612 --Col; 613 } else { 614 SNodeId LMD1 = S1.getLeftMostDescendant(Row); 615 SNodeId LMD2 = S2.getLeftMostDescendant(Col); 616 if (LMD1 == S1.getLeftMostDescendant(LastRow) && 617 LMD2 == S2.getLeftMostDescendant(LastCol)) { 618 NodeId Id1 = S1.getIdInRoot(Row); 619 NodeId Id2 = S2.getIdInRoot(Col); 620 assert(DiffImpl.isMatchingPossible(Id1, Id2) && 621 "These nodes must not be matched."); 622 Matches.emplace_back(Id1, Id2); 623 --Row; 624 --Col; 625 } else { 626 TreePairs.emplace_back(Row, Col); 627 Row = LMD1; 628 Col = LMD2; 629 } 630 } 631 } 632 } 633 return Matches; 634 } 635 636 private: 637 /// We use a simple cost model for edit actions, which seems good enough. 638 /// Simple cost model for edit actions. This seems to make the matching 639 /// algorithm perform reasonably well. 640 /// The values range between 0 and 1, or infinity if this edit action should 641 /// always be avoided. 642 static constexpr double DeletionCost = 1; 643 static constexpr double InsertionCost = 1; 644 645 double getUpdateCost(SNodeId Id1, SNodeId Id2) { 646 if (!DiffImpl.isMatchingPossible(S1.getIdInRoot(Id1), S2.getIdInRoot(Id2))) 647 return std::numeric_limits<double>::max(); 648 return S1.getNodeValue(Id1) != S2.getNodeValue(Id2); 649 } 650 651 void computeTreeDist() { 652 for (SNodeId Id1 : S1.KeyRoots) 653 for (SNodeId Id2 : S2.KeyRoots) 654 computeForestDist(Id1, Id2); 655 } 656 657 void computeForestDist(SNodeId Id1, SNodeId Id2) { 658 assert(Id1 > 0 && Id2 > 0 && "Expecting offsets greater than 0."); 659 SNodeId LMD1 = S1.getLeftMostDescendant(Id1); 660 SNodeId LMD2 = S2.getLeftMostDescendant(Id2); 661 662 ForestDist[LMD1][LMD2] = 0; 663 for (SNodeId D1 = LMD1 + 1; D1 <= Id1; ++D1) { 664 ForestDist[D1][LMD2] = ForestDist[D1 - 1][LMD2] + DeletionCost; 665 for (SNodeId D2 = LMD2 + 1; D2 <= Id2; ++D2) { 666 ForestDist[LMD1][D2] = ForestDist[LMD1][D2 - 1] + InsertionCost; 667 SNodeId DLMD1 = S1.getLeftMostDescendant(D1); 668 SNodeId DLMD2 = S2.getLeftMostDescendant(D2); 669 if (DLMD1 == LMD1 && DLMD2 == LMD2) { 670 double UpdateCost = getUpdateCost(D1, D2); 671 ForestDist[D1][D2] = 672 std::min({ForestDist[D1 - 1][D2] + DeletionCost, 673 ForestDist[D1][D2 - 1] + InsertionCost, 674 ForestDist[D1 - 1][D2 - 1] + UpdateCost}); 675 TreeDist[D1][D2] = ForestDist[D1][D2]; 676 } else { 677 ForestDist[D1][D2] = 678 std::min({ForestDist[D1 - 1][D2] + DeletionCost, 679 ForestDist[D1][D2 - 1] + InsertionCost, 680 ForestDist[DLMD1][DLMD2] + TreeDist[D1][D2]}); 681 } 682 } 683 } 684 } 685 }; 686 687 ASTNodeKind Node::getType() const { return ASTNode.getNodeKind(); } 688 689 StringRef Node::getTypeLabel() const { return getType().asStringRef(); } 690 691 llvm::Optional<std::string> Node::getQualifiedIdentifier() const { 692 if (auto *ND = ASTNode.get<NamedDecl>()) { 693 if (ND->getDeclName().isIdentifier()) 694 return ND->getQualifiedNameAsString(); 695 } 696 return llvm::None; 697 } 698 699 llvm::Optional<StringRef> Node::getIdentifier() const { 700 if (auto *ND = ASTNode.get<NamedDecl>()) { 701 if (ND->getDeclName().isIdentifier()) 702 return ND->getName(); 703 } 704 return llvm::None; 705 } 706 707 namespace { 708 // Compares nodes by their depth. 709 struct HeightLess { 710 const SyntaxTree::Impl &Tree; 711 HeightLess(const SyntaxTree::Impl &Tree) : Tree(Tree) {} 712 bool operator()(NodeId Id1, NodeId Id2) const { 713 return Tree.getNode(Id1).Height < Tree.getNode(Id2).Height; 714 } 715 }; 716 } // end anonymous namespace 717 718 namespace { 719 // Priority queue for nodes, sorted descendingly by their height. 720 class PriorityList { 721 const SyntaxTree::Impl &Tree; 722 HeightLess Cmp; 723 std::vector<NodeId> Container; 724 PriorityQueue<NodeId, std::vector<NodeId>, HeightLess> List; 725 726 public: 727 PriorityList(const SyntaxTree::Impl &Tree) 728 : Tree(Tree), Cmp(Tree), List(Cmp, Container) {} 729 730 void push(NodeId id) { List.push(id); } 731 732 std::vector<NodeId> pop() { 733 int Max = peekMax(); 734 std::vector<NodeId> Result; 735 if (Max == 0) 736 return Result; 737 while (peekMax() == Max) { 738 Result.push_back(List.top()); 739 List.pop(); 740 } 741 // TODO this is here to get a stable output, not a good heuristic 742 llvm::sort(Result); 743 return Result; 744 } 745 int peekMax() const { 746 if (List.empty()) 747 return 0; 748 return Tree.getNode(List.top()).Height; 749 } 750 void open(NodeId Id) { 751 for (NodeId Child : Tree.getNode(Id).Children) 752 push(Child); 753 } 754 }; 755 } // end anonymous namespace 756 757 bool ASTDiff::Impl::identical(NodeId Id1, NodeId Id2) const { 758 const Node &N1 = T1.getNode(Id1); 759 const Node &N2 = T2.getNode(Id2); 760 if (N1.Children.size() != N2.Children.size() || 761 !isMatchingPossible(Id1, Id2) || 762 T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) 763 return false; 764 for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id) 765 if (!identical(N1.Children[Id], N2.Children[Id])) 766 return false; 767 return true; 768 } 769 770 bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const { 771 return Options.isMatchingAllowed(T1.getNode(Id1), T2.getNode(Id2)); 772 } 773 774 bool ASTDiff::Impl::haveSameParents(const Mapping &M, NodeId Id1, 775 NodeId Id2) const { 776 NodeId P1 = T1.getNode(Id1).Parent; 777 NodeId P2 = T2.getNode(Id2).Parent; 778 return (P1.isInvalid() && P2.isInvalid()) || 779 (P1.isValid() && P2.isValid() && M.getDst(P1) == P2); 780 } 781 782 void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1, 783 NodeId Id2) const { 784 if (std::max(T1.getNumberOfDescendants(Id1), T2.getNumberOfDescendants(Id2)) > 785 Options.MaxSize) 786 return; 787 ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2); 788 std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes(); 789 for (const auto &Tuple : R) { 790 NodeId Src = Tuple.first; 791 NodeId Dst = Tuple.second; 792 if (!M.hasSrc(Src) && !M.hasDst(Dst)) 793 M.link(Src, Dst); 794 } 795 } 796 797 double ASTDiff::Impl::getJaccardSimilarity(const Mapping &M, NodeId Id1, 798 NodeId Id2) const { 799 int CommonDescendants = 0; 800 const Node &N1 = T1.getNode(Id1); 801 // Count the common descendants, excluding the subtree root. 802 for (NodeId Src = Id1 + 1; Src <= N1.RightMostDescendant; ++Src) { 803 NodeId Dst = M.getDst(Src); 804 CommonDescendants += int(Dst.isValid() && T2.isInSubtree(Dst, Id2)); 805 } 806 // We need to subtract 1 to get the number of descendants excluding the root. 807 double Denominator = T1.getNumberOfDescendants(Id1) - 1 + 808 T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants; 809 // CommonDescendants is less than the size of one subtree. 810 assert(Denominator >= 0 && "Expected non-negative denominator."); 811 if (Denominator == 0) 812 return 0; 813 return CommonDescendants / Denominator; 814 } 815 816 NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const { 817 NodeId Candidate; 818 double HighestSimilarity = 0.0; 819 for (NodeId Id2 : T2) { 820 if (!isMatchingPossible(Id1, Id2)) 821 continue; 822 if (M.hasDst(Id2)) 823 continue; 824 double Similarity = getJaccardSimilarity(M, Id1, Id2); 825 if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) { 826 HighestSimilarity = Similarity; 827 Candidate = Id2; 828 } 829 } 830 return Candidate; 831 } 832 833 void ASTDiff::Impl::matchBottomUp(Mapping &M) const { 834 std::vector<NodeId> Postorder = getSubtreePostorder(T1, T1.getRootId()); 835 for (NodeId Id1 : Postorder) { 836 if (Id1 == T1.getRootId() && !M.hasSrc(T1.getRootId()) && 837 !M.hasDst(T2.getRootId())) { 838 if (isMatchingPossible(T1.getRootId(), T2.getRootId())) { 839 M.link(T1.getRootId(), T2.getRootId()); 840 addOptimalMapping(M, T1.getRootId(), T2.getRootId()); 841 } 842 break; 843 } 844 bool Matched = M.hasSrc(Id1); 845 const Node &N1 = T1.getNode(Id1); 846 bool MatchedChildren = llvm::any_of( 847 N1.Children, [&](NodeId Child) { return M.hasSrc(Child); }); 848 if (Matched || !MatchedChildren) 849 continue; 850 NodeId Id2 = findCandidate(M, Id1); 851 if (Id2.isValid()) { 852 M.link(Id1, Id2); 853 addOptimalMapping(M, Id1, Id2); 854 } 855 } 856 } 857 858 Mapping ASTDiff::Impl::matchTopDown() const { 859 PriorityList L1(T1); 860 PriorityList L2(T2); 861 862 Mapping M(T1.getSize() + T2.getSize()); 863 864 L1.push(T1.getRootId()); 865 L2.push(T2.getRootId()); 866 867 int Max1, Max2; 868 while (std::min(Max1 = L1.peekMax(), Max2 = L2.peekMax()) > 869 Options.MinHeight) { 870 if (Max1 > Max2) { 871 for (NodeId Id : L1.pop()) 872 L1.open(Id); 873 continue; 874 } 875 if (Max2 > Max1) { 876 for (NodeId Id : L2.pop()) 877 L2.open(Id); 878 continue; 879 } 880 std::vector<NodeId> H1, H2; 881 H1 = L1.pop(); 882 H2 = L2.pop(); 883 for (NodeId Id1 : H1) { 884 for (NodeId Id2 : H2) { 885 if (identical(Id1, Id2) && !M.hasSrc(Id1) && !M.hasDst(Id2)) { 886 for (int I = 0, E = T1.getNumberOfDescendants(Id1); I < E; ++I) 887 M.link(Id1 + I, Id2 + I); 888 } 889 } 890 } 891 for (NodeId Id1 : H1) { 892 if (!M.hasSrc(Id1)) 893 L1.open(Id1); 894 } 895 for (NodeId Id2 : H2) { 896 if (!M.hasDst(Id2)) 897 L2.open(Id2); 898 } 899 } 900 return M; 901 } 902 903 ASTDiff::Impl::Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2, 904 const ComparisonOptions &Options) 905 : T1(T1), T2(T2), Options(Options) { 906 computeMapping(); 907 computeChangeKinds(TheMapping); 908 } 909 910 void ASTDiff::Impl::computeMapping() { 911 TheMapping = matchTopDown(); 912 if (Options.StopAfterTopDown) 913 return; 914 matchBottomUp(TheMapping); 915 } 916 917 void ASTDiff::Impl::computeChangeKinds(Mapping &M) { 918 for (NodeId Id1 : T1) { 919 if (!M.hasSrc(Id1)) { 920 T1.getMutableNode(Id1).Change = Delete; 921 T1.getMutableNode(Id1).Shift -= 1; 922 } 923 } 924 for (NodeId Id2 : T2) { 925 if (!M.hasDst(Id2)) { 926 T2.getMutableNode(Id2).Change = Insert; 927 T2.getMutableNode(Id2).Shift -= 1; 928 } 929 } 930 for (NodeId Id1 : T1.NodesBfs) { 931 NodeId Id2 = M.getDst(Id1); 932 if (Id2.isInvalid()) 933 continue; 934 if (!haveSameParents(M, Id1, Id2) || 935 T1.findPositionInParent(Id1, true) != 936 T2.findPositionInParent(Id2, true)) { 937 T1.getMutableNode(Id1).Shift -= 1; 938 T2.getMutableNode(Id2).Shift -= 1; 939 } 940 } 941 for (NodeId Id2 : T2.NodesBfs) { 942 NodeId Id1 = M.getSrc(Id2); 943 if (Id1.isInvalid()) 944 continue; 945 Node &N1 = T1.getMutableNode(Id1); 946 Node &N2 = T2.getMutableNode(Id2); 947 if (Id1.isInvalid()) 948 continue; 949 if (!haveSameParents(M, Id1, Id2) || 950 T1.findPositionInParent(Id1, true) != 951 T2.findPositionInParent(Id2, true)) { 952 N1.Change = N2.Change = Move; 953 } 954 if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) { 955 N1.Change = N2.Change = (N1.Change == Move ? UpdateMove : Update); 956 } 957 } 958 } 959 960 ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2, 961 const ComparisonOptions &Options) 962 : DiffImpl(std::make_unique<Impl>(*T1.TreeImpl, *T2.TreeImpl, Options)) {} 963 964 ASTDiff::~ASTDiff() = default; 965 966 NodeId ASTDiff::getMapped(const SyntaxTree &SourceTree, NodeId Id) const { 967 return DiffImpl->getMapped(SourceTree.TreeImpl, Id); 968 } 969 970 SyntaxTree::SyntaxTree(ASTContext &AST) 971 : TreeImpl(std::make_unique<SyntaxTree::Impl>( 972 this, AST.getTranslationUnitDecl(), AST)) {} 973 974 SyntaxTree::~SyntaxTree() = default; 975 976 const ASTContext &SyntaxTree::getASTContext() const { return TreeImpl->AST; } 977 978 const Node &SyntaxTree::getNode(NodeId Id) const { 979 return TreeImpl->getNode(Id); 980 } 981 982 int SyntaxTree::getSize() const { return TreeImpl->getSize(); } 983 NodeId SyntaxTree::getRootId() const { return TreeImpl->getRootId(); } 984 SyntaxTree::PreorderIterator SyntaxTree::begin() const { 985 return TreeImpl->begin(); 986 } 987 SyntaxTree::PreorderIterator SyntaxTree::end() const { return TreeImpl->end(); } 988 989 int SyntaxTree::findPositionInParent(NodeId Id) const { 990 return TreeImpl->findPositionInParent(Id); 991 } 992 993 std::pair<unsigned, unsigned> 994 SyntaxTree::getSourceRangeOffsets(const Node &N) const { 995 const SourceManager &SrcMgr = TreeImpl->AST.getSourceManager(); 996 SourceRange Range = N.ASTNode.getSourceRange(); 997 SourceLocation BeginLoc = Range.getBegin(); 998 SourceLocation EndLoc = Lexer::getLocForEndOfToken( 999 Range.getEnd(), /*Offset=*/0, SrcMgr, TreeImpl->AST.getLangOpts()); 1000 if (auto *ThisExpr = N.ASTNode.get<CXXThisExpr>()) { 1001 if (ThisExpr->isImplicit()) 1002 EndLoc = BeginLoc; 1003 } 1004 unsigned Begin = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(BeginLoc)); 1005 unsigned End = SrcMgr.getFileOffset(SrcMgr.getExpansionLoc(EndLoc)); 1006 return {Begin, End}; 1007 } 1008 1009 std::string SyntaxTree::getNodeValue(NodeId Id) const { 1010 return TreeImpl->getNodeValue(Id); 1011 } 1012 1013 std::string SyntaxTree::getNodeValue(const Node &N) const { 1014 return TreeImpl->getNodeValue(N); 1015 } 1016 1017 } // end namespace diff 1018 } // end namespace clang 1019