xref: /freebsd/contrib/llvm-project/llvm/lib/Support/SuffixTree.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===- llvm/Support/SuffixTree.cpp - Implement Suffix Tree ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Suffix Tree class.
//
//===----------------------------------------------------------------------===//
125ffd83dbSDimitry Andric 
135ffd83dbSDimitry Andric #include "llvm/Support/SuffixTree.h"
145ffd83dbSDimitry Andric #include "llvm/Support/Allocator.h"
1506c3fb27SDimitry Andric #include "llvm/Support/Casting.h"
1606c3fb27SDimitry Andric #include "llvm/Support/SuffixTreeNode.h"
175ffd83dbSDimitry Andric 
185ffd83dbSDimitry Andric using namespace llvm;
195ffd83dbSDimitry Andric 
2006c3fb27SDimitry Andric /// \returns the number of elements in the substring associated with \p N.
numElementsInSubstring(const SuffixTreeNode * N)2106c3fb27SDimitry Andric static size_t numElementsInSubstring(const SuffixTreeNode *N) {
2206c3fb27SDimitry Andric   assert(N && "Got a null node?");
2306c3fb27SDimitry Andric   if (auto *Internal = dyn_cast<SuffixTreeInternalNode>(N))
2406c3fb27SDimitry Andric     if (Internal->isRoot())
2506c3fb27SDimitry Andric       return 0;
2606c3fb27SDimitry Andric   return N->getEndIdx() - N->getStartIdx() + 1;
2706c3fb27SDimitry Andric }
2806c3fb27SDimitry Andric 
SuffixTree(const ArrayRef<unsigned> & Str,bool OutlinerLeafDescendants)29*0fca6ea1SDimitry Andric SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str,
30*0fca6ea1SDimitry Andric                        bool OutlinerLeafDescendants)
31*0fca6ea1SDimitry Andric     : Str(Str), OutlinerLeafDescendants(OutlinerLeafDescendants) {
3206c3fb27SDimitry Andric   Root = insertRoot();
335ffd83dbSDimitry Andric   Active.Node = Root;
345ffd83dbSDimitry Andric 
355ffd83dbSDimitry Andric   // Keep track of the number of suffixes we have to add of the current
365ffd83dbSDimitry Andric   // prefix.
375ffd83dbSDimitry Andric   unsigned SuffixesToAdd = 0;
385ffd83dbSDimitry Andric 
395ffd83dbSDimitry Andric   // Construct the suffix tree iteratively on each prefix of the string.
405ffd83dbSDimitry Andric   // PfxEndIdx is the end index of the current prefix.
415ffd83dbSDimitry Andric   // End is one past the last element in the string.
425ffd83dbSDimitry Andric   for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
435ffd83dbSDimitry Andric     SuffixesToAdd++;
445ffd83dbSDimitry Andric     LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
455ffd83dbSDimitry Andric     SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
465ffd83dbSDimitry Andric   }
475ffd83dbSDimitry Andric 
485ffd83dbSDimitry Andric   // Set the suffix indices of each leaf.
495ffd83dbSDimitry Andric   assert(Root && "Root node can't be nullptr!");
505ffd83dbSDimitry Andric   setSuffixIndices();
51*0fca6ea1SDimitry Andric 
52*0fca6ea1SDimitry Andric   // Collect all leaf nodes of the suffix tree. And for each internal node,
53*0fca6ea1SDimitry Andric   // record the range of leaf nodes that are descendants of it.
54*0fca6ea1SDimitry Andric   if (OutlinerLeafDescendants)
55*0fca6ea1SDimitry Andric     setLeafNodes();
565ffd83dbSDimitry Andric }
575ffd83dbSDimitry Andric 
insertLeaf(SuffixTreeInternalNode & Parent,unsigned StartIdx,unsigned Edge)5806c3fb27SDimitry Andric SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent,
595ffd83dbSDimitry Andric                                        unsigned StartIdx, unsigned Edge) {
605ffd83dbSDimitry Andric   assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
6106c3fb27SDimitry Andric   auto *N = new (LeafNodeAllocator.Allocate())
6206c3fb27SDimitry Andric       SuffixTreeLeafNode(StartIdx, &LeafEndIdx);
635ffd83dbSDimitry Andric   Parent.Children[Edge] = N;
645ffd83dbSDimitry Andric   return N;
655ffd83dbSDimitry Andric }
665ffd83dbSDimitry Andric 
6706c3fb27SDimitry Andric SuffixTreeInternalNode *
insertInternalNode(SuffixTreeInternalNode * Parent,unsigned StartIdx,unsigned EndIdx,unsigned Edge)6806c3fb27SDimitry Andric SuffixTree::insertInternalNode(SuffixTreeInternalNode *Parent,
6906c3fb27SDimitry Andric                                unsigned StartIdx, unsigned EndIdx,
7006c3fb27SDimitry Andric                                unsigned Edge) {
715ffd83dbSDimitry Andric   assert(StartIdx <= EndIdx && "String can't start after it ends!");
7206c3fb27SDimitry Andric   assert(!(!Parent && StartIdx != SuffixTreeNode::EmptyIdx) &&
735ffd83dbSDimitry Andric          "Non-root internal nodes must have parents!");
7406c3fb27SDimitry Andric   auto *N = new (InternalNodeAllocator.Allocate())
7506c3fb27SDimitry Andric       SuffixTreeInternalNode(StartIdx, EndIdx, Root);
765ffd83dbSDimitry Andric   if (Parent)
775ffd83dbSDimitry Andric     Parent->Children[Edge] = N;
785ffd83dbSDimitry Andric   return N;
795ffd83dbSDimitry Andric }
805ffd83dbSDimitry Andric 
insertRoot()8106c3fb27SDimitry Andric SuffixTreeInternalNode *SuffixTree::insertRoot() {
8206c3fb27SDimitry Andric   return insertInternalNode(/*Parent = */ nullptr, SuffixTreeNode::EmptyIdx,
8306c3fb27SDimitry Andric                             SuffixTreeNode::EmptyIdx, /*Edge = */ 0);
8406c3fb27SDimitry Andric }
8506c3fb27SDimitry Andric 
setSuffixIndices()865ffd83dbSDimitry Andric void SuffixTree::setSuffixIndices() {
875ffd83dbSDimitry Andric   // List of nodes we need to visit along with the current length of the
885ffd83dbSDimitry Andric   // string.
8906c3fb27SDimitry Andric   SmallVector<std::pair<SuffixTreeNode *, unsigned>> ToVisit;
905ffd83dbSDimitry Andric 
915ffd83dbSDimitry Andric   // Current node being visited.
925ffd83dbSDimitry Andric   SuffixTreeNode *CurrNode = Root;
935ffd83dbSDimitry Andric 
945ffd83dbSDimitry Andric   // Sum of the lengths of the nodes down the path to the current one.
955ffd83dbSDimitry Andric   unsigned CurrNodeLen = 0;
965ffd83dbSDimitry Andric   ToVisit.push_back({CurrNode, CurrNodeLen});
975ffd83dbSDimitry Andric   while (!ToVisit.empty()) {
985ffd83dbSDimitry Andric     std::tie(CurrNode, CurrNodeLen) = ToVisit.back();
995ffd83dbSDimitry Andric     ToVisit.pop_back();
10006c3fb27SDimitry Andric     // Length of the current node from the root down to here.
10106c3fb27SDimitry Andric     CurrNode->setConcatLen(CurrNodeLen);
10206c3fb27SDimitry Andric     if (auto *InternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode))
10306c3fb27SDimitry Andric       for (auto &ChildPair : InternalNode->Children) {
1045ffd83dbSDimitry Andric         assert(ChildPair.second && "Node had a null child!");
1055ffd83dbSDimitry Andric         ToVisit.push_back(
10606c3fb27SDimitry Andric             {ChildPair.second,
10706c3fb27SDimitry Andric              CurrNodeLen + numElementsInSubstring(ChildPair.second)});
1085ffd83dbSDimitry Andric       }
1095ffd83dbSDimitry Andric     // No children, so we are at the end of the string.
11006c3fb27SDimitry Andric     if (auto *LeafNode = dyn_cast<SuffixTreeLeafNode>(CurrNode))
11106c3fb27SDimitry Andric       LeafNode->setSuffixIdx(Str.size() - CurrNodeLen);
1125ffd83dbSDimitry Andric   }
1135ffd83dbSDimitry Andric }
1145ffd83dbSDimitry Andric 
setLeafNodes()115*0fca6ea1SDimitry Andric void SuffixTree::setLeafNodes() {
116*0fca6ea1SDimitry Andric   // A stack that keeps track of nodes to visit for post-order DFS traversal.
117*0fca6ea1SDimitry Andric   SmallVector<SuffixTreeNode *> ToVisit;
118*0fca6ea1SDimitry Andric   ToVisit.push_back(Root);
119*0fca6ea1SDimitry Andric 
120*0fca6ea1SDimitry Andric   // This keeps track of the index of the next leaf node to be added to
121*0fca6ea1SDimitry Andric   // the LeafNodes vector of the suffix tree.
122*0fca6ea1SDimitry Andric   unsigned LeafCounter = 0;
123*0fca6ea1SDimitry Andric 
124*0fca6ea1SDimitry Andric   // This keeps track of nodes whose children have been added to the stack.
125*0fca6ea1SDimitry Andric   // The value is a pair, representing a node's first and last children.
126*0fca6ea1SDimitry Andric   DenseMap<SuffixTreeInternalNode *,
127*0fca6ea1SDimitry Andric            std::pair<SuffixTreeNode *, SuffixTreeNode *>>
128*0fca6ea1SDimitry Andric       ChildrenMap;
129*0fca6ea1SDimitry Andric 
130*0fca6ea1SDimitry Andric   // Traverse the tree in post-order.
131*0fca6ea1SDimitry Andric   while (!ToVisit.empty()) {
132*0fca6ea1SDimitry Andric     SuffixTreeNode *CurrNode = ToVisit.pop_back_val();
133*0fca6ea1SDimitry Andric     if (auto *CurrInternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) {
134*0fca6ea1SDimitry Andric       // The current node is an internal node.
135*0fca6ea1SDimitry Andric       auto I = ChildrenMap.find(CurrInternalNode);
136*0fca6ea1SDimitry Andric       if (I == ChildrenMap.end()) {
137*0fca6ea1SDimitry Andric         // This is the first time we visit this node.
138*0fca6ea1SDimitry Andric         // Its children have not been added to the stack yet.
139*0fca6ea1SDimitry Andric         // We add current node back, and add its children to the stack.
140*0fca6ea1SDimitry Andric         // We keep track of the first and last children of the current node.
141*0fca6ea1SDimitry Andric         auto J = CurrInternalNode->Children.begin();
142*0fca6ea1SDimitry Andric         if (J != CurrInternalNode->Children.end()) {
143*0fca6ea1SDimitry Andric           ToVisit.push_back(CurrNode);
144*0fca6ea1SDimitry Andric           SuffixTreeNode *FirstChild = J->second;
145*0fca6ea1SDimitry Andric           SuffixTreeNode *LastChild = nullptr;
146*0fca6ea1SDimitry Andric           for (; J != CurrInternalNode->Children.end(); ++J) {
147*0fca6ea1SDimitry Andric             LastChild = J->second;
148*0fca6ea1SDimitry Andric             ToVisit.push_back(LastChild);
149*0fca6ea1SDimitry Andric           }
150*0fca6ea1SDimitry Andric           ChildrenMap[CurrInternalNode] = {FirstChild, LastChild};
151*0fca6ea1SDimitry Andric         }
152*0fca6ea1SDimitry Andric       } else {
153*0fca6ea1SDimitry Andric         // This is the second time we visit this node.
154*0fca6ea1SDimitry Andric         // All of its children have already been processed.
155*0fca6ea1SDimitry Andric         // Now, we can set its LeftLeafIdx and RightLeafIdx;
156*0fca6ea1SDimitry Andric         auto [FirstChild, LastChild] = I->second;
157*0fca6ea1SDimitry Andric         // Get the first child to use its RightLeafIdx.
158*0fca6ea1SDimitry Andric         // The first child is the first one added to the stack, so it is
159*0fca6ea1SDimitry Andric         // the last one to be processed. Hence, the leaf descendants
160*0fca6ea1SDimitry Andric         // of the first child are assigned the largest index numbers.
161*0fca6ea1SDimitry Andric         CurrNode->setRightLeafIdx(FirstChild->getRightLeafIdx());
162*0fca6ea1SDimitry Andric         // Get the last child to use its LeftLeafIdx.
163*0fca6ea1SDimitry Andric         CurrNode->setLeftLeafIdx(LastChild->getLeftLeafIdx());
164*0fca6ea1SDimitry Andric         assert(CurrNode->getLeftLeafIdx() <= CurrNode->getRightLeafIdx() &&
165*0fca6ea1SDimitry Andric                "LeftLeafIdx should not be larger than RightLeafIdx");
166*0fca6ea1SDimitry Andric       }
167*0fca6ea1SDimitry Andric     } else {
168*0fca6ea1SDimitry Andric       // The current node is a leaf node.
169*0fca6ea1SDimitry Andric       // We can simply set its LeftLeafIdx and RightLeafIdx.
170*0fca6ea1SDimitry Andric       CurrNode->setLeftLeafIdx(LeafCounter);
171*0fca6ea1SDimitry Andric       CurrNode->setRightLeafIdx(LeafCounter);
172*0fca6ea1SDimitry Andric       ++LeafCounter;
173*0fca6ea1SDimitry Andric       auto *CurrLeafNode = cast<SuffixTreeLeafNode>(CurrNode);
174*0fca6ea1SDimitry Andric       LeafNodes.push_back(CurrLeafNode);
175*0fca6ea1SDimitry Andric     }
176*0fca6ea1SDimitry Andric   }
177*0fca6ea1SDimitry Andric }
178*0fca6ea1SDimitry Andric 
extend(unsigned EndIdx,unsigned SuffixesToAdd)1795ffd83dbSDimitry Andric unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
18006c3fb27SDimitry Andric   SuffixTreeInternalNode *NeedsLink = nullptr;
1815ffd83dbSDimitry Andric 
1825ffd83dbSDimitry Andric   while (SuffixesToAdd > 0) {
1835ffd83dbSDimitry Andric 
1845ffd83dbSDimitry Andric     // Are we waiting to add anything other than just the last character?
1855ffd83dbSDimitry Andric     if (Active.Len == 0) {
1865ffd83dbSDimitry Andric       // If not, then say the active index is the end index.
1875ffd83dbSDimitry Andric       Active.Idx = EndIdx;
1885ffd83dbSDimitry Andric     }
1895ffd83dbSDimitry Andric 
1905ffd83dbSDimitry Andric     assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
1915ffd83dbSDimitry Andric 
1925ffd83dbSDimitry Andric     // The first character in the current substring we're looking at.
1935ffd83dbSDimitry Andric     unsigned FirstChar = Str[Active.Idx];
1945ffd83dbSDimitry Andric 
1955ffd83dbSDimitry Andric     // Have we inserted anything starting with FirstChar at the current node?
1965ffd83dbSDimitry Andric     if (Active.Node->Children.count(FirstChar) == 0) {
1975ffd83dbSDimitry Andric       // If not, then we can just insert a leaf and move to the next step.
1985ffd83dbSDimitry Andric       insertLeaf(*Active.Node, EndIdx, FirstChar);
1995ffd83dbSDimitry Andric 
2005ffd83dbSDimitry Andric       // The active node is an internal node, and we visited it, so it must
2015ffd83dbSDimitry Andric       // need a link if it doesn't have one.
2025ffd83dbSDimitry Andric       if (NeedsLink) {
20306c3fb27SDimitry Andric         NeedsLink->setLink(Active.Node);
2045ffd83dbSDimitry Andric         NeedsLink = nullptr;
2055ffd83dbSDimitry Andric       }
2065ffd83dbSDimitry Andric     } else {
2075ffd83dbSDimitry Andric       // There's a match with FirstChar, so look for the point in the tree to
2085ffd83dbSDimitry Andric       // insert a new node.
2095ffd83dbSDimitry Andric       SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
2105ffd83dbSDimitry Andric 
21106c3fb27SDimitry Andric       unsigned SubstringLen = numElementsInSubstring(NextNode);
2125ffd83dbSDimitry Andric 
2135ffd83dbSDimitry Andric       // Is the current suffix we're trying to insert longer than the size of
2145ffd83dbSDimitry Andric       // the child we want to move to?
2155ffd83dbSDimitry Andric       if (Active.Len >= SubstringLen) {
2165ffd83dbSDimitry Andric         // If yes, then consume the characters we've seen and move to the next
2175ffd83dbSDimitry Andric         // node.
21806c3fb27SDimitry Andric         assert(isa<SuffixTreeInternalNode>(NextNode) &&
21906c3fb27SDimitry Andric                "Expected an internal node?");
2205ffd83dbSDimitry Andric         Active.Idx += SubstringLen;
2215ffd83dbSDimitry Andric         Active.Len -= SubstringLen;
22206c3fb27SDimitry Andric         Active.Node = cast<SuffixTreeInternalNode>(NextNode);
2235ffd83dbSDimitry Andric         continue;
2245ffd83dbSDimitry Andric       }
2255ffd83dbSDimitry Andric 
2265ffd83dbSDimitry Andric       // Otherwise, the suffix we're trying to insert must be contained in the
2275ffd83dbSDimitry Andric       // next node we want to move to.
2285ffd83dbSDimitry Andric       unsigned LastChar = Str[EndIdx];
2295ffd83dbSDimitry Andric 
2305ffd83dbSDimitry Andric       // Is the string we're trying to insert a substring of the next node?
23106c3fb27SDimitry Andric       if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) {
2325ffd83dbSDimitry Andric         // If yes, then we're done for this step. Remember our insertion point
2335ffd83dbSDimitry Andric         // and move to the next end index. At this point, we have an implicit
2345ffd83dbSDimitry Andric         // suffix tree.
2355ffd83dbSDimitry Andric         if (NeedsLink && !Active.Node->isRoot()) {
23606c3fb27SDimitry Andric           NeedsLink->setLink(Active.Node);
2375ffd83dbSDimitry Andric           NeedsLink = nullptr;
2385ffd83dbSDimitry Andric         }
2395ffd83dbSDimitry Andric 
2405ffd83dbSDimitry Andric         Active.Len++;
2415ffd83dbSDimitry Andric         break;
2425ffd83dbSDimitry Andric       }
2435ffd83dbSDimitry Andric 
2445ffd83dbSDimitry Andric       // The string we're trying to insert isn't a substring of the next node,
2455ffd83dbSDimitry Andric       // but matches up to a point. Split the node.
2465ffd83dbSDimitry Andric       //
2475ffd83dbSDimitry Andric       // For example, say we ended our search at a node n and we're trying to
2485ffd83dbSDimitry Andric       // insert ABD. Then we'll create a new node s for AB, reduce n to just
2495ffd83dbSDimitry Andric       // representing C, and insert a new leaf node l to represent d. This
2505ffd83dbSDimitry Andric       // allows us to ensure that if n was a leaf, it remains a leaf.
2515ffd83dbSDimitry Andric       //
2525ffd83dbSDimitry Andric       //   | ABC  ---split--->  | AB
2535ffd83dbSDimitry Andric       //   n                    s
2545ffd83dbSDimitry Andric       //                     C / \ D
2555ffd83dbSDimitry Andric       //                      n   l
2565ffd83dbSDimitry Andric 
2575ffd83dbSDimitry Andric       // The node s from the diagram
25806c3fb27SDimitry Andric       SuffixTreeInternalNode *SplitNode = insertInternalNode(
25906c3fb27SDimitry Andric           Active.Node, NextNode->getStartIdx(),
26006c3fb27SDimitry Andric           NextNode->getStartIdx() + Active.Len - 1, FirstChar);
2615ffd83dbSDimitry Andric 
2625ffd83dbSDimitry Andric       // Insert the new node representing the new substring into the tree as
2635ffd83dbSDimitry Andric       // a child of the split node. This is the node l from the diagram.
2645ffd83dbSDimitry Andric       insertLeaf(*SplitNode, EndIdx, LastChar);
2655ffd83dbSDimitry Andric 
2665ffd83dbSDimitry Andric       // Make the old node a child of the split node and update its start
2675ffd83dbSDimitry Andric       // index. This is the node n from the diagram.
26806c3fb27SDimitry Andric       NextNode->incrementStartIdx(Active.Len);
26906c3fb27SDimitry Andric       SplitNode->Children[Str[NextNode->getStartIdx()]] = NextNode;
2705ffd83dbSDimitry Andric 
2715ffd83dbSDimitry Andric       // SplitNode is an internal node, update the suffix link.
2725ffd83dbSDimitry Andric       if (NeedsLink)
27306c3fb27SDimitry Andric         NeedsLink->setLink(SplitNode);
2745ffd83dbSDimitry Andric 
2755ffd83dbSDimitry Andric       NeedsLink = SplitNode;
2765ffd83dbSDimitry Andric     }
2775ffd83dbSDimitry Andric 
2785ffd83dbSDimitry Andric     // We've added something new to the tree, so there's one less suffix to
2795ffd83dbSDimitry Andric     // add.
2805ffd83dbSDimitry Andric     SuffixesToAdd--;
2815ffd83dbSDimitry Andric 
2825ffd83dbSDimitry Andric     if (Active.Node->isRoot()) {
2835ffd83dbSDimitry Andric       if (Active.Len > 0) {
2845ffd83dbSDimitry Andric         Active.Len--;
2855ffd83dbSDimitry Andric         Active.Idx = EndIdx - SuffixesToAdd + 1;
2865ffd83dbSDimitry Andric       }
2875ffd83dbSDimitry Andric     } else {
2885ffd83dbSDimitry Andric       // Start the next phase at the next smallest suffix.
28906c3fb27SDimitry Andric       Active.Node = Active.Node->getLink();
2905ffd83dbSDimitry Andric     }
2915ffd83dbSDimitry Andric   }
2925ffd83dbSDimitry Andric 
2935ffd83dbSDimitry Andric   return SuffixesToAdd;
2945ffd83dbSDimitry Andric }
29506c3fb27SDimitry Andric 
advance()29606c3fb27SDimitry Andric void SuffixTree::RepeatedSubstringIterator::advance() {
29706c3fb27SDimitry Andric   // Clear the current state. If we're at the end of the range, then this
29806c3fb27SDimitry Andric   // is the state we want to be in.
29906c3fb27SDimitry Andric   RS = RepeatedSubstring();
30006c3fb27SDimitry Andric   N = nullptr;
30106c3fb27SDimitry Andric 
30206c3fb27SDimitry Andric   // Each leaf node represents a repeat of a string.
30306c3fb27SDimitry Andric   SmallVector<unsigned> RepeatedSubstringStarts;
30406c3fb27SDimitry Andric 
30506c3fb27SDimitry Andric   // Continue visiting nodes until we find one which repeats more than once.
30606c3fb27SDimitry Andric   while (!InternalNodesToVisit.empty()) {
30706c3fb27SDimitry Andric     RepeatedSubstringStarts.clear();
30806c3fb27SDimitry Andric     auto *Curr = InternalNodesToVisit.back();
30906c3fb27SDimitry Andric     InternalNodesToVisit.pop_back();
31006c3fb27SDimitry Andric 
31106c3fb27SDimitry Andric     // Keep track of the length of the string associated with the node. If
31206c3fb27SDimitry Andric     // it's too short, we'll quit.
31306c3fb27SDimitry Andric     unsigned Length = Curr->getConcatLen();
31406c3fb27SDimitry Andric 
315*0fca6ea1SDimitry Andric     // Iterate over each child, saving internal nodes for visiting.
316*0fca6ea1SDimitry Andric     // Internal nodes represent individual strings, which may repeat.
317*0fca6ea1SDimitry Andric     for (auto &ChildPair : Curr->Children)
31806c3fb27SDimitry Andric       // Save all of this node's children for processing.
31906c3fb27SDimitry Andric       if (auto *InternalChild =
320*0fca6ea1SDimitry Andric               dyn_cast<SuffixTreeInternalNode>(ChildPair.second))
32106c3fb27SDimitry Andric         InternalNodesToVisit.push_back(InternalChild);
32206c3fb27SDimitry Andric 
323*0fca6ea1SDimitry Andric     // If length of repeated substring is below threshold, then skip it.
32406c3fb27SDimitry Andric     if (Length < MinLength)
32506c3fb27SDimitry Andric       continue;
32606c3fb27SDimitry Andric 
32706c3fb27SDimitry Andric     // The root never represents a repeated substring. If we're looking at
32806c3fb27SDimitry Andric     // that, then skip it.
32906c3fb27SDimitry Andric     if (Curr->isRoot())
33006c3fb27SDimitry Andric       continue;
33106c3fb27SDimitry Andric 
332*0fca6ea1SDimitry Andric     // Collect leaf children or leaf descendants by OutlinerLeafDescendants.
333*0fca6ea1SDimitry Andric     if (OutlinerLeafDescendants) {
334*0fca6ea1SDimitry Andric       for (unsigned I = Curr->getLeftLeafIdx(); I <= Curr->getRightLeafIdx();
335*0fca6ea1SDimitry Andric            ++I)
336*0fca6ea1SDimitry Andric         RepeatedSubstringStarts.push_back(LeafNodes[I]->getSuffixIdx());
337*0fca6ea1SDimitry Andric     } else {
338*0fca6ea1SDimitry Andric       for (auto &ChildPair : Curr->Children)
339*0fca6ea1SDimitry Andric         if (auto *Leaf = dyn_cast<SuffixTreeLeafNode>(ChildPair.second))
340*0fca6ea1SDimitry Andric           RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
341*0fca6ea1SDimitry Andric     }
342*0fca6ea1SDimitry Andric 
34306c3fb27SDimitry Andric     // Do we have any repeated substrings?
34406c3fb27SDimitry Andric     if (RepeatedSubstringStarts.size() < 2)
34506c3fb27SDimitry Andric       continue;
34606c3fb27SDimitry Andric 
34706c3fb27SDimitry Andric     // Yes. Update the state to reflect this, and then bail out.
34806c3fb27SDimitry Andric     N = Curr;
34906c3fb27SDimitry Andric     RS.Length = Length;
35006c3fb27SDimitry Andric     for (unsigned StartIdx : RepeatedSubstringStarts)
35106c3fb27SDimitry Andric       RS.StartIndices.push_back(StartIdx);
35206c3fb27SDimitry Andric     break;
35306c3fb27SDimitry Andric   }
35406c3fb27SDimitry Andric   // At this point, either NewRS is an empty RepeatedSubstring, or it was
35506c3fb27SDimitry Andric   // set in the above loop. Similarly, N is either nullptr, or the node
35606c3fb27SDimitry Andric   // associated with NewRS.
35706c3fb27SDimitry Andric }
358