xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ADT/EquivalenceClasses.h (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===- llvm/ADT/EquivalenceClasses.h - Generic Equiv. Classes ---*- C++ -*-===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric //
9*0b57cec5SDimitry Andric // Generic implementation of equivalence classes through the use Tarjan's
10*0b57cec5SDimitry Andric // efficient union-find algorithm.
11*0b57cec5SDimitry Andric //
12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
13*0b57cec5SDimitry Andric 
14*0b57cec5SDimitry Andric #ifndef LLVM_ADT_EQUIVALENCECLASSES_H
15*0b57cec5SDimitry Andric #define LLVM_ADT_EQUIVALENCECLASSES_H
16*0b57cec5SDimitry Andric 
17*0b57cec5SDimitry Andric #include <cassert>
18*0b57cec5SDimitry Andric #include <cstddef>
19*0b57cec5SDimitry Andric #include <cstdint>
20*0b57cec5SDimitry Andric #include <iterator>
21*0b57cec5SDimitry Andric #include <set>
22*0b57cec5SDimitry Andric 
23*0b57cec5SDimitry Andric namespace llvm {
24*0b57cec5SDimitry Andric 
25*0b57cec5SDimitry Andric /// EquivalenceClasses - This represents a collection of equivalence classes and
26*0b57cec5SDimitry Andric /// supports three efficient operations: insert an element into a class of its
27*0b57cec5SDimitry Andric /// own, union two classes, and find the class for a given element.  In
28*0b57cec5SDimitry Andric /// addition to these modification methods, it is possible to iterate over all
29*0b57cec5SDimitry Andric /// of the equivalence classes and all of the elements in a class.
30*0b57cec5SDimitry Andric ///
31*0b57cec5SDimitry Andric /// This implementation is an efficient implementation that only stores one copy
32*0b57cec5SDimitry Andric /// of the element being indexed per entry in the set, and allows any arbitrary
33*0b57cec5SDimitry Andric /// type to be indexed (as long as it can be ordered with operator<).
34*0b57cec5SDimitry Andric ///
35*0b57cec5SDimitry Andric /// Here is a simple example using integers:
36*0b57cec5SDimitry Andric ///
37*0b57cec5SDimitry Andric /// \code
38*0b57cec5SDimitry Andric ///  EquivalenceClasses<int> EC;
39*0b57cec5SDimitry Andric ///  EC.unionSets(1, 2);                // insert 1, 2 into the same set
40*0b57cec5SDimitry Andric ///  EC.insert(4); EC.insert(5);        // insert 4, 5 into own sets
41*0b57cec5SDimitry Andric ///  EC.unionSets(5, 1);                // merge the set for 1 with 5's set.
42*0b57cec5SDimitry Andric ///
43*0b57cec5SDimitry Andric ///  for (EquivalenceClasses<int>::iterator I = EC.begin(), E = EC.end();
44*0b57cec5SDimitry Andric ///       I != E; ++I) {           // Iterate over all of the equivalence sets.
45*0b57cec5SDimitry Andric ///    if (!I->isLeader()) continue;   // Ignore non-leader sets.
46*0b57cec5SDimitry Andric ///    for (EquivalenceClasses<int>::member_iterator MI = EC.member_begin(I);
47*0b57cec5SDimitry Andric ///         MI != EC.member_end(); ++MI)   // Loop over members in this set.
48*0b57cec5SDimitry Andric ///      cerr << *MI << " ";  // Print member.
49*0b57cec5SDimitry Andric ///    cerr << "\n";   // Finish set.
50*0b57cec5SDimitry Andric ///  }
51*0b57cec5SDimitry Andric /// \endcode
52*0b57cec5SDimitry Andric ///
53*0b57cec5SDimitry Andric /// This example prints:
54*0b57cec5SDimitry Andric ///   4
55*0b57cec5SDimitry Andric ///   5 1 2
56*0b57cec5SDimitry Andric ///
57*0b57cec5SDimitry Andric template <class ElemTy>
58*0b57cec5SDimitry Andric class EquivalenceClasses {
59*0b57cec5SDimitry Andric   /// ECValue - The EquivalenceClasses data structure is just a set of these.
60*0b57cec5SDimitry Andric   /// Each of these represents a relation for a value.  First it stores the
61*0b57cec5SDimitry Andric   /// value itself, which provides the ordering that the set queries.  Next, it
62*0b57cec5SDimitry Andric   /// provides a "next pointer", which is used to enumerate all of the elements
63*0b57cec5SDimitry Andric   /// in the unioned set.  Finally, it defines either a "end of list pointer" or
64*0b57cec5SDimitry Andric   /// "leader pointer" depending on whether the value itself is a leader.  A
65*0b57cec5SDimitry Andric   /// "leader pointer" points to the node that is the leader for this element,
66*0b57cec5SDimitry Andric   /// if the node is not a leader.  A "end of list pointer" points to the last
67*0b57cec5SDimitry Andric   /// node in the list of members of this list.  Whether or not a node is a
68*0b57cec5SDimitry Andric   /// leader is determined by a bit stolen from one of the pointers.
69*0b57cec5SDimitry Andric   class ECValue {
70*0b57cec5SDimitry Andric     friend class EquivalenceClasses;
71*0b57cec5SDimitry Andric 
72*0b57cec5SDimitry Andric     mutable const ECValue *Leader, *Next;
73*0b57cec5SDimitry Andric     ElemTy Data;
74*0b57cec5SDimitry Andric 
75*0b57cec5SDimitry Andric     // ECValue ctor - Start out with EndOfList pointing to this node, Next is
76*0b57cec5SDimitry Andric     // Null, isLeader = true.
77*0b57cec5SDimitry Andric     ECValue(const ElemTy &Elt)
78*0b57cec5SDimitry Andric       : Leader(this), Next((ECValue*)(intptr_t)1), Data(Elt) {}
79*0b57cec5SDimitry Andric 
80*0b57cec5SDimitry Andric     const ECValue *getLeader() const {
81*0b57cec5SDimitry Andric       if (isLeader()) return this;
82*0b57cec5SDimitry Andric       if (Leader->isLeader()) return Leader;
83*0b57cec5SDimitry Andric       // Path compression.
84*0b57cec5SDimitry Andric       return Leader = Leader->getLeader();
85*0b57cec5SDimitry Andric     }
86*0b57cec5SDimitry Andric 
87*0b57cec5SDimitry Andric     const ECValue *getEndOfList() const {
88*0b57cec5SDimitry Andric       assert(isLeader() && "Cannot get the end of a list for a non-leader!");
89*0b57cec5SDimitry Andric       return Leader;
90*0b57cec5SDimitry Andric     }
91*0b57cec5SDimitry Andric 
92*0b57cec5SDimitry Andric     void setNext(const ECValue *NewNext) const {
93*0b57cec5SDimitry Andric       assert(getNext() == nullptr && "Already has a next pointer!");
94*0b57cec5SDimitry Andric       Next = (const ECValue*)((intptr_t)NewNext | (intptr_t)isLeader());
95*0b57cec5SDimitry Andric     }
96*0b57cec5SDimitry Andric 
97*0b57cec5SDimitry Andric   public:
98*0b57cec5SDimitry Andric     ECValue(const ECValue &RHS) : Leader(this), Next((ECValue*)(intptr_t)1),
99*0b57cec5SDimitry Andric                                   Data(RHS.Data) {
100*0b57cec5SDimitry Andric       // Only support copying of singleton nodes.
101*0b57cec5SDimitry Andric       assert(RHS.isLeader() && RHS.getNext() == nullptr && "Not a singleton!");
102*0b57cec5SDimitry Andric     }
103*0b57cec5SDimitry Andric 
104*0b57cec5SDimitry Andric     bool operator<(const ECValue &UFN) const { return Data < UFN.Data; }
105*0b57cec5SDimitry Andric 
106*0b57cec5SDimitry Andric     bool isLeader() const { return (intptr_t)Next & 1; }
107*0b57cec5SDimitry Andric     const ElemTy &getData() const { return Data; }
108*0b57cec5SDimitry Andric 
109*0b57cec5SDimitry Andric     const ECValue *getNext() const {
110*0b57cec5SDimitry Andric       return (ECValue*)((intptr_t)Next & ~(intptr_t)1);
111*0b57cec5SDimitry Andric     }
112*0b57cec5SDimitry Andric 
113*0b57cec5SDimitry Andric     template<typename T>
114*0b57cec5SDimitry Andric     bool operator<(const T &Val) const { return Data < Val; }
115*0b57cec5SDimitry Andric   };
116*0b57cec5SDimitry Andric 
117*0b57cec5SDimitry Andric   /// TheMapping - This implicitly provides a mapping from ElemTy values to the
118*0b57cec5SDimitry Andric   /// ECValues, it just keeps the key as part of the value.
119*0b57cec5SDimitry Andric   std::set<ECValue> TheMapping;
120*0b57cec5SDimitry Andric 
121*0b57cec5SDimitry Andric public:
122*0b57cec5SDimitry Andric   EquivalenceClasses() = default;
123*0b57cec5SDimitry Andric   EquivalenceClasses(const EquivalenceClasses &RHS) {
124*0b57cec5SDimitry Andric     operator=(RHS);
125*0b57cec5SDimitry Andric   }
126*0b57cec5SDimitry Andric 
127*0b57cec5SDimitry Andric   const EquivalenceClasses &operator=(const EquivalenceClasses &RHS) {
128*0b57cec5SDimitry Andric     TheMapping.clear();
129*0b57cec5SDimitry Andric     for (iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
130*0b57cec5SDimitry Andric       if (I->isLeader()) {
131*0b57cec5SDimitry Andric         member_iterator MI = RHS.member_begin(I);
132*0b57cec5SDimitry Andric         member_iterator LeaderIt = member_begin(insert(*MI));
133*0b57cec5SDimitry Andric         for (++MI; MI != member_end(); ++MI)
134*0b57cec5SDimitry Andric           unionSets(LeaderIt, member_begin(insert(*MI)));
135*0b57cec5SDimitry Andric       }
136*0b57cec5SDimitry Andric     return *this;
137*0b57cec5SDimitry Andric   }
138*0b57cec5SDimitry Andric 
139*0b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
140*0b57cec5SDimitry Andric   // Inspection methods
141*0b57cec5SDimitry Andric   //
142*0b57cec5SDimitry Andric 
143*0b57cec5SDimitry Andric   /// iterator* - Provides a way to iterate over all values in the set.
144*0b57cec5SDimitry Andric   using iterator = typename std::set<ECValue>::const_iterator;
145*0b57cec5SDimitry Andric 
146*0b57cec5SDimitry Andric   iterator begin() const { return TheMapping.begin(); }
147*0b57cec5SDimitry Andric   iterator end() const { return TheMapping.end(); }
148*0b57cec5SDimitry Andric 
149*0b57cec5SDimitry Andric   bool empty() const { return TheMapping.empty(); }
150*0b57cec5SDimitry Andric 
151*0b57cec5SDimitry Andric   /// member_* Iterate over the members of an equivalence class.
152*0b57cec5SDimitry Andric   class member_iterator;
153*0b57cec5SDimitry Andric   member_iterator member_begin(iterator I) const {
154*0b57cec5SDimitry Andric     // Only leaders provide anything to iterate over.
155*0b57cec5SDimitry Andric     return member_iterator(I->isLeader() ? &*I : nullptr);
156*0b57cec5SDimitry Andric   }
157*0b57cec5SDimitry Andric   member_iterator member_end() const {
158*0b57cec5SDimitry Andric     return member_iterator(nullptr);
159*0b57cec5SDimitry Andric   }
160*0b57cec5SDimitry Andric 
161*0b57cec5SDimitry Andric   /// findValue - Return an iterator to the specified value.  If it does not
162*0b57cec5SDimitry Andric   /// exist, end() is returned.
163*0b57cec5SDimitry Andric   iterator findValue(const ElemTy &V) const {
164*0b57cec5SDimitry Andric     return TheMapping.find(V);
165*0b57cec5SDimitry Andric   }
166*0b57cec5SDimitry Andric 
167*0b57cec5SDimitry Andric   /// getLeaderValue - Return the leader for the specified value that is in the
168*0b57cec5SDimitry Andric   /// set.  It is an error to call this method for a value that is not yet in
169*0b57cec5SDimitry Andric   /// the set.  For that, call getOrInsertLeaderValue(V).
170*0b57cec5SDimitry Andric   const ElemTy &getLeaderValue(const ElemTy &V) const {
171*0b57cec5SDimitry Andric     member_iterator MI = findLeader(V);
172*0b57cec5SDimitry Andric     assert(MI != member_end() && "Value is not in the set!");
173*0b57cec5SDimitry Andric     return *MI;
174*0b57cec5SDimitry Andric   }
175*0b57cec5SDimitry Andric 
176*0b57cec5SDimitry Andric   /// getOrInsertLeaderValue - Return the leader for the specified value that is
177*0b57cec5SDimitry Andric   /// in the set.  If the member is not in the set, it is inserted, then
178*0b57cec5SDimitry Andric   /// returned.
179*0b57cec5SDimitry Andric   const ElemTy &getOrInsertLeaderValue(const ElemTy &V) {
180*0b57cec5SDimitry Andric     member_iterator MI = findLeader(insert(V));
181*0b57cec5SDimitry Andric     assert(MI != member_end() && "Value is not in the set!");
182*0b57cec5SDimitry Andric     return *MI;
183*0b57cec5SDimitry Andric   }
184*0b57cec5SDimitry Andric 
185*0b57cec5SDimitry Andric   /// getNumClasses - Return the number of equivalence classes in this set.
186*0b57cec5SDimitry Andric   /// Note that this is a linear time operation.
187*0b57cec5SDimitry Andric   unsigned getNumClasses() const {
188*0b57cec5SDimitry Andric     unsigned NC = 0;
189*0b57cec5SDimitry Andric     for (iterator I = begin(), E = end(); I != E; ++I)
190*0b57cec5SDimitry Andric       if (I->isLeader()) ++NC;
191*0b57cec5SDimitry Andric     return NC;
192*0b57cec5SDimitry Andric   }
193*0b57cec5SDimitry Andric 
194*0b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
195*0b57cec5SDimitry Andric   // Mutation methods
196*0b57cec5SDimitry Andric 
197*0b57cec5SDimitry Andric   /// insert - Insert a new value into the union/find set, ignoring the request
198*0b57cec5SDimitry Andric   /// if the value already exists.
199*0b57cec5SDimitry Andric   iterator insert(const ElemTy &Data) {
200*0b57cec5SDimitry Andric     return TheMapping.insert(ECValue(Data)).first;
201*0b57cec5SDimitry Andric   }
202*0b57cec5SDimitry Andric 
203*0b57cec5SDimitry Andric   /// findLeader - Given a value in the set, return a member iterator for the
204*0b57cec5SDimitry Andric   /// equivalence class it is in.  This does the path-compression part that
205*0b57cec5SDimitry Andric   /// makes union-find "union findy".  This returns an end iterator if the value
206*0b57cec5SDimitry Andric   /// is not in the equivalence class.
207*0b57cec5SDimitry Andric   member_iterator findLeader(iterator I) const {
208*0b57cec5SDimitry Andric     if (I == TheMapping.end()) return member_end();
209*0b57cec5SDimitry Andric     return member_iterator(I->getLeader());
210*0b57cec5SDimitry Andric   }
211*0b57cec5SDimitry Andric   member_iterator findLeader(const ElemTy &V) const {
212*0b57cec5SDimitry Andric     return findLeader(TheMapping.find(V));
213*0b57cec5SDimitry Andric   }
214*0b57cec5SDimitry Andric 
215*0b57cec5SDimitry Andric   /// union - Merge the two equivalence sets for the specified values, inserting
216*0b57cec5SDimitry Andric   /// them if they do not already exist in the equivalence set.
217*0b57cec5SDimitry Andric   member_iterator unionSets(const ElemTy &V1, const ElemTy &V2) {
218*0b57cec5SDimitry Andric     iterator V1I = insert(V1), V2I = insert(V2);
219*0b57cec5SDimitry Andric     return unionSets(findLeader(V1I), findLeader(V2I));
220*0b57cec5SDimitry Andric   }
221*0b57cec5SDimitry Andric   member_iterator unionSets(member_iterator L1, member_iterator L2) {
222*0b57cec5SDimitry Andric     assert(L1 != member_end() && L2 != member_end() && "Illegal inputs!");
223*0b57cec5SDimitry Andric     if (L1 == L2) return L1;   // Unifying the same two sets, noop.
224*0b57cec5SDimitry Andric 
225*0b57cec5SDimitry Andric     // Otherwise, this is a real union operation.  Set the end of the L1 list to
226*0b57cec5SDimitry Andric     // point to the L2 leader node.
227*0b57cec5SDimitry Andric     const ECValue &L1LV = *L1.Node, &L2LV = *L2.Node;
228*0b57cec5SDimitry Andric     L1LV.getEndOfList()->setNext(&L2LV);
229*0b57cec5SDimitry Andric 
230*0b57cec5SDimitry Andric     // Update L1LV's end of list pointer.
231*0b57cec5SDimitry Andric     L1LV.Leader = L2LV.getEndOfList();
232*0b57cec5SDimitry Andric 
233*0b57cec5SDimitry Andric     // Clear L2's leader flag:
234*0b57cec5SDimitry Andric     L2LV.Next = L2LV.getNext();
235*0b57cec5SDimitry Andric 
236*0b57cec5SDimitry Andric     // L2's leader is now L1.
237*0b57cec5SDimitry Andric     L2LV.Leader = &L1LV;
238*0b57cec5SDimitry Andric     return L1;
239*0b57cec5SDimitry Andric   }
240*0b57cec5SDimitry Andric 
241*0b57cec5SDimitry Andric   // isEquivalent - Return true if V1 is equivalent to V2. This can happen if
242*0b57cec5SDimitry Andric   // V1 is equal to V2 or if they belong to one equivalence class.
243*0b57cec5SDimitry Andric   bool isEquivalent(const ElemTy &V1, const ElemTy &V2) const {
244*0b57cec5SDimitry Andric     // Fast path: any element is equivalent to itself.
245*0b57cec5SDimitry Andric     if (V1 == V2)
246*0b57cec5SDimitry Andric       return true;
247*0b57cec5SDimitry Andric     auto It = findLeader(V1);
248*0b57cec5SDimitry Andric     return It != member_end() && It == findLeader(V2);
249*0b57cec5SDimitry Andric   }
250*0b57cec5SDimitry Andric 
251*0b57cec5SDimitry Andric   class member_iterator : public std::iterator<std::forward_iterator_tag,
252*0b57cec5SDimitry Andric                                                const ElemTy, ptrdiff_t> {
253*0b57cec5SDimitry Andric     friend class EquivalenceClasses;
254*0b57cec5SDimitry Andric 
255*0b57cec5SDimitry Andric     using super = std::iterator<std::forward_iterator_tag,
256*0b57cec5SDimitry Andric                                 const ElemTy, ptrdiff_t>;
257*0b57cec5SDimitry Andric 
258*0b57cec5SDimitry Andric     const ECValue *Node;
259*0b57cec5SDimitry Andric 
260*0b57cec5SDimitry Andric   public:
261*0b57cec5SDimitry Andric     using size_type = size_t;
262*0b57cec5SDimitry Andric     using pointer = typename super::pointer;
263*0b57cec5SDimitry Andric     using reference = typename super::reference;
264*0b57cec5SDimitry Andric 
265*0b57cec5SDimitry Andric     explicit member_iterator() = default;
266*0b57cec5SDimitry Andric     explicit member_iterator(const ECValue *N) : Node(N) {}
267*0b57cec5SDimitry Andric 
268*0b57cec5SDimitry Andric     reference operator*() const {
269*0b57cec5SDimitry Andric       assert(Node != nullptr && "Dereferencing end()!");
270*0b57cec5SDimitry Andric       return Node->getData();
271*0b57cec5SDimitry Andric     }
272*0b57cec5SDimitry Andric     pointer operator->() const { return &operator*(); }
273*0b57cec5SDimitry Andric 
274*0b57cec5SDimitry Andric     member_iterator &operator++() {
275*0b57cec5SDimitry Andric       assert(Node != nullptr && "++'d off the end of the list!");
276*0b57cec5SDimitry Andric       Node = Node->getNext();
277*0b57cec5SDimitry Andric       return *this;
278*0b57cec5SDimitry Andric     }
279*0b57cec5SDimitry Andric 
280*0b57cec5SDimitry Andric     member_iterator operator++(int) {    // postincrement operators.
281*0b57cec5SDimitry Andric       member_iterator tmp = *this;
282*0b57cec5SDimitry Andric       ++*this;
283*0b57cec5SDimitry Andric       return tmp;
284*0b57cec5SDimitry Andric     }
285*0b57cec5SDimitry Andric 
286*0b57cec5SDimitry Andric     bool operator==(const member_iterator &RHS) const {
287*0b57cec5SDimitry Andric       return Node == RHS.Node;
288*0b57cec5SDimitry Andric     }
289*0b57cec5SDimitry Andric     bool operator!=(const member_iterator &RHS) const {
290*0b57cec5SDimitry Andric       return Node != RHS.Node;
291*0b57cec5SDimitry Andric     }
292*0b57cec5SDimitry Andric   };
293*0b57cec5SDimitry Andric };
294*0b57cec5SDimitry Andric 
295*0b57cec5SDimitry Andric } // end namespace llvm
296*0b57cec5SDimitry Andric 
297*0b57cec5SDimitry Andric #endif // LLVM_ADT_EQUIVALENCECLASSES_H
298