xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ADT/TrieRawHashMap.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- TrieRawHashMap.h -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_ADT_TRIERAWHASHMAP_H
10 #define LLVM_ADT_TRIERAWHASHMAP_H
11 
12 #include "llvm/ADT/ArrayRef.h"
13 #include "llvm/Support/Compiler.h"
14 #include <atomic>
15 #include <optional>
16 
17 namespace llvm {
18 
19 class raw_ostream;
20 
21 /// TrieRawHashMap - is a lock-free thread-safe trie that is can be used to
22 /// store/index data based on a hash value. It can be customized to work with
23 /// any hash algorithm or store any data.
24 ///
25 /// Data structure:
26 /// Data node stored in the Trie contains both hash and data:
27 /// struct {
28 ///    HashT Hash;
29 ///    DataT Data;
30 /// };
31 ///
32 /// Data is stored/indexed via a prefix tree, where each node in the tree can be
33 /// either the root, a sub-trie or a data node. Assuming a 4-bit hash and two
34 /// data objects {0001, A} and {0100, B}, it can be stored in a trie
35 /// (assuming Root has 2 bits, SubTrie has 1 bit):
36 ///  +--------+
37 ///  |Root[00]| -> {0001, A}
38 ///  |    [01]| -> {0100, B}
39 ///  |    [10]| (empty)
40 ///  |    [11]| (empty)
41 ///  +--------+
42 ///
43 /// Inserting a new object {0010, C} will result in:
44 ///  +--------+    +----------+
45 ///  |Root[00]| -> |SubTrie[0]| -> {0001, A}
46 ///  |        |    |       [1]| -> {0010, C}
47 ///  |        |    +----------+
48 ///  |    [01]| -> {0100, B}
49 ///  |    [10]| (empty)
50 ///  |    [11]| (empty)
51 ///  +--------+
52 /// Note object A is sunk down to a sub-trie during the insertion. All the
53 /// nodes are inserted through compare-exchange to ensure thread-safe and
54 /// lock-free.
55 ///
56 /// To find an object in the trie, walk the tree with prefix of the hash until
57 /// the data node is found. Then the hash is compared with the hash stored in
58 /// the data node to see if the is the same object.
59 ///
60 /// Hash collision is not allowed so it is recommended to use trie with a
61 /// "strong" hashing algorithm. A well-distributed hash can also result in
62 /// better performance and memory usage.
63 ///
64 /// It currently does not support iteration and deletion.
65 
66 /// Base class for a lock-free thread-safe hash-mapped trie.
67 class ThreadSafeTrieRawHashMapBase {
68 public:
69   static constexpr size_t TrieContentBaseSize = 4;
70   static constexpr size_t DefaultNumRootBits = 6;
71   static constexpr size_t DefaultNumSubtrieBits = 4;
72 
73 private:
74   template <class T> struct AllocValueType {
75     char Base[TrieContentBaseSize];
76     alignas(T) char Content[sizeof(T)];
77   };
78 
79 protected:
80   template <class T>
81   static constexpr size_t DefaultContentAllocSize = sizeof(AllocValueType<T>);
82 
83   template <class T>
84   static constexpr size_t DefaultContentAllocAlign = alignof(AllocValueType<T>);
85 
86   template <class T>
87   static constexpr size_t DefaultContentOffset =
88       offsetof(AllocValueType<T>, Content);
89 
90 public:
new(size_t Size)91   static void *operator new(size_t Size) { return ::operator new(Size); }
delete(void * Ptr)92   void operator delete(void *Ptr) { ::operator delete(Ptr); }
93 
94 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
95   LLVM_DUMP_METHOD void dump() const;
96 #endif
97 
98   LLVM_ABI void print(raw_ostream &OS) const;
99 
100 protected:
101   /// Result of a lookup. Suitable for an insertion hint. Maybe could be
102   /// expanded into an iterator of sorts, but likely not useful (visiting
103   /// everything in the trie should probably be done some way other than
104   /// through an iterator pattern).
105   class PointerBase {
106   protected:
get()107     void *get() const { return I == -2u ? P : nullptr; }
108 
109   public:
110     PointerBase() noexcept = default;
111 
112   private:
113     friend class ThreadSafeTrieRawHashMapBase;
PointerBase(void * Content)114     explicit PointerBase(void *Content) : P(Content), I(-2u) {}
PointerBase(void * P,unsigned I,unsigned B)115     PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {}
116 
isHint()117     bool isHint() const { return I != -1u && I != -2u; }
118 
119     void *P = nullptr;
120     unsigned I = -1u;
121     unsigned B = 0;
122   };
123 
124   /// Find the stored content with hash.
125   LLVM_ABI PointerBase find(ArrayRef<uint8_t> Hash) const;
126 
127   /// Insert and return the stored content.
128   LLVM_ABI PointerBase
129   insert(PointerBase Hint, ArrayRef<uint8_t> Hash,
130          function_ref<const uint8_t *(void *Mem, ArrayRef<uint8_t> Hash)>
131              Constructor);
132 
133   ThreadSafeTrieRawHashMapBase() = delete;
134 
135   LLVM_ABI ThreadSafeTrieRawHashMapBase(
136       size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset,
137       std::optional<size_t> NumRootBits = std::nullopt,
138       std::optional<size_t> NumSubtrieBits = std::nullopt);
139 
140   /// Destructor, which asserts if there's anything to do. Subclasses should
141   /// call \a destroyImpl().
142   ///
143   /// \pre \a destroyImpl() was already called.
144   LLVM_ABI ~ThreadSafeTrieRawHashMapBase();
145   LLVM_ABI void destroyImpl(function_ref<void(void *ValueMem)> Destructor);
146 
147   LLVM_ABI ThreadSafeTrieRawHashMapBase(ThreadSafeTrieRawHashMapBase &&RHS);
148 
149   // Move assignment is not supported as it is not thread-safe.
150   ThreadSafeTrieRawHashMapBase &
151   operator=(ThreadSafeTrieRawHashMapBase &&RHS) = delete;
152 
153   // No copy.
154   ThreadSafeTrieRawHashMapBase(const ThreadSafeTrieRawHashMapBase &) = delete;
155   ThreadSafeTrieRawHashMapBase &
156   operator=(const ThreadSafeTrieRawHashMapBase &) = delete;
157 
158   // Debug functions. Implementation details and not guaranteed to be
159   // thread-safe.
160   LLVM_ABI PointerBase getRoot() const;
161   LLVM_ABI unsigned getStartBit(PointerBase P) const;
162   LLVM_ABI unsigned getNumBits(PointerBase P) const;
163   LLVM_ABI unsigned getNumSlotUsed(PointerBase P) const;
164   LLVM_ABI std::string getTriePrefixAsString(PointerBase P) const;
165   LLVM_ABI unsigned getNumTries() const;
166   // Visit next trie in the allocation chain.
167   LLVM_ABI PointerBase getNextTrie(PointerBase P) const;
168 
169 private:
170   friend class TrieRawHashMapTestHelper;
171   const unsigned short ContentAllocSize;
172   const unsigned short ContentAllocAlign;
173   const unsigned short ContentOffset;
174   unsigned short NumRootBits;
175   unsigned short NumSubtrieBits;
176   class ImplType;
177   // ImplPtr is owned by ThreadSafeTrieRawHashMapBase and needs to be freed in
178   // destroyImpl.
179   std::atomic<ImplType *> ImplPtr;
180   ImplType &getOrCreateImpl();
181   ImplType *getImpl() const;
182 };
183 
184 /// Lock-free thread-safe hash-mapped trie.
185 template <class T, size_t NumHashBytes>
186 class ThreadSafeTrieRawHashMap : public ThreadSafeTrieRawHashMapBase {
187 public:
188   using HashT = std::array<uint8_t, NumHashBytes>;
189 
190   class LazyValueConstructor;
191   struct value_type {
192     const HashT Hash;
193     T Data;
194 
195     value_type(value_type &&) = default;
196     value_type(const value_type &) = default;
197 
value_typevalue_type198     value_type(ArrayRef<uint8_t> Hash, const T &Data)
199         : Hash(makeHash(Hash)), Data(Data) {}
value_typevalue_type200     value_type(ArrayRef<uint8_t> Hash, T &&Data)
201         : Hash(makeHash(Hash)), Data(std::move(Data)) {}
202 
203   private:
204     friend class LazyValueConstructor;
205 
206     struct EmplaceTag {};
207     template <class... ArgsT>
value_typevalue_type208     value_type(ArrayRef<uint8_t> Hash, EmplaceTag, ArgsT &&...Args)
209         : Hash(makeHash(Hash)), Data(std::forward<ArgsT>(Args)...) {}
210 
makeHashvalue_type211     static HashT makeHash(ArrayRef<uint8_t> HashRef) {
212       HashT Hash;
213       std::copy(HashRef.begin(), HashRef.end(), Hash.data());
214       return Hash;
215     }
216   };
217 
218   using ThreadSafeTrieRawHashMapBase::operator delete;
219   using HashType = HashT;
220 
221 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
222   using ThreadSafeTrieRawHashMapBase::dump;
223 #endif
224 
225   using ThreadSafeTrieRawHashMapBase::print;
226 
227 private:
228   template <class ValueT> class PointerImpl : PointerBase {
229     friend class ThreadSafeTrieRawHashMap;
230 
get()231     ValueT *get() const {
232       return reinterpret_cast<ValueT *>(PointerBase::get());
233     }
234 
235   public:
236     ValueT &operator*() const {
237       assert(get());
238       return *get();
239     }
240     ValueT *operator->() const {
241       assert(get());
242       return get();
243     }
244     explicit operator bool() const { return get(); }
245 
246     PointerImpl() = default;
247 
248   protected:
PointerImpl(PointerBase Result)249     PointerImpl(PointerBase Result) : PointerBase(Result) {}
250   };
251 
252 public:
253   class pointer;
254   class const_pointer;
255   class pointer : public PointerImpl<value_type> {
256     friend class ThreadSafeTrieRawHashMap;
257     friend class const_pointer;
258 
259   public:
260     pointer() = default;
261 
262   private:
pointer(PointerBase Result)263     pointer(PointerBase Result) : pointer::PointerImpl(Result) {}
264   };
265 
266   class const_pointer : public PointerImpl<const value_type> {
267     friend class ThreadSafeTrieRawHashMap;
268 
269   public:
270     const_pointer() = default;
const_pointer(const pointer & P)271     const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {}
272 
273   private:
const_pointer(PointerBase Result)274     const_pointer(PointerBase Result) : const_pointer::PointerImpl(Result) {}
275   };
276 
277   class LazyValueConstructor {
278   public:
operator()279     value_type &operator()(T &&RHS) {
280       assert(Mem && "Constructor already called, or moved away");
281       return assign(::new (Mem) value_type(Hash, std::move(RHS)));
282     }
operator()283     value_type &operator()(const T &RHS) {
284       assert(Mem && "Constructor already called, or moved away");
285       return assign(::new (Mem) value_type(Hash, RHS));
286     }
emplace(ArgsT &&...Args)287     template <class... ArgsT> value_type &emplace(ArgsT &&...Args) {
288       assert(Mem && "Constructor already called, or moved away");
289       return assign(::new (Mem)
290                         value_type(Hash, typename value_type::EmplaceTag{},
291                                    std::forward<ArgsT>(Args)...));
292     }
293 
LazyValueConstructor(LazyValueConstructor && RHS)294     LazyValueConstructor(LazyValueConstructor &&RHS)
295         : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) {
296       RHS.Mem = nullptr; // Moved away, cannot call.
297     }
~LazyValueConstructor()298     ~LazyValueConstructor() { assert(!Mem && "Constructor never called!"); }
299 
300   private:
assign(value_type * V)301     value_type &assign(value_type *V) {
302       Mem = nullptr;
303       Result = V;
304       return *V;
305     }
306     friend class ThreadSafeTrieRawHashMap;
307     LazyValueConstructor() = delete;
LazyValueConstructor(void * Mem,value_type * & Result,ArrayRef<uint8_t> Hash)308     LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef<uint8_t> Hash)
309         : Mem(Mem), Result(Result), Hash(Hash) {
310       assert(Hash.size() == sizeof(HashT) && "Invalid hash");
311       assert(Mem && "Invalid memory for construction");
312     }
313     void *Mem;
314     value_type *&Result;
315     ArrayRef<uint8_t> Hash;
316   };
317 
318   /// Insert with a hint. Default-constructed hint will work, but it's
319   /// recommended to start with a lookup to avoid overhead in object creation
320   /// if it already exists.
insertLazy(const_pointer Hint,ArrayRef<uint8_t> Hash,function_ref<void (LazyValueConstructor)> OnConstruct)321   pointer insertLazy(const_pointer Hint, ArrayRef<uint8_t> Hash,
322                      function_ref<void(LazyValueConstructor)> OnConstruct) {
323     return pointer(ThreadSafeTrieRawHashMapBase::insert(
324         Hint, Hash, [&](void *Mem, ArrayRef<uint8_t> Hash) {
325           value_type *Result = nullptr;
326           OnConstruct(LazyValueConstructor(Mem, Result, Hash));
327           return Result->Hash.data();
328         }));
329   }
330 
insertLazy(ArrayRef<uint8_t> Hash,function_ref<void (LazyValueConstructor)> OnConstruct)331   pointer insertLazy(ArrayRef<uint8_t> Hash,
332                      function_ref<void(LazyValueConstructor)> OnConstruct) {
333     return insertLazy(const_pointer(), Hash, OnConstruct);
334   }
335 
insert(const_pointer Hint,value_type && HashedData)336   pointer insert(const_pointer Hint, value_type &&HashedData) {
337     return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) {
338       C(std::move(HashedData.Data));
339     });
340   }
341 
insert(const_pointer Hint,const value_type & HashedData)342   pointer insert(const_pointer Hint, const value_type &HashedData) {
343     return insertLazy(Hint, HashedData.Hash,
344                       [&](LazyValueConstructor C) { C(HashedData.Data); });
345   }
346 
find(ArrayRef<uint8_t> Hash)347   pointer find(ArrayRef<uint8_t> Hash) {
348     assert(Hash.size() == std::tuple_size<HashT>::value);
349     return ThreadSafeTrieRawHashMapBase::find(Hash);
350   }
351 
find(ArrayRef<uint8_t> Hash)352   const_pointer find(ArrayRef<uint8_t> Hash) const {
353     assert(Hash.size() == std::tuple_size<HashT>::value);
354     return ThreadSafeTrieRawHashMapBase::find(Hash);
355   }
356 
357   ThreadSafeTrieRawHashMap(std::optional<size_t> NumRootBits = std::nullopt,
358                            std::optional<size_t> NumSubtrieBits = std::nullopt)
ThreadSafeTrieRawHashMapBase(DefaultContentAllocSize<value_type>,DefaultContentAllocAlign<value_type>,DefaultContentOffset<value_type>,NumRootBits,NumSubtrieBits)359       : ThreadSafeTrieRawHashMapBase(DefaultContentAllocSize<value_type>,
360                                      DefaultContentAllocAlign<value_type>,
361                                      DefaultContentOffset<value_type>,
362                                      NumRootBits, NumSubtrieBits) {}
363 
~ThreadSafeTrieRawHashMap()364   ~ThreadSafeTrieRawHashMap() {
365     if constexpr (std::is_trivially_destructible<value_type>::value)
366       this->destroyImpl(nullptr);
367     else
368       this->destroyImpl(
369           [](void *P) { static_cast<value_type *>(P)->~value_type(); });
370   }
371 
372   // Move constructor okay.
373   ThreadSafeTrieRawHashMap(ThreadSafeTrieRawHashMap &&) = default;
374 
375   // No move assignment or any copy.
376   ThreadSafeTrieRawHashMap &operator=(ThreadSafeTrieRawHashMap &&) = delete;
377   ThreadSafeTrieRawHashMap(const ThreadSafeTrieRawHashMap &) = delete;
378   ThreadSafeTrieRawHashMap &
379   operator=(const ThreadSafeTrieRawHashMap &) = delete;
380 };
381 
382 } // namespace llvm
383 
384 #endif // LLVM_ADT_TRIERAWHASHMAP_H
385