1 //===- TrieRawHashMap.h -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_ADT_TRIERAWHASHMAP_H 10 #define LLVM_ADT_TRIERAWHASHMAP_H 11 12 #include "llvm/ADT/ArrayRef.h" 13 #include "llvm/Support/Compiler.h" 14 #include <atomic> 15 #include <optional> 16 17 namespace llvm { 18 19 class raw_ostream; 20 21 /// TrieRawHashMap - is a lock-free thread-safe trie that is can be used to 22 /// store/index data based on a hash value. It can be customized to work with 23 /// any hash algorithm or store any data. 24 /// 25 /// Data structure: 26 /// Data node stored in the Trie contains both hash and data: 27 /// struct { 28 /// HashT Hash; 29 /// DataT Data; 30 /// }; 31 /// 32 /// Data is stored/indexed via a prefix tree, where each node in the tree can be 33 /// either the root, a sub-trie or a data node. Assuming a 4-bit hash and two 34 /// data objects {0001, A} and {0100, B}, it can be stored in a trie 35 /// (assuming Root has 2 bits, SubTrie has 1 bit): 36 /// +--------+ 37 /// |Root[00]| -> {0001, A} 38 /// | [01]| -> {0100, B} 39 /// | [10]| (empty) 40 /// | [11]| (empty) 41 /// +--------+ 42 /// 43 /// Inserting a new object {0010, C} will result in: 44 /// +--------+ +----------+ 45 /// |Root[00]| -> |SubTrie[0]| -> {0001, A} 46 /// | | | [1]| -> {0010, C} 47 /// | | +----------+ 48 /// | [01]| -> {0100, B} 49 /// | [10]| (empty) 50 /// | [11]| (empty) 51 /// +--------+ 52 /// Note object A is sunk down to a sub-trie during the insertion. All the 53 /// nodes are inserted through compare-exchange to ensure thread-safe and 54 /// lock-free. 55 /// 56 /// To find an object in the trie, walk the tree with prefix of the hash until 57 /// the data node is found. Then the hash is compared with the hash stored in 58 /// the data node to see if the is the same object. 59 /// 60 /// Hash collision is not allowed so it is recommended to use trie with a 61 /// "strong" hashing algorithm. A well-distributed hash can also result in 62 /// better performance and memory usage. 63 /// 64 /// It currently does not support iteration and deletion. 65 66 /// Base class for a lock-free thread-safe hash-mapped trie. 67 class ThreadSafeTrieRawHashMapBase { 68 public: 69 static constexpr size_t TrieContentBaseSize = 4; 70 static constexpr size_t DefaultNumRootBits = 6; 71 static constexpr size_t DefaultNumSubtrieBits = 4; 72 73 private: 74 template <class T> struct AllocValueType { 75 char Base[TrieContentBaseSize]; 76 alignas(T) char Content[sizeof(T)]; 77 }; 78 79 protected: 80 template <class T> 81 static constexpr size_t DefaultContentAllocSize = sizeof(AllocValueType<T>); 82 83 template <class T> 84 static constexpr size_t DefaultContentAllocAlign = alignof(AllocValueType<T>); 85 86 template <class T> 87 static constexpr size_t DefaultContentOffset = 88 offsetof(AllocValueType<T>, Content); 89 90 public: new(size_t Size)91 static void *operator new(size_t Size) { return ::operator new(Size); } delete(void * Ptr)92 void operator delete(void *Ptr) { ::operator delete(Ptr); } 93 94 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 95 LLVM_DUMP_METHOD void dump() const; 96 #endif 97 98 LLVM_ABI void print(raw_ostream &OS) const; 99 100 protected: 101 /// Result of a lookup. Suitable for an insertion hint. Maybe could be 102 /// expanded into an iterator of sorts, but likely not useful (visiting 103 /// everything in the trie should probably be done some way other than 104 /// through an iterator pattern). 105 class PointerBase { 106 protected: get()107 void *get() const { return I == -2u ? P : nullptr; } 108 109 public: 110 PointerBase() noexcept = default; 111 112 private: 113 friend class ThreadSafeTrieRawHashMapBase; PointerBase(void * Content)114 explicit PointerBase(void *Content) : P(Content), I(-2u) {} PointerBase(void * P,unsigned I,unsigned B)115 PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {} 116 isHint()117 bool isHint() const { return I != -1u && I != -2u; } 118 119 void *P = nullptr; 120 unsigned I = -1u; 121 unsigned B = 0; 122 }; 123 124 /// Find the stored content with hash. 125 LLVM_ABI PointerBase find(ArrayRef<uint8_t> Hash) const; 126 127 /// Insert and return the stored content. 128 LLVM_ABI PointerBase 129 insert(PointerBase Hint, ArrayRef<uint8_t> Hash, 130 function_ref<const uint8_t *(void *Mem, ArrayRef<uint8_t> Hash)> 131 Constructor); 132 133 ThreadSafeTrieRawHashMapBase() = delete; 134 135 LLVM_ABI ThreadSafeTrieRawHashMapBase( 136 size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset, 137 std::optional<size_t> NumRootBits = std::nullopt, 138 std::optional<size_t> NumSubtrieBits = std::nullopt); 139 140 /// Destructor, which asserts if there's anything to do. Subclasses should 141 /// call \a destroyImpl(). 142 /// 143 /// \pre \a destroyImpl() was already called. 144 LLVM_ABI ~ThreadSafeTrieRawHashMapBase(); 145 LLVM_ABI void destroyImpl(function_ref<void(void *ValueMem)> Destructor); 146 147 LLVM_ABI ThreadSafeTrieRawHashMapBase(ThreadSafeTrieRawHashMapBase &&RHS); 148 149 // Move assignment is not supported as it is not thread-safe. 150 ThreadSafeTrieRawHashMapBase & 151 operator=(ThreadSafeTrieRawHashMapBase &&RHS) = delete; 152 153 // No copy. 154 ThreadSafeTrieRawHashMapBase(const ThreadSafeTrieRawHashMapBase &) = delete; 155 ThreadSafeTrieRawHashMapBase & 156 operator=(const ThreadSafeTrieRawHashMapBase &) = delete; 157 158 // Debug functions. Implementation details and not guaranteed to be 159 // thread-safe. 160 LLVM_ABI PointerBase getRoot() const; 161 LLVM_ABI unsigned getStartBit(PointerBase P) const; 162 LLVM_ABI unsigned getNumBits(PointerBase P) const; 163 LLVM_ABI unsigned getNumSlotUsed(PointerBase P) const; 164 LLVM_ABI std::string getTriePrefixAsString(PointerBase P) const; 165 LLVM_ABI unsigned getNumTries() const; 166 // Visit next trie in the allocation chain. 167 LLVM_ABI PointerBase getNextTrie(PointerBase P) const; 168 169 private: 170 friend class TrieRawHashMapTestHelper; 171 const unsigned short ContentAllocSize; 172 const unsigned short ContentAllocAlign; 173 const unsigned short ContentOffset; 174 unsigned short NumRootBits; 175 unsigned short NumSubtrieBits; 176 class ImplType; 177 // ImplPtr is owned by ThreadSafeTrieRawHashMapBase and needs to be freed in 178 // destroyImpl. 179 std::atomic<ImplType *> ImplPtr; 180 ImplType &getOrCreateImpl(); 181 ImplType *getImpl() const; 182 }; 183 184 /// Lock-free thread-safe hash-mapped trie. 185 template <class T, size_t NumHashBytes> 186 class ThreadSafeTrieRawHashMap : public ThreadSafeTrieRawHashMapBase { 187 public: 188 using HashT = std::array<uint8_t, NumHashBytes>; 189 190 class LazyValueConstructor; 191 struct value_type { 192 const HashT Hash; 193 T Data; 194 195 value_type(value_type &&) = default; 196 value_type(const value_type &) = default; 197 value_typevalue_type198 value_type(ArrayRef<uint8_t> Hash, const T &Data) 199 : Hash(makeHash(Hash)), Data(Data) {} value_typevalue_type200 value_type(ArrayRef<uint8_t> Hash, T &&Data) 201 : Hash(makeHash(Hash)), Data(std::move(Data)) {} 202 203 private: 204 friend class LazyValueConstructor; 205 206 struct EmplaceTag {}; 207 template <class... ArgsT> value_typevalue_type208 value_type(ArrayRef<uint8_t> Hash, EmplaceTag, ArgsT &&...Args) 209 : Hash(makeHash(Hash)), Data(std::forward<ArgsT>(Args)...) {} 210 makeHashvalue_type211 static HashT makeHash(ArrayRef<uint8_t> HashRef) { 212 HashT Hash; 213 std::copy(HashRef.begin(), HashRef.end(), Hash.data()); 214 return Hash; 215 } 216 }; 217 218 using ThreadSafeTrieRawHashMapBase::operator delete; 219 using HashType = HashT; 220 221 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 222 using ThreadSafeTrieRawHashMapBase::dump; 223 #endif 224 225 using ThreadSafeTrieRawHashMapBase::print; 226 227 private: 228 template <class ValueT> class PointerImpl : PointerBase { 229 friend class ThreadSafeTrieRawHashMap; 230 get()231 ValueT *get() const { 232 return reinterpret_cast<ValueT *>(PointerBase::get()); 233 } 234 235 public: 236 ValueT &operator*() const { 237 assert(get()); 238 return *get(); 239 } 240 ValueT *operator->() const { 241 assert(get()); 242 return get(); 243 } 244 explicit operator bool() const { return get(); } 245 246 PointerImpl() = default; 247 248 protected: PointerImpl(PointerBase Result)249 PointerImpl(PointerBase Result) : PointerBase(Result) {} 250 }; 251 252 public: 253 class pointer; 254 class const_pointer; 255 class pointer : public PointerImpl<value_type> { 256 friend class ThreadSafeTrieRawHashMap; 257 friend class const_pointer; 258 259 public: 260 pointer() = default; 261 262 private: pointer(PointerBase Result)263 pointer(PointerBase Result) : pointer::PointerImpl(Result) {} 264 }; 265 266 class const_pointer : public PointerImpl<const value_type> { 267 friend class ThreadSafeTrieRawHashMap; 268 269 public: 270 const_pointer() = default; const_pointer(const pointer & P)271 const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {} 272 273 private: const_pointer(PointerBase Result)274 const_pointer(PointerBase Result) : const_pointer::PointerImpl(Result) {} 275 }; 276 277 class LazyValueConstructor { 278 public: operator()279 value_type &operator()(T &&RHS) { 280 assert(Mem && "Constructor already called, or moved away"); 281 return assign(::new (Mem) value_type(Hash, std::move(RHS))); 282 } operator()283 value_type &operator()(const T &RHS) { 284 assert(Mem && "Constructor already called, or moved away"); 285 return assign(::new (Mem) value_type(Hash, RHS)); 286 } emplace(ArgsT &&...Args)287 template <class... ArgsT> value_type &emplace(ArgsT &&...Args) { 288 assert(Mem && "Constructor already called, or moved away"); 289 return assign(::new (Mem) 290 value_type(Hash, typename value_type::EmplaceTag{}, 291 std::forward<ArgsT>(Args)...)); 292 } 293 LazyValueConstructor(LazyValueConstructor && RHS)294 LazyValueConstructor(LazyValueConstructor &&RHS) 295 : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) { 296 RHS.Mem = nullptr; // Moved away, cannot call. 297 } ~LazyValueConstructor()298 ~LazyValueConstructor() { assert(!Mem && "Constructor never called!"); } 299 300 private: assign(value_type * V)301 value_type &assign(value_type *V) { 302 Mem = nullptr; 303 Result = V; 304 return *V; 305 } 306 friend class ThreadSafeTrieRawHashMap; 307 LazyValueConstructor() = delete; LazyValueConstructor(void * Mem,value_type * & Result,ArrayRef<uint8_t> Hash)308 LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef<uint8_t> Hash) 309 : Mem(Mem), Result(Result), Hash(Hash) { 310 assert(Hash.size() == sizeof(HashT) && "Invalid hash"); 311 assert(Mem && "Invalid memory for construction"); 312 } 313 void *Mem; 314 value_type *&Result; 315 ArrayRef<uint8_t> Hash; 316 }; 317 318 /// Insert with a hint. Default-constructed hint will work, but it's 319 /// recommended to start with a lookup to avoid overhead in object creation 320 /// if it already exists. insertLazy(const_pointer Hint,ArrayRef<uint8_t> Hash,function_ref<void (LazyValueConstructor)> OnConstruct)321 pointer insertLazy(const_pointer Hint, ArrayRef<uint8_t> Hash, 322 function_ref<void(LazyValueConstructor)> OnConstruct) { 323 return pointer(ThreadSafeTrieRawHashMapBase::insert( 324 Hint, Hash, [&](void *Mem, ArrayRef<uint8_t> Hash) { 325 value_type *Result = nullptr; 326 OnConstruct(LazyValueConstructor(Mem, Result, Hash)); 327 return Result->Hash.data(); 328 })); 329 } 330 insertLazy(ArrayRef<uint8_t> Hash,function_ref<void (LazyValueConstructor)> OnConstruct)331 pointer insertLazy(ArrayRef<uint8_t> Hash, 332 function_ref<void(LazyValueConstructor)> OnConstruct) { 333 return insertLazy(const_pointer(), Hash, OnConstruct); 334 } 335 insert(const_pointer Hint,value_type && HashedData)336 pointer insert(const_pointer Hint, value_type &&HashedData) { 337 return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) { 338 C(std::move(HashedData.Data)); 339 }); 340 } 341 insert(const_pointer Hint,const value_type & HashedData)342 pointer insert(const_pointer Hint, const value_type &HashedData) { 343 return insertLazy(Hint, HashedData.Hash, 344 [&](LazyValueConstructor C) { C(HashedData.Data); }); 345 } 346 find(ArrayRef<uint8_t> Hash)347 pointer find(ArrayRef<uint8_t> Hash) { 348 assert(Hash.size() == std::tuple_size<HashT>::value); 349 return ThreadSafeTrieRawHashMapBase::find(Hash); 350 } 351 find(ArrayRef<uint8_t> Hash)352 const_pointer find(ArrayRef<uint8_t> Hash) const { 353 assert(Hash.size() == std::tuple_size<HashT>::value); 354 return ThreadSafeTrieRawHashMapBase::find(Hash); 355 } 356 357 ThreadSafeTrieRawHashMap(std::optional<size_t> NumRootBits = std::nullopt, 358 std::optional<size_t> NumSubtrieBits = std::nullopt) ThreadSafeTrieRawHashMapBase(DefaultContentAllocSize<value_type>,DefaultContentAllocAlign<value_type>,DefaultContentOffset<value_type>,NumRootBits,NumSubtrieBits)359 : ThreadSafeTrieRawHashMapBase(DefaultContentAllocSize<value_type>, 360 DefaultContentAllocAlign<value_type>, 361 DefaultContentOffset<value_type>, 362 NumRootBits, NumSubtrieBits) {} 363 ~ThreadSafeTrieRawHashMap()364 ~ThreadSafeTrieRawHashMap() { 365 if constexpr (std::is_trivially_destructible<value_type>::value) 366 this->destroyImpl(nullptr); 367 else 368 this->destroyImpl( 369 [](void *P) { static_cast<value_type *>(P)->~value_type(); }); 370 } 371 372 // Move constructor okay. 373 ThreadSafeTrieRawHashMap(ThreadSafeTrieRawHashMap &&) = default; 374 375 // No move assignment or any copy. 376 ThreadSafeTrieRawHashMap &operator=(ThreadSafeTrieRawHashMap &&) = delete; 377 ThreadSafeTrieRawHashMap(const ThreadSafeTrieRawHashMap &) = delete; 378 ThreadSafeTrieRawHashMap & 379 operator=(const ThreadSafeTrieRawHashMap &) = delete; 380 }; 381 382 } // namespace llvm 383 384 #endif // LLVM_ADT_TRIERAWHASHMAP_H 385