1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 10 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 11 12 #include "llvm/ADT/SparseBitVector.h" 13 #include "llvm/ADT/iterator.h" 14 #include "llvm/DebugInfo/PDB/Native/RawError.h" 15 #include "llvm/Support/BinaryStreamReader.h" 16 #include "llvm/Support/BinaryStreamWriter.h" 17 #include "llvm/Support/Endian.h" 18 #include "llvm/Support/Error.h" 19 #include <cstdint> 20 #include <iterator> 21 #include <utility> 22 #include <vector> 23 24 namespace llvm { 25 26 namespace pdb { 27 28 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V); 29 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec); 30 31 template <typename ValueT> class HashTable; 32 33 template <typename ValueT> 34 class HashTableIterator 35 : public iterator_facade_base<HashTableIterator<ValueT>, 36 std::forward_iterator_tag, 37 const std::pair<uint32_t, ValueT>> { 38 using BaseT = typename HashTableIterator::iterator_facade_base; 39 friend HashTable<ValueT>; 40 HashTableIterator(const HashTable<ValueT> & Map,uint32_t Index,bool IsEnd)41 HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index, 42 bool IsEnd) 43 : Map(&Map), Index(Index), IsEnd(IsEnd) {} 44 45 public: HashTableIterator(const HashTable<ValueT> & Map)46 HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) { 47 int I = Map.Present.find_first(); 48 if (I == -1) { 49 Index = 0; 50 IsEnd = true; 51 } else { 52 Index = static_cast<uint32_t>(I); 53 IsEnd = false; 54 } 55 } 56 57 HashTableIterator(const HashTableIterator &R) = default; 58 HashTableIterator &operator=(const HashTableIterator &R) { 59 Map = R.Map; 60 return *this; 61 } 62 bool operator==(const HashTableIterator &R) const { 63 if (IsEnd && R.IsEnd) 64 return true; 65 if (IsEnd != R.IsEnd) 66 return false; 67 68 return (Map == R.Map) && (Index == R.Index); 69 } 70 const std::pair<uint32_t, ValueT> &operator*() const { 71 assert(Map->Present.test(Index)); 72 return Map->Buckets[Index]; 73 } 74 75 // Implement postfix op++ in terms of prefix op++ by using the superclass 76 // implementation. 77 using BaseT::operator++; 78 HashTableIterator &operator++() { 79 while (Index < Map->Buckets.size()) { 80 ++Index; 81 if (Map->Present.test(Index)) 82 return *this; 83 } 84 85 IsEnd = true; 86 return *this; 87 } 88 89 private: isEnd()90 bool isEnd() const { return IsEnd; } index()91 uint32_t index() const { return Index; } 92 93 const HashTable<ValueT> *Map; 94 uint32_t Index; 95 bool IsEnd; 96 }; 97 98 template <typename ValueT> 99 class HashTable { 100 struct Header { 101 support::ulittle32_t Size; 102 support::ulittle32_t Capacity; 103 }; 104 105 using BucketList = std::vector<std::pair<uint32_t, ValueT>>; 106 107 public: 108 using const_iterator = HashTableIterator<ValueT>; 109 friend const_iterator; 110 HashTable()111 HashTable() { Buckets.resize(8); } HashTable(uint32_t Capacity)112 explicit HashTable(uint32_t Capacity) { 113 Buckets.resize(Capacity); 114 } 115 load(BinaryStreamReader & Stream)116 Error load(BinaryStreamReader &Stream) { 117 const Header *H; 118 if (auto EC = Stream.readObject(H)) 119 return EC; 120 if (H->Capacity == 0) 121 return make_error<RawError>(raw_error_code::corrupt_file, 122 "Invalid Hash Table Capacity"); 123 if (H->Size > maxLoad(H->Capacity)) 124 return make_error<RawError>(raw_error_code::corrupt_file, 125 "Invalid Hash Table Size"); 126 127 Buckets.resize(H->Capacity); 128 129 if (auto EC = readSparseBitVector(Stream, Present)) 130 return EC; 131 if (Present.count() != H->Size) 132 return make_error<RawError>(raw_error_code::corrupt_file, 133 "Present bit vector does not match size!"); 134 135 if (auto EC = readSparseBitVector(Stream, Deleted)) 136 return EC; 137 if (Present.intersects(Deleted)) 138 return make_error<RawError>(raw_error_code::corrupt_file, 139 "Present bit vector intersects deleted!"); 140 141 for (uint32_t P : Present) { 142 if (auto EC = Stream.readInteger(Buckets[P].first)) 143 return EC; 144 const ValueT *Value; 145 if (auto EC = Stream.readObject(Value)) 146 return EC; 147 Buckets[P].second = *Value; 148 } 149 150 return Error::success(); 151 } 152 calculateSerializedLength()153 uint32_t calculateSerializedLength() const { 154 uint32_t Size = sizeof(Header); 155 156 constexpr int BitsPerWord = 8 * sizeof(uint32_t); 157 158 int NumBitsP = Present.find_last() + 1; 159 int NumBitsD = Deleted.find_last() + 1; 160 161 uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord; 162 uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord; 163 164 // Present bit set number of words (4 bytes), followed by that many actual 165 // words (4 bytes each). 166 Size += sizeof(uint32_t); 167 Size += NumWordsP * sizeof(uint32_t); 168 169 // Deleted bit set number of words (4 bytes), followed by that many actual 170 // words (4 bytes each). 171 Size += sizeof(uint32_t); 172 Size += NumWordsD * sizeof(uint32_t); 173 174 // One (Key, ValueT) pair for each entry Present. 175 Size += (sizeof(uint32_t) + sizeof(ValueT)) * size(); 176 177 return Size; 178 } 179 commit(BinaryStreamWriter & Writer)180 Error commit(BinaryStreamWriter &Writer) const { 181 Header H; 182 H.Size = size(); 183 H.Capacity = capacity(); 184 if (auto EC = Writer.writeObject(H)) 185 return EC; 186 187 if (auto EC = writeSparseBitVector(Writer, Present)) 188 return EC; 189 190 if (auto EC = writeSparseBitVector(Writer, Deleted)) 191 return EC; 192 193 for (const auto &Entry : *this) { 194 if (auto EC = Writer.writeInteger(Entry.first)) 195 return EC; 196 if (auto EC = Writer.writeObject(Entry.second)) 197 return EC; 198 } 199 return Error::success(); 200 } 201 clear()202 void clear() { 203 Buckets.resize(8); 204 Present.clear(); 205 Deleted.clear(); 206 } 207 empty()208 bool empty() const { return size() == 0; } capacity()209 uint32_t capacity() const { return Buckets.size(); } size()210 uint32_t size() const { return Present.count(); } 211 begin()212 const_iterator begin() const { return const_iterator(*this); } end()213 const_iterator end() const { return const_iterator(*this, 0, true); } 214 215 /// Find the entry whose key has the specified hash value, using the specified 216 /// traits defining hash function and equality. 217 template <typename Key, typename TraitsT> find_as(const Key & K,TraitsT & Traits)218 const_iterator find_as(const Key &K, TraitsT &Traits) const { 219 uint32_t H = Traits.hashLookupKey(K) % capacity(); 220 uint32_t I = H; 221 std::optional<uint32_t> FirstUnused; 222 do { 223 if (isPresent(I)) { 224 if (Traits.storageKeyToLookupKey(Buckets[I].first) == K) 225 return const_iterator(*this, I, false); 226 } else { 227 if (!FirstUnused) 228 FirstUnused = I; 229 // Insertion occurs via linear probing from the slot hint, and will be 230 // inserted at the first empty / deleted location. Therefore, if we are 231 // probing and find a location that is neither present nor deleted, then 232 // nothing must have EVER been inserted at this location, and thus it is 233 // not possible for a matching value to occur later. 234 if (!isDeleted(I)) 235 break; 236 } 237 I = (I + 1) % capacity(); 238 } while (I != H); 239 240 // The only way FirstUnused would not be set is if every single entry in the 241 // table were Present. But this would violate the load factor constraints 242 // that we impose, so it should never happen. 243 assert(FirstUnused); 244 return const_iterator(*this, *FirstUnused, true); 245 } 246 247 /// Set the entry using a key type that the specified Traits can convert 248 /// from a real key to an internal key. 249 template <typename Key, typename TraitsT> set_as(const Key & K,ValueT V,TraitsT & Traits)250 bool set_as(const Key &K, ValueT V, TraitsT &Traits) { 251 return set_as_internal(K, std::move(V), Traits, std::nullopt); 252 } 253 254 template <typename Key, typename TraitsT> get(const Key & K,TraitsT & Traits)255 ValueT get(const Key &K, TraitsT &Traits) const { 256 auto Iter = find_as(K, Traits); 257 assert(Iter != end()); 258 return (*Iter).second; 259 } 260 261 protected: isPresent(uint32_t K)262 bool isPresent(uint32_t K) const { return Present.test(K); } isDeleted(uint32_t K)263 bool isDeleted(uint32_t K) const { return Deleted.test(K); } 264 265 BucketList Buckets; 266 mutable SparseBitVector<> Present; 267 mutable SparseBitVector<> Deleted; 268 269 private: 270 /// Set the entry using a key type that the specified Traits can convert 271 /// from a real key to an internal key. 272 template <typename Key, typename TraitsT> set_as_internal(const Key & K,ValueT V,TraitsT & Traits,std::optional<uint32_t> InternalKey)273 bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits, 274 std::optional<uint32_t> InternalKey) { 275 auto Entry = find_as(K, Traits); 276 if (Entry != end()) { 277 assert(isPresent(Entry.index())); 278 assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K); 279 // We're updating, no need to do anything special. 280 Buckets[Entry.index()].second = V; 281 return false; 282 } 283 284 auto &B = Buckets[Entry.index()]; 285 assert(!isPresent(Entry.index())); 286 assert(Entry.isEnd()); 287 B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K); 288 B.second = V; 289 Present.set(Entry.index()); 290 Deleted.reset(Entry.index()); 291 292 grow(Traits); 293 294 assert((find_as(K, Traits)) != end()); 295 return true; 296 } 297 maxLoad(uint32_t capacity)298 static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } 299 300 template <typename TraitsT> grow(TraitsT & Traits)301 void grow(TraitsT &Traits) { 302 uint32_t S = size(); 303 uint32_t MaxLoad = maxLoad(capacity()); 304 if (S < maxLoad(capacity())) 305 return; 306 assert(capacity() != UINT32_MAX && "Can't grow Hash table!"); 307 308 uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX; 309 310 // Growing requires rebuilding the table and re-hashing every item. Make a 311 // copy with a larger capacity, insert everything into the copy, then swap 312 // it in. 313 HashTable NewMap(NewCapacity); 314 for (auto I : Present) { 315 auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first); 316 NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits, 317 Buckets[I].first); 318 } 319 320 Buckets.swap(NewMap.Buckets); 321 std::swap(Present, NewMap.Present); 322 std::swap(Deleted, NewMap.Deleted); 323 assert(capacity() == NewCapacity); 324 assert(size() == S); 325 } 326 }; 327 328 } // end namespace pdb 329 330 } // end namespace llvm 331 332 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 333