1 //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements an interface allowing to conveniently build hashes of 10 // various data types, without relying on the underlying hasher type to know 11 // about hashed data types. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_SUPPORT_HASHBUILDER_H 16 #define LLVM_SUPPORT_HASHBUILDER_H 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/Hashing.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/type_traits.h" 24 25 #include <iterator> 26 #include <optional> 27 #include <utility> 28 29 namespace llvm { 30 31 namespace hashbuilder_detail { 32 /// Trait to indicate whether a type's bits can be hashed directly (after 33 /// endianness correction). 34 template <typename U> 35 struct IsHashableData 36 : std::integral_constant<bool, is_integral_or_enum<U>::value> {}; 37 38 } // namespace hashbuilder_detail 39 40 /// Declares the hasher member, and functions forwarding directly to the hasher. 41 template <typename HasherT> class HashBuilderBase { 42 public: 43 template <typename HasherT_ = HasherT> 44 using HashResultTy = decltype(std::declval<HasherT_ &>().final()); 45 46 HasherT &getHasher() { return Hasher; } 47 48 /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. 49 /// 50 /// This may not take the size of `Data` into account. 51 /// Users of this function should pay attention to respect endianness 52 /// contraints. 53 void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); } 54 55 /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. 56 /// 57 /// This may not take the size of `Data` into account. 58 /// Users of this function should pay attention to respect endianness 59 /// contraints. 60 void update(StringRef Data) { 61 update( 62 ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size())); 63 } 64 65 /// Forward to `HasherT::final()` if available. 66 template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() { 67 return this->getHasher().final(); 68 } 69 70 /// Forward to `HasherT::result()` if available. 71 template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() { 72 return this->getHasher().result(); 73 } 74 75 protected: 76 explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {} 77 78 template <typename... ArgTypes> 79 explicit HashBuilderBase(ArgTypes &&...Args) 80 : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...), 81 Hasher(*OptionalHasher) {} 82 83 private: 84 std::optional<HasherT> OptionalHasher; 85 HasherT &Hasher; 86 }; 87 88 /// Interface to help hash various types through a hasher type. 89 /// 90 /// Via provided specializations of `add`, `addRange`, and `addRangeElements` 91 /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed 92 /// without requiring any knowledge of hashed types from the hasher type. 93 /// 94 /// The only method expected from the templated hasher type `HasherT` is: 95 /// * void update(ArrayRef<uint8_t> Data) 96 /// 97 /// Additionally, the following methods will be forwarded to the hasher type: 98 /// * decltype(std::declval<HasherT &>().final()) final() 99 /// * decltype(std::declval<HasherT &>().result()) result() 100 /// 101 /// From a user point of view, the interface provides the following: 102 /// * `template<typename T> add(const T &Value)` 103 /// The `add` function implements hashing of various types. 104 /// * `template <typename ItT> void addRange(ItT First, ItT Last)` 105 /// The `addRange` function is designed to aid hashing a range of values. 106 /// It explicitly adds the size of the range in the hash. 107 /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)` 108 /// The `addRangeElements` function is also designed to aid hashing a range of 109 /// values. In contrast to `addRange`, it **ignores** the size of the range, 110 /// behaving as if elements were added one at a time with `add`. 111 /// 112 /// User-defined `struct` types can participate in this interface by providing 113 /// an `addHash` templated function. See the associated template specialization 114 /// for details. 115 /// 116 /// This interface does not impose requirements on the hasher 117 /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for 118 /// variable-size types; for example for 119 /// ``` 120 /// builder.add({1}); 121 /// builder.add({2, 3}); 122 /// ``` 123 /// and 124 /// ``` 125 /// builder.add({1, 2}); 126 /// builder.add({3}); 127 /// ``` 128 /// . Thus, specializations of `add` and `addHash` for variable-size types must 129 /// not assume that the hasher type considers the size as part of the hash; they 130 /// must explicitly add the size to the hash. See for example specializations 131 /// for `ArrayRef` and `StringRef`. 132 /// 133 /// Additionally, since types are eventually forwarded to the hasher's 134 /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash 135 /// computation (for example when computing `add((int)123)`). 136 /// Specifiying a non-`native` `Endianness` template parameter allows to compute 137 /// stable hash across platforms with different endianness. 138 template <typename HasherT, llvm::endianness Endianness> 139 class HashBuilder : public HashBuilderBase<HasherT> { 140 public: 141 explicit HashBuilder(HasherT &Hasher) : HashBuilderBase<HasherT>(Hasher) {} 142 template <typename... ArgTypes> 143 explicit HashBuilder(ArgTypes &&...Args) 144 : HashBuilderBase<HasherT>(Args...) {} 145 146 /// Implement hashing for hashable data types, e.g. integral or enum values. 147 template <typename T> 148 std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, HashBuilder &> 149 add(T Value) { 150 return adjustForEndiannessAndAdd(Value); 151 } 152 153 /// Support hashing `ArrayRef`. 154 /// 155 /// `Value.size()` is taken into account to ensure cases like 156 /// ``` 157 /// builder.add({1}); 158 /// builder.add({2, 3}); 159 /// ``` 160 /// and 161 /// ``` 162 /// builder.add({1, 2}); 163 /// builder.add({3}); 164 /// ``` 165 /// do not collide. 166 template <typename T> HashBuilder &add(ArrayRef<T> Value) { 167 // As of implementation time, simply calling `addRange(Value)` would also go 168 // through the `update` fast path. But that would rely on the implementation 169 // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call 170 // `update` to guarantee the fast path. 171 add(Value.size()); 172 if (hashbuilder_detail::IsHashableData<T>::value && 173 Endianness == llvm::endianness::native) { 174 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), 175 Value.size() * sizeof(T))); 176 } else { 177 for (auto &V : Value) 178 add(V); 179 } 180 return *this; 181 } 182 183 /// Support hashing `StringRef`. 184 /// 185 /// `Value.size()` is taken into account to ensure cases like 186 /// ``` 187 /// builder.add("a"); 188 /// builder.add("bc"); 189 /// ``` 190 /// and 191 /// ``` 192 /// builder.add("ab"); 193 /// builder.add("c"); 194 /// ``` 195 /// do not collide. 196 HashBuilder &add(StringRef Value) { 197 // As of implementation time, simply calling `addRange(Value)` would also go 198 // through `update`. But that would rely on the implementation of 199 // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to 200 // guarantee the fast path. 201 add(Value.size()); 202 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), 203 Value.size())); 204 return *this; 205 } 206 207 template <typename T> 208 using HasAddHashT = 209 decltype(addHash(std::declval<HashBuilder &>(), std::declval<T &>())); 210 /// Implement hashing for user-defined `struct`s. 211 /// 212 /// Any user-define `struct` can participate in hashing via `HashBuilder` by 213 /// providing a `addHash` templated function. 214 /// 215 /// ``` 216 /// template <typename HasherT, llvm::endianness Endianness> 217 /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, 218 /// const UserDefinedStruct &Value); 219 /// ``` 220 /// 221 /// For example: 222 /// ``` 223 /// struct SimpleStruct { 224 /// char c; 225 /// int i; 226 /// }; 227 /// 228 /// template <typename HasherT, llvm::endianness Endianness> 229 /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, 230 /// const SimpleStruct &Value) { 231 /// HBuilder.add(Value.c); 232 /// HBuilder.add(Value.i); 233 /// } 234 /// ``` 235 /// 236 /// To avoid endianness issues, specializations of `addHash` should 237 /// generally rely on exising `add`, `addRange`, and `addRangeElements` 238 /// functions. If directly using `update`, an implementation must correctly 239 /// handle endianness. 240 /// 241 /// ``` 242 /// struct __attribute__ ((packed)) StructWithFastHash { 243 /// int I; 244 /// char C; 245 /// 246 /// // If possible, we want to hash both `I` and `C` in a single 247 /// // `update` call for performance concerns. 248 /// template <typename HasherT, llvm::endianness Endianness> 249 /// friend void addHash(HashBuilder<HasherT, Endianness> &HBuilder, 250 /// const StructWithFastHash &Value) { 251 /// if (Endianness == llvm::endianness::native) { 252 /// HBuilder.update(ArrayRef( 253 /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value))); 254 /// } else { 255 /// // Rely on existing `add` methods to handle endianness. 256 /// HBuilder.add(Value.I); 257 /// HBuilder.add(Value.C); 258 /// } 259 /// } 260 /// }; 261 /// ``` 262 /// 263 /// To avoid collisions, specialization of `addHash` for variable-size 264 /// types must take the size into account. 265 /// 266 /// For example: 267 /// ``` 268 /// struct CustomContainer { 269 /// private: 270 /// size_t Size; 271 /// int Elements[100]; 272 /// 273 /// public: 274 /// CustomContainer(size_t Size) : Size(Size) { 275 /// for (size_t I = 0; I != Size; ++I) 276 /// Elements[I] = I; 277 /// } 278 /// template <typename HasherT, llvm::endianness Endianness> 279 /// friend void addHash(HashBuilder<HasherT, Endianness> &HBuilder, 280 /// const CustomContainer &Value) { 281 /// if (Endianness == llvm::endianness::native) { 282 /// HBuilder.update(ArrayRef( 283 /// reinterpret_cast<const uint8_t *>(&Value.Size), 284 /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); 285 /// } else { 286 /// // `addRange` will take care of encoding the size. 287 /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] + 288 /// Value.Size); 289 /// } 290 /// } 291 /// }; 292 /// ``` 293 template <typename T> 294 std::enable_if_t<is_detected<HasAddHashT, T>::value && 295 !hashbuilder_detail::IsHashableData<T>::value, 296 HashBuilder &> 297 add(const T &Value) { 298 addHash(*this, Value); 299 return *this; 300 } 301 302 template <typename T1, typename T2> 303 HashBuilder &add(const std::pair<T1, T2> &Value) { 304 return add(Value.first, Value.second); 305 } 306 307 template <typename... Ts> HashBuilder &add(const std::tuple<Ts...> &Arg) { 308 std::apply([this](const auto &...Args) { this->add(Args...); }, Arg); 309 return *this; 310 } 311 312 /// A convenenience variadic helper. 313 /// It simply iterates over its arguments, in order. 314 /// ``` 315 /// add(Arg1, Arg2); 316 /// ``` 317 /// is equivalent to 318 /// ``` 319 /// add(Arg1) 320 /// add(Arg2) 321 /// ``` 322 template <typename... Ts> 323 std::enable_if_t<(sizeof...(Ts) > 1), HashBuilder &> add(const Ts &...Args) { 324 return (add(Args), ...); 325 } 326 327 template <typename ForwardIteratorT> 328 HashBuilder &addRange(ForwardIteratorT First, ForwardIteratorT Last) { 329 add(std::distance(First, Last)); 330 return addRangeElements(First, Last); 331 } 332 333 template <typename RangeT> HashBuilder &addRange(const RangeT &Range) { 334 return addRange(adl_begin(Range), adl_end(Range)); 335 } 336 337 template <typename ForwardIteratorT> 338 HashBuilder &addRangeElements(ForwardIteratorT First, ForwardIteratorT Last) { 339 return addRangeElementsImpl( 340 First, Last, 341 typename std::iterator_traits<ForwardIteratorT>::iterator_category()); 342 } 343 344 template <typename RangeT> 345 HashBuilder &addRangeElements(const RangeT &Range) { 346 return addRangeElements(adl_begin(Range), adl_end(Range)); 347 } 348 349 template <typename T> 350 using HasByteSwapT = decltype(support::endian::byte_swap( 351 std::declval<T &>(), llvm::endianness::little)); 352 /// Adjust `Value` for the target endianness and add it to the hash. 353 template <typename T> 354 std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilder &> 355 adjustForEndiannessAndAdd(const T &Value) { 356 T SwappedValue = support::endian::byte_swap(Value, Endianness); 357 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue), 358 sizeof(SwappedValue))); 359 return *this; 360 } 361 362 private: 363 // FIXME: Once available, specialize this function for `contiguous_iterator`s, 364 // and use it for `ArrayRef` and `StringRef`. 365 template <typename ForwardIteratorT> 366 HashBuilder &addRangeElementsImpl(ForwardIteratorT First, 367 ForwardIteratorT Last, 368 std::forward_iterator_tag) { 369 for (auto It = First; It != Last; ++It) 370 add(*It); 371 return *this; 372 } 373 374 template <typename T> 375 std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value && 376 Endianness == llvm::endianness::native, 377 HashBuilder &> 378 addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) { 379 this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First), 380 (Last - First) * sizeof(T))); 381 return *this; 382 } 383 }; 384 385 namespace hashbuilder_detail { 386 class HashCodeHasher { 387 public: 388 HashCodeHasher() : Code(0) {} 389 void update(ArrayRef<uint8_t> Data) { 390 hash_code DataCode = hash_value(Data); 391 Code = hash_combine(Code, DataCode); 392 } 393 hash_code Code; 394 }; 395 396 using HashCodeHashBuilder = 397 HashBuilder<hashbuilder_detail::HashCodeHasher, llvm::endianness::native>; 398 } // namespace hashbuilder_detail 399 400 /// Provide a default implementation of `hash_value` when `addHash(const T &)` 401 /// is supported. 402 template <typename T> 403 std::enable_if_t< 404 is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value, 405 hash_code> 406 hash_value(const T &Value) { 407 hashbuilder_detail::HashCodeHashBuilder HBuilder; 408 HBuilder.add(Value); 409 return HBuilder.getHasher().Code; 410 } 411 } // end namespace llvm 412 413 #endif // LLVM_SUPPORT_HASHBUILDER_H 414