1 //===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines CachedHashString and CachedHashStringRef. These are owning 10 // and not-owning string types that store their hash in addition to their string 11 // data. 12 // 13 // Unlike std::string, CachedHashString can be used in DenseSet/DenseMap 14 // (because, unlike std::string, CachedHashString lets us have empty and 15 // tombstone values). 16 // 17 //===----------------------------------------------------------------------===// 18 19 #ifndef LLVM_ADT_CACHED_HASH_STRING_H 20 #define LLVM_ADT_CACHED_HASH_STRING_H 21 22 #include "llvm/ADT/DenseMapInfo.h" 23 #include "llvm/ADT/StringRef.h" 24 25 namespace llvm { 26 27 /// A container which contains a StringRef plus a precomputed hash. 28 class CachedHashStringRef { 29 const char *P; 30 uint32_t Size; 31 uint32_t Hash; 32 33 public: 34 // Explicit because hashing a string isn't free. 35 explicit CachedHashStringRef(StringRef S) 36 : CachedHashStringRef(S, DenseMapInfo<StringRef>::getHashValue(S)) {} 37 38 CachedHashStringRef(StringRef S, uint32_t Hash) 39 : P(S.data()), Size(S.size()), Hash(Hash) { 40 assert(S.size() <= std::numeric_limits<uint32_t>::max()); 41 } 42 43 StringRef val() const { return StringRef(P, Size); } 44 const char *data() const { return P; } 45 uint32_t size() const { return Size; } 46 uint32_t hash() const { return Hash; } 47 }; 48 49 template <> struct DenseMapInfo<CachedHashStringRef> { 50 static CachedHashStringRef getEmptyKey() { 51 return CachedHashStringRef(DenseMapInfo<StringRef>::getEmptyKey(), 0); 52 } 53 static CachedHashStringRef getTombstoneKey() { 54 return CachedHashStringRef(DenseMapInfo<StringRef>::getTombstoneKey(), 1); 55 } 56 static unsigned getHashValue(const CachedHashStringRef &S) { 57 assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!"); 58 assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!"); 59 return S.hash(); 60 } 61 static bool isEqual(const CachedHashStringRef &LHS, 62 const CachedHashStringRef &RHS) { 63 return LHS.hash() == RHS.hash() && 64 DenseMapInfo<StringRef>::isEqual(LHS.val(), RHS.val()); 65 } 66 }; 67 68 /// A container which contains a string, which it owns, plus a precomputed hash. 69 /// 70 /// We do not null-terminate the string. 71 class CachedHashString { 72 friend struct DenseMapInfo<CachedHashString>; 73 74 char *P; 75 uint32_t Size; 76 uint32_t Hash; 77 78 static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); } 79 static char *getTombstoneKeyPtr() { 80 return DenseMapInfo<char *>::getTombstoneKey(); 81 } 82 83 bool isEmptyOrTombstone() const { 84 return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr(); 85 } 86 87 struct ConstructEmptyOrTombstoneTy {}; 88 89 CachedHashString(ConstructEmptyOrTombstoneTy, char *EmptyOrTombstonePtr) 90 : P(EmptyOrTombstonePtr), Size(0), Hash(0) { 91 assert(isEmptyOrTombstone()); 92 } 93 94 // TODO: Use small-string optimization to avoid allocating. 95 96 public: 97 explicit CachedHashString(const char *S) : CachedHashString(StringRef(S)) {} 98 99 // Explicit because copying and hashing a string isn't free. 100 explicit CachedHashString(StringRef S) 101 : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {} 102 103 CachedHashString(StringRef S, uint32_t Hash) 104 : P(new char[S.size()]), Size(S.size()), Hash(Hash) { 105 memcpy(P, S.data(), S.size()); 106 } 107 108 // Ideally this class would not be copyable. But SetVector requires copyable 109 // keys, and we want this to be usable there. 110 CachedHashString(const CachedHashString &Other) 111 : Size(Other.Size), Hash(Other.Hash) { 112 if (Other.isEmptyOrTombstone()) { 113 P = Other.P; 114 } else { 115 P = new char[Size]; 116 memcpy(P, Other.P, Size); 117 } 118 } 119 120 CachedHashString &operator=(CachedHashString Other) { 121 swap(*this, Other); 122 return *this; 123 } 124 125 CachedHashString(CachedHashString &&Other) noexcept 126 : P(Other.P), Size(Other.Size), Hash(Other.Hash) { 127 Other.P = getEmptyKeyPtr(); 128 } 129 130 ~CachedHashString() { 131 if (!isEmptyOrTombstone()) 132 delete[] P; 133 } 134 135 StringRef val() const { return StringRef(P, Size); } 136 uint32_t size() const { return Size; } 137 uint32_t hash() const { return Hash; } 138 139 operator StringRef() const { return val(); } 140 operator CachedHashStringRef() const { 141 return CachedHashStringRef(val(), Hash); 142 } 143 144 friend void swap(CachedHashString &LHS, CachedHashString &RHS) { 145 using std::swap; 146 swap(LHS.P, RHS.P); 147 swap(LHS.Size, RHS.Size); 148 swap(LHS.Hash, RHS.Hash); 149 } 150 }; 151 152 template <> struct DenseMapInfo<CachedHashString> { 153 static CachedHashString getEmptyKey() { 154 return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(), 155 CachedHashString::getEmptyKeyPtr()); 156 } 157 static CachedHashString getTombstoneKey() { 158 return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(), 159 CachedHashString::getTombstoneKeyPtr()); 160 } 161 static unsigned getHashValue(const CachedHashString &S) { 162 assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!"); 163 assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!"); 164 return S.hash(); 165 } 166 static bool isEqual(const CachedHashString &LHS, 167 const CachedHashString &RHS) { 168 if (LHS.hash() != RHS.hash()) 169 return false; 170 if (LHS.P == CachedHashString::getEmptyKeyPtr()) 171 return RHS.P == CachedHashString::getEmptyKeyPtr(); 172 if (LHS.P == CachedHashString::getTombstoneKeyPtr()) 173 return RHS.P == CachedHashString::getTombstoneKeyPtr(); 174 175 // This is safe because if RHS.P is the empty or tombstone key, it will have 176 // length 0, so we'll never dereference its pointer. 177 return LHS.val() == RHS.val(); 178 } 179 }; 180 181 } // namespace llvm 182 183 #endif 184