1 //===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines CachedHashString and CachedHashStringRef. These are 11 /// owning and not-owning string types that store their hash in addition to 12 /// their string data. 13 /// 14 /// Unlike std::string, CachedHashString can be used in DenseSet/DenseMap 15 /// (because, unlike std::string, CachedHashString lets us have empty and 16 /// tombstone values). 17 /// 18 //===----------------------------------------------------------------------===// 19 20 #ifndef LLVM_ADT_CACHEDHASHSTRING_H 21 #define LLVM_ADT_CACHEDHASHSTRING_H 22 23 #include "llvm/ADT/DenseMapInfo.h" 24 #include "llvm/ADT/StringRef.h" 25 26 namespace llvm { 27 28 /// A container which contains a StringRef plus a precomputed hash. 29 class CachedHashStringRef { 30 const char *P; 31 uint32_t Size; 32 uint32_t Hash; 33 34 public: 35 // Explicit because hashing a string isn't free. CachedHashStringRef(StringRef S)36 explicit CachedHashStringRef(StringRef S) 37 : CachedHashStringRef(S, DenseMapInfo<StringRef>::getHashValue(S)) {} 38 CachedHashStringRef(StringRef S,uint32_t Hash)39 CachedHashStringRef(StringRef S, uint32_t Hash) 40 : P(S.data()), Size(S.size()), Hash(Hash) { 41 assert(S.size() <= std::numeric_limits<uint32_t>::max()); 42 } 43 val()44 StringRef val() const { return StringRef(P, Size); } data()45 const char *data() const { return P; } size()46 uint32_t size() const { return Size; } hash()47 uint32_t hash() const { return Hash; } 48 }; 49 50 template <> struct DenseMapInfo<CachedHashStringRef> { 51 static CachedHashStringRef getEmptyKey() { 52 return CachedHashStringRef(DenseMapInfo<StringRef>::getEmptyKey(), 0); 53 } 54 static CachedHashStringRef getTombstoneKey() { 55 return CachedHashStringRef(DenseMapInfo<StringRef>::getTombstoneKey(), 1); 56 } 57 static unsigned getHashValue(const CachedHashStringRef &S) { 58 assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!"); 59 assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!"); 60 return S.hash(); 61 } 62 static bool isEqual(const CachedHashStringRef &LHS, 63 const CachedHashStringRef &RHS) { 64 return LHS.hash() == RHS.hash() && 65 DenseMapInfo<StringRef>::isEqual(LHS.val(), RHS.val()); 66 } 67 }; 68 69 /// A container which contains a string, which it owns, plus a precomputed hash. 70 /// 71 /// We do not null-terminate the string. 72 class CachedHashString { 73 friend struct DenseMapInfo<CachedHashString>; 74 75 char *P; 76 uint32_t Size; 77 uint32_t Hash; 78 79 static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); } 80 static char *getTombstoneKeyPtr() { 81 return DenseMapInfo<char *>::getTombstoneKey(); 82 } 83 84 bool isEmptyOrTombstone() const { 85 return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr(); 86 } 87 88 struct ConstructEmptyOrTombstoneTy {}; 89 90 CachedHashString(ConstructEmptyOrTombstoneTy, char *EmptyOrTombstonePtr) 91 : P(EmptyOrTombstonePtr), Size(0), Hash(0) { 92 assert(isEmptyOrTombstone()); 93 } 94 95 // TODO: Use small-string optimization to avoid allocating. 96 97 public: 98 explicit CachedHashString(const char *S) : CachedHashString(StringRef(S)) {} 99 100 // Explicit because copying and hashing a string isn't free. 101 explicit CachedHashString(StringRef S) 102 : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {} 103 104 CachedHashString(StringRef S, uint32_t Hash) 105 : P(new char[S.size()]), Size(S.size()), Hash(Hash) { 106 memcpy(P, S.data(), S.size()); 107 } 108 109 // Ideally this class would not be copyable. But SetVector requires copyable 110 // keys, and we want this to be usable there. 111 CachedHashString(const CachedHashString &Other) 112 : Size(Other.Size), Hash(Other.Hash) { 113 if (Other.isEmptyOrTombstone()) { 114 P = Other.P; 115 } else { 116 P = new char[Size]; 117 memcpy(P, Other.P, Size); 118 } 119 } 120 121 CachedHashString &operator=(CachedHashString Other) { 122 swap(*this, Other); 123 return *this; 124 } 125 126 CachedHashString(CachedHashString &&Other) noexcept 127 : P(Other.P), Size(Other.Size), Hash(Other.Hash) { 128 Other.P = getEmptyKeyPtr(); 129 } 130 131 ~CachedHashString() { 132 if (!isEmptyOrTombstone()) 133 delete[] P; 134 } 135 136 StringRef val() const { return StringRef(P, Size); } 137 uint32_t size() const { return Size; } 138 uint32_t hash() const { return Hash; } 139 140 operator StringRef() const { return val(); } 141 operator CachedHashStringRef() const { 142 return CachedHashStringRef(val(), Hash); 143 } 144 145 friend void swap(CachedHashString &LHS, CachedHashString &RHS) { 146 using std::swap; 147 swap(LHS.P, RHS.P); 148 swap(LHS.Size, RHS.Size); 149 swap(LHS.Hash, RHS.Hash); 150 } 151 }; 152 153 template <> struct DenseMapInfo<CachedHashString> { 154 static CachedHashString getEmptyKey() { 155 return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(), 156 CachedHashString::getEmptyKeyPtr()); 157 } 158 static CachedHashString getTombstoneKey() { 159 return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(), 160 CachedHashString::getTombstoneKeyPtr()); 161 } 162 static unsigned getHashValue(const CachedHashString &S) { 163 assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!"); 164 assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!"); 165 return S.hash(); 166 } 167 static bool isEqual(const CachedHashString &LHS, 168 const CachedHashString &RHS) { 169 if (LHS.hash() != RHS.hash()) 170 return false; 171 if (LHS.P == CachedHashString::getEmptyKeyPtr()) 172 return RHS.P == CachedHashString::getEmptyKeyPtr(); 173 if (LHS.P == CachedHashString::getTombstoneKeyPtr()) 174 return RHS.P == CachedHashString::getTombstoneKeyPtr(); 175 176 // This is safe because if RHS.P is the empty or tombstone key, it will have 177 // length 0, so we'll never dereference its pointer. 178 return LHS.val() == RHS.val(); 179 } 180 }; 181 182 } // namespace llvm 183 184 #endif 185