xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ADT/CachedHashString.h (revision 43a5ec4eb41567cc92586503212743d89686d78f)
1 //===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines CachedHashString and CachedHashStringRef.  These are owning
10 // and not-owning string types that store their hash in addition to their string
11 // data.
12 //
13 // Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
14 // (because, unlike std::string, CachedHashString lets us have empty and
15 // tombstone values).
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #ifndef LLVM_ADT_CACHEDHASHSTRING_H
20 #define LLVM_ADT_CACHEDHASHSTRING_H
21 
22 #include "llvm/ADT/DenseMapInfo.h"
23 #include "llvm/ADT/StringRef.h"
24 
25 namespace llvm {
26 
27 /// A container which contains a StringRef plus a precomputed hash.
28 class CachedHashStringRef {
29   const char *P;
30   uint32_t Size;
31   uint32_t Hash;
32 
33 public:
34   // Explicit because hashing a string isn't free.
35   explicit CachedHashStringRef(StringRef S)
36       : CachedHashStringRef(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
37 
38   CachedHashStringRef(StringRef S, uint32_t Hash)
39       : P(S.data()), Size(S.size()), Hash(Hash) {
40     assert(S.size() <= std::numeric_limits<uint32_t>::max());
41   }
42 
43   StringRef val() const { return StringRef(P, Size); }
44   const char *data() const { return P; }
45   uint32_t size() const { return Size; }
46   uint32_t hash() const { return Hash; }
47 };
48 
49 template <> struct DenseMapInfo<CachedHashStringRef> {
50   static CachedHashStringRef getEmptyKey() {
51     return CachedHashStringRef(DenseMapInfo<StringRef>::getEmptyKey(), 0);
52   }
53   static CachedHashStringRef getTombstoneKey() {
54     return CachedHashStringRef(DenseMapInfo<StringRef>::getTombstoneKey(), 1);
55   }
56   static unsigned getHashValue(const CachedHashStringRef &S) {
57     assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
58     assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
59     return S.hash();
60   }
61   static bool isEqual(const CachedHashStringRef &LHS,
62                       const CachedHashStringRef &RHS) {
63     return LHS.hash() == RHS.hash() &&
64            DenseMapInfo<StringRef>::isEqual(LHS.val(), RHS.val());
65   }
66 };
67 
68 /// A container which contains a string, which it owns, plus a precomputed hash.
69 ///
70 /// We do not null-terminate the string.
71 class CachedHashString {
72   friend struct DenseMapInfo<CachedHashString>;
73 
74   char *P;
75   uint32_t Size;
76   uint32_t Hash;
77 
78   static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); }
79   static char *getTombstoneKeyPtr() {
80     return DenseMapInfo<char *>::getTombstoneKey();
81   }
82 
83   bool isEmptyOrTombstone() const {
84     return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr();
85   }
86 
87   struct ConstructEmptyOrTombstoneTy {};
88 
89   CachedHashString(ConstructEmptyOrTombstoneTy, char *EmptyOrTombstonePtr)
90       : P(EmptyOrTombstonePtr), Size(0), Hash(0) {
91     assert(isEmptyOrTombstone());
92   }
93 
94   // TODO: Use small-string optimization to avoid allocating.
95 
96 public:
97   explicit CachedHashString(const char *S) : CachedHashString(StringRef(S)) {}
98 
99   // Explicit because copying and hashing a string isn't free.
100   explicit CachedHashString(StringRef S)
101       : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
102 
103   CachedHashString(StringRef S, uint32_t Hash)
104       : P(new char[S.size()]), Size(S.size()), Hash(Hash) {
105     memcpy(P, S.data(), S.size());
106   }
107 
108   // Ideally this class would not be copyable.  But SetVector requires copyable
109   // keys, and we want this to be usable there.
110   CachedHashString(const CachedHashString &Other)
111       : Size(Other.Size), Hash(Other.Hash) {
112     if (Other.isEmptyOrTombstone()) {
113       P = Other.P;
114     } else {
115       P = new char[Size];
116       memcpy(P, Other.P, Size);
117     }
118   }
119 
120   CachedHashString &operator=(CachedHashString Other) {
121     swap(*this, Other);
122     return *this;
123   }
124 
125   CachedHashString(CachedHashString &&Other) noexcept
126       : P(Other.P), Size(Other.Size), Hash(Other.Hash) {
127     Other.P = getEmptyKeyPtr();
128   }
129 
130   ~CachedHashString() {
131     if (!isEmptyOrTombstone())
132       delete[] P;
133   }
134 
135   StringRef val() const { return StringRef(P, Size); }
136   uint32_t size() const { return Size; }
137   uint32_t hash() const { return Hash; }
138 
139   operator StringRef() const { return val(); }
140   operator CachedHashStringRef() const {
141     return CachedHashStringRef(val(), Hash);
142   }
143 
144   friend void swap(CachedHashString &LHS, CachedHashString &RHS) {
145     using std::swap;
146     swap(LHS.P, RHS.P);
147     swap(LHS.Size, RHS.Size);
148     swap(LHS.Hash, RHS.Hash);
149   }
150 };
151 
152 template <> struct DenseMapInfo<CachedHashString> {
153   static CachedHashString getEmptyKey() {
154     return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
155                             CachedHashString::getEmptyKeyPtr());
156   }
157   static CachedHashString getTombstoneKey() {
158     return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
159                             CachedHashString::getTombstoneKeyPtr());
160   }
161   static unsigned getHashValue(const CachedHashString &S) {
162     assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
163     assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
164     return S.hash();
165   }
166   static bool isEqual(const CachedHashString &LHS,
167                       const CachedHashString &RHS) {
168     if (LHS.hash() != RHS.hash())
169       return false;
170     if (LHS.P == CachedHashString::getEmptyKeyPtr())
171       return RHS.P == CachedHashString::getEmptyKeyPtr();
172     if (LHS.P == CachedHashString::getTombstoneKeyPtr())
173       return RHS.P == CachedHashString::getTombstoneKeyPtr();
174 
175     // This is safe because if RHS.P is the empty or tombstone key, it will have
176     // length 0, so we'll never dereference its pointer.
177     return LHS.val() == RHS.val();
178   }
179 };
180 
181 } // namespace llvm
182 
183 #endif
184