xref: /freebsd/lib/libopenbsd/ohash.c (revision a36eca08bbd549807428439b536fea522de0dc37)
1*a36eca08SCraig Rodrigues /* $OpenBSD: src/lib/libutil/ohash.c,v 1.1 2014/06/02 18:52:03 deraadt Exp $ */
2*a36eca08SCraig Rodrigues 
3*a36eca08SCraig Rodrigues /* Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org>
4*a36eca08SCraig Rodrigues  *
5*a36eca08SCraig Rodrigues  * Permission to use, copy, modify, and distribute this software for any
6*a36eca08SCraig Rodrigues  * purpose with or without fee is hereby granted, provided that the above
7*a36eca08SCraig Rodrigues  * copyright notice and this permission notice appear in all copies.
8*a36eca08SCraig Rodrigues  *
9*a36eca08SCraig Rodrigues  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10*a36eca08SCraig Rodrigues  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11*a36eca08SCraig Rodrigues  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12*a36eca08SCraig Rodrigues  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13*a36eca08SCraig Rodrigues  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14*a36eca08SCraig Rodrigues  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15*a36eca08SCraig Rodrigues  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16*a36eca08SCraig Rodrigues  */
17*a36eca08SCraig Rodrigues 
18*a36eca08SCraig Rodrigues #include <sys/cdefs.h>
19*a36eca08SCraig Rodrigues __FBSDID("$FreeBSD$");
20*a36eca08SCraig Rodrigues 
21*a36eca08SCraig Rodrigues #include <stddef.h>
22*a36eca08SCraig Rodrigues #include <stdint.h>
23*a36eca08SCraig Rodrigues #include <stdlib.h>
24*a36eca08SCraig Rodrigues #include <string.h>
25*a36eca08SCraig Rodrigues #include <limits.h>
26*a36eca08SCraig Rodrigues #include "ohash.h"
27*a36eca08SCraig Rodrigues 
28*a36eca08SCraig Rodrigues struct _ohash_record {
29*a36eca08SCraig Rodrigues 	uint32_t	hv;
30*a36eca08SCraig Rodrigues 	const char	*p;
31*a36eca08SCraig Rodrigues };
32*a36eca08SCraig Rodrigues 
33*a36eca08SCraig Rodrigues #define DELETED		((const char *)h)
34*a36eca08SCraig Rodrigues #define NONE		(h->size)
35*a36eca08SCraig Rodrigues 
36*a36eca08SCraig Rodrigues /* Don't bother changing the hash table if the change is small enough.  */
37*a36eca08SCraig Rodrigues #define MINSIZE		(1UL << 4)
38*a36eca08SCraig Rodrigues #define MINDELETED	4
39*a36eca08SCraig Rodrigues 
40*a36eca08SCraig Rodrigues static void ohash_resize(struct ohash *);
41*a36eca08SCraig Rodrigues 
42*a36eca08SCraig Rodrigues 
43*a36eca08SCraig Rodrigues /* This handles the common case of variable length keys, where the
44*a36eca08SCraig Rodrigues  * key is stored at the end of the record.
45*a36eca08SCraig Rodrigues  */
46*a36eca08SCraig Rodrigues void *
47*a36eca08SCraig Rodrigues ohash_create_entry(struct ohash_info *i, const char *start, const char **end)
48*a36eca08SCraig Rodrigues {
49*a36eca08SCraig Rodrigues 	char *p;
50*a36eca08SCraig Rodrigues 
51*a36eca08SCraig Rodrigues 	if (!*end)
52*a36eca08SCraig Rodrigues 		*end = start + strlen(start);
53*a36eca08SCraig Rodrigues 	p = (i->alloc)(i->key_offset + (*end - start) + 1, i->data);
54*a36eca08SCraig Rodrigues 	if (p) {
55*a36eca08SCraig Rodrigues 		memcpy(p+i->key_offset, start, *end-start);
56*a36eca08SCraig Rodrigues 		p[i->key_offset + (*end - start)] = '\0';
57*a36eca08SCraig Rodrigues 	}
58*a36eca08SCraig Rodrigues 	return (void *)p;
59*a36eca08SCraig Rodrigues }
60*a36eca08SCraig Rodrigues 
61*a36eca08SCraig Rodrigues /* hash_delete only frees the hash structure. Use hash_first/hash_next
62*a36eca08SCraig Rodrigues  * to free entries as well.  */
63*a36eca08SCraig Rodrigues void
64*a36eca08SCraig Rodrigues ohash_delete(struct ohash *h)
65*a36eca08SCraig Rodrigues {
66*a36eca08SCraig Rodrigues 	(h->info.free)(h->t, h->info.data);
67*a36eca08SCraig Rodrigues #ifndef NDEBUG
68*a36eca08SCraig Rodrigues 	h->t = NULL;
69*a36eca08SCraig Rodrigues #endif
70*a36eca08SCraig Rodrigues }
71*a36eca08SCraig Rodrigues 
72*a36eca08SCraig Rodrigues static void
73*a36eca08SCraig Rodrigues ohash_resize(struct ohash *h)
74*a36eca08SCraig Rodrigues {
75*a36eca08SCraig Rodrigues 	struct _ohash_record *n;
76*a36eca08SCraig Rodrigues 	size_t ns;
77*a36eca08SCraig Rodrigues 	unsigned int	j;
78*a36eca08SCraig Rodrigues 	unsigned int	i, incr;
79*a36eca08SCraig Rodrigues 
80*a36eca08SCraig Rodrigues 	if (4 * h->deleted < h->total) {
81*a36eca08SCraig Rodrigues 		if (h->size >= (UINT_MAX >> 1U))
82*a36eca08SCraig Rodrigues 			ns = UINT_MAX;
83*a36eca08SCraig Rodrigues 		else
84*a36eca08SCraig Rodrigues 			ns = h->size << 1U;
85*a36eca08SCraig Rodrigues 	} else if (3 * h->deleted > 2 * h->total)
86*a36eca08SCraig Rodrigues 		ns = h->size >> 1U;
87*a36eca08SCraig Rodrigues 	else
88*a36eca08SCraig Rodrigues 		ns = h->size;
89*a36eca08SCraig Rodrigues 	if (ns < MINSIZE)
90*a36eca08SCraig Rodrigues 		ns = MINSIZE;
91*a36eca08SCraig Rodrigues #ifdef STATS_HASH
92*a36eca08SCraig Rodrigues 	STAT_HASH_EXPAND++;
93*a36eca08SCraig Rodrigues 	STAT_HASH_SIZE += ns - h->size;
94*a36eca08SCraig Rodrigues #endif
95*a36eca08SCraig Rodrigues 
96*a36eca08SCraig Rodrigues 	n = (h->info.calloc)(ns, sizeof(struct _ohash_record), h->info.data);
97*a36eca08SCraig Rodrigues 	if (!n)
98*a36eca08SCraig Rodrigues 		return;
99*a36eca08SCraig Rodrigues 
100*a36eca08SCraig Rodrigues 	for (j = 0; j < h->size; j++) {
101*a36eca08SCraig Rodrigues 		if (h->t[j].p != NULL && h->t[j].p != DELETED) {
102*a36eca08SCraig Rodrigues 			i = h->t[j].hv % ns;
103*a36eca08SCraig Rodrigues 			incr = ((h->t[j].hv % (ns - 2)) & ~1) + 1;
104*a36eca08SCraig Rodrigues 			while (n[i].p != NULL) {
105*a36eca08SCraig Rodrigues 				i += incr;
106*a36eca08SCraig Rodrigues 				if (i >= ns)
107*a36eca08SCraig Rodrigues 					i -= ns;
108*a36eca08SCraig Rodrigues 			}
109*a36eca08SCraig Rodrigues 			n[i].hv = h->t[j].hv;
110*a36eca08SCraig Rodrigues 			n[i].p = h->t[j].p;
111*a36eca08SCraig Rodrigues 		}
112*a36eca08SCraig Rodrigues 	}
113*a36eca08SCraig Rodrigues 	(h->info.free)(h->t, h->info.data);
114*a36eca08SCraig Rodrigues 	h->t = n;
115*a36eca08SCraig Rodrigues 	h->size = ns;
116*a36eca08SCraig Rodrigues 	h->total -= h->deleted;
117*a36eca08SCraig Rodrigues 	h->deleted = 0;
118*a36eca08SCraig Rodrigues }
119*a36eca08SCraig Rodrigues 
120*a36eca08SCraig Rodrigues void *
121*a36eca08SCraig Rodrigues ohash_remove(struct ohash *h, unsigned int i)
122*a36eca08SCraig Rodrigues {
123*a36eca08SCraig Rodrigues 	void		*result = (void *)h->t[i].p;
124*a36eca08SCraig Rodrigues 
125*a36eca08SCraig Rodrigues 	if (result == NULL || result == DELETED)
126*a36eca08SCraig Rodrigues 		return NULL;
127*a36eca08SCraig Rodrigues 
128*a36eca08SCraig Rodrigues #ifdef STATS_HASH
129*a36eca08SCraig Rodrigues 	STAT_HASH_ENTRIES--;
130*a36eca08SCraig Rodrigues #endif
131*a36eca08SCraig Rodrigues 	h->t[i].p = DELETED;
132*a36eca08SCraig Rodrigues 	h->deleted++;
133*a36eca08SCraig Rodrigues 	if (h->deleted >= MINDELETED && 4 * h->deleted > h->total)
134*a36eca08SCraig Rodrigues 		ohash_resize(h);
135*a36eca08SCraig Rodrigues 	return result;
136*a36eca08SCraig Rodrigues }
137*a36eca08SCraig Rodrigues 
138*a36eca08SCraig Rodrigues void *
139*a36eca08SCraig Rodrigues ohash_find(struct ohash *h, unsigned int i)
140*a36eca08SCraig Rodrigues {
141*a36eca08SCraig Rodrigues 	if (h->t[i].p == DELETED)
142*a36eca08SCraig Rodrigues 		return NULL;
143*a36eca08SCraig Rodrigues 	else
144*a36eca08SCraig Rodrigues 		return (void *)h->t[i].p;
145*a36eca08SCraig Rodrigues }
146*a36eca08SCraig Rodrigues 
147*a36eca08SCraig Rodrigues void *
148*a36eca08SCraig Rodrigues ohash_insert(struct ohash *h, unsigned int i, void *p)
149*a36eca08SCraig Rodrigues {
150*a36eca08SCraig Rodrigues #ifdef STATS_HASH
151*a36eca08SCraig Rodrigues 	STAT_HASH_ENTRIES++;
152*a36eca08SCraig Rodrigues #endif
153*a36eca08SCraig Rodrigues 	if (h->t[i].p == DELETED) {
154*a36eca08SCraig Rodrigues 		h->deleted--;
155*a36eca08SCraig Rodrigues 		h->t[i].p = p;
156*a36eca08SCraig Rodrigues 	} else {
157*a36eca08SCraig Rodrigues 		h->t[i].p = p;
158*a36eca08SCraig Rodrigues 		/* Arbitrary resize boundary.  Tweak if not efficient enough.  */
159*a36eca08SCraig Rodrigues 		if (++h->total * 4 > h->size * 3)
160*a36eca08SCraig Rodrigues 			ohash_resize(h);
161*a36eca08SCraig Rodrigues 	}
162*a36eca08SCraig Rodrigues 	return p;
163*a36eca08SCraig Rodrigues }
164*a36eca08SCraig Rodrigues 
165*a36eca08SCraig Rodrigues unsigned int
166*a36eca08SCraig Rodrigues ohash_entries(struct ohash *h)
167*a36eca08SCraig Rodrigues {
168*a36eca08SCraig Rodrigues 	return h->total - h->deleted;
169*a36eca08SCraig Rodrigues }
170*a36eca08SCraig Rodrigues 
171*a36eca08SCraig Rodrigues void *
172*a36eca08SCraig Rodrigues ohash_first(struct ohash *h, unsigned int *pos)
173*a36eca08SCraig Rodrigues {
174*a36eca08SCraig Rodrigues 	*pos = 0;
175*a36eca08SCraig Rodrigues 	return ohash_next(h, pos);
176*a36eca08SCraig Rodrigues }
177*a36eca08SCraig Rodrigues 
178*a36eca08SCraig Rodrigues void *
179*a36eca08SCraig Rodrigues ohash_next(struct ohash *h, unsigned int *pos)
180*a36eca08SCraig Rodrigues {
181*a36eca08SCraig Rodrigues 	for (; *pos < h->size; (*pos)++)
182*a36eca08SCraig Rodrigues 		if (h->t[*pos].p != DELETED && h->t[*pos].p != NULL)
183*a36eca08SCraig Rodrigues 			return (void *)h->t[(*pos)++].p;
184*a36eca08SCraig Rodrigues 	return NULL;
185*a36eca08SCraig Rodrigues }
186*a36eca08SCraig Rodrigues 
187*a36eca08SCraig Rodrigues void
188*a36eca08SCraig Rodrigues ohash_init(struct ohash *h, unsigned int size, struct ohash_info *info)
189*a36eca08SCraig Rodrigues {
190*a36eca08SCraig Rodrigues 	h->size = 1UL << size;
191*a36eca08SCraig Rodrigues 	if (h->size < MINSIZE)
192*a36eca08SCraig Rodrigues 		h->size = MINSIZE;
193*a36eca08SCraig Rodrigues #ifdef STATS_HASH
194*a36eca08SCraig Rodrigues 	STAT_HASH_CREATION++;
195*a36eca08SCraig Rodrigues 	STAT_HASH_SIZE += h->size;
196*a36eca08SCraig Rodrigues #endif
197*a36eca08SCraig Rodrigues 	/* Copy info so that caller may free it.  */
198*a36eca08SCraig Rodrigues 	h->info.key_offset = info->key_offset;
199*a36eca08SCraig Rodrigues 	h->info.calloc = info->calloc;
200*a36eca08SCraig Rodrigues 	h->info.free = info->free;
201*a36eca08SCraig Rodrigues 	h->info.alloc = info->alloc;
202*a36eca08SCraig Rodrigues 	h->info.data = info->data;
203*a36eca08SCraig Rodrigues 	h->t = (h->info.calloc)(h->size, sizeof(struct _ohash_record),
204*a36eca08SCraig Rodrigues 		    h->info.data);
205*a36eca08SCraig Rodrigues 	h->total = h->deleted = 0;
206*a36eca08SCraig Rodrigues }
207*a36eca08SCraig Rodrigues 
208*a36eca08SCraig Rodrigues uint32_t
209*a36eca08SCraig Rodrigues ohash_interval(const char *s, const char **e)
210*a36eca08SCraig Rodrigues {
211*a36eca08SCraig Rodrigues 	uint32_t k;
212*a36eca08SCraig Rodrigues 
213*a36eca08SCraig Rodrigues 	if (!*e)
214*a36eca08SCraig Rodrigues 		*e = s + strlen(s);
215*a36eca08SCraig Rodrigues 	if (s == *e)
216*a36eca08SCraig Rodrigues 		k = 0;
217*a36eca08SCraig Rodrigues 	else
218*a36eca08SCraig Rodrigues 		k = *s++;
219*a36eca08SCraig Rodrigues 	while (s != *e)
220*a36eca08SCraig Rodrigues 		k =  ((k << 2) | (k >> 30)) ^ *s++;
221*a36eca08SCraig Rodrigues 	return k;
222*a36eca08SCraig Rodrigues }
223*a36eca08SCraig Rodrigues 
224*a36eca08SCraig Rodrigues unsigned int
225*a36eca08SCraig Rodrigues ohash_lookup_interval(struct ohash *h, const char *start, const char *end,
226*a36eca08SCraig Rodrigues     uint32_t hv)
227*a36eca08SCraig Rodrigues {
228*a36eca08SCraig Rodrigues 	unsigned int	i, incr;
229*a36eca08SCraig Rodrigues 	unsigned int	empty;
230*a36eca08SCraig Rodrigues 
231*a36eca08SCraig Rodrigues #ifdef STATS_HASH
232*a36eca08SCraig Rodrigues 	STAT_HASH_LOOKUP++;
233*a36eca08SCraig Rodrigues #endif
234*a36eca08SCraig Rodrigues 	empty = NONE;
235*a36eca08SCraig Rodrigues 	i = hv % h->size;
236*a36eca08SCraig Rodrigues 	incr = ((hv % (h->size-2)) & ~1) + 1;
237*a36eca08SCraig Rodrigues 	while (h->t[i].p != NULL) {
238*a36eca08SCraig Rodrigues #ifdef STATS_HASH
239*a36eca08SCraig Rodrigues 		STAT_HASH_LENGTH++;
240*a36eca08SCraig Rodrigues #endif
241*a36eca08SCraig Rodrigues 		if (h->t[i].p == DELETED) {
242*a36eca08SCraig Rodrigues 			if (empty == NONE)
243*a36eca08SCraig Rodrigues 				empty = i;
244*a36eca08SCraig Rodrigues 		} else if (h->t[i].hv == hv &&
245*a36eca08SCraig Rodrigues 		    strncmp(h->t[i].p+h->info.key_offset, start,
246*a36eca08SCraig Rodrigues 			end - start) == 0 &&
247*a36eca08SCraig Rodrigues 		    (h->t[i].p+h->info.key_offset)[end-start] == '\0') {
248*a36eca08SCraig Rodrigues 			if (empty != NONE) {
249*a36eca08SCraig Rodrigues 				h->t[empty].hv = hv;
250*a36eca08SCraig Rodrigues 				h->t[empty].p = h->t[i].p;
251*a36eca08SCraig Rodrigues 				h->t[i].p = DELETED;
252*a36eca08SCraig Rodrigues 				return empty;
253*a36eca08SCraig Rodrigues 			} else {
254*a36eca08SCraig Rodrigues #ifdef STATS_HASH
255*a36eca08SCraig Rodrigues 				STAT_HASH_POSITIVE++;
256*a36eca08SCraig Rodrigues #endif
257*a36eca08SCraig Rodrigues 				return i;
258*a36eca08SCraig Rodrigues 			}
259*a36eca08SCraig Rodrigues 		}
260*a36eca08SCraig Rodrigues 		i += incr;
261*a36eca08SCraig Rodrigues 		if (i >= h->size)
262*a36eca08SCraig Rodrigues 			i -= h->size;
263*a36eca08SCraig Rodrigues 	}
264*a36eca08SCraig Rodrigues 
265*a36eca08SCraig Rodrigues 	/* Found an empty position.  */
266*a36eca08SCraig Rodrigues 	if (empty != NONE)
267*a36eca08SCraig Rodrigues 		i = empty;
268*a36eca08SCraig Rodrigues 	h->t[i].hv = hv;
269*a36eca08SCraig Rodrigues 	return i;
270*a36eca08SCraig Rodrigues }
271*a36eca08SCraig Rodrigues 
272*a36eca08SCraig Rodrigues unsigned int
273*a36eca08SCraig Rodrigues ohash_lookup_memory(struct ohash *h, const char *k, size_t size, uint32_t hv)
274*a36eca08SCraig Rodrigues {
275*a36eca08SCraig Rodrigues 	unsigned int	i, incr;
276*a36eca08SCraig Rodrigues 	unsigned int	empty;
277*a36eca08SCraig Rodrigues 
278*a36eca08SCraig Rodrigues #ifdef STATS_HASH
279*a36eca08SCraig Rodrigues 	STAT_HASH_LOOKUP++;
280*a36eca08SCraig Rodrigues #endif
281*a36eca08SCraig Rodrigues 	empty = NONE;
282*a36eca08SCraig Rodrigues 	i = hv % h->size;
283*a36eca08SCraig Rodrigues 	incr = ((hv % (h->size-2)) & ~1) + 1;
284*a36eca08SCraig Rodrigues 	while (h->t[i].p != NULL) {
285*a36eca08SCraig Rodrigues #ifdef STATS_HASH
286*a36eca08SCraig Rodrigues 		STAT_HASH_LENGTH++;
287*a36eca08SCraig Rodrigues #endif
288*a36eca08SCraig Rodrigues 		if (h->t[i].p == DELETED) {
289*a36eca08SCraig Rodrigues 			if (empty == NONE)
290*a36eca08SCraig Rodrigues 				empty = i;
291*a36eca08SCraig Rodrigues 		} else if (h->t[i].hv == hv &&
292*a36eca08SCraig Rodrigues 		    memcmp(h->t[i].p+h->info.key_offset, k, size) == 0) {
293*a36eca08SCraig Rodrigues 			if (empty != NONE) {
294*a36eca08SCraig Rodrigues 				h->t[empty].hv = hv;
295*a36eca08SCraig Rodrigues 				h->t[empty].p = h->t[i].p;
296*a36eca08SCraig Rodrigues 				h->t[i].p = DELETED;
297*a36eca08SCraig Rodrigues 				return empty;
298*a36eca08SCraig Rodrigues 			} else {
299*a36eca08SCraig Rodrigues #ifdef STATS_HASH
300*a36eca08SCraig Rodrigues 				STAT_HASH_POSITIVE++;
301*a36eca08SCraig Rodrigues #endif
302*a36eca08SCraig Rodrigues 			}	return i;
303*a36eca08SCraig Rodrigues 		}
304*a36eca08SCraig Rodrigues 		i += incr;
305*a36eca08SCraig Rodrigues 		if (i >= h->size)
306*a36eca08SCraig Rodrigues 			i -= h->size;
307*a36eca08SCraig Rodrigues 	}
308*a36eca08SCraig Rodrigues 
309*a36eca08SCraig Rodrigues 	/* Found an empty position.  */
310*a36eca08SCraig Rodrigues 	if (empty != NONE)
311*a36eca08SCraig Rodrigues 		i = empty;
312*a36eca08SCraig Rodrigues 	h->t[i].hv = hv;
313*a36eca08SCraig Rodrigues 	return i;
314*a36eca08SCraig Rodrigues }
315*a36eca08SCraig Rodrigues 
316*a36eca08SCraig Rodrigues unsigned int
317*a36eca08SCraig Rodrigues ohash_qlookup(struct ohash *h, const char *s)
318*a36eca08SCraig Rodrigues {
319*a36eca08SCraig Rodrigues 	const char *e = NULL;
320*a36eca08SCraig Rodrigues 	return ohash_qlookupi(h, s, &e);
321*a36eca08SCraig Rodrigues }
322*a36eca08SCraig Rodrigues 
323*a36eca08SCraig Rodrigues unsigned int
324*a36eca08SCraig Rodrigues ohash_qlookupi(struct ohash *h, const char *s, const char **e)
325*a36eca08SCraig Rodrigues {
326*a36eca08SCraig Rodrigues 	uint32_t hv;
327*a36eca08SCraig Rodrigues 
328*a36eca08SCraig Rodrigues 	hv = ohash_interval(s, e);
329*a36eca08SCraig Rodrigues 	return ohash_lookup_interval(h, s, *e, hv);
330*a36eca08SCraig Rodrigues }
331