xref: /freebsd/lib/libopenbsd/ohash.c (revision 1d386b48a555f61cb7325543adbbb5c3f3407a66)
1*a36eca08SCraig Rodrigues /* $OpenBSD: src/lib/libutil/ohash.c,v 1.1 2014/06/02 18:52:03 deraadt Exp $ */
2*a36eca08SCraig Rodrigues 
3*a36eca08SCraig Rodrigues /* Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org>
4*a36eca08SCraig Rodrigues  *
5*a36eca08SCraig Rodrigues  * Permission to use, copy, modify, and distribute this software for any
6*a36eca08SCraig Rodrigues  * purpose with or without fee is hereby granted, provided that the above
7*a36eca08SCraig Rodrigues  * copyright notice and this permission notice appear in all copies.
8*a36eca08SCraig Rodrigues  *
9*a36eca08SCraig Rodrigues  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10*a36eca08SCraig Rodrigues  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11*a36eca08SCraig Rodrigues  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12*a36eca08SCraig Rodrigues  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13*a36eca08SCraig Rodrigues  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14*a36eca08SCraig Rodrigues  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15*a36eca08SCraig Rodrigues  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16*a36eca08SCraig Rodrigues  */
17*a36eca08SCraig Rodrigues 
18*a36eca08SCraig Rodrigues #include <sys/cdefs.h>
19*a36eca08SCraig Rodrigues #include <stddef.h>
20*a36eca08SCraig Rodrigues #include <stdint.h>
21*a36eca08SCraig Rodrigues #include <stdlib.h>
22*a36eca08SCraig Rodrigues #include <string.h>
23*a36eca08SCraig Rodrigues #include <limits.h>
24*a36eca08SCraig Rodrigues #include "ohash.h"
25*a36eca08SCraig Rodrigues 
26*a36eca08SCraig Rodrigues struct _ohash_record {
27*a36eca08SCraig Rodrigues 	uint32_t	hv;
28*a36eca08SCraig Rodrigues 	const char	*p;
29*a36eca08SCraig Rodrigues };
30*a36eca08SCraig Rodrigues 
31*a36eca08SCraig Rodrigues #define DELETED		((const char *)h)
32*a36eca08SCraig Rodrigues #define NONE		(h->size)
33*a36eca08SCraig Rodrigues 
34*a36eca08SCraig Rodrigues /* Don't bother changing the hash table if the change is small enough.  */
35*a36eca08SCraig Rodrigues #define MINSIZE		(1UL << 4)
36*a36eca08SCraig Rodrigues #define MINDELETED	4
37*a36eca08SCraig Rodrigues 
38*a36eca08SCraig Rodrigues static void ohash_resize(struct ohash *);
39*a36eca08SCraig Rodrigues 
40*a36eca08SCraig Rodrigues 
41*a36eca08SCraig Rodrigues /* This handles the common case of variable length keys, where the
42*a36eca08SCraig Rodrigues  * key is stored at the end of the record.
43*a36eca08SCraig Rodrigues  */
44*a36eca08SCraig Rodrigues void *
ohash_create_entry(struct ohash_info * i,const char * start,const char ** end)45*a36eca08SCraig Rodrigues ohash_create_entry(struct ohash_info *i, const char *start, const char **end)
46*a36eca08SCraig Rodrigues {
47*a36eca08SCraig Rodrigues 	char *p;
48*a36eca08SCraig Rodrigues 
49*a36eca08SCraig Rodrigues 	if (!*end)
50*a36eca08SCraig Rodrigues 		*end = start + strlen(start);
51*a36eca08SCraig Rodrigues 	p = (i->alloc)(i->key_offset + (*end - start) + 1, i->data);
52*a36eca08SCraig Rodrigues 	if (p) {
53*a36eca08SCraig Rodrigues 		memcpy(p+i->key_offset, start, *end-start);
54*a36eca08SCraig Rodrigues 		p[i->key_offset + (*end - start)] = '\0';
55*a36eca08SCraig Rodrigues 	}
56*a36eca08SCraig Rodrigues 	return (void *)p;
57*a36eca08SCraig Rodrigues }
58*a36eca08SCraig Rodrigues 
59*a36eca08SCraig Rodrigues /* hash_delete only frees the hash structure. Use hash_first/hash_next
60*a36eca08SCraig Rodrigues  * to free entries as well.  */
61*a36eca08SCraig Rodrigues void
ohash_delete(struct ohash * h)62*a36eca08SCraig Rodrigues ohash_delete(struct ohash *h)
63*a36eca08SCraig Rodrigues {
64*a36eca08SCraig Rodrigues 	(h->info.free)(h->t, h->info.data);
65*a36eca08SCraig Rodrigues #ifndef NDEBUG
66*a36eca08SCraig Rodrigues 	h->t = NULL;
67*a36eca08SCraig Rodrigues #endif
68*a36eca08SCraig Rodrigues }
69*a36eca08SCraig Rodrigues 
70*a36eca08SCraig Rodrigues static void
ohash_resize(struct ohash * h)71*a36eca08SCraig Rodrigues ohash_resize(struct ohash *h)
72*a36eca08SCraig Rodrigues {
73*a36eca08SCraig Rodrigues 	struct _ohash_record *n;
74*a36eca08SCraig Rodrigues 	size_t ns;
75*a36eca08SCraig Rodrigues 	unsigned int	j;
76*a36eca08SCraig Rodrigues 	unsigned int	i, incr;
77*a36eca08SCraig Rodrigues 
78*a36eca08SCraig Rodrigues 	if (4 * h->deleted < h->total) {
79*a36eca08SCraig Rodrigues 		if (h->size >= (UINT_MAX >> 1U))
80*a36eca08SCraig Rodrigues 			ns = UINT_MAX;
81*a36eca08SCraig Rodrigues 		else
82*a36eca08SCraig Rodrigues 			ns = h->size << 1U;
83*a36eca08SCraig Rodrigues 	} else if (3 * h->deleted > 2 * h->total)
84*a36eca08SCraig Rodrigues 		ns = h->size >> 1U;
85*a36eca08SCraig Rodrigues 	else
86*a36eca08SCraig Rodrigues 		ns = h->size;
87*a36eca08SCraig Rodrigues 	if (ns < MINSIZE)
88*a36eca08SCraig Rodrigues 		ns = MINSIZE;
89*a36eca08SCraig Rodrigues #ifdef STATS_HASH
90*a36eca08SCraig Rodrigues 	STAT_HASH_EXPAND++;
91*a36eca08SCraig Rodrigues 	STAT_HASH_SIZE += ns - h->size;
92*a36eca08SCraig Rodrigues #endif
93*a36eca08SCraig Rodrigues 
94*a36eca08SCraig Rodrigues 	n = (h->info.calloc)(ns, sizeof(struct _ohash_record), h->info.data);
95*a36eca08SCraig Rodrigues 	if (!n)
96*a36eca08SCraig Rodrigues 		return;
97*a36eca08SCraig Rodrigues 
98*a36eca08SCraig Rodrigues 	for (j = 0; j < h->size; j++) {
99*a36eca08SCraig Rodrigues 		if (h->t[j].p != NULL && h->t[j].p != DELETED) {
100*a36eca08SCraig Rodrigues 			i = h->t[j].hv % ns;
101*a36eca08SCraig Rodrigues 			incr = ((h->t[j].hv % (ns - 2)) & ~1) + 1;
102*a36eca08SCraig Rodrigues 			while (n[i].p != NULL) {
103*a36eca08SCraig Rodrigues 				i += incr;
104*a36eca08SCraig Rodrigues 				if (i >= ns)
105*a36eca08SCraig Rodrigues 					i -= ns;
106*a36eca08SCraig Rodrigues 			}
107*a36eca08SCraig Rodrigues 			n[i].hv = h->t[j].hv;
108*a36eca08SCraig Rodrigues 			n[i].p = h->t[j].p;
109*a36eca08SCraig Rodrigues 		}
110*a36eca08SCraig Rodrigues 	}
111*a36eca08SCraig Rodrigues 	(h->info.free)(h->t, h->info.data);
112*a36eca08SCraig Rodrigues 	h->t = n;
113*a36eca08SCraig Rodrigues 	h->size = ns;
114*a36eca08SCraig Rodrigues 	h->total -= h->deleted;
115*a36eca08SCraig Rodrigues 	h->deleted = 0;
116*a36eca08SCraig Rodrigues }
117*a36eca08SCraig Rodrigues 
118*a36eca08SCraig Rodrigues void *
ohash_remove(struct ohash * h,unsigned int i)119*a36eca08SCraig Rodrigues ohash_remove(struct ohash *h, unsigned int i)
120*a36eca08SCraig Rodrigues {
121*a36eca08SCraig Rodrigues 	void		*result = (void *)h->t[i].p;
122*a36eca08SCraig Rodrigues 
123*a36eca08SCraig Rodrigues 	if (result == NULL || result == DELETED)
124*a36eca08SCraig Rodrigues 		return NULL;
125*a36eca08SCraig Rodrigues 
126*a36eca08SCraig Rodrigues #ifdef STATS_HASH
127*a36eca08SCraig Rodrigues 	STAT_HASH_ENTRIES--;
128*a36eca08SCraig Rodrigues #endif
129*a36eca08SCraig Rodrigues 	h->t[i].p = DELETED;
130*a36eca08SCraig Rodrigues 	h->deleted++;
131*a36eca08SCraig Rodrigues 	if (h->deleted >= MINDELETED && 4 * h->deleted > h->total)
132*a36eca08SCraig Rodrigues 		ohash_resize(h);
133*a36eca08SCraig Rodrigues 	return result;
134*a36eca08SCraig Rodrigues }
135*a36eca08SCraig Rodrigues 
136*a36eca08SCraig Rodrigues void *
ohash_find(struct ohash * h,unsigned int i)137*a36eca08SCraig Rodrigues ohash_find(struct ohash *h, unsigned int i)
138*a36eca08SCraig Rodrigues {
139*a36eca08SCraig Rodrigues 	if (h->t[i].p == DELETED)
140*a36eca08SCraig Rodrigues 		return NULL;
141*a36eca08SCraig Rodrigues 	else
142*a36eca08SCraig Rodrigues 		return (void *)h->t[i].p;
143*a36eca08SCraig Rodrigues }
144*a36eca08SCraig Rodrigues 
145*a36eca08SCraig Rodrigues void *
ohash_insert(struct ohash * h,unsigned int i,void * p)146*a36eca08SCraig Rodrigues ohash_insert(struct ohash *h, unsigned int i, void *p)
147*a36eca08SCraig Rodrigues {
148*a36eca08SCraig Rodrigues #ifdef STATS_HASH
149*a36eca08SCraig Rodrigues 	STAT_HASH_ENTRIES++;
150*a36eca08SCraig Rodrigues #endif
151*a36eca08SCraig Rodrigues 	if (h->t[i].p == DELETED) {
152*a36eca08SCraig Rodrigues 		h->deleted--;
153*a36eca08SCraig Rodrigues 		h->t[i].p = p;
154*a36eca08SCraig Rodrigues 	} else {
155*a36eca08SCraig Rodrigues 		h->t[i].p = p;
156*a36eca08SCraig Rodrigues 		/* Arbitrary resize boundary.  Tweak if not efficient enough.  */
157*a36eca08SCraig Rodrigues 		if (++h->total * 4 > h->size * 3)
158*a36eca08SCraig Rodrigues 			ohash_resize(h);
159*a36eca08SCraig Rodrigues 	}
160*a36eca08SCraig Rodrigues 	return p;
161*a36eca08SCraig Rodrigues }
162*a36eca08SCraig Rodrigues 
163*a36eca08SCraig Rodrigues unsigned int
ohash_entries(struct ohash * h)164*a36eca08SCraig Rodrigues ohash_entries(struct ohash *h)
165*a36eca08SCraig Rodrigues {
166*a36eca08SCraig Rodrigues 	return h->total - h->deleted;
167*a36eca08SCraig Rodrigues }
168*a36eca08SCraig Rodrigues 
169*a36eca08SCraig Rodrigues void *
ohash_first(struct ohash * h,unsigned int * pos)170*a36eca08SCraig Rodrigues ohash_first(struct ohash *h, unsigned int *pos)
171*a36eca08SCraig Rodrigues {
172*a36eca08SCraig Rodrigues 	*pos = 0;
173*a36eca08SCraig Rodrigues 	return ohash_next(h, pos);
174*a36eca08SCraig Rodrigues }
175*a36eca08SCraig Rodrigues 
176*a36eca08SCraig Rodrigues void *
ohash_next(struct ohash * h,unsigned int * pos)177*a36eca08SCraig Rodrigues ohash_next(struct ohash *h, unsigned int *pos)
178*a36eca08SCraig Rodrigues {
179*a36eca08SCraig Rodrigues 	for (; *pos < h->size; (*pos)++)
180*a36eca08SCraig Rodrigues 		if (h->t[*pos].p != DELETED && h->t[*pos].p != NULL)
181*a36eca08SCraig Rodrigues 			return (void *)h->t[(*pos)++].p;
182*a36eca08SCraig Rodrigues 	return NULL;
183*a36eca08SCraig Rodrigues }
184*a36eca08SCraig Rodrigues 
185*a36eca08SCraig Rodrigues void
ohash_init(struct ohash * h,unsigned int size,struct ohash_info * info)186*a36eca08SCraig Rodrigues ohash_init(struct ohash *h, unsigned int size, struct ohash_info *info)
187*a36eca08SCraig Rodrigues {
188*a36eca08SCraig Rodrigues 	h->size = 1UL << size;
189*a36eca08SCraig Rodrigues 	if (h->size < MINSIZE)
190*a36eca08SCraig Rodrigues 		h->size = MINSIZE;
191*a36eca08SCraig Rodrigues #ifdef STATS_HASH
192*a36eca08SCraig Rodrigues 	STAT_HASH_CREATION++;
193*a36eca08SCraig Rodrigues 	STAT_HASH_SIZE += h->size;
194*a36eca08SCraig Rodrigues #endif
195*a36eca08SCraig Rodrigues 	/* Copy info so that caller may free it.  */
196*a36eca08SCraig Rodrigues 	h->info.key_offset = info->key_offset;
197*a36eca08SCraig Rodrigues 	h->info.calloc = info->calloc;
198*a36eca08SCraig Rodrigues 	h->info.free = info->free;
199*a36eca08SCraig Rodrigues 	h->info.alloc = info->alloc;
200*a36eca08SCraig Rodrigues 	h->info.data = info->data;
201*a36eca08SCraig Rodrigues 	h->t = (h->info.calloc)(h->size, sizeof(struct _ohash_record),
202*a36eca08SCraig Rodrigues 		    h->info.data);
203*a36eca08SCraig Rodrigues 	h->total = h->deleted = 0;
204*a36eca08SCraig Rodrigues }
205*a36eca08SCraig Rodrigues 
206*a36eca08SCraig Rodrigues uint32_t
ohash_interval(const char * s,const char ** e)207*a36eca08SCraig Rodrigues ohash_interval(const char *s, const char **e)
208*a36eca08SCraig Rodrigues {
209*a36eca08SCraig Rodrigues 	uint32_t k;
210*a36eca08SCraig Rodrigues 
211*a36eca08SCraig Rodrigues 	if (!*e)
212*a36eca08SCraig Rodrigues 		*e = s + strlen(s);
213*a36eca08SCraig Rodrigues 	if (s == *e)
214*a36eca08SCraig Rodrigues 		k = 0;
215*a36eca08SCraig Rodrigues 	else
216*a36eca08SCraig Rodrigues 		k = *s++;
217*a36eca08SCraig Rodrigues 	while (s != *e)
218*a36eca08SCraig Rodrigues 		k =  ((k << 2) | (k >> 30)) ^ *s++;
219*a36eca08SCraig Rodrigues 	return k;
220*a36eca08SCraig Rodrigues }
221*a36eca08SCraig Rodrigues 
222*a36eca08SCraig Rodrigues unsigned int
ohash_lookup_interval(struct ohash * h,const char * start,const char * end,uint32_t hv)223*a36eca08SCraig Rodrigues ohash_lookup_interval(struct ohash *h, const char *start, const char *end,
224*a36eca08SCraig Rodrigues     uint32_t hv)
225*a36eca08SCraig Rodrigues {
226*a36eca08SCraig Rodrigues 	unsigned int	i, incr;
227*a36eca08SCraig Rodrigues 	unsigned int	empty;
228*a36eca08SCraig Rodrigues 
229*a36eca08SCraig Rodrigues #ifdef STATS_HASH
230*a36eca08SCraig Rodrigues 	STAT_HASH_LOOKUP++;
231*a36eca08SCraig Rodrigues #endif
232*a36eca08SCraig Rodrigues 	empty = NONE;
233*a36eca08SCraig Rodrigues 	i = hv % h->size;
234*a36eca08SCraig Rodrigues 	incr = ((hv % (h->size-2)) & ~1) + 1;
235*a36eca08SCraig Rodrigues 	while (h->t[i].p != NULL) {
236*a36eca08SCraig Rodrigues #ifdef STATS_HASH
237*a36eca08SCraig Rodrigues 		STAT_HASH_LENGTH++;
238*a36eca08SCraig Rodrigues #endif
239*a36eca08SCraig Rodrigues 		if (h->t[i].p == DELETED) {
240*a36eca08SCraig Rodrigues 			if (empty == NONE)
241*a36eca08SCraig Rodrigues 				empty = i;
242*a36eca08SCraig Rodrigues 		} else if (h->t[i].hv == hv &&
243*a36eca08SCraig Rodrigues 		    strncmp(h->t[i].p+h->info.key_offset, start,
244*a36eca08SCraig Rodrigues 			end - start) == 0 &&
245*a36eca08SCraig Rodrigues 		    (h->t[i].p+h->info.key_offset)[end-start] == '\0') {
246*a36eca08SCraig Rodrigues 			if (empty != NONE) {
247*a36eca08SCraig Rodrigues 				h->t[empty].hv = hv;
248*a36eca08SCraig Rodrigues 				h->t[empty].p = h->t[i].p;
249*a36eca08SCraig Rodrigues 				h->t[i].p = DELETED;
250*a36eca08SCraig Rodrigues 				return empty;
251*a36eca08SCraig Rodrigues 			} else {
252*a36eca08SCraig Rodrigues #ifdef STATS_HASH
253*a36eca08SCraig Rodrigues 				STAT_HASH_POSITIVE++;
254*a36eca08SCraig Rodrigues #endif
255*a36eca08SCraig Rodrigues 				return i;
256*a36eca08SCraig Rodrigues 			}
257*a36eca08SCraig Rodrigues 		}
258*a36eca08SCraig Rodrigues 		i += incr;
259*a36eca08SCraig Rodrigues 		if (i >= h->size)
260*a36eca08SCraig Rodrigues 			i -= h->size;
261*a36eca08SCraig Rodrigues 	}
262*a36eca08SCraig Rodrigues 
263*a36eca08SCraig Rodrigues 	/* Found an empty position.  */
264*a36eca08SCraig Rodrigues 	if (empty != NONE)
265*a36eca08SCraig Rodrigues 		i = empty;
266*a36eca08SCraig Rodrigues 	h->t[i].hv = hv;
267*a36eca08SCraig Rodrigues 	return i;
268*a36eca08SCraig Rodrigues }
269*a36eca08SCraig Rodrigues 
270*a36eca08SCraig Rodrigues unsigned int
ohash_lookup_memory(struct ohash * h,const char * k,size_t size,uint32_t hv)271*a36eca08SCraig Rodrigues ohash_lookup_memory(struct ohash *h, const char *k, size_t size, uint32_t hv)
272*a36eca08SCraig Rodrigues {
273*a36eca08SCraig Rodrigues 	unsigned int	i, incr;
274*a36eca08SCraig Rodrigues 	unsigned int	empty;
275*a36eca08SCraig Rodrigues 
276*a36eca08SCraig Rodrigues #ifdef STATS_HASH
277*a36eca08SCraig Rodrigues 	STAT_HASH_LOOKUP++;
278*a36eca08SCraig Rodrigues #endif
279*a36eca08SCraig Rodrigues 	empty = NONE;
280*a36eca08SCraig Rodrigues 	i = hv % h->size;
281*a36eca08SCraig Rodrigues 	incr = ((hv % (h->size-2)) & ~1) + 1;
282*a36eca08SCraig Rodrigues 	while (h->t[i].p != NULL) {
283*a36eca08SCraig Rodrigues #ifdef STATS_HASH
284*a36eca08SCraig Rodrigues 		STAT_HASH_LENGTH++;
285*a36eca08SCraig Rodrigues #endif
286*a36eca08SCraig Rodrigues 		if (h->t[i].p == DELETED) {
287*a36eca08SCraig Rodrigues 			if (empty == NONE)
288*a36eca08SCraig Rodrigues 				empty = i;
289*a36eca08SCraig Rodrigues 		} else if (h->t[i].hv == hv &&
290*a36eca08SCraig Rodrigues 		    memcmp(h->t[i].p+h->info.key_offset, k, size) == 0) {
291*a36eca08SCraig Rodrigues 			if (empty != NONE) {
292*a36eca08SCraig Rodrigues 				h->t[empty].hv = hv;
293*a36eca08SCraig Rodrigues 				h->t[empty].p = h->t[i].p;
294*a36eca08SCraig Rodrigues 				h->t[i].p = DELETED;
295*a36eca08SCraig Rodrigues 				return empty;
296*a36eca08SCraig Rodrigues 			} else {
297*a36eca08SCraig Rodrigues #ifdef STATS_HASH
298*a36eca08SCraig Rodrigues 				STAT_HASH_POSITIVE++;
299*a36eca08SCraig Rodrigues #endif
300*a36eca08SCraig Rodrigues 			}	return i;
301*a36eca08SCraig Rodrigues 		}
302*a36eca08SCraig Rodrigues 		i += incr;
303*a36eca08SCraig Rodrigues 		if (i >= h->size)
304*a36eca08SCraig Rodrigues 			i -= h->size;
305*a36eca08SCraig Rodrigues 	}
306*a36eca08SCraig Rodrigues 
307*a36eca08SCraig Rodrigues 	/* Found an empty position.  */
308*a36eca08SCraig Rodrigues 	if (empty != NONE)
309*a36eca08SCraig Rodrigues 		i = empty;
310*a36eca08SCraig Rodrigues 	h->t[i].hv = hv;
311*a36eca08SCraig Rodrigues 	return i;
312*a36eca08SCraig Rodrigues }
313*a36eca08SCraig Rodrigues 
314*a36eca08SCraig Rodrigues unsigned int
ohash_qlookup(struct ohash * h,const char * s)315*a36eca08SCraig Rodrigues ohash_qlookup(struct ohash *h, const char *s)
316*a36eca08SCraig Rodrigues {
317*a36eca08SCraig Rodrigues 	const char *e = NULL;
318*a36eca08SCraig Rodrigues 	return ohash_qlookupi(h, s, &e);
319*a36eca08SCraig Rodrigues }
320*a36eca08SCraig Rodrigues 
321*a36eca08SCraig Rodrigues unsigned int
ohash_qlookupi(struct ohash * h,const char * s,const char ** e)322*a36eca08SCraig Rodrigues ohash_qlookupi(struct ohash *h, const char *s, const char **e)
323*a36eca08SCraig Rodrigues {
324*a36eca08SCraig Rodrigues 	uint32_t hv;
325*a36eca08SCraig Rodrigues 
326*a36eca08SCraig Rodrigues 	hv = ohash_interval(s, e);
327*a36eca08SCraig Rodrigues 	return ohash_lookup_interval(h, s, *e, hv);
328*a36eca08SCraig Rodrigues }
329