1*a36eca08SCraig Rodrigues /* $OpenBSD: src/lib/libutil/ohash.c,v 1.1 2014/06/02 18:52:03 deraadt Exp $ */
2*a36eca08SCraig Rodrigues
3*a36eca08SCraig Rodrigues /* Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org>
4*a36eca08SCraig Rodrigues *
5*a36eca08SCraig Rodrigues * Permission to use, copy, modify, and distribute this software for any
6*a36eca08SCraig Rodrigues * purpose with or without fee is hereby granted, provided that the above
7*a36eca08SCraig Rodrigues * copyright notice and this permission notice appear in all copies.
8*a36eca08SCraig Rodrigues *
9*a36eca08SCraig Rodrigues * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10*a36eca08SCraig Rodrigues * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11*a36eca08SCraig Rodrigues * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12*a36eca08SCraig Rodrigues * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13*a36eca08SCraig Rodrigues * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14*a36eca08SCraig Rodrigues * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15*a36eca08SCraig Rodrigues * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16*a36eca08SCraig Rodrigues */
17*a36eca08SCraig Rodrigues
18*a36eca08SCraig Rodrigues #include <sys/cdefs.h>
19*a36eca08SCraig Rodrigues #include <stddef.h>
20*a36eca08SCraig Rodrigues #include <stdint.h>
21*a36eca08SCraig Rodrigues #include <stdlib.h>
22*a36eca08SCraig Rodrigues #include <string.h>
23*a36eca08SCraig Rodrigues #include <limits.h>
24*a36eca08SCraig Rodrigues #include "ohash.h"
25*a36eca08SCraig Rodrigues
26*a36eca08SCraig Rodrigues struct _ohash_record {
27*a36eca08SCraig Rodrigues uint32_t hv;
28*a36eca08SCraig Rodrigues const char *p;
29*a36eca08SCraig Rodrigues };
30*a36eca08SCraig Rodrigues
31*a36eca08SCraig Rodrigues #define DELETED ((const char *)h)
32*a36eca08SCraig Rodrigues #define NONE (h->size)
33*a36eca08SCraig Rodrigues
34*a36eca08SCraig Rodrigues /* Don't bother changing the hash table if the change is small enough. */
35*a36eca08SCraig Rodrigues #define MINSIZE (1UL << 4)
36*a36eca08SCraig Rodrigues #define MINDELETED 4
37*a36eca08SCraig Rodrigues
38*a36eca08SCraig Rodrigues static void ohash_resize(struct ohash *);
39*a36eca08SCraig Rodrigues
40*a36eca08SCraig Rodrigues
41*a36eca08SCraig Rodrigues /* This handles the common case of variable length keys, where the
42*a36eca08SCraig Rodrigues * key is stored at the end of the record.
43*a36eca08SCraig Rodrigues */
44*a36eca08SCraig Rodrigues void *
ohash_create_entry(struct ohash_info * i,const char * start,const char ** end)45*a36eca08SCraig Rodrigues ohash_create_entry(struct ohash_info *i, const char *start, const char **end)
46*a36eca08SCraig Rodrigues {
47*a36eca08SCraig Rodrigues char *p;
48*a36eca08SCraig Rodrigues
49*a36eca08SCraig Rodrigues if (!*end)
50*a36eca08SCraig Rodrigues *end = start + strlen(start);
51*a36eca08SCraig Rodrigues p = (i->alloc)(i->key_offset + (*end - start) + 1, i->data);
52*a36eca08SCraig Rodrigues if (p) {
53*a36eca08SCraig Rodrigues memcpy(p+i->key_offset, start, *end-start);
54*a36eca08SCraig Rodrigues p[i->key_offset + (*end - start)] = '\0';
55*a36eca08SCraig Rodrigues }
56*a36eca08SCraig Rodrigues return (void *)p;
57*a36eca08SCraig Rodrigues }
58*a36eca08SCraig Rodrigues
59*a36eca08SCraig Rodrigues /* hash_delete only frees the hash structure. Use hash_first/hash_next
60*a36eca08SCraig Rodrigues * to free entries as well. */
61*a36eca08SCraig Rodrigues void
ohash_delete(struct ohash * h)62*a36eca08SCraig Rodrigues ohash_delete(struct ohash *h)
63*a36eca08SCraig Rodrigues {
64*a36eca08SCraig Rodrigues (h->info.free)(h->t, h->info.data);
65*a36eca08SCraig Rodrigues #ifndef NDEBUG
66*a36eca08SCraig Rodrigues h->t = NULL;
67*a36eca08SCraig Rodrigues #endif
68*a36eca08SCraig Rodrigues }
69*a36eca08SCraig Rodrigues
70*a36eca08SCraig Rodrigues static void
ohash_resize(struct ohash * h)71*a36eca08SCraig Rodrigues ohash_resize(struct ohash *h)
72*a36eca08SCraig Rodrigues {
73*a36eca08SCraig Rodrigues struct _ohash_record *n;
74*a36eca08SCraig Rodrigues size_t ns;
75*a36eca08SCraig Rodrigues unsigned int j;
76*a36eca08SCraig Rodrigues unsigned int i, incr;
77*a36eca08SCraig Rodrigues
78*a36eca08SCraig Rodrigues if (4 * h->deleted < h->total) {
79*a36eca08SCraig Rodrigues if (h->size >= (UINT_MAX >> 1U))
80*a36eca08SCraig Rodrigues ns = UINT_MAX;
81*a36eca08SCraig Rodrigues else
82*a36eca08SCraig Rodrigues ns = h->size << 1U;
83*a36eca08SCraig Rodrigues } else if (3 * h->deleted > 2 * h->total)
84*a36eca08SCraig Rodrigues ns = h->size >> 1U;
85*a36eca08SCraig Rodrigues else
86*a36eca08SCraig Rodrigues ns = h->size;
87*a36eca08SCraig Rodrigues if (ns < MINSIZE)
88*a36eca08SCraig Rodrigues ns = MINSIZE;
89*a36eca08SCraig Rodrigues #ifdef STATS_HASH
90*a36eca08SCraig Rodrigues STAT_HASH_EXPAND++;
91*a36eca08SCraig Rodrigues STAT_HASH_SIZE += ns - h->size;
92*a36eca08SCraig Rodrigues #endif
93*a36eca08SCraig Rodrigues
94*a36eca08SCraig Rodrigues n = (h->info.calloc)(ns, sizeof(struct _ohash_record), h->info.data);
95*a36eca08SCraig Rodrigues if (!n)
96*a36eca08SCraig Rodrigues return;
97*a36eca08SCraig Rodrigues
98*a36eca08SCraig Rodrigues for (j = 0; j < h->size; j++) {
99*a36eca08SCraig Rodrigues if (h->t[j].p != NULL && h->t[j].p != DELETED) {
100*a36eca08SCraig Rodrigues i = h->t[j].hv % ns;
101*a36eca08SCraig Rodrigues incr = ((h->t[j].hv % (ns - 2)) & ~1) + 1;
102*a36eca08SCraig Rodrigues while (n[i].p != NULL) {
103*a36eca08SCraig Rodrigues i += incr;
104*a36eca08SCraig Rodrigues if (i >= ns)
105*a36eca08SCraig Rodrigues i -= ns;
106*a36eca08SCraig Rodrigues }
107*a36eca08SCraig Rodrigues n[i].hv = h->t[j].hv;
108*a36eca08SCraig Rodrigues n[i].p = h->t[j].p;
109*a36eca08SCraig Rodrigues }
110*a36eca08SCraig Rodrigues }
111*a36eca08SCraig Rodrigues (h->info.free)(h->t, h->info.data);
112*a36eca08SCraig Rodrigues h->t = n;
113*a36eca08SCraig Rodrigues h->size = ns;
114*a36eca08SCraig Rodrigues h->total -= h->deleted;
115*a36eca08SCraig Rodrigues h->deleted = 0;
116*a36eca08SCraig Rodrigues }
117*a36eca08SCraig Rodrigues
118*a36eca08SCraig Rodrigues void *
ohash_remove(struct ohash * h,unsigned int i)119*a36eca08SCraig Rodrigues ohash_remove(struct ohash *h, unsigned int i)
120*a36eca08SCraig Rodrigues {
121*a36eca08SCraig Rodrigues void *result = (void *)h->t[i].p;
122*a36eca08SCraig Rodrigues
123*a36eca08SCraig Rodrigues if (result == NULL || result == DELETED)
124*a36eca08SCraig Rodrigues return NULL;
125*a36eca08SCraig Rodrigues
126*a36eca08SCraig Rodrigues #ifdef STATS_HASH
127*a36eca08SCraig Rodrigues STAT_HASH_ENTRIES--;
128*a36eca08SCraig Rodrigues #endif
129*a36eca08SCraig Rodrigues h->t[i].p = DELETED;
130*a36eca08SCraig Rodrigues h->deleted++;
131*a36eca08SCraig Rodrigues if (h->deleted >= MINDELETED && 4 * h->deleted > h->total)
132*a36eca08SCraig Rodrigues ohash_resize(h);
133*a36eca08SCraig Rodrigues return result;
134*a36eca08SCraig Rodrigues }
135*a36eca08SCraig Rodrigues
136*a36eca08SCraig Rodrigues void *
ohash_find(struct ohash * h,unsigned int i)137*a36eca08SCraig Rodrigues ohash_find(struct ohash *h, unsigned int i)
138*a36eca08SCraig Rodrigues {
139*a36eca08SCraig Rodrigues if (h->t[i].p == DELETED)
140*a36eca08SCraig Rodrigues return NULL;
141*a36eca08SCraig Rodrigues else
142*a36eca08SCraig Rodrigues return (void *)h->t[i].p;
143*a36eca08SCraig Rodrigues }
144*a36eca08SCraig Rodrigues
145*a36eca08SCraig Rodrigues void *
ohash_insert(struct ohash * h,unsigned int i,void * p)146*a36eca08SCraig Rodrigues ohash_insert(struct ohash *h, unsigned int i, void *p)
147*a36eca08SCraig Rodrigues {
148*a36eca08SCraig Rodrigues #ifdef STATS_HASH
149*a36eca08SCraig Rodrigues STAT_HASH_ENTRIES++;
150*a36eca08SCraig Rodrigues #endif
151*a36eca08SCraig Rodrigues if (h->t[i].p == DELETED) {
152*a36eca08SCraig Rodrigues h->deleted--;
153*a36eca08SCraig Rodrigues h->t[i].p = p;
154*a36eca08SCraig Rodrigues } else {
155*a36eca08SCraig Rodrigues h->t[i].p = p;
156*a36eca08SCraig Rodrigues /* Arbitrary resize boundary. Tweak if not efficient enough. */
157*a36eca08SCraig Rodrigues if (++h->total * 4 > h->size * 3)
158*a36eca08SCraig Rodrigues ohash_resize(h);
159*a36eca08SCraig Rodrigues }
160*a36eca08SCraig Rodrigues return p;
161*a36eca08SCraig Rodrigues }
162*a36eca08SCraig Rodrigues
163*a36eca08SCraig Rodrigues unsigned int
ohash_entries(struct ohash * h)164*a36eca08SCraig Rodrigues ohash_entries(struct ohash *h)
165*a36eca08SCraig Rodrigues {
166*a36eca08SCraig Rodrigues return h->total - h->deleted;
167*a36eca08SCraig Rodrigues }
168*a36eca08SCraig Rodrigues
169*a36eca08SCraig Rodrigues void *
ohash_first(struct ohash * h,unsigned int * pos)170*a36eca08SCraig Rodrigues ohash_first(struct ohash *h, unsigned int *pos)
171*a36eca08SCraig Rodrigues {
172*a36eca08SCraig Rodrigues *pos = 0;
173*a36eca08SCraig Rodrigues return ohash_next(h, pos);
174*a36eca08SCraig Rodrigues }
175*a36eca08SCraig Rodrigues
176*a36eca08SCraig Rodrigues void *
ohash_next(struct ohash * h,unsigned int * pos)177*a36eca08SCraig Rodrigues ohash_next(struct ohash *h, unsigned int *pos)
178*a36eca08SCraig Rodrigues {
179*a36eca08SCraig Rodrigues for (; *pos < h->size; (*pos)++)
180*a36eca08SCraig Rodrigues if (h->t[*pos].p != DELETED && h->t[*pos].p != NULL)
181*a36eca08SCraig Rodrigues return (void *)h->t[(*pos)++].p;
182*a36eca08SCraig Rodrigues return NULL;
183*a36eca08SCraig Rodrigues }
184*a36eca08SCraig Rodrigues
185*a36eca08SCraig Rodrigues void
ohash_init(struct ohash * h,unsigned int size,struct ohash_info * info)186*a36eca08SCraig Rodrigues ohash_init(struct ohash *h, unsigned int size, struct ohash_info *info)
187*a36eca08SCraig Rodrigues {
188*a36eca08SCraig Rodrigues h->size = 1UL << size;
189*a36eca08SCraig Rodrigues if (h->size < MINSIZE)
190*a36eca08SCraig Rodrigues h->size = MINSIZE;
191*a36eca08SCraig Rodrigues #ifdef STATS_HASH
192*a36eca08SCraig Rodrigues STAT_HASH_CREATION++;
193*a36eca08SCraig Rodrigues STAT_HASH_SIZE += h->size;
194*a36eca08SCraig Rodrigues #endif
195*a36eca08SCraig Rodrigues /* Copy info so that caller may free it. */
196*a36eca08SCraig Rodrigues h->info.key_offset = info->key_offset;
197*a36eca08SCraig Rodrigues h->info.calloc = info->calloc;
198*a36eca08SCraig Rodrigues h->info.free = info->free;
199*a36eca08SCraig Rodrigues h->info.alloc = info->alloc;
200*a36eca08SCraig Rodrigues h->info.data = info->data;
201*a36eca08SCraig Rodrigues h->t = (h->info.calloc)(h->size, sizeof(struct _ohash_record),
202*a36eca08SCraig Rodrigues h->info.data);
203*a36eca08SCraig Rodrigues h->total = h->deleted = 0;
204*a36eca08SCraig Rodrigues }
205*a36eca08SCraig Rodrigues
206*a36eca08SCraig Rodrigues uint32_t
ohash_interval(const char * s,const char ** e)207*a36eca08SCraig Rodrigues ohash_interval(const char *s, const char **e)
208*a36eca08SCraig Rodrigues {
209*a36eca08SCraig Rodrigues uint32_t k;
210*a36eca08SCraig Rodrigues
211*a36eca08SCraig Rodrigues if (!*e)
212*a36eca08SCraig Rodrigues *e = s + strlen(s);
213*a36eca08SCraig Rodrigues if (s == *e)
214*a36eca08SCraig Rodrigues k = 0;
215*a36eca08SCraig Rodrigues else
216*a36eca08SCraig Rodrigues k = *s++;
217*a36eca08SCraig Rodrigues while (s != *e)
218*a36eca08SCraig Rodrigues k = ((k << 2) | (k >> 30)) ^ *s++;
219*a36eca08SCraig Rodrigues return k;
220*a36eca08SCraig Rodrigues }
221*a36eca08SCraig Rodrigues
222*a36eca08SCraig Rodrigues unsigned int
ohash_lookup_interval(struct ohash * h,const char * start,const char * end,uint32_t hv)223*a36eca08SCraig Rodrigues ohash_lookup_interval(struct ohash *h, const char *start, const char *end,
224*a36eca08SCraig Rodrigues uint32_t hv)
225*a36eca08SCraig Rodrigues {
226*a36eca08SCraig Rodrigues unsigned int i, incr;
227*a36eca08SCraig Rodrigues unsigned int empty;
228*a36eca08SCraig Rodrigues
229*a36eca08SCraig Rodrigues #ifdef STATS_HASH
230*a36eca08SCraig Rodrigues STAT_HASH_LOOKUP++;
231*a36eca08SCraig Rodrigues #endif
232*a36eca08SCraig Rodrigues empty = NONE;
233*a36eca08SCraig Rodrigues i = hv % h->size;
234*a36eca08SCraig Rodrigues incr = ((hv % (h->size-2)) & ~1) + 1;
235*a36eca08SCraig Rodrigues while (h->t[i].p != NULL) {
236*a36eca08SCraig Rodrigues #ifdef STATS_HASH
237*a36eca08SCraig Rodrigues STAT_HASH_LENGTH++;
238*a36eca08SCraig Rodrigues #endif
239*a36eca08SCraig Rodrigues if (h->t[i].p == DELETED) {
240*a36eca08SCraig Rodrigues if (empty == NONE)
241*a36eca08SCraig Rodrigues empty = i;
242*a36eca08SCraig Rodrigues } else if (h->t[i].hv == hv &&
243*a36eca08SCraig Rodrigues strncmp(h->t[i].p+h->info.key_offset, start,
244*a36eca08SCraig Rodrigues end - start) == 0 &&
245*a36eca08SCraig Rodrigues (h->t[i].p+h->info.key_offset)[end-start] == '\0') {
246*a36eca08SCraig Rodrigues if (empty != NONE) {
247*a36eca08SCraig Rodrigues h->t[empty].hv = hv;
248*a36eca08SCraig Rodrigues h->t[empty].p = h->t[i].p;
249*a36eca08SCraig Rodrigues h->t[i].p = DELETED;
250*a36eca08SCraig Rodrigues return empty;
251*a36eca08SCraig Rodrigues } else {
252*a36eca08SCraig Rodrigues #ifdef STATS_HASH
253*a36eca08SCraig Rodrigues STAT_HASH_POSITIVE++;
254*a36eca08SCraig Rodrigues #endif
255*a36eca08SCraig Rodrigues return i;
256*a36eca08SCraig Rodrigues }
257*a36eca08SCraig Rodrigues }
258*a36eca08SCraig Rodrigues i += incr;
259*a36eca08SCraig Rodrigues if (i >= h->size)
260*a36eca08SCraig Rodrigues i -= h->size;
261*a36eca08SCraig Rodrigues }
262*a36eca08SCraig Rodrigues
263*a36eca08SCraig Rodrigues /* Found an empty position. */
264*a36eca08SCraig Rodrigues if (empty != NONE)
265*a36eca08SCraig Rodrigues i = empty;
266*a36eca08SCraig Rodrigues h->t[i].hv = hv;
267*a36eca08SCraig Rodrigues return i;
268*a36eca08SCraig Rodrigues }
269*a36eca08SCraig Rodrigues
270*a36eca08SCraig Rodrigues unsigned int
ohash_lookup_memory(struct ohash * h,const char * k,size_t size,uint32_t hv)271*a36eca08SCraig Rodrigues ohash_lookup_memory(struct ohash *h, const char *k, size_t size, uint32_t hv)
272*a36eca08SCraig Rodrigues {
273*a36eca08SCraig Rodrigues unsigned int i, incr;
274*a36eca08SCraig Rodrigues unsigned int empty;
275*a36eca08SCraig Rodrigues
276*a36eca08SCraig Rodrigues #ifdef STATS_HASH
277*a36eca08SCraig Rodrigues STAT_HASH_LOOKUP++;
278*a36eca08SCraig Rodrigues #endif
279*a36eca08SCraig Rodrigues empty = NONE;
280*a36eca08SCraig Rodrigues i = hv % h->size;
281*a36eca08SCraig Rodrigues incr = ((hv % (h->size-2)) & ~1) + 1;
282*a36eca08SCraig Rodrigues while (h->t[i].p != NULL) {
283*a36eca08SCraig Rodrigues #ifdef STATS_HASH
284*a36eca08SCraig Rodrigues STAT_HASH_LENGTH++;
285*a36eca08SCraig Rodrigues #endif
286*a36eca08SCraig Rodrigues if (h->t[i].p == DELETED) {
287*a36eca08SCraig Rodrigues if (empty == NONE)
288*a36eca08SCraig Rodrigues empty = i;
289*a36eca08SCraig Rodrigues } else if (h->t[i].hv == hv &&
290*a36eca08SCraig Rodrigues memcmp(h->t[i].p+h->info.key_offset, k, size) == 0) {
291*a36eca08SCraig Rodrigues if (empty != NONE) {
292*a36eca08SCraig Rodrigues h->t[empty].hv = hv;
293*a36eca08SCraig Rodrigues h->t[empty].p = h->t[i].p;
294*a36eca08SCraig Rodrigues h->t[i].p = DELETED;
295*a36eca08SCraig Rodrigues return empty;
296*a36eca08SCraig Rodrigues } else {
297*a36eca08SCraig Rodrigues #ifdef STATS_HASH
298*a36eca08SCraig Rodrigues STAT_HASH_POSITIVE++;
299*a36eca08SCraig Rodrigues #endif
300*a36eca08SCraig Rodrigues } return i;
301*a36eca08SCraig Rodrigues }
302*a36eca08SCraig Rodrigues i += incr;
303*a36eca08SCraig Rodrigues if (i >= h->size)
304*a36eca08SCraig Rodrigues i -= h->size;
305*a36eca08SCraig Rodrigues }
306*a36eca08SCraig Rodrigues
307*a36eca08SCraig Rodrigues /* Found an empty position. */
308*a36eca08SCraig Rodrigues if (empty != NONE)
309*a36eca08SCraig Rodrigues i = empty;
310*a36eca08SCraig Rodrigues h->t[i].hv = hv;
311*a36eca08SCraig Rodrigues return i;
312*a36eca08SCraig Rodrigues }
313*a36eca08SCraig Rodrigues
314*a36eca08SCraig Rodrigues unsigned int
ohash_qlookup(struct ohash * h,const char * s)315*a36eca08SCraig Rodrigues ohash_qlookup(struct ohash *h, const char *s)
316*a36eca08SCraig Rodrigues {
317*a36eca08SCraig Rodrigues const char *e = NULL;
318*a36eca08SCraig Rodrigues return ohash_qlookupi(h, s, &e);
319*a36eca08SCraig Rodrigues }
320*a36eca08SCraig Rodrigues
321*a36eca08SCraig Rodrigues unsigned int
ohash_qlookupi(struct ohash * h,const char * s,const char ** e)322*a36eca08SCraig Rodrigues ohash_qlookupi(struct ohash *h, const char *s, const char **e)
323*a36eca08SCraig Rodrigues {
324*a36eca08SCraig Rodrigues uint32_t hv;
325*a36eca08SCraig Rodrigues
326*a36eca08SCraig Rodrigues hv = ohash_interval(s, e);
327*a36eca08SCraig Rodrigues return ohash_lookup_interval(h, s, *e, hv);
328*a36eca08SCraig Rodrigues }
329