xref: /illumos-gate/usr/src/lib/libc/port/gen/hsearch.c (revision 88f8b78a88cbdc6d8c1af5c3e54bc49d25095c98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*	Copyright (c) 1988 AT&T	*/
30 /*	  All Rights Reserved  	*/
31 
32 
33 /*
34  * Compile time switches:
35  *
36  *  MULT - use a multiplicative hashing function.
37  *  DIV - use the remainder mod table size as a hashing function.
38  *  CHAINED - use a linked list to resolve collisions.
39  *  OPEN - use open addressing to resolve collisions.
40  *  BRENT - use Brent's modification to improve the OPEN algorithm.
41  *  SORTUP - CHAINED list is sorted in increasing order.
42  *  SORTDOWN - CHAINED list is sorted in decreasing order.
43  *  START - CHAINED list with entries appended at front.
44  *  DRIVER - compile in a main program to drive the tests.
45  *  DEBUG - compile some debugging printout statements.
46  *  USCR - user supplied comparison routine.
47  */
48 
49 #pragma weak hcreate = _hcreate
50 #pragma weak hdestroy = _hdestroy
51 #pragma weak hsearch = _hsearch
52 
53 #include "synonyms.h"
54 #include <mtlib.h>
55 #include <limits.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #include <thread.h>
60 #include <synch.h>
61 #include <search.h>
62 
63 typedef char *POINTER;
64 
65 #define	SUCCEED		0
66 #define	FAIL		1
67 #define	TRUE		1
68 #define	FALSE		0
69 #define	repeat		for (;;)
70 #define	until(A)	if (A) break;
71 
72 #ifdef OPEN
73 #undef CHAINED
74 #else
75 #ifndef CHAINED
76 #define	OPEN
77 #endif
78 #endif
79 
80 #ifdef MULT
81 #undef DIV
82 #else
83 #ifndef DIV
84 #define	MULT
85 #endif
86 #endif
87 
88 #ifdef START
89 #undef SORTUP
90 #undef SORTDOWN
91 #else
92 #ifdef SORTUP
93 #undef SORTDOWN
94 #endif
95 #endif
96 
97 #ifdef USCR
98 #define	COMPARE(A, B) (* hcompar)((A), (B))
99 extern int (* hcompar)();
100 #else
101 #define	COMPARE(A, B) strcmp((A), (B))
102 #endif
103 
104 #ifdef MULT
105 #define	SHIFT ((CHAR_BIT * sizeof (int)) - m) /* Shift factor */
106 #define	FACTOR 035761254233	/* Magic multiplication factor */
107 #define	HASH hashm		/* Multiplicative hash function */
108 #define	HASH2 hash2m	/* Secondary hash function */
109 static unsigned int hashm(POINTER);
110 static unsigned int hash2m(POINTER);
111 #else
112 #ifdef DIV
113 #define	HASH hashd		/* Division hashing routine */
114 #define	HASH2(A) 1		/* Secondary hash function */
115 static unsigned int hashd();
116 #endif
117 #endif
118 
119 #ifdef CHAINED
120 typedef struct node {	/* Part of the linked list of entries */
121 	ENTRY item;
122 	struct node *next;
123 } NODE;
124 typedef NODE *TABELEM;
125 static NODE **table;	/* The address of the hash table */
126 static ENTRY *build();
127 #else
128 #ifdef OPEN
129 typedef ENTRY TABELEM;	/* What the table contains (TABle ELEMents) */
130 static TABELEM *table;	/* The address of the hash table */
131 static unsigned int count = 0;	/* Number of entries in hash table */
132 #endif
133 #endif
134 
135 static unsigned int length;	/* Size of the hash table */
136 static unsigned int m;		/* Log base 2 of length */
137 static unsigned int prcnt;	/* Number of probes this item */
138 static mutex_t table_lock = DEFAULTMUTEX;
139 #define	RETURN(n)    { lmutex_unlock(&table_lock); return (n); }
140 
141 /*
142  * forward declarations
143  */
144 
145 static unsigned int crunch(POINTER);
146 
147 #ifdef DRIVER
148 static void hdump();
149 
150 main()
151 {
152 	char line[80];	/* Room for the input line */
153 	int i = 0;		/* Data generator */
154 	ENTRY *res;		/* Result of hsearch */
155 	ENTRY *new;		/* Test entry */
156 
157 start:
158 	if (hcreate(5))
159 		printf("Length = %u, m = %u\n", length, m);
160 	else {
161 		fprintf(stderr, "Out of core\n");
162 		exit(FAIL);
163 	}
164 	repeat {
165 	hdump();
166 	printf("Enter a probe: ");
167 	until(EOF == scanf("%s", line) || strcmp(line, "quit") == 0);
168 #ifdef DEBUG
169 	printf("%s, ", line);
170 	printf("division: %d, ", hashd(line));
171 	printf("multiplication: %d\n", hashm(line));
172 #endif
173 	new = (ENTRY *) malloc(sizeof (ENTRY));
174 	if (new == NULL) {
175 	    fprintf(stderr, "Out of core \n");
176 	    exit(FAIL);
177 	} else {
178 	    new->key = malloc((unsigned)strlen(line) + 1);
179 	    if (new->key == NULL) {
180 		fprintf(stderr, "Out of core \n");
181 		exit(FAIL);
182 	    }
183 	    strcpy(new->key, line);
184 	    new->data = malloc(sizeof (int));
185 	    if (new->data == NULL) {
186 		fprintf(stderr, "Out of core \n");
187 		exit(FAIL);
188 	    }
189 	    *new->data = i++;
190 	}
191 	res = hsearch(*new, ENTER);
192 	printf("The number of probes required was %d\n", prcnt);
193 	if (res == (ENTRY *) 0)
194 	    printf("Table is full\n");
195 	else {
196 	    printf("Success: ");
197 	    printf("Key = %s, Value = %d\n", res->key, *res->data);
198 	}
199 	}
200 	printf("Do you wish to start another hash table (yes/no?)");
201 	if (EOF == scanf("%s", line) || strcmp(line, "no") == 0)
202 		exit(SUCCEED);
203 	hdestroy();
204 	goto start;
205 }
206 #endif
207 
208 int
209 hcreate(size_t size)	/* Create a hash table no smaller than size */
210 	/* Minimum "size" for hash table */
211 {
212 	size_t unsize;	/* Holds the shifted size */
213 	TABELEM *local_table;
214 	TABELEM *old_table;
215 	unsigned int local_length;
216 	unsigned int local_m;
217 
218 	if (size == 0)
219 		return (FALSE);
220 
221 	unsize = size;	/* +1 for empty table slot; -1 for ceiling */
222 	local_length = 1;	/* Maximum entries in table */
223 	local_m = 0;		/* Log2 length */
224 	while (unsize) {
225 		unsize >>= 1;
226 		local_length <<= 1;
227 		local_m++;
228 	}
229 
230 	local_table = (TABELEM *) calloc(local_length, sizeof (TABELEM));
231 
232 	lmutex_lock(&table_lock);
233 	old_table = table;
234 	table = local_table;
235 	length = local_length;
236 	m = local_m;
237 	lmutex_unlock(&table_lock);
238 	if (old_table != NULL)
239 		free(old_table);
240 	return (local_table != NULL);
241 }
242 
243 void
244 hdestroy(void)	/* Reset the module to its initial state */
245 {
246 	POINTER local_table;
247 
248 	lmutex_lock(&table_lock);
249 #ifdef CHAINED
250 	int i;
251 	NODE *p, *oldp;
252 	for (i = 0; i < length; i++) {
253 		if (table[i] != (NODE *)NULL) {
254 			p = table[i];
255 			while (p != (NODE *)NULL) {
256 				oldp = p;
257 				p = p -> next;
258 				/*
259 				 * This is a locking vs malloc() violation.
260 				 * Fortunately, it is not actually compiled.
261 				 */
262 				free(oldp);
263 			}
264 		}
265 	}
266 #endif
267 	local_table = (POINTER)table;
268 	table = 0;
269 #ifdef OPEN
270 	count = 0;
271 #endif
272 	lmutex_unlock(&table_lock);
273 	free(local_table);
274 }
275 
276 #ifdef OPEN
277 /*
278  * Hash search of a fixed-capacity table.  Open addressing used to
279  *  resolve collisions.  Algorithm modified from Knuth, Volume 3,
280  *  section 6.4, algorithm D.  Labels flag corresponding actions.
281  */
282 
283 /* Find or insert the item into the table */
284 ENTRY
285 *hsearch(ENTRY item, ACTION action)
286 	/* "item" to be inserted or found */
287 	/* action: FIND or ENTER */
288 {
289 	unsigned int i;	/* Insertion index */
290 	unsigned int c;	/* Secondary probe displacement */
291 
292 	lmutex_lock(&table_lock);
293 	prcnt = 1;
294 
295 /* D1: */
296 	i = HASH(item.key);	/* Primary hash on key */
297 #ifdef DEBUG
298 	if (action == ENTER)
299 		printf("hash = %o\n", i);
300 #endif
301 
302 /* D2: */
303 	if (table[i].key == NULL)	/* Empty slot? */
304 		goto D6;
305 	else if (COMPARE(table[i].key, item.key) == 0)	/* Match? */
306 		RETURN(&table[i]);
307 
308 /* D3: */
309 	c = HASH2(item.key);	/* No match => compute secondary hash */
310 #ifdef DEBUG
311 	if (action == ENTER)
312 		printf("hash2 = %o\n", c);
313 #endif
314 
315 D4:
316 	i = (i + c) % length;	/* Advance to next slot */
317 	prcnt++;
318 
319 /* D5: */
320 	if (table[i].key == NULL)	/* Empty slot? */
321 		goto D6;
322 	else if (COMPARE(table[i].key, item.key) == 0)	/* Match? */
323 		RETURN(&table[i])
324 	else
325 		goto D4;
326 
327 D6:	if (action == FIND)		/* Insert if requested */
328 		RETURN((ENTRY *) NULL);
329 	if (count == (length - 1))	/* Table full? */
330 		RETURN((ENTRY *) 0);
331 
332 #ifdef BRENT
333 /*
334  * Brent's variation of the open addressing algorithm.  Do extra
335  * work during insertion to speed retrieval.  May require switching
336  * of previously placed items.  Adapted from Knuth, Volume 3, section
337  * 4.6 and Brent's article in CACM, volume 10, #2, February 1973.
338  */
339 
340 	{
341 	unsigned int p0 = HASH(item.key);   /* First probe index */
342 	unsigned int c0 = HASH2(item.key);  /* Main branch increment */
343 	unsigned int r = prcnt - 1; /* Current minimum distance */
344 	unsigned int j;		/* Counts along main branch */
345 	unsigned int k;		/* Counts along secondary branch */
346 	unsigned int curj;	/* Current best main branch site */
347 	unsigned int curpos;	/* Current best table index */
348 	unsigned int pj;	/* Main branch indices */
349 	unsigned int cj;	/* Secondary branch increment distance */
350 	unsigned int pjk;	/* Secondary branch probe indices */
351 
352 	if (prcnt >= 3) {
353 		for (j = 0; j < prcnt; j++) {   /* Count along main branch */
354 			pj = (p0 + j * c0) % length; /* New main branch index */
355 			cj = HASH2(table[pj].key); /* Secondary branch incr. */
356 			for (k = 1; j+k <= r; k++) {
357 					/* Count on secondary branch */
358 				pjk = (pj + k * cj) % length;
359 					/* Secondary probe */
360 				if (table[pjk].key == NULL) {
361 					/* Improvement found */
362 					r = j + k; /* Decrement upper bound */
363 					curj = pj; /* Save main probe index */
364 					curpos = pjk;
365 						/* Save secondeary index */
366 				}
367 			}
368 		}
369 		if (r != prcnt - 1) {	/* If an improvement occurred */
370 			table[curpos] = table[curj]; /* Old key to new site */
371 #ifdef DEBUG
372 			printf("Switch curpos = %o, curj = %o, oldi = %o\n",
373 				curj, curpos, i);
374 #endif
375 			i = curj;
376 		}
377 	}
378 	}
379 #endif
380 	count++;			/* Increment table occupancy count */
381 	table[i] = item;		/* Save item */
382 
383 	lmutex_unlock(&table_lock);
384 	return (&table[i]);		/* Address of item is returned */
385 }
386 #endif
387 
388 #ifdef USCR
389 #ifdef DRIVER
390 static int
391 compare(a, b)
392 POINTER a;
393 POINTER b;
394 {
395     return (strcmp(a, b));
396 }
397 
398 int (* hcompar)() = compare;
399 #endif
400 #endif
401 
402 #ifdef CHAINED
403 #ifdef SORTUP
404 #define	STRCMP(A, B) (COMPARE((A), (B)) > 0)
405 #else
406 #ifdef SORTDOWN
407 #define	STRCMP(A, B) (COMPARE((A), (B)) < 0)
408 #else
409 #define	STRCMP(A, B) (COMPARE((A), (B)) != 0)
410 #endif
411 #endif
412 
413 ENTRY
414 *hsearch(item, action)	/* Chained search with sorted lists */
415 ENTRY item;		/* Item to be inserted or found */
416 ACTION action;		/* FIND or ENTER */
417 {
418 	NODE *p;		/* Searches through the linked list */
419 	NODE **q;		/* Where to store the pointer to a new NODE */
420 	unsigned int i;		/* Result of hash */
421 	int res;		/* Result of string comparison */
422 
423 	lmutex_lock(&table_lock);
424 	prcnt = 1;
425 
426 	i = HASH(item.key);	/* Table[i] contains list head */
427 
428 	if (table[i] == (NODE*)NULL) { /* List has not yet been begun */
429 		if (action == FIND)
430 			RETURN((ENTRY *) NULL);
431 		else
432 			RETURN(build(&table[i], (NODE *) NULL, item));
433 	} else {		/* List is not empty */
434 		q = &table[i];
435 		p = table[i];
436 		while (p != NULL && (res = STRCMP(item.key, p->item.key))) {
437 			prcnt++;
438 			q = &(p->next);
439 			p = p->next;
440 		}
441 
442 		if (p != NULL && res == 0)	/* Item has been found */
443 			RETURN(&(p->item));
444 		else {			/* Item is not yet on list */
445 			if (action == FIND)
446 				RETURN((ENTRY *) NULL);
447 			else
448 #ifdef START
449 				RETURN(build(&table[i], table[i], item));
450 #else
451 				RETURN(build(q, p, item));
452 #endif
453 		}
454 	}
455 }
456 
457 static ENTRY
458 *build(last, next, item)
459 NODE **last;		/* Where to store in last list item */
460 NODE *next;		/* Link to next list item */
461 ENTRY item;		/* Item to be kept in node */
462 {
463 	/*
464 	 * This is a locking vs malloc() violation.
465 	 * Fortunately, it is not actually compiled.
466 	 */
467 	NODE *p = (NODE *) malloc(sizeof (NODE));
468 
469 	if (p != NULL) {
470 		p->item = item;
471 		*last = p;
472 		p->next = next;
473 		return (&(p->item));
474 	} else
475 		return (NULL);
476 }
477 #endif
478 
479 #ifdef DIV
480 static unsigned int
481 hashd(key)		/* Division hashing scheme */
482 POINTER key;		/* Key to be hashed */
483 {
484     return (crunch(key) % length);
485 }
486 #else
487 #ifdef MULT
488 /*
489  *  NOTE: The following algorithm only works on machines where
490  *  the results of multiplying two integers is the least
491  *  significant part of the double word integer required to hold
492  *  the result.  It is adapted from Knuth, Volume 3, section 6.4.
493  */
494 
495 static unsigned int
496 hashm(POINTER key)	/* Multiplication hashing scheme */
497 	/* "key" to be hashed */
498 {
499 	return ((int)(((unsigned)(crunch(key) * FACTOR)) >> SHIFT));
500 }
501 
502 /*
503  * Secondary hashing, for use with multiplicitive hashing scheme.
504  * Adapted from Knuth, Volume 3, section 6.4.
505  */
506 
507 static unsigned int
508 hash2m(POINTER key)	/* Secondary hashing routine */
509 	/* "key" is the string to be hashed */
510 {
511     return (((unsigned int)((crunch(key) * FACTOR) << m) >> SHIFT) | 1);
512 }
513 #endif
514 #endif
515 
516 /* PJ Weinberger's hash function */
517 static unsigned int
518 crunch(POINTER key)	/* Convert multicharacter key to unsigned int */
519 {
520 	unsigned int h = 0;
521 	unsigned int g;
522 	unsigned char *p = (unsigned char *)key;
523 
524 	for (; *p; p++) {
525 		h = (h << 4) + *p;
526 		g = h & 0xf0000000;
527 		if (g != 0) {
528 			h ^= (g >> 24);
529 			h ^= g;
530 		}
531 	}
532 	return (h);
533 }
534 
535 #ifdef DRIVER
536 static void
537 hdump()			/* Dumps loc, data, probe count, key */
538 {
539 	unsigned int i;	/* Counts table slots */
540 #ifdef OPEN
541 	unsigned int sum = 0;	/* Counts probes */
542 #else
543 #ifdef CHAINED
544 	NODE *a;		/* Current Node on list */
545 #endif
546 #endif
547 
548 	for (i = 0; i < length; i++)
549 #ifdef OPEN
550 		if (table[i].key == NULL)
551 			printf("%o.\t-,\t-,\t(NULL)\n", i);
552 		else {
553 			unsigned int oldpr = prcnt;
554 				/* Save current probe count */
555 
556 			hsearch(table[i], FIND);
557 			sum += prcnt;
558 			printf("%o.\t%d,\t%d,\t%s\n", i,
559 				*table[i].data, prcnt, table[i].key);
560 			prcnt = oldpr;
561 		}
562 	printf("Total probes = %d\n", sum);
563 #else
564 #ifdef CHAINED
565 	if (table[i] == NULL)
566 	    printf("%o.\t-,\t-,\t(NULL)\n", i);
567 	else {
568 	    printf("%o.", i);
569 	    for (a = table[i]; a != NULL; a = a->next)
570 		printf("\t%d,\t%#0.4x,\t%s\n",
571 		    *a->item.data, a, a->item.key);
572 	}
573 #endif
574 #endif
575 }
576 #endif
577