xref: /illumos-gate/usr/src/lib/libc/port/gen/hsearch.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * Compile time switches:
34  *
35  *  MULT - use a multiplicative hashing function.
36  *  DIV - use the remainder mod table size as a hashing function.
37  *  CHAINED - use a linked list to resolve collisions.
38  *  OPEN - use open addressing to resolve collisions.
39  *  BRENT - use Brent's modification to improve the OPEN algorithm.
40  *  SORTUP - CHAINED list is sorted in increasing order.
41  *  SORTDOWN - CHAINED list is sorted in decreasing order.
42  *  START - CHAINED list with entries appended at front.
43  *  DRIVER - compile in a main program to drive the tests.
44  *  DEBUG - compile some debugging printout statements.
45  *  USCR - user supplied comparison routine.
46  */
47 
48 #pragma weak _hcreate = hcreate
49 #pragma weak _hdestroy = hdestroy
50 #pragma weak _hsearch = hsearch
51 
52 #include "lint.h"
53 #include <mtlib.h>
54 #include <limits.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <thread.h>
59 #include <synch.h>
60 #include <search.h>
61 
62 typedef char *POINTER;
63 
64 #define	SUCCEED		0
65 #define	FAIL		1
66 #define	TRUE		1
67 #define	FALSE		0
68 #define	repeat		for (;;)
69 #define	until(A)	if (A) break;
70 
71 #ifdef OPEN
72 #undef CHAINED
73 #else
74 #ifndef CHAINED
75 #define	OPEN
76 #endif
77 #endif
78 
79 #ifdef MULT
80 #undef DIV
81 #else
82 #ifndef DIV
83 #define	MULT
84 #endif
85 #endif
86 
87 #ifdef START
88 #undef SORTUP
89 #undef SORTDOWN
90 #else
91 #ifdef SORTUP
92 #undef SORTDOWN
93 #endif
94 #endif
95 
96 #ifdef USCR
97 #define	COMPARE(A, B) (* hcompar)((A), (B))
98 extern int (* hcompar)();
99 #else
100 #define	COMPARE(A, B) strcmp((A), (B))
101 #endif
102 
103 #ifdef MULT
104 #define	SHIFT ((CHAR_BIT * sizeof (int)) - m) /* Shift factor */
105 #define	FACTOR 035761254233	/* Magic multiplication factor */
106 #define	HASH hashm		/* Multiplicative hash function */
107 #define	HASH2 hash2m	/* Secondary hash function */
108 static unsigned int hashm(POINTER);
109 static unsigned int hash2m(POINTER);
110 #else
111 #ifdef DIV
112 #define	HASH hashd		/* Division hashing routine */
113 #define	HASH2(A) 1		/* Secondary hash function */
114 static unsigned int hashd();
115 #endif
116 #endif
117 
118 #ifdef CHAINED
119 typedef struct node {	/* Part of the linked list of entries */
120 	ENTRY item;
121 	struct node *next;
122 } NODE;
123 typedef NODE *TABELEM;
124 static NODE **table;	/* The address of the hash table */
125 static ENTRY *build();
126 #else
127 #ifdef OPEN
128 typedef ENTRY TABELEM;	/* What the table contains (TABle ELEMents) */
129 static TABELEM *table;	/* The address of the hash table */
130 static unsigned int count = 0;	/* Number of entries in hash table */
131 #endif
132 #endif
133 
134 static unsigned int length;	/* Size of the hash table */
135 static unsigned int m;		/* Log base 2 of length */
136 static unsigned int prcnt;	/* Number of probes this item */
137 static mutex_t table_lock = DEFAULTMUTEX;
138 #define	RETURN(n)    { lmutex_unlock(&table_lock); return (n); }
139 
140 /*
141  * forward declarations
142  */
143 
144 static unsigned int crunch(POINTER);
145 
146 #ifdef DRIVER
147 static void hdump();
148 
149 main()
150 {
151 	char line[80];	/* Room for the input line */
152 	int i = 0;		/* Data generator */
153 	ENTRY *res;		/* Result of hsearch */
154 	ENTRY *new;		/* Test entry */
155 
156 start:
157 	if (hcreate(5))
158 		printf("Length = %u, m = %u\n", length, m);
159 	else {
160 		fprintf(stderr, "Out of core\n");
161 		exit(FAIL);
162 	}
163 	repeat {
164 	hdump();
165 	printf("Enter a probe: ");
166 	until(EOF == scanf("%s", line) || strcmp(line, "quit") == 0);
167 #ifdef DEBUG
168 	printf("%s, ", line);
169 	printf("division: %d, ", hashd(line));
170 	printf("multiplication: %d\n", hashm(line));
171 #endif
172 	new = (ENTRY *) malloc(sizeof (ENTRY));
173 	if (new == NULL) {
174 		fprintf(stderr, "Out of core \n");
175 		exit(FAIL);
176 	} else {
177 		new->key = malloc((unsigned)strlen(line) + 1);
178 		if (new->key == NULL) {
179 			fprintf(stderr, "Out of core \n");
180 			exit(FAIL);
181 		}
182 		strcpy(new->key, line);
183 		new->data = malloc(sizeof (int));
184 		if (new->data == NULL) {
185 			fprintf(stderr, "Out of core \n");
186 			exit(FAIL);
187 		}
188 		*new->data = i++;
189 	}
190 	res = hsearch(*new, ENTER);
191 	printf("The number of probes required was %d\n", prcnt);
192 	if (res == (ENTRY *) 0)
193 		printf("Table is full\n");
194 	else {
195 		printf("Success: ");
196 		printf("Key = %s, Value = %d\n", res->key, *res->data);
197 	}
198 	}
199 	printf("Do you wish to start another hash table (yes/no?)");
200 	if (EOF == scanf("%s", line) || strcmp(line, "no") == 0)
201 		exit(SUCCEED);
202 	hdestroy();
203 	goto start;
204 }
205 #endif
206 
207 int
208 hcreate(size_t size)	/* Create a hash table no smaller than size */
209 	/* Minimum "size" for hash table */
210 {
211 	size_t unsize;	/* Holds the shifted size */
212 	TABELEM *local_table;
213 	TABELEM *old_table;
214 	unsigned int local_length;
215 	unsigned int local_m;
216 
217 	if (size == 0)
218 		return (FALSE);
219 
220 	unsize = size;	/* +1 for empty table slot; -1 for ceiling */
221 	local_length = 1;	/* Maximum entries in table */
222 	local_m = 0;		/* Log2 length */
223 	while (unsize) {
224 		unsize >>= 1;
225 		local_length <<= 1;
226 		local_m++;
227 	}
228 
229 	local_table = (TABELEM *) calloc(local_length, sizeof (TABELEM));
230 
231 	lmutex_lock(&table_lock);
232 	old_table = table;
233 	table = local_table;
234 	length = local_length;
235 	m = local_m;
236 	lmutex_unlock(&table_lock);
237 	if (old_table != NULL)
238 		free(old_table);
239 	return (local_table != NULL);
240 }
241 
242 void
243 hdestroy(void)	/* Reset the module to its initial state */
244 {
245 	POINTER local_table;
246 
247 	lmutex_lock(&table_lock);
248 #ifdef CHAINED
249 	int i;
250 	NODE *p, *oldp;
251 	for (i = 0; i < length; i++) {
252 		if (table[i] != (NODE *)NULL) {
253 			p = table[i];
254 			while (p != (NODE *)NULL) {
255 				oldp = p;
256 				p = p -> next;
257 				/*
258 				 * This is a locking vs malloc() violation.
259 				 * Fortunately, it is not actually compiled.
260 				 */
261 				free(oldp);
262 			}
263 		}
264 	}
265 #endif
266 	local_table = (POINTER)table;
267 	table = 0;
268 #ifdef OPEN
269 	count = 0;
270 #endif
271 	lmutex_unlock(&table_lock);
272 	free(local_table);
273 }
274 
275 #ifdef OPEN
276 /*
277  * Hash search of a fixed-capacity table.  Open addressing used to
278  *  resolve collisions.  Algorithm modified from Knuth, Volume 3,
279  *  section 6.4, algorithm D.  Labels flag corresponding actions.
280  */
281 
282 /* Find or insert the item into the table */
283 ENTRY
284 *hsearch(ENTRY item, ACTION action)
285 	/* "item" to be inserted or found */
286 	/* action: FIND or ENTER */
287 {
288 	unsigned int i;	/* Insertion index */
289 	unsigned int c;	/* Secondary probe displacement */
290 
291 	lmutex_lock(&table_lock);
292 	prcnt = 1;
293 
294 /* D1: */
295 	i = HASH(item.key);	/* Primary hash on key */
296 #ifdef DEBUG
297 	if (action == ENTER)
298 		printf("hash = %o\n", i);
299 #endif
300 
301 /* D2: */
302 	if (table[i].key == NULL)	/* Empty slot? */
303 		goto D6;
304 	else if (COMPARE(table[i].key, item.key) == 0)	/* Match? */
305 		RETURN(&table[i]);
306 
307 /* D3: */
308 	c = HASH2(item.key);	/* No match => compute secondary hash */
309 #ifdef DEBUG
310 	if (action == ENTER)
311 		printf("hash2 = %o\n", c);
312 #endif
313 
314 D4:
315 	i = (i + c) % length;	/* Advance to next slot */
316 	prcnt++;
317 
318 /* D5: */
319 	if (table[i].key == NULL)	/* Empty slot? */
320 		goto D6;
321 	else if (COMPARE(table[i].key, item.key) == 0)	/* Match? */
322 		RETURN(&table[i])
323 	else
324 		goto D4;
325 
326 D6:	if (action == FIND)		/* Insert if requested */
327 		RETURN((ENTRY *) NULL);
328 	if (count == (length - 1))	/* Table full? */
329 		RETURN((ENTRY *) 0);
330 
331 #ifdef BRENT
332 /*
333  * Brent's variation of the open addressing algorithm.  Do extra
334  * work during insertion to speed retrieval.  May require switching
335  * of previously placed items.  Adapted from Knuth, Volume 3, section
336  * 4.6 and Brent's article in CACM, volume 10, #2, February 1973.
337  */
338 
339 	{
340 	unsigned int p0 = HASH(item.key);   /* First probe index */
341 	unsigned int c0 = HASH2(item.key);  /* Main branch increment */
342 	unsigned int r = prcnt - 1; /* Current minimum distance */
343 	unsigned int j;		/* Counts along main branch */
344 	unsigned int k;		/* Counts along secondary branch */
345 	unsigned int curj;	/* Current best main branch site */
346 	unsigned int curpos;	/* Current best table index */
347 	unsigned int pj;	/* Main branch indices */
348 	unsigned int cj;	/* Secondary branch increment distance */
349 	unsigned int pjk;	/* Secondary branch probe indices */
350 
351 	if (prcnt >= 3) {
352 		for (j = 0; j < prcnt; j++) {   /* Count along main branch */
353 			pj = (p0 + j * c0) % length; /* New main branch index */
354 			cj = HASH2(table[pj].key); /* Secondary branch incr. */
355 			for (k = 1; j+k <= r; k++) {
356 					/* Count on secondary branch */
357 				pjk = (pj + k * cj) % length;
358 					/* Secondary probe */
359 				if (table[pjk].key == NULL) {
360 					/* Improvement found */
361 					r = j + k; /* Decrement upper bound */
362 					curj = pj; /* Save main probe index */
363 					curpos = pjk;
364 						/* Save secondeary index */
365 				}
366 			}
367 		}
368 		if (r != prcnt - 1) {	/* If an improvement occurred */
369 			table[curpos] = table[curj]; /* Old key to new site */
370 #ifdef DEBUG
371 			printf("Switch curpos = %o, curj = %o, oldi = %o\n",
372 				curj, curpos, i);
373 #endif
374 			i = curj;
375 		}
376 	}
377 	}
378 #endif
379 	count++;			/* Increment table occupancy count */
380 	table[i] = item;		/* Save item */
381 
382 	lmutex_unlock(&table_lock);
383 	return (&table[i]);		/* Address of item is returned */
384 }
385 #endif
386 
387 #ifdef USCR
388 #ifdef DRIVER
389 static int
390 compare(a, b)
391 POINTER a;
392 POINTER b;
393 {
394     return (strcmp(a, b));
395 }
396 
397 int (* hcompar)() = compare;
398 #endif
399 #endif
400 
401 #ifdef CHAINED
402 #ifdef SORTUP
403 #define	STRCMP(A, B) (COMPARE((A), (B)) > 0)
404 #else
405 #ifdef SORTDOWN
406 #define	STRCMP(A, B) (COMPARE((A), (B)) < 0)
407 #else
408 #define	STRCMP(A, B) (COMPARE((A), (B)) != 0)
409 #endif
410 #endif
411 
412 ENTRY
413 *hsearch(item, action)	/* Chained search with sorted lists */
414 ENTRY item;		/* Item to be inserted or found */
415 ACTION action;		/* FIND or ENTER */
416 {
417 	NODE *p;		/* Searches through the linked list */
418 	NODE **q;		/* Where to store the pointer to a new NODE */
419 	unsigned int i;		/* Result of hash */
420 	int res;		/* Result of string comparison */
421 
422 	lmutex_lock(&table_lock);
423 	prcnt = 1;
424 
425 	i = HASH(item.key);	/* Table[i] contains list head */
426 
427 	if (table[i] == (NODE*)NULL) { /* List has not yet been begun */
428 		if (action == FIND)
429 			RETURN((ENTRY *) NULL);
430 		else
431 			RETURN(build(&table[i], (NODE *) NULL, item));
432 	} else {		/* List is not empty */
433 		q = &table[i];
434 		p = table[i];
435 		while (p != NULL && (res = STRCMP(item.key, p->item.key))) {
436 			prcnt++;
437 			q = &(p->next);
438 			p = p->next;
439 		}
440 
441 		if (p != NULL && res == 0)	/* Item has been found */
442 			RETURN(&(p->item));
443 		else {			/* Item is not yet on list */
444 			if (action == FIND)
445 				RETURN((ENTRY *) NULL);
446 			else
447 #ifdef START
448 				RETURN(build(&table[i], table[i], item));
449 #else
450 				RETURN(build(q, p, item));
451 #endif
452 		}
453 	}
454 }
455 
456 static ENTRY
457 *build(last, next, item)
458 NODE **last;		/* Where to store in last list item */
459 NODE *next;		/* Link to next list item */
460 ENTRY item;		/* Item to be kept in node */
461 {
462 	/*
463 	 * This is a locking vs malloc() violation.
464 	 * Fortunately, it is not actually compiled.
465 	 */
466 	NODE *p = (NODE *) malloc(sizeof (NODE));
467 
468 	if (p != NULL) {
469 		p->item = item;
470 		*last = p;
471 		p->next = next;
472 		return (&(p->item));
473 	} else
474 		return (NULL);
475 }
476 #endif
477 
478 #ifdef DIV
479 static unsigned int
480 hashd(key)		/* Division hashing scheme */
481 POINTER key;		/* Key to be hashed */
482 {
483     return (crunch(key) % length);
484 }
485 #else
486 #ifdef MULT
487 /*
488  *  NOTE: The following algorithm only works on machines where
489  *  the results of multiplying two integers is the least
490  *  significant part of the double word integer required to hold
491  *  the result.  It is adapted from Knuth, Volume 3, section 6.4.
492  */
493 
494 static unsigned int
495 hashm(POINTER key)	/* Multiplication hashing scheme */
496 	/* "key" to be hashed */
497 {
498 	return ((int)(((unsigned)(crunch(key) * FACTOR)) >> SHIFT));
499 }
500 
501 /*
502  * Secondary hashing, for use with multiplicitive hashing scheme.
503  * Adapted from Knuth, Volume 3, section 6.4.
504  */
505 
506 static unsigned int
507 hash2m(POINTER key)	/* Secondary hashing routine */
508 	/* "key" is the string to be hashed */
509 {
510     return (((unsigned int)((crunch(key) * FACTOR) << m) >> SHIFT) | 1);
511 }
512 #endif
513 #endif
514 
515 /* PJ Weinberger's hash function */
516 static unsigned int
517 crunch(POINTER key)	/* Convert multicharacter key to unsigned int */
518 {
519 	unsigned int h = 0;
520 	unsigned int g;
521 	unsigned char *p = (unsigned char *)key;
522 
523 	for (; *p; p++) {
524 		h = (h << 4) + *p;
525 		g = h & 0xf0000000;
526 		if (g != 0) {
527 			h ^= (g >> 24);
528 			h ^= g;
529 		}
530 	}
531 	return (h);
532 }
533 
534 #ifdef DRIVER
535 static void
536 hdump()			/* Dumps loc, data, probe count, key */
537 {
538 	unsigned int i;	/* Counts table slots */
539 #ifdef OPEN
540 	unsigned int sum = 0;	/* Counts probes */
541 #else
542 #ifdef CHAINED
543 	NODE *a;		/* Current Node on list */
544 #endif
545 #endif
546 
547 	for (i = 0; i < length; i++)
548 #ifdef OPEN
549 		if (table[i].key == NULL)
550 			printf("%o.\t-,\t-,\t(NULL)\n", i);
551 		else {
552 			unsigned int oldpr = prcnt;
553 				/* Save current probe count */
554 
555 			hsearch(table[i], FIND);
556 			sum += prcnt;
557 			printf("%o.\t%d,\t%d,\t%s\n", i,
558 			    *table[i].data, prcnt, table[i].key);
559 			prcnt = oldpr;
560 		}
561 	printf("Total probes = %d\n", sum);
562 #else
563 #ifdef CHAINED
564 	if (table[i] == NULL)
565 		printf("%o.\t-,\t-,\t(NULL)\n", i);
566 	else {
567 		printf("%o.", i);
568 		for (a = table[i]; a != NULL; a = a->next)
569 			printf("\t%d,\t%#0.4x,\t%s\n",
570 			    *a->item.data, a, a->item.key);
571 	}
572 #endif
573 #endif
574 }
575 #endif
576