xref: /illumos-gate/usr/src/lib/libc/port/gen/hsearch.c (revision a386cc11a86ecb60f5a48078d22c1500e2ad003e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 /*
31  * Compile time switches:
32  *
33  *  MULT - use a multiplicative hashing function.
34  *  DIV - use the remainder mod table size as a hashing function.
35  *  CHAINED - use a linked list to resolve collisions.
36  *  OPEN - use open addressing to resolve collisions.
37  *  BRENT - use Brent's modification to improve the OPEN algorithm.
38  *  SORTUP - CHAINED list is sorted in increasing order.
39  *  SORTDOWN - CHAINED list is sorted in decreasing order.
40  *  START - CHAINED list with entries appended at front.
41  *  DRIVER - compile in a main program to drive the tests.
42  *  DEBUG - compile some debugging printout statements.
43  *  USCR - user supplied comparison routine.
44  */
45 
46 #pragma weak _hcreate = hcreate
47 #pragma weak _hdestroy = hdestroy
48 #pragma weak _hsearch = hsearch
49 
50 #include "lint.h"
51 #include <mtlib.h>
52 #include <limits.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <thread.h>
57 #include <synch.h>
58 #include <search.h>
59 
60 typedef char *POINTER;
61 
62 #define	SUCCEED		0
63 #define	FAIL		1
64 #define	TRUE		1
65 #define	FALSE		0
66 #define	repeat		for (;;)
67 #define	until(A)	if (A) break;
68 
69 #ifdef OPEN
70 #undef CHAINED
71 #else
72 #ifndef CHAINED
73 #define	OPEN
74 #endif
75 #endif
76 
77 #ifdef MULT
78 #undef DIV
79 #else
80 #ifndef DIV
81 #define	MULT
82 #endif
83 #endif
84 
85 #ifdef START
86 #undef SORTUP
87 #undef SORTDOWN
88 #else
89 #ifdef SORTUP
90 #undef SORTDOWN
91 #endif
92 #endif
93 
94 #ifdef USCR
95 #define	COMPARE(A, B) (* hcompar)((A), (B))
96 extern int (* hcompar)();
97 #else
98 #define	COMPARE(A, B) strcmp((A), (B))
99 #endif
100 
101 #ifdef MULT
102 #define	SHIFT ((CHAR_BIT * sizeof (int)) - m) /* Shift factor */
103 #define	FACTOR 035761254233	/* Magic multiplication factor */
104 #define	HASH hashm		/* Multiplicative hash function */
105 #define	HASH2 hash2m	/* Secondary hash function */
106 static unsigned int hashm(POINTER);
107 static unsigned int hash2m(POINTER);
108 #else
109 #ifdef DIV
110 #define	HASH hashd		/* Division hashing routine */
111 #define	HASH2(A) 1		/* Secondary hash function */
112 static unsigned int hashd();
113 #endif
114 #endif
115 
116 #ifdef CHAINED
117 typedef struct node {	/* Part of the linked list of entries */
118 	ENTRY item;
119 	struct node *next;
120 } NODE;
121 typedef NODE *TABELEM;
122 static NODE **table;	/* The address of the hash table */
123 static ENTRY *build();
124 #else
125 #ifdef OPEN
126 typedef ENTRY TABELEM;	/* What the table contains (TABle ELEMents) */
127 static TABELEM *table;	/* The address of the hash table */
128 static unsigned int count = 0;	/* Number of entries in hash table */
129 #endif
130 #endif
131 
132 static unsigned int length;	/* Size of the hash table */
133 static unsigned int m;		/* Log base 2 of length */
134 static unsigned int prcnt;	/* Number of probes this item */
135 static mutex_t table_lock = DEFAULTMUTEX;
136 #define	RETURN(n)    { lmutex_unlock(&table_lock); return (n); }
137 
138 /*
139  * forward declarations
140  */
141 
142 static unsigned int crunch(POINTER);
143 
144 #ifdef DRIVER
145 static void hdump();
146 
147 main()
148 {
149 	char line[80];	/* Room for the input line */
150 	int i = 0;		/* Data generator */
151 	ENTRY *res;		/* Result of hsearch */
152 	ENTRY *new;		/* Test entry */
153 
154 start:
155 	if (hcreate(5))
156 		printf("Length = %u, m = %u\n", length, m);
157 	else {
158 		fprintf(stderr, "Out of core\n");
159 		exit(FAIL);
160 	}
161 	repeat {
162 	hdump();
163 	printf("Enter a probe: ");
164 	until(EOF == scanf("%s", line) || strcmp(line, "quit") == 0);
165 #ifdef DEBUG
166 	printf("%s, ", line);
167 	printf("division: %d, ", hashd(line));
168 	printf("multiplication: %d\n", hashm(line));
169 #endif
170 	new = (ENTRY *) malloc(sizeof (ENTRY));
171 	if (new == NULL) {
172 		fprintf(stderr, "Out of core \n");
173 		exit(FAIL);
174 	} else {
175 		new->key = malloc((unsigned)strlen(line) + 1);
176 		if (new->key == NULL) {
177 			fprintf(stderr, "Out of core \n");
178 			exit(FAIL);
179 		}
180 		(void) strcpy(new->key, line);
181 		new->data = malloc(sizeof (int));
182 		if (new->data == NULL) {
183 			fprintf(stderr, "Out of core \n");
184 			exit(FAIL);
185 		}
186 		*new->data = i++;
187 	}
188 	res = hsearch(*new, ENTER);
189 	printf("The number of probes required was %d\n", prcnt);
190 	if (res == (ENTRY *) 0)
191 		printf("Table is full\n");
192 	else {
193 		printf("Success: ");
194 		printf("Key = %s, Value = %d\n", res->key, *res->data);
195 	}
196 	}
197 	printf("Do you wish to start another hash table (yes/no?)");
198 	if (EOF == scanf("%s", line) || strcmp(line, "no") == 0)
199 		exit(SUCCEED);
200 	hdestroy();
201 	goto start;
202 }
203 #endif
204 
205 int
206 hcreate(size_t size)	/* Create a hash table no smaller than size */
207 	/* Minimum "size" for hash table */
208 {
209 	size_t unsize;	/* Holds the shifted size */
210 	TABELEM *local_table;
211 	TABELEM *old_table;
212 	unsigned int local_length;
213 	unsigned int local_m;
214 
215 	if (size == 0)
216 		return (FALSE);
217 
218 	unsize = size;	/* +1 for empty table slot; -1 for ceiling */
219 	local_length = 1;	/* Maximum entries in table */
220 	local_m = 0;		/* Log2 length */
221 	while (unsize) {
222 		unsize >>= 1;
223 		local_length <<= 1;
224 		local_m++;
225 	}
226 
227 	local_table = (TABELEM *) calloc(local_length, sizeof (TABELEM));
228 
229 	lmutex_lock(&table_lock);
230 	old_table = table;
231 	table = local_table;
232 	length = local_length;
233 	m = local_m;
234 	lmutex_unlock(&table_lock);
235 	if (old_table != NULL)
236 		free(old_table);
237 	return (local_table != NULL);
238 }
239 
240 void
241 hdestroy(void)	/* Reset the module to its initial state */
242 {
243 	POINTER local_table;
244 
245 	lmutex_lock(&table_lock);
246 #ifdef CHAINED
247 	int i;
248 	NODE *p, *oldp;
249 	for (i = 0; i < length; i++) {
250 		if (table[i] != (NODE *)NULL) {
251 			p = table[i];
252 			while (p != (NODE *)NULL) {
253 				oldp = p;
254 				p = p -> next;
255 				/*
256 				 * This is a locking vs malloc() violation.
257 				 * Fortunately, it is not actually compiled.
258 				 */
259 				free(oldp);
260 			}
261 		}
262 	}
263 #endif
264 	local_table = (POINTER)table;
265 	table = 0;
266 #ifdef OPEN
267 	count = 0;
268 #endif
269 	lmutex_unlock(&table_lock);
270 	free(local_table);
271 }
272 
273 #ifdef OPEN
274 /*
275  * Hash search of a fixed-capacity table.  Open addressing used to
276  *  resolve collisions.  Algorithm modified from Knuth, Volume 3,
277  *  section 6.4, algorithm D.  Labels flag corresponding actions.
278  */
279 
280 /* Find or insert the item into the table */
281 ENTRY
282 *hsearch(ENTRY item, ACTION action)
283 	/* "item" to be inserted or found */
284 	/* action: FIND or ENTER */
285 {
286 	unsigned int i;	/* Insertion index */
287 	unsigned int c;	/* Secondary probe displacement */
288 
289 	lmutex_lock(&table_lock);
290 	prcnt = 1;
291 
292 /* D1: */
293 	i = HASH(item.key);	/* Primary hash on key */
294 #ifdef DEBUG
295 	if (action == ENTER)
296 		printf("hash = %o\n", i);
297 #endif
298 
299 /* D2: */
300 	if (table[i].key == NULL)	/* Empty slot? */
301 		goto D6;
302 	else if (COMPARE(table[i].key, item.key) == 0)	/* Match? */
303 		RETURN(&table[i]);
304 
305 /* D3: */
306 	c = HASH2(item.key);	/* No match => compute secondary hash */
307 #ifdef DEBUG
308 	if (action == ENTER)
309 		printf("hash2 = %o\n", c);
310 #endif
311 
312 D4:
313 	i = (i + c) % length;	/* Advance to next slot */
314 	prcnt++;
315 
316 /* D5: */
317 	if (table[i].key == NULL)	/* Empty slot? */
318 		goto D6;
319 	else if (COMPARE(table[i].key, item.key) == 0)	/* Match? */
320 		RETURN(&table[i])
321 	else
322 		goto D4;
323 
324 D6:	if (action == FIND)		/* Insert if requested */
325 		RETURN((ENTRY *) NULL);
326 	if (count == (length - 1))	/* Table full? */
327 		RETURN((ENTRY *) 0);
328 
329 #ifdef BRENT
330 /*
331  * Brent's variation of the open addressing algorithm.  Do extra
332  * work during insertion to speed retrieval.  May require switching
333  * of previously placed items.  Adapted from Knuth, Volume 3, section
334  * 4.6 and Brent's article in CACM, volume 10, #2, February 1973.
335  */
336 
337 	{
338 	unsigned int p0 = HASH(item.key);   /* First probe index */
339 	unsigned int c0 = HASH2(item.key);  /* Main branch increment */
340 	unsigned int r = prcnt - 1; /* Current minimum distance */
341 	unsigned int j;		/* Counts along main branch */
342 	unsigned int k;		/* Counts along secondary branch */
343 	unsigned int curj;	/* Current best main branch site */
344 	unsigned int curpos;	/* Current best table index */
345 	unsigned int pj;	/* Main branch indices */
346 	unsigned int cj;	/* Secondary branch increment distance */
347 	unsigned int pjk;	/* Secondary branch probe indices */
348 
349 	if (prcnt >= 3) {
350 		for (j = 0; j < prcnt; j++) {   /* Count along main branch */
351 			pj = (p0 + j * c0) % length; /* New main branch index */
352 			cj = HASH2(table[pj].key); /* Secondary branch incr. */
353 			for (k = 1; j+k <= r; k++) {
354 					/* Count on secondary branch */
355 				pjk = (pj + k * cj) % length;
356 					/* Secondary probe */
357 				if (table[pjk].key == NULL) {
358 					/* Improvement found */
359 					r = j + k; /* Decrement upper bound */
360 					curj = pj; /* Save main probe index */
361 					curpos = pjk;
362 						/* Save secondeary index */
363 				}
364 			}
365 		}
366 		if (r != prcnt - 1) {	/* If an improvement occurred */
367 			table[curpos] = table[curj]; /* Old key to new site */
368 #ifdef DEBUG
369 			printf("Switch curpos = %o, curj = %o, oldi = %o\n",
370 				curj, curpos, i);
371 #endif
372 			i = curj;
373 		}
374 	}
375 	}
376 #endif
377 	count++;			/* Increment table occupancy count */
378 	table[i] = item;		/* Save item */
379 
380 	lmutex_unlock(&table_lock);
381 	return (&table[i]);		/* Address of item is returned */
382 }
383 #endif
384 
385 #ifdef USCR
386 #ifdef DRIVER
387 static int
388 compare(a, b)
389 POINTER a;
390 POINTER b;
391 {
392     return (strcmp(a, b));
393 }
394 
395 int (* hcompar)() = compare;
396 #endif
397 #endif
398 
399 #ifdef CHAINED
400 #ifdef SORTUP
401 #define	STRCMP(A, B) (COMPARE((A), (B)) > 0)
402 #else
403 #ifdef SORTDOWN
404 #define	STRCMP(A, B) (COMPARE((A), (B)) < 0)
405 #else
406 #define	STRCMP(A, B) (COMPARE((A), (B)) != 0)
407 #endif
408 #endif
409 
410 ENTRY
411 *hsearch(item, action)	/* Chained search with sorted lists */
412 ENTRY item;		/* Item to be inserted or found */
413 ACTION action;		/* FIND or ENTER */
414 {
415 	NODE *p;		/* Searches through the linked list */
416 	NODE **q;		/* Where to store the pointer to a new NODE */
417 	unsigned int i;		/* Result of hash */
418 	int res;		/* Result of string comparison */
419 
420 	lmutex_lock(&table_lock);
421 	prcnt = 1;
422 
423 	i = HASH(item.key);	/* Table[i] contains list head */
424 
425 	if (table[i] == (NODE*)NULL) { /* List has not yet been begun */
426 		if (action == FIND)
427 			RETURN((ENTRY *) NULL);
428 		else
429 			RETURN(build(&table[i], (NODE *) NULL, item));
430 	} else {		/* List is not empty */
431 		q = &table[i];
432 		p = table[i];
433 		while (p != NULL && (res = STRCMP(item.key, p->item.key))) {
434 			prcnt++;
435 			q = &(p->next);
436 			p = p->next;
437 		}
438 
439 		if (p != NULL && res == 0)	/* Item has been found */
440 			RETURN(&(p->item));
441 		else {			/* Item is not yet on list */
442 			if (action == FIND)
443 				RETURN((ENTRY *) NULL);
444 			else
445 #ifdef START
446 				RETURN(build(&table[i], table[i], item));
447 #else
448 				RETURN(build(q, p, item));
449 #endif
450 		}
451 	}
452 }
453 
454 static ENTRY
455 *build(last, next, item)
456 NODE **last;		/* Where to store in last list item */
457 NODE *next;		/* Link to next list item */
458 ENTRY item;		/* Item to be kept in node */
459 {
460 	/*
461 	 * This is a locking vs malloc() violation.
462 	 * Fortunately, it is not actually compiled.
463 	 */
464 	NODE *p = (NODE *) malloc(sizeof (NODE));
465 
466 	if (p != NULL) {
467 		p->item = item;
468 		*last = p;
469 		p->next = next;
470 		return (&(p->item));
471 	} else
472 		return (NULL);
473 }
474 #endif
475 
476 #ifdef DIV
477 static unsigned int
478 hashd(key)		/* Division hashing scheme */
479 POINTER key;		/* Key to be hashed */
480 {
481     return (crunch(key) % length);
482 }
483 #else
484 #ifdef MULT
485 /*
486  *  NOTE: The following algorithm only works on machines where
487  *  the results of multiplying two integers is the least
488  *  significant part of the double word integer required to hold
489  *  the result.  It is adapted from Knuth, Volume 3, section 6.4.
490  */
491 
492 static unsigned int
493 hashm(POINTER key)	/* Multiplication hashing scheme */
494 	/* "key" to be hashed */
495 {
496 	return ((int)(((unsigned)(crunch(key) * FACTOR)) >> SHIFT));
497 }
498 
499 /*
500  * Secondary hashing, for use with multiplicitive hashing scheme.
501  * Adapted from Knuth, Volume 3, section 6.4.
502  */
503 
504 static unsigned int
505 hash2m(POINTER key)	/* Secondary hashing routine */
506 	/* "key" is the string to be hashed */
507 {
508     return (((unsigned int)((crunch(key) * FACTOR) << m) >> SHIFT) | 1);
509 }
510 #endif
511 #endif
512 
513 /* PJ Weinberger's hash function */
514 static unsigned int
515 crunch(POINTER key)	/* Convert multicharacter key to unsigned int */
516 {
517 	unsigned int h = 0;
518 	unsigned int g;
519 	unsigned char *p = (unsigned char *)key;
520 
521 	for (; *p; p++) {
522 		h = (h << 4) + *p;
523 		g = h & 0xf0000000;
524 		if (g != 0) {
525 			h ^= (g >> 24);
526 			h ^= g;
527 		}
528 	}
529 	return (h);
530 }
531 
532 #ifdef DRIVER
533 static void
534 hdump()			/* Dumps loc, data, probe count, key */
535 {
536 	unsigned int i;	/* Counts table slots */
537 #ifdef OPEN
538 	unsigned int sum = 0;	/* Counts probes */
539 #else
540 #ifdef CHAINED
541 	NODE *a;		/* Current Node on list */
542 #endif
543 #endif
544 
545 	for (i = 0; i < length; i++)
546 #ifdef OPEN
547 		if (table[i].key == NULL)
548 			printf("%o.\t-,\t-,\t(NULL)\n", i);
549 		else {
550 			unsigned int oldpr = prcnt;
551 				/* Save current probe count */
552 
553 			hsearch(table[i], FIND);
554 			sum += prcnt;
555 			printf("%o.\t%d,\t%d,\t%s\n", i,
556 			    *table[i].data, prcnt, table[i].key);
557 			prcnt = oldpr;
558 		}
559 	printf("Total probes = %d\n", sum);
560 #else
561 #ifdef CHAINED
562 	if (table[i] == NULL)
563 		printf("%o.\t-,\t-,\t(NULL)\n", i);
564 	else {
565 		printf("%o.", i);
566 		for (a = table[i]; a != NULL; a = a->next)
567 			printf("\t%d,\t%#0.4x,\t%s\n",
568 			    *a->item.data, a, a->item.key);
569 	}
570 #endif
571 #endif
572 }
573 #endif
574