xref: /titanic_52/usr/src/lib/libbc/libc/gen/common/hsearch.c (revision 6185db853e024a486ff8837e6784dd290d866112)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 1996 Sun Microsystems, Inc. All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*LINTLIBRARY*/
33 /* Compile time switches:
34 
35    MULT - use a multiplicative hashing function.
36    DIV - use the remainder mod table size as a hashing function.
37    CHAINED - use a linked list to resolve collisions.
38    OPEN - use open addressing to resolve collisions.
39    BRENT - use Brent's modification to improve the OPEN algorithm.
40    SORTUP - CHAINED list is sorted in increasing order.
41    SORTDOWN - CHAINED list is sorted in decreasing order.
42    START - CHAINED list with entries appended at front.
43    DRIVER - compile in a main program to drive the tests.
44    DEBUG - compile some debugging printout statements.
45    USCR - user supplied comparison routine.
46 */
47 
48 #include <stdio.h>
49 #include <limits.h>
50 #include <malloc.h>
51 #include <string.h>
52 
53 #define SUCCEED		0
54 #define FAIL		1
55 #define TRUE		1
56 #define FALSE		0
57 #define repeat		for(;;)
58 #define until(A)	if(A) break;
59 
60 #ifdef OPEN
61 #    undef CHAINED
62 #else
63 #ifndef CHAINED
64 #    define OPEN
65 #endif
66 #endif
67 
68 #ifdef MULT
69 #    undef DIV
70 #else
71 #ifndef DIV
72 #    define MULT
73 #endif
74 #endif
75 
76 #ifdef START
77 #    undef SORTUP
78 #    undef SORTDOWN
79 #else
80 #ifdef SORTUP
81 #    undef SORTDOWN
82 #endif
83 #endif
84 
85 #ifdef USCR
86 #    define COMPARE(A, B) (* hcompar)((A), (B))
87      extern int (* hcompar)();
88 #else
89 #    define COMPARE(A, B) strcmp((A), (B))
90 #endif
91 
92 #ifdef MULT
93 #    define SHIFT ((bitsper * sizeof(int)) - m) /* Shift factor */
94 #    define FACTOR 035761254233	/* Magic multiplication factor */
95 #    define HASH hashm		/* Multiplicative hash function */
96 #    define HASH2 hash2m	/* Secondary hash function */
97 static unsigned int bitsper;	/* Bits per byte */
98 static unsigned int hashm();
99 static unsigned int hash2m();
100 #else
101 #ifdef DIV
102 #    define HASH hashd		/* Division hashing routine */
103 #    define HASH2(A) 1		/* Secondary hash function */
104 static unsigned int hashd();
105 #endif
106 #endif
107 
108 typedef enum {
109     FIND,		/* Find, if present */
110     ENTER		/* Find; enter if not present */
111 } ACTION;
112 typedef char *POINTER;
113 typedef struct entry {	/* Hash table entry */
114     POINTER key;
115     POINTER data;
116 } ENTRY;
117 
118 #ifdef CHAINED
119 typedef struct node {	/* Part of the linked list of entries */
120 	ENTRY item;
121 	struct node *next;
122 } NODE;
123 typedef NODE *TABELEM;
124 static NODE **table;	/* The address of the hash table */
125 static ENTRY *build();
126 #else
127 #ifdef OPEN
128 typedef ENTRY TABELEM;	/* What the table contains (TABle ELEMents) */
129 static TABELEM *table;	/* The address of the hash table */
130 static unsigned int count = 0;	/* Number of entries in hash table */
131 #endif
132 #endif
133 
134 static unsigned int length;	/* Size of the hash table */
135 static unsigned int m;		/* Log base 2 of length */
136 static unsigned int prcnt;	/* Number of probes this item */
137 
138 int hcreate();
139 void hdestroy();
140 ENTRY *hsearch();
141 static unsigned int crunch();
142 
143 #ifdef DRIVER
144 static void hdump();
145 
146 main()
147 {
148     char line[80];	/* Room for the input line */
149     int i = 0;		/* Data generator */
150     ENTRY *res;		/* Result of hsearch */
151     ENTRY *new;		/* Test entry */
152 
153     if(hcreate(5))
154 	printf("Length = %u, m = %u\n", length, m);
155     else {
156 	fprintf(stderr, "Out of core\n");
157 	exit(FAIL);
158     }
159 
160     repeat {
161 	hdump();
162 	printf("Enter a probe: ");
163 	until (EOF == scanf("%s", line));
164 #ifdef DEBUG
165 	printf("%s, ", line);
166 	printf("division: %d, ", hashd(line));
167 	printf("multiplication: %d\n", hashm(line));
168 #endif
169 	new = (ENTRY *) malloc(sizeof(ENTRY));
170 	if(new == NULL) {
171 	    fprintf(stderr, "Out of core \n");
172 	    exit(FAIL);
173 	}
174 	else {
175 	    new->key = malloc((unsigned) strlen(line) + 1);
176 	    if(new->key == NULL) {
177 		fprintf(stderr, "Out of core \n");
178 		exit(FAIL);
179 	    }
180 	    strcpy(new->key, line);
181 	    new->data = malloc(sizeof(int));
182 	    if(new->data == NULL) {
183 		fprintf(stderr, "Out of core \n");
184 		exit(FAIL);
185 	    }
186 	    *new->data = i++;
187 	}
188 	res = hsearch(*new, ENTER);
189 	printf("The number of probes required was %d\n", prcnt);
190 	if(res == (ENTRY *) 0)
191 	    printf("Table is full\n");
192 	else {
193 	    printf("Success: ");
194 	    printf("Key = %s, Value = %d\n", res->key, *res->data);
195 	}
196     }
197     exit(SUCCEED);
198 }
199 #endif
200 
201 /*
202  * Create a hash table no smaller than size
203  *
204  *	size:	Minimum size for hash table
205  */
206 int
207 hcreate(int size)
208 {
209     unsigned int unsize;	/* Holds the shifted size */
210 
211     if(size <= 0)
212 	return(FALSE);
213 
214     unsize = size;	/* +1 for empty table slot; -1 for ceiling */
215     length = 1;		/* Maximum entries in tabbe */
216     m = 0;		/* Log2 length */
217     while(unsize) {
218 	unsize >>= 1;
219 	length <<= 1;
220 	m++;
221     }
222 
223     table = (TABELEM *) calloc(length, sizeof(TABELEM));
224     return (table != NULL);
225 }
226 
227 void
228 hdestroy(void)	/* Reset the module to its initial state */
229 {
230     free((POINTER) table);
231 #ifdef OPEN
232     count = 0;
233 #endif
234 }
235 
236 #ifdef OPEN
237 /* Hash search of a fixed-capacity table.  Open addressing used to
238    resolve collisions.  Algorithm modified from Knuth, Volume 3,
239    section 6.4, algorithm D.  Labels flag corresponding actions.
240 */
241 
242 /*
243  * Find or insert the item into the table
244  *
245  *	item:	Item to be inserted or found
246  *	action:	FIND or ENTER
247  */
248 ENTRY *
249 hsearch(ENTRY item, ACTION action)
250 {
251     unsigned int i;	/* Insertion index */
252     unsigned int c;	/* Secondary probe displacement */
253 
254     prcnt = 1;
255 
256 /* D1: */
257     i = HASH(item.key);	/* Primary hash on key */
258 #ifdef DEBUG
259     if(action == ENTER)
260 	printf("hash = %o\n", i);
261 #endif
262 
263 /* D2: */
264     if(table[i].key == NULL)	/* Empty slot? */
265 	goto D6;
266     else if(COMPARE(table[i].key, item.key) == 0)	/* Match? */
267 	return(&table[i]);
268 
269 /* D3: */
270     c = HASH2(item.key);	/* No match => compute secondary hash */
271 #ifdef DEBUG
272     if(action == ENTER)
273 	printf("hash2 = %o\n", c);
274 #endif
275 
276 D4:
277     i = (i + c) % length;	/* Advance to next slot */
278     prcnt++;
279 
280 /* D5: */
281     if(table[i].key == NULL)	/* Empty slot? */
282 	goto D6;
283     else if(COMPARE(table[i].key, item.key) == 0)	/* Match? */
284 	return(&table[i]);
285     else
286 	goto D4;
287 
288 D6: if(action == FIND)		/* Insert if requested */
289 	return((ENTRY *) NULL);
290     if(count == (length - 1))	/* Table full? */
291 	return((ENTRY *) 0);
292 
293 #ifdef BRENT
294 /* Brent's variation of the open addressing algorithm.  Do extra
295    work during insertion to speed retrieval.  May require switching
296    of previously placed items.  Adapted from Knuth, Volume 3, section
297    4.6 and Brent's article in CACM, volume 10, #2, February 1973.
298 */
299 
300     {   unsigned int p0 = HASH(item.key);   /* First probe index */
301 	unsigned int c0 = HASH2(item.key);  /* Main branch increment */
302 	unsigned int r = prcnt - 1; /* Current minimum distance */
303 	unsigned int j;         /* Counts along main branch */
304 	unsigned int k;         /* Counts along secondary branch */
305 	unsigned int curj;      /* Current best main branch site */
306 	unsigned int curpos;    /* Current best table index */
307 	unsigned int pj;        /* Main branch indices */
308 	unsigned int cj;        /* Secondary branch increment distance*/
309 	unsigned int pjk;       /* Secondary branch probe indices */
310 
311 	if(prcnt >= 3) {
312 	    for(j = 0; j < prcnt; j++) {   /* Count along main branch */
313 		pj = (p0 + j * c0) % length; /* New main branch index */
314 		cj = HASH2(table[pj].key); /* Secondary branch incr. */
315 		for(k=1; j+k <= r; k++) { /* Count on secondary branch*/
316 		    pjk = (pj + k * cj) % length; /* Secondary probe */
317 		    if(table[pjk].key == NULL) { /* Improvement found */
318 		        r = j + k;	/* Decrement upper bound */
319 		        curj = pj;	/* Save main probe index */
320 		        curpos = pjk;	/* Save secondeary index */
321 		    }
322 		}
323 	    }
324 	    if(r != prcnt - 1) {       /* If an improvement occurred */
325 		table[curpos] = table[curj]; /* Old key to new site */
326 #ifdef DEBUG
327 		printf("Switch curpos = %o, curj = %o, oldi = %o\n",
328 		    curj, curpos, i);
329 #endif
330 		i = curj;
331 	    }
332 	}
333     }
334 #endif
335     count++;			/* Increment table occupancy count */
336     table[i] = item;		/* Save item */
337     return(&table[i]);		/* Address of item is returned */
338 }
339 #endif
340 
341 #ifdef USCR
342 #    ifdef DRIVER
343 static int
344 compare(POINTER a, POINTER b)
345 {
346     return (strcmp(a, b));
347 }
348 
349 int (* hcompar)() = compare;
350 #    endif
351 #endif
352 
353 #ifdef CHAINED
354 #    ifdef SORTUP
355 #        define STRCMP(A, B) (COMPARE((A), (B)) > 0)
356 #    else
357 #    ifdef SORTDOWN
358 #        define STRCMP(A, B) (COMPARE((A), (B)) < 0)
359 #    else
360 #        define STRCMP(A, B) (COMPARE((A), (B)) != 0)
361 #    endif
362 #    endif
363 
364 /*
365  * Chained search with sorted lists
366  *
367  *	item:	Item to be inserted or found
368  *	action: FIND or ENTER
369  */
370 ENTRY *
371 hsearch(ENTRY item, ACTION action)
372 {
373     NODE *p;		/* Searches through the linked list */
374     NODE **q;		/* Where to store the pointer to a new NODE */
375     unsigned int i;	/* Result of hash */
376     int res;		/* Result of string comparison */
377 
378     prcnt = 1;
379 
380     i = HASH(item.key);	/* Table[i] contains list head */
381 
382     if(table[i] == (NODE*)NULL) { /* List has not yet been begun */
383 	if(action == FIND)
384 	    return((ENTRY *) NULL);
385 	else
386 	    return(build(&table[i], (NODE *) NULL, item));
387     }
388     else {			/* List is not empty */
389 	q = &table[i];
390 	p = table[i];
391 	while(p != NULL && (res = STRCMP(item.key, p->item.key))) {
392 	    prcnt++;
393 	    q = &(p->next);
394 	    p = p->next;
395 	}
396 
397 	if(p != NULL && res == 0)	/* Item has been found */
398 	    return(&(p->item));
399 	else {			/* Item is not yet on list */
400 	    if(action == FIND)
401 		return((ENTRY *) NULL);
402 	    else
403 #ifdef START
404 		return(build(&table[i], table[i], item));
405 #else
406 		return(build(q, p, item));
407 #endif
408 	}
409     }
410 }
411 
412 /*
413  *	last:		Where to store in last list item
414  *	next:		Link to next list item
415  *	item:		Item to be kept in node
416  */
417 static ENTRY *
418 build(NODE **last, NODE *next, ENTRY item)
419 {
420     NODE *p = (NODE *) malloc(sizeof(NODE));
421 
422     if(p != NULL) {
423 	p->item = item;
424 	*last = p;
425 	p->next = next;
426 	return(&(p->item));
427     }
428     else
429 	return(NULL);
430 }
431 #endif
432 
433 #ifdef DIV
434 /*
435  * Division hashing scheme
436  *
437  *	key:	Key to be hashed
438  */
439 static unsigned int
440 hashd(POINTER key)
441 {
442     return (crunch(key) % length);
443 }
444 #else
445 #ifdef MULT
446 /*
447  *    NOTE: The following algorithm only works on machines where
448  *    the results of multiplying two integers is the least
449  *    significant part of the double word integer required to hold
450  *    the result.  It is adapted from Knuth, Volume 3, section 6.4.
451  */
452 
453 /*
454  * Multiplication hashing scheme
455  *
456  *	key:	Key to be hashed
457  */
458 static unsigned int
459 hashm(POINTER key)
460 {
461     static int first = TRUE;	/* TRUE on the first call only */
462 
463     if(first) {		/* Compute the number of bits in a byte */
464 	unsigned char c = UCHAR_MAX;	/* A byte full of 1's */
465 	bitsper = 0;
466 	while(c) {		/* Shift until no more 1's */
467 	    c >>= 1;
468 	    bitsper++;		/* Count number of shifts */
469 	}
470 	first = FALSE;
471     }
472     return ((int) (((unsigned) (crunch(key) * FACTOR)) >> SHIFT));
473 }
474 
475 /*
476  * Secondary hashing, for use with multiplicitive hashing scheme.
477  * Adapted from Knuth, Volume 3, section 6.4.
478  */
479 
480 /*
481  * Secondary hashing routine
482  *
483  *	key:	String to be hashed
484  */
485 static unsigned int
486 hash2m(POINTER key)
487 {
488     return ((int) (((unsigned) ((crunch(key) * FACTOR) << m) >> SHIFT) | 1));
489 }
490 #endif
491 #endif
492 
493 /* Convert multicharacter key to unsigned int */
494 static unsigned int
495 crunch(POINTER key)
496 {
497     unsigned int sum = 0;	/* Results */
498     int s;			/* Length of the key */
499 
500     for(s = 0; *key; s++)	/* Simply add up the bytes */
501 	sum += *key++;
502 
503     return (sum + s);
504 }
505 
506 #ifdef DRIVER
507 static void
508 hdump()			/* Dumps loc, data, probe count, key */
509 {
510     unsigned int i;	/* Counts table slots */
511 #ifdef OPEN
512     unsigned int sum = 0;	/* Counts probes */
513 #else
514 #ifdef CHAINED
515     NODE *a;		/* Current Node on list */
516 #endif
517 #endif
518 
519     for(i = 0; i < length; i++)
520 #ifdef OPEN
521 	if(table[i].key == NULL)
522 	    printf("%o.\t-,\t-,\t(NULL)\n", i);
523 	else {
524 	    unsigned int oldpr = prcnt; /* Save current probe count */
525 	    hsearch(table[i], FIND);
526 	    sum += prcnt;
527 	    printf("%o.\t%d,\t%d,\t%s\n", i,
528 		*table[i].data, prcnt, table[i].key);
529 	    prcnt = oldpr;
530 	}
531     printf("Total probes = %d\n", sum);
532 #else
533 #ifdef CHAINED
534 	if(table[i] == NULL)
535 	    printf("%o.\t-,\t-,\t(NULL)\n", i);
536 	else {
537 	    printf("%o.", i);
538 	    for(a = table[i]; a != NULL; a = a->next)
539 		printf("\t%d,\t%#0.4x,\t%s\n",
540 		    *a->item.data, a, a->item.key);
541 	}
542 #endif
543 #endif
544 }
545 #endif
546