1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 1996 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 /*LINTLIBRARY*/
33 /* Compile time switches:
34
35 MULT - use a multiplicative hashing function.
36 DIV - use the remainder mod table size as a hashing function.
37 CHAINED - use a linked list to resolve collisions.
38 OPEN - use open addressing to resolve collisions.
39 BRENT - use Brent's modification to improve the OPEN algorithm.
40 SORTUP - CHAINED list is sorted in increasing order.
41 SORTDOWN - CHAINED list is sorted in decreasing order.
42 START - CHAINED list with entries appended at front.
43 DRIVER - compile in a main program to drive the tests.
44 DEBUG - compile some debugging printout statements.
45 USCR - user supplied comparison routine.
46 */
47
48 #include <stdio.h>
49 #include <limits.h>
50 #include <malloc.h>
51 #include <string.h>
52
53 #define SUCCEED 0
54 #define FAIL 1
55 #define TRUE 1
56 #define FALSE 0
57 #define repeat for(;;)
58 #define until(A) if(A) break;
59
60 #ifdef OPEN
61 # undef CHAINED
62 #else
63 #ifndef CHAINED
64 # define OPEN
65 #endif
66 #endif
67
68 #ifdef MULT
69 # undef DIV
70 #else
71 #ifndef DIV
72 # define MULT
73 #endif
74 #endif
75
76 #ifdef START
77 # undef SORTUP
78 # undef SORTDOWN
79 #else
80 #ifdef SORTUP
81 # undef SORTDOWN
82 #endif
83 #endif
84
85 #ifdef USCR
86 # define COMPARE(A, B) (* hcompar)((A), (B))
87 extern int (* hcompar)();
88 #else
89 # define COMPARE(A, B) strcmp((A), (B))
90 #endif
91
92 #ifdef MULT
93 # define SHIFT ((bitsper * sizeof(int)) - m) /* Shift factor */
94 # define FACTOR 035761254233 /* Magic multiplication factor */
95 # define HASH hashm /* Multiplicative hash function */
96 # define HASH2 hash2m /* Secondary hash function */
97 static unsigned int bitsper; /* Bits per byte */
98 static unsigned int hashm();
99 static unsigned int hash2m();
100 #else
101 #ifdef DIV
102 # define HASH hashd /* Division hashing routine */
103 # define HASH2(A) 1 /* Secondary hash function */
104 static unsigned int hashd();
105 #endif
106 #endif
107
108 typedef enum {
109 FIND, /* Find, if present */
110 ENTER /* Find; enter if not present */
111 } ACTION;
112 typedef char *POINTER;
113 typedef struct entry { /* Hash table entry */
114 POINTER key;
115 POINTER data;
116 } ENTRY;
117
118 #ifdef CHAINED
119 typedef struct node { /* Part of the linked list of entries */
120 ENTRY item;
121 struct node *next;
122 } NODE;
123 typedef NODE *TABELEM;
124 static NODE **table; /* The address of the hash table */
125 static ENTRY *build();
126 #else
127 #ifdef OPEN
128 typedef ENTRY TABELEM; /* What the table contains (TABle ELEMents) */
129 static TABELEM *table; /* The address of the hash table */
130 static unsigned int count = 0; /* Number of entries in hash table */
131 #endif
132 #endif
133
134 static unsigned int length; /* Size of the hash table */
135 static unsigned int m; /* Log base 2 of length */
136 static unsigned int prcnt; /* Number of probes this item */
137
138 int hcreate();
139 void hdestroy();
140 ENTRY *hsearch();
141 static unsigned int crunch();
142
143 #ifdef DRIVER
144 static void hdump();
145
main()146 main()
147 {
148 char line[80]; /* Room for the input line */
149 int i = 0; /* Data generator */
150 ENTRY *res; /* Result of hsearch */
151 ENTRY *new; /* Test entry */
152
153 if(hcreate(5))
154 printf("Length = %u, m = %u\n", length, m);
155 else {
156 fprintf(stderr, "Out of core\n");
157 exit(FAIL);
158 }
159
160 repeat {
161 hdump();
162 printf("Enter a probe: ");
163 until (EOF == scanf("%s", line));
164 #ifdef DEBUG
165 printf("%s, ", line);
166 printf("division: %d, ", hashd(line));
167 printf("multiplication: %d\n", hashm(line));
168 #endif
169 new = (ENTRY *) malloc(sizeof(ENTRY));
170 if(new == NULL) {
171 fprintf(stderr, "Out of core \n");
172 exit(FAIL);
173 }
174 else {
175 new->key = malloc((unsigned) strlen(line) + 1);
176 if(new->key == NULL) {
177 fprintf(stderr, "Out of core \n");
178 exit(FAIL);
179 }
180 strcpy(new->key, line);
181 new->data = malloc(sizeof(int));
182 if(new->data == NULL) {
183 fprintf(stderr, "Out of core \n");
184 exit(FAIL);
185 }
186 *new->data = i++;
187 }
188 res = hsearch(*new, ENTER);
189 printf("The number of probes required was %d\n", prcnt);
190 if(res == (ENTRY *) 0)
191 printf("Table is full\n");
192 else {
193 printf("Success: ");
194 printf("Key = %s, Value = %d\n", res->key, *res->data);
195 }
196 }
197 exit(SUCCEED);
198 }
199 #endif
200
201 /*
202 * Create a hash table no smaller than size
203 *
204 * size: Minimum size for hash table
205 */
206 int
hcreate(int size)207 hcreate(int size)
208 {
209 unsigned int unsize; /* Holds the shifted size */
210
211 if(size <= 0)
212 return(FALSE);
213
214 unsize = size; /* +1 for empty table slot; -1 for ceiling */
215 length = 1; /* Maximum entries in tabbe */
216 m = 0; /* Log2 length */
217 while(unsize) {
218 unsize >>= 1;
219 length <<= 1;
220 m++;
221 }
222
223 table = (TABELEM *) calloc(length, sizeof(TABELEM));
224 return (table != NULL);
225 }
226
227 void
hdestroy(void)228 hdestroy(void) /* Reset the module to its initial state */
229 {
230 free((POINTER) table);
231 #ifdef OPEN
232 count = 0;
233 #endif
234 }
235
236 #ifdef OPEN
237 /* Hash search of a fixed-capacity table. Open addressing used to
238 resolve collisions. Algorithm modified from Knuth, Volume 3,
239 section 6.4, algorithm D. Labels flag corresponding actions.
240 */
241
242 /*
243 * Find or insert the item into the table
244 *
245 * item: Item to be inserted or found
246 * action: FIND or ENTER
247 */
248 ENTRY *
hsearch(ENTRY item,ACTION action)249 hsearch(ENTRY item, ACTION action)
250 {
251 unsigned int i; /* Insertion index */
252 unsigned int c; /* Secondary probe displacement */
253
254 prcnt = 1;
255
256 /* D1: */
257 i = HASH(item.key); /* Primary hash on key */
258 #ifdef DEBUG
259 if(action == ENTER)
260 printf("hash = %o\n", i);
261 #endif
262
263 /* D2: */
264 if(table[i].key == NULL) /* Empty slot? */
265 goto D6;
266 else if(COMPARE(table[i].key, item.key) == 0) /* Match? */
267 return(&table[i]);
268
269 /* D3: */
270 c = HASH2(item.key); /* No match => compute secondary hash */
271 #ifdef DEBUG
272 if(action == ENTER)
273 printf("hash2 = %o\n", c);
274 #endif
275
276 D4:
277 i = (i + c) % length; /* Advance to next slot */
278 prcnt++;
279
280 /* D5: */
281 if(table[i].key == NULL) /* Empty slot? */
282 goto D6;
283 else if(COMPARE(table[i].key, item.key) == 0) /* Match? */
284 return(&table[i]);
285 else
286 goto D4;
287
288 D6: if(action == FIND) /* Insert if requested */
289 return((ENTRY *) NULL);
290 if(count == (length - 1)) /* Table full? */
291 return((ENTRY *) 0);
292
293 #ifdef BRENT
294 /* Brent's variation of the open addressing algorithm. Do extra
295 work during insertion to speed retrieval. May require switching
296 of previously placed items. Adapted from Knuth, Volume 3, section
297 4.6 and Brent's article in CACM, volume 10, #2, February 1973.
298 */
299
300 { unsigned int p0 = HASH(item.key); /* First probe index */
301 unsigned int c0 = HASH2(item.key); /* Main branch increment */
302 unsigned int r = prcnt - 1; /* Current minimum distance */
303 unsigned int j; /* Counts along main branch */
304 unsigned int k; /* Counts along secondary branch */
305 unsigned int curj; /* Current best main branch site */
306 unsigned int curpos; /* Current best table index */
307 unsigned int pj; /* Main branch indices */
308 unsigned int cj; /* Secondary branch increment distance*/
309 unsigned int pjk; /* Secondary branch probe indices */
310
311 if(prcnt >= 3) {
312 for(j = 0; j < prcnt; j++) { /* Count along main branch */
313 pj = (p0 + j * c0) % length; /* New main branch index */
314 cj = HASH2(table[pj].key); /* Secondary branch incr. */
315 for(k=1; j+k <= r; k++) { /* Count on secondary branch*/
316 pjk = (pj + k * cj) % length; /* Secondary probe */
317 if(table[pjk].key == NULL) { /* Improvement found */
318 r = j + k; /* Decrement upper bound */
319 curj = pj; /* Save main probe index */
320 curpos = pjk; /* Save secondeary index */
321 }
322 }
323 }
324 if(r != prcnt - 1) { /* If an improvement occurred */
325 table[curpos] = table[curj]; /* Old key to new site */
326 #ifdef DEBUG
327 printf("Switch curpos = %o, curj = %o, oldi = %o\n",
328 curj, curpos, i);
329 #endif
330 i = curj;
331 }
332 }
333 }
334 #endif
335 count++; /* Increment table occupancy count */
336 table[i] = item; /* Save item */
337 return(&table[i]); /* Address of item is returned */
338 }
339 #endif
340
341 #ifdef USCR
342 # ifdef DRIVER
343 static int
compare(POINTER a,POINTER b)344 compare(POINTER a, POINTER b)
345 {
346 return (strcmp(a, b));
347 }
348
349 int (* hcompar)() = compare;
350 # endif
351 #endif
352
353 #ifdef CHAINED
354 # ifdef SORTUP
355 # define STRCMP(A, B) (COMPARE((A), (B)) > 0)
356 # else
357 # ifdef SORTDOWN
358 # define STRCMP(A, B) (COMPARE((A), (B)) < 0)
359 # else
360 # define STRCMP(A, B) (COMPARE((A), (B)) != 0)
361 # endif
362 # endif
363
364 /*
365 * Chained search with sorted lists
366 *
367 * item: Item to be inserted or found
368 * action: FIND or ENTER
369 */
370 ENTRY *
hsearch(ENTRY item,ACTION action)371 hsearch(ENTRY item, ACTION action)
372 {
373 NODE *p; /* Searches through the linked list */
374 NODE **q; /* Where to store the pointer to a new NODE */
375 unsigned int i; /* Result of hash */
376 int res; /* Result of string comparison */
377
378 prcnt = 1;
379
380 i = HASH(item.key); /* Table[i] contains list head */
381
382 if(table[i] == (NODE*)NULL) { /* List has not yet been begun */
383 if(action == FIND)
384 return((ENTRY *) NULL);
385 else
386 return(build(&table[i], (NODE *) NULL, item));
387 }
388 else { /* List is not empty */
389 q = &table[i];
390 p = table[i];
391 while(p != NULL && (res = STRCMP(item.key, p->item.key))) {
392 prcnt++;
393 q = &(p->next);
394 p = p->next;
395 }
396
397 if(p != NULL && res == 0) /* Item has been found */
398 return(&(p->item));
399 else { /* Item is not yet on list */
400 if(action == FIND)
401 return((ENTRY *) NULL);
402 else
403 #ifdef START
404 return(build(&table[i], table[i], item));
405 #else
406 return(build(q, p, item));
407 #endif
408 }
409 }
410 }
411
412 /*
413 * last: Where to store in last list item
414 * next: Link to next list item
415 * item: Item to be kept in node
416 */
417 static ENTRY *
build(NODE ** last,NODE * next,ENTRY item)418 build(NODE **last, NODE *next, ENTRY item)
419 {
420 NODE *p = (NODE *) malloc(sizeof(NODE));
421
422 if(p != NULL) {
423 p->item = item;
424 *last = p;
425 p->next = next;
426 return(&(p->item));
427 }
428 else
429 return(NULL);
430 }
431 #endif
432
433 #ifdef DIV
434 /*
435 * Division hashing scheme
436 *
437 * key: Key to be hashed
438 */
439 static unsigned int
hashd(POINTER key)440 hashd(POINTER key)
441 {
442 return (crunch(key) % length);
443 }
444 #else
445 #ifdef MULT
446 /*
447 * NOTE: The following algorithm only works on machines where
448 * the results of multiplying two integers is the least
449 * significant part of the double word integer required to hold
450 * the result. It is adapted from Knuth, Volume 3, section 6.4.
451 */
452
453 /*
454 * Multiplication hashing scheme
455 *
456 * key: Key to be hashed
457 */
458 static unsigned int
hashm(POINTER key)459 hashm(POINTER key)
460 {
461 static int first = TRUE; /* TRUE on the first call only */
462
463 if(first) { /* Compute the number of bits in a byte */
464 unsigned char c = UCHAR_MAX; /* A byte full of 1's */
465 bitsper = 0;
466 while(c) { /* Shift until no more 1's */
467 c >>= 1;
468 bitsper++; /* Count number of shifts */
469 }
470 first = FALSE;
471 }
472 return ((int) (((unsigned) (crunch(key) * FACTOR)) >> SHIFT));
473 }
474
475 /*
476 * Secondary hashing, for use with multiplicitive hashing scheme.
477 * Adapted from Knuth, Volume 3, section 6.4.
478 */
479
480 /*
481 * Secondary hashing routine
482 *
483 * key: String to be hashed
484 */
485 static unsigned int
hash2m(POINTER key)486 hash2m(POINTER key)
487 {
488 return ((int) (((unsigned) ((crunch(key) * FACTOR) << m) >> SHIFT) | 1));
489 }
490 #endif
491 #endif
492
493 /* Convert multicharacter key to unsigned int */
494 static unsigned int
crunch(POINTER key)495 crunch(POINTER key)
496 {
497 unsigned int sum = 0; /* Results */
498 int s; /* Length of the key */
499
500 for(s = 0; *key; s++) /* Simply add up the bytes */
501 sum += *key++;
502
503 return (sum + s);
504 }
505
506 #ifdef DRIVER
507 static void
hdump()508 hdump() /* Dumps loc, data, probe count, key */
509 {
510 unsigned int i; /* Counts table slots */
511 #ifdef OPEN
512 unsigned int sum = 0; /* Counts probes */
513 #else
514 #ifdef CHAINED
515 NODE *a; /* Current Node on list */
516 #endif
517 #endif
518
519 for(i = 0; i < length; i++)
520 #ifdef OPEN
521 if(table[i].key == NULL)
522 printf("%o.\t-,\t-,\t(NULL)\n", i);
523 else {
524 unsigned int oldpr = prcnt; /* Save current probe count */
525 hsearch(table[i], FIND);
526 sum += prcnt;
527 printf("%o.\t%d,\t%d,\t%s\n", i,
528 *table[i].data, prcnt, table[i].key);
529 prcnt = oldpr;
530 }
531 printf("Total probes = %d\n", sum);
532 #else
533 #ifdef CHAINED
534 if(table[i] == NULL)
535 printf("%o.\t-,\t-,\t(NULL)\n", i);
536 else {
537 printf("%o.", i);
538 for(a = table[i]; a != NULL; a = a->next)
539 printf("\t%d,\t%#0.4x,\t%s\n",
540 *a->item.data, a, a->item.key);
541 }
542 #endif
543 #endif
544 }
545 #endif
546