/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ /* * Compile time switches: * * MULT - use a multiplicative hashing function. * DIV - use the remainder mod table size as a hashing function. * CHAINED - use a linked list to resolve collisions. * OPEN - use open addressing to resolve collisions. * BRENT - use Brent's modification to improve the OPEN algorithm. * SORTUP - CHAINED list is sorted in increasing order. * SORTDOWN - CHAINED list is sorted in decreasing order. * START - CHAINED list with entries appended at front. * DRIVER - compile in a main program to drive the tests. * DEBUG - compile some debugging printout statements. * USCR - user supplied comparison routine. */ #pragma weak hcreate = _hcreate #pragma weak hdestroy = _hdestroy #pragma weak hsearch = _hsearch #include "synonyms.h" #include #include #include #include #include #include #include #include typedef char *POINTER; #define SUCCEED 0 #define FAIL 1 #define TRUE 1 #define FALSE 0 #define repeat for (;;) #define until(A) if (A) break; #ifdef OPEN #undef CHAINED #else #ifndef CHAINED #define OPEN #endif #endif #ifdef MULT #undef DIV #else #ifndef DIV #define MULT #endif #endif #ifdef START #undef SORTUP #undef SORTDOWN #else #ifdef SORTUP #undef SORTDOWN #endif #endif #ifdef USCR #define COMPARE(A, B) (* hcompar)((A), (B)) extern int (* hcompar)(); #else #define COMPARE(A, B) strcmp((A), (B)) #endif #ifdef MULT #define SHIFT ((CHAR_BIT * sizeof (int)) - m) /* Shift factor */ #define FACTOR 035761254233 /* Magic multiplication factor */ #define HASH hashm /* Multiplicative hash function */ #define HASH2 hash2m /* Secondary hash function */ static unsigned int hashm(POINTER); static unsigned int hash2m(POINTER); #else #ifdef DIV #define HASH hashd /* Division hashing routine */ #define HASH2(A) 1 /* Secondary hash function */ static unsigned int hashd(); #endif #endif #ifdef CHAINED typedef struct node { /* Part of the linked list of entries */ ENTRY item; struct node *next; } NODE; typedef NODE *TABELEM; static NODE **table; /* The address of the hash table */ static ENTRY *build(); #else #ifdef OPEN typedef ENTRY TABELEM; /* What the table contains (TABle ELEMents) */ static TABELEM *table; /* The address of the hash table */ static unsigned int count = 0; /* Number of entries in hash table */ #endif #endif static unsigned int length; /* Size of the hash table */ static unsigned int m; /* Log base 2 of length */ static unsigned int prcnt; /* Number of probes this item */ static mutex_t table_lock = DEFAULTMUTEX; #define RETURN(n) { lmutex_unlock(&table_lock); return (n); } /* * forward declarations */ static unsigned int crunch(POINTER); #ifdef DRIVER static void hdump(); main() { char line[80]; /* Room for the input line */ int i = 0; /* Data generator */ ENTRY *res; /* Result of hsearch */ ENTRY *new; /* Test entry */ start: if (hcreate(5)) printf("Length = %u, m = %u\n", length, m); else { fprintf(stderr, "Out of core\n"); exit(FAIL); } repeat { hdump(); printf("Enter a probe: "); until(EOF == scanf("%s", line) || strcmp(line, "quit") == 0); #ifdef DEBUG printf("%s, ", line); printf("division: %d, ", hashd(line)); printf("multiplication: %d\n", hashm(line)); #endif new = (ENTRY *) malloc(sizeof (ENTRY)); if (new == NULL) { fprintf(stderr, "Out of core \n"); exit(FAIL); } else { new->key = malloc((unsigned)strlen(line) + 1); if (new->key == NULL) { fprintf(stderr, "Out of core \n"); exit(FAIL); } strcpy(new->key, line); new->data = malloc(sizeof (int)); if (new->data == NULL) { fprintf(stderr, "Out of core \n"); exit(FAIL); } *new->data = i++; } res = hsearch(*new, ENTER); printf("The number of probes required was %d\n", prcnt); if (res == (ENTRY *) 0) printf("Table is full\n"); else { printf("Success: "); printf("Key = %s, Value = %d\n", res->key, *res->data); } } printf("Do you wish to start another hash table (yes/no?)"); if (EOF == scanf("%s", line) || strcmp(line, "no") == 0) exit(SUCCEED); hdestroy(); goto start; } #endif int hcreate(size_t size) /* Create a hash table no smaller than size */ /* Minimum "size" for hash table */ { size_t unsize; /* Holds the shifted size */ TABELEM *local_table; TABELEM *old_table; unsigned int local_length; unsigned int local_m; if (size == 0) return (FALSE); unsize = size; /* +1 for empty table slot; -1 for ceiling */ local_length = 1; /* Maximum entries in table */ local_m = 0; /* Log2 length */ while (unsize) { unsize >>= 1; local_length <<= 1; local_m++; } local_table = (TABELEM *) calloc(local_length, sizeof (TABELEM)); lmutex_lock(&table_lock); old_table = table; table = local_table; length = local_length; m = local_m; lmutex_unlock(&table_lock); if (old_table != NULL) free(old_table); return (local_table != NULL); } void hdestroy(void) /* Reset the module to its initial state */ { POINTER local_table; lmutex_lock(&table_lock); #ifdef CHAINED int i; NODE *p, *oldp; for (i = 0; i < length; i++) { if (table[i] != (NODE *)NULL) { p = table[i]; while (p != (NODE *)NULL) { oldp = p; p = p -> next; /* * This is a locking vs malloc() violation. * Fortunately, it is not actually compiled. */ free(oldp); } } } #endif local_table = (POINTER)table; table = 0; #ifdef OPEN count = 0; #endif lmutex_unlock(&table_lock); free(local_table); } #ifdef OPEN /* * Hash search of a fixed-capacity table. Open addressing used to * resolve collisions. Algorithm modified from Knuth, Volume 3, * section 6.4, algorithm D. Labels flag corresponding actions. */ /* Find or insert the item into the table */ ENTRY *hsearch(ENTRY item, ACTION action) /* "item" to be inserted or found */ /* action: FIND or ENTER */ { unsigned int i; /* Insertion index */ unsigned int c; /* Secondary probe displacement */ lmutex_lock(&table_lock); prcnt = 1; /* D1: */ i = HASH(item.key); /* Primary hash on key */ #ifdef DEBUG if (action == ENTER) printf("hash = %o\n", i); #endif /* D2: */ if (table[i].key == NULL) /* Empty slot? */ goto D6; else if (COMPARE(table[i].key, item.key) == 0) /* Match? */ RETURN(&table[i]); /* D3: */ c = HASH2(item.key); /* No match => compute secondary hash */ #ifdef DEBUG if (action == ENTER) printf("hash2 = %o\n", c); #endif D4: i = (i + c) % length; /* Advance to next slot */ prcnt++; /* D5: */ if (table[i].key == NULL) /* Empty slot? */ goto D6; else if (COMPARE(table[i].key, item.key) == 0) /* Match? */ RETURN(&table[i]) else goto D4; D6: if (action == FIND) /* Insert if requested */ RETURN((ENTRY *) NULL); if (count == (length - 1)) /* Table full? */ RETURN((ENTRY *) 0); #ifdef BRENT /* * Brent's variation of the open addressing algorithm. Do extra * work during insertion to speed retrieval. May require switching * of previously placed items. Adapted from Knuth, Volume 3, section * 4.6 and Brent's article in CACM, volume 10, #2, February 1973. */ { unsigned int p0 = HASH(item.key); /* First probe index */ unsigned int c0 = HASH2(item.key); /* Main branch increment */ unsigned int r = prcnt - 1; /* Current minimum distance */ unsigned int j; /* Counts along main branch */ unsigned int k; /* Counts along secondary branch */ unsigned int curj; /* Current best main branch site */ unsigned int curpos; /* Current best table index */ unsigned int pj; /* Main branch indices */ unsigned int cj; /* Secondary branch increment distance */ unsigned int pjk; /* Secondary branch probe indices */ if (prcnt >= 3) { for (j = 0; j < prcnt; j++) { /* Count along main branch */ pj = (p0 + j * c0) % length; /* New main branch index */ cj = HASH2(table[pj].key); /* Secondary branch incr. */ for (k = 1; j+k <= r; k++) { /* Count on secondary branch */ pjk = (pj + k * cj) % length; /* Secondary probe */ if (table[pjk].key == NULL) { /* Improvement found */ r = j + k; /* Decrement upper bound */ curj = pj; /* Save main probe index */ curpos = pjk; /* Save secondeary index */ } } } if (r != prcnt - 1) { /* If an improvement occurred */ table[curpos] = table[curj]; /* Old key to new site */ #ifdef DEBUG printf("Switch curpos = %o, curj = %o, oldi = %o\n", curj, curpos, i); #endif i = curj; } } } #endif count++; /* Increment table occupancy count */ table[i] = item; /* Save item */ lmutex_unlock(&table_lock); return (&table[i]); /* Address of item is returned */ } #endif #ifdef USCR #ifdef DRIVER static int compare(a, b) POINTER a; POINTER b; { return (strcmp(a, b)); } int (* hcompar)() = compare; #endif #endif #ifdef CHAINED #ifdef SORTUP #define STRCMP(A, B) (COMPARE((A), (B)) > 0) #else #ifdef SORTDOWN #define STRCMP(A, B) (COMPARE((A), (B)) < 0) #else #define STRCMP(A, B) (COMPARE((A), (B)) != 0) #endif #endif ENTRY *hsearch(item, action) /* Chained search with sorted lists */ ENTRY item; /* Item to be inserted or found */ ACTION action; /* FIND or ENTER */ { NODE *p; /* Searches through the linked list */ NODE **q; /* Where to store the pointer to a new NODE */ unsigned int i; /* Result of hash */ int res; /* Result of string comparison */ lmutex_lock(&table_lock); prcnt = 1; i = HASH(item.key); /* Table[i] contains list head */ if (table[i] == (NODE*)NULL) { /* List has not yet been begun */ if (action == FIND) RETURN((ENTRY *) NULL); else RETURN(build(&table[i], (NODE *) NULL, item)); } else { /* List is not empty */ q = &table[i]; p = table[i]; while (p != NULL && (res = STRCMP(item.key, p->item.key))) { prcnt++; q = &(p->next); p = p->next; } if (p != NULL && res == 0) /* Item has been found */ RETURN(&(p->item)); else { /* Item is not yet on list */ if (action == FIND) RETURN((ENTRY *) NULL); else #ifdef START RETURN(build(&table[i], table[i], item)); #else RETURN(build(q, p, item)); #endif } } } static ENTRY *build(last, next, item) NODE **last; /* Where to store in last list item */ NODE *next; /* Link to next list item */ ENTRY item; /* Item to be kept in node */ { /* * This is a locking vs malloc() violation. * Fortunately, it is not actually compiled. */ NODE *p = (NODE *) malloc(sizeof (NODE)); if (p != NULL) { p->item = item; *last = p; p->next = next; return (&(p->item)); } else return (NULL); } #endif #ifdef DIV static unsigned int hashd(key) /* Division hashing scheme */ POINTER key; /* Key to be hashed */ { return (crunch(key) % length); } #else #ifdef MULT /* * NOTE: The following algorithm only works on machines where * the results of multiplying two integers is the least * significant part of the double word integer required to hold * the result. It is adapted from Knuth, Volume 3, section 6.4. */ static unsigned int hashm(POINTER key) /* Multiplication hashing scheme */ /* "key" to be hashed */ { return ((int)(((unsigned)(crunch(key) * FACTOR)) >> SHIFT)); } /* * Secondary hashing, for use with multiplicitive hashing scheme. * Adapted from Knuth, Volume 3, section 6.4. */ static unsigned int hash2m(POINTER key) /* Secondary hashing routine */ /* "key" is the string to be hashed */ { return (((unsigned int)((crunch(key) * FACTOR) << m) >> SHIFT) | 1); } #endif #endif /* PJ Weinberger's hash function */ static unsigned int crunch(POINTER key) /* Convert multicharacter key to unsigned int */ { unsigned int h = 0; unsigned int g; unsigned char *p = (unsigned char *)key; for (; *p; p++) { h = (h << 4) + *p; g = h & 0xf0000000; if (g != 0) { h ^= (g >> 24); h ^= g; } } return (h); } #ifdef DRIVER static void hdump() /* Dumps loc, data, probe count, key */ { unsigned int i; /* Counts table slots */ #ifdef OPEN unsigned int sum = 0; /* Counts probes */ #else #ifdef CHAINED NODE *a; /* Current Node on list */ #endif #endif for (i = 0; i < length; i++) #ifdef OPEN if (table[i].key == NULL) printf("%o.\t-,\t-,\t(NULL)\n", i); else { unsigned int oldpr = prcnt; /* Save current probe count */ hsearch(table[i], FIND); sum += prcnt; printf("%o.\t%d,\t%d,\t%s\n", i, *table[i].data, prcnt, table[i].key); prcnt = oldpr; } printf("Total probes = %d\n", sum); #else #ifdef CHAINED if (table[i] == NULL) printf("%o.\t-,\t-,\t(NULL)\n", i); else { printf("%o.", i); for (a = table[i]; a != NULL; a = a->next) printf("\t%d,\t%#0.4x,\t%s\n", *a->item.data, a, a->item.key); } #endif #endif } #endif