144099b7bSPaul Traina /************************************************************************ 244099b7bSPaul Traina Copyright 1988, 1991 by Carnegie Mellon University 344099b7bSPaul Traina 444099b7bSPaul Traina All Rights Reserved 544099b7bSPaul Traina 644099b7bSPaul Traina Permission to use, copy, modify, and distribute this software and its 744099b7bSPaul Traina documentation for any purpose and without fee is hereby granted, provided 844099b7bSPaul Traina that the above copyright notice appear in all copies and that both that 944099b7bSPaul Traina copyright notice and this permission notice appear in supporting 1044099b7bSPaul Traina documentation, and that the name of Carnegie Mellon University not be used 1144099b7bSPaul Traina in advertising or publicity pertaining to distribution of the software 1244099b7bSPaul Traina without specific, written prior permission. 1344099b7bSPaul Traina 1444099b7bSPaul Traina CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS 1544099b7bSPaul Traina SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 1644099b7bSPaul Traina IN NO EVENT SHALL CMU BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL 1744099b7bSPaul Traina DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR 1844099b7bSPaul Traina PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 1944099b7bSPaul Traina ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 2044099b7bSPaul Traina SOFTWARE. 2144099b7bSPaul Traina ************************************************************************/ 2244099b7bSPaul Traina 2344099b7bSPaul Traina #ifndef lint 2444099b7bSPaul Traina static char rcsid[] = "$Id: hash.c,v 1.1.1.1 1994/09/10 14:44:55 csgr Exp $"; 2544099b7bSPaul Traina #endif 2644099b7bSPaul Traina 2744099b7bSPaul Traina 2844099b7bSPaul Traina /* 2944099b7bSPaul Traina * Generalized hash table ADT 3044099b7bSPaul Traina * 3144099b7bSPaul Traina * Provides multiple, dynamically-allocated, variable-sized hash tables on 3244099b7bSPaul Traina * various data and keys. 3344099b7bSPaul Traina * 3444099b7bSPaul Traina * This package attempts to follow some of the coding conventions suggested 3544099b7bSPaul Traina * by Bob Sidebotham and the AFS Clean Code Committee of the 3644099b7bSPaul Traina * Information Technology Center at Carnegie Mellon. 3744099b7bSPaul Traina */ 3844099b7bSPaul Traina 3944099b7bSPaul Traina 4044099b7bSPaul Traina #include <sys/types.h> 4144099b7bSPaul Traina #include <stdlib.h> 4244099b7bSPaul Traina 4344099b7bSPaul Traina #ifndef USE_BFUNCS 4444099b7bSPaul Traina #include <memory.h> 4544099b7bSPaul Traina /* Yes, memcpy is OK here (no overlapped copies). */ 4644099b7bSPaul Traina #define bcopy(a,b,c) memcpy(b,a,c) 4744099b7bSPaul Traina #define bzero(p,l) memset(p,0,l) 4844099b7bSPaul Traina #define bcmp(a,b,c) memcmp(a,b,c) 4944099b7bSPaul Traina #endif 5044099b7bSPaul Traina 5144099b7bSPaul Traina #include "hash.h" 5244099b7bSPaul Traina 5344099b7bSPaul Traina #define TRUE 1 5444099b7bSPaul Traina #define FALSE 0 5544099b7bSPaul Traina #ifndef NULL 5644099b7bSPaul Traina #define NULL 0 5744099b7bSPaul Traina #endif 5844099b7bSPaul Traina 5944099b7bSPaul Traina /* 6044099b7bSPaul Traina * This can be changed to make internal routines visible to debuggers, etc. 6144099b7bSPaul Traina */ 6244099b7bSPaul Traina #ifndef PRIVATE 6344099b7bSPaul Traina #define PRIVATE static 6444099b7bSPaul Traina #endif 6544099b7bSPaul Traina 6644099b7bSPaul Traina #ifdef __STDC__ 6744099b7bSPaul Traina #define P(args) args 6844099b7bSPaul Traina #else 6944099b7bSPaul Traina #define P(args) () 7044099b7bSPaul Traina #endif 7144099b7bSPaul Traina 7244099b7bSPaul Traina PRIVATE void hashi_FreeMembers P((hash_member *, hash_freefp)); 7344099b7bSPaul Traina 7444099b7bSPaul Traina #undef P 7544099b7bSPaul Traina 7644099b7bSPaul Traina 7744099b7bSPaul Traina 7844099b7bSPaul Traina /* 7944099b7bSPaul Traina * Hash table initialization routine. 8044099b7bSPaul Traina * 8144099b7bSPaul Traina * This routine creates and intializes a hash table of size "tablesize" 8244099b7bSPaul Traina * entries. Successful calls return a pointer to the hash table (which must 8344099b7bSPaul Traina * be passed to other hash routines to identify the hash table). Failed 8444099b7bSPaul Traina * calls return NULL. 8544099b7bSPaul Traina */ 8644099b7bSPaul Traina 8744099b7bSPaul Traina hash_tbl * 8844099b7bSPaul Traina hash_Init(tablesize) 8944099b7bSPaul Traina unsigned tablesize; 9044099b7bSPaul Traina { 9144099b7bSPaul Traina register hash_tbl *hashtblptr; 9244099b7bSPaul Traina register unsigned totalsize; 9344099b7bSPaul Traina 9444099b7bSPaul Traina if (tablesize > 0) { 9544099b7bSPaul Traina totalsize = sizeof(hash_tbl) 9644099b7bSPaul Traina + sizeof(hash_member *) * (tablesize - 1); 9744099b7bSPaul Traina hashtblptr = (hash_tbl *) malloc(totalsize); 9844099b7bSPaul Traina if (hashtblptr) { 9944099b7bSPaul Traina bzero((char *) hashtblptr, totalsize); 10044099b7bSPaul Traina hashtblptr->size = tablesize; /* Success! */ 10144099b7bSPaul Traina hashtblptr->bucketnum = 0; 10244099b7bSPaul Traina hashtblptr->member = (hashtblptr->table)[0]; 10344099b7bSPaul Traina } 10444099b7bSPaul Traina } else { 10544099b7bSPaul Traina hashtblptr = NULL; /* Disallow zero-length tables */ 10644099b7bSPaul Traina } 10744099b7bSPaul Traina return hashtblptr; /* NULL if failure */ 10844099b7bSPaul Traina } 10944099b7bSPaul Traina 11044099b7bSPaul Traina 11144099b7bSPaul Traina 11244099b7bSPaul Traina /* 11344099b7bSPaul Traina * Frees an entire linked list of bucket members (used in the open 11444099b7bSPaul Traina * hashing scheme). Does nothing if the passed pointer is NULL. 11544099b7bSPaul Traina */ 11644099b7bSPaul Traina 11744099b7bSPaul Traina PRIVATE void 11844099b7bSPaul Traina hashi_FreeMembers(bucketptr, free_data) 11944099b7bSPaul Traina hash_member *bucketptr; 12044099b7bSPaul Traina hash_freefp free_data; 12144099b7bSPaul Traina { 12244099b7bSPaul Traina hash_member *nextbucket; 12344099b7bSPaul Traina while (bucketptr) { 12444099b7bSPaul Traina nextbucket = bucketptr->next; 12544099b7bSPaul Traina (*free_data) (bucketptr->data); 12644099b7bSPaul Traina free((char *) bucketptr); 12744099b7bSPaul Traina bucketptr = nextbucket; 12844099b7bSPaul Traina } 12944099b7bSPaul Traina } 13044099b7bSPaul Traina 13144099b7bSPaul Traina 13244099b7bSPaul Traina 13344099b7bSPaul Traina 13444099b7bSPaul Traina /* 13544099b7bSPaul Traina * This routine re-initializes the hash table. It frees all the allocated 13644099b7bSPaul Traina * memory and resets all bucket pointers to NULL. 13744099b7bSPaul Traina */ 13844099b7bSPaul Traina 13944099b7bSPaul Traina void 14044099b7bSPaul Traina hash_Reset(hashtable, free_data) 14144099b7bSPaul Traina hash_tbl *hashtable; 14244099b7bSPaul Traina hash_freefp free_data; 14344099b7bSPaul Traina { 14444099b7bSPaul Traina hash_member **bucketptr; 14544099b7bSPaul Traina unsigned i; 14644099b7bSPaul Traina 14744099b7bSPaul Traina bucketptr = hashtable->table; 14844099b7bSPaul Traina for (i = 0; i < hashtable->size; i++) { 14944099b7bSPaul Traina hashi_FreeMembers(*bucketptr, free_data); 15044099b7bSPaul Traina *bucketptr++ = NULL; 15144099b7bSPaul Traina } 15244099b7bSPaul Traina hashtable->bucketnum = 0; 15344099b7bSPaul Traina hashtable->member = (hashtable->table)[0]; 15444099b7bSPaul Traina } 15544099b7bSPaul Traina 15644099b7bSPaul Traina 15744099b7bSPaul Traina 15844099b7bSPaul Traina /* 15944099b7bSPaul Traina * Generic hash function to calculate a hash code from the given string. 16044099b7bSPaul Traina * 16144099b7bSPaul Traina * For each byte of the string, this function left-shifts the value in an 16244099b7bSPaul Traina * accumulator and then adds the byte into the accumulator. The contents of 16344099b7bSPaul Traina * the accumulator is returned after the entire string has been processed. 16444099b7bSPaul Traina * It is assumed that this result will be used as the "hashcode" parameter in 16544099b7bSPaul Traina * calls to other functions in this package. These functions automatically 16644099b7bSPaul Traina * adjust the hashcode for the size of each hashtable. 16744099b7bSPaul Traina * 16844099b7bSPaul Traina * This algorithm probably works best when the hash table size is a prime 16944099b7bSPaul Traina * number. 17044099b7bSPaul Traina * 17144099b7bSPaul Traina * Hopefully, this function is better than the previous one which returned 17244099b7bSPaul Traina * the sum of the squares of all the bytes. I'm still open to other 17344099b7bSPaul Traina * suggestions for a default hash function. The programmer is more than 17444099b7bSPaul Traina * welcome to supply his/her own hash function as that is one of the design 17544099b7bSPaul Traina * features of this package. 17644099b7bSPaul Traina */ 17744099b7bSPaul Traina 17844099b7bSPaul Traina unsigned 17944099b7bSPaul Traina hash_HashFunction(string, len) 18044099b7bSPaul Traina unsigned char *string; 18144099b7bSPaul Traina register unsigned len; 18244099b7bSPaul Traina { 18344099b7bSPaul Traina register unsigned accum; 18444099b7bSPaul Traina 18544099b7bSPaul Traina accum = 0; 18644099b7bSPaul Traina for (; len > 0; len--) { 18744099b7bSPaul Traina accum <<= 1; 18844099b7bSPaul Traina accum += (unsigned) (*string++ & 0xFF); 18944099b7bSPaul Traina } 19044099b7bSPaul Traina return accum; 19144099b7bSPaul Traina } 19244099b7bSPaul Traina 19344099b7bSPaul Traina 19444099b7bSPaul Traina 19544099b7bSPaul Traina /* 19644099b7bSPaul Traina * Returns TRUE if at least one entry for the given key exists; FALSE 19744099b7bSPaul Traina * otherwise. 19844099b7bSPaul Traina */ 19944099b7bSPaul Traina 20044099b7bSPaul Traina int 20144099b7bSPaul Traina hash_Exists(hashtable, hashcode, compare, key) 20244099b7bSPaul Traina hash_tbl *hashtable; 20344099b7bSPaul Traina unsigned hashcode; 20444099b7bSPaul Traina hash_cmpfp compare; 20544099b7bSPaul Traina hash_datum *key; 20644099b7bSPaul Traina { 20744099b7bSPaul Traina register hash_member *memberptr; 20844099b7bSPaul Traina 20944099b7bSPaul Traina memberptr = (hashtable->table)[hashcode % (hashtable->size)]; 21044099b7bSPaul Traina while (memberptr) { 21144099b7bSPaul Traina if ((*compare) (key, memberptr->data)) { 21244099b7bSPaul Traina return TRUE; /* Entry does exist */ 21344099b7bSPaul Traina } 21444099b7bSPaul Traina memberptr = memberptr->next; 21544099b7bSPaul Traina } 21644099b7bSPaul Traina return FALSE; /* Entry does not exist */ 21744099b7bSPaul Traina } 21844099b7bSPaul Traina 21944099b7bSPaul Traina 22044099b7bSPaul Traina 22144099b7bSPaul Traina /* 22244099b7bSPaul Traina * Insert the data item "element" into the hash table using "hashcode" 22344099b7bSPaul Traina * to determine the bucket number, and "compare" and "key" to determine 22444099b7bSPaul Traina * its uniqueness. 22544099b7bSPaul Traina * 22644099b7bSPaul Traina * If the insertion is successful 0 is returned. If a matching entry 22744099b7bSPaul Traina * already exists in the given bucket of the hash table, or some other error 22844099b7bSPaul Traina * occurs, -1 is returned and the insertion is not done. 22944099b7bSPaul Traina */ 23044099b7bSPaul Traina 23144099b7bSPaul Traina int 23244099b7bSPaul Traina hash_Insert(hashtable, hashcode, compare, key, element) 23344099b7bSPaul Traina hash_tbl *hashtable; 23444099b7bSPaul Traina unsigned hashcode; 23544099b7bSPaul Traina hash_cmpfp compare; 23644099b7bSPaul Traina hash_datum *key, *element; 23744099b7bSPaul Traina { 23844099b7bSPaul Traina hash_member *temp; 23944099b7bSPaul Traina 24044099b7bSPaul Traina hashcode %= hashtable->size; 24144099b7bSPaul Traina if (hash_Exists(hashtable, hashcode, compare, key)) { 24244099b7bSPaul Traina return -1; /* At least one entry already exists */ 24344099b7bSPaul Traina } 24444099b7bSPaul Traina temp = (hash_member *) malloc(sizeof(hash_member)); 24544099b7bSPaul Traina if (!temp) 24644099b7bSPaul Traina return -1; /* malloc failed! */ 24744099b7bSPaul Traina 24844099b7bSPaul Traina temp->data = element; 24944099b7bSPaul Traina temp->next = (hashtable->table)[hashcode]; 25044099b7bSPaul Traina (hashtable->table)[hashcode] = temp; 25144099b7bSPaul Traina return 0; /* Success */ 25244099b7bSPaul Traina } 25344099b7bSPaul Traina 25444099b7bSPaul Traina 25544099b7bSPaul Traina 25644099b7bSPaul Traina /* 25744099b7bSPaul Traina * Delete all data elements which match the given key. If at least one 25844099b7bSPaul Traina * element is found and the deletion is successful, 0 is returned. 25944099b7bSPaul Traina * If no matching elements can be found in the hash table, -1 is returned. 26044099b7bSPaul Traina */ 26144099b7bSPaul Traina 26244099b7bSPaul Traina int 26344099b7bSPaul Traina hash_Delete(hashtable, hashcode, compare, key, free_data) 26444099b7bSPaul Traina hash_tbl *hashtable; 26544099b7bSPaul Traina unsigned hashcode; 26644099b7bSPaul Traina hash_cmpfp compare; 26744099b7bSPaul Traina hash_datum *key; 26844099b7bSPaul Traina hash_freefp free_data; 26944099b7bSPaul Traina { 27044099b7bSPaul Traina hash_member *memberptr, *tempptr; 27144099b7bSPaul Traina hash_member *previous = NULL; 27244099b7bSPaul Traina int retval; 27344099b7bSPaul Traina 27444099b7bSPaul Traina retval = -1; 27544099b7bSPaul Traina hashcode %= hashtable->size; 27644099b7bSPaul Traina 27744099b7bSPaul Traina /* 27844099b7bSPaul Traina * Delete the first member of the list if it matches. Since this moves 27944099b7bSPaul Traina * the second member into the first position we have to keep doing this 28044099b7bSPaul Traina * over and over until it no longer matches. 28144099b7bSPaul Traina */ 28244099b7bSPaul Traina memberptr = (hashtable->table)[hashcode]; 28344099b7bSPaul Traina while (memberptr && (*compare) (key, memberptr->data)) { 28444099b7bSPaul Traina (hashtable->table)[hashcode] = memberptr->next; 28544099b7bSPaul Traina /* 28644099b7bSPaul Traina * Stop hashi_FreeMembers() from deleting the whole list! 28744099b7bSPaul Traina */ 28844099b7bSPaul Traina memberptr->next = NULL; 28944099b7bSPaul Traina hashi_FreeMembers(memberptr, free_data); 29044099b7bSPaul Traina memberptr = (hashtable->table)[hashcode]; 29144099b7bSPaul Traina retval = 0; 29244099b7bSPaul Traina } 29344099b7bSPaul Traina 29444099b7bSPaul Traina /* 29544099b7bSPaul Traina * Now traverse the rest of the list 29644099b7bSPaul Traina */ 29744099b7bSPaul Traina if (memberptr) { 29844099b7bSPaul Traina previous = memberptr; 29944099b7bSPaul Traina memberptr = memberptr->next; 30044099b7bSPaul Traina } 30144099b7bSPaul Traina while (memberptr) { 30244099b7bSPaul Traina if ((*compare) (key, memberptr->data)) { 30344099b7bSPaul Traina tempptr = memberptr; 30444099b7bSPaul Traina previous->next = memberptr = memberptr->next; 30544099b7bSPaul Traina /* 30644099b7bSPaul Traina * Put the brakes on hashi_FreeMembers(). . . . 30744099b7bSPaul Traina */ 30844099b7bSPaul Traina tempptr->next = NULL; 30944099b7bSPaul Traina hashi_FreeMembers(tempptr, free_data); 31044099b7bSPaul Traina retval = 0; 31144099b7bSPaul Traina } else { 31244099b7bSPaul Traina previous = memberptr; 31344099b7bSPaul Traina memberptr = memberptr->next; 31444099b7bSPaul Traina } 31544099b7bSPaul Traina } 31644099b7bSPaul Traina return retval; 31744099b7bSPaul Traina } 31844099b7bSPaul Traina 31944099b7bSPaul Traina 32044099b7bSPaul Traina 32144099b7bSPaul Traina /* 32244099b7bSPaul Traina * Locate and return the data entry associated with the given key. 32344099b7bSPaul Traina * 32444099b7bSPaul Traina * If the data entry is found, a pointer to it is returned. Otherwise, 32544099b7bSPaul Traina * NULL is returned. 32644099b7bSPaul Traina */ 32744099b7bSPaul Traina 32844099b7bSPaul Traina hash_datum * 32944099b7bSPaul Traina hash_Lookup(hashtable, hashcode, compare, key) 33044099b7bSPaul Traina hash_tbl *hashtable; 33144099b7bSPaul Traina unsigned hashcode; 33244099b7bSPaul Traina hash_cmpfp compare; 33344099b7bSPaul Traina hash_datum *key; 33444099b7bSPaul Traina { 33544099b7bSPaul Traina hash_member *memberptr; 33644099b7bSPaul Traina 33744099b7bSPaul Traina memberptr = (hashtable->table)[hashcode % (hashtable->size)]; 33844099b7bSPaul Traina while (memberptr) { 33944099b7bSPaul Traina if ((*compare) (key, memberptr->data)) { 34044099b7bSPaul Traina return (memberptr->data); 34144099b7bSPaul Traina } 34244099b7bSPaul Traina memberptr = memberptr->next; 34344099b7bSPaul Traina } 34444099b7bSPaul Traina return NULL; 34544099b7bSPaul Traina } 34644099b7bSPaul Traina 34744099b7bSPaul Traina 34844099b7bSPaul Traina 34944099b7bSPaul Traina /* 35044099b7bSPaul Traina * Return the next available entry in the hashtable for a linear search 35144099b7bSPaul Traina */ 35244099b7bSPaul Traina 35344099b7bSPaul Traina hash_datum * 35444099b7bSPaul Traina hash_NextEntry(hashtable) 35544099b7bSPaul Traina hash_tbl *hashtable; 35644099b7bSPaul Traina { 35744099b7bSPaul Traina register unsigned bucket; 35844099b7bSPaul Traina register hash_member *memberptr; 35944099b7bSPaul Traina 36044099b7bSPaul Traina /* 36144099b7bSPaul Traina * First try to pick up where we left off. 36244099b7bSPaul Traina */ 36344099b7bSPaul Traina memberptr = hashtable->member; 36444099b7bSPaul Traina if (memberptr) { 36544099b7bSPaul Traina hashtable->member = memberptr->next; /* Set up for next call */ 36644099b7bSPaul Traina return memberptr->data; /* Return the data */ 36744099b7bSPaul Traina } 36844099b7bSPaul Traina /* 36944099b7bSPaul Traina * We hit the end of a chain, so look through the array of buckets 37044099b7bSPaul Traina * until we find a new chain (non-empty bucket) or run out of buckets. 37144099b7bSPaul Traina */ 37244099b7bSPaul Traina bucket = hashtable->bucketnum + 1; 37344099b7bSPaul Traina while ((bucket < hashtable->size) && 37444099b7bSPaul Traina !(memberptr = (hashtable->table)[bucket])) { 37544099b7bSPaul Traina bucket++; 37644099b7bSPaul Traina } 37744099b7bSPaul Traina 37844099b7bSPaul Traina /* 37944099b7bSPaul Traina * Check to see if we ran out of buckets. 38044099b7bSPaul Traina */ 38144099b7bSPaul Traina if (bucket >= hashtable->size) { 38244099b7bSPaul Traina /* 38344099b7bSPaul Traina * Reset to top of table for next call. 38444099b7bSPaul Traina */ 38544099b7bSPaul Traina hashtable->bucketnum = 0; 38644099b7bSPaul Traina hashtable->member = (hashtable->table)[0]; 38744099b7bSPaul Traina /* 38844099b7bSPaul Traina * But return end-of-table indication to the caller this time. 38944099b7bSPaul Traina */ 39044099b7bSPaul Traina return NULL; 39144099b7bSPaul Traina } 39244099b7bSPaul Traina /* 39344099b7bSPaul Traina * Must have found a non-empty bucket. 39444099b7bSPaul Traina */ 39544099b7bSPaul Traina hashtable->bucketnum = bucket; 39644099b7bSPaul Traina hashtable->member = memberptr->next; /* Set up for next call */ 39744099b7bSPaul Traina return memberptr->data; /* Return the data */ 39844099b7bSPaul Traina } 39944099b7bSPaul Traina 40044099b7bSPaul Traina 40144099b7bSPaul Traina 40244099b7bSPaul Traina /* 40344099b7bSPaul Traina * Return the first entry in a hash table for a linear search 40444099b7bSPaul Traina */ 40544099b7bSPaul Traina 40644099b7bSPaul Traina hash_datum * 40744099b7bSPaul Traina hash_FirstEntry(hashtable) 40844099b7bSPaul Traina hash_tbl *hashtable; 40944099b7bSPaul Traina { 41044099b7bSPaul Traina hashtable->bucketnum = 0; 41144099b7bSPaul Traina hashtable->member = (hashtable->table)[0]; 41244099b7bSPaul Traina return hash_NextEntry(hashtable); 41344099b7bSPaul Traina } 41444099b7bSPaul Traina 41544099b7bSPaul Traina /* 41644099b7bSPaul Traina * Local Variables: 41744099b7bSPaul Traina * tab-width: 4 41844099b7bSPaul Traina * c-indent-level: 4 41944099b7bSPaul Traina * c-argdecl-indent: 4 42044099b7bSPaul Traina * c-continued-statement-offset: 4 42144099b7bSPaul Traina * c-continued-brace-offset: -4 42244099b7bSPaul Traina * c-label-offset: -4 42344099b7bSPaul Traina * c-brace-offset: 0 42444099b7bSPaul Traina * End: 42544099b7bSPaul Traina */ 426