1 /* $NetBSD: hash.c,v 1.29 2020/09/01 21:11:31 rillig Exp $ */ 2 3 /* 4 * Copyright (c) 1988, 1989, 1990 The Regents of the University of California. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Adam de Boor. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /* 36 * Copyright (c) 1988, 1989 by Adam de Boor 37 * Copyright (c) 1989 by Berkeley Softworks 38 * All rights reserved. 39 * 40 * This code is derived from software contributed to Berkeley by 41 * Adam de Boor. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 1. Redistributions of source code must retain the above copyright 47 * notice, this list of conditions and the following disclaimer. 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 3. All advertising materials mentioning features or use of this software 52 * must display the following acknowledgement: 53 * This product includes software developed by the University of 54 * California, Berkeley and its contributors. 55 * 4. Neither the name of the University nor the names of its contributors 56 * may be used to endorse or promote products derived from this software 57 * without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 */ 71 72 #ifndef MAKE_NATIVE 73 static char rcsid[] = "$NetBSD: hash.c,v 1.29 2020/09/01 21:11:31 rillig Exp $"; 74 #else 75 #include <sys/cdefs.h> 76 #ifndef lint 77 #if 0 78 static char sccsid[] = "@(#)hash.c 8.1 (Berkeley) 6/6/93"; 79 #else 80 __RCSID("$NetBSD: hash.c,v 1.29 2020/09/01 21:11:31 rillig Exp $"); 81 #endif 82 #endif /* not lint */ 83 #endif 84 85 /* hash.c -- 86 * 87 * This module contains routines to manipulate a hash table. 88 * See hash.h for a definition of the structure of the hash 89 * table. Hash tables grow automatically as the amount of 90 * information increases. 91 */ 92 #include "make.h" 93 94 /* 95 * Forward references to local procedures that are used before they're 96 * defined: 97 */ 98 99 static void RebuildTable(Hash_Table *); 100 101 /* 102 * The following defines the ratio of # entries to # buckets 103 * at which we rebuild the table to make it larger. 104 */ 105 106 #define rebuildLimit 3 107 108 /* The hash function(s) */ 109 110 #ifndef HASH 111 /* The default: this one matches Gosling's emacs */ 112 #define HASH(h, key, p) do { \ 113 for (h = 0, p = key; *p;) \ 114 h = (h << 5) - h + *p++; \ 115 } while (0) 116 117 #endif 118 119 /* Sets up the hash table. 120 * 121 * Input: 122 * t Structure to to hold the table. 123 * numBuckets How many buckets to create for starters. This 124 * number is rounded up to a power of two. If 125 * <= 0, a reasonable default is chosen. The 126 * table will grow in size later as needed. 127 */ 128 void 129 Hash_InitTable(Hash_Table *t, int numBuckets) 130 { 131 int i; 132 struct Hash_Entry **hp; 133 134 /* 135 * Round up the size to a power of two. 136 */ 137 if (numBuckets <= 0) 138 i = 16; 139 else { 140 for (i = 2; i < numBuckets; i <<= 1) 141 continue; 142 } 143 t->numEntries = 0; 144 t->maxchain = 0; 145 t->bucketsSize = i; 146 t->bucketsMask = i - 1; 147 t->buckets = hp = bmake_malloc(sizeof(*hp) * i); 148 while (--i >= 0) 149 *hp++ = NULL; 150 } 151 152 /* Removes everything from the hash table and frees up the memory space it 153 * occupied (except for the space in the Hash_Table structure). */ 154 void 155 Hash_DeleteTable(Hash_Table *t) 156 { 157 struct Hash_Entry **hp, *h, *nexth = NULL; 158 int i; 159 160 for (hp = t->buckets, i = t->bucketsSize; --i >= 0;) { 161 for (h = *hp++; h != NULL; h = nexth) { 162 nexth = h->next; 163 free(h); 164 } 165 } 166 free(t->buckets); 167 168 /* 169 * Set up the hash table to cause memory faults on any future access 170 * attempts until re-initialization. 171 */ 172 t->buckets = NULL; 173 } 174 175 /* Searches the hash table for an entry corresponding to the key. 176 * 177 * Input: 178 * t Hash table to search. 179 * key A hash key. 180 * 181 * Results: 182 * Returns a pointer to the entry for key, or NULL if the table contains 183 * no entry for the key. 184 */ 185 Hash_Entry * 186 Hash_FindEntry(Hash_Table *t, const char *key) 187 { 188 Hash_Entry *e; 189 unsigned h; 190 const char *p; 191 int chainlen; 192 193 if (t == NULL || t->buckets == NULL) { 194 return NULL; 195 } 196 HASH(h, key, p); 197 p = key; 198 chainlen = 0; 199 #ifdef DEBUG_HASH_LOOKUP 200 if (DEBUG(HASH)) 201 fprintf(debug_file, "%s: %p h=%x key=%s\n", __func__, 202 t, h, key); 203 #endif 204 for (e = t->buckets[h & t->bucketsMask]; e != NULL; e = e->next) { 205 chainlen++; 206 if (e->namehash == h && strcmp(e->name, p) == 0) 207 break; 208 } 209 if (chainlen > t->maxchain) 210 t->maxchain = chainlen; 211 return e; 212 } 213 214 /* Searches the hash table for an entry corresponding to the key. 215 * If no entry is found, then one is created. 216 * 217 * Input: 218 * t Hash table to search. 219 * key A hash key. 220 * newPtr Filled with TRUE if new entry created, 221 * FALSE otherwise. 222 */ 223 Hash_Entry * 224 Hash_CreateEntry(Hash_Table *t, const char *key, Boolean *newPtr) 225 { 226 Hash_Entry *e; 227 unsigned h; 228 const char *p; 229 int keylen; 230 int chainlen; 231 struct Hash_Entry **hp; 232 233 /* 234 * Hash the key. As a side effect, save the length (strlen) of the 235 * key in case we need to create the entry. 236 */ 237 HASH(h, key, p); 238 keylen = p - key; 239 p = key; 240 chainlen = 0; 241 #ifdef DEBUG_HASH_LOOKUP 242 if (DEBUG(HASH)) 243 fprintf(debug_file, "%s: %p h=%x key=%s\n", __func__, 244 t, h, key); 245 #endif 246 for (e = t->buckets[h & t->bucketsMask]; e != NULL; e = e->next) { 247 chainlen++; 248 if (e->namehash == h && strcmp(e->name, p) == 0) { 249 if (newPtr != NULL) 250 *newPtr = FALSE; 251 break; 252 } 253 } 254 if (chainlen > t->maxchain) 255 t->maxchain = chainlen; 256 if (e) 257 return e; 258 259 /* 260 * The desired entry isn't there. Before allocating a new entry, 261 * expand the table if necessary (and this changes the resulting 262 * bucket chain). 263 */ 264 if (t->numEntries >= rebuildLimit * t->bucketsSize) 265 RebuildTable(t); 266 e = bmake_malloc(sizeof(*e) + keylen); 267 hp = &t->buckets[h & t->bucketsMask]; 268 e->next = *hp; 269 *hp = e; 270 Hash_SetValue(e, NULL); 271 e->namehash = h; 272 (void)strcpy(e->name, p); 273 t->numEntries++; 274 275 if (newPtr != NULL) 276 *newPtr = TRUE; 277 return e; 278 } 279 280 /* Delete the given hash table entry and free memory associated with it. */ 281 void 282 Hash_DeleteEntry(Hash_Table *t, Hash_Entry *e) 283 { 284 Hash_Entry **hp, *p; 285 286 if (e == NULL) 287 return; 288 for (hp = &t->buckets[e->namehash & t->bucketsMask]; 289 (p = *hp) != NULL; hp = &p->next) { 290 if (p == e) { 291 *hp = p->next; 292 free(p); 293 t->numEntries--; 294 return; 295 } 296 } 297 (void)write(2, "bad call to Hash_DeleteEntry\n", 29); 298 abort(); 299 } 300 301 /* Sets things up for enumerating all entries in the hash table. 302 * 303 * Input: 304 * t Table to be searched. 305 * searchPtr Area in which to keep state about search. 306 * 307 * Results: 308 * The return value is the address of the first entry in 309 * the hash table, or NULL if the table is empty. 310 */ 311 Hash_Entry * 312 Hash_EnumFirst(Hash_Table *t, Hash_Search *searchPtr) 313 { 314 searchPtr->table = t; 315 searchPtr->nextBucket = 0; 316 searchPtr->entry = NULL; 317 return Hash_EnumNext(searchPtr); 318 } 319 320 /* Returns the next entry in the hash table, or NULL if the end of the table 321 * is reached. 322 * 323 * Input: 324 * searchPtr Area used to keep state about search. 325 */ 326 Hash_Entry * 327 Hash_EnumNext(Hash_Search *searchPtr) 328 { 329 Hash_Entry *e; 330 Hash_Table *t = searchPtr->table; 331 332 /* 333 * The entry field points to the most recently returned 334 * entry, or is NULL if we are starting up. If not NULL, we have 335 * to start at the next one in the chain. 336 */ 337 e = searchPtr->entry; 338 if (e != NULL) 339 e = e->next; 340 /* 341 * If the chain ran out, or if we are starting up, we need to 342 * find the next nonempty chain. 343 */ 344 while (e == NULL) { 345 if (searchPtr->nextBucket >= t->bucketsSize) 346 return NULL; 347 e = t->buckets[searchPtr->nextBucket++]; 348 } 349 searchPtr->entry = e; 350 return e; 351 } 352 353 /* Makes a new hash table that is larger than the old one. The entire hash 354 * table is moved, so any bucket numbers from the old table become invalid. */ 355 static void 356 RebuildTable(Hash_Table *t) 357 { 358 Hash_Entry *e, *next = NULL, **hp, **xp; 359 int i, mask; 360 Hash_Entry **oldhp; 361 int oldsize; 362 363 oldhp = t->buckets; 364 oldsize = i = t->bucketsSize; 365 i <<= 1; 366 t->bucketsSize = i; 367 t->bucketsMask = mask = i - 1; 368 t->buckets = hp = bmake_malloc(sizeof(*hp) * i); 369 while (--i >= 0) 370 *hp++ = NULL; 371 for (hp = oldhp, i = oldsize; --i >= 0;) { 372 for (e = *hp++; e != NULL; e = next) { 373 next = e->next; 374 xp = &t->buckets[e->namehash & mask]; 375 e->next = *xp; 376 *xp = e; 377 } 378 } 379 free(oldhp); 380 if (DEBUG(HASH)) 381 fprintf(debug_file, "%s: %p size=%d entries=%d maxchain=%d\n", 382 __func__, t, t->bucketsSize, t->numEntries, t->maxchain); 383 t->maxchain = 0; 384 } 385 386 void 387 Hash_ForEach(Hash_Table *t, void (*action)(void *, void *), void *data) 388 { 389 Hash_Search search; 390 Hash_Entry *e; 391 392 for (e = Hash_EnumFirst(t, &search); 393 e != NULL; 394 e = Hash_EnumNext(&search)) 395 action(Hash_GetValue(e), data); 396 } 397 398 void 399 Hash_DebugStats(Hash_Table *t, const char *name) 400 { 401 if (DEBUG(HASH)) 402 fprintf(debug_file, "Hash_Table %s: size=%d numEntries=%d maxchain=%d\n", 403 name, t->bucketsSize, t->numEntries, t->maxchain); 404 } 405