1 /* Copyright (c) 2013-2019, David Anderson 2 All rights reserved. 3 4 Redistribution and use in source and binary forms, with 5 or without modification, are permitted provided that the 6 following conditions are met: 7 8 Redistributions of source code must retain the above 9 copyright notice, this list of conditions and the following 10 disclaimer. 11 12 Redistributions in binary form must reproduce the above 13 copyright notice, this list of conditions and the following 14 disclaimer in the documentation and/or other materials 15 provided with the distribution. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 18 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 19 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 22 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 29 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 */ 32 33 34 /* The interfaces follow tsearch (See the Single 35 Unix Specification) but the implementation is 36 written without reference to the source of any 37 version of tsearch or any hashing code. 38 39 An additional interface is added (compared to 40 a real tsearch) to let the caller identify a 41 'hash' function with each hash table (called 42 a tree below, but that is a misnomer). 43 44 So read 'tree' below as hash table. 45 46 See http://www.prevanders.net/tsearch.html 47 for information and an example of use. 48 49 Based on Knuth, chapter 6.4 50 51 This uses a hash based on the key. 52 Collision resolution is by chaining. 53 54 twalk() and tdestroy() walk in a random order. 55 The 'preorder' etc labels mean nothing in a hash, so everything 56 is called a leaf. 57 58 */ 59 60 61 #include "config.h" 62 #ifdef HAVE_UNUSED_ATTRIBUTE 63 #define UNUSEDARG __attribute__ ((unused)) 64 #else 65 #define UNUSEDARG 66 #endif 67 #ifdef HAVE_STDLIB_H 68 #include "stdlib.h" /* for malloc, free() etc */ 69 #endif /* HAVE_STDLIB_H */ 70 #ifdef HAVE_MALLOC_H 71 /* Useful include for some Windows compilers. */ 72 #include <malloc.h> 73 #endif /* HAVE_MALLOC_H */ 74 #include <stdio.h> /* for printf() */ 75 #ifdef HAVE_STDINT_H 76 #include <stdint.h> /* for uintptr_t */ 77 #endif /* HAVE_STDINT_H */ 78 /* This must match the types and print options 79 found in libdwarf.h. */ 80 #define Dwarf_Unsigned unsigned long long 81 #if defined(_WIN32) && defined(HAVE_NONSTANDARD_PRINTF_64_FORMAT) 82 #define DW_PR_DUx "I64x" 83 #define DW_PR_DUu "I64u" 84 #else 85 #define DW_PR_DUx "llx" 86 #define DW_PR_DUu "llu" 87 #endif /* DW_PR defines */ 88 #include "dwarf_tsearch.h" 89 90 /* A table of primes used to size and resize the hash table. 91 From public sources of prime numbers, arbitrarily chosen 92 to approximately double in size at each step. 93 */ 94 static unsigned long primes[] = 95 { 96 #if 0 /* for testing only */ 97 5,11, 17,23, 31, 47, 53, 98 #endif 99 79, 100 1009, 101 5591, 102 10007, 103 21839, 104 41413, 105 99907, 106 199967, 107 400009, 108 800029, 109 1600141, 110 3000089, 111 6000121, 112 12000257, 113 24000143, 114 48000203, 115 100000127, 116 200001611, 117 400000669, 118 800000573, 119 0 /* Here we are giving up */ 120 }; 121 122 static unsigned long allowed_fill_percent = 90; 123 124 125 struct hs_base { 126 unsigned long tablesize_; 127 unsigned long tablesize_entry_index_; 128 unsigned long allowed_fill_; 129 /* Record_count means number of active records, 130 counting all records on chains. 131 When the record_count is > 90% of a full 132 tablesize_ we redo the table before adding 133 a new entry. */ 134 unsigned long record_count_; 135 /* hashtab_ is an array of hs_entry, 136 indexes 0 through tablesize_ -1. */ 137 struct ts_entry * hashtab_; 138 DW_TSHASHTYPE (*hashfunc_)(const void *key); 139 }; 140 141 struct ts_entry { 142 const void * keyptr; 143 /* So that a keyptr of 0 (if added) is 144 not confused with an empty hash slot, 145 we must mark used slots as used in the hash tab */ 146 unsigned char entryused; 147 struct ts_entry *next; 148 }; 149 150 enum search_intent_t 151 { 152 want_insert, 153 only_find, 154 want_delete 155 }; 156 157 static struct ts_entry * 158 tsearch_inner( const void *key, struct hs_base* head, 159 int (*compar)(const void *, const void *), 160 const enum search_intent_t intent, int*inserted, 161 struct ts_entry **parent_ptr); 162 static void 163 dwarf_tdestroy_inner(struct hs_base*h, 164 void (*free_node)(void *nodep), 165 int depth); 166 167 168 /* A trivial integer-based percentage calculation. 169 Percents >100 are reasonable for a hash-with-chains 170 situation (even if they might not be the best choice 171 for performance). */ 172 static unsigned long 173 calculate_allowed_fill(unsigned long fill_percent, unsigned long ct) 174 { 175 unsigned long v = 0; 176 if(ct < 100000) { 177 unsigned long v2 = (ct *fill_percent)/100; 178 return v2; 179 } 180 v = (ct /100)*fill_percent; 181 return v; 182 } 183 184 /* Initialize the hash and pass in the hash function. 185 If the entry count needed is unknown, pass in 0 as a count estimate, 186 but if the number of hash entries needed can be estimated, 187 pass in the estimate (which need not be prime, we actually use 188 the nearest higher prime from the above table). 189 If the estimated count is 190 Return the tree base, or return NULL if insufficient memory. */ 191 void * 192 dwarf_initialize_search_hash( void **treeptr, 193 DW_TSHASHTYPE(*hashfunc)(const void *key), 194 unsigned long size_estimate) 195 { 196 unsigned long prime_to_use = primes[0]; 197 unsigned entry_index = 0; 198 unsigned k = 0; 199 struct hs_base *base = 0; 200 201 base = *(struct hs_base **)treeptr; 202 if(base) { 203 /* initalized already. */ 204 return base ; 205 } 206 base = calloc(sizeof(struct hs_base),1); 207 if(!base) { 208 /* Out of memory. */ 209 return NULL ; 210 } 211 prime_to_use = primes[0]; 212 while(size_estimate && (size_estimate > prime_to_use)) { 213 k = k +1; 214 prime_to_use = primes[k]; 215 if(prime_to_use == 0) { 216 /* Oops. Too large. */ 217 free(base); 218 return NULL; 219 } 220 entry_index = k; 221 } 222 base->tablesize_ = prime_to_use; 223 base->allowed_fill_ = calculate_allowed_fill(allowed_fill_percent, 224 prime_to_use); 225 if( base->allowed_fill_< (base->tablesize_/2)) { 226 free(base); 227 /* Oops. We are in trouble. Coding mistake here. */ 228 return NULL; 229 } 230 base->record_count_ = 0; 231 base->tablesize_entry_index_ = entry_index; 232 /* hashtab_ is an array of hs_entry, 233 indexes 0 through tablesize_ -1. */ 234 base->hashfunc_ = hashfunc; 235 base->hashtab_ = calloc(sizeof(struct ts_entry),base->tablesize_); 236 if(!base->hashtab_) { 237 free(base); 238 return NULL; 239 } 240 *treeptr = base; 241 return base; 242 } 243 244 245 /* We don't really care whether hashpos or chainpos 246 are 32 or 64 bits. 32 suffices. */ 247 static void print_entry(struct ts_entry *t,const char *descr, 248 char *(* keyprint)(const void *), 249 unsigned long hashpos, 250 unsigned long chainpos) 251 { 252 char *v = 0; 253 if(!t->entryused) { 254 return; 255 } 256 v = keyprint(t->keyptr); 257 printf( 258 "[%4lu.%02lu] 0x%08" DW_PR_DUx 259 " <keyptr 0x%08" DW_PR_DUx 260 "> <key %s> %s\n", 261 hashpos,chainpos, 262 (Dwarf_Unsigned)(uintptr_t)t, 263 (Dwarf_Unsigned)(uintptr_t)t->keyptr, 264 v, 265 descr); 266 } 267 268 /* For debugging */ 269 static void 270 dumptree_inner(const struct hs_base *h, 271 char *(* keyprint)(const void *), 272 const char *descr, int printdetails) 273 { 274 unsigned long ix = 0; 275 unsigned long tsize = h->tablesize_; 276 struct ts_entry *p = &h->hashtab_[0]; 277 unsigned long hashused = 0; 278 unsigned long maxchainlength = 0; 279 unsigned long chainsgt1 = 0; 280 printf("dumptree head ptr : 0x%08" DW_PR_DUx 281 " size %" DW_PR_DUu 282 " entries %" DW_PR_DUu 283 " allowed %" DW_PR_DUu " %s\n", 284 (Dwarf_Unsigned)(uintptr_t)h, 285 (Dwarf_Unsigned)h->tablesize_, 286 (Dwarf_Unsigned)h->record_count_, 287 (Dwarf_Unsigned)h->allowed_fill_, 288 descr); 289 for( ; ix < tsize; ix++,p++) { 290 unsigned long chainlength = 0; 291 struct ts_entry*n = 0; 292 int chainpos = 0; 293 if(p->entryused) { 294 ++hashused; 295 chainlength = 1; 296 if(printdetails) { 297 print_entry(p,"head",keyprint,ix,chainpos); 298 } 299 } 300 chainpos++; 301 for(n = p->next; n ; n = n->next) { 302 chainlength++; 303 if(printdetails) { 304 print_entry(n,"chain",keyprint,ix,chainpos); 305 } 306 } 307 if(chainlength > maxchainlength) { 308 maxchainlength = chainlength; 309 } 310 if (chainlength > 1) { 311 chainsgt1++; 312 } 313 } 314 printf("Hashtable: %lu of %lu hash entries used.\n",hashused,tsize); 315 printf("Hashtable: %lu chains length longer than 1. \n",chainsgt1); 316 printf("Hashtable: %lu is maximum chain length.\n",maxchainlength); 317 } 318 319 /* Dumping the tree. 320 */ 321 void 322 dwarf_tdump(const void*headp_in, 323 char *(* keyprint)(const void *), 324 const char *msg) 325 { 326 const struct hs_base *head = (const struct hs_base *)headp_in; 327 if(!head) { 328 printf("dumptree null tree ptr : %s\n",msg); 329 return; 330 } 331 dumptree_inner(head,keyprint,msg,1); 332 } 333 334 static struct ts_entry * 335 allocate_ts_entry(const void *key) 336 { 337 struct ts_entry *e = 0; 338 339 e = (struct ts_entry *) malloc(sizeof(struct ts_entry)); 340 if(!e) { 341 return NULL; 342 } 343 e->keyptr = key; 344 e->entryused = 1; 345 e->next = 0; 346 return e; 347 } 348 349 static void 350 resize_table(struct hs_base *head, 351 int (*compar)(const void *, const void *)) 352 { 353 struct hs_base newhead; 354 unsigned new_entry_index = 0; 355 unsigned long prime_to_use = 0; 356 357 /* Copy the values we have. */ 358 newhead = *head; 359 /* But drop the hashtab_ from new. calloc below. */ 360 newhead.hashtab_ = 0; 361 newhead.record_count_ = 0; 362 new_entry_index = head->tablesize_entry_index_ +1; 363 prime_to_use = primes[new_entry_index]; 364 if(prime_to_use == 0) { 365 /* Oops, too large. Leave table size as is, though 366 it will get slow as it overfills. */ 367 return; 368 } 369 newhead.tablesize_ = prime_to_use; 370 newhead.allowed_fill_ = calculate_allowed_fill(allowed_fill_percent, 371 prime_to_use); 372 if( newhead.allowed_fill_< (newhead.tablesize_/2)) { 373 /* Oops. We are in trouble. */ 374 return; 375 } 376 newhead.tablesize_entry_index_ = new_entry_index; 377 newhead.hashtab_ = calloc(sizeof(struct ts_entry),newhead.tablesize_); 378 if(!newhead.hashtab_) { 379 /* Oops, too large. Leave table size as is, though 380 things will get slow as it overfills. */ 381 free(newhead.hashtab_); 382 return; 383 } 384 { 385 /* Insert all the records from the old table into 386 the new table. */ 387 int fillnewfail = 0; 388 unsigned long ix = 0; 389 unsigned long tsize = head->tablesize_; 390 struct ts_entry *p = &head->hashtab_[0]; 391 for( ; ix < tsize; ix++,p++) { 392 int inserted = 0; 393 struct ts_entry*n = 0; 394 if(fillnewfail) { 395 break; 396 } 397 if(p->keyptr) { 398 tsearch_inner(p->keyptr, 399 &newhead,compar, 400 want_insert, 401 &inserted, 402 0); 403 if(!inserted) { 404 fillnewfail = 1; 405 break; 406 } 407 } 408 for(n = p->next; n ; n = n->next) { 409 inserted = 0; 410 tsearch_inner(n->keyptr, 411 &newhead,compar, 412 want_insert, 413 &inserted, 414 0); 415 if(!inserted) { 416 fillnewfail = 1; 417 break; 418 } 419 } 420 } 421 if(fillnewfail) { 422 free(newhead.hashtab_); 423 return; 424 } 425 } 426 /* Now get rid of the chain entries of the old table. */ 427 dwarf_tdestroy_inner(head,0,0); 428 /* Now get rid of the old table itself. */ 429 free(head->hashtab_); 430 head->hashtab_ = 0; 431 *head = newhead; 432 return; 433 } 434 435 /* Inner search of the hash and synonym chains. 436 */ 437 static struct ts_entry * 438 tsearch_inner( const void *key, struct hs_base* head, 439 int (*compar)(const void *, const void *), 440 const enum search_intent_t intent, int*inserted, 441 /* owner_ptr used for delete. Only set 442 if the to-be-deleted item is on a chain, 443 not in the hashtab. Points to the item 444 pointing to the to-be-deleted-item.*/ 445 struct ts_entry **owner_ptr) 446 { 447 struct ts_entry *s =0; 448 struct ts_entry *c =0; 449 struct ts_entry *q =0; 450 int kc = 0; 451 DW_TSHASHTYPE keyhash = 0; 452 DW_TSHASHTYPE hindx = 0; 453 struct ts_entry *chain_parent = 0; 454 455 if(! head->hashfunc_) { 456 /* Not fully initialized. */ 457 return NULL; 458 } 459 keyhash = head->hashfunc_(key); 460 if (intent == want_insert) { 461 if( head->record_count_ > head->allowed_fill_) { 462 resize_table(head,compar); 463 } 464 } 465 hindx = keyhash%head->tablesize_; 466 s = &head->hashtab_[hindx]; 467 if(!s->entryused) { 468 /* Not found. */ 469 if(intent != want_insert) { 470 return NULL; 471 } 472 /* Insert in the base hash table in an 473 empty slot. */ 474 *inserted = 1; 475 head->record_count_++; 476 s->keyptr = (const void *)key; 477 s->entryused = 1; 478 s->next = 0; 479 return s; 480 } 481 kc = compar(key,s->keyptr); 482 if(kc == 0 ) { 483 /* found! */ 484 if(want_delete) { 485 *owner_ptr = 0; 486 } 487 return (void *)&(s->keyptr); 488 } 489 chain_parent = s; 490 for(c = s->next; c; c = c->next) { 491 kc = compar(key,c->keyptr); 492 if(kc == 0 ) { 493 /* found! */ 494 if(want_delete) { 495 *owner_ptr = chain_parent; 496 } 497 return (void *)&(c->keyptr); 498 } 499 chain_parent = c; 500 } 501 if(intent != want_insert) { 502 return NULL; 503 } 504 /* Insert following head record of the chain. */ 505 q = allocate_ts_entry(key); 506 if (!q) { 507 return q; 508 } 509 q->next = s->next; 510 s->next = q; 511 head->record_count_++; 512 *inserted = 1; 513 return q; 514 } 515 /* Search and, if missing, insert. */ 516 void * 517 dwarf_tsearch(const void *key, void **headin, 518 int (*compar)(const void *, const void *)) 519 { 520 struct hs_base **rootp = (struct hs_base **)headin; 521 struct hs_base *head = *rootp; 522 struct ts_entry *r = 0; 523 int inserted = 0; 524 /* nullme won't be set. */ 525 struct ts_entry *nullme = 0; 526 527 if (!head) { 528 /* something is wrong here, not initialized. */ 529 return NULL; 530 } 531 r = tsearch_inner(key,head,compar,want_insert,&inserted,&nullme); 532 if (!r) { 533 return NULL; 534 } 535 return (void *)&(r->keyptr); 536 } 537 538 539 /* Search. */ 540 void * 541 dwarf_tfind(const void *key, void *const *rootp, 542 int (*compar)(const void *, const void *)) 543 { 544 /* Nothing will change, but we discard const 545 so we can use tsearch_inner(). */ 546 struct hs_base **proot = (struct hs_base **)rootp; 547 struct hs_base *head = *proot; 548 struct ts_entry *r = 0; 549 /* inserted flag won't be set. */ 550 int inserted = 0; 551 /* nullme won't be set. */ 552 struct ts_entry * nullme = 0; 553 /* Get to actual tree. */ 554 555 if (!head) { 556 return NULL; 557 } 558 559 r = tsearch_inner(key,head,compar,only_find,&inserted,&nullme); 560 if(!r) { 561 return NULL; 562 } 563 return (void *)(&(r->keyptr)); 564 } 565 566 /* Unlike the simple binary tree case, 567 a fully-empty hash situation does not null the *rootp 568 */ 569 void * 570 dwarf_tdelete(const void *key, void **rootp, 571 int (*compar)(const void *, const void *)) 572 { 573 struct hs_base **proot = (struct hs_base **)rootp; 574 struct hs_base *head = *proot; 575 struct ts_entry *found = 0; 576 /* inserted flag won't be set. */ 577 int inserted = 0; 578 struct ts_entry * parentp = 0; 579 580 if (!head) { 581 return NULL; 582 } 583 584 found = tsearch_inner(key,head,compar,want_delete,&inserted, 585 &parentp); 586 if(found) { 587 if(parentp) { 588 /* Delete a chain entry. */ 589 head->record_count_--; 590 parentp->next = found->next; 591 /* We free our storage. It would be up 592 to caller to do a tfind to find 593 a record and delete content if necessary. */ 594 free(found); 595 return (void *)&(parentp->keyptr); 596 } 597 /* So found is the head of a chain. */ 598 if(found->next) { 599 /* Delete a chain entry, pull up to hash tab, freeing 600 up the chain entry. */ 601 struct ts_entry *pullup = found->next; 602 *found = *pullup; 603 free(pullup); 604 head->record_count_--; 605 return (void *)&(found->keyptr); 606 } else { 607 /* Delete a main hash table entry. 608 Problem: what the heck to return as a keyptr pointer? 609 Well, we return NULL. As in the standard 610 tsearch, returning NULL does not mean 611 failure! Here it just means 'empty chain somewhere'. 612 */ 613 head->record_count_--; 614 found->next = 0; 615 found->keyptr = 0; 616 found->entryused = 0; 617 return NULL; 618 } 619 } 620 return NULL; 621 } 622 623 static void 624 dwarf_twalk_inner(const struct hs_base *h, 625 struct ts_entry *p, 626 void (*action)(const void *nodep, const DW_VISIT which, 627 UNUSEDARG const int depth), 628 UNUSEDARG unsigned level) 629 { 630 unsigned long ix = 0; 631 unsigned long tsize = h->tablesize_; 632 for( ; ix < tsize; ix++,p++) { 633 struct ts_entry*n = 0; 634 if(p->keyptr) { 635 action((void *)(&(p->keyptr)),dwarf_leaf,level); 636 } 637 for(n = p->next; n ; n = n->next) { 638 action((void *)(&(n->keyptr)),dwarf_leaf,level); 639 } 640 } 641 } 642 643 void 644 dwarf_twalk(const void *rootp, 645 void (*action)(const void *nodep, const DW_VISIT which, 646 UNUSEDARG const int depth)) 647 { 648 const struct hs_base *head = (const struct hs_base *)rootp; 649 struct ts_entry *root = 0; 650 if(!head) { 651 return; 652 } 653 root = head->hashtab_; 654 /* Get to actual tree. */ 655 dwarf_twalk_inner(head,root,action,0); 656 } 657 658 static void 659 dwarf_tdestroy_inner(struct hs_base*h, 660 void (*free_node)(void *nodep), 661 UNUSEDARG int depth) 662 { 663 unsigned long ix = 0; 664 unsigned long tsize = h->tablesize_; 665 struct ts_entry *p = &h->hashtab_[0]; 666 for( ; ix < tsize; ix++,p++) { 667 struct ts_entry*n = 0; 668 struct ts_entry*prev = 0; 669 if(p->keyptr && p->entryused) { 670 if(free_node) { 671 free_node((void *)(p->keyptr)); 672 } 673 --h->record_count_; 674 } 675 /* Now walk and free up the chain entries. */ 676 for(n = p->next; n ; ) { 677 if(free_node) { 678 free_node((void *)(n->keyptr)); 679 } 680 --h->record_count_; 681 prev = n; 682 n = n->next; 683 free(prev); 684 } 685 } 686 } 687 688 /* Walk the tree, freeing all space in the tree 689 and calling the user's callback function on each node. 690 691 It is up to the caller to zero out anything pointing to 692 head (ie, that has the value rootp holds) after this 693 returns. 694 */ 695 void 696 dwarf_tdestroy(void *rootp, void (*free_node)(void *nodep)) 697 { 698 struct hs_base *head = (struct hs_base *)rootp; 699 struct ts_entry *root = 0; 700 if(!head) { 701 return; 702 } 703 root = head->hashtab_; 704 dwarf_tdestroy_inner(head,free_node,0); 705 free(root); 706 free(head); 707 } 708