1 /* Copyright (c) 2013-2019, David Anderson
2 All rights reserved.
3
4 Redistribution and use in source and binary forms, with
5 or without modification, are permitted provided that the
6 following conditions are met:
7
8 Redistributions of source code must retain the above
9 copyright notice, this list of conditions and the following
10 disclaimer.
11
12 Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials
15 provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
18 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 */
32
33
34 /* The interfaces follow tsearch (See the Single
35 Unix Specification) but the implementation is
36 written without reference to the source of any
37 version of tsearch or any hashing code.
38
39 An additional interface is added (compared to
40 a real tsearch) to let the caller identify a
41 'hash' function with each hash table (called
42 a tree below, but that is a misnomer).
43
44 So read 'tree' below as hash table.
45
46 See http://www.prevanders.net/tsearch.html
47 for information and an example of use.
48
49 Based on Knuth, chapter 6.4
50
51 This uses a hash based on the key.
52 Collision resolution is by chaining.
53
54 twalk() and tdestroy() walk in a random order.
55 The 'preorder' etc labels mean nothing in a hash, so everything
56 is called a leaf.
57
58 */
59
60
61 #include "config.h"
62 #ifdef HAVE_UNUSED_ATTRIBUTE
63 #define UNUSEDARG __attribute__ ((unused))
64 #else
65 #define UNUSEDARG
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include "stdlib.h" /* for malloc, free() etc */
69 #endif /* HAVE_STDLIB_H */
70 #ifdef HAVE_MALLOC_H
71 /* Useful include for some Windows compilers. */
72 #include <malloc.h>
73 #endif /* HAVE_MALLOC_H */
74 #include <stdio.h> /* for printf() */
75 #ifdef HAVE_STDINT_H
76 #include <stdint.h> /* for uintptr_t */
77 #endif /* HAVE_STDINT_H */
78 /* This must match the types and print options
79 found in libdwarf.h. */
80 #define Dwarf_Unsigned unsigned long long
81 #if defined(_WIN32) && defined(HAVE_NONSTANDARD_PRINTF_64_FORMAT)
82 #define DW_PR_DUx "I64x"
83 #define DW_PR_DUu "I64u"
84 #else
85 #define DW_PR_DUx "llx"
86 #define DW_PR_DUu "llu"
87 #endif /* DW_PR defines */
88 #include "dwarf_tsearch.h"
89
90 /* A table of primes used to size and resize the hash table.
91 From public sources of prime numbers, arbitrarily chosen
92 to approximately double in size at each step.
93 */
94 static unsigned long primes[] =
95 {
96 #if 0 /* for testing only */
97 5,11, 17,23, 31, 47, 53,
98 #endif
99 79,
100 1009,
101 5591,
102 10007,
103 21839,
104 41413,
105 99907,
106 199967,
107 400009,
108 800029,
109 1600141,
110 3000089,
111 6000121,
112 12000257,
113 24000143,
114 48000203,
115 100000127,
116 200001611,
117 400000669,
118 800000573,
119 0 /* Here we are giving up */
120 };
121
122 static unsigned long allowed_fill_percent = 90;
123
124
125 struct hs_base {
126 unsigned long tablesize_;
127 unsigned long tablesize_entry_index_;
128 unsigned long allowed_fill_;
129 /* Record_count means number of active records,
130 counting all records on chains.
131 When the record_count is > 90% of a full
132 tablesize_ we redo the table before adding
133 a new entry. */
134 unsigned long record_count_;
135 /* hashtab_ is an array of hs_entry,
136 indexes 0 through tablesize_ -1. */
137 struct ts_entry * hashtab_;
138 DW_TSHASHTYPE (*hashfunc_)(const void *key);
139 };
140
141 struct ts_entry {
142 const void * keyptr;
143 /* So that a keyptr of 0 (if added) is
144 not confused with an empty hash slot,
145 we must mark used slots as used in the hash tab */
146 unsigned char entryused;
147 struct ts_entry *next;
148 };
149
150 enum search_intent_t
151 {
152 want_insert,
153 only_find,
154 want_delete
155 };
156
157 static struct ts_entry *
158 tsearch_inner( const void *key, struct hs_base* head,
159 int (*compar)(const void *, const void *),
160 const enum search_intent_t intent, int*inserted,
161 struct ts_entry **parent_ptr);
162 static void
163 dwarf_tdestroy_inner(struct hs_base*h,
164 void (*free_node)(void *nodep),
165 int depth);
166
167
168 /* A trivial integer-based percentage calculation.
169 Percents >100 are reasonable for a hash-with-chains
170 situation (even if they might not be the best choice
171 for performance). */
172 static unsigned long
calculate_allowed_fill(unsigned long fill_percent,unsigned long ct)173 calculate_allowed_fill(unsigned long fill_percent, unsigned long ct)
174 {
175 unsigned long v = 0;
176 if(ct < 100000) {
177 unsigned long v2 = (ct *fill_percent)/100;
178 return v2;
179 }
180 v = (ct /100)*fill_percent;
181 return v;
182 }
183
184 /* Initialize the hash and pass in the hash function.
185 If the entry count needed is unknown, pass in 0 as a count estimate,
186 but if the number of hash entries needed can be estimated,
187 pass in the estimate (which need not be prime, we actually use
188 the nearest higher prime from the above table).
189 If the estimated count is
190 Return the tree base, or return NULL if insufficient memory. */
191 void *
dwarf_initialize_search_hash(void ** treeptr,DW_TSHASHTYPE (* hashfunc)(const void * key),unsigned long size_estimate)192 dwarf_initialize_search_hash( void **treeptr,
193 DW_TSHASHTYPE(*hashfunc)(const void *key),
194 unsigned long size_estimate)
195 {
196 unsigned long prime_to_use = primes[0];
197 unsigned entry_index = 0;
198 unsigned k = 0;
199 struct hs_base *base = 0;
200
201 base = *(struct hs_base **)treeptr;
202 if(base) {
203 /* initalized already. */
204 return base ;
205 }
206 base = calloc(sizeof(struct hs_base),1);
207 if(!base) {
208 /* Out of memory. */
209 return NULL ;
210 }
211 prime_to_use = primes[0];
212 while(size_estimate && (size_estimate > prime_to_use)) {
213 k = k +1;
214 prime_to_use = primes[k];
215 if(prime_to_use == 0) {
216 /* Oops. Too large. */
217 free(base);
218 return NULL;
219 }
220 entry_index = k;
221 }
222 base->tablesize_ = prime_to_use;
223 base->allowed_fill_ = calculate_allowed_fill(allowed_fill_percent,
224 prime_to_use);
225 if( base->allowed_fill_< (base->tablesize_/2)) {
226 free(base);
227 /* Oops. We are in trouble. Coding mistake here. */
228 return NULL;
229 }
230 base->record_count_ = 0;
231 base->tablesize_entry_index_ = entry_index;
232 /* hashtab_ is an array of hs_entry,
233 indexes 0 through tablesize_ -1. */
234 base->hashfunc_ = hashfunc;
235 base->hashtab_ = calloc(sizeof(struct ts_entry),base->tablesize_);
236 if(!base->hashtab_) {
237 free(base);
238 return NULL;
239 }
240 *treeptr = base;
241 return base;
242 }
243
244
245 /* We don't really care whether hashpos or chainpos
246 are 32 or 64 bits. 32 suffices. */
print_entry(struct ts_entry * t,const char * descr,char * (* keyprint)(const void *),unsigned long hashpos,unsigned long chainpos)247 static void print_entry(struct ts_entry *t,const char *descr,
248 char *(* keyprint)(const void *),
249 unsigned long hashpos,
250 unsigned long chainpos)
251 {
252 char *v = 0;
253 if(!t->entryused) {
254 return;
255 }
256 v = keyprint(t->keyptr);
257 printf(
258 "[%4lu.%02lu] 0x%08" DW_PR_DUx
259 " <keyptr 0x%08" DW_PR_DUx
260 "> <key %s> %s\n",
261 hashpos,chainpos,
262 (Dwarf_Unsigned)(uintptr_t)t,
263 (Dwarf_Unsigned)(uintptr_t)t->keyptr,
264 v,
265 descr);
266 }
267
268 /* For debugging */
269 static void
dumptree_inner(const struct hs_base * h,char * (* keyprint)(const void *),const char * descr,int printdetails)270 dumptree_inner(const struct hs_base *h,
271 char *(* keyprint)(const void *),
272 const char *descr, int printdetails)
273 {
274 unsigned long ix = 0;
275 unsigned long tsize = h->tablesize_;
276 struct ts_entry *p = &h->hashtab_[0];
277 unsigned long hashused = 0;
278 unsigned long maxchainlength = 0;
279 unsigned long chainsgt1 = 0;
280 printf("dumptree head ptr : 0x%08" DW_PR_DUx
281 " size %" DW_PR_DUu
282 " entries %" DW_PR_DUu
283 " allowed %" DW_PR_DUu " %s\n",
284 (Dwarf_Unsigned)(uintptr_t)h,
285 (Dwarf_Unsigned)h->tablesize_,
286 (Dwarf_Unsigned)h->record_count_,
287 (Dwarf_Unsigned)h->allowed_fill_,
288 descr);
289 for( ; ix < tsize; ix++,p++) {
290 unsigned long chainlength = 0;
291 struct ts_entry*n = 0;
292 int chainpos = 0;
293 if(p->entryused) {
294 ++hashused;
295 chainlength = 1;
296 if(printdetails) {
297 print_entry(p,"head",keyprint,ix,chainpos);
298 }
299 }
300 chainpos++;
301 for(n = p->next; n ; n = n->next) {
302 chainlength++;
303 if(printdetails) {
304 print_entry(n,"chain",keyprint,ix,chainpos);
305 }
306 }
307 if(chainlength > maxchainlength) {
308 maxchainlength = chainlength;
309 }
310 if (chainlength > 1) {
311 chainsgt1++;
312 }
313 }
314 printf("Hashtable: %lu of %lu hash entries used.\n",hashused,tsize);
315 printf("Hashtable: %lu chains length longer than 1. \n",chainsgt1);
316 printf("Hashtable: %lu is maximum chain length.\n",maxchainlength);
317 }
318
319 /* Dumping the tree.
320 */
321 void
dwarf_tdump(const void * headp_in,char * (* keyprint)(const void *),const char * msg)322 dwarf_tdump(const void*headp_in,
323 char *(* keyprint)(const void *),
324 const char *msg)
325 {
326 const struct hs_base *head = (const struct hs_base *)headp_in;
327 if(!head) {
328 printf("dumptree null tree ptr : %s\n",msg);
329 return;
330 }
331 dumptree_inner(head,keyprint,msg,1);
332 }
333
334 static struct ts_entry *
allocate_ts_entry(const void * key)335 allocate_ts_entry(const void *key)
336 {
337 struct ts_entry *e = 0;
338
339 e = (struct ts_entry *) malloc(sizeof(struct ts_entry));
340 if(!e) {
341 return NULL;
342 }
343 e->keyptr = key;
344 e->entryused = 1;
345 e->next = 0;
346 return e;
347 }
348
349 static void
resize_table(struct hs_base * head,int (* compar)(const void *,const void *))350 resize_table(struct hs_base *head,
351 int (*compar)(const void *, const void *))
352 {
353 struct hs_base newhead;
354 unsigned new_entry_index = 0;
355 unsigned long prime_to_use = 0;
356
357 /* Copy the values we have. */
358 newhead = *head;
359 /* But drop the hashtab_ from new. calloc below. */
360 newhead.hashtab_ = 0;
361 newhead.record_count_ = 0;
362 new_entry_index = head->tablesize_entry_index_ +1;
363 prime_to_use = primes[new_entry_index];
364 if(prime_to_use == 0) {
365 /* Oops, too large. Leave table size as is, though
366 it will get slow as it overfills. */
367 return;
368 }
369 newhead.tablesize_ = prime_to_use;
370 newhead.allowed_fill_ = calculate_allowed_fill(allowed_fill_percent,
371 prime_to_use);
372 if( newhead.allowed_fill_< (newhead.tablesize_/2)) {
373 /* Oops. We are in trouble. */
374 return;
375 }
376 newhead.tablesize_entry_index_ = new_entry_index;
377 newhead.hashtab_ = calloc(sizeof(struct ts_entry),newhead.tablesize_);
378 if(!newhead.hashtab_) {
379 /* Oops, too large. Leave table size as is, though
380 things will get slow as it overfills. */
381 free(newhead.hashtab_);
382 return;
383 }
384 {
385 /* Insert all the records from the old table into
386 the new table. */
387 int fillnewfail = 0;
388 unsigned long ix = 0;
389 unsigned long tsize = head->tablesize_;
390 struct ts_entry *p = &head->hashtab_[0];
391 for( ; ix < tsize; ix++,p++) {
392 int inserted = 0;
393 struct ts_entry*n = 0;
394 if(fillnewfail) {
395 break;
396 }
397 if(p->keyptr) {
398 tsearch_inner(p->keyptr,
399 &newhead,compar,
400 want_insert,
401 &inserted,
402 0);
403 if(!inserted) {
404 fillnewfail = 1;
405 break;
406 }
407 }
408 for(n = p->next; n ; n = n->next) {
409 inserted = 0;
410 tsearch_inner(n->keyptr,
411 &newhead,compar,
412 want_insert,
413 &inserted,
414 0);
415 if(!inserted) {
416 fillnewfail = 1;
417 break;
418 }
419 }
420 }
421 if(fillnewfail) {
422 free(newhead.hashtab_);
423 return;
424 }
425 }
426 /* Now get rid of the chain entries of the old table. */
427 dwarf_tdestroy_inner(head,0,0);
428 /* Now get rid of the old table itself. */
429 free(head->hashtab_);
430 head->hashtab_ = 0;
431 *head = newhead;
432 return;
433 }
434
435 /* Inner search of the hash and synonym chains.
436 */
437 static struct ts_entry *
tsearch_inner(const void * key,struct hs_base * head,int (* compar)(const void *,const void *),const enum search_intent_t intent,int * inserted,struct ts_entry ** owner_ptr)438 tsearch_inner( const void *key, struct hs_base* head,
439 int (*compar)(const void *, const void *),
440 const enum search_intent_t intent, int*inserted,
441 /* owner_ptr used for delete. Only set
442 if the to-be-deleted item is on a chain,
443 not in the hashtab. Points to the item
444 pointing to the to-be-deleted-item.*/
445 struct ts_entry **owner_ptr)
446 {
447 struct ts_entry *s =0;
448 struct ts_entry *c =0;
449 struct ts_entry *q =0;
450 int kc = 0;
451 DW_TSHASHTYPE keyhash = 0;
452 DW_TSHASHTYPE hindx = 0;
453 struct ts_entry *chain_parent = 0;
454
455 if(! head->hashfunc_) {
456 /* Not fully initialized. */
457 return NULL;
458 }
459 keyhash = head->hashfunc_(key);
460 if (intent == want_insert) {
461 if( head->record_count_ > head->allowed_fill_) {
462 resize_table(head,compar);
463 }
464 }
465 hindx = keyhash%head->tablesize_;
466 s = &head->hashtab_[hindx];
467 if(!s->entryused) {
468 /* Not found. */
469 if(intent != want_insert) {
470 return NULL;
471 }
472 /* Insert in the base hash table in an
473 empty slot. */
474 *inserted = 1;
475 head->record_count_++;
476 s->keyptr = (const void *)key;
477 s->entryused = 1;
478 s->next = 0;
479 return s;
480 }
481 kc = compar(key,s->keyptr);
482 if(kc == 0 ) {
483 /* found! */
484 if(want_delete) {
485 *owner_ptr = 0;
486 }
487 return (void *)&(s->keyptr);
488 }
489 chain_parent = s;
490 for(c = s->next; c; c = c->next) {
491 kc = compar(key,c->keyptr);
492 if(kc == 0 ) {
493 /* found! */
494 if(want_delete) {
495 *owner_ptr = chain_parent;
496 }
497 return (void *)&(c->keyptr);
498 }
499 chain_parent = c;
500 }
501 if(intent != want_insert) {
502 return NULL;
503 }
504 /* Insert following head record of the chain. */
505 q = allocate_ts_entry(key);
506 if (!q) {
507 return q;
508 }
509 q->next = s->next;
510 s->next = q;
511 head->record_count_++;
512 *inserted = 1;
513 return q;
514 }
515 /* Search and, if missing, insert. */
516 void *
dwarf_tsearch(const void * key,void ** headin,int (* compar)(const void *,const void *))517 dwarf_tsearch(const void *key, void **headin,
518 int (*compar)(const void *, const void *))
519 {
520 struct hs_base **rootp = (struct hs_base **)headin;
521 struct hs_base *head = *rootp;
522 struct ts_entry *r = 0;
523 int inserted = 0;
524 /* nullme won't be set. */
525 struct ts_entry *nullme = 0;
526
527 if (!head) {
528 /* something is wrong here, not initialized. */
529 return NULL;
530 }
531 r = tsearch_inner(key,head,compar,want_insert,&inserted,&nullme);
532 if (!r) {
533 return NULL;
534 }
535 return (void *)&(r->keyptr);
536 }
537
538
539 /* Search. */
540 void *
dwarf_tfind(const void * key,void * const * rootp,int (* compar)(const void *,const void *))541 dwarf_tfind(const void *key, void *const *rootp,
542 int (*compar)(const void *, const void *))
543 {
544 /* Nothing will change, but we discard const
545 so we can use tsearch_inner(). */
546 struct hs_base **proot = (struct hs_base **)rootp;
547 struct hs_base *head = *proot;
548 struct ts_entry *r = 0;
549 /* inserted flag won't be set. */
550 int inserted = 0;
551 /* nullme won't be set. */
552 struct ts_entry * nullme = 0;
553 /* Get to actual tree. */
554
555 if (!head) {
556 return NULL;
557 }
558
559 r = tsearch_inner(key,head,compar,only_find,&inserted,&nullme);
560 if(!r) {
561 return NULL;
562 }
563 return (void *)(&(r->keyptr));
564 }
565
566 /* Unlike the simple binary tree case,
567 a fully-empty hash situation does not null the *rootp
568 */
569 void *
dwarf_tdelete(const void * key,void ** rootp,int (* compar)(const void *,const void *))570 dwarf_tdelete(const void *key, void **rootp,
571 int (*compar)(const void *, const void *))
572 {
573 struct hs_base **proot = (struct hs_base **)rootp;
574 struct hs_base *head = *proot;
575 struct ts_entry *found = 0;
576 /* inserted flag won't be set. */
577 int inserted = 0;
578 struct ts_entry * parentp = 0;
579
580 if (!head) {
581 return NULL;
582 }
583
584 found = tsearch_inner(key,head,compar,want_delete,&inserted,
585 &parentp);
586 if(found) {
587 if(parentp) {
588 /* Delete a chain entry. */
589 head->record_count_--;
590 parentp->next = found->next;
591 /* We free our storage. It would be up
592 to caller to do a tfind to find
593 a record and delete content if necessary. */
594 free(found);
595 return (void *)&(parentp->keyptr);
596 }
597 /* So found is the head of a chain. */
598 if(found->next) {
599 /* Delete a chain entry, pull up to hash tab, freeing
600 up the chain entry. */
601 struct ts_entry *pullup = found->next;
602 *found = *pullup;
603 free(pullup);
604 head->record_count_--;
605 return (void *)&(found->keyptr);
606 } else {
607 /* Delete a main hash table entry.
608 Problem: what the heck to return as a keyptr pointer?
609 Well, we return NULL. As in the standard
610 tsearch, returning NULL does not mean
611 failure! Here it just means 'empty chain somewhere'.
612 */
613 head->record_count_--;
614 found->next = 0;
615 found->keyptr = 0;
616 found->entryused = 0;
617 return NULL;
618 }
619 }
620 return NULL;
621 }
622
623 static void
dwarf_twalk_inner(const struct hs_base * h,struct ts_entry * p,void (* action)(const void * nodep,const DW_VISIT which,UNUSEDARG const int depth),UNUSEDARG unsigned level)624 dwarf_twalk_inner(const struct hs_base *h,
625 struct ts_entry *p,
626 void (*action)(const void *nodep, const DW_VISIT which,
627 UNUSEDARG const int depth),
628 UNUSEDARG unsigned level)
629 {
630 unsigned long ix = 0;
631 unsigned long tsize = h->tablesize_;
632 for( ; ix < tsize; ix++,p++) {
633 struct ts_entry*n = 0;
634 if(p->keyptr) {
635 action((void *)(&(p->keyptr)),dwarf_leaf,level);
636 }
637 for(n = p->next; n ; n = n->next) {
638 action((void *)(&(n->keyptr)),dwarf_leaf,level);
639 }
640 }
641 }
642
643 void
dwarf_twalk(const void * rootp,void (* action)(const void * nodep,const DW_VISIT which,UNUSEDARG const int depth))644 dwarf_twalk(const void *rootp,
645 void (*action)(const void *nodep, const DW_VISIT which,
646 UNUSEDARG const int depth))
647 {
648 const struct hs_base *head = (const struct hs_base *)rootp;
649 struct ts_entry *root = 0;
650 if(!head) {
651 return;
652 }
653 root = head->hashtab_;
654 /* Get to actual tree. */
655 dwarf_twalk_inner(head,root,action,0);
656 }
657
658 static void
dwarf_tdestroy_inner(struct hs_base * h,void (* free_node)(void * nodep),UNUSEDARG int depth)659 dwarf_tdestroy_inner(struct hs_base*h,
660 void (*free_node)(void *nodep),
661 UNUSEDARG int depth)
662 {
663 unsigned long ix = 0;
664 unsigned long tsize = h->tablesize_;
665 struct ts_entry *p = &h->hashtab_[0];
666 for( ; ix < tsize; ix++,p++) {
667 struct ts_entry*n = 0;
668 struct ts_entry*prev = 0;
669 if(p->keyptr && p->entryused) {
670 if(free_node) {
671 free_node((void *)(p->keyptr));
672 }
673 --h->record_count_;
674 }
675 /* Now walk and free up the chain entries. */
676 for(n = p->next; n ; ) {
677 if(free_node) {
678 free_node((void *)(n->keyptr));
679 }
680 --h->record_count_;
681 prev = n;
682 n = n->next;
683 free(prev);
684 }
685 }
686 }
687
688 /* Walk the tree, freeing all space in the tree
689 and calling the user's callback function on each node.
690
691 It is up to the caller to zero out anything pointing to
692 head (ie, that has the value rootp holds) after this
693 returns.
694 */
695 void
dwarf_tdestroy(void * rootp,void (* free_node)(void * nodep))696 dwarf_tdestroy(void *rootp, void (*free_node)(void *nodep))
697 {
698 struct hs_base *head = (struct hs_base *)rootp;
699 struct ts_entry *root = 0;
700 if(!head) {
701 return;
702 }
703 root = head->hashtab_;
704 dwarf_tdestroy_inner(head,free_node,0);
705 free(root);
706 free(head);
707 }
708