xref: /illumos-gate/usr/src/lib/libdwarf/common/dwarf_tsearchhash.c (revision ceab728f83b0af9260d2d3fb69014f3781af2101)
1 /* Copyright (c) 2013-2019, David Anderson
2 All rights reserved.
3 
4 Redistribution and use in source and binary forms, with
5 or without modification, are permitted provided that the
6 following conditions are met:
7 
8     Redistributions of source code must retain the above
9     copyright notice, this list of conditions and the following
10     disclaimer.
11 
12     Redistributions in binary form must reproduce the above
13     copyright notice, this list of conditions and the following
14     disclaimer in the documentation and/or other materials
15     provided with the distribution.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
18 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 */
32 
33 
34 /*  The interfaces follow tsearch (See the Single
35     Unix Specification) but the implementation is
36     written without reference to the source of any
37     version of tsearch or any hashing code.
38 
39     An additional interface is added (compared to
40     a real tsearch) to let the caller identify a
41     'hash' function with each hash table (called
42     a tree below, but that is a misnomer).
43 
44     So read 'tree' below as hash table.
45 
46     See http://www.prevanders.net/tsearch.html
47     for information and an example of use.
48 
49     Based on Knuth, chapter 6.4
50 
51     This uses a hash based on the key.
52     Collision resolution is by chaining.
53 
54     twalk() and tdestroy() walk in a random order.
55     The 'preorder' etc labels mean nothing in a hash, so everything
56     is called a leaf.
57 
58 */
59 
60 
61 #include "config.h"
62 #ifdef HAVE_UNUSED_ATTRIBUTE
63 #define  UNUSEDARG __attribute__ ((unused))
64 #else
65 #define  UNUSEDARG
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include "stdlib.h" /* for malloc, free() etc */
69 #endif /* HAVE_STDLIB_H */
70 #ifdef HAVE_MALLOC_H
71 /* Useful include for some Windows compilers. */
72 #include <malloc.h>
73 #endif /* HAVE_MALLOC_H */
74 #include <stdio.h>  /* for printf() */
75 #ifdef HAVE_STDINT_H
76 #include <stdint.h> /* for uintptr_t */
77 #endif /* HAVE_STDINT_H */
78 /*  This must match the types and print options
79     found in libdwarf.h.  */
80 #define Dwarf_Unsigned unsigned long long
81 #if defined(_WIN32) && defined(HAVE_NONSTANDARD_PRINTF_64_FORMAT)
82 #define DW_PR_DUx "I64x"
83 #define DW_PR_DUu "I64u"
84 #else
85 #define DW_PR_DUx "llx"
86 #define DW_PR_DUu "llu"
87 #endif /* DW_PR defines */
88 #include "dwarf_tsearch.h"
89 
90 /*  A table of primes used to size  and resize the hash table.
91     From public sources of prime numbers, arbitrarily chosen
92     to approximately double in size at each step.
93 */
94 static unsigned long primes[] =
95 {
96 #if 0 /* for testing only */
97 5,11, 17,23, 31, 47, 53,
98 #endif
99 79,
100 1009,
101 5591,
102 10007,
103 21839,
104 41413,
105 99907,
106 199967,
107 400009,
108 800029,
109 1600141,
110 3000089,
111 6000121,
112 12000257,
113 24000143,
114 48000203,
115 100000127,
116 200001611,
117 400000669,
118 800000573,
119 0 /* Here we are giving up */
120 };
121 
122 static unsigned long allowed_fill_percent = 90;
123 
124 
125 struct hs_base {
126     unsigned long tablesize_;
127     unsigned long tablesize_entry_index_;
128     unsigned long allowed_fill_;
129     /* Record_count means number of active records,
130         counting all records on chains.
131         When the record_count is > 90% of a full
132         tablesize_ we redo the table before adding
133         a new entry.  */
134     unsigned long record_count_;
135     /*  hashtab_ is an array of hs_entry,
136         indexes 0 through tablesize_ -1. */
137     struct ts_entry * hashtab_;
138     DW_TSHASHTYPE (*hashfunc_)(const void *key);
139 };
140 
141 struct ts_entry {
142     const void * keyptr;
143     /*  So that a keyptr of 0 (if added) is
144         not confused with an empty hash slot,
145         we must mark used slots as used in the hash tab */
146     unsigned char entryused;
147     struct ts_entry *next;
148 };
149 
150 enum search_intent_t
151 {
152     want_insert,
153     only_find,
154     want_delete
155 };
156 
157 static struct ts_entry *
158 tsearch_inner( const void *key, struct hs_base* head,
159     int (*compar)(const void *, const void *),
160     const enum search_intent_t intent, int*inserted,
161     struct ts_entry **parent_ptr);
162 static void
163 dwarf_tdestroy_inner(struct hs_base*h,
164     void (*free_node)(void *nodep),
165     int depth);
166 
167 
168 /*  A trivial integer-based percentage calculation.
169     Percents >100 are reasonable for a hash-with-chains
170     situation (even if they might not be the best choice
171     for performance). */
172 static unsigned long
173 calculate_allowed_fill(unsigned long fill_percent, unsigned long ct)
174 {
175     unsigned long v = 0;
176     if(ct < 100000) {
177         unsigned long v2 = (ct *fill_percent)/100;
178         return v2;
179     }
180     v = (ct /100)*fill_percent;
181     return v;
182 }
183 
184 /* Initialize the hash and pass in the hash function.
185    If the entry count needed is unknown, pass in  0 as a count estimate,
186    but if the number of hash entries needed can be estimated,
187    pass in the estimate (which need not be prime, we actually use
188    the nearest higher prime from the above table).
189    If the estimated count is
190    Return the tree base, or return NULL if insufficient memory. */
191 void *
192 dwarf_initialize_search_hash( void **treeptr,
193     DW_TSHASHTYPE(*hashfunc)(const void *key),
194     unsigned long size_estimate)
195 {
196     unsigned long prime_to_use = primes[0];
197     unsigned entry_index = 0;
198     unsigned k = 0;
199     struct hs_base *base = 0;
200 
201     base = *(struct hs_base **)treeptr;
202     if(base) {
203         /* initalized already. */
204         return base ;
205     }
206     base = calloc(sizeof(struct hs_base),1);
207     if(!base) {
208         /* Out of memory. */
209         return NULL ;
210     }
211     prime_to_use = primes[0];
212     while(size_estimate && (size_estimate > prime_to_use)) {
213         k = k +1;
214         prime_to_use = primes[k];
215         if(prime_to_use == 0) {
216             /* Oops. Too large. */
217             free(base);
218             return NULL;
219         }
220         entry_index = k;
221     }
222     base->tablesize_ = prime_to_use;
223     base->allowed_fill_ = calculate_allowed_fill(allowed_fill_percent,
224         prime_to_use);
225     if( base->allowed_fill_< (base->tablesize_/2)) {
226         free(base);
227         /* Oops. We are in trouble. Coding mistake here.  */
228         return NULL;
229     }
230     base->record_count_ = 0;
231     base->tablesize_entry_index_ = entry_index;
232     /*  hashtab_ is an array of hs_entry,
233         indexes 0 through tablesize_ -1. */
234     base->hashfunc_ = hashfunc;
235     base->hashtab_ = calloc(sizeof(struct ts_entry),base->tablesize_);
236     if(!base->hashtab_) {
237         free(base);
238         return NULL;
239     }
240     *treeptr = base;
241     return base;
242 }
243 
244 
245 /*  We don't really care whether hashpos or chainpos
246     are 32 or 64 bits. 32 suffices. */
247 static void print_entry(struct ts_entry *t,const char *descr,
248     char *(* keyprint)(const void *),
249     unsigned long hashpos,
250     unsigned long chainpos)
251 {
252     char *v = 0;
253     if(!t->entryused) {
254         return;
255     }
256     v = keyprint(t->keyptr);
257     printf(
258         "[%4lu.%02lu] 0x%08" DW_PR_DUx
259         " <keyptr 0x%08" DW_PR_DUx
260         "> <key %s> %s\n",
261         hashpos,chainpos,
262         (Dwarf_Unsigned)(uintptr_t)t,
263         (Dwarf_Unsigned)(uintptr_t)t->keyptr,
264         v,
265         descr);
266 }
267 
268 /* For debugging */
269 static void
270 dumptree_inner(const struct hs_base *h,
271     char *(* keyprint)(const void *),
272     const char *descr, int printdetails)
273 {
274     unsigned long ix = 0;
275     unsigned long tsize = h->tablesize_;
276     struct ts_entry *p = &h->hashtab_[0];
277     unsigned long hashused = 0;
278     unsigned long maxchainlength = 0;
279     unsigned long chainsgt1 = 0;
280     printf("dumptree head ptr : 0x%08" DW_PR_DUx
281         " size %"    DW_PR_DUu
282         " entries %" DW_PR_DUu
283         " allowed %" DW_PR_DUu " %s\n",
284         (Dwarf_Unsigned)(uintptr_t)h,
285         (Dwarf_Unsigned)h->tablesize_,
286         (Dwarf_Unsigned)h->record_count_,
287         (Dwarf_Unsigned)h->allowed_fill_,
288         descr);
289     for(  ; ix < tsize; ix++,p++) {
290         unsigned long chainlength = 0;
291         struct ts_entry*n = 0;
292         int chainpos = 0;
293         if(p->entryused) {
294             ++hashused;
295             chainlength = 1;
296             if(printdetails) {
297                 print_entry(p,"head",keyprint,ix,chainpos);
298             }
299         }
300         chainpos++;
301         for(n = p->next; n ; n = n->next) {
302             chainlength++;
303             if(printdetails) {
304                 print_entry(n,"chain",keyprint,ix,chainpos);
305             }
306         }
307         if(chainlength > maxchainlength) {
308             maxchainlength = chainlength;
309         }
310         if (chainlength > 1) {
311             chainsgt1++;
312         }
313     }
314     printf("Hashtable: %lu of %lu hash entries used.\n",hashused,tsize);
315     printf("Hashtable: %lu chains length longer than 1. \n",chainsgt1);
316     printf("Hashtable: %lu is maximum chain length.\n",maxchainlength);
317 }
318 
319 /*  Dumping the tree.
320     */
321 void
322 dwarf_tdump(const void*headp_in,
323     char *(* keyprint)(const void *),
324     const char *msg)
325 {
326     const struct hs_base *head = (const struct hs_base *)headp_in;
327     if(!head) {
328         printf("dumptree null tree ptr : %s\n",msg);
329         return;
330     }
331     dumptree_inner(head,keyprint,msg,1);
332 }
333 
334 static struct ts_entry *
335 allocate_ts_entry(const void *key)
336 {
337     struct ts_entry *e =  0;
338 
339     e = (struct ts_entry *) malloc(sizeof(struct ts_entry));
340     if(!e) {
341         return NULL;
342     }
343     e->keyptr = key;
344     e->entryused = 1;
345     e->next = 0;
346     return e;
347 }
348 
349 static void
350 resize_table(struct hs_base *head,
351     int (*compar)(const void *, const void *))
352 {
353     struct hs_base newhead;
354     unsigned new_entry_index = 0;
355     unsigned long prime_to_use = 0;
356 
357     /* Copy the values we have. */
358     newhead = *head;
359     /* But drop the hashtab_ from new. calloc below. */
360     newhead.hashtab_ = 0;
361     newhead.record_count_ = 0;
362     new_entry_index = head->tablesize_entry_index_ +1;
363     prime_to_use = primes[new_entry_index];
364     if(prime_to_use == 0) {
365         /*  Oops, too large. Leave table size as is, though
366             it will get slow as it overfills. */
367         return;
368     }
369     newhead.tablesize_ = prime_to_use;
370     newhead.allowed_fill_ = calculate_allowed_fill(allowed_fill_percent,
371         prime_to_use);
372     if( newhead.allowed_fill_< (newhead.tablesize_/2)) {
373         /* Oops. We are in trouble.  */
374         return;
375     }
376     newhead.tablesize_entry_index_ = new_entry_index;
377     newhead.hashtab_ = calloc(sizeof(struct ts_entry),newhead.tablesize_);
378     if(!newhead.hashtab_) {
379         /*  Oops, too large. Leave table size as is, though
380             things will get slow as it overfills. */
381         free(newhead.hashtab_);
382         return;
383     }
384     {
385         /*  Insert all the records from the old table into
386             the new table. */
387         int fillnewfail = 0;
388         unsigned long ix = 0;
389         unsigned long tsize = head->tablesize_;
390         struct ts_entry *p = &head->hashtab_[0];
391         for(  ; ix < tsize; ix++,p++) {
392             int inserted = 0;
393             struct ts_entry*n = 0;
394             if(fillnewfail) {
395                 break;
396             }
397             if(p->keyptr) {
398                 tsearch_inner(p->keyptr,
399                     &newhead,compar,
400                     want_insert,
401                     &inserted,
402                     0);
403                 if(!inserted) {
404                     fillnewfail = 1;
405                     break;
406                 }
407             }
408             for(n = p->next; n ; n = n->next) {
409                 inserted = 0;
410                 tsearch_inner(n->keyptr,
411                     &newhead,compar,
412                     want_insert,
413                     &inserted,
414                     0);
415                 if(!inserted) {
416                     fillnewfail = 1;
417                     break;
418                 }
419             }
420         }
421         if(fillnewfail) {
422             free(newhead.hashtab_);
423             return;
424         }
425     }
426     /* Now get rid of the chain entries of the old table. */
427     dwarf_tdestroy_inner(head,0,0);
428     /* Now get rid of the old table itself. */
429     free(head->hashtab_);
430     head->hashtab_ = 0;
431     *head = newhead;
432     return;
433 }
434 
435 /*   Inner search of the hash and synonym chains.
436   */
437 static struct ts_entry *
438 tsearch_inner( const void *key, struct hs_base* head,
439     int (*compar)(const void *, const void *),
440     const enum search_intent_t intent, int*inserted,
441     /* owner_ptr used for delete.  Only set
442         if the to-be-deleted item is on a chain,
443         not in the hashtab. Points to the item
444         pointing to the to-be-deleted-item.*/
445     struct ts_entry **owner_ptr)
446 {
447     struct ts_entry *s =0;
448     struct ts_entry *c =0;
449     struct ts_entry *q =0;
450     int kc = 0;
451     DW_TSHASHTYPE keyhash =  0;
452     DW_TSHASHTYPE hindx = 0;
453     struct ts_entry *chain_parent = 0;
454 
455     if(! head->hashfunc_) {
456         /* Not fully initialized. */
457         return NULL;
458     }
459     keyhash =  head->hashfunc_(key);
460     if (intent == want_insert) {
461         if( head->record_count_ > head->allowed_fill_) {
462             resize_table(head,compar);
463         }
464     }
465     hindx = keyhash%head->tablesize_;
466     s = &head->hashtab_[hindx];
467     if(!s->entryused) {
468         /* Not found. */
469         if(intent != want_insert) {
470             return NULL;
471         }
472         /*  Insert in the base hash table in an
473             empty slot. */
474         *inserted = 1;
475         head->record_count_++;
476         s->keyptr = (const void *)key;
477         s->entryused = 1;
478         s->next = 0;
479         return s;
480     }
481     kc = compar(key,s->keyptr);
482     if(kc == 0 ) {
483         /* found! */
484         if(want_delete) {
485             *owner_ptr = 0;
486         }
487         return (void *)&(s->keyptr);
488     }
489     chain_parent = s;
490     for(c = s->next; c; c = c->next)  {
491         kc = compar(key,c->keyptr);
492         if(kc == 0 ) {
493             /* found! */
494             if(want_delete) {
495                 *owner_ptr = chain_parent;
496             }
497             return (void *)&(c->keyptr);
498         }
499         chain_parent = c;
500     }
501     if(intent != want_insert) {
502         return NULL;
503     }
504     /* Insert following head record of the chain. */
505     q = allocate_ts_entry(key);
506     if (!q) {
507         return q;
508     }
509     q->next = s->next;
510     s->next = q;
511     head->record_count_++;
512     *inserted = 1;
513     return q;
514 }
515 /* Search and, if missing, insert. */
516 void *
517 dwarf_tsearch(const void *key, void **headin,
518     int (*compar)(const void *, const void *))
519 {
520     struct hs_base **rootp = (struct hs_base **)headin;
521     struct hs_base *head = *rootp;
522     struct ts_entry *r = 0;
523     int inserted = 0;
524     /* nullme won't be set. */
525     struct ts_entry *nullme = 0;
526 
527     if (!head) {
528         /* something is wrong here, not initialized. */
529         return NULL;
530     }
531     r = tsearch_inner(key,head,compar,want_insert,&inserted,&nullme);
532     if (!r) {
533         return NULL;
534     }
535     return (void *)&(r->keyptr);
536 }
537 
538 
539 /* Search. */
540 void *
541 dwarf_tfind(const void *key, void *const *rootp,
542     int (*compar)(const void *, const void *))
543 {
544     /*  Nothing will change, but we discard const
545         so we can use tsearch_inner(). */
546     struct hs_base **proot = (struct hs_base **)rootp;
547     struct hs_base *head = *proot;
548     struct ts_entry *r = 0;
549     /* inserted flag won't be set. */
550     int inserted = 0;
551     /* nullme won't be set. */
552     struct ts_entry * nullme = 0;
553     /* Get to actual tree. */
554 
555     if (!head) {
556         return NULL;
557     }
558 
559     r = tsearch_inner(key,head,compar,only_find,&inserted,&nullme);
560     if(!r) {
561         return NULL;
562     }
563     return (void *)(&(r->keyptr));
564 }
565 
566 /*  Unlike the simple binary tree case,
567     a fully-empty hash situation does not null the *rootp
568 */
569 void *
570 dwarf_tdelete(const void *key, void **rootp,
571     int (*compar)(const void *, const void *))
572 {
573     struct hs_base **proot = (struct hs_base **)rootp;
574     struct hs_base *head = *proot;
575     struct ts_entry *found = 0;
576     /* inserted flag won't be set. */
577     int inserted = 0;
578     struct ts_entry * parentp = 0;
579 
580     if (!head) {
581         return NULL;
582     }
583 
584     found = tsearch_inner(key,head,compar,want_delete,&inserted,
585         &parentp);
586     if(found) {
587         if(parentp) {
588             /* Delete a chain entry. */
589             head->record_count_--;
590             parentp->next = found->next;
591             /*  We free our storage. It would be up
592                 to caller to do a tfind to find
593                 a record and delete content if necessary. */
594             free(found);
595             return (void *)&(parentp->keyptr);
596         }
597         /* So found is the head of a chain. */
598         if(found->next) {
599             /*  Delete a chain entry, pull up to hash tab, freeing
600                 up the chain entry. */
601             struct ts_entry *pullup = found->next;
602             *found = *pullup;
603             free(pullup);
604             head->record_count_--;
605             return (void *)&(found->keyptr);
606         } else {
607             /*  Delete a main hash table entry.
608                 Problem: what the heck to return as a keyptr pointer?
609                 Well, we return NULL. As in the standard
610                 tsearch, returning NULL does not mean
611                 failure! Here it just means 'empty chain somewhere'.
612             */
613             head->record_count_--;
614             found->next = 0;
615             found->keyptr = 0;
616             found->entryused = 0;
617             return NULL;
618         }
619     }
620     return NULL;
621 }
622 
623 static void
624 dwarf_twalk_inner(const struct hs_base *h,
625     struct ts_entry *p,
626     void (*action)(const void *nodep, const DW_VISIT which,
627         UNUSEDARG const int depth),
628     UNUSEDARG unsigned level)
629 {
630     unsigned long ix = 0;
631     unsigned long tsize = h->tablesize_;
632     for(  ; ix < tsize; ix++,p++) {
633         struct ts_entry*n = 0;
634         if(p->keyptr) {
635             action((void *)(&(p->keyptr)),dwarf_leaf,level);
636         }
637         for(n = p->next; n ; n = n->next) {
638             action((void *)(&(n->keyptr)),dwarf_leaf,level);
639         }
640     }
641 }
642 
643 void
644 dwarf_twalk(const void *rootp,
645     void (*action)(const void *nodep, const DW_VISIT which,
646         UNUSEDARG const int depth))
647 {
648     const struct hs_base *head = (const struct hs_base *)rootp;
649     struct ts_entry *root = 0;
650     if(!head) {
651         return;
652     }
653     root = head->hashtab_;
654     /* Get to actual tree. */
655     dwarf_twalk_inner(head,root,action,0);
656 }
657 
658 static void
659 dwarf_tdestroy_inner(struct hs_base*h,
660     void (*free_node)(void *nodep),
661     UNUSEDARG int depth)
662 {
663     unsigned long ix = 0;
664     unsigned long tsize = h->tablesize_;
665     struct ts_entry *p = &h->hashtab_[0];
666     for(  ; ix < tsize; ix++,p++) {
667         struct ts_entry*n = 0;
668         struct ts_entry*prev = 0;
669         if(p->keyptr && p->entryused) {
670             if(free_node) {
671                 free_node((void *)(p->keyptr));
672             }
673             --h->record_count_;
674         }
675         /* Now walk and free up the chain entries. */
676         for(n = p->next; n ; ) {
677             if(free_node) {
678                 free_node((void *)(n->keyptr));
679             }
680             --h->record_count_;
681             prev = n;
682             n = n->next;
683             free(prev);
684         }
685     }
686 }
687 
688 /*  Walk the tree, freeing all space in the tree
689     and calling the user's callback function on each node.
690 
691     It is up to the caller to zero out anything pointing to
692     head (ie, that has the value rootp holds) after this
693     returns.
694 */
695 void
696 dwarf_tdestroy(void *rootp, void (*free_node)(void *nodep))
697 {
698     struct hs_base *head = (struct hs_base *)rootp;
699     struct ts_entry *root = 0;
700     if(!head) {
701         return;
702     }
703     root = head->hashtab_;
704     dwarf_tdestroy_inner(head,free_node,0);
705     free(root);
706     free(head);
707 }
708