xref: /illumos-gate/usr/src/lib/libdwarf/common/dwarf_util.c (revision d6beba26494f4877120c99b5931876f56ba5dee5)
1 /*
2   Copyright (C) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
3   Portions Copyright (C) 2007-2010 David Anderson. All Rights Reserved.
4 
5   This program is free software; you can redistribute it and/or modify it
6   under the terms of version 2.1 of the GNU Lesser General Public License
7   as published by the Free Software Foundation.
8 
9   This program is distributed in the hope that it would be useful, but
10   WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 
13   Further, this software is distributed without any warranty that it is
14   free of the rightful claim of any third person regarding infringement
15   or the like.  Any license provided herein, whether implied or
16   otherwise, applies only to this software file.  Patent licenses, if
17   any, provided herein do not apply to combinations of this program with
18   other software, or any other product whatsoever.
19 
20   You should have received a copy of the GNU Lesser General Public
21   License along with this program; if not, write the Free Software
22   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston MA 02110-1301,
23   USA.
24 
25   Contact information:  Silicon Graphics, Inc., 1500 Crittenden Lane,
26   Mountain View, CA 94043, or:
27 
28   http://www.sgi.com
29 
30   For further information regarding this notice, see:
31 
32   http://oss.sgi.com/projects/GenInfo/NoticeExplan
33 
34 */
35 /* The address of the Free Software Foundation is
36    Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
37    Boston, MA 02110-1301, USA.
38    SGI has moved from the Crittenden Lane address.
39 */
40 
41 
42 
43 
44 
45 #include "config.h"
46 #include "dwarf_incl.h"
47 #include <stdio.h>
48 #include "dwarf_die_deliv.h"
49 
50 
51 
52 /*
53     Given a form, and a pointer to the bytes encoding
54     a value of that form, val_ptr, this function returns
55     the length, in bytes, of a value of that form.
56     When using this function, check for a return of 0
57     a recursive DW_FORM_INDIRECT value.
58 */
59 Dwarf_Unsigned
60 _dwarf_get_size_of_val(Dwarf_Debug dbg,
61     Dwarf_Unsigned form,
62     Dwarf_Half address_size,
63     Dwarf_Small * val_ptr, int v_length_size)
64 {
65     Dwarf_Unsigned length = 0;
66     Dwarf_Word leb128_length = 0;
67     Dwarf_Unsigned form_indirect = 0;
68     Dwarf_Unsigned ret_value = 0;
69 
70     switch (form) {
71 
72     default:                    /* Handles form = 0. */
73         return (form);
74 
75     case DW_FORM_addr:
76         if(address_size) {
77             return address_size;
78         }
79         /* This should never happen, address_size should be set. */
80         return (dbg->de_pointer_size);
81 
82     /* DWARF2 was wrong on the size of the attribute for
83        DW_FORM_ref_addr.  We assume compilers are using the
84        corrected DWARF3 text (for 32bit pointer target objects pointer and
85        offsets are the same size anyway). */
86     case DW_FORM_ref_addr:
87         return (v_length_size);
88 
89     case DW_FORM_block1:
90         return (*(Dwarf_Small *) val_ptr + 1);
91 
92     case DW_FORM_block2:
93         READ_UNALIGNED(dbg, ret_value, Dwarf_Unsigned,
94                        val_ptr, sizeof(Dwarf_Half));
95         return (ret_value + sizeof(Dwarf_Half));
96 
97     case DW_FORM_block4:
98         READ_UNALIGNED(dbg, ret_value, Dwarf_Unsigned,
99                        val_ptr, sizeof(Dwarf_ufixed));
100         return (ret_value + sizeof(Dwarf_ufixed));
101 
102 
103     case DW_FORM_data1:
104         return (1);
105 
106     case DW_FORM_data2:
107         return (2);
108 
109     case DW_FORM_data4:
110         return (4);
111 
112     case DW_FORM_data8:
113         return (8);
114 
115     case DW_FORM_string:
116         return (strlen((char *) val_ptr) + 1);
117 
118     case DW_FORM_block:
119     case DW_FORM_exprloc:
120         length = _dwarf_decode_u_leb128(val_ptr, &leb128_length);
121         return (length + leb128_length);
122 
123     case DW_FORM_flag_present:
124         return (0);
125     case DW_FORM_flag:
126         return (1);
127 
128     case DW_FORM_sec_offset:
129         /* If 32bit dwarf, is 4. Else is 64bit dwarf and is 8. */
130         return (v_length_size);
131 
132     case DW_FORM_ref_udata:
133         length = _dwarf_decode_u_leb128(val_ptr, &leb128_length);
134         return (leb128_length);
135 
136     case DW_FORM_indirect:
137         {
138             Dwarf_Word indir_len = 0;
139 
140             form_indirect = _dwarf_decode_u_leb128(val_ptr, &indir_len);
141             if (form_indirect == DW_FORM_indirect) {
142                 return (0);     /* We are in big trouble: The true form
143                                    of DW_FORM_indirect is
144                                    DW_FORM_indirect? Nonsense. Should
145                                    never happen. */
146             }
147             return (indir_len + _dwarf_get_size_of_val(dbg,
148                    form_indirect,
149                    address_size,
150                    val_ptr + indir_len,
151                    v_length_size));
152         }
153 
154     case DW_FORM_ref1:
155         return (1);
156 
157     case DW_FORM_ref2:
158         return (2);
159 
160     case DW_FORM_ref4:
161         return (4);
162 
163     case DW_FORM_ref8:
164         return (8);
165 
166     case DW_FORM_sdata:
167         _dwarf_decode_s_leb128(val_ptr, &leb128_length);
168         return (leb128_length);
169 
170     case DW_FORM_strp:
171         return (v_length_size);
172 
173     case DW_FORM_udata:
174         _dwarf_decode_u_leb128(val_ptr, &leb128_length);
175         return (leb128_length);
176     }
177 }
178 
179 /* We allow an arbitrary number of HT_MULTIPLE entries
180    before resizing.  It seems up to 20 or 30
181    would work nearly as well.
182    We could have a different resize multiple than 'resize now'
183    test multiple, but for now we don't do that.
184 */
185 #define HT_MULTIPLE 8
186 
187 /* Copy the old entries, updating each to be in
188    a new list.  Don't delete anything. Leave the
189    htin with stale data. */
190 static void
191 copy_abbrev_table_to_new_table(Dwarf_Hash_Table htin,
192   Dwarf_Hash_Table htout)
193 {
194     Dwarf_Hash_Table_Entry entry_in = htin->tb_entries;
195     unsigned entry_in_count = htin->tb_table_entry_count;
196     Dwarf_Hash_Table_Entry entry_out = htout->tb_entries;
197     unsigned entry_out_count = htout->tb_table_entry_count;
198     unsigned k = 0;
199     for ( ;  k < entry_in_count; ++k,++entry_in) {
200         Dwarf_Abbrev_List listent = entry_in->at_head;
201         Dwarf_Abbrev_List nextlistent = 0;
202 
203         for (  ; listent ; listent = nextlistent) {
204              unsigned newtmp = listent->ab_code;
205              unsigned newhash = newtmp%entry_out_count;
206              Dwarf_Hash_Table_Entry e;
207              nextlistent = listent->ab_next;
208              e = entry_out+newhash;
209              /* Move_entry_to_new_hash. This reverses the
210                 order of the entries, effectively, but
211                 that does not seem significant. */
212              listent->ab_next = e->at_head;
213              e->at_head = listent;
214 
215              htout->tb_total_abbrev_count++;
216         }
217     }
218 }
219 
220 /*
221     This function returns a pointer to a Dwarf_Abbrev_List_s
222     struct for the abbrev with the given code.  It puts the
223     struct on the appropriate hash table.  It also adds all
224     the abbrev between the last abbrev added and this one to
225     the hash table.  In other words, the .debug_abbrev section
226     is scanned sequentially from the top for an abbrev with
227     the given code.  All intervening abbrevs are also put
228     into the hash table.
229 
230     This function hashes the given code, and checks the chain
231     at that hash table entry to see if a Dwarf_Abbrev_List_s
232     with the given code exists.  If yes, it returns a pointer
233     to that struct.  Otherwise, it scans the .debug_abbrev
234     section from the last byte scanned for that CU till either
235     an abbrev with the given code is found, or an abbrev code
236     of 0 is read.  It puts Dwarf_Abbrev_List_s entries for all
237     abbrev's read till that point into the hash table.  The
238     hash table contains both a head pointer and a tail pointer
239     for each entry.
240 
241     While the lists can move and entries can be moved between
242     lists on reallocation, any given Dwarf_Abbrev_list entry
243     never moves once allocated, so the pointer is safe to return.
244 
245     Returns NULL on error.
246 */
247 Dwarf_Abbrev_List
248 _dwarf_get_abbrev_for_code(Dwarf_CU_Context cu_context, Dwarf_Unsigned code)
249 {
250     Dwarf_Debug dbg = cu_context->cc_dbg;
251     Dwarf_Hash_Table hash_table_base = cu_context->cc_abbrev_hash_table;
252     Dwarf_Hash_Table_Entry entry_base = 0;
253     Dwarf_Hash_Table_Entry entry_cur = 0;
254     Dwarf_Word hash_num = 0;
255     Dwarf_Unsigned abbrev_code = 0;
256     Dwarf_Unsigned abbrev_tag  = 0;
257     Dwarf_Unsigned attr_name = 0;
258     Dwarf_Unsigned attr_form = 0;
259 
260     Dwarf_Abbrev_List hash_abbrev_entry = 0;
261 
262     Dwarf_Abbrev_List inner_list_entry = 0;
263     Dwarf_Hash_Table_Entry inner_hash_entry = 0;
264 
265     Dwarf_Byte_Ptr abbrev_ptr = 0;
266     unsigned hashable_val;
267 
268     if ( !hash_table_base->tb_entries ) {
269          hash_table_base->tb_table_entry_count =  HT_MULTIPLE;
270          hash_table_base->tb_total_abbrev_count= 0;
271          hash_table_base->tb_entries =  _dwarf_get_alloc(dbg,
272             DW_DLA_HASH_TABLE_ENTRY,
273             hash_table_base->tb_table_entry_count);
274          if(! hash_table_base->tb_entries) {
275              return NULL;
276          }
277 
278     } else if (hash_table_base->tb_total_abbrev_count >
279           ( hash_table_base->tb_table_entry_count * HT_MULTIPLE) ) {
280         struct Dwarf_Hash_Table_s newht;
281         /* Effectively multiplies by >= HT_MULTIPLE */
282         newht.tb_table_entry_count =  hash_table_base->tb_total_abbrev_count;
283         newht.tb_total_abbrev_count = 0;
284         newht.tb_entries =  _dwarf_get_alloc(dbg,
285             DW_DLA_HASH_TABLE_ENTRY,
286             newht.tb_table_entry_count);
287 
288         if(! newht.tb_entries) {
289              return NULL;
290         }
291         /* Copy the existing entries to the new table,
292            rehashing each.
293         */
294         copy_abbrev_table_to_new_table(hash_table_base, &newht);
295         /* Dealloc only the entries hash table array, not the lists
296            of things pointed to by a hash table entry array. */
297         dwarf_dealloc(dbg, hash_table_base->tb_entries,DW_DLA_HASH_TABLE_ENTRY);
298         hash_table_base->tb_entries = 0;
299         /* Now overwrite the existing table descriptor with
300            the new, newly valid, contents. */
301         *hash_table_base = newht;
302     } /* Else is ok as is, add entry */
303 
304 
305     hashable_val = code;
306     hash_num = hashable_val %
307         hash_table_base->tb_table_entry_count;
308     entry_base = hash_table_base->tb_entries;
309     entry_cur  = entry_base + hash_num;
310 
311     /* Determine if the 'code' is the list of synonyms already. */
312     for (hash_abbrev_entry = entry_cur->at_head;
313          hash_abbrev_entry != NULL && hash_abbrev_entry->ab_code != code;
314          hash_abbrev_entry = hash_abbrev_entry->ab_next);
315     if (hash_abbrev_entry != NULL) {
316         /* This returns a pointer to an abbrev list entry, not
317            the list itself. */
318         return (hash_abbrev_entry);
319     }
320 
321     abbrev_ptr = cu_context->cc_last_abbrev_ptr != NULL ?
322         cu_context->cc_last_abbrev_ptr :
323         dbg->de_debug_abbrev.dss_data + cu_context->cc_abbrev_offset;
324 
325     /* End of abbrev's for this cu, since abbrev code is 0. */
326     if (*abbrev_ptr == 0) {
327         return (NULL);
328     }
329 
330     do {
331         unsigned new_hashable_val;
332         DECODE_LEB128_UWORD(abbrev_ptr, abbrev_code);
333         DECODE_LEB128_UWORD(abbrev_ptr, abbrev_tag);
334 
335         inner_list_entry = (Dwarf_Abbrev_List)
336             _dwarf_get_alloc(cu_context->cc_dbg, DW_DLA_ABBREV_LIST, 1);
337         if (inner_list_entry == NULL)
338             return (NULL);
339 
340         new_hashable_val = abbrev_code;
341         hash_num = new_hashable_val %
342             hash_table_base->tb_table_entry_count;
343         inner_hash_entry = entry_base + hash_num;
344         /* Move_entry_to_new_hash */
345         inner_list_entry->ab_next = inner_hash_entry->at_head;
346         inner_hash_entry->at_head = inner_list_entry;
347 
348         hash_table_base->tb_total_abbrev_count++;
349 
350         inner_list_entry->ab_code = abbrev_code;
351         inner_list_entry->ab_tag = abbrev_tag;
352         inner_list_entry->ab_has_child = *(abbrev_ptr++);
353         inner_list_entry->ab_abbrev_ptr = abbrev_ptr;
354 
355         /* Cycle thru the abbrev content, ignoring the content except
356            to find the end of the content. */
357         do {
358             DECODE_LEB128_UWORD(abbrev_ptr, attr_name);
359             DECODE_LEB128_UWORD(abbrev_ptr, attr_form);
360         } while (attr_name != 0 && attr_form != 0);
361 
362     } while (*abbrev_ptr != 0 && abbrev_code != code);
363 
364     cu_context->cc_last_abbrev_ptr = abbrev_ptr;
365     return (abbrev_code == code ? inner_list_entry : NULL);
366 }
367 
368 
/* Check that a NUL-terminated string starting at 'startptr' ends
** before 'endptr'.
**
** Return 1 if a NUL byte exists in the half-open range
** [startptr, endptr), else return 0 meaning the string is not
** properly terminated inside that range.  An empty range
** (startptr >= endptr) returns 0.
** Presumption is that 'endptr' points to the end of some dwarf
** section data, so no byte at or beyond 'endptr' is ever read.
*/
int
_dwarf_string_valid(void *startptr, void *endptr)
{
    const char *cur = startptr;
    const char *end = endptr;

    /* Only the cursor advances.  The limit must stay fixed,
       otherwise the scan would run past the end of the section
       and accept a terminator that lies outside of it. */
    for (; cur < end; ++cur) {
        if (*cur == 0) {
            return 1;           /* OK! */
        }
    }
    return 0;                   /* FAIL! bad string! */
}
389 
/*
  memcpy look-alike for cross-endian use: the bytes of the source
  are stored into the target in reverse order.

  Only lengths of 2, 4 and 8 are intended to be passed in.
  A length of 1 copies the single byte; any other length is copied
  with plain memcpy, i.e. without swapping.

  Returns s1, like memcpy does.
*/
void *
_dwarf_memcpy_swap_bytes(void *s1, const void *s2, size_t len)
{
    unsigned char *dst = (unsigned char *) s1;
    const unsigned char *from = (const unsigned char *) s2;
    size_t i = 0;

    switch (len) {
    case 1:     /* Not the intended use; degenerates to a copy. */
    case 2:
    case 4:
    case 8:
        /* First source byte lands in the last target byte, and
           so on down to the first target byte. */
        for (i = 0; i < len; ++i) {
            dst[len - 1 - i] = from[i];
        }
        break;
    default:
        /* Should NOT get here: is not the intended use. */
        memcpy(s1, s2, len);
        break;
    }

    return s1;
}
429 
430 
431 /*
432   This calculation used to be sprinkled all over.
433   Now brought to one place.
434 
435   We try to accurately compute the size of a cu header
436   given a known cu header location ( an offset in .debug_info).
437 
438 */
439 /* ARGSUSED */
440 Dwarf_Unsigned
441 _dwarf_length_of_cu_header(Dwarf_Debug dbg, Dwarf_Unsigned offset)
442 {
443     int local_length_size = 0;
444     int local_extension_size = 0;
445     Dwarf_Unsigned length = 0;
446     Dwarf_Small *cuptr = dbg->de_debug_info.dss_data + offset;
447 
448     READ_AREA_LENGTH(dbg, length, Dwarf_Unsigned,
449                      cuptr, local_length_size, local_extension_size);
450 
451     return local_extension_size +       /* initial extesion, if present
452                                          */
453         local_length_size +     /* Size of cu length field. */
454         sizeof(Dwarf_Half) +    /* Size of version stamp field. */
455         local_length_size +     /* Size of abbrev offset field. */
456         sizeof(Dwarf_Small);    /* Size of address size field. */
457 
458 }
459 
460 /*
461         Pretend we know nothing about the CU
462         and just roughly compute the result.
463 */
464 Dwarf_Unsigned
465 _dwarf_length_of_cu_header_simple(Dwarf_Debug dbg)
466 {
467     return dbg->de_length_size +        /* Size of cu length field. */
468         sizeof(Dwarf_Half) +    /* Size of version stamp field. */
469         dbg->de_length_size +   /* Size of abbrev offset field. */
470         sizeof(Dwarf_Small);    /* Size of address size field. */
471 }
472 
473 /* Now that we delay loading .debug_info, we need to do the
474    load in more places. So putting the load
475    code in one place now instead of replicating it in multiple
476    places.
477 
478 */
479 int
480 _dwarf_load_debug_info(Dwarf_Debug dbg, Dwarf_Error * error)
481 {
482     int res = DW_DLV_ERROR;
483 
484     /* Testing de_debug_info.dss_data allows us to avoid testing
485        de_debug_abbrev.dss_data.
486        One test instead of 2. .debug_info is useless
487        without .debug_abbrev. */
488     if (dbg->de_debug_info.dss_data) {
489         return DW_DLV_OK;
490     }
491 
492     res = _dwarf_load_section(dbg, &dbg->de_debug_abbrev,error);
493     if (res != DW_DLV_OK) {
494         return res;
495     }
496     res = _dwarf_load_section(dbg, &dbg->de_debug_info, error);
497     return res;
498 
499 }
500 void
501 _dwarf_free_abbrev_hash_table_contents(Dwarf_Debug dbg,Dwarf_Hash_Table hash_table)
502 {
503     /* A Hash Table is an array with tb_table_entry_count struct
504        Dwarf_Hash_Table_s entries in the array. */
505     int hashnum = 0;
506     for (; hashnum < hash_table->tb_table_entry_count; ++hashnum) {
507         struct Dwarf_Abbrev_List_s *abbrev = 0;
508         struct Dwarf_Abbrev_List_s *nextabbrev = 0;
509         struct  Dwarf_Hash_Table_Entry_s *tb =  &hash_table->tb_entries[hashnum];
510 
511         abbrev = tb->at_head;
512         for (; abbrev; abbrev = nextabbrev) {
513             nextabbrev = abbrev->ab_next;
514             dwarf_dealloc(dbg, abbrev, DW_DLA_ABBREV_LIST);
515         }
516     }
517     /* Frees all the entries at once: an array. */
518     dwarf_dealloc(dbg,hash_table->tb_entries,DW_DLA_HASH_TABLE_ENTRY);
519 }
520 
521 /*
522     If no die provided the size value returned might be wrong.
523     If different compilation units have different address sizes
524     this may not give the correct value in all contexts if the die
525     pointer is NULL.
526     If the Elf offset size != address_size
527     (for example if address_size = 4 but recorded in elf64 object)
528     this may not give the correct value in all contexts if the die
529     pointer is NULL.
530     If the die pointer is non-NULL (in which case it must point to
531     a valid DIE) this will return the correct size.
532 */
533 int
534 _dwarf_get_address_size(Dwarf_Debug dbg, Dwarf_Die die)
535 {
536     Dwarf_CU_Context context = 0;
537     Dwarf_Half addrsize = 0;
538     if(!die) {
539         return dbg->de_pointer_size;
540     }
541     context = die->di_cu_context;
542     addrsize = context->cc_address_size;
543     return addrsize;
544 }
545 
546 
547 
548