xref: /illumos-gate/usr/src/lib/libdwarf/common/dwarf_abbrev.c (revision d327dbeacda682ba3d4efc9b451baa429ba8830c)
1 /*
2   Copyright (C) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
3   Portions Copyright (C) 2009-2019 David Anderson. All Rights Reserved.
4 
5   This program is free software; you can redistribute it and/or modify it
6   under the terms of version 2.1 of the GNU Lesser General Public License
7   as published by the Free Software Foundation.
8 
9   This program is distributed in the hope that it would be useful, but
10   WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 
13   Further, this software is distributed without any warranty that it is
14   free of the rightful claim of any third person regarding infringement
15   or the like.  Any license provided herein, whether implied or
16   otherwise, applies only to this software file.  Patent licenses, if
17   any, provided herein do not apply to combinations of this program with
18   other software, or any other product whatsoever.
19 
20   You should have received a copy of the GNU Lesser General Public
21   License along with this program; if not, write the Free Software
22   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston MA 02110-1301,
23   USA.
24 
25 */
26 
27 #include "config.h"
28 #include <stdio.h>
29 #include "dwarf_incl.h"
30 #include "dwarf_abbrev.h"
31 #include "dwarf_alloc.h"
32 #include "dwarf_error.h"
33 #include "dwarf_util.h"
34 #include "dwarfstring.h"
35 
36 #define TRUE 1
37 #define FALSE 0
38 
39 /*  This is used to print a .debug_abbrev section without
40     knowing about the DIEs that use the abbrevs.
41 
42     dwarf_get_abbrev() and,
43     in dwarf_util.c,  _dwarf_get_abbrev_for_code()
44 
45 
46     When we have a simple .o
47     there is at least a hope of iterating through
48     the abbrevs meaningfully without knowing
49     a CU context.
50 
51     This often fails or gets incorrect info
52     because there is no guarantee the .debug_abbrev
53     section is free of garbage bytes.
54 
55     In an object with multiple CU/TUs the
56     output is difficult/impossible to usefully interpret.
57 
58     In a dwp (Package File)  it is really impossible
59     to associate abbrevs with a CU.
60 
61 */
62 
63 int
64 _dwarf_count_abbrev_entries(Dwarf_Debug dbg,
65     Dwarf_Byte_Ptr abbrev_ptr,
66     Dwarf_Byte_Ptr abbrev_section_end,
67     Dwarf_Unsigned *abbrev_count_out,
68     Dwarf_Byte_Ptr *abbrev_ptr_out,
69     Dwarf_Error *error)
70 {
71     Dwarf_Unsigned abbrev_count = 0;
72     Dwarf_Unsigned attr_name = 0;
73     Dwarf_Unsigned attr_form = 0;
74     UNUSEDARG Dwarf_Unsigned implicit_const = 0;
75 
76     /*  The abbreviations table ends with an entry with a single
77         byte of zero for the abbreviation code.
78         Padding bytes following that zero are allowed, but
79         here we simply stop looking past that zero abbrev.
80 
81         We also stop looking if the block/section ends,
82         though the DWARF2 and later standards do not specifically
83         allow section/block end to terminate an abbreviations
84         list. */
85 
86     do {
87         DECODE_LEB128_UWORD_CK(abbrev_ptr, attr_name,
88             dbg,error,abbrev_section_end);
89         if (attr_name > DW_AT_hi_user) {
90             _dwarf_error(dbg, error,DW_DLE_ATTR_CORRUPT);
91             return DW_DLV_ERROR;
92         }
93         DECODE_LEB128_UWORD_CK(abbrev_ptr, attr_form,
94             dbg,error,abbrev_section_end);
95         if (!_dwarf_valid_form_we_know(attr_form,attr_name)) {
96             dwarfstring m;
97 
98             dwarfstring_constructor(&m);
99             dwarfstring_append_printf_u(&m,
100                 "DW_DLE_UNKNOWN_FORM: Abbrev invalid form 0x%"
101                 DW_PR_DUx,attr_form);
102             dwarfstring_append_printf_u(&m,
103                 " with attribute 0x%" DW_PR_DUx,attr_name);
104             dwarfstring_append(&m," so abbreviations unusable. ");
105             _dwarf_error_string(dbg, error, DW_DLE_UNKNOWN_FORM,
106                 dwarfstring_string(&m));
107             dwarfstring_destructor(&m);
108             return DW_DLV_ERROR;
109         }
110         if (attr_form ==  DW_FORM_implicit_const) {
111             /* The value is here, not in a DIE. */
112             DECODE_LEB128_SWORD_CK(abbrev_ptr, implicit_const,
113                 dbg,error,abbrev_section_end);
114         }
115         abbrev_count++;
116     } while ((abbrev_ptr < abbrev_section_end) &&
117         (attr_name != 0 || attr_form != 0));
118     /* We counted one too high,we included the 0,0 */
119     *abbrev_count_out = abbrev_count-1;
120     *abbrev_ptr_out = abbrev_ptr;
121     return DW_DLV_OK;
122 }
123 
124 int
125 dwarf_get_abbrev(Dwarf_Debug dbg,
126     Dwarf_Unsigned offset,
127     Dwarf_Abbrev * returned_abbrev,
128     Dwarf_Unsigned * length,
129     Dwarf_Unsigned * abbr_count, Dwarf_Error * error)
130 {
131     Dwarf_Byte_Ptr abbrev_ptr = 0;
132     Dwarf_Byte_Ptr abbrev_ptr_out = 0;
133     Dwarf_Byte_Ptr abbrev_section_end = 0;
134     Dwarf_Abbrev ret_abbrev = 0;
135     Dwarf_Unsigned labbr_count = 0;
136     Dwarf_Unsigned utmp     = 0;
137     int res = 0;
138 
139     if (!dbg) {
140         _dwarf_error(NULL, error, DW_DLE_DBG_NULL);
141         return DW_DLV_ERROR;
142     }
143     if (dbg->de_debug_abbrev.dss_data == 0) {
144         /*  Loads abbrev section (and .debug_info as we do those
145             together). */
146         res = _dwarf_load_debug_info(dbg, error);
147 
148         if (res != DW_DLV_OK) {
149             return res;
150         }
151     }
152 
153     if (offset >= dbg->de_debug_abbrev.dss_size) {
154         return DW_DLV_NO_ENTRY;
155     }
156     ret_abbrev = (Dwarf_Abbrev) _dwarf_get_alloc(dbg, DW_DLA_ABBREV, 1);
157     if (ret_abbrev == NULL) {
158         _dwarf_error(dbg, error, DW_DLE_ALLOC_FAIL);
159         return DW_DLV_ERROR;
160     }
161     ret_abbrev->dab_dbg = dbg;
162     if (returned_abbrev == 0 || abbr_count == 0) {
163         dwarf_dealloc(dbg, ret_abbrev, DW_DLA_ABBREV);
164         _dwarf_error(dbg, error, DW_DLE_DWARF_ABBREV_NULL);
165         return DW_DLV_ERROR;
166     }
167 
168 
169     *abbr_count = 0;
170     if (length) {
171         *length = 1;
172     }
173 
174 
175     abbrev_ptr = dbg->de_debug_abbrev.dss_data + offset;
176     abbrev_section_end =
177         dbg->de_debug_abbrev.dss_data + dbg->de_debug_abbrev.dss_size;
178 #if 0
179     DECODE_LEB128_UWORD_CK(abbrev_ptr, utmp,
180         dbg,error,abbrev_section_end);
181 #endif
182     res = _dwarf_leb128_uword_wrapper(dbg,&abbrev_ptr,
183         abbrev_section_end,&utmp,error);
184     if (res == DW_DLV_ERROR) {
185         dwarf_dealloc(dbg, ret_abbrev, DW_DLA_ABBREV);
186         return res;
187     }
188     ret_abbrev->dab_code = utmp;
189     if (ret_abbrev->dab_code == 0) {
190         *returned_abbrev = ret_abbrev;
191         *abbr_count = 0;
192         if (length) {
193             *length = 1;
194         }
195         return DW_DLV_OK;
196     }
197 
198 #if 0
199     DECODE_LEB128_UWORD_CK(abbrev_ptr, utmp,
200         dbg,error,abbrev_section_end);
201 #endif
202     res = _dwarf_leb128_uword_wrapper(dbg,&abbrev_ptr,
203         abbrev_section_end,&utmp,error);
204     if (res == DW_DLV_ERROR) {
205         dwarf_dealloc(dbg, ret_abbrev, DW_DLA_ABBREV);
206         return res;
207     }
208     if (utmp > DW_TAG_hi_user) {
209         return _dwarf_format_TAG_err_msg(dbg,
210             utmp,"DW_DLE_TAG_CORRUPT",
211             error);
212     }
213     ret_abbrev->dab_tag = utmp;
214     if (abbrev_ptr >= abbrev_section_end) {
215         dwarfstring m;
216         dwarf_dealloc(dbg, ret_abbrev, DW_DLA_ABBREV);
217 
218         dwarfstring_constructor(&m);
219         dwarfstring_append_printf_u(&m,
220             "DW_DLE_ABBREV_DECODE_ERROR: Ran off the end "
221             "of the abbrev section reading tag, starting at"
222             " abbrev section offset 0x%x",offset);
223         _dwarf_error_string(dbg, error,
224             DW_DLE_ABBREV_DECODE_ERROR,
225             dwarfstring_string(&m));
226         dwarfstring_destructor(&m);
227         return DW_DLV_ERROR;
228     }
229     ret_abbrev->dab_has_child = *(abbrev_ptr++);
230     ret_abbrev->dab_abbrev_ptr = abbrev_ptr;
231     ret_abbrev->dab_next_ptr = abbrev_ptr;
232     ret_abbrev->dab_next_index = 0;
233     res = _dwarf_count_abbrev_entries(dbg,abbrev_ptr,
234         abbrev_section_end,&labbr_count,&abbrev_ptr_out,error);
235     if (res == DW_DLV_ERROR) {
236         dwarf_dealloc(dbg, ret_abbrev, DW_DLA_ABBREV);
237         return res;
238     }
239     abbrev_ptr = abbrev_ptr_out;
240 
241     /* Global section offset. */
242     ret_abbrev->dab_goffset = offset;
243     ret_abbrev->dab_count = labbr_count;
244     if (abbrev_ptr > abbrev_section_end) {
245         dwarf_dealloc(dbg, ret_abbrev, DW_DLA_ABBREV);
246         _dwarf_error_string(dbg, error,
247             DW_DLE_ABBREV_DECODE_ERROR,
248             "DW_DLE_ABBREV_DECODE_ERROR: Ran off the end "
249             "of the abbrev section reading abbrev_entries.");
250         _dwarf_error(dbg, error, DW_DLE_ABBREV_DECODE_ERROR);
251         return DW_DLV_ERROR;
252     }
253     if (length) {
254         *length = abbrev_ptr - dbg->de_debug_abbrev.dss_data - offset;
255     }
256     *returned_abbrev = ret_abbrev;
257     *abbr_count = labbr_count;
258     return DW_DLV_OK;
259 }
260 
261 int
262 dwarf_get_abbrev_code(Dwarf_Abbrev abbrev,
263     Dwarf_Unsigned * returned_code,
264     Dwarf_Error * error)
265 {
266     if (abbrev == NULL) {
267         _dwarf_error(NULL, error, DW_DLE_DWARF_ABBREV_NULL);
268         return DW_DLV_ERROR;
269     }
270 
271     *returned_code = abbrev->dab_code;
272     return DW_DLV_OK;
273 }
274 
275 /*  DWARF defines DW_TAG_hi_user as 0xffff so no tag should be
276     over 16 bits.  */
277 int
278 dwarf_get_abbrev_tag(Dwarf_Abbrev abbrev,
279     Dwarf_Half * returned_tag, Dwarf_Error * error)
280 {
281     if (abbrev == NULL) {
282         _dwarf_error(NULL, error, DW_DLE_DWARF_ABBREV_NULL);
283         return (DW_DLV_ERROR);
284     }
285 
286     *returned_tag = abbrev->dab_tag;
287     return (DW_DLV_OK);
288 }
289 
290 
291 int
292 dwarf_get_abbrev_children_flag(Dwarf_Abbrev abbrev,
293     Dwarf_Signed * returned_flag,
294     Dwarf_Error * error)
295 {
296     if (abbrev == NULL) {
297         _dwarf_error(NULL, error, DW_DLE_DWARF_ABBREV_NULL);
298         return (DW_DLV_ERROR);
299     }
300 
301     *returned_flag = abbrev->dab_has_child;
302     return (DW_DLV_OK);
303 }
304 
305 
306 /*  This does not return the implicit const, nor
307     does it return all bits of the uleb attribute
308     nor does it return all bits of the uleb form
309     value.
310     See dwarf_get_abbrev_entry_b().
311 */
312 
313 int
314 dwarf_get_abbrev_entry(Dwarf_Abbrev abbrev,
315     Dwarf_Signed indx,
316     Dwarf_Half   * returned_attr_num,
317     Dwarf_Signed * returned_form,
318     Dwarf_Off    * returned_offset,
319     Dwarf_Error * error)
320 {
321     int res;
322     Dwarf_Unsigned attr = 0;
323     Dwarf_Unsigned form = 0;
324     Dwarf_Signed implicitconst = 0;
325     Dwarf_Unsigned uindex = (Dwarf_Unsigned)indx;
326     Dwarf_Bool filter_outliers = TRUE;
327 
328     res = dwarf_get_abbrev_entry_b(abbrev,
329         uindex,
330         filter_outliers,
331         &attr,
332         &form,
333         &implicitconst,
334         returned_offset,
335         error);
336     if (res != DW_DLV_OK) {
337         return res;
338     }
339     /* returned_offset already set by dwarf_get_abbrev_entry_b; */
340     if (returned_attr_num) {
341         *returned_attr_num = (Dwarf_Half)attr;
342     }
343     if (returned_form) {
344         *returned_form = (Dwarf_Signed)form;
345     }
346     return DW_DLV_OK;
347 }
348 
349 /*  If filter_outliers is non-zero then
350     the routine will return DW_DLV_ERROR
351     if the leb reading generates a number that
352     is so large it cannot be correct.
353 
354     If filter_outliers is 0 the uleb/sleb
355     values read are returned, even if
356     the values are unreasonable. This is
357     a useful option if one wishes to
358     have callers examine the return values
359     in greater detail than the checking here
360     provides.
361 
362 */
363 int
364 dwarf_get_abbrev_entry_b(Dwarf_Abbrev abbrev,
365     Dwarf_Unsigned indx,
366     Dwarf_Bool     filter_outliers,
367     Dwarf_Unsigned * returned_attr_num,
368     Dwarf_Unsigned * returned_form,
369     Dwarf_Signed   * returned_implicitconst,
370     Dwarf_Off      * offset,
371     Dwarf_Error    * error)
372 {
373     Dwarf_Byte_Ptr abbrev_ptr = 0;
374     Dwarf_Byte_Ptr abbrev_end = 0;
375     Dwarf_Byte_Ptr mark_abbrev_ptr = 0;
376     Dwarf_Unsigned attr = 0;
377     Dwarf_Unsigned form = 0;
378     Dwarf_Unsigned implicitconst = 0;
379     Dwarf_Debug dbg = 0;
380     Dwarf_Signed local_indx = (Dwarf_Signed)indx;
381 
382     if (abbrev == NULL) {
383         _dwarf_error(NULL, error, DW_DLE_DWARF_ABBREV_NULL);
384         return (DW_DLV_ERROR);
385     }
386     if (abbrev->dab_code == 0) {
387         return (DW_DLV_NO_ENTRY);
388     }
389 
390     if (abbrev->dab_dbg == NULL) {
391         _dwarf_error(NULL, error, DW_DLE_DBG_NULL);
392         return (DW_DLV_ERROR);
393     }
394     dbg = abbrev->dab_dbg;
395     abbrev_ptr = abbrev->dab_abbrev_ptr;
396     abbrev_end = dbg->de_debug_abbrev.dss_data +
397         dbg->de_debug_abbrev.dss_size;
398     if ((Dwarf_Unsigned)local_indx >=  abbrev->dab_next_index) {
399         /*  We want a part not yet scanned ,
400             so we can start closer to the desired value. */
401         abbrev_ptr   = abbrev->dab_next_ptr;
402         local_indx -= abbrev->dab_next_index;
403     }
404 
405     for (attr = 1, form = 1;
406         local_indx >= 0 && abbrev_ptr < abbrev_end &&
407         (attr != 0 || form != 0);
408         local_indx--) {
409 
410         mark_abbrev_ptr = abbrev_ptr;
411         DECODE_LEB128_UWORD_CK(abbrev_ptr, attr,dbg,
412             error,abbrev_end);
413         if (filter_outliers && attr > DW_AT_hi_user) {
414             _dwarf_error(dbg, error,DW_DLE_ATTR_CORRUPT);
415             return DW_DLV_ERROR;
416         }
417         DECODE_LEB128_UWORD_CK(abbrev_ptr, form,dbg,
418             error,abbrev_end);
419         if (filter_outliers &&
420             !_dwarf_valid_form_we_know(form,attr)) {
421             _dwarf_error(dbg, error, DW_DLE_UNKNOWN_FORM);
422             return (DW_DLV_ERROR);
423         }
424         if (form ==  DW_FORM_implicit_const) {
425             /* The value is here, not in a DIE. */
426             DECODE_LEB128_SWORD_CK( abbrev_ptr, implicitconst,
427                 dbg,error,abbrev_end);
428         } else {
429             implicitconst = 0;
430         }
431     }
432 
433     if (abbrev_ptr >= abbrev_end) {
434         _dwarf_error_string(dbg, error,
435             DW_DLE_ABBREV_DECODE_ERROR,
436             "DW_DLE_ABBREV_DECODE_ERROR: Ran off the end "
437             "of the abbrev section reading abbrev entries..");
438         return DW_DLV_ERROR;
439     }
440 
441     if (local_indx >= 0) {
442         return DW_DLV_NO_ENTRY;
443     }
444 
445     if (returned_form != NULL) {
446         *returned_form = form;
447     }
448     if (offset != NULL) {
449         *offset = mark_abbrev_ptr - dbg->de_debug_abbrev.dss_data;
450     }
451     if (returned_attr_num) {
452         *returned_attr_num = attr;
453     }
454     if (returned_implicitconst) {
455         /*  Callers should only examine implict const value
456             if the form is DW_FORM_implicit_const.  */
457         *returned_implicitconst = implicitconst;
458     }
459     abbrev->dab_next_ptr = abbrev_ptr;
460     abbrev->dab_next_index = (Dwarf_Unsigned)local_indx ;
461     return DW_DLV_OK;
462 }
463 
464 /*  This function is not entirely safe to call.
465     The problem is that the DWARF[234] specification does not insist
466     that bytes in .debug_abbrev that are not referenced by .debug_info
467     or .debug_types need to be initialized to anything specific.
468     Any garbage bytes may cause trouble.  Not all compilers/linkers
469     leave unreferenced garbage bytes in .debug_abbrev, so this may
470     work for most objects.
471     In case of error could return a bogus value, there is
472     no documented way to detect error.
473 */
474 int
475 dwarf_get_abbrev_count(Dwarf_Debug dbg)
476 {
477     Dwarf_Abbrev ab;
478     Dwarf_Unsigned offset = 0;
479     Dwarf_Unsigned length = 0;
480     Dwarf_Unsigned attr_count = 0;
481     Dwarf_Unsigned abbrev_count = 0;
482     int abres = DW_DLV_OK;
483     Dwarf_Error err = 0;
484 
485     while ((abres = dwarf_get_abbrev(dbg, offset, &ab,
486         &length, &attr_count,
487         &err)) == DW_DLV_OK) {
488 
489         ++abbrev_count;
490         offset += length;
491         dwarf_dealloc(dbg, ab, DW_DLA_ABBREV);
492     }
493     if (err) {
494         dwarf_dealloc(dbg,err,DW_DLA_ERROR);
495         err = 0;
496     }
497     return abbrev_count;
498 }
499