xref: /illumos-gate/usr/src/lib/libdwarf/common/dwarf_util.h (revision ff67a31b6b184e832f89a53763c02c35bd1a7291)
1 #ifndef DWARF_UTIL_H
2 #define DWARF_UTIL_H
3 /*
4 
5   Copyright (C) 2000,2003,2004 Silicon Graphics, Inc.  All Rights Reserved.
6   Portions Copyright (C) 2007-2020 David Anderson. All Rights Reserved.
7   Portions Copyright (C) 2010-2012 SN Systems Ltd. All Rights Reserved
8 
9   This program is free software; you can redistribute it
10   and/or modify it under the terms of version 2.1 of the
11   GNU Lesser General Public License as published by the Free
12   Software Foundation.
13 
14   This program is distributed in the hope that it would be
15   useful, but WITHOUT ANY WARRANTY; without even the implied
16   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17   PURPOSE.
18 
19   Further, this software is distributed without any warranty
20   that it is free of the rightful claim of any third person
21   regarding infringement or the like.  Any license provided
22   herein, whether implied or otherwise, applies only to this
23   software file.  Patent licenses, if any, provided herein
24   do not apply to combinations of this program with other
25   software, or any other product whatsoever.
26 
27   You should have received a copy of the GNU Lesser General
28   Public License along with this program; if not, write the
29   Free Software Foundation, Inc., 51 Franklin Street - Fifth
30   Floor, Boston MA 02110-1301, USA.
31 
32 */
33 
/*
    DECODE_LEB128_UWORD_CK decodes one unsigned leb128
    encoded number starting at ptr.
    Make sure ptr is a pointer to a 1-byte type.

    On success the number is stored in value and ptr is
    moved forward by the byte count the reader reports.

    In 2003 and earlier this was a hand-inlined
    version of _dwarf_decode_u_leb128() which did
    not work correctly if Dwarf_Unsigned was 64 bits.
    April 2016: now uses a reader that is careful
    (the reader is handed endptr).

    Executes 'return DW_DLV_ERROR' in the calling function,
    after reporting DW_DLE_LEB_IMPROPER via _dwarf_error(),
    only in case of error. Else falls through.
*/
#define DECODE_LEB128_UWORD_CK(ptr, value,dbg,errptr,endptr) \
    do {                                                    \
        Dwarf_Unsigned lu_leblen = 0;                       \
        Dwarf_Unsigned lu_local = 0;                        \
        if (_dwarf_decode_u_leb128_chk(ptr,&lu_leblen,      \
            &lu_local,endptr) == DW_DLV_ERROR) {            \
            _dwarf_error(dbg, errptr, DW_DLE_LEB_IMPROPER); \
            return DW_DLV_ERROR;                            \
        }                                                   \
        value = lu_local;                                   \
        ptr += lu_leblen;                                   \
    } while (0)
58 
/*  Decodes one unsigned leb128 number starting at ptr and
    also reports how many bytes it occupied.
    On success: value gets the number, ptr is moved forward
    by the byte count and leblen gets that byte count.
    Executes 'return DW_DLV_ERROR' in the calling function,
    after reporting DW_DLE_LEB_IMPROPER via _dwarf_error(),
    only in case of error. Else falls through. */
#define DECODE_LEB128_UWORD_LEN_CK(ptr, value,leblen,dbg,errptr,endptr) \
    do {                                                    \
        Dwarf_Unsigned lu_leblen = 0;                       \
        Dwarf_Unsigned lu_local = 0;                        \
        if (_dwarf_decode_u_leb128_chk(ptr,&lu_leblen,      \
            &lu_local,endptr) == DW_DLV_ERROR) {            \
            _dwarf_error(dbg, errptr, DW_DLE_LEB_IMPROPER); \
            return DW_DLV_ERROR;                            \
        }                                                   \
        value = lu_local;                                   \
        ptr += lu_leblen;                                   \
        leblen = lu_leblen;                                 \
    } while (0)
73 
/*
    Decodes signed leb128 encoded numbers.
    Make sure ptr is a pointer to a 1-byte type.
    In 2003 and earlier this was a hand-inlined
    version of _dwarf_decode_s_leb128() which did
    not work correctly if Dwarf_Unsigned was 64 bits.

    On success the number is stored in value and ptr is
    moved forward by the byte count the reader reports.

    Executes 'return DW_DLV_ERROR' in the calling function,
    after reporting DW_DLE_LEB_IMPROPER via _dwarf_error(),
    only in case of error. Else falls through.

    The scratch variables carry the lu_ prefix (as in the
    other DECODE_LEB128 macros) so that an argument spelled
    'local' or 'uleblen' by the caller is not captured
    by the macro's own declarations.
*/
#define DECODE_LEB128_SWORD_CK(ptr, value,dbg,errptr,endptr) \
    do {                                              \
        Dwarf_Unsigned lu_leblen = 0;                 \
        Dwarf_Signed lu_local = 0;                    \
        int lu_res = 0;                               \
        lu_res = _dwarf_decode_s_leb128_chk(ptr,&lu_leblen,\
            &lu_local,endptr);                        \
        if (lu_res == DW_DLV_ERROR) {                 \
            _dwarf_error(dbg, errptr, DW_DLE_LEB_IMPROPER);  \
            return DW_DLV_ERROR;                      \
        }                                             \
        value = lu_local;                             \
        ptr += lu_leblen;                             \
    } while (0)
/*  Decodes one signed leb128 number starting at ptr and
    also reports how many bytes it occupied.
    On success: leblen gets the byte count, value gets the
    number and ptr is moved forward by the byte count.
    Executes 'return DW_DLV_ERROR' in the calling function,
    after reporting DW_DLE_LEB_IMPROPER via _dwarf_error(),
    only in case of error. Else falls through. */
#define DECODE_LEB128_SWORD_LEN_CK(ptr, value,leblen,dbg,errptr,endptr) \
    do {                                                    \
        Dwarf_Unsigned lu_leblen = 0;                       \
        Dwarf_Signed lu_local = 0;                          \
        if (_dwarf_decode_s_leb128_chk(ptr,&lu_leblen,      \
            &lu_local,endptr) == DW_DLV_ERROR) {            \
            _dwarf_error(dbg, errptr, DW_DLE_LEB_IMPROPER); \
            return DW_DLV_ERROR;                            \
        }                                                   \
        leblen = lu_leblen;                                 \
        value = lu_local;                                   \
        ptr += lu_leblen;                                   \
    } while (0)
110 
111 
/*
    Skips leb128_encoded numbers that are guaranteed
    to be no more than 4 bytes long.  Same for both
    signed and unsigned numbers.

    These seem bogus as they assume 4 bytes get a 4 byte
    word. Wrong. FIXME

    ptr is advanced past the bytes skipped. A byte is only
    examined when ptr < endptr; that now includes the
    first byte, so an empty range is reported as an error
    instead of being read.

    'return' (of DW_DLV_ERROR, after _dwarf_error() records
    DW_DLE_LEB_IMPROPER) only in case of error
    else falls through.

    NOTE(review): after a full 4 byte skip the macro still
    fails when ptr has reached endptr, so a 4 byte value that
    is the very last thing before endptr is rejected while
    shorter ones are accepted. Kept as it was; confirm
    whether that is intended.
*/
#define SKIP_LEB128_WORD_CK(ptr,dbg,errptr,endptr)                     \
    do {                                          \
        if ((ptr) >= (endptr)) {                  \
            _dwarf_error(dbg, errptr, DW_DLE_LEB_IMPROPER); \
            return DW_DLV_ERROR;                  \
        }                                         \
        if ((*((ptr)++) & 0x80) != 0) {           \
            if ((ptr) >= (endptr)) {              \
                _dwarf_error(dbg, errptr, DW_DLE_LEB_IMPROPER); \
                return DW_DLV_ERROR;              \
            }                                     \
            if ((*((ptr)++) & 0x80) != 0) {       \
                if ((ptr) >= (endptr)) {          \
                    _dwarf_error(dbg, errptr, DW_DLE_LEB_IMPROPER); \
                    return DW_DLV_ERROR;          \
                }                                 \
                if ((*((ptr)++) & 0x80) != 0) {   \
                    if ((ptr) >= (endptr)) {      \
                        _dwarf_error(dbg, errptr, DW_DLE_LEB_IMPROPER); \
                        return DW_DLV_ERROR;      \
                    }                             \
                    /* 4th byte: skipped unexamined. */ \
                    (ptr)++;                      \
                    if ((ptr) >= (endptr)) {      \
                        _dwarf_error(dbg, errptr, DW_DLE_LEB_IMPROPER); \
                        return DW_DLV_ERROR;      \
                    }                             \
                }                                 \
            }                                     \
        }                                         \
    } while (0)
149 
150 
/*  Verifies that die, die->di_cu_context and
    die->di_cu_context->cc_dbg are all non-NULL.
    On the first NULL found it calls _dwarf_error() with a
    NULL dbg and DW_DLE_DIE_NULL, DW_DLE_DIE_NO_CU_CONTEXT
    or DW_DLE_DBG_NULL respectively, then executes
    'return(error_ret_value)' in the calling function.
    Else falls through.

    The identifier 'error' is not a macro argument: the
    calling function must have a variable of that name
    in scope.

    The die argument is parenthesized so that an expression
    such as *ptr_to_die can be passed.

    Any error  found here represents a bug that cannot
    be dealloc-d as the caller will not know there was no dbg */
#define CHECK_DIE(die, error_ret_value)                          \
    do {                                                         \
        if ((die) == NULL) {                                     \
            _dwarf_error(NULL, error, DW_DLE_DIE_NULL);          \
            return(error_ret_value);                             \
        }                                                        \
        if ((die)->di_cu_context == NULL) {                      \
            _dwarf_error(NULL, error, DW_DLE_DIE_NO_CU_CONTEXT); \
            return(error_ret_value);                             \
        }                                                        \
        if ((die)->di_cu_context->cc_dbg == NULL) {              \
            _dwarf_error(NULL, error, DW_DLE_DBG_NULL);          \
            return(error_ret_value);                             \
        }                                                        \
    } while (0)
168 
169 
170 /*
171    Reads 'source' for 'length' bytes from unaligned addr.
172 
173    Avoids any constant-in-conditional warnings and
174    avoids a test in the generated code (for non-const cases,
175    which are in the majority.)
176    Uses a temp to avoid the test.
177    The decl here should avoid any problem of size in the temp.
178    This code is ENDIAN DEPENDENT
179    The memcpy args are the endian issue.
180 
181    Does not update the 'source' field.
182 
183    for READ_UNALIGNED_CK the error code refers to host endianness.
184 */
185 typedef Dwarf_Unsigned BIGGEST_UINT;
186 
187 #ifdef WORDS_BIGENDIAN
/*  Big-endian-host variant.
    Copies 'length' bytes from 'source' into the trailing
    bytes of a zeroed BIGGEST_UINT using dbg->de_copy_word,
    then stores that value, cast to desttype, in dest.
    'source' itself is not updated.

    Fails with DW_DLE_READ_BIGENDIAN_ERROR (recorded via
    _dwarf_error(), followed by 'return DW_DLV_ERROR' in the
    calling function) if source+length wraps below source
    or reaches beyond endptr. Else falls through.

    'length' is not compared against sizeof(BIGGEST_UINT);
    the caller must not pass a larger value.

    source, length and endptr are parenthesized so that
    expression arguments (for example a length of a + b)
    keep their meaning inside the pointer arithmetic.
    They are evaluated more than once. */
#define READ_UNALIGNED_CK(dbg,dest,desttype, source, length,error,endptr) \
    do {                                                         \
        BIGGEST_UINT _ltmp = 0;                                  \
        Dwarf_Byte_Ptr readend = (source)+(length);              \
        if (readend <  (source)) {                               \
            _dwarf_error(dbg, error, DW_DLE_READ_BIGENDIAN_ERROR); \
            return DW_DLV_ERROR;                                 \
        }                                                        \
        if (readend > (endptr)) {                                \
            _dwarf_error(dbg, error, DW_DLE_READ_BIGENDIAN_ERROR); \
            return DW_DLV_ERROR;                                 \
        }                                                        \
        (dbg)->de_copy_word( (((char *)(&_ltmp)) +               \
            sizeof(_ltmp) - (length)),(source), (length)) ;      \
        dest = (desttype)_ltmp;                                  \
    } while (0)
204 
205 
/*
    This macro sign-extends a variable depending on the length.
    It fills the bytes between the size of the destination and
    the length with appropriate padding.
    This code is ENDIAN DEPENDENT but dependent only
    on host endianness, not object file endianness.

    Big-endian-host variant: the value occupies the last
    'length' bytes of dest. If the first of those bytes is
    negative when viewed as Dwarf_Sbyte, every byte of dest
    in front of it is set to 0xff. Otherwise dest is untouched.

    The padding is written with memset so its size is not
    tied to a fixed-length literal, and the arguments are
    parenthesized so that an expression passed as length
    is subtracted as a whole.
    length must be in the range 1..sizeof(dest); this is
    not checked.
*/
#define SIGN_EXTEND(dest, length)                                 \
    do {                                                          \
        if (*(Dwarf_Sbyte *)((char *)&(dest) +                    \
            sizeof(dest) - (length)) < 0) {                       \
            memset((char *)&(dest), 0xff,                         \
                sizeof(dest) - (length));                         \
        }                                                         \
    } while (0)
222 #else /* LITTLE ENDIAN */
/*  Little-endian-host variant.
    Copies 'length' bytes from 'source' into the leading
    bytes of a zeroed BIGGEST_UINT using dbg->de_copy_word,
    then stores that value, cast to desttype, in dest.
    'source' itself is not updated.

    Fails with DW_DLE_READ_LITTLEENDIAN_ERROR (recorded via
    _dwarf_error(), followed by 'return DW_DLV_ERROR' in the
    calling function) if source+length wraps below source
    or reaches beyond endptr. Else falls through.

    'length' is not compared against sizeof(BIGGEST_UINT);
    the caller must not pass a larger value.

    source, length and endptr are parenthesized so that
    expression arguments (for example a length of
    cond ? 4 : 8) keep their meaning inside the pointer
    arithmetic. They are evaluated more than once. */
#define READ_UNALIGNED_CK(dbg,dest,desttype, source, length,error,endptr) \
    do  {                                       \
        BIGGEST_UINT _ltmp = 0;                 \
        Dwarf_Byte_Ptr readend = (source)+(length); \
        if (readend < (source)) {               \
            _dwarf_error(dbg, error,            \
                DW_DLE_READ_LITTLEENDIAN_ERROR);\
            return DW_DLV_ERROR;                \
        }                                       \
        if (readend > (endptr)) {               \
            _dwarf_error(dbg, error,            \
                DW_DLE_READ_LITTLEENDIAN_ERROR);\
            return DW_DLV_ERROR;                \
        }                                       \
        (dbg)->de_copy_word( (char *)(&_ltmp) , \
            (source), (length)) ;               \
        dest = (desttype)_ltmp;                 \
    } while (0)
241 
242 
/*
    This macro sign-extends a variable depending on the length.
    It fills the bytes between the size of the destination and
    the length with appropriate padding.
    This code is ENDIAN DEPENDENT but dependent only
    on host endianness, not object file endianness.

    Little-endian-host variant: the value occupies the first
    'length' bytes of dest. If the last of those bytes is
    negative when viewed as Dwarf_Sbyte, every byte of dest
    after it is set to 0xff. Otherwise dest is untouched.

    The padding is written with memset so its size is not
    tied to a fixed-length literal, and the arguments are
    parenthesized so that an expression passed as length
    is subtracted as a whole.
    length must be in the range 1..sizeof(dest); this is
    not checked.
*/
#define SIGN_EXTEND(dest, length)                               \
    do {                                                        \
        if (*(Dwarf_Sbyte *)((char *)&(dest) +                  \
            ((length)-1)) < 0) {                                \
            memset((char *)&(dest)+(length), 0xff,              \
                sizeof(dest) - (length));                       \
        }                                                       \
    } while (0)
259 
260 #endif /* ! LITTLE_ENDIAN */
261 
262 
263 
264 /*
265     READ_AREA LENGTH reads the length (the older way
266     of pure 32 or 64 bit
267     or the dwarf v3 64bit-extension way)
268 
269     It reads the bits from where rw_src_data_p  points to
270     and updates the rw_src_data_p to point past what was just read.
271 
272     It updates w_length_size (to the size of an offset, either 4 or 8)
273     and w_exten_size (set 0 unless this frame has the DWARF3
274     and later  64bit
275     extension, in which case w_exten_size is set to 4).
276 
277     r_dbg is just the current dbg pointer.
278     w_target is the output length field.
279     r_targtype is the output type. Always Dwarf_Unsigned so far.
280 
281 */
282 /*  This one handles the v3 64bit extension
283     and 32bit (and   SGI/MIPS fixed 64  bit via the
284         dwarf_init-set r_dbg->de_length_size).
285     It does not recognize any but the one distinguished value
286     (the only one with defined meaning).
287     It assumes that no CU will have a length
288         0xffffffxx  (32bit length)
289         or
290         0xffffffxx xxxxxxxx (64bit length)
291     which makes possible auto-detection of the extension.
292 
293     This depends on knowing that only a non-zero length
294     is legitimate (AFAICT), and for IRIX non-standard -64
295     dwarf that the first 32 bits of the 64bit offset will be
296     zero (because the compiler could not handle a truly large
297     value as of Jan 2003 and because no app has that much debug
298     info anyway, at least not in the IRIX case).
299 
300     At present not testing for '64bit elf' here as that
301     does not seem necessary (none of the 64bit length seems
302     appropriate unless it's  ident[EI_CLASS] == ELFCLASS64).
303 */
304 /*  The w_target > r_sectionlen compare is done without adding in case
305     the w_target value read is so large any addition would overflow.
306     A basic value sanity check. */
/*  Branch summary (what is stored in w_length_size and
    w_exten_size, and how far rw_src_data_p advances):
    - first word is DISTINGUISHED_VALUE:
        DISTINGUISHED_VALUE_OFFSET_SIZE, ORIGINAL_DWARF_OFFSET_SIZE,
        advance by both sizes; w_target is re-read as the wide length.
    - first word non-zero, or object not big endian:
        ORIGINAL_DWARF_OFFSET_SIZE, 0, advance by that size.
    - first word zero, big endian object, de_length_size == 8:
        DISTINGUISHED_VALUE_OFFSET_SIZE, 0, advance by that size;
        w_target is re-read from the same position as a wide length.
    - first word zero, big endian object, otherwise:
        ORIGINAL_DWARF_OFFSET_SIZE, 0, advance by that size.
    Every length other than that last (zero) one is compared
    with r_sectionlen. Any failure executes 'return DW_DLV_ERROR'
    in the calling function; in the DISTINGUISHED_VALUE branch the
    outputs have already been partly updated at that point. */
#define READ_AREA_LENGTH_CK(r_dbg,w_target,r_targtype,         \
    rw_src_data_p,w_length_size,w_exten_size,w_error,          \
    r_sectionlen,r_endptr)                                     \
    do {                                                       \
        READ_UNALIGNED_CK(r_dbg,w_target,r_targtype,           \
            rw_src_data_p, ORIGINAL_DWARF_OFFSET_SIZE,         \
            w_error,r_endptr);                                 \
        if (w_target == DISTINGUISHED_VALUE) {                 \
            /* dwarf3 64bit extension: the real length */      \
            /* follows the distinguished value. */             \
            w_length_size  = DISTINGUISHED_VALUE_OFFSET_SIZE;  \
            rw_src_data_p += ORIGINAL_DWARF_OFFSET_SIZE;       \
            w_exten_size   = ORIGINAL_DWARF_OFFSET_SIZE;       \
            READ_UNALIGNED_CK(r_dbg,w_target,r_targtype,       \
                rw_src_data_p, DISTINGUISHED_VALUE_OFFSET_SIZE,\
                w_error,r_endptr);                             \
            if (w_target > r_sectionlen) {                     \
                _dwarf_error(r_dbg,w_error,                    \
                    DW_DLE_HEADER_LEN_BIGGER_THAN_SECSIZE);    \
                return DW_DLV_ERROR;                           \
            }                                                  \
            rw_src_data_p += DISTINGUISHED_VALUE_OFFSET_SIZE;  \
        } else if (w_target != 0 ||                            \
            !r_dbg->de_big_endian_object) {                    \
            /* Standard 32 bit dwarf2/dwarf3 */                \
            if (w_target > r_sectionlen) {                     \
                _dwarf_error(r_dbg,w_error,                    \
                    DW_DLE_HEADER_LEN_BIGGER_THAN_SECSIZE);    \
                return DW_DLV_ERROR;                           \
            }                                                  \
            w_exten_size   = 0;                                \
            w_length_size  = ORIGINAL_DWARF_OFFSET_SIZE;       \
            rw_src_data_p += w_length_size;                    \
        } else if (r_dbg->de_length_size == 8) {               \
            /* Zero first word in a big endian object: */      \
            /* might be IRIX 64-bit DWARF format. */           \
            /* This test is not a truly precise test, */       \
            /* a precise test would check if the target */     \
            /* was IRIX. */                                    \
            READ_UNALIGNED_CK(r_dbg,w_target,r_targtype,       \
                rw_src_data_p,                                 \
                DISTINGUISHED_VALUE_OFFSET_SIZE,               \
                w_error,r_endptr);                             \
            if (w_target > r_sectionlen) {                     \
                _dwarf_error(r_dbg,w_error,                    \
                    DW_DLE_HEADER_LEN_BIGGER_THAN_SECSIZE);    \
                return DW_DLV_ERROR;                           \
            }                                                  \
            w_length_size  = DISTINGUISHED_VALUE_OFFSET_SIZE;  \
            rw_src_data_p += DISTINGUISHED_VALUE_OFFSET_SIZE;  \
            w_exten_size = 0;                                  \
        } else {                                               \
            /* 32 bit, big endian, zero length */              \
            w_length_size  = ORIGINAL_DWARF_OFFSET_SIZE;       \
            rw_src_data_p += w_length_size;                    \
            w_exten_size = 0;                                  \
        }                                                      \
    } while (0)
368 
369 
370 /* Fuller checking. Returns DW_DLV_ERROR or DW_DLV_OK
371    Caller must set Dwarf_Error */
372 int _dwarf_decode_u_leb128_chk(Dwarf_Small * leb128,
373     Dwarf_Unsigned * leb128_length,
374     Dwarf_Unsigned *outval,Dwarf_Byte_Ptr endptr);
375 
376 int _dwarf_format_TAG_err_msg(Dwarf_Debug dbg,
377     Dwarf_Unsigned tag,const char *m,
378     Dwarf_Error *error);
379 
380 
381 int _dwarf_decode_s_leb128_chk(Dwarf_Small * leb128,
382     Dwarf_Unsigned * leb128_length,
383     Dwarf_Signed *outval, Dwarf_Byte_Ptr endptr);
384 
385 int
386 _dwarf_get_size_of_val(Dwarf_Debug dbg,
387     Dwarf_Unsigned form,
388     Dwarf_Half cu_version,
389     Dwarf_Half address_size,
390     Dwarf_Small * val_ptr,
391     int v_length_size,
392     Dwarf_Unsigned *size_out,
393     Dwarf_Small *section_end_ptr,
394     Dwarf_Error *error);
395 
struct Dwarf_Hash_Table_Entry_s;

/*  Base of the abbreviation hash table: this single struct
    is the base for the hash table.

    Growth policy (intent): once the total_abbrev_count across
    all the entries is greater than 10*current_table_entry_count
    one should build a new Dwarf_Hash_Table_Base_s, rehash
    all the existing entries, and delete the old table and
    entries.
    10 is a heuristic, nothing magic about it, but once the
    count gets to 30 or 40 times current_table_entry_count
    things really slow down a lot. One (500MB) application had
    127000 abbreviations in one compilation unit.

    The incoming 'code' is an abbrev number and those simply
    increase linearly so the hashing is perfect always.
*/
struct Dwarf_Hash_Table_s {
    unsigned long tb_table_entry_count;
    unsigned long tb_total_abbrev_count;

    /* Each table entry is a list of abbreviations. */
    struct Dwarf_Hash_Table_Entry_s *tb_entries;
};
415 
416 /*
417     This struct is used to build a hash table for the
418     abbreviation codes for a compile-unit.
419 */
420 struct Dwarf_Hash_Table_Entry_s {
421     Dwarf_Abbrev_List at_head;
422 };
423 
424 
425 
426 int _dwarf_get_abbrev_for_code(Dwarf_CU_Context cu_context,
427     Dwarf_Unsigned code,
428     Dwarf_Abbrev_List *list_out,Dwarf_Error *error);
429 
430 
431 /* return 1 if string ends before 'endptr' else
432 ** return 0 meaning string is not properly terminated.
433 ** Presumption is the 'endptr' pts to end of some dwarf section data.
434 */
435 int _dwarf_check_string_valid(Dwarf_Debug dbg,void *areaptr,
436     void *startptr, void *endptr,
437     int suggested_error, Dwarf_Error *error);
438 
439 int _dwarf_length_of_cu_header(Dwarf_Debug dbg, Dwarf_Unsigned offset,
440     Dwarf_Bool is_info,
441     Dwarf_Unsigned *area_length_out,
442     Dwarf_Error *error);
443 
444 Dwarf_Unsigned _dwarf_length_of_cu_header_simple(Dwarf_Debug,Dwarf_Bool dinfo);
445 
446 int  _dwarf_load_debug_info(Dwarf_Debug dbg, Dwarf_Error *error);
447 int  _dwarf_load_debug_types(Dwarf_Debug dbg, Dwarf_Error *error);
448 void _dwarf_free_abbrev_hash_table_contents(Dwarf_Debug dbg,
449     struct Dwarf_Hash_Table_s* hash_table);
450 int _dwarf_get_address_size(Dwarf_Debug dbg, Dwarf_Die die);
451 int _dwarf_reference_outside_section(Dwarf_Die die,
452     Dwarf_Small * startaddr,
453     Dwarf_Small * pastend);
454 void _dwarf_error_mv_s_to_t(Dwarf_Debug dbgs,Dwarf_Error *errs,
455     Dwarf_Debug dbgt,Dwarf_Error *errt);
456 
457 int _dwarf_internal_get_die_comp_dir(Dwarf_Die die, const char **compdir_out,
458     const char **comp_name_out,
459     Dwarf_Error *error);
460 
461 int _dwarf_what_section_are_we(Dwarf_Debug dbg,
462     Dwarf_Small *our_pointer,
463     const char **      section_name_out,
464     Dwarf_Small    **sec_start_ptr_out,
465     Dwarf_Unsigned *sec_len_out,
466     Dwarf_Small    **sec_end_ptr_out,
467     Dwarf_Error *error);
468 
469 /*  wrappers return either DW_DLV_OK or DW_DLV_ERROR.
470     Never DW_DLV_NO_ENTRY. */
471 int
472 _dwarf_read_unaligned_ck_wrapper(Dwarf_Debug dbg,
473     Dwarf_Unsigned *out_value,
474     Dwarf_Small *readfrom,
475     int          readlength,
476     Dwarf_Small *end_arange,
477     Dwarf_Error *err);
478 int
479 _dwarf_read_area_length_ck_wrapper(Dwarf_Debug dbg,
480     Dwarf_Unsigned *out_value,
481     Dwarf_Small **readfrom,
482     int    *  length_size_out,
483     int    *  exten_size_out,
484     Dwarf_Unsigned sectionlength,
485     Dwarf_Small *endsection,
486     Dwarf_Error *err);
487 int
488 _dwarf_leb128_uword_wrapper(Dwarf_Debug dbg,
489     Dwarf_Small ** startptr,
490     Dwarf_Small * endptr,
491     Dwarf_Unsigned *out_value,
492     Dwarf_Error * error);
493 int
494 _dwarf_leb128_sword_wrapper(Dwarf_Debug dbg,
495     Dwarf_Small ** startptr,
496     Dwarf_Small * endptr,
497     Dwarf_Signed *out_value,
498     Dwarf_Error * error);
499 
500 
501 
502 #endif /* DWARF_UTIL_H */
503