1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24 *
25 * Copyright 2011 Jason King. All rights reserved.
26 */
27
28 #include <assert.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <gelf.h>
32 #include <libelf.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include <sys/fcntl.h>
38 #include <sys/stat.h>
39 #include <sys/sysmacros.h>
40 #include <sys/types.h>
41
42 #include "dis_target.h"
43 #include "dis_util.h"
44
45 /*
46 * Standard ELF disassembler target.
47 *
48 * We only support disassembly of ELF files, though this target interface could
49 * be extended in the future. Each basic type (target, func, section) contains
50 * enough information to uniquely identify the location within the file. The
51 * interfaces use libelf(3LIB) to do the actual processing of the file.
52 */
53
/*
 * Symbol table entry type.  We maintain our own symbol table sorted by address,
 * with the symbol name already resolved against the ELF symbol table.
 *
 * se_shndx is the section index used for the symbol: normally the symbol's
 * own st_shndx, or the value taken from the SHT_SYMTAB_SHNDX table when
 * st_shndx is SHN_XINDEX.
 */
typedef struct sym_entry {
	GElf_Sym	se_sym;		/* value of symbol */
	char		*se_name;	/* name of symbol */
	int		se_shndx;	/* section where symbol is located */
} sym_entry_t;
63
/*
 * Create a map of the virtual address ranges of every section.  This will
 * allow us to create dummy mappings for unassigned addresses.  Otherwise
 * multiple sections with unassigned addresses will appear to overlap and
 * mess up symbol resolution (which uses the virtual address).
 *
 * The map is indexed by section index.  dm_mapped is B_TRUE only for entries
 * whose dm_start was made up by create_addrmap() because the section had no
 * address of its own.
 */
typedef struct dis_shnmap {
	const char	*dm_name;	/* name of section */
	uint64_t	dm_start;	/* virtual address of section */
	size_t		dm_length;	/* address length */
	boolean_t	dm_mapped;	/* did we assign the mapping */
} dis_shnmap_t;
76
/*
 * Target data structure.  This structure keeps track of the ELF file
 * information, a few bits of pre-processed section index information, and
 * sorted versions of the symbol table.  We also keep track of the last symbol
 * looked up, as the majority of lookups remain within the same symbol.
 *
 * For an archive, the first target owns the file descriptor and the root
 * libelf handle; every further member is a separate target chained through
 * dt_next, sharing dt_elf_root and carrying a dt_fd of -1.
 */
struct dis_tgt {
	Elf		*dt_elf;	/* libelf handle */
	Elf		*dt_elf_root;	/* main libelf handle (for archives) */
	const char	*dt_filename;	/* name of file */
	int		dt_fd;		/* underlying file descriptor */
	size_t		dt_shstrndx;	/* section index of .shstrtab */
	size_t		dt_symidx;	/* section index of symbol table */
	sym_entry_t	*dt_symcache;	/* last symbol looked up */
	sym_entry_t	*dt_symtab;	/* sorted symbol table */
	int		dt_symcount;	/* # of symbol table entries */
	struct dis_tgt	*dt_next;	/* next target (for archives) */
	Elf_Arhdr	*dt_arhdr;	/* archive header (for archives) */
	dis_shnmap_t	*dt_shnmap;	/* section address map */
	size_t		dt_shncount;	/* # of sections in target */
};
98
/*
 * Function data structure.  We resolve the symbol and lookup the associated ELF
 * data when building this structure.  The offset is calculated based on the
 * section's starting address, i.e. it is the symbol's value minus the address
 * of the section that contains it.
 */
struct dis_func {
	sym_entry_t	*df_sym;	/* symbol table reference */
	Elf_Data	*df_data;	/* associated ELF data */
	size_t		df_offset;	/* offset within data */
};
109
/*
 * Section data structure.  We store the entire section header so that we can
 * determine some properties (such as whether or not it contains text) after
 * building the structure.
 */
struct dis_scn {
	GElf_Shdr	ds_shdr;	/* copy of the section header */
	const char	*ds_name;	/* section name from the string table */
	Elf_Data	*ds_data;	/* section contents as returned by libelf */
};
120
/*
 * Lifted from Psymtab.c, omitting STT_TLS.
 *
 * DATA_TYPES is a bitmask with one bit per ELF symbol type that we keep in
 * our symbol table; IS_DATA_TYPE(tp) tests whether symbol type 'tp' is one
 * of them.
 */
#define	DATA_TYPES	\
	((1 << STT_OBJECT) | (1 << STT_FUNC) | (1 << STT_COMMON))
#define	IS_DATA_TYPE(tp)	(((1 << (tp)) & DATA_TYPES) != 0)
125
126 /*
127 * Save the virtual address range for this section and select the
128 * best section to use as the symbol table. We prefer SHT_SYMTAB
129 * over SHT_DYNSYM.
130 */
131 /* ARGSUSED */
132 static void
tgt_scn_init(dis_tgt_t * tgt,dis_scn_t * scn,void * data)133 tgt_scn_init(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
134 {
135 int *index = data;
136
137 *index += 1;
138
139 tgt->dt_shnmap[*index].dm_name = scn->ds_name;
140 tgt->dt_shnmap[*index].dm_start = scn->ds_shdr.sh_addr;
141 tgt->dt_shnmap[*index].dm_length = scn->ds_shdr.sh_size;
142 tgt->dt_shnmap[*index].dm_mapped = B_FALSE;
143
144 /*
145 * Prefer SHT_SYMTAB over SHT_DYNSYM
146 */
147 if (scn->ds_shdr.sh_type == SHT_DYNSYM && tgt->dt_symidx == 0)
148 tgt->dt_symidx = *index;
149 else if (scn->ds_shdr.sh_type == SHT_SYMTAB)
150 tgt->dt_symidx = *index;
151 }
152
153 static int
sym_compare(const void * a,const void * b)154 sym_compare(const void *a, const void *b)
155 {
156 const sym_entry_t *syma = a;
157 const sym_entry_t *symb = b;
158 const char *aname = syma->se_name;
159 const char *bname = symb->se_name;
160 size_t alen;
161 size_t blen;
162
163 if (syma->se_sym.st_value < symb->se_sym.st_value)
164 return (-1);
165
166 if (syma->se_sym.st_value > symb->se_sym.st_value)
167 return (1);
168
169 /*
170 * Prefer functions over non-functions
171 */
172 if (GELF_ST_TYPE(syma->se_sym.st_info) !=
173 GELF_ST_TYPE(symb->se_sym.st_info)) {
174 if (GELF_ST_TYPE(syma->se_sym.st_info) == STT_FUNC)
175 return (-1);
176 if (GELF_ST_TYPE(symb->se_sym.st_info) == STT_FUNC)
177 return (1);
178 }
179
180 /*
181 * For symbols with the same address and type, we sort them according to
182 * a hierarchy:
183 *
184 * 1. weak symbols (common name)
185 * 2. global symbols (external name)
186 * 3. local symbols
187 */
188 if (GELF_ST_BIND(syma->se_sym.st_info) !=
189 GELF_ST_BIND(symb->se_sym.st_info)) {
190 if (GELF_ST_BIND(syma->se_sym.st_info) == STB_WEAK)
191 return (-1);
192 if (GELF_ST_BIND(symb->se_sym.st_info) == STB_WEAK)
193 return (1);
194
195 if (GELF_ST_BIND(syma->se_sym.st_info) == STB_GLOBAL)
196 return (-1);
197 if (GELF_ST_BIND(symb->se_sym.st_info) == STB_GLOBAL)
198 return (1);
199 }
200
201 /*
202 * As a last resort, if we have multiple symbols of the same type at the
203 * same address, prefer the version with the fewest leading underscores.
204 */
205 if (aname == NULL)
206 return (-1);
207 if (bname == NULL)
208 return (1);
209
210 while (*aname == '_' && *bname == '_') {
211 aname++;
212 bname++;
213 }
214
215 if (*bname == '_')
216 return (-1);
217 if (*aname == '_')
218 return (1);
219
220 /*
221 * Prefer the symbol with the smaller size.
222 */
223 if (syma->se_sym.st_size < symb->se_sym.st_size)
224 return (-1);
225 if (syma->se_sym.st_size > symb->se_sym.st_size)
226 return (1);
227
228 /*
229 * We really do have two identical symbols, choose the one with the
230 * shortest name if we can, heuristically taking it to be the most
231 * representative.
232 */
233 alen = strlen(syma->se_name);
234 blen = strlen(symb->se_name);
235
236 if (alen < blen)
237 return (-1);
238 else if (alen > blen)
239 return (1);
240
241 /*
242 * If all else fails, compare the names, so that we give a stable
243 * sort
244 */
245 return (strcmp(syma->se_name, symb->se_name));
246 }
247
248 /*
249 * Construct an optimized symbol table sorted by starting address.
250 */
251 static void
construct_symtab(dis_tgt_t * tgt)252 construct_symtab(dis_tgt_t *tgt)
253 {
254 Elf_Scn *scn;
255 GElf_Shdr shdr;
256 Elf_Data *symdata;
257 int i;
258 GElf_Word *symshndx = NULL;
259 int symshndx_size;
260 sym_entry_t *sym;
261 sym_entry_t *p_symtab = NULL;
262 int nsym = 0; /* count of symbols we're not interested in */
263
264 /*
265 * Find the symshndx section, if any
266 */
267 for (scn = elf_nextscn(tgt->dt_elf, NULL); scn != NULL;
268 scn = elf_nextscn(tgt->dt_elf, scn)) {
269 if (gelf_getshdr(scn, &shdr) == NULL)
270 break;
271 if (shdr.sh_type == SHT_SYMTAB_SHNDX &&
272 shdr.sh_link == tgt->dt_symidx) {
273 Elf_Data *data;
274
275 if ((data = elf_getdata(scn, NULL)) != NULL) {
276 symshndx = (GElf_Word *)data->d_buf;
277 symshndx_size = data->d_size /
278 sizeof (GElf_Word);
279 break;
280 }
281 }
282 }
283
284 if ((scn = elf_getscn(tgt->dt_elf, tgt->dt_symidx)) == NULL)
285 die("%s: failed to get section information", tgt->dt_filename);
286 if (gelf_getshdr(scn, &shdr) == NULL)
287 die("%s: failed to get section header", tgt->dt_filename);
288 if (shdr.sh_entsize == 0)
289 die("%s: symbol table has zero size", tgt->dt_filename);
290
291 if ((symdata = elf_getdata(scn, NULL)) == NULL)
292 die("%s: failed to get symbol table", tgt->dt_filename);
293
294 tgt->dt_symcount = symdata->d_size / gelf_fsize(tgt->dt_elf, ELF_T_SYM,
295 1, EV_CURRENT);
296
297 p_symtab = safe_malloc(tgt->dt_symcount * sizeof (sym_entry_t));
298
299 for (i = 0, sym = p_symtab; i < tgt->dt_symcount; i++) {
300 if (gelf_getsym(symdata, i, &(sym->se_sym)) == NULL) {
301 warn("%s: gelf_getsym returned NULL for %d",
302 tgt->dt_filename, i);
303 nsym++;
304 continue;
305 }
306
307 /*
308 * We're only interested in data symbols.
309 */
310 if (!IS_DATA_TYPE(GELF_ST_TYPE(sym->se_sym.st_info))) {
311 nsym++;
312 continue;
313 }
314
315 if (sym->se_sym.st_shndx == SHN_XINDEX && symshndx != NULL) {
316 if (i > symshndx_size) {
317 warn("%s: bad SHNX_XINDEX %d",
318 tgt->dt_filename, i);
319 sym->se_shndx = -1;
320 } else {
321 sym->se_shndx = symshndx[i];
322 }
323 } else {
324 sym->se_shndx = sym->se_sym.st_shndx;
325 }
326
327 /* Deal with symbols with special section indicies */
328 if (sym->se_shndx == SHN_ABS) {
329 /*
330 * If st_value == 0, references to these
331 * symbols in code are modified in situ
332 * thus we will never attempt to look
333 * them up.
334 */
335 if (sym->se_sym.st_value == 0) {
336 /*
337 * References to these symbols in code
338 * are modified in situ by the runtime
339 * linker and no code on disk will ever
340 * attempt to look them up.
341 */
342 nsym++;
343 continue;
344 } else {
345 /*
346 * If st_value != 0, (such as examining
347 * something in /system/object/.../object)
348 * the values should resolve to a value
349 * within an existing section (such as
350 * .data). This also means it never needs
351 * to have st_value mapped.
352 */
353 sym++;
354 continue;
355 }
356 }
357
358 /*
359 * Ignore the symbol if it has some other special
360 * section index
361 */
362 if (sym->se_shndx == SHN_UNDEF ||
363 sym->se_shndx >= SHN_LORESERVE) {
364 nsym++;
365 continue;
366 }
367
368 if ((sym->se_name = elf_strptr(tgt->dt_elf, shdr.sh_link,
369 (size_t)sym->se_sym.st_name)) == NULL) {
370 warn("%s: failed to lookup symbol %d name",
371 tgt->dt_filename, i);
372 nsym++;
373 continue;
374 }
375
376 /*
377 * If we had to map this section, its symbol value
378 * also needs to be mapped.
379 */
380 if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
381 sym->se_sym.st_value +=
382 tgt->dt_shnmap[sym->se_shndx].dm_start;
383
384 sym++;
385 }
386
387 tgt->dt_symcount -= nsym;
388 tgt->dt_symtab = realloc(p_symtab, tgt->dt_symcount *
389 sizeof (sym_entry_t));
390
391 qsort(tgt->dt_symtab, tgt->dt_symcount, sizeof (sym_entry_t),
392 sym_compare);
393 }
394
395 /*
396 * Assign virtual address ranges for sections that need it
397 */
398 static void
create_addrmap(dis_tgt_t * tgt)399 create_addrmap(dis_tgt_t *tgt)
400 {
401 uint64_t addr;
402 int i;
403
404 if (tgt->dt_shnmap == NULL)
405 return;
406
407 /* find the greatest used address */
408 for (addr = 0, i = 1; i < tgt->dt_shncount; i++)
409 if (tgt->dt_shnmap[i].dm_start > addr)
410 addr = tgt->dt_shnmap[i].dm_start +
411 tgt->dt_shnmap[i].dm_length;
412
413 addr = P2ROUNDUP(addr, 0x1000);
414
415 /*
416 * Assign section a starting address beyond the largest mapped section
417 * if no address was given.
418 */
419 for (i = 1; i < tgt->dt_shncount; i++) {
420 if (tgt->dt_shnmap[i].dm_start != 0)
421 continue;
422
423 tgt->dt_shnmap[i].dm_start = addr;
424 tgt->dt_shnmap[i].dm_mapped = B_TRUE;
425 addr = P2ROUNDUP(addr + tgt->dt_shnmap[i].dm_length, 0x1000);
426 }
427 }
428
429 /*
430 * Create a target backed by an ELF file.
431 */
432 dis_tgt_t *
dis_tgt_create(const char * file)433 dis_tgt_create(const char *file)
434 {
435 dis_tgt_t *tgt, *current;
436 int idx;
437 Elf *elf;
438 GElf_Ehdr ehdr;
439 Elf_Arhdr *arhdr = NULL;
440 int cmd;
441
442 if (elf_version(EV_CURRENT) == EV_NONE)
443 die("libelf out of date");
444
445 tgt = safe_malloc(sizeof (dis_tgt_t));
446
447 if ((tgt->dt_fd = open(file, O_RDONLY)) < 0) {
448 warn("%s: failed opening file, reason: %s", file,
449 strerror(errno));
450 free(tgt);
451 return (NULL);
452 }
453
454 if ((tgt->dt_elf_root =
455 elf_begin(tgt->dt_fd, ELF_C_READ, NULL)) == NULL) {
456 warn("%s: invalid or corrupt ELF file", file);
457 dis_tgt_destroy(tgt);
458 return (NULL);
459 }
460
461 current = tgt;
462 cmd = ELF_C_READ;
463 while ((elf = elf_begin(tgt->dt_fd, cmd, tgt->dt_elf_root)) != NULL) {
464 size_t shnum = 0;
465
466 if (elf_kind(tgt->dt_elf_root) == ELF_K_AR &&
467 (arhdr = elf_getarhdr(elf)) == NULL) {
468 warn("%s: malformed archive", file);
469 dis_tgt_destroy(tgt);
470 return (NULL);
471 }
472
473 /*
474 * Make sure that this Elf file is sane
475 */
476 if (gelf_getehdr(elf, &ehdr) == NULL) {
477 if (arhdr != NULL) {
478 /*
479 * For archives, we drive on in the face of bad
480 * members. The "/" and "//" members are
481 * special, and should be silently ignored.
482 */
483 if (strcmp(arhdr->ar_name, "/") != 0 &&
484 strcmp(arhdr->ar_name, "//") != 0)
485 warn("%s[%s]: invalid file type",
486 file, arhdr->ar_name);
487 cmd = elf_next(elf);
488 (void) elf_end(elf);
489 continue;
490 }
491
492 warn("%s: invalid file type", file);
493 dis_tgt_destroy(tgt);
494 return (NULL);
495 }
496
497 /*
498 * If we're seeing a new Elf object, then we have an
499 * archive. In this case, we create a new target, and chain it
500 * off the master target. We can later iterate over these
501 * targets using dis_tgt_next().
502 */
503 if (current->dt_elf != NULL) {
504 dis_tgt_t *next = safe_malloc(sizeof (dis_tgt_t));
505 next->dt_elf_root = tgt->dt_elf_root;
506 next->dt_fd = -1;
507 current->dt_next = next;
508 current = next;
509 }
510 current->dt_elf = elf;
511 current->dt_arhdr = arhdr;
512
513 if (elf_getshdrstrndx(elf, ¤t->dt_shstrndx) == -1) {
514 warn("%s: failed to get section string table for "
515 "file", file);
516 dis_tgt_destroy(tgt);
517 return (NULL);
518 }
519
520 if (elf_getshdrnum(elf, &shnum) == -1) {
521 warn("%s: failed to get number of sections in file",
522 file);
523 dis_tgt_destroy(tgt);
524 return (NULL);
525 }
526
527 current->dt_shnmap = safe_malloc(sizeof (dis_shnmap_t) *
528 shnum);
529 current->dt_shncount = shnum;
530
531 idx = 0;
532 dis_tgt_section_iter(current, tgt_scn_init, &idx);
533 current->dt_filename = file;
534
535 create_addrmap(current);
536 if (current->dt_symidx != 0)
537 construct_symtab(current);
538
539 cmd = elf_next(elf);
540 }
541
542 /*
543 * Final sanity check. If we had an archive with no members, then bail
544 * out with a nice message.
545 */
546 if (tgt->dt_elf == NULL) {
547 warn("%s: empty archive\n", file);
548 dis_tgt_destroy(tgt);
549 return (NULL);
550 }
551
552 return (tgt);
553 }
554
555 /*
556 * Return the filename associated with the target.
557 */
558 const char *
dis_tgt_name(dis_tgt_t * tgt)559 dis_tgt_name(dis_tgt_t *tgt)
560 {
561 return (tgt->dt_filename);
562 }
563
564 /*
565 * Return the archive member name, if any.
566 */
567 const char *
dis_tgt_member(dis_tgt_t * tgt)568 dis_tgt_member(dis_tgt_t *tgt)
569 {
570 if (tgt->dt_arhdr)
571 return (tgt->dt_arhdr->ar_name);
572 else
573 return (NULL);
574 }
575
576 /*
577 * Return the Elf_Ehdr associated with this target. Needed to determine which
578 * disassembler to use.
579 */
580 void
dis_tgt_ehdr(dis_tgt_t * tgt,GElf_Ehdr * ehdr)581 dis_tgt_ehdr(dis_tgt_t *tgt, GElf_Ehdr *ehdr)
582 {
583 (void) gelf_getehdr(tgt->dt_elf, ehdr);
584 }
585
586 /*
587 * Return the next target in the list, if this is an archive.
588 */
589 dis_tgt_t *
dis_tgt_next(dis_tgt_t * tgt)590 dis_tgt_next(dis_tgt_t *tgt)
591 {
592 return (tgt->dt_next);
593 }
594
595 /*
596 * Destroy a target and free up any associated memory.
597 */
598 void
dis_tgt_destroy(dis_tgt_t * tgt)599 dis_tgt_destroy(dis_tgt_t *tgt)
600 {
601 dis_tgt_t *current, *next;
602
603 current = tgt->dt_next;
604 while (current != NULL) {
605 next = current->dt_next;
606 if (current->dt_elf)
607 (void) elf_end(current->dt_elf);
608 if (current->dt_symtab)
609 free(current->dt_symtab);
610 free(current);
611 current = next;
612 }
613
614 if (tgt->dt_elf)
615 (void) elf_end(tgt->dt_elf);
616 if (tgt->dt_elf_root)
617 (void) elf_end(tgt->dt_elf_root);
618
619 if (tgt->dt_symtab)
620 free(tgt->dt_symtab);
621
622 free(tgt);
623 }
624
625 /*
626 * Given an address, return the section it is in and set the offset within
627 * the section.
628 */
629 const char *
dis_find_section(dis_tgt_t * tgt,uint64_t addr,off_t * offset)630 dis_find_section(dis_tgt_t *tgt, uint64_t addr, off_t *offset)
631 {
632 int i;
633
634 for (i = 1; i < tgt->dt_shncount; i++) {
635 if ((addr >= tgt->dt_shnmap[i].dm_start) &&
636 (addr < tgt->dt_shnmap[i].dm_start +
637 tgt->dt_shnmap[i].dm_length)) {
638 *offset = addr - tgt->dt_shnmap[i].dm_start;
639 return (tgt->dt_shnmap[i].dm_name);
640 }
641 }
642
643 *offset = 0;
644 return (NULL);
645 }
646
647 /*
648 * Given an address, returns the name of the corresponding symbol, as well as
649 * the offset within that symbol. If no matching symbol is found, then NULL is
650 * returned.
651 *
652 * If 'cache_result' is specified, then we keep track of the resulting symbol.
653 * This cached result is consulted first on subsequent lookups in order to avoid
654 * unecessary lookups. This flag should be used for resolving the current PC,
655 * as the majority of addresses stay within the current function.
656 */
657 const char *
dis_tgt_lookup(dis_tgt_t * tgt,uint64_t addr,off_t * offset,int cache_result,size_t * size,int * isfunc)658 dis_tgt_lookup(dis_tgt_t *tgt, uint64_t addr, off_t *offset, int cache_result,
659 size_t *size, int *isfunc)
660 {
661 int lo, hi, mid;
662 sym_entry_t *sym, *osym, *match;
663 int found;
664
665 *offset = 0;
666 *size = 0;
667 if (isfunc != NULL)
668 *isfunc = 0;
669
670 if (tgt->dt_symcache != NULL &&
671 addr >= tgt->dt_symcache->se_sym.st_value &&
672 addr < tgt->dt_symcache->se_sym.st_value +
673 tgt->dt_symcache->se_sym.st_size) {
674 sym = tgt->dt_symcache;
675 *offset = addr - sym->se_sym.st_value;
676 *size = sym->se_sym.st_size;
677 if (isfunc != NULL)
678 *isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) ==
679 STT_FUNC);
680 return (sym->se_name);
681 }
682
683 lo = 0;
684 hi = (tgt->dt_symcount - 1);
685 found = 0;
686 match = osym = NULL;
687 while (lo <= hi) {
688 mid = (lo + hi) / 2;
689
690 sym = &tgt->dt_symtab[mid];
691
692 if (addr >= sym->se_sym.st_value &&
693 addr < sym->se_sym.st_value + sym->se_sym.st_size &&
694 (!found || sym->se_sym.st_value > osym->se_sym.st_value)) {
695 osym = sym;
696 found = 1;
697 } else if (addr == sym->se_sym.st_value) {
698 /*
699 * Particularly for .plt objects, it's possible to have
700 * a zero sized object. We want to return this, but we
701 * want it to be a last resort.
702 */
703 match = sym;
704 }
705
706 if (addr < sym->se_sym.st_value)
707 hi = mid - 1;
708 else
709 lo = mid + 1;
710 }
711
712 if (!found) {
713 if (match)
714 osym = match;
715 else
716 return (NULL);
717 }
718
719 /*
720 * Walk backwards to find the best match.
721 */
722 do {
723 sym = osym;
724
725 if (osym == tgt->dt_symtab)
726 break;
727
728 osym = osym - 1;
729 } while ((sym->se_sym.st_value == osym->se_sym.st_value) &&
730 (addr >= osym->se_sym.st_value) &&
731 (addr < osym->se_sym.st_value + osym->se_sym.st_size));
732
733 if (cache_result)
734 tgt->dt_symcache = sym;
735
736 *offset = addr - sym->se_sym.st_value;
737 *size = sym->se_sym.st_size;
738 if (isfunc)
739 *isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) == STT_FUNC);
740
741 return (sym->se_name);
742 }
743
744 /*
745 * Given an address, return the starting offset of the next symbol in the file.
746 * Only needed on variable length instruction architectures.
747 */
748 off_t
dis_tgt_next_symbol(dis_tgt_t * tgt,uint64_t addr)749 dis_tgt_next_symbol(dis_tgt_t *tgt, uint64_t addr)
750 {
751 sym_entry_t *sym;
752
753 sym = (tgt->dt_symcache != NULL) ? tgt->dt_symcache : tgt->dt_symtab;
754
755 while (sym != (tgt->dt_symtab + tgt->dt_symcount)) {
756 if (sym->se_sym.st_value >= addr)
757 return (sym->se_sym.st_value - addr);
758 sym++;
759 }
760
761 return (0);
762 }
763
764 /*
765 * Iterate over all sections in the target, executing the given callback for
766 * each.
767 */
768 void
dis_tgt_section_iter(dis_tgt_t * tgt,section_iter_f func,void * data)769 dis_tgt_section_iter(dis_tgt_t *tgt, section_iter_f func, void *data)
770 {
771 dis_scn_t sdata;
772 Elf_Scn *scn;
773 int idx;
774
775 for (scn = elf_nextscn(tgt->dt_elf, NULL), idx = 1; scn != NULL;
776 scn = elf_nextscn(tgt->dt_elf, scn), idx++) {
777
778 if (gelf_getshdr(scn, &sdata.ds_shdr) == NULL) {
779 warn("%s: failed to get section %d header",
780 tgt->dt_filename, idx);
781 continue;
782 }
783
784 if ((sdata.ds_name = elf_strptr(tgt->dt_elf, tgt->dt_shstrndx,
785 sdata.ds_shdr.sh_name)) == NULL) {
786 warn("%s: failed to get section %d name",
787 tgt->dt_filename, idx);
788 continue;
789 }
790
791 if ((sdata.ds_data = elf_getdata(scn, NULL)) == NULL) {
792 warn("%s: failed to get data for section '%s'",
793 tgt->dt_filename, sdata.ds_name);
794 continue;
795 }
796
797 /*
798 * dis_tgt_section_iter is also used before the section map
799 * is initialized, so only check when we need to. If the
800 * section map is uninitialized, it will return 0 and have
801 * no net effect.
802 */
803 if (sdata.ds_shdr.sh_addr == 0)
804 sdata.ds_shdr.sh_addr = tgt->dt_shnmap[idx].dm_start;
805
806 func(tgt, &sdata, data);
807 }
808 }
809
810 /*
811 * Return 1 if the given section contains text, 0 otherwise.
812 */
813 int
dis_section_istext(dis_scn_t * scn)814 dis_section_istext(dis_scn_t *scn)
815 {
816 return ((scn->ds_shdr.sh_type == SHT_PROGBITS) &&
817 (scn->ds_shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR)));
818 }
819
820 /*
821 * Return a pointer to the section data.
822 */
823 void *
dis_section_data(dis_scn_t * scn)824 dis_section_data(dis_scn_t *scn)
825 {
826 return (scn->ds_data->d_buf);
827 }
828
829 /*
830 * Return the size of the section data.
831 */
832 size_t
dis_section_size(dis_scn_t * scn)833 dis_section_size(dis_scn_t *scn)
834 {
835 return (scn->ds_data->d_size);
836 }
837
838 /*
839 * Return the address for the given section.
840 */
841 uint64_t
dis_section_addr(dis_scn_t * scn)842 dis_section_addr(dis_scn_t *scn)
843 {
844 return (scn->ds_shdr.sh_addr);
845 }
846
847 /*
848 * Return the name of the current section.
849 */
850 const char *
dis_section_name(dis_scn_t * scn)851 dis_section_name(dis_scn_t *scn)
852 {
853 return (scn->ds_name);
854 }
855
856 /*
857 * Create an allocated copy of the given section
858 */
859 dis_scn_t *
dis_section_copy(dis_scn_t * scn)860 dis_section_copy(dis_scn_t *scn)
861 {
862 dis_scn_t *new;
863
864 new = safe_malloc(sizeof (dis_scn_t));
865 (void) memcpy(new, scn, sizeof (dis_scn_t));
866
867 return (new);
868 }
869
870 /*
871 * Free section memory
872 */
873 void
dis_section_free(dis_scn_t * scn)874 dis_section_free(dis_scn_t *scn)
875 {
876 free(scn);
877 }
878
879 /*
880 * Iterate over all functions in the target, executing the given callback for
881 * each one.
882 */
883 void
dis_tgt_function_iter(dis_tgt_t * tgt,function_iter_f func,void * data)884 dis_tgt_function_iter(dis_tgt_t *tgt, function_iter_f func, void *data)
885 {
886 int i;
887 sym_entry_t *sym;
888 dis_func_t df;
889 Elf_Scn *scn;
890 GElf_Shdr shdr;
891
892 for (i = 0, sym = tgt->dt_symtab; i < tgt->dt_symcount; i++, sym++) {
893
894 /* ignore non-functions */
895 if ((GELF_ST_TYPE(sym->se_sym.st_info) != STT_FUNC) ||
896 (sym->se_name == NULL) ||
897 (sym->se_sym.st_size == 0) ||
898 (sym->se_shndx >= SHN_LORESERVE))
899 continue;
900
901 /* get the ELF data associated with this function */
902 if ((scn = elf_getscn(tgt->dt_elf, sym->se_shndx)) == NULL ||
903 gelf_getshdr(scn, &shdr) == NULL ||
904 (df.df_data = elf_getdata(scn, NULL)) == NULL ||
905 df.df_data->d_size == 0) {
906 warn("%s: failed to read section %d",
907 tgt->dt_filename, sym->se_shndx);
908 continue;
909 }
910
911 if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
912 shdr.sh_addr = tgt->dt_shnmap[sym->se_shndx].dm_start;
913
914 /*
915 * Verify that the address lies within the section that we think
916 * it does.
917 */
918 if (sym->se_sym.st_value < shdr.sh_addr ||
919 (sym->se_sym.st_value + sym->se_sym.st_size) >
920 (shdr.sh_addr + shdr.sh_size)) {
921 warn("%s: bad section %d for address %p",
922 tgt->dt_filename, sym->se_sym.st_shndx,
923 sym->se_sym.st_value);
924 continue;
925 }
926
927 df.df_sym = sym;
928 df.df_offset = sym->se_sym.st_value - shdr.sh_addr;
929
930 func(tgt, &df, data);
931 }
932 }
933
934 /*
935 * Return the data associated with a given function.
936 */
937 void *
dis_function_data(dis_func_t * func)938 dis_function_data(dis_func_t *func)
939 {
940 return ((char *)func->df_data->d_buf + func->df_offset);
941 }
942
943 /*
944 * Return the size of a function.
945 */
946 size_t
dis_function_size(dis_func_t * func)947 dis_function_size(dis_func_t *func)
948 {
949 return (func->df_sym->se_sym.st_size);
950 }
951
952 /*
953 * Return the address of a function.
954 */
955 uint64_t
dis_function_addr(dis_func_t * func)956 dis_function_addr(dis_func_t *func)
957 {
958 return (func->df_sym->se_sym.st_value);
959 }
960
961 /*
962 * Return the name of the function
963 */
964 const char *
dis_function_name(dis_func_t * func)965 dis_function_name(dis_func_t *func)
966 {
967 return (func->df_sym->se_name);
968 }
969
970 /*
971 * Return a copy of a function.
972 */
973 dis_func_t *
dis_function_copy(dis_func_t * func)974 dis_function_copy(dis_func_t *func)
975 {
976 dis_func_t *new;
977
978 new = safe_malloc(sizeof (dis_func_t));
979 (void) memcpy(new, func, sizeof (dis_func_t));
980
981 return (new);
982 }
983
984 /*
985 * Free function memory
986 */
987 void
dis_function_free(dis_func_t * func)988 dis_function_free(dis_func_t *func)
989 {
990 free(func);
991 }
992