1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24 *
25 * Copyright 2011 Jason King. All rights reserved.
26 */
27
28 #include <assert.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <gelf.h>
32 #include <libelf.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include <sys/fcntl.h>
38 #include <sys/stat.h>
39 #include <sys/sysmacros.h>
40 #include <sys/types.h>
41
42 #include "dis_target.h"
43 #include "dis_util.h"
44
45 /*
46 * Standard ELF disassembler target.
47 *
48 * We only support disassembly of ELF files, though this target interface could
49 * be extended in the future. Each basic type (target, func, section) contains
50 * enough information to uniquely identify the location within the file. The
51 * interfaces use libelf(3LIB) to do the actual processing of the file.
52 */
53
/*
 * Symbol table entry type.  We maintain our own symbol table sorted by address,
 * with the symbol name already resolved against the ELF symbol table.
 *
 * Entries are produced by construct_symtab() and ordered by sym_compare().
 */
typedef struct sym_entry {
	GElf_Sym	se_sym;		/* symbol as read by gelf_getsym() */
	char		*se_name;	/* name from elf_strptr(); not */
					/* looked up for kept SHN_ABS syms */
	int		se_shndx;	/* section index; taken from the */
					/* SHT_SYMTAB_SHNDX table when */
					/* st_shndx is SHN_XINDEX */
} sym_entry_t;
63
/*
 * Create a map of the virtual address ranges of every section.  This will
 * allow us to create dummy mappings for unassigned addresses.  Otherwise
 * multiple sections with unassigned addresses will appear to overlap and
 * mess up symbol resolution (which uses the virtual address).
 *
 * The map is an array indexed by section index; tgt_scn_init() fills in one
 * entry per section and create_addrmap() assigns the dummy ranges.
 */
typedef struct dis_shnmap {
	const char	*dm_name;	/* name of section */
	uint64_t	dm_start;	/* virtual address of section */
	size_t		dm_length;	/* address length (from sh_size) */
	boolean_t	dm_mapped;	/* B_TRUE if create_addrmap() */
					/* assigned dm_start for us */
} dis_shnmap_t;
76
/*
 * Target data structure.  This structure keeps track of the ELF file
 * information, a few bits of pre-processed section index information, and
 * sorted versions of the symbol table.  We also keep track of the last symbol
 * looked up, as the majority of lookups remain within the same symbol.
 *
 * For an archive, dis_tgt_create() builds one of these per member and links
 * them through dt_next; all of them share the same dt_elf_root.
 */
struct dis_tgt {
	Elf		*dt_elf;	/* libelf handle */
	Elf		*dt_elf_root;	/* main libelf handle (for archives) */
	const char	*dt_filename;	/* name of file (caller's string) */
	int		dt_fd;		/* underlying file descriptor; -1 */
					/* on chained archive members */
	size_t		dt_shstrndx;	/* section index of .shstrtab */
	size_t		dt_symidx;	/* section index of symbol table; */
					/* 0 means none was found */
	sym_entry_t	*dt_symcache;	/* last symbol looked up */
	sym_entry_t	*dt_symtab;	/* sorted symbol table */
	int		dt_symcount;	/* # of symbol table entries */
	struct dis_tgt	*dt_next;	/* next target (for archives) */
	Elf_Arhdr	*dt_arhdr;	/* archive header (for archives) */
	dis_shnmap_t	*dt_shnmap;	/* section address map */
	size_t		dt_shncount;	/* # of sections in target */
};
98
/*
 * Function data structure.  We resolve the symbol and lookup the associated ELF
 * data when building this structure.  The offset is calculated based on the
 * section's starting address.
 *
 * Filled in by dis_tgt_function_iter() and handed to its callback; the
 * dis_function_*() accessors below read from it.
 */
struct dis_func {
	sym_entry_t	*df_sym;	/* symbol table reference */
	Elf_Data	*df_data;	/* ELF data of the symbol's section */
	size_t		df_offset;	/* st_value minus section address */
};
109
/*
 * Section data structure.  We store the entire section header so that we can
 * determine some properties (such as whether or not it contains text) after
 * building the structure.
 *
 * Filled in by dis_tgt_section_iter() for each section it visits.
 */
struct dis_scn {
	GElf_Shdr	ds_shdr;	/* section header; sh_addr may have */
					/* been replaced by the mapped addr */
	const char	*ds_name;	/* section name from .shstrtab */
	Elf_Data	*ds_data;	/* first data buffer of the section */
};
120
/*
 * Lifted from Psymtab.c, omitting STT_TLS.
 *
 * DATA_TYPES is a bitmask with one bit per symbol type that construct_symtab()
 * keeps (STT_OBJECT, STT_FUNC and STT_COMMON).  IS_DATA_TYPE(tp) is non-zero
 * when the symbol type 'tp' is one of those.  'tp' is used as a shift count
 * on an int, so it is assumed to be a small value such as the result of
 * GELF_ST_TYPE().
 */
#define	DATA_TYPES	\
	((1 << STT_OBJECT) | (1 << STT_FUNC) | (1 << STT_COMMON))
#define	IS_DATA_TYPE(tp)	(((1 << (tp)) & DATA_TYPES) != 0)
125
126 /*
127 * Save the virtual address range for this section and select the
128 * best section to use as the symbol table. We prefer SHT_SYMTAB
129 * over SHT_DYNSYM.
130 */
131 /* ARGSUSED */
132 static void
tgt_scn_init(dis_tgt_t * tgt,dis_scn_t * scn,void * data)133 tgt_scn_init(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
134 {
135 int *index = data;
136
137 *index += 1;
138
139 tgt->dt_shnmap[*index].dm_name = scn->ds_name;
140 tgt->dt_shnmap[*index].dm_start = scn->ds_shdr.sh_addr;
141 tgt->dt_shnmap[*index].dm_length = scn->ds_shdr.sh_size;
142 tgt->dt_shnmap[*index].dm_mapped = B_FALSE;
143
144 /*
145 * Prefer SHT_SYMTAB over SHT_DYNSYM
146 */
147 if (scn->ds_shdr.sh_type == SHT_DYNSYM && tgt->dt_symidx == 0)
148 tgt->dt_symidx = *index;
149 else if (scn->ds_shdr.sh_type == SHT_SYMTAB)
150 tgt->dt_symidx = *index;
151 }
152
153 static int
sym_compare(const void * a,const void * b)154 sym_compare(const void *a, const void *b)
155 {
156 const sym_entry_t *syma = a;
157 const sym_entry_t *symb = b;
158 const char *aname = syma->se_name;
159 const char *bname = symb->se_name;
160
161 if (syma->se_sym.st_value < symb->se_sym.st_value)
162 return (-1);
163
164 if (syma->se_sym.st_value > symb->se_sym.st_value)
165 return (1);
166
167 /*
168 * Prefer functions over non-functions
169 */
170 if (GELF_ST_TYPE(syma->se_sym.st_info) !=
171 GELF_ST_TYPE(symb->se_sym.st_info)) {
172 if (GELF_ST_TYPE(syma->se_sym.st_info) == STT_FUNC)
173 return (-1);
174 if (GELF_ST_TYPE(symb->se_sym.st_info) == STT_FUNC)
175 return (1);
176 }
177
178 /*
179 * For symbols with the same address and type, we sort them according to
180 * a hierarchy:
181 *
182 * 1. weak symbols (common name)
183 * 2. global symbols (external name)
184 * 3. local symbols
185 */
186 if (GELF_ST_BIND(syma->se_sym.st_info) !=
187 GELF_ST_BIND(symb->se_sym.st_info)) {
188 if (GELF_ST_BIND(syma->se_sym.st_info) == STB_WEAK)
189 return (-1);
190 if (GELF_ST_BIND(symb->se_sym.st_info) == STB_WEAK)
191 return (1);
192
193 if (GELF_ST_BIND(syma->se_sym.st_info) == STB_GLOBAL)
194 return (-1);
195 if (GELF_ST_BIND(symb->se_sym.st_info) == STB_GLOBAL)
196 return (1);
197 }
198
199 /*
200 * As a last resort, if we have multiple symbols of the same type at the
201 * same address, prefer the version with the fewest leading underscores.
202 */
203 if (aname == NULL)
204 return (-1);
205 if (bname == NULL)
206 return (1);
207
208 while (*aname == '_' && *bname == '_') {
209 aname++;
210 bname++;
211 }
212
213 if (*bname == '_')
214 return (-1);
215 if (*aname == '_')
216 return (1);
217
218 /*
219 * Prefer the symbol with the smaller size.
220 */
221 if (syma->se_sym.st_size < symb->se_sym.st_size)
222 return (-1);
223 if (syma->se_sym.st_size > symb->se_sym.st_size)
224 return (1);
225
226 /*
227 * We really do have two identical symbols for some reason. Just report
228 * them as equal, and to the lucky one go the spoils.
229 */
230 return (0);
231 }
232
233 /*
234 * Construct an optimized symbol table sorted by starting address.
235 */
236 static void
construct_symtab(dis_tgt_t * tgt)237 construct_symtab(dis_tgt_t *tgt)
238 {
239 Elf_Scn *scn;
240 GElf_Shdr shdr;
241 Elf_Data *symdata;
242 int i;
243 GElf_Word *symshndx = NULL;
244 int symshndx_size;
245 sym_entry_t *sym;
246 sym_entry_t *p_symtab = NULL;
247 int nsym = 0; /* count of symbols we're not interested in */
248
249 /*
250 * Find the symshndx section, if any
251 */
252 for (scn = elf_nextscn(tgt->dt_elf, NULL); scn != NULL;
253 scn = elf_nextscn(tgt->dt_elf, scn)) {
254 if (gelf_getshdr(scn, &shdr) == NULL)
255 break;
256 if (shdr.sh_type == SHT_SYMTAB_SHNDX &&
257 shdr.sh_link == tgt->dt_symidx) {
258 Elf_Data *data;
259
260 if ((data = elf_getdata(scn, NULL)) != NULL) {
261 symshndx = (GElf_Word *)data->d_buf;
262 symshndx_size = data->d_size /
263 sizeof (GElf_Word);
264 break;
265 }
266 }
267 }
268
269 if ((scn = elf_getscn(tgt->dt_elf, tgt->dt_symidx)) == NULL)
270 die("%s: failed to get section information", tgt->dt_filename);
271 if (gelf_getshdr(scn, &shdr) == NULL)
272 die("%s: failed to get section header", tgt->dt_filename);
273 if (shdr.sh_entsize == 0)
274 die("%s: symbol table has zero size", tgt->dt_filename);
275
276 if ((symdata = elf_getdata(scn, NULL)) == NULL)
277 die("%s: failed to get symbol table", tgt->dt_filename);
278
279 tgt->dt_symcount = symdata->d_size / gelf_fsize(tgt->dt_elf, ELF_T_SYM,
280 1, EV_CURRENT);
281
282 p_symtab = safe_malloc(tgt->dt_symcount * sizeof (sym_entry_t));
283
284 for (i = 0, sym = p_symtab; i < tgt->dt_symcount; i++) {
285 if (gelf_getsym(symdata, i, &(sym->se_sym)) == NULL) {
286 warn("%s: gelf_getsym returned NULL for %d",
287 tgt->dt_filename, i);
288 nsym++;
289 continue;
290 }
291
292 /*
293 * We're only interested in data symbols.
294 */
295 if (!IS_DATA_TYPE(GELF_ST_TYPE(sym->se_sym.st_info))) {
296 nsym++;
297 continue;
298 }
299
300 if (sym->se_sym.st_shndx == SHN_XINDEX && symshndx != NULL) {
301 if (i > symshndx_size) {
302 warn("%s: bad SHNX_XINDEX %d",
303 tgt->dt_filename, i);
304 sym->se_shndx = -1;
305 } else {
306 sym->se_shndx = symshndx[i];
307 }
308 } else {
309 sym->se_shndx = sym->se_sym.st_shndx;
310 }
311
312 /* Deal with symbols with special section indicies */
313 if (sym->se_shndx == SHN_ABS) {
314 /*
315 * If st_value == 0, references to these
316 * symbols in code are modified in situ
317 * thus we will never attempt to look
318 * them up.
319 */
320 if (sym->se_sym.st_value == 0) {
321 /*
322 * References to these symbols in code
323 * are modified in situ by the runtime
324 * linker and no code on disk will ever
325 * attempt to look them up.
326 */
327 nsym++;
328 continue;
329 } else {
330 /*
331 * If st_value != 0, (such as examining
332 * something in /system/object/.../object)
333 * the values should resolve to a value
334 * within an existing section (such as
335 * .data). This also means it never needs
336 * to have st_value mapped.
337 */
338 sym++;
339 continue;
340 }
341 }
342
343 /*
344 * Ignore the symbol if it has some other special
345 * section index
346 */
347 if (sym->se_shndx == SHN_UNDEF ||
348 sym->se_shndx >= SHN_LORESERVE) {
349 nsym++;
350 continue;
351 }
352
353 if ((sym->se_name = elf_strptr(tgt->dt_elf, shdr.sh_link,
354 (size_t)sym->se_sym.st_name)) == NULL) {
355 warn("%s: failed to lookup symbol %d name",
356 tgt->dt_filename, i);
357 nsym++;
358 continue;
359 }
360
361 /*
362 * If we had to map this section, its symbol value
363 * also needs to be mapped.
364 */
365 if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
366 sym->se_sym.st_value +=
367 tgt->dt_shnmap[sym->se_shndx].dm_start;
368
369 sym++;
370 }
371
372 tgt->dt_symcount -= nsym;
373 tgt->dt_symtab = realloc(p_symtab, tgt->dt_symcount *
374 sizeof (sym_entry_t));
375
376 qsort(tgt->dt_symtab, tgt->dt_symcount, sizeof (sym_entry_t),
377 sym_compare);
378 }
379
380 /*
381 * Assign virtual address ranges for sections that need it
382 */
383 static void
create_addrmap(dis_tgt_t * tgt)384 create_addrmap(dis_tgt_t *tgt)
385 {
386 uint64_t addr;
387 int i;
388
389 if (tgt->dt_shnmap == NULL)
390 return;
391
392 /* find the greatest used address */
393 for (addr = 0, i = 1; i < tgt->dt_shncount; i++)
394 if (tgt->dt_shnmap[i].dm_start > addr)
395 addr = tgt->dt_shnmap[i].dm_start +
396 tgt->dt_shnmap[i].dm_length;
397
398 addr = P2ROUNDUP(addr, 0x1000);
399
400 /*
401 * Assign section a starting address beyond the largest mapped section
402 * if no address was given.
403 */
404 for (i = 1; i < tgt->dt_shncount; i++) {
405 if (tgt->dt_shnmap[i].dm_start != 0)
406 continue;
407
408 tgt->dt_shnmap[i].dm_start = addr;
409 tgt->dt_shnmap[i].dm_mapped = B_TRUE;
410 addr = P2ROUNDUP(addr + tgt->dt_shnmap[i].dm_length, 0x1000);
411 }
412 }
413
414 /*
415 * Create a target backed by an ELF file.
416 */
417 dis_tgt_t *
dis_tgt_create(const char * file)418 dis_tgt_create(const char *file)
419 {
420 dis_tgt_t *tgt, *current;
421 int idx;
422 Elf *elf;
423 GElf_Ehdr ehdr;
424 Elf_Arhdr *arhdr = NULL;
425 int cmd;
426
427 if (elf_version(EV_CURRENT) == EV_NONE)
428 die("libelf(3ELF) out of date");
429
430 tgt = safe_malloc(sizeof (dis_tgt_t));
431
432 if ((tgt->dt_fd = open(file, O_RDONLY)) < 0) {
433 warn("%s: failed opening file, reason: %s", file,
434 strerror(errno));
435 free(tgt);
436 return (NULL);
437 }
438
439 if ((tgt->dt_elf_root =
440 elf_begin(tgt->dt_fd, ELF_C_READ, NULL)) == NULL) {
441 warn("%s: invalid or corrupt ELF file", file);
442 dis_tgt_destroy(tgt);
443 return (NULL);
444 }
445
446 current = tgt;
447 cmd = ELF_C_READ;
448 while ((elf = elf_begin(tgt->dt_fd, cmd, tgt->dt_elf_root)) != NULL) {
449 size_t shnum = 0;
450
451 if (elf_kind(tgt->dt_elf_root) == ELF_K_AR &&
452 (arhdr = elf_getarhdr(elf)) == NULL) {
453 warn("%s: malformed archive", file);
454 dis_tgt_destroy(tgt);
455 return (NULL);
456 }
457
458 /*
459 * Make sure that this Elf file is sane
460 */
461 if (gelf_getehdr(elf, &ehdr) == NULL) {
462 if (arhdr != NULL) {
463 /*
464 * For archives, we drive on in the face of bad
465 * members. The "/" and "//" members are
466 * special, and should be silently ignored.
467 */
468 if (strcmp(arhdr->ar_name, "/") != 0 &&
469 strcmp(arhdr->ar_name, "//") != 0)
470 warn("%s[%s]: invalid file type",
471 file, arhdr->ar_name);
472 cmd = elf_next(elf);
473 (void) elf_end(elf);
474 continue;
475 }
476
477 warn("%s: invalid file type", file);
478 dis_tgt_destroy(tgt);
479 return (NULL);
480 }
481
482 /*
483 * If we're seeing a new Elf object, then we have an
484 * archive. In this case, we create a new target, and chain it
485 * off the master target. We can later iterate over these
486 * targets using dis_tgt_next().
487 */
488 if (current->dt_elf != NULL) {
489 dis_tgt_t *next = safe_malloc(sizeof (dis_tgt_t));
490 next->dt_elf_root = tgt->dt_elf_root;
491 next->dt_fd = -1;
492 current->dt_next = next;
493 current = next;
494 }
495 current->dt_elf = elf;
496 current->dt_arhdr = arhdr;
497
498 if (elf_getshdrstrndx(elf, ¤t->dt_shstrndx) == -1) {
499 warn("%s: failed to get section string table for "
500 "file", file);
501 dis_tgt_destroy(tgt);
502 return (NULL);
503 }
504
505 if (elf_getshdrnum(elf, &shnum) == -1) {
506 warn("%s: failed to get number of sections in file",
507 file);
508 dis_tgt_destroy(tgt);
509 return (NULL);
510 }
511
512 current->dt_shnmap = safe_malloc(sizeof (dis_shnmap_t) *
513 shnum);
514 current->dt_shncount = shnum;
515
516 idx = 0;
517 dis_tgt_section_iter(current, tgt_scn_init, &idx);
518 current->dt_filename = file;
519
520 create_addrmap(current);
521 if (current->dt_symidx != 0)
522 construct_symtab(current);
523
524 cmd = elf_next(elf);
525 }
526
527 /*
528 * Final sanity check. If we had an archive with no members, then bail
529 * out with a nice message.
530 */
531 if (tgt->dt_elf == NULL) {
532 warn("%s: empty archive\n", file);
533 dis_tgt_destroy(tgt);
534 return (NULL);
535 }
536
537 return (tgt);
538 }
539
540 /*
541 * Return the filename associated with the target.
542 */
543 const char *
dis_tgt_name(dis_tgt_t * tgt)544 dis_tgt_name(dis_tgt_t *tgt)
545 {
546 return (tgt->dt_filename);
547 }
548
549 /*
550 * Return the archive member name, if any.
551 */
552 const char *
dis_tgt_member(dis_tgt_t * tgt)553 dis_tgt_member(dis_tgt_t *tgt)
554 {
555 if (tgt->dt_arhdr)
556 return (tgt->dt_arhdr->ar_name);
557 else
558 return (NULL);
559 }
560
561 /*
562 * Return the Elf_Ehdr associated with this target. Needed to determine which
563 * disassembler to use.
564 */
565 void
dis_tgt_ehdr(dis_tgt_t * tgt,GElf_Ehdr * ehdr)566 dis_tgt_ehdr(dis_tgt_t *tgt, GElf_Ehdr *ehdr)
567 {
568 (void) gelf_getehdr(tgt->dt_elf, ehdr);
569 }
570
571 /*
572 * Return the next target in the list, if this is an archive.
573 */
574 dis_tgt_t *
dis_tgt_next(dis_tgt_t * tgt)575 dis_tgt_next(dis_tgt_t *tgt)
576 {
577 return (tgt->dt_next);
578 }
579
580 /*
581 * Destroy a target and free up any associated memory.
582 */
583 void
dis_tgt_destroy(dis_tgt_t * tgt)584 dis_tgt_destroy(dis_tgt_t *tgt)
585 {
586 dis_tgt_t *current, *next;
587
588 current = tgt->dt_next;
589 while (current != NULL) {
590 next = current->dt_next;
591 if (current->dt_elf)
592 (void) elf_end(current->dt_elf);
593 if (current->dt_symtab)
594 free(current->dt_symtab);
595 free(current);
596 current = next;
597 }
598
599 if (tgt->dt_elf)
600 (void) elf_end(tgt->dt_elf);
601 if (tgt->dt_elf_root)
602 (void) elf_end(tgt->dt_elf_root);
603
604 if (tgt->dt_symtab)
605 free(tgt->dt_symtab);
606
607 free(tgt);
608 }
609
610 /*
611 * Given an address, return the section it is in and set the offset within
612 * the section.
613 */
614 const char *
dis_find_section(dis_tgt_t * tgt,uint64_t addr,off_t * offset)615 dis_find_section(dis_tgt_t *tgt, uint64_t addr, off_t *offset)
616 {
617 int i;
618
619 for (i = 1; i < tgt->dt_shncount; i++) {
620 if ((addr >= tgt->dt_shnmap[i].dm_start) &&
621 (addr < tgt->dt_shnmap[i].dm_start +
622 tgt->dt_shnmap[i].dm_length)) {
623 *offset = addr - tgt->dt_shnmap[i].dm_start;
624 return (tgt->dt_shnmap[i].dm_name);
625 }
626 }
627
628 *offset = 0;
629 return (NULL);
630 }
631
632 /*
633 * Given an address, returns the name of the corresponding symbol, as well as
634 * the offset within that symbol. If no matching symbol is found, then NULL is
635 * returned.
636 *
637 * If 'cache_result' is specified, then we keep track of the resulting symbol.
638 * This cached result is consulted first on subsequent lookups in order to avoid
639 * unecessary lookups. This flag should be used for resolving the current PC,
640 * as the majority of addresses stay within the current function.
641 */
642 const char *
dis_tgt_lookup(dis_tgt_t * tgt,uint64_t addr,off_t * offset,int cache_result,size_t * size,int * isfunc)643 dis_tgt_lookup(dis_tgt_t *tgt, uint64_t addr, off_t *offset, int cache_result,
644 size_t *size, int *isfunc)
645 {
646 int lo, hi, mid;
647 sym_entry_t *sym, *osym, *match;
648 int found;
649
650 *offset = 0;
651 *size = 0;
652 if (isfunc != NULL)
653 *isfunc = 0;
654
655 if (tgt->dt_symcache != NULL &&
656 addr >= tgt->dt_symcache->se_sym.st_value &&
657 addr < tgt->dt_symcache->se_sym.st_value +
658 tgt->dt_symcache->se_sym.st_size) {
659 sym = tgt->dt_symcache;
660 *offset = addr - sym->se_sym.st_value;
661 *size = sym->se_sym.st_size;
662 if (isfunc != NULL)
663 *isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) ==
664 STT_FUNC);
665 return (sym->se_name);
666 }
667
668 lo = 0;
669 hi = (tgt->dt_symcount - 1);
670 found = 0;
671 match = osym = NULL;
672 while (lo <= hi) {
673 mid = (lo + hi) / 2;
674
675 sym = &tgt->dt_symtab[mid];
676
677 if (addr >= sym->se_sym.st_value &&
678 addr < sym->se_sym.st_value + sym->se_sym.st_size &&
679 (!found || sym->se_sym.st_value > osym->se_sym.st_value)) {
680 osym = sym;
681 found = 1;
682 } else if (addr == sym->se_sym.st_value) {
683 /*
684 * Particularly for .plt objects, it's possible to have
685 * a zero sized object. We want to return this, but we
686 * want it to be a last resort.
687 */
688 match = sym;
689 }
690
691 if (addr < sym->se_sym.st_value)
692 hi = mid - 1;
693 else
694 lo = mid + 1;
695 }
696
697 if (!found) {
698 if (match)
699 osym = match;
700 else
701 return (NULL);
702 }
703
704 /*
705 * Walk backwards to find the best match.
706 */
707 do {
708 sym = osym;
709
710 if (osym == tgt->dt_symtab)
711 break;
712
713 osym = osym - 1;
714 } while ((sym->se_sym.st_value == osym->se_sym.st_value) &&
715 (addr >= osym->se_sym.st_value) &&
716 (addr < osym->se_sym.st_value + osym->se_sym.st_size));
717
718 if (cache_result)
719 tgt->dt_symcache = sym;
720
721 *offset = addr - sym->se_sym.st_value;
722 *size = sym->se_sym.st_size;
723 if (isfunc)
724 *isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) == STT_FUNC);
725
726 return (sym->se_name);
727 }
728
729 /*
730 * Given an address, return the starting offset of the next symbol in the file.
731 * Only needed on variable length instruction architectures.
732 */
733 off_t
dis_tgt_next_symbol(dis_tgt_t * tgt,uint64_t addr)734 dis_tgt_next_symbol(dis_tgt_t *tgt, uint64_t addr)
735 {
736 sym_entry_t *sym;
737
738 sym = (tgt->dt_symcache != NULL) ? tgt->dt_symcache : tgt->dt_symtab;
739
740 while (sym != (tgt->dt_symtab + tgt->dt_symcount)) {
741 if (sym->se_sym.st_value >= addr)
742 return (sym->se_sym.st_value - addr);
743 sym++;
744 }
745
746 return (0);
747 }
748
749 /*
750 * Iterate over all sections in the target, executing the given callback for
751 * each.
752 */
753 void
dis_tgt_section_iter(dis_tgt_t * tgt,section_iter_f func,void * data)754 dis_tgt_section_iter(dis_tgt_t *tgt, section_iter_f func, void *data)
755 {
756 dis_scn_t sdata;
757 Elf_Scn *scn;
758 int idx;
759
760 for (scn = elf_nextscn(tgt->dt_elf, NULL), idx = 1; scn != NULL;
761 scn = elf_nextscn(tgt->dt_elf, scn), idx++) {
762
763 if (gelf_getshdr(scn, &sdata.ds_shdr) == NULL) {
764 warn("%s: failed to get section %d header",
765 tgt->dt_filename, idx);
766 continue;
767 }
768
769 if ((sdata.ds_name = elf_strptr(tgt->dt_elf, tgt->dt_shstrndx,
770 sdata.ds_shdr.sh_name)) == NULL) {
771 warn("%s: failed to get section %d name",
772 tgt->dt_filename, idx);
773 continue;
774 }
775
776 if ((sdata.ds_data = elf_getdata(scn, NULL)) == NULL) {
777 warn("%s: failed to get data for section '%s'",
778 tgt->dt_filename, sdata.ds_name);
779 continue;
780 }
781
782 /*
783 * dis_tgt_section_iter is also used before the section map
784 * is initialized, so only check when we need to. If the
785 * section map is uninitialized, it will return 0 and have
786 * no net effect.
787 */
788 if (sdata.ds_shdr.sh_addr == 0)
789 sdata.ds_shdr.sh_addr = tgt->dt_shnmap[idx].dm_start;
790
791 func(tgt, &sdata, data);
792 }
793 }
794
795 /*
796 * Return 1 if the given section contains text, 0 otherwise.
797 */
798 int
dis_section_istext(dis_scn_t * scn)799 dis_section_istext(dis_scn_t *scn)
800 {
801 return ((scn->ds_shdr.sh_type == SHT_PROGBITS) &&
802 (scn->ds_shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR)));
803 }
804
805 /*
806 * Return a pointer to the section data.
807 */
808 void *
dis_section_data(dis_scn_t * scn)809 dis_section_data(dis_scn_t *scn)
810 {
811 return (scn->ds_data->d_buf);
812 }
813
814 /*
815 * Return the size of the section data.
816 */
817 size_t
dis_section_size(dis_scn_t * scn)818 dis_section_size(dis_scn_t *scn)
819 {
820 return (scn->ds_data->d_size);
821 }
822
823 /*
824 * Return the address for the given section.
825 */
826 uint64_t
dis_section_addr(dis_scn_t * scn)827 dis_section_addr(dis_scn_t *scn)
828 {
829 return (scn->ds_shdr.sh_addr);
830 }
831
832 /*
833 * Return the name of the current section.
834 */
835 const char *
dis_section_name(dis_scn_t * scn)836 dis_section_name(dis_scn_t *scn)
837 {
838 return (scn->ds_name);
839 }
840
841 /*
842 * Create an allocated copy of the given section
843 */
844 dis_scn_t *
dis_section_copy(dis_scn_t * scn)845 dis_section_copy(dis_scn_t *scn)
846 {
847 dis_scn_t *new;
848
849 new = safe_malloc(sizeof (dis_scn_t));
850 (void) memcpy(new, scn, sizeof (dis_scn_t));
851
852 return (new);
853 }
854
855 /*
856 * Free section memory
857 */
858 void
dis_section_free(dis_scn_t * scn)859 dis_section_free(dis_scn_t *scn)
860 {
861 free(scn);
862 }
863
864 /*
865 * Iterate over all functions in the target, executing the given callback for
866 * each one.
867 */
868 void
dis_tgt_function_iter(dis_tgt_t * tgt,function_iter_f func,void * data)869 dis_tgt_function_iter(dis_tgt_t *tgt, function_iter_f func, void *data)
870 {
871 int i;
872 sym_entry_t *sym;
873 dis_func_t df;
874 Elf_Scn *scn;
875 GElf_Shdr shdr;
876
877 for (i = 0, sym = tgt->dt_symtab; i < tgt->dt_symcount; i++, sym++) {
878
879 /* ignore non-functions */
880 if ((GELF_ST_TYPE(sym->se_sym.st_info) != STT_FUNC) ||
881 (sym->se_name == NULL) ||
882 (sym->se_sym.st_size == 0) ||
883 (sym->se_shndx >= SHN_LORESERVE))
884 continue;
885
886 /* get the ELF data associated with this function */
887 if ((scn = elf_getscn(tgt->dt_elf, sym->se_shndx)) == NULL ||
888 gelf_getshdr(scn, &shdr) == NULL ||
889 (df.df_data = elf_getdata(scn, NULL)) == NULL ||
890 df.df_data->d_size == 0) {
891 warn("%s: failed to read section %d",
892 tgt->dt_filename, sym->se_shndx);
893 continue;
894 }
895
896 if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
897 shdr.sh_addr = tgt->dt_shnmap[sym->se_shndx].dm_start;
898
899 /*
900 * Verify that the address lies within the section that we think
901 * it does.
902 */
903 if (sym->se_sym.st_value < shdr.sh_addr ||
904 (sym->se_sym.st_value + sym->se_sym.st_size) >
905 (shdr.sh_addr + shdr.sh_size)) {
906 warn("%s: bad section %d for address %p",
907 tgt->dt_filename, sym->se_sym.st_shndx,
908 sym->se_sym.st_value);
909 continue;
910 }
911
912 df.df_sym = sym;
913 df.df_offset = sym->se_sym.st_value - shdr.sh_addr;
914
915 func(tgt, &df, data);
916 }
917 }
918
919 /*
920 * Return the data associated with a given function.
921 */
922 void *
dis_function_data(dis_func_t * func)923 dis_function_data(dis_func_t *func)
924 {
925 return ((char *)func->df_data->d_buf + func->df_offset);
926 }
927
928 /*
929 * Return the size of a function.
930 */
931 size_t
dis_function_size(dis_func_t * func)932 dis_function_size(dis_func_t *func)
933 {
934 return (func->df_sym->se_sym.st_size);
935 }
936
937 /*
938 * Return the address of a function.
939 */
940 uint64_t
dis_function_addr(dis_func_t * func)941 dis_function_addr(dis_func_t *func)
942 {
943 return (func->df_sym->se_sym.st_value);
944 }
945
946 /*
947 * Return the name of the function
948 */
949 const char *
dis_function_name(dis_func_t * func)950 dis_function_name(dis_func_t *func)
951 {
952 return (func->df_sym->se_name);
953 }
954
955 /*
956 * Return a copy of a function.
957 */
958 dis_func_t *
dis_function_copy(dis_func_t * func)959 dis_function_copy(dis_func_t *func)
960 {
961 dis_func_t *new;
962
963 new = safe_malloc(sizeof (dis_func_t));
964 (void) memcpy(new, func, sizeof (dis_func_t));
965
966 return (new);
967 }
968
969 /*
970 * Free function memory
971 */
972 void
dis_function_free(dis_func_t * func)973 dis_function_free(dis_func_t *func)
974 {
975 free(func);
976 }
977