xref: /illumos-gate/usr/src/cmd/dis/dis_target.c (revision 4b9db4f6425b1a08fca4390f446072c4a6aae8d5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  *
25  * Copyright 2011 Jason King.  All rights reserved.
26  */
27 
28 #include <assert.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <gelf.h>
32 #include <libelf.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #include <sys/fcntl.h>
38 #include <sys/stat.h>
39 #include <sys/sysmacros.h>
40 #include <sys/types.h>
41 
42 #include "dis_target.h"
43 #include "dis_util.h"
44 
45 /*
46  * Standard ELF disassembler target.
47  *
48  * We only support disassembly of ELF files, though this target interface could
49  * be extended in the future.  Each basic type (target, func, section) contains
50  * enough information to uniquely identify the location within the file.  The
51  * interfaces use libelf(3LIB) to do the actual processing of the file.
52  */
53 
/*
 * Symbol table entry type.  We maintain our own symbol table sorted by address,
 * with the symbol name already resolved against the ELF symbol table.
 *
 * Entries are filled in by construct_symtab() and ordered by sym_compare().
 */
typedef struct sym_entry {
	/*
	 * Copy of the ELF symbol as returned by gelf_getsym().  st_value is
	 * rebased by construct_symtab() when the owning section was assigned
	 * an address by create_addrmap().
	 */
	GElf_Sym	se_sym;		/* value of symbol */
	/* Result of elf_strptr(); consumers in this file check it for NULL. */
	char		*se_name;	/* name of symbol */
	/*
	 * Resolved section index: taken from the SHT_SYMTAB_SHNDX table when
	 * st_shndx is SHN_XINDEX, otherwise a copy of st_shndx.  Set to -1
	 * when the extended index could not be looked up.
	 */
	int		se_shndx;	/* section where symbol is located */
} sym_entry_t;
63 
/*
 * Create a map of the virtual address ranges of every section.  This will
 * allow us to create dummy mappings for unassigned addresses.  Otherwise
 * multiple sections with unassigned addresses will appear to overlap and
 * mess up symbol resolution (which uses the virtual address).
 *
 * One entry exists per section header; the array is indexed by section
 * index.  Entries are filled in by tgt_scn_init() and then adjusted by
 * create_addrmap().
 */
typedef struct dis_shnmap {
	/* Points at the section name string handed to tgt_scn_init(). */
	const char	*dm_name;	/* name of section */
	/* sh_addr from the section header, or the address we assigned. */
	uint64_t	dm_start;	/* virtual address of section */
	/* sh_size from the section header. */
	size_t		dm_length;	/* address length */
	/* B_TRUE only when create_addrmap() chose dm_start for us. */
	boolean_t	dm_mapped;	/* did we assign the mapping */
} dis_shnmap_t;
76 
/*
 * Target data structure.  This structure keeps track of the ELF file
 * information, a few bits of pre-processed section index information, and
 * sorted versions of the symbol table.  We also keep track of the last symbol
 * looked up, as the majority of lookups remain within the same symbol.
 *
 * For archives, dis_tgt_create() builds a singly linked list with one target
 * per usable member; only the first target in the list carries the file
 * descriptor (the others have dt_fd set to -1), and all of them share the
 * same dt_elf_root.
 */
struct dis_tgt {
	Elf		*dt_elf;	/* libelf handle */
	Elf		*dt_elf_root;	/* main libelf handle (for archives) */
	const char	*dt_filename;	/* name of file */
	int		dt_fd;		/* underlying file descriptor */
	size_t		dt_shstrndx;	/* section index of .shstrtab */
	/* 0 means no SHT_SYMTAB/SHT_DYNSYM section was found. */
	size_t		dt_symidx;	/* section index of symbol table */
	/* Updated by dis_tgt_lookup() when its cache_result flag is set. */
	sym_entry_t	*dt_symcache;	/* last symbol looked up */
	/* Built and sorted by construct_symtab(); freed on destroy. */
	sym_entry_t	*dt_symtab;	/* sorted symbol table */
	int		dt_symcount;	/* # of symbol table entries */
	struct dis_tgt	*dt_next;	/* next target (for archives) */
	Elf_Arhdr	*dt_arhdr;	/* archive header (for archives) */
	/* Array of dt_shncount entries, indexed by section index. */
	dis_shnmap_t	*dt_shnmap;	/* section address map */
	size_t		dt_shncount;	/* # of sections in target */
};
98 
/*
 * Function data structure.  We resolve the symbol and lookup the associated ELF
 * data when building this structure.  The offset is calculated based on the
 * section's starting address.
 *
 * Instances are populated on the stack by dis_tgt_function_iter() and handed
 * to the callback; dis_function_copy() makes a heap copy when one must
 * outlive the callback.
 */
struct dis_func {
	/* Points into the owning target's dt_symtab array. */
	sym_entry_t	*df_sym;	/* symbol table reference */
	/* Data of the section that contains the function. */
	Elf_Data	*df_data;	/* associated ELF data */
	/* st_value minus the section's (possibly assigned) start address. */
	size_t		df_offset;	/* offset within data */
};
109 
/*
 * Section data structure.  We store the entire section header so that we can
 * determine some properties (such as whether or not it contains text) after
 * building the structure.
 *
 * Instances are populated on the stack by dis_tgt_section_iter() and handed
 * to the callback; dis_section_copy() makes a heap copy when one must
 * outlive the callback.
 */
struct dis_scn {
	/*
	 * Copy of the section header.  dis_tgt_section_iter() replaces a zero
	 * sh_addr with the start address recorded in the section map.
	 */
	GElf_Shdr	ds_shdr;
	/* Section name as returned by elf_strptr(). */
	const char	*ds_name;
	/* Section contents as returned by elf_getdata(). */
	Elf_Data	*ds_data;
};
120 
/*
 * Lifted from Psymtab.c, omitting STT_TLS
 *
 * DATA_TYPES is a bit mask with one bit set per symbol type that we keep in
 * our symbol table (objects, functions and common blocks).  IS_DATA_TYPE(tp)
 * is non-zero when the symbol type tp, as extracted with GELF_ST_TYPE(), is
 * one of those types.
 */
#define	DATA_TYPES      \
	((1 << STT_OBJECT) | (1 << STT_FUNC) | (1 << STT_COMMON))
#define	IS_DATA_TYPE(tp)	(((1 << (tp)) & DATA_TYPES) != 0)
125 
126 /*
127  * Save the virtual address range for this section and select the
128  * best section to use as the symbol table.  We prefer SHT_SYMTAB
129  * over SHT_DYNSYM.
130  */
131 /* ARGSUSED */
132 static void
tgt_scn_init(dis_tgt_t * tgt,dis_scn_t * scn,void * data)133 tgt_scn_init(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
134 {
135 	int *index = data;
136 
137 	*index += 1;
138 
139 	tgt->dt_shnmap[*index].dm_name = scn->ds_name;
140 	tgt->dt_shnmap[*index].dm_start = scn->ds_shdr.sh_addr;
141 	tgt->dt_shnmap[*index].dm_length = scn->ds_shdr.sh_size;
142 	tgt->dt_shnmap[*index].dm_mapped = B_FALSE;
143 
144 	/*
145 	 * Prefer SHT_SYMTAB over SHT_DYNSYM
146 	 */
147 	if (scn->ds_shdr.sh_type == SHT_DYNSYM && tgt->dt_symidx == 0)
148 		tgt->dt_symidx = *index;
149 	else if (scn->ds_shdr.sh_type == SHT_SYMTAB)
150 		tgt->dt_symidx = *index;
151 }
152 
153 static int
sym_compare(const void * a,const void * b)154 sym_compare(const void *a, const void *b)
155 {
156 	const sym_entry_t *syma = a;
157 	const sym_entry_t *symb = b;
158 	const char *aname = syma->se_name;
159 	const char *bname = symb->se_name;
160 	size_t alen;
161 	size_t blen;
162 
163 	if (syma->se_sym.st_value < symb->se_sym.st_value)
164 		return (-1);
165 
166 	if (syma->se_sym.st_value > symb->se_sym.st_value)
167 		return (1);
168 
169 	/*
170 	 * Prefer functions over non-functions
171 	 */
172 	if (GELF_ST_TYPE(syma->se_sym.st_info) !=
173 	    GELF_ST_TYPE(symb->se_sym.st_info)) {
174 		if (GELF_ST_TYPE(syma->se_sym.st_info) == STT_FUNC)
175 			return (-1);
176 		if (GELF_ST_TYPE(symb->se_sym.st_info) == STT_FUNC)
177 			return (1);
178 	}
179 
180 	/*
181 	 * For symbols with the same address and type, we sort them according to
182 	 * a hierarchy:
183 	 *
184 	 *	1. weak symbols (common name)
185 	 *	2. global symbols (external name)
186 	 *	3. local symbols
187 	 */
188 	if (GELF_ST_BIND(syma->se_sym.st_info) !=
189 	    GELF_ST_BIND(symb->se_sym.st_info)) {
190 		if (GELF_ST_BIND(syma->se_sym.st_info) == STB_WEAK)
191 			return (-1);
192 		if (GELF_ST_BIND(symb->se_sym.st_info) == STB_WEAK)
193 			return (1);
194 
195 		if (GELF_ST_BIND(syma->se_sym.st_info) == STB_GLOBAL)
196 			return (-1);
197 		if (GELF_ST_BIND(symb->se_sym.st_info) == STB_GLOBAL)
198 			return (1);
199 	}
200 
201 	/*
202 	 * As a last resort, if we have multiple symbols of the same type at the
203 	 * same address, prefer the version with the fewest leading underscores.
204 	 */
205 	if (aname == NULL)
206 		return (-1);
207 	if (bname == NULL)
208 		return (1);
209 
210 	while (*aname == '_' && *bname == '_') {
211 		aname++;
212 		bname++;
213 	}
214 
215 	if (*bname == '_')
216 		return (-1);
217 	if (*aname == '_')
218 		return (1);
219 
220 	/*
221 	 * Prefer the symbol with the smaller size.
222 	 */
223 	if (syma->se_sym.st_size < symb->se_sym.st_size)
224 		return (-1);
225 	if (syma->se_sym.st_size > symb->se_sym.st_size)
226 		return (1);
227 
228 	/*
229 	 * We really do have two identical symbols, choose the one with the
230 	 * shortest name if we can, heuristically taking it to be the most
231 	 * representative.
232 	 */
233 	alen = strlen(syma->se_name);
234 	blen = strlen(symb->se_name);
235 
236 	if (alen < blen)
237 		return (-1);
238 	else if (alen > blen)
239 		return (1);
240 
241 	/*
242 	 * If all else fails, compare the names, so that we give a stable
243 	 * sort
244 	 */
245 	return (strcmp(syma->se_name, symb->se_name));
246 }
247 
248 /*
249  * Construct an optimized symbol table sorted by starting address.
250  */
251 static void
construct_symtab(dis_tgt_t * tgt)252 construct_symtab(dis_tgt_t *tgt)
253 {
254 	Elf_Scn *scn;
255 	GElf_Shdr shdr;
256 	Elf_Data *symdata;
257 	int i;
258 	GElf_Word *symshndx = NULL;
259 	int symshndx_size;
260 	sym_entry_t *sym;
261 	sym_entry_t *p_symtab = NULL;
262 	int nsym = 0; /* count of symbols we're not interested in */
263 
264 	/*
265 	 * Find the symshndx section, if any
266 	 */
267 	for (scn = elf_nextscn(tgt->dt_elf, NULL); scn != NULL;
268 	    scn = elf_nextscn(tgt->dt_elf, scn)) {
269 		if (gelf_getshdr(scn, &shdr) == NULL)
270 			break;
271 		if (shdr.sh_type == SHT_SYMTAB_SHNDX &&
272 		    shdr.sh_link == tgt->dt_symidx) {
273 			Elf_Data	*data;
274 
275 			if ((data = elf_getdata(scn, NULL)) != NULL) {
276 				symshndx = (GElf_Word *)data->d_buf;
277 				symshndx_size = data->d_size /
278 				    sizeof (GElf_Word);
279 				break;
280 			}
281 		}
282 	}
283 
284 	if ((scn = elf_getscn(tgt->dt_elf, tgt->dt_symidx)) == NULL)
285 		die("%s: failed to get section information", tgt->dt_filename);
286 	if (gelf_getshdr(scn, &shdr) == NULL)
287 		die("%s: failed to get section header", tgt->dt_filename);
288 	if (shdr.sh_entsize == 0)
289 		die("%s: symbol table has zero size", tgt->dt_filename);
290 
291 	if ((symdata = elf_getdata(scn, NULL)) == NULL)
292 		die("%s: failed to get symbol table", tgt->dt_filename);
293 
294 	tgt->dt_symcount = symdata->d_size / gelf_fsize(tgt->dt_elf, ELF_T_SYM,
295 	    1, EV_CURRENT);
296 
297 	p_symtab = safe_malloc(tgt->dt_symcount * sizeof (sym_entry_t));
298 
299 	for (i = 0, sym = p_symtab; i < tgt->dt_symcount; i++) {
300 		if (gelf_getsym(symdata, i, &(sym->se_sym)) == NULL) {
301 			warn("%s: gelf_getsym returned NULL for %d",
302 			    tgt->dt_filename, i);
303 			nsym++;
304 			continue;
305 		}
306 
307 		/*
308 		 * We're only interested in data symbols.
309 		 */
310 		if (!IS_DATA_TYPE(GELF_ST_TYPE(sym->se_sym.st_info))) {
311 			nsym++;
312 			continue;
313 		}
314 
315 		if (sym->se_sym.st_shndx == SHN_XINDEX && symshndx != NULL) {
316 			if (i > symshndx_size) {
317 				warn("%s: bad SHNX_XINDEX %d",
318 				    tgt->dt_filename, i);
319 				sym->se_shndx = -1;
320 			} else {
321 				sym->se_shndx = symshndx[i];
322 			}
323 		} else {
324 			sym->se_shndx = sym->se_sym.st_shndx;
325 		}
326 
327 		/* Deal with symbols with special section indicies */
328 		if (sym->se_shndx == SHN_ABS) {
329 			/*
330 			 * If st_value == 0, references to these
331 			 * symbols in code are modified in situ
332 			 * thus we will never attempt to look
333 			 * them up.
334 			 */
335 			if (sym->se_sym.st_value == 0) {
336 				/*
337 				 * References to these symbols in code
338 				 * are modified in situ by the runtime
339 				 * linker and no code on disk will ever
340 				 * attempt to look them up.
341 				 */
342 				nsym++;
343 				continue;
344 			} else {
345 				/*
346 				 * If st_value != 0, (such as examining
347 				 * something in /system/object/.../object)
348 				 * the values should resolve to a value
349 				 * within an existing section (such as
350 				 * .data).  This also means it never needs
351 				 * to have st_value mapped.
352 				 */
353 				sym++;
354 				continue;
355 			}
356 		}
357 
358 		/*
359 		 * Ignore the symbol if it has some other special
360 		 * section index
361 		 */
362 		if (sym->se_shndx == SHN_UNDEF ||
363 		    sym->se_shndx >= SHN_LORESERVE) {
364 			nsym++;
365 			continue;
366 		}
367 
368 		if ((sym->se_name = elf_strptr(tgt->dt_elf, shdr.sh_link,
369 		    (size_t)sym->se_sym.st_name)) == NULL) {
370 			warn("%s: failed to lookup symbol %d name",
371 			    tgt->dt_filename, i);
372 			nsym++;
373 			continue;
374 		}
375 
376 		/*
377 		 * If we had to map this section, its symbol value
378 		 * also needs to be mapped.
379 		 */
380 		if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
381 			sym->se_sym.st_value +=
382 			    tgt->dt_shnmap[sym->se_shndx].dm_start;
383 
384 		sym++;
385 	}
386 
387 	tgt->dt_symcount -= nsym;
388 	tgt->dt_symtab = realloc(p_symtab, tgt->dt_symcount *
389 	    sizeof (sym_entry_t));
390 
391 	qsort(tgt->dt_symtab, tgt->dt_symcount, sizeof (sym_entry_t),
392 	    sym_compare);
393 }
394 
395 /*
396  * Assign virtual address ranges for sections that need it
397  */
398 static void
create_addrmap(dis_tgt_t * tgt)399 create_addrmap(dis_tgt_t *tgt)
400 {
401 	uint64_t addr;
402 	int i;
403 
404 	if (tgt->dt_shnmap == NULL)
405 		return;
406 
407 	/* find the greatest used address */
408 	for (addr = 0, i = 1; i < tgt->dt_shncount; i++)
409 		if (tgt->dt_shnmap[i].dm_start > addr)
410 			addr = tgt->dt_shnmap[i].dm_start +
411 			    tgt->dt_shnmap[i].dm_length;
412 
413 	addr = P2ROUNDUP(addr, 0x1000);
414 
415 	/*
416 	 * Assign section a starting address beyond the largest mapped section
417 	 * if no address was given.
418 	 */
419 	for (i = 1; i < tgt->dt_shncount; i++) {
420 		if (tgt->dt_shnmap[i].dm_start != 0)
421 			continue;
422 
423 		tgt->dt_shnmap[i].dm_start = addr;
424 		tgt->dt_shnmap[i].dm_mapped = B_TRUE;
425 		addr = P2ROUNDUP(addr + tgt->dt_shnmap[i].dm_length, 0x1000);
426 	}
427 }
428 
429 /*
430  * Create a target backed by an ELF file.
431  */
432 dis_tgt_t *
dis_tgt_create(const char * file)433 dis_tgt_create(const char *file)
434 {
435 	dis_tgt_t *tgt, *current;
436 	int idx;
437 	Elf *elf;
438 	GElf_Ehdr ehdr;
439 	Elf_Arhdr *arhdr = NULL;
440 	int cmd;
441 
442 	if (elf_version(EV_CURRENT) == EV_NONE)
443 		die("libelf out of date");
444 
445 	tgt = safe_malloc(sizeof (dis_tgt_t));
446 
447 	if ((tgt->dt_fd = open(file, O_RDONLY)) < 0) {
448 		warn("%s: failed opening file, reason: %s", file,
449 		    strerror(errno));
450 		free(tgt);
451 		return (NULL);
452 	}
453 
454 	if ((tgt->dt_elf_root =
455 	    elf_begin(tgt->dt_fd, ELF_C_READ, NULL)) == NULL) {
456 		warn("%s: invalid or corrupt ELF file", file);
457 		dis_tgt_destroy(tgt);
458 		return (NULL);
459 	}
460 
461 	current = tgt;
462 	cmd = ELF_C_READ;
463 	while ((elf = elf_begin(tgt->dt_fd, cmd, tgt->dt_elf_root)) != NULL) {
464 		size_t shnum = 0;
465 
466 		if (elf_kind(tgt->dt_elf_root) == ELF_K_AR &&
467 		    (arhdr = elf_getarhdr(elf)) == NULL) {
468 			warn("%s: malformed archive", file);
469 			dis_tgt_destroy(tgt);
470 			return (NULL);
471 		}
472 
473 		/*
474 		 * Make sure that this Elf file is sane
475 		 */
476 		if (gelf_getehdr(elf, &ehdr) == NULL) {
477 			if (arhdr != NULL) {
478 				/*
479 				 * For archives, we drive on in the face of bad
480 				 * members.  The "/" and "//" members are
481 				 * special, and should be silently ignored.
482 				 */
483 				if (strcmp(arhdr->ar_name, "/") != 0 &&
484 				    strcmp(arhdr->ar_name, "//") != 0)
485 					warn("%s[%s]: invalid file type",
486 					    file, arhdr->ar_name);
487 				cmd = elf_next(elf);
488 				(void) elf_end(elf);
489 				continue;
490 			}
491 
492 			warn("%s: invalid file type", file);
493 			dis_tgt_destroy(tgt);
494 			return (NULL);
495 		}
496 
497 		/*
498 		 * If we're seeing a new Elf object, then we have an
499 		 * archive. In this case, we create a new target, and chain it
500 		 * off the master target.  We can later iterate over these
501 		 * targets using dis_tgt_next().
502 		 */
503 		if (current->dt_elf != NULL) {
504 			dis_tgt_t *next = safe_malloc(sizeof (dis_tgt_t));
505 			next->dt_elf_root = tgt->dt_elf_root;
506 			next->dt_fd = -1;
507 			current->dt_next = next;
508 			current = next;
509 		}
510 		current->dt_elf = elf;
511 		current->dt_arhdr = arhdr;
512 
513 		if (elf_getshdrstrndx(elf, &current->dt_shstrndx) == -1) {
514 			warn("%s: failed to get section string table for "
515 			    "file", file);
516 			dis_tgt_destroy(tgt);
517 			return (NULL);
518 		}
519 
520 		if (elf_getshdrnum(elf, &shnum) == -1) {
521 			warn("%s: failed to get number of sections in file",
522 			    file);
523 			dis_tgt_destroy(tgt);
524 			return (NULL);
525 		}
526 
527 		current->dt_shnmap = safe_malloc(sizeof (dis_shnmap_t) *
528 		    shnum);
529 		current->dt_shncount = shnum;
530 
531 		idx = 0;
532 		dis_tgt_section_iter(current, tgt_scn_init, &idx);
533 		current->dt_filename = file;
534 
535 		create_addrmap(current);
536 		if (current->dt_symidx != 0)
537 			construct_symtab(current);
538 
539 		cmd = elf_next(elf);
540 	}
541 
542 	/*
543 	 * Final sanity check.  If we had an archive with no members, then bail
544 	 * out with a nice message.
545 	 */
546 	if (tgt->dt_elf == NULL) {
547 		warn("%s: empty archive\n", file);
548 		dis_tgt_destroy(tgt);
549 		return (NULL);
550 	}
551 
552 	return (tgt);
553 }
554 
555 /*
556  * Return the filename associated with the target.
557  */
558 const char *
dis_tgt_name(dis_tgt_t * tgt)559 dis_tgt_name(dis_tgt_t *tgt)
560 {
561 	return (tgt->dt_filename);
562 }
563 
564 /*
565  * Return the archive member name, if any.
566  */
567 const char *
dis_tgt_member(dis_tgt_t * tgt)568 dis_tgt_member(dis_tgt_t *tgt)
569 {
570 	if (tgt->dt_arhdr)
571 		return (tgt->dt_arhdr->ar_name);
572 	else
573 		return (NULL);
574 }
575 
576 /*
577  * Return the Elf_Ehdr associated with this target.  Needed to determine which
578  * disassembler to use.
579  */
580 void
dis_tgt_ehdr(dis_tgt_t * tgt,GElf_Ehdr * ehdr)581 dis_tgt_ehdr(dis_tgt_t *tgt, GElf_Ehdr *ehdr)
582 {
583 	(void) gelf_getehdr(tgt->dt_elf, ehdr);
584 }
585 
586 /*
587  * Return the next target in the list, if this is an archive.
588  */
589 dis_tgt_t *
dis_tgt_next(dis_tgt_t * tgt)590 dis_tgt_next(dis_tgt_t *tgt)
591 {
592 	return (tgt->dt_next);
593 }
594 
595 /*
596  * Destroy a target and free up any associated memory.
597  */
598 void
dis_tgt_destroy(dis_tgt_t * tgt)599 dis_tgt_destroy(dis_tgt_t *tgt)
600 {
601 	dis_tgt_t *current, *next;
602 
603 	current = tgt->dt_next;
604 	while (current != NULL) {
605 		next = current->dt_next;
606 		if (current->dt_elf)
607 			(void) elf_end(current->dt_elf);
608 		if (current->dt_symtab)
609 			free(current->dt_symtab);
610 		free(current);
611 		current = next;
612 	}
613 
614 	if (tgt->dt_elf)
615 		(void) elf_end(tgt->dt_elf);
616 	if (tgt->dt_elf_root)
617 		(void) elf_end(tgt->dt_elf_root);
618 
619 	if (tgt->dt_symtab)
620 		free(tgt->dt_symtab);
621 
622 	free(tgt);
623 }
624 
625 /*
626  * Given an address, return the section it is in and set the offset within
627  * the section.
628  */
629 const char *
dis_find_section(dis_tgt_t * tgt,uint64_t addr,off_t * offset)630 dis_find_section(dis_tgt_t *tgt, uint64_t addr, off_t *offset)
631 {
632 	int i;
633 
634 	for (i = 1; i < tgt->dt_shncount; i++) {
635 		if ((addr >= tgt->dt_shnmap[i].dm_start) &&
636 		    (addr < tgt->dt_shnmap[i].dm_start +
637 		    tgt->dt_shnmap[i].dm_length)) {
638 			*offset = addr - tgt->dt_shnmap[i].dm_start;
639 			return (tgt->dt_shnmap[i].dm_name);
640 		}
641 	}
642 
643 	*offset = 0;
644 	return (NULL);
645 }
646 
647 /*
648  * Given an address, returns the name of the corresponding symbol, as well as
649  * the offset within that symbol.  If no matching symbol is found, then NULL is
650  * returned.
651  *
652  * If 'cache_result' is specified, then we keep track of the resulting symbol.
653  * This cached result is consulted first on subsequent lookups in order to avoid
654  * unecessary lookups.  This flag should be used for resolving the current PC,
655  * as the majority of addresses stay within the current function.
656  */
657 const char *
dis_tgt_lookup(dis_tgt_t * tgt,uint64_t addr,off_t * offset,int cache_result,size_t * size,int * isfunc)658 dis_tgt_lookup(dis_tgt_t *tgt, uint64_t addr, off_t *offset, int cache_result,
659     size_t *size, int *isfunc)
660 {
661 	int lo, hi, mid;
662 	sym_entry_t *sym, *osym, *match;
663 	int found;
664 
665 	*offset = 0;
666 	*size = 0;
667 	if (isfunc != NULL)
668 		*isfunc = 0;
669 
670 	if (tgt->dt_symcache != NULL &&
671 	    addr >= tgt->dt_symcache->se_sym.st_value &&
672 	    addr < tgt->dt_symcache->se_sym.st_value +
673 	    tgt->dt_symcache->se_sym.st_size) {
674 		sym = tgt->dt_symcache;
675 		*offset = addr - sym->se_sym.st_value;
676 		*size = sym->se_sym.st_size;
677 		if (isfunc != NULL)
678 			*isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) ==
679 			    STT_FUNC);
680 		return (sym->se_name);
681 	}
682 
683 	lo = 0;
684 	hi = (tgt->dt_symcount - 1);
685 	found = 0;
686 	match = osym = NULL;
687 	while (lo <= hi) {
688 		mid = (lo + hi) / 2;
689 
690 		sym = &tgt->dt_symtab[mid];
691 
692 		if (addr >= sym->se_sym.st_value &&
693 		    addr < sym->se_sym.st_value + sym->se_sym.st_size &&
694 		    (!found || sym->se_sym.st_value > osym->se_sym.st_value)) {
695 			osym = sym;
696 			found = 1;
697 		} else if (addr == sym->se_sym.st_value) {
698 			/*
699 			 * Particularly for .plt objects, it's possible to have
700 			 * a zero sized object.  We want to return this, but we
701 			 * want it to be a last resort.
702 			 */
703 			match = sym;
704 		}
705 
706 		if (addr < sym->se_sym.st_value)
707 			hi = mid - 1;
708 		else
709 			lo = mid + 1;
710 	}
711 
712 	if (!found) {
713 		if (match)
714 			osym = match;
715 		else
716 			return (NULL);
717 	}
718 
719 	/*
720 	 * Walk backwards to find the best match.
721 	 */
722 	do {
723 		sym = osym;
724 
725 		if (osym == tgt->dt_symtab)
726 			break;
727 
728 		osym = osym - 1;
729 	} while ((sym->se_sym.st_value == osym->se_sym.st_value) &&
730 	    (addr >= osym->se_sym.st_value) &&
731 	    (addr < osym->se_sym.st_value + osym->se_sym.st_size));
732 
733 	if (cache_result)
734 		tgt->dt_symcache = sym;
735 
736 	*offset = addr - sym->se_sym.st_value;
737 	*size = sym->se_sym.st_size;
738 	if (isfunc)
739 		*isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) == STT_FUNC);
740 
741 	return (sym->se_name);
742 }
743 
744 /*
745  * Given an address, return the starting offset of the next symbol in the file.
746  * Only needed on variable length instruction architectures.
747  */
748 off_t
dis_tgt_next_symbol(dis_tgt_t * tgt,uint64_t addr)749 dis_tgt_next_symbol(dis_tgt_t *tgt, uint64_t addr)
750 {
751 	sym_entry_t *sym;
752 
753 	sym = (tgt->dt_symcache != NULL) ? tgt->dt_symcache : tgt->dt_symtab;
754 
755 	while (sym != (tgt->dt_symtab + tgt->dt_symcount)) {
756 		if (sym->se_sym.st_value >= addr)
757 			return (sym->se_sym.st_value - addr);
758 		sym++;
759 	}
760 
761 	return (0);
762 }
763 
764 /*
765  * Iterate over all sections in the target, executing the given callback for
766  * each.
767  */
768 void
dis_tgt_section_iter(dis_tgt_t * tgt,section_iter_f func,void * data)769 dis_tgt_section_iter(dis_tgt_t *tgt, section_iter_f func, void *data)
770 {
771 	dis_scn_t sdata;
772 	Elf_Scn *scn;
773 	int idx;
774 
775 	for (scn = elf_nextscn(tgt->dt_elf, NULL), idx = 1; scn != NULL;
776 	    scn = elf_nextscn(tgt->dt_elf, scn), idx++) {
777 
778 		if (gelf_getshdr(scn, &sdata.ds_shdr) == NULL) {
779 			warn("%s: failed to get section %d header",
780 			    tgt->dt_filename, idx);
781 			continue;
782 		}
783 
784 		if ((sdata.ds_name = elf_strptr(tgt->dt_elf, tgt->dt_shstrndx,
785 		    sdata.ds_shdr.sh_name)) == NULL) {
786 			warn("%s: failed to get section %d name",
787 			    tgt->dt_filename, idx);
788 			continue;
789 		}
790 
791 		if ((sdata.ds_data = elf_getdata(scn, NULL)) == NULL) {
792 			warn("%s: failed to get data for section '%s'",
793 			    tgt->dt_filename, sdata.ds_name);
794 			continue;
795 		}
796 
797 		/*
798 		 * dis_tgt_section_iter is also used before the section map
799 		 * is initialized, so only check when we need to.  If the
800 		 * section map is uninitialized, it will return 0 and have
801 		 * no net effect.
802 		 */
803 		if (sdata.ds_shdr.sh_addr == 0)
804 			sdata.ds_shdr.sh_addr = tgt->dt_shnmap[idx].dm_start;
805 
806 		func(tgt, &sdata, data);
807 	}
808 }
809 
810 /*
811  * Return 1 if the given section contains text, 0 otherwise.
812  */
813 int
dis_section_istext(dis_scn_t * scn)814 dis_section_istext(dis_scn_t *scn)
815 {
816 	return ((scn->ds_shdr.sh_type == SHT_PROGBITS) &&
817 	    (scn->ds_shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR)));
818 }
819 
820 /*
821  * Return a pointer to the section data.
822  */
823 void *
dis_section_data(dis_scn_t * scn)824 dis_section_data(dis_scn_t *scn)
825 {
826 	return (scn->ds_data->d_buf);
827 }
828 
829 /*
830  * Return the size of the section data.
831  */
832 size_t
dis_section_size(dis_scn_t * scn)833 dis_section_size(dis_scn_t *scn)
834 {
835 	return (scn->ds_data->d_size);
836 }
837 
838 /*
839  * Return the address for the given section.
840  */
841 uint64_t
dis_section_addr(dis_scn_t * scn)842 dis_section_addr(dis_scn_t *scn)
843 {
844 	return (scn->ds_shdr.sh_addr);
845 }
846 
847 /*
848  * Return the name of the current section.
849  */
850 const char *
dis_section_name(dis_scn_t * scn)851 dis_section_name(dis_scn_t *scn)
852 {
853 	return (scn->ds_name);
854 }
855 
856 /*
857  * Create an allocated copy of the given section
858  */
859 dis_scn_t *
dis_section_copy(dis_scn_t * scn)860 dis_section_copy(dis_scn_t *scn)
861 {
862 	dis_scn_t *new;
863 
864 	new = safe_malloc(sizeof (dis_scn_t));
865 	(void) memcpy(new, scn, sizeof (dis_scn_t));
866 
867 	return (new);
868 }
869 
870 /*
871  * Free section memory
872  */
873 void
dis_section_free(dis_scn_t * scn)874 dis_section_free(dis_scn_t *scn)
875 {
876 	free(scn);
877 }
878 
879 /*
880  * Iterate over all functions in the target, executing the given callback for
881  * each one.
882  */
883 void
dis_tgt_function_iter(dis_tgt_t * tgt,function_iter_f func,void * data)884 dis_tgt_function_iter(dis_tgt_t *tgt, function_iter_f func, void *data)
885 {
886 	int i;
887 	sym_entry_t *sym;
888 	dis_func_t df;
889 	Elf_Scn *scn;
890 	GElf_Shdr	shdr;
891 
892 	for (i = 0, sym = tgt->dt_symtab; i < tgt->dt_symcount; i++, sym++) {
893 
894 		/* ignore non-functions */
895 		if ((GELF_ST_TYPE(sym->se_sym.st_info) != STT_FUNC) ||
896 		    (sym->se_name == NULL) ||
897 		    (sym->se_sym.st_size == 0) ||
898 		    (sym->se_shndx >= SHN_LORESERVE))
899 			continue;
900 
901 		/* get the ELF data associated with this function */
902 		if ((scn = elf_getscn(tgt->dt_elf, sym->se_shndx)) == NULL ||
903 		    gelf_getshdr(scn, &shdr) == NULL ||
904 		    (df.df_data = elf_getdata(scn, NULL)) == NULL ||
905 		    df.df_data->d_size == 0) {
906 			warn("%s: failed to read section %d",
907 			    tgt->dt_filename, sym->se_shndx);
908 			continue;
909 		}
910 
911 		if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
912 			shdr.sh_addr = tgt->dt_shnmap[sym->se_shndx].dm_start;
913 
914 		/*
915 		 * Verify that the address lies within the section that we think
916 		 * it does.
917 		 */
918 		if (sym->se_sym.st_value < shdr.sh_addr ||
919 		    (sym->se_sym.st_value + sym->se_sym.st_size) >
920 		    (shdr.sh_addr + shdr.sh_size)) {
921 			warn("%s: bad section %d for address %p",
922 			    tgt->dt_filename, sym->se_sym.st_shndx,
923 			    sym->se_sym.st_value);
924 			continue;
925 		}
926 
927 		df.df_sym = sym;
928 		df.df_offset = sym->se_sym.st_value - shdr.sh_addr;
929 
930 		func(tgt, &df, data);
931 	}
932 }
933 
934 /*
935  * Return the data associated with a given function.
936  */
937 void *
dis_function_data(dis_func_t * func)938 dis_function_data(dis_func_t *func)
939 {
940 	return ((char *)func->df_data->d_buf + func->df_offset);
941 }
942 
943 /*
944  * Return the size of a function.
945  */
946 size_t
dis_function_size(dis_func_t * func)947 dis_function_size(dis_func_t *func)
948 {
949 	return (func->df_sym->se_sym.st_size);
950 }
951 
952 /*
953  * Return the address of a function.
954  */
955 uint64_t
dis_function_addr(dis_func_t * func)956 dis_function_addr(dis_func_t *func)
957 {
958 	return (func->df_sym->se_sym.st_value);
959 }
960 
961 /*
962  * Return the name of the function
963  */
964 const char *
dis_function_name(dis_func_t * func)965 dis_function_name(dis_func_t *func)
966 {
967 	return (func->df_sym->se_name);
968 }
969 
970 /*
971  * Return a copy of a function.
972  */
973 dis_func_t *
dis_function_copy(dis_func_t * func)974 dis_function_copy(dis_func_t *func)
975 {
976 	dis_func_t *new;
977 
978 	new = safe_malloc(sizeof (dis_func_t));
979 	(void) memcpy(new, func, sizeof (dis_func_t));
980 
981 	return (new);
982 }
983 
984 /*
985  * Free function memory
986  */
987 void
dis_function_free(dis_func_t * func)988 dis_function_free(dis_func_t *func)
989 {
990 	free(func);
991 }
992