xref: /titanic_50/usr/src/cmd/dis/dis_target.c (revision f7184619589931c4b827180c213074c470f08a8f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  *
25  * Copyright 2011 Jason King.  All rights reserved.
26  */
27 
28 #include <assert.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <gelf.h>
32 #include <libelf.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #include <sys/fcntl.h>
38 #include <sys/stat.h>
39 #include <sys/sysmacros.h>
40 #include <sys/types.h>
41 
42 #include "dis_target.h"
43 #include "dis_util.h"
44 
45 /*
46  * Standard ELF disassembler target.
47  *
48  * We only support disassembly of ELF files, though this target interface could
49  * be extended in the future.  Each basic type (target, func, section) contains
50  * enough information to uniquely identify the location within the file.  The
51  * interfaces use libelf(3LIB) to do the actual processing of the file.
52  */
53 
54 /*
55  * Symbol table entry type.  We maintain our own symbol table sorted by address,
56  * with the symbol name already resolved against the ELF symbol table.
57  */
58 typedef struct sym_entry {
59 	GElf_Sym	se_sym;		/* value of symbol */
60 	char		*se_name;	/* name of symbol */
61 	int		se_shndx;	/* section where symbol is located */
62 } sym_entry_t;
63 
64 /*
65  * Create a map of the virtual address ranges of every section.  This will
66  * allow us to create dummpy mappings for unassigned addresses.  Otherwise
67  * multiple sections with unassigned addresses will appear to overlap and
68  * mess up symbol resolution (which uses the virtual address).
69  */
70 typedef struct dis_shnmap {
71 	const char 	*dm_name;	/* name of section */
72 	uint64_t	dm_start;	/* virtual address of section */
73 	size_t		dm_length;	/* address length */
74 	boolean_t	dm_mapped;	/* did we assign the mapping */
75 } dis_shnmap_t;
76 
77 /*
78  * Target data structure.  This structure keeps track of the ELF file
79  * information, a few bits of pre-processed section index information, and
80  * sorted versions of the symbol table.  We also keep track of the last symbol
81  * looked up, as the majority of lookups remain within the same symbol.
82  */
83 struct dis_tgt {
84 	Elf		*dt_elf;	/* libelf handle */
85 	Elf		*dt_elf_root;	/* main libelf handle (for archives) */
86 	const char	*dt_filename;	/* name of file */
87 	int		dt_fd;		/* underlying file descriptor */
88 	size_t		dt_shstrndx;	/* section index of .shstrtab */
89 	size_t		dt_symidx;	/* section index of symbol table */
90 	sym_entry_t	*dt_symcache;	/* last symbol looked up */
91 	sym_entry_t	*dt_symtab;	/* sorted symbol table */
92 	int		dt_symcount;	/* # of symbol table entries */
93 	struct dis_tgt	*dt_next;	/* next target (for archives) */
94 	Elf_Arhdr	*dt_arhdr;	/* archive header (for archives) */
95 	dis_shnmap_t	*dt_shnmap;	/* section address map */
96 	size_t		dt_shncount;	/* # of sections in target */
97 };
98 
99 /*
100  * Function data structure.  We resolve the symbol and lookup the associated ELF
101  * data when building this structure.  The offset is calculated based on the
102  * section's starting address.
103  */
104 struct dis_func {
105 	sym_entry_t	*df_sym;	/* symbol table reference */
106 	Elf_Data	*df_data;	/* associated ELF data */
107 	size_t		df_offset;	/* offset within data */
108 };
109 
110 /*
111  * Section data structure.  We store the entire section header so that we can
112  * determine some properties (such as whether or not it contains text) after
113  * building the structure.
114  */
115 struct dis_scn {
116 	GElf_Shdr	ds_shdr;
117 	const char	*ds_name;
118 	Elf_Data	*ds_data;
119 };
120 
/*
 * Lifted from Psymtab.c, omitting STT_TLS.  DATA_TYPES is a bit mask with one
 * bit per accepted symbol type; IS_DATA_TYPE() tests a type against it.
 */
#define	DATA_TYPES	\
	((1 << STT_FUNC) | (1 << STT_OBJECT) | (1 << STT_COMMON))
#define	IS_DATA_TYPE(tp)	((DATA_TYPES & (1 << (tp))) != 0)
125 
126 /*
127  * Save the virtual address range for this section and select the
128  * best section to use as the symbol table.  We prefer SHT_SYMTAB
129  * over SHT_DYNSYM.
130  */
131 /* ARGSUSED */
132 static void
tgt_scn_init(dis_tgt_t * tgt,dis_scn_t * scn,void * data)133 tgt_scn_init(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
134 {
135 	int *index = data;
136 
137 	*index += 1;
138 
139 	tgt->dt_shnmap[*index].dm_name = scn->ds_name;
140 	tgt->dt_shnmap[*index].dm_start = scn->ds_shdr.sh_addr;
141 	tgt->dt_shnmap[*index].dm_length = scn->ds_shdr.sh_size;
142 	tgt->dt_shnmap[*index].dm_mapped = B_FALSE;
143 
144 	/*
145 	 * Prefer SHT_SYMTAB over SHT_DYNSYM
146 	 */
147 	if (scn->ds_shdr.sh_type == SHT_DYNSYM && tgt->dt_symidx == 0)
148 		tgt->dt_symidx = *index;
149 	else if (scn->ds_shdr.sh_type == SHT_SYMTAB)
150 		tgt->dt_symidx = *index;
151 }
152 
153 static int
sym_compare(const void * a,const void * b)154 sym_compare(const void *a, const void *b)
155 {
156 	const sym_entry_t *syma = a;
157 	const sym_entry_t *symb = b;
158 	const char *aname = syma->se_name;
159 	const char *bname = symb->se_name;
160 
161 	if (syma->se_sym.st_value < symb->se_sym.st_value)
162 		return (-1);
163 
164 	if (syma->se_sym.st_value > symb->se_sym.st_value)
165 		return (1);
166 
167 	/*
168 	 * Prefer functions over non-functions
169 	 */
170 	if (GELF_ST_TYPE(syma->se_sym.st_info) !=
171 	    GELF_ST_TYPE(symb->se_sym.st_info)) {
172 		if (GELF_ST_TYPE(syma->se_sym.st_info) == STT_FUNC)
173 			return (-1);
174 		if (GELF_ST_TYPE(symb->se_sym.st_info) == STT_FUNC)
175 			return (1);
176 	}
177 
178 	/*
179 	 * For symbols with the same address and type, we sort them according to
180 	 * a hierarchy:
181 	 *
182 	 * 	1. weak symbols (common name)
183 	 * 	2. global symbols (external name)
184 	 * 	3. local symbols
185 	 */
186 	if (GELF_ST_BIND(syma->se_sym.st_info) !=
187 	    GELF_ST_BIND(symb->se_sym.st_info)) {
188 		if (GELF_ST_BIND(syma->se_sym.st_info) == STB_WEAK)
189 			return (-1);
190 		if (GELF_ST_BIND(symb->se_sym.st_info) == STB_WEAK)
191 			return (1);
192 
193 		if (GELF_ST_BIND(syma->se_sym.st_info) == STB_GLOBAL)
194 			return (-1);
195 		if (GELF_ST_BIND(symb->se_sym.st_info) == STB_GLOBAL)
196 			return (1);
197 	}
198 
199 	/*
200 	 * As a last resort, if we have multiple symbols of the same type at the
201 	 * same address, prefer the version with the fewest leading underscores.
202 	 */
203 	if (aname == NULL)
204 		return (-1);
205 	if (bname == NULL)
206 		return (1);
207 
208 	while (*aname == '_' && *bname == '_') {
209 		aname++;
210 		bname++;
211 	}
212 
213 	if (*bname == '_')
214 		return (-1);
215 	if (*aname == '_')
216 		return (1);
217 
218 	/*
219 	 * Prefer the symbol with the smaller size.
220 	 */
221 	if (syma->se_sym.st_size < symb->se_sym.st_size)
222 		return (-1);
223 	if (syma->se_sym.st_size > symb->se_sym.st_size)
224 		return (1);
225 
226 	/*
227 	 * We really do have two identical symbols for some reason.  Just report
228 	 * them as equal, and to the lucky one go the spoils.
229 	 */
230 	return (0);
231 }
232 
233 /*
234  * Construct an optimized symbol table sorted by starting address.
235  */
236 static void
construct_symtab(dis_tgt_t * tgt)237 construct_symtab(dis_tgt_t *tgt)
238 {
239 	Elf_Scn *scn;
240 	GElf_Shdr shdr;
241 	Elf_Data *symdata;
242 	int i;
243 	GElf_Word *symshndx = NULL;
244 	int symshndx_size;
245 	sym_entry_t *sym;
246 	sym_entry_t *p_symtab = NULL;
247 	int nsym = 0; /* count of symbols we're not interested in */
248 
249 	/*
250 	 * Find the symshndx section, if any
251 	 */
252 	for (scn = elf_nextscn(tgt->dt_elf, NULL); scn != NULL;
253 	    scn = elf_nextscn(tgt->dt_elf, scn)) {
254 		if (gelf_getshdr(scn, &shdr) == NULL)
255 			break;
256 		if (shdr.sh_type == SHT_SYMTAB_SHNDX &&
257 		    shdr.sh_link == tgt->dt_symidx) {
258 			Elf_Data	*data;
259 
260 			if ((data = elf_getdata(scn, NULL)) != NULL) {
261 				symshndx = (GElf_Word *)data->d_buf;
262 				symshndx_size = data->d_size /
263 				    sizeof (GElf_Word);
264 				break;
265 			}
266 		}
267 	}
268 
269 	if ((scn = elf_getscn(tgt->dt_elf, tgt->dt_symidx)) == NULL)
270 		die("%s: failed to get section information", tgt->dt_filename);
271 	if (gelf_getshdr(scn, &shdr) == NULL)
272 		die("%s: failed to get section header", tgt->dt_filename);
273 	if (shdr.sh_entsize == 0)
274 		die("%s: symbol table has zero size", tgt->dt_filename);
275 
276 	if ((symdata = elf_getdata(scn, NULL)) == NULL)
277 		die("%s: failed to get symbol table", tgt->dt_filename);
278 
279 	tgt->dt_symcount = symdata->d_size / gelf_fsize(tgt->dt_elf, ELF_T_SYM,
280 	    1, EV_CURRENT);
281 
282 	p_symtab = safe_malloc(tgt->dt_symcount * sizeof (sym_entry_t));
283 
284 	for (i = 0, sym = p_symtab; i < tgt->dt_symcount; i++) {
285 		if (gelf_getsym(symdata, i, &(sym->se_sym)) == NULL) {
286 			warn("%s: gelf_getsym returned NULL for %d",
287 			    tgt->dt_filename, i);
288 			nsym++;
289 			continue;
290 		}
291 
292 		/*
293 		 * We're only interested in data symbols.
294 		 */
295 		if (!IS_DATA_TYPE(GELF_ST_TYPE(sym->se_sym.st_info))) {
296 			nsym++;
297 			continue;
298 		}
299 
300 		if (sym->se_sym.st_shndx == SHN_XINDEX && symshndx != NULL) {
301 			if (i > symshndx_size) {
302 				warn("%s: bad SHNX_XINDEX %d",
303 				    tgt->dt_filename, i);
304 				sym->se_shndx = -1;
305 			} else {
306 				sym->se_shndx = symshndx[i];
307 			}
308 		} else {
309 			sym->se_shndx = sym->se_sym.st_shndx;
310 		}
311 
312 		/* Deal with symbols with special section indicies */
313 		if (sym->se_shndx == SHN_ABS) {
314 			/*
315 			 * If st_value == 0, references to these
316 			 * symbols in code are modified in situ
317 			 * thus we will never attempt to look
318 			 * them up.
319 			 */
320 			if (sym->se_sym.st_value == 0) {
321 				/*
322 				 * References to these symbols in code
323 				 * are modified in situ by the runtime
324 				 * linker and no code on disk will ever
325 				 * attempt to look them up.
326 				 */
327 				nsym++;
328 				continue;
329 			} else {
330 				/*
331 				 * If st_value != 0, (such as examining
332 				 * something in /system/object/.../object)
333 				 * the values should resolve to a value
334 				 * within an existing section (such as
335 				 * .data).  This also means it never needs
336 				 * to have st_value mapped.
337 				 */
338 				sym++;
339 				continue;
340 			}
341 		}
342 
343 		/*
344 		 * Ignore the symbol if it has some other special
345 		 * section index
346 		 */
347 		if (sym->se_shndx == SHN_UNDEF ||
348 		    sym->se_shndx >= SHN_LORESERVE) {
349 			nsym++;
350 			continue;
351 		}
352 
353 		if ((sym->se_name = elf_strptr(tgt->dt_elf, shdr.sh_link,
354 		    (size_t)sym->se_sym.st_name)) == NULL) {
355 			warn("%s: failed to lookup symbol %d name",
356 			    tgt->dt_filename, i);
357 			nsym++;
358 			continue;
359 		}
360 
361 		/*
362 		 * If we had to map this section, its symbol value
363 		 * also needs to be mapped.
364 		 */
365 		if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
366 			sym->se_sym.st_value +=
367 			    tgt->dt_shnmap[sym->se_shndx].dm_start;
368 
369 		sym++;
370 	}
371 
372 	tgt->dt_symcount -= nsym;
373 	tgt->dt_symtab = realloc(p_symtab, tgt->dt_symcount *
374 	    sizeof (sym_entry_t));
375 
376 	qsort(tgt->dt_symtab, tgt->dt_symcount, sizeof (sym_entry_t),
377 	    sym_compare);
378 }
379 
380 /*
381  * Assign virtual address ranges for sections that need it
382  */
383 static void
create_addrmap(dis_tgt_t * tgt)384 create_addrmap(dis_tgt_t *tgt)
385 {
386 	uint64_t addr;
387 	int i;
388 
389 	if (tgt->dt_shnmap == NULL)
390 		return;
391 
392 	/* find the greatest used address */
393 	for (addr = 0, i = 1; i < tgt->dt_shncount; i++)
394 		if (tgt->dt_shnmap[i].dm_start > addr)
395 			addr = tgt->dt_shnmap[i].dm_start +
396 			    tgt->dt_shnmap[i].dm_length;
397 
398 	addr = P2ROUNDUP(addr, 0x1000);
399 
400 	/*
401 	 * Assign section a starting address beyond the largest mapped section
402 	 * if no address was given.
403 	 */
404 	for (i = 1; i < tgt->dt_shncount; i++) {
405 		if (tgt->dt_shnmap[i].dm_start != 0)
406 			continue;
407 
408 		tgt->dt_shnmap[i].dm_start = addr;
409 		tgt->dt_shnmap[i].dm_mapped = B_TRUE;
410 		addr = P2ROUNDUP(addr + tgt->dt_shnmap[i].dm_length, 0x1000);
411 	}
412 }
413 
414 /*
415  * Create a target backed by an ELF file.
416  */
417 dis_tgt_t *
dis_tgt_create(const char * file)418 dis_tgt_create(const char *file)
419 {
420 	dis_tgt_t *tgt, *current;
421 	int idx;
422 	Elf *elf;
423 	GElf_Ehdr ehdr;
424 	Elf_Arhdr *arhdr = NULL;
425 	int cmd;
426 
427 	if (elf_version(EV_CURRENT) == EV_NONE)
428 		die("libelf(3ELF) out of date");
429 
430 	tgt = safe_malloc(sizeof (dis_tgt_t));
431 
432 	if ((tgt->dt_fd = open(file, O_RDONLY)) < 0) {
433 		warn("%s: failed opening file, reason: %s", file,
434 		    strerror(errno));
435 		free(tgt);
436 		return (NULL);
437 	}
438 
439 	if ((tgt->dt_elf_root =
440 	    elf_begin(tgt->dt_fd, ELF_C_READ, NULL)) == NULL) {
441 		warn("%s: invalid or corrupt ELF file", file);
442 		dis_tgt_destroy(tgt);
443 		return (NULL);
444 	}
445 
446 	current = tgt;
447 	cmd = ELF_C_READ;
448 	while ((elf = elf_begin(tgt->dt_fd, cmd, tgt->dt_elf_root)) != NULL) {
449 		size_t shnum = 0;
450 
451 		if (elf_kind(tgt->dt_elf_root) == ELF_K_AR &&
452 		    (arhdr = elf_getarhdr(elf)) == NULL) {
453 			warn("%s: malformed archive", file);
454 			dis_tgt_destroy(tgt);
455 			return (NULL);
456 		}
457 
458 		/*
459 		 * Make sure that this Elf file is sane
460 		 */
461 		if (gelf_getehdr(elf, &ehdr) == NULL) {
462 			if (arhdr != NULL) {
463 				/*
464 				 * For archives, we drive on in the face of bad
465 				 * members.  The "/" and "//" members are
466 				 * special, and should be silently ignored.
467 				 */
468 				if (strcmp(arhdr->ar_name, "/") != 0 &&
469 				    strcmp(arhdr->ar_name, "//") != 0)
470 					warn("%s[%s]: invalid file type",
471 					    file, arhdr->ar_name);
472 				cmd = elf_next(elf);
473 				(void) elf_end(elf);
474 				continue;
475 			}
476 
477 			warn("%s: invalid file type", file);
478 			dis_tgt_destroy(tgt);
479 			return (NULL);
480 		}
481 
482 		/*
483 		 * If we're seeing a new Elf object, then we have an
484 		 * archive. In this case, we create a new target, and chain it
485 		 * off the master target.  We can later iterate over these
486 		 * targets using dis_tgt_next().
487 		 */
488 		if (current->dt_elf != NULL) {
489 			dis_tgt_t *next = safe_malloc(sizeof (dis_tgt_t));
490 			next->dt_elf_root = tgt->dt_elf_root;
491 			next->dt_fd = -1;
492 			current->dt_next = next;
493 			current = next;
494 		}
495 		current->dt_elf = elf;
496 		current->dt_arhdr = arhdr;
497 
498 		if (elf_getshdrstrndx(elf, &current->dt_shstrndx) == -1) {
499 			warn("%s: failed to get section string table for "
500 			    "file", file);
501 			dis_tgt_destroy(tgt);
502 			return (NULL);
503 		}
504 
505 		if (elf_getshdrnum(elf, &shnum) == -1) {
506 			warn("%s: failed to get number of sections in file",
507 			    file);
508 			dis_tgt_destroy(tgt);
509 			return (NULL);
510 		}
511 
512 		current->dt_shnmap = safe_malloc(sizeof (dis_shnmap_t) *
513 		    shnum);
514 		current->dt_shncount = shnum;
515 
516 		idx = 0;
517 		dis_tgt_section_iter(current, tgt_scn_init, &idx);
518 		current->dt_filename = file;
519 
520 		create_addrmap(current);
521 		if (current->dt_symidx != 0)
522 			construct_symtab(current);
523 
524 		cmd = elf_next(elf);
525 	}
526 
527 	/*
528 	 * Final sanity check.  If we had an archive with no members, then bail
529 	 * out with a nice message.
530 	 */
531 	if (tgt->dt_elf == NULL) {
532 		warn("%s: empty archive\n", file);
533 		dis_tgt_destroy(tgt);
534 		return (NULL);
535 	}
536 
537 	return (tgt);
538 }
539 
540 /*
541  * Return the filename associated with the target.
542  */
543 const char *
dis_tgt_name(dis_tgt_t * tgt)544 dis_tgt_name(dis_tgt_t *tgt)
545 {
546 	return (tgt->dt_filename);
547 }
548 
549 /*
550  * Return the archive member name, if any.
551  */
552 const char *
dis_tgt_member(dis_tgt_t * tgt)553 dis_tgt_member(dis_tgt_t *tgt)
554 {
555 	if (tgt->dt_arhdr)
556 		return (tgt->dt_arhdr->ar_name);
557 	else
558 		return (NULL);
559 }
560 
561 /*
562  * Return the Elf_Ehdr associated with this target.  Needed to determine which
563  * disassembler to use.
564  */
565 void
dis_tgt_ehdr(dis_tgt_t * tgt,GElf_Ehdr * ehdr)566 dis_tgt_ehdr(dis_tgt_t *tgt, GElf_Ehdr *ehdr)
567 {
568 	(void) gelf_getehdr(tgt->dt_elf, ehdr);
569 }
570 
571 /*
572  * Return the next target in the list, if this is an archive.
573  */
574 dis_tgt_t *
dis_tgt_next(dis_tgt_t * tgt)575 dis_tgt_next(dis_tgt_t *tgt)
576 {
577 	return (tgt->dt_next);
578 }
579 
580 /*
581  * Destroy a target and free up any associated memory.
582  */
583 void
dis_tgt_destroy(dis_tgt_t * tgt)584 dis_tgt_destroy(dis_tgt_t *tgt)
585 {
586 	dis_tgt_t *current, *next;
587 
588 	current = tgt->dt_next;
589 	while (current != NULL) {
590 		next = current->dt_next;
591 		if (current->dt_elf)
592 			(void) elf_end(current->dt_elf);
593 		if (current->dt_symtab)
594 			free(current->dt_symtab);
595 		free(current);
596 		current = next;
597 	}
598 
599 	if (tgt->dt_elf)
600 		(void) elf_end(tgt->dt_elf);
601 	if (tgt->dt_elf_root)
602 		(void) elf_end(tgt->dt_elf_root);
603 
604 	if (tgt->dt_symtab)
605 		free(tgt->dt_symtab);
606 
607 	free(tgt);
608 }
609 
610 /*
611  * Given an address, return the section it is in and set the offset within
612  * the section.
613  */
614 const char *
dis_find_section(dis_tgt_t * tgt,uint64_t addr,off_t * offset)615 dis_find_section(dis_tgt_t *tgt, uint64_t addr, off_t *offset)
616 {
617 	int i;
618 
619 	for (i = 1; i < tgt->dt_shncount; i++) {
620 		if ((addr >= tgt->dt_shnmap[i].dm_start) &&
621 		    (addr < tgt->dt_shnmap[i].dm_start +
622 		    tgt->dt_shnmap[i].dm_length)) {
623 			*offset = addr - tgt->dt_shnmap[i].dm_start;
624 			return (tgt->dt_shnmap[i].dm_name);
625 		}
626 	}
627 
628 	*offset = 0;
629 	return (NULL);
630 }
631 
632 /*
633  * Given an address, returns the name of the corresponding symbol, as well as
634  * the offset within that symbol.  If no matching symbol is found, then NULL is
635  * returned.
636  *
637  * If 'cache_result' is specified, then we keep track of the resulting symbol.
638  * This cached result is consulted first on subsequent lookups in order to avoid
639  * unecessary lookups.  This flag should be used for resolving the current PC,
640  * as the majority of addresses stay within the current function.
641  */
642 const char *
dis_tgt_lookup(dis_tgt_t * tgt,uint64_t addr,off_t * offset,int cache_result,size_t * size,int * isfunc)643 dis_tgt_lookup(dis_tgt_t *tgt, uint64_t addr, off_t *offset, int cache_result,
644     size_t *size, int *isfunc)
645 {
646 	int lo, hi, mid;
647 	sym_entry_t *sym, *osym, *match;
648 	int found;
649 
650 	*offset = 0;
651 	*size = 0;
652 	if (isfunc != NULL)
653 		*isfunc = 0;
654 
655 	if (tgt->dt_symcache != NULL &&
656 	    addr >= tgt->dt_symcache->se_sym.st_value &&
657 	    addr < tgt->dt_symcache->se_sym.st_value +
658 	    tgt->dt_symcache->se_sym.st_size) {
659 		sym = tgt->dt_symcache;
660 		*offset = addr - sym->se_sym.st_value;
661 		*size = sym->se_sym.st_size;
662 		if (isfunc != NULL)
663 			*isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) ==
664 			    STT_FUNC);
665 		return (sym->se_name);
666 	}
667 
668 	lo = 0;
669 	hi = (tgt->dt_symcount - 1);
670 	found = 0;
671 	match = osym = NULL;
672 	while (lo <= hi) {
673 		mid = (lo + hi) / 2;
674 
675 		sym = &tgt->dt_symtab[mid];
676 
677 		if (addr >= sym->se_sym.st_value &&
678 		    addr < sym->se_sym.st_value + sym->se_sym.st_size &&
679 		    (!found || sym->se_sym.st_value > osym->se_sym.st_value)) {
680 			osym = sym;
681 			found = 1;
682 		} else if (addr == sym->se_sym.st_value) {
683 			/*
684 			 * Particularly for .plt objects, it's possible to have
685 			 * a zero sized object.  We want to return this, but we
686 			 * want it to be a last resort.
687 			 */
688 			match = sym;
689 		}
690 
691 		if (addr < sym->se_sym.st_value)
692 			hi = mid - 1;
693 		else
694 			lo = mid + 1;
695 	}
696 
697 	if (!found) {
698 		if (match)
699 			osym = match;
700 		else
701 			return (NULL);
702 	}
703 
704 	/*
705 	 * Walk backwards to find the best match.
706 	 */
707 	do {
708 		sym = osym;
709 
710 		if (osym == tgt->dt_symtab)
711 			break;
712 
713 		osym = osym - 1;
714 	} while ((sym->se_sym.st_value == osym->se_sym.st_value) &&
715 	    (addr >= osym->se_sym.st_value) &&
716 	    (addr < osym->se_sym.st_value + osym->se_sym.st_size));
717 
718 	if (cache_result)
719 		tgt->dt_symcache = sym;
720 
721 	*offset = addr - sym->se_sym.st_value;
722 	*size = sym->se_sym.st_size;
723 	if (isfunc)
724 		*isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) == STT_FUNC);
725 
726 	return (sym->se_name);
727 }
728 
729 /*
730  * Given an address, return the starting offset of the next symbol in the file.
731  * Only needed on variable length instruction architectures.
732  */
733 off_t
dis_tgt_next_symbol(dis_tgt_t * tgt,uint64_t addr)734 dis_tgt_next_symbol(dis_tgt_t *tgt, uint64_t addr)
735 {
736 	sym_entry_t *sym;
737 
738 	sym = (tgt->dt_symcache != NULL) ? tgt->dt_symcache : tgt->dt_symtab;
739 
740 	while (sym != (tgt->dt_symtab + tgt->dt_symcount)) {
741 		if (sym->se_sym.st_value >= addr)
742 			return (sym->se_sym.st_value - addr);
743 		sym++;
744 	}
745 
746 	return (0);
747 }
748 
749 /*
750  * Iterate over all sections in the target, executing the given callback for
751  * each.
752  */
753 void
dis_tgt_section_iter(dis_tgt_t * tgt,section_iter_f func,void * data)754 dis_tgt_section_iter(dis_tgt_t *tgt, section_iter_f func, void *data)
755 {
756 	dis_scn_t sdata;
757 	Elf_Scn *scn;
758 	int idx;
759 
760 	for (scn = elf_nextscn(tgt->dt_elf, NULL), idx = 1; scn != NULL;
761 	    scn = elf_nextscn(tgt->dt_elf, scn), idx++) {
762 
763 		if (gelf_getshdr(scn, &sdata.ds_shdr) == NULL) {
764 			warn("%s: failed to get section %d header",
765 			    tgt->dt_filename, idx);
766 			continue;
767 		}
768 
769 		if ((sdata.ds_name = elf_strptr(tgt->dt_elf, tgt->dt_shstrndx,
770 		    sdata.ds_shdr.sh_name)) == NULL) {
771 			warn("%s: failed to get section %d name",
772 			    tgt->dt_filename, idx);
773 			continue;
774 		}
775 
776 		if ((sdata.ds_data = elf_getdata(scn, NULL)) == NULL) {
777 			warn("%s: failed to get data for section '%s'",
778 			    tgt->dt_filename, sdata.ds_name);
779 			continue;
780 		}
781 
782 		/*
783 		 * dis_tgt_section_iter is also used before the section map
784 		 * is initialized, so only check when we need to.  If the
785 		 * section map is uninitialized, it will return 0 and have
786 		 * no net effect.
787 		 */
788 		if (sdata.ds_shdr.sh_addr == 0)
789 			sdata.ds_shdr.sh_addr = tgt->dt_shnmap[idx].dm_start;
790 
791 		func(tgt, &sdata, data);
792 	}
793 }
794 
795 /*
796  * Return 1 if the given section contains text, 0 otherwise.
797  */
798 int
dis_section_istext(dis_scn_t * scn)799 dis_section_istext(dis_scn_t *scn)
800 {
801 	return ((scn->ds_shdr.sh_type == SHT_PROGBITS) &&
802 	    (scn->ds_shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR)));
803 }
804 
805 /*
806  * Return a pointer to the section data.
807  */
808 void *
dis_section_data(dis_scn_t * scn)809 dis_section_data(dis_scn_t *scn)
810 {
811 	return (scn->ds_data->d_buf);
812 }
813 
814 /*
815  * Return the size of the section data.
816  */
817 size_t
dis_section_size(dis_scn_t * scn)818 dis_section_size(dis_scn_t *scn)
819 {
820 	return (scn->ds_data->d_size);
821 }
822 
823 /*
824  * Return the address for the given section.
825  */
826 uint64_t
dis_section_addr(dis_scn_t * scn)827 dis_section_addr(dis_scn_t *scn)
828 {
829 	return (scn->ds_shdr.sh_addr);
830 }
831 
832 /*
833  * Return the name of the current section.
834  */
835 const char *
dis_section_name(dis_scn_t * scn)836 dis_section_name(dis_scn_t *scn)
837 {
838 	return (scn->ds_name);
839 }
840 
841 /*
842  * Create an allocated copy of the given section
843  */
844 dis_scn_t *
dis_section_copy(dis_scn_t * scn)845 dis_section_copy(dis_scn_t *scn)
846 {
847 	dis_scn_t *new;
848 
849 	new = safe_malloc(sizeof (dis_scn_t));
850 	(void) memcpy(new, scn, sizeof (dis_scn_t));
851 
852 	return (new);
853 }
854 
855 /*
856  * Free section memory
857  */
858 void
dis_section_free(dis_scn_t * scn)859 dis_section_free(dis_scn_t *scn)
860 {
861 	free(scn);
862 }
863 
864 /*
865  * Iterate over all functions in the target, executing the given callback for
866  * each one.
867  */
868 void
dis_tgt_function_iter(dis_tgt_t * tgt,function_iter_f func,void * data)869 dis_tgt_function_iter(dis_tgt_t *tgt, function_iter_f func, void *data)
870 {
871 	int i;
872 	sym_entry_t *sym;
873 	dis_func_t df;
874 	Elf_Scn *scn;
875 	GElf_Shdr	shdr;
876 
877 	for (i = 0, sym = tgt->dt_symtab; i < tgt->dt_symcount; i++, sym++) {
878 
879 		/* ignore non-functions */
880 		if ((GELF_ST_TYPE(sym->se_sym.st_info) != STT_FUNC) ||
881 		    (sym->se_name == NULL) ||
882 		    (sym->se_sym.st_size == 0) ||
883 		    (sym->se_shndx >= SHN_LORESERVE))
884 			continue;
885 
886 		/* get the ELF data associated with this function */
887 		if ((scn = elf_getscn(tgt->dt_elf, sym->se_shndx)) == NULL ||
888 		    gelf_getshdr(scn, &shdr) == NULL ||
889 		    (df.df_data = elf_getdata(scn, NULL)) == NULL ||
890 		    df.df_data->d_size == 0) {
891 			warn("%s: failed to read section %d",
892 			    tgt->dt_filename, sym->se_shndx);
893 			continue;
894 		}
895 
896 		if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
897 			shdr.sh_addr = tgt->dt_shnmap[sym->se_shndx].dm_start;
898 
899 		/*
900 		 * Verify that the address lies within the section that we think
901 		 * it does.
902 		 */
903 		if (sym->se_sym.st_value < shdr.sh_addr ||
904 		    (sym->se_sym.st_value + sym->se_sym.st_size) >
905 		    (shdr.sh_addr + shdr.sh_size)) {
906 			warn("%s: bad section %d for address %p",
907 			    tgt->dt_filename, sym->se_sym.st_shndx,
908 			    sym->se_sym.st_value);
909 			continue;
910 		}
911 
912 		df.df_sym = sym;
913 		df.df_offset = sym->se_sym.st_value - shdr.sh_addr;
914 
915 		func(tgt, &df, data);
916 	}
917 }
918 
919 /*
920  * Return the data associated with a given function.
921  */
922 void *
dis_function_data(dis_func_t * func)923 dis_function_data(dis_func_t *func)
924 {
925 	return ((char *)func->df_data->d_buf + func->df_offset);
926 }
927 
928 /*
929  * Return the size of a function.
930  */
931 size_t
dis_function_size(dis_func_t * func)932 dis_function_size(dis_func_t *func)
933 {
934 	return (func->df_sym->se_sym.st_size);
935 }
936 
937 /*
938  * Return the address of a function.
939  */
940 uint64_t
dis_function_addr(dis_func_t * func)941 dis_function_addr(dis_func_t *func)
942 {
943 	return (func->df_sym->se_sym.st_value);
944 }
945 
946 /*
947  * Return the name of the function
948  */
949 const char *
dis_function_name(dis_func_t * func)950 dis_function_name(dis_func_t *func)
951 {
952 	return (func->df_sym->se_name);
953 }
954 
955 /*
956  * Return a copy of a function.
957  */
958 dis_func_t *
dis_function_copy(dis_func_t * func)959 dis_function_copy(dis_func_t *func)
960 {
961 	dis_func_t *new;
962 
963 	new = safe_malloc(sizeof (dis_func_t));
964 	(void) memcpy(new, func, sizeof (dis_func_t));
965 
966 	return (new);
967 }
968 
969 /*
970  * Free function memory
971  */
972 void
dis_function_free(dis_func_t * func)973 dis_function_free(dis_func_t *func)
974 {
975 	free(func);
976 }
977