xref: /illumos-gate/usr/src/cmd/dis/dis_main.c (revision 2bbdd445a21f9d61f4a0ca0faf05d5ceb2bd91f3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright 2011 Jason King.  All rights reserved.
27  * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
28  */
29 
30 #include <ctype.h>
31 #include <getopt.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <sys/sysmacros.h>
36 #include <sys/elf_SPARC.h>
37 
38 #include <libdisasm.h>
39 
40 #include "dis_target.h"
41 #include "dis_util.h"
42 #include "dis_list.h"
43 
44 int g_demangle;		/* Demangle C++ names */
45 int g_quiet;		/* Quiet mode */
46 int g_numeric;		/* Numeric mode */
47 int g_flags;		/* libdisasm language flags */
48 int g_doall;		/* true if no functions or sections were given */
49 
50 dis_namelist_t *g_funclist;	/* list of functions to disassemble, if any */
51 dis_namelist_t *g_seclist;	/* list of sections to disassemble, if any */
52 
/*
 * Section types recorded for the -d, -D, and -t options.  The value is
 * handed back to dis_named_section(), which uses it to decide how the
 * section is printed.
 */
#define	DIS_DATA_RELATIVE	1	/* -d: raw dump, addresses from 0 */
#define	DIS_DATA_ABSOLUTE	2	/* -D: raw dump, section addresses */
#define	DIS_TEXT		3	/* -t: disassemble the section */
59 
60 /*
61  * libdisasm callback data.  Keeps track of current data (function or section)
62  * and offset within that data.
63  */
64 typedef struct dis_buffer {
65 	dis_tgt_t	*db_tgt;	/* current dis target */
66 	void		*db_data;	/* function or section data */
67 	uint64_t	db_addr;	/* address of function start */
68 	size_t		db_size;	/* size of data */
69 	uint64_t	db_nextaddr;	/* next address to be read */
70 } dis_buffer_t;
71 
72 #define	MINSYMWIDTH	22	/* Minimum width of symbol portion of line */
73 
74 /*
75  * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
76  * formatted symbol, based on the offset and current setttings.
77  */
78 void
79 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
80     size_t buflen)
81 {
82 	if (symbol == NULL || g_numeric) {
83 		if (g_flags & DIS_OCTAL)
84 			(void) snprintf(buf, buflen, "0%llo", addr);
85 		else
86 			(void) snprintf(buf, buflen, "0x%llx", addr);
87 	} else {
88 		if (g_demangle)
89 			symbol = dis_demangle(symbol);
90 
91 		if (offset == 0)
92 			(void) snprintf(buf, buflen, "%s", symbol);
93 		else if (g_flags & DIS_OCTAL)
94 			(void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
95 		else
96 			(void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
97 	}
98 }
99 
100 /*
101  * Determine if we are on an architecture with fixed-size instructions,
102  * and if so, what size they are.
103  */
104 static int
105 insn_size(dis_handle_t *dhp)
106 {
107 	int min = dis_min_instrlen(dhp);
108 	int max = dis_max_instrlen(dhp);
109 
110 	if (min == max)
111 		return (min);
112 
113 	return (0);
114 }
115 
116 /*
117  * The main disassembly routine.  Given a fixed-sized buffer and starting
118  * address, disassemble the data using the supplied target and libdisasm handle.
119  */
120 void
121 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
122     size_t datalen)
123 {
124 	dis_buffer_t db = { 0 };
125 	char buf[BUFSIZE];
126 	char symbuf[BUFSIZE];
127 	const char *symbol;
128 	const char *last_symbol;
129 	off_t symoffset;
130 	int i;
131 	int bytesperline;
132 	size_t symsize;
133 	int isfunc;
134 	size_t symwidth = 0;
135 	int ret;
136 	int insz = insn_size(dhp);
137 
138 	db.db_tgt = tgt;
139 	db.db_data = data;
140 	db.db_addr = addr;
141 	db.db_size = datalen;
142 
143 	dis_set_data(dhp, &db);
144 
145 	if ((bytesperline = dis_max_instrlen(dhp)) > 6)
146 		bytesperline = 6;
147 
148 	symbol = NULL;
149 
150 	while (addr < db.db_addr + db.db_size) {
151 
152 		ret = dis_disassemble(dhp, addr, buf, BUFSIZE);
153 		if (ret != 0 && insz > 0) {
154 			/*
155 			 * Since we know instructions are fixed size, we
156 			 * always know the address of the next instruction
157 			 */
158 			(void) snprintf(buf, sizeof (buf),
159 			    "*** invalid opcode ***");
160 			db.db_nextaddr = addr + insz;
161 
162 		} else if (ret != 0) {
163 			off_t next;
164 
165 			(void) snprintf(buf, sizeof (buf),
166 			    "*** invalid opcode ***");
167 
168 			/*
169 			 * On architectures with variable sized instructions
170 			 * we have no way to figure out where the next
171 			 * instruction starts if we encounter an invalid
172 			 * instruction.  Instead we print the rest of the
173 			 * instruction stream as hex until we reach the
174 			 * next valid symbol in the section.
175 			 */
176 			if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
177 				db.db_nextaddr = db.db_addr + db.db_size;
178 			} else {
179 				if (next > db.db_size)
180 					db.db_nextaddr = db.db_addr +
181 					    db.db_size;
182 				else
183 					db.db_nextaddr = addr + next;
184 			}
185 		}
186 
187 		/*
188 		 * Print out the line as:
189 		 *
190 		 * 	address:	bytes	text
191 		 *
192 		 * If there are more than 6 bytes in any given instruction,
193 		 * spread the bytes across two lines.  We try to get symbolic
194 		 * information for the address, but if that fails we print out
195 		 * the numeric address instead.
196 		 *
197 		 * We try to keep the address portion of the text aligned at
198 		 * MINSYMWIDTH characters.  If we are disassembling a function
199 		 * with a long name, this can be annoying.  So we pick a width
200 		 * based on the maximum width that the current symbol can be.
201 		 * This at least produces text aligned within each function.
202 		 */
203 		last_symbol = symbol;
204 		symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
205 		    &isfunc);
206 		if (symbol == NULL) {
207 			symbol = dis_find_section(tgt, addr, &symoffset);
208 			symsize = symoffset;
209 		}
210 
211 		if (symbol != last_symbol)
212 			getsymname(addr, symbol, symsize, symbuf,
213 			    sizeof (symbuf));
214 
215 		symwidth = MAX(symwidth, strlen(symbuf));
216 		getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
217 
218 		/*
219 		 * If we've crossed a new function boundary, print out the
220 		 * function name on a blank line.
221 		 */
222 		if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
223 			(void) printf("%s()\n", symbol);
224 
225 		(void) printf("    %s:%*s ", symbuf,
226 		    symwidth - strlen(symbuf), "");
227 
228 		/* print bytes */
229 		for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
230 		    i++) {
231 			int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
232 			if (g_flags & DIS_OCTAL)
233 				(void) printf("%03o ", byte);
234 			else
235 				(void) printf("%02x ", byte);
236 		}
237 
238 		/* trailing spaces for missing bytes */
239 		for (; i < bytesperline; i++) {
240 			if (g_flags & DIS_OCTAL)
241 				(void) printf("    ");
242 			else
243 				(void) printf("   ");
244 		}
245 
246 		/* contents of disassembly */
247 		(void) printf(" %s", buf);
248 
249 		/* excess bytes that spill over onto subsequent lines */
250 		for (; i < db.db_nextaddr - addr; i++) {
251 			int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
252 			if (i % bytesperline == 0)
253 				(void) printf("\n    %*s  ", symwidth, "");
254 			if (g_flags & DIS_OCTAL)
255 				(void) printf("%03o ", byte);
256 			else
257 				(void) printf("%02x ", byte);
258 		}
259 
260 		(void) printf("\n");
261 
262 		addr = db.db_nextaddr;
263 	}
264 }
265 
266 /*
267  * libdisasm wrapper around symbol lookup.  Invoke the target-specific lookup
268  * function, and convert the result using getsymname().
269  */
270 int
271 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
272     size_t *symlen)
273 {
274 	dis_buffer_t *db = data;
275 	const char *symbol;
276 	off_t offset;
277 	size_t size;
278 
279 	/*
280 	 * If NULL symbol is returned, getsymname takes care of
281 	 * printing appropriate address in buf instead of symbol.
282 	 */
283 	symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
284 
285 	if (buf != NULL)
286 		getsymname(addr, symbol, offset, buf, buflen);
287 
288 	if (start != NULL)
289 		*start = addr - offset;
290 	if (symlen != NULL)
291 		*symlen = size;
292 
293 	if (symbol == NULL)
294 		return (-1);
295 
296 	return (0);
297 }
298 
299 /*
300  * libdisasm wrapper around target reading.  libdisasm will always read data
301  * in order, so update our current offset within the buffer appropriately.
302  * We only support reading from within the current object; libdisasm should
303  * never ask us to do otherwise.
304  */
305 int
306 do_read(void *data, uint64_t addr, void *buf, size_t len)
307 {
308 	dis_buffer_t *db = data;
309 	size_t offset;
310 
311 	if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
312 		return (-1);
313 
314 	offset = addr - db->db_addr;
315 	len = MIN(len, db->db_size - offset);
316 
317 	(void) memcpy(buf, (char *)db->db_data + offset, len);
318 
319 	db->db_nextaddr = addr + len;
320 
321 	return (len);
322 }
323 
/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 * 'addr' is the address to display for the first byte of 'data', and
 * 'datalen' is the number of bytes to dump.  Output is produced in rows of 16
 * bytes, each starting on a 16-byte aligned address; positions in a row that
 * fall outside [addr, addr + datalen) are shown as blanks.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	/*
	 * Keep the running address in a full 64-bit type.  A uintptr_t would
	 * truncate the addresses of 64-bit objects when this program is
	 * itself built as a 32-bit binary.
	 */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	uint64_t end = addr + datalen;
	uint8_t *bytes = data;
	int i;
	int width;

	/*
	 * Determine if the range given to us fits in 32 bits, in which case
	 * the address column is 8 hex digits wide rather than 16.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print spaces.
		 */
		for (i = 0; i < 16; i++) {
			if (curaddr + i < addr || curaddr + i >= end)
				(void) printf("  ");
			else
				(void) printf("%02x",
				    bytes[curaddr + i - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation
		 */
		for (i = 0; i < 16; i++) {
			if (curaddr + i < addr || curaddr + i >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[curaddr + i - addr];
				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
391 
392 /*
393  * Disassemble a section implicitly specified as part of a file.  This function
394  * is called for all sections when no other flags are specified.  We ignore any
395  * data sections, and print out only those sections containing text.
396  */
397 void
398 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
399 {
400 	dis_handle_t *dhp = data;
401 
402 	/* ignore data sections */
403 	if (!dis_section_istext(scn))
404 		return;
405 
406 	if (!g_quiet)
407 		(void) printf("\nsection %s\n", dis_section_name(scn));
408 
409 	dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
410 	    dis_section_size(scn));
411 }
412 
413 /*
414  * Structure passed to dis_named_{section,function} which keeps track of both
415  * the target and the libdisasm handle.
416  */
417 typedef struct callback_arg {
418 	dis_tgt_t	*ca_tgt;
419 	dis_handle_t	*ca_handle;
420 } callback_arg_t;
421 
422 /*
423  * Disassemble a section explicitly named with -s, -d, or -D.  The 'type'
424  * argument contains the type of argument given.  Pass the data onto the
425  * appropriate helper routine.
426  */
427 void
428 dis_named_section(dis_scn_t *scn, int type, void *data)
429 {
430 	callback_arg_t *ca = data;
431 
432 	if (!g_quiet)
433 		(void) printf("\nsection %s\n", dis_section_name(scn));
434 
435 	switch (type) {
436 	case DIS_DATA_RELATIVE:
437 		dump_data(0, dis_section_data(scn), dis_section_size(scn));
438 		break;
439 	case DIS_DATA_ABSOLUTE:
440 		dump_data(dis_section_addr(scn), dis_section_data(scn),
441 		    dis_section_size(scn));
442 		break;
443 	case DIS_TEXT:
444 		dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
445 		    dis_section_data(scn), dis_section_size(scn));
446 		break;
447 	}
448 }
449 
450 /*
451  * Disassemble a function explicitly specified with '-F'.  The 'type' argument
452  * is unused.
453  */
454 /* ARGSUSED */
455 void
456 dis_named_function(dis_func_t *func, int type, void *data)
457 {
458 	callback_arg_t *ca = data;
459 
460 	dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func),
461 	    dis_function_data(func), dis_function_size(func));
462 }
463 
464 /*
465  * Disassemble a complete file.  First, we determine the type of the file based
466  * on the ELF machine type, and instantiate a version of the disassembler
467  * appropriate for the file.  We then resolve any named sections or functions
468  * against the file, and iterate over the results (or all sections if no flags
469  * were specified).
470  */
471 void
472 dis_file(const char *filename)
473 {
474 	dis_tgt_t *tgt, *current;
475 	dis_scnlist_t *sections;
476 	dis_funclist_t *functions;
477 	dis_handle_t *dhp;
478 	GElf_Ehdr ehdr;
479 
480 	/*
481 	 * First, initialize the target
482 	 */
483 	if ((tgt = dis_tgt_create(filename)) == NULL)
484 		return;
485 
486 	if (!g_quiet)
487 		(void) printf("disassembly for %s\n\n",  filename);
488 
489 	/*
490 	 * A given file may contain multiple targets (if it is an archive, for
491 	 * example).  We iterate over all possible targets if this is the case.
492 	 */
493 	for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
494 		dis_tgt_ehdr(current, &ehdr);
495 
496 		/*
497 		 * Eventually, this should probably live within libdisasm, and
498 		 * we should be able to disassemble targets from different
499 		 * architectures.  For now, we only support objects as the
500 		 * native machine type.
501 		 */
502 		switch (ehdr.e_machine) {
503 		case EM_SPARC:
504 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
505 			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
506 				warn("invalid E_IDENT field for SPARC object");
507 				return;
508 			}
509 			g_flags |= DIS_SPARC_V8;
510 			break;
511 
512 		case EM_SPARC32PLUS:
513 		{
514 			uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK;
515 
516 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
517 			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
518 				warn("invalid E_IDENT field for SPARC object");
519 				return;
520 			}
521 
522 			if (flags != 0 &&
523 			    (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
524 			    EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS)
525 				g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
526 			else
527 				g_flags |= DIS_SPARC_V9;
528 			break;
529 		}
530 
531 		case EM_SPARCV9:
532 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
533 			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
534 				warn("invalid E_IDENT field for SPARC object");
535 				return;
536 			}
537 
538 			g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
539 			break;
540 
541 		case EM_386:
542 			g_flags |= DIS_X86_SIZE32;
543 			break;
544 
545 		case EM_AMD64:
546 			g_flags |= DIS_X86_SIZE64;
547 			break;
548 
549 		default:
550 			die("%s: unsupported ELF machine 0x%x", filename,
551 			    ehdr.e_machine);
552 		}
553 
554 		/*
555 		 * If ET_REL (.o), printing immediate symbols is likely to
556 		 * result in garbage, as symbol lookups on unrelocated
557 		 * immediates find false and useless matches.
558 		 */
559 
560 		if (ehdr.e_type == ET_REL)
561 			g_flags |= DIS_NOIMMSYM;
562 
563 		if (!g_quiet && dis_tgt_member(current) != NULL)
564 			(void) printf("\narchive member %s\n",
565 			    dis_tgt_member(current));
566 
567 		/*
568 		 * Instantiate a libdisasm handle based on the file type.
569 		 */
570 		if ((dhp = dis_handle_create(g_flags, current, do_lookup,
571 		    do_read)) == NULL)
572 			die("%s: failed to initialize disassembler: %s",
573 			    filename, dis_strerror(dis_errno()));
574 
575 		if (g_doall) {
576 			/*
577 			 * With no arguments, iterate over all sections and
578 			 * disassemble only those that contain text.
579 			 */
580 			dis_tgt_section_iter(current, dis_text_section, dhp);
581 		} else {
582 			callback_arg_t ca;
583 
584 			ca.ca_tgt = current;
585 			ca.ca_handle = dhp;
586 
587 			/*
588 			 * If sections or functions were explicitly specified,
589 			 * resolve those names against the object, and iterate
590 			 * over just the resulting data.
591 			 */
592 			sections = dis_namelist_resolve_sections(g_seclist,
593 			    current);
594 			functions = dis_namelist_resolve_functions(g_funclist,
595 			    current);
596 
597 			dis_scnlist_iter(sections, dis_named_section, &ca);
598 			dis_funclist_iter(functions, dis_named_function, &ca);
599 
600 			dis_scnlist_destroy(sections);
601 			dis_funclist_destroy(functions);
602 		}
603 
604 		dis_handle_destroy(dhp);
605 	}
606 
607 	dis_tgt_destroy(tgt);
608 }
609 
/*
 * Print the command synopsis to stderr and exit with status 2.
 */
void
usage(void)
{
	(void) fputs("usage: dis [-CVoqn] [-d sec] \n", stderr);
	(void) fputs("\t[-D sec] [-F function] [-t sec] file ..\n", stderr);
	exit(2);
}
617 
/*
 * Entry in the singly-linked list of archive paths built up from '-l'
 * options.
 */
typedef struct lib_node {
	char *path;		/* path of the archive to disassemble */
	struct lib_node *next;	/* next entry, or NULL at the end */
} lib_node_t;
622 
623 int
624 main(int argc, char **argv)
625 {
626 	int optchar;
627 	int i;
628 	lib_node_t *libs = NULL;
629 
630 	g_funclist = dis_namelist_create();
631 	g_seclist = dis_namelist_create();
632 
633 	while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
634 		switch (optchar) {
635 		case 'C':
636 			g_demangle = 1;
637 			break;
638 		case 'd':
639 			dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
640 			break;
641 		case 'D':
642 			dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
643 			break;
644 		case 'F':
645 			dis_namelist_add(g_funclist, optarg, 0);
646 			break;
647 		case 'l': {
648 			/*
649 			 * The '-l foo' option historically would attempt to
650 			 * disassemble '$LIBDIR/libfoo.a'.  The $LIBDIR
651 			 * environment variable has never been supported or
652 			 * documented for our linker.  However, until this
653 			 * option is formally EOLed, we have to support it.
654 			 */
655 			char *dir;
656 			lib_node_t *node;
657 			size_t len;
658 
659 			if ((dir = getenv("LIBDIR")) == NULL ||
660 			    dir[0] == '\0')
661 				dir = "/usr/lib";
662 			node = safe_malloc(sizeof (lib_node_t));
663 			len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a");
664 			node->path = safe_malloc(len);
665 
666 			(void) snprintf(node->path, len, "%s/lib%s.a", dir,
667 			    optarg);
668 			node->next = libs;
669 			libs = node;
670 			break;
671 		}
672 		case 'L':
673 			/*
674 			 * The '-L' option historically would attempt to read
675 			 * the .debug section of the target to determine source
676 			 * line information in order to annotate the output.
677 			 * No compiler has emitted these sections in many years,
678 			 * and the option has never done what it purported to
679 			 * do.  We silently consume the option for
680 			 * compatibility.
681 			 */
682 			break;
683 		case 'n':
684 			g_numeric = 1;
685 			break;
686 		case 'o':
687 			g_flags |= DIS_OCTAL;
688 			break;
689 		case 'q':
690 			g_quiet = 1;
691 			break;
692 		case 't':
693 			dis_namelist_add(g_seclist, optarg, DIS_TEXT);
694 			break;
695 		case 'V':
696 			(void) printf("Solaris disassembler version 1.0\n");
697 			return (0);
698 		default:
699 			usage();
700 			break;
701 		}
702 	}
703 
704 	argc -= optind;
705 	argv += optind;
706 
707 	if (argc == 0 && libs == NULL) {
708 		warn("no objects specified");
709 		usage();
710 	}
711 
712 	if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
713 		g_doall = 1;
714 
715 	/*
716 	 * See comment for 'l' option, above.
717 	 */
718 	while (libs != NULL) {
719 		lib_node_t *node = libs->next;
720 
721 		dis_file(libs->path);
722 		free(libs->path);
723 		free(libs);
724 		libs = node;
725 	}
726 
727 	for (i = 0; i < argc; i++)
728 		dis_file(argv[i]);
729 
730 	dis_namelist_destroy(g_funclist);
731 	dis_namelist_destroy(g_seclist);
732 
733 	return (g_error);
734 }
735