xref: /illumos-gate/usr/src/cmd/dis/dis_main.c (revision bde2df36223c26750e6e5e801907d885e088ee30)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright 2011 Jason King.  All rights reserved.
27  */
28 
29 #include <ctype.h>
30 #include <getopt.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <sys/sysmacros.h>
35 #include <sys/elf_SPARC.h>
36 
37 #include <libdisasm.h>
38 
39 #include "dis_target.h"
40 #include "dis_util.h"
41 #include "dis_list.h"
42 
/*
 * Global option state.  main() sets these while parsing the command line;
 * dis_file() additionally ORs per-object bits into g_flags.
 */
int g_demangle;		/* Demangle C++ names (-C) */
int g_quiet;		/* Quiet mode (-q): suppress headers and function names */
int g_numeric;		/* Numeric mode (-n): print addresses, not symbols */
int g_flags;		/* libdisasm flags; -o sets DIS_OCTAL */
int g_doall;		/* true if no functions or sections were given */

/* Both lists are created and destroyed by main(). */
dis_namelist_t *g_funclist;	/* list of functions to disassemble, if any */
dis_namelist_t *g_seclist;	/* list of sections to disassemble, if any */
51 
/*
 * Section options for -d, -D, and -t.  main() stores one of these with each
 * section name, and dis_named_section() dispatches on it.
 */
#define	DIS_DATA_RELATIVE	1	/* -d: dump data, addresses from 0 */
#define	DIS_DATA_ABSOLUTE	2	/* -D: dump data, section addresses */
#define	DIS_TEXT		3	/* -t: disassemble section as text */
58 
/*
 * libdisasm callback data.  Keeps track of current data (function or section)
 * and offset within that data.  dis_data() fills one in and registers it with
 * dis_set_data(); do_read() and do_lookup() receive it back as their 'data'
 * argument.
 */
typedef struct dis_buffer {
	dis_tgt_t	*db_tgt;	/* current dis target */
	void		*db_data;	/* function or section data */
	uint64_t	db_addr;	/* address of first byte of db_data */
	size_t		db_size;	/* size of data */
	uint64_t	db_nextaddr;	/* next address to be read */
} dis_buffer_t;
70 
/*
 * NOTE(review): nothing in the code visible in this file references this
 * macro; dis_data() starts its column width at 0 -- confirm before relying
 * on it.
 */
#define	MINSYMWIDTH	22	/* Minimum width of symbol portion of line */
72 
73 /*
74  * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
75  * formatted symbol, based on the offset and current setttings.
76  */
77 void
78 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
79     size_t buflen)
80 {
81 	if (symbol == NULL || g_numeric) {
82 		if (g_flags & DIS_OCTAL)
83 			(void) snprintf(buf, buflen, "0%llo", addr);
84 		else
85 			(void) snprintf(buf, buflen, "0x%llx", addr);
86 	} else {
87 		if (g_demangle)
88 			symbol = dis_demangle(symbol);
89 
90 		if (offset == 0)
91 			(void) snprintf(buf, buflen, "%s", symbol);
92 		else if (g_flags & DIS_OCTAL)
93 			(void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
94 		else
95 			(void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
96 	}
97 }
98 
99 /*
100  * The main disassembly routine.  Given a fixed-sized buffer and starting
101  * address, disassemble the data using the supplied target and libdisasm handle.
102  */
103 void
104 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
105     size_t datalen)
106 {
107 	dis_buffer_t db = { 0 };
108 	char buf[BUFSIZE];
109 	char symbuf[BUFSIZE];
110 	const char *symbol;
111 	const char *last_symbol;
112 	off_t symoffset;
113 	int i;
114 	int bytesperline;
115 	size_t symsize;
116 	int isfunc;
117 	size_t symwidth = 0;
118 
119 	db.db_tgt = tgt;
120 	db.db_data = data;
121 	db.db_addr = addr;
122 	db.db_size = datalen;
123 
124 	dis_set_data(dhp, &db);
125 
126 	if ((bytesperline = dis_max_instrlen(dhp)) > 6)
127 		bytesperline = 6;
128 
129 	symbol = NULL;
130 
131 	while (addr < db.db_addr + db.db_size) {
132 
133 		if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) {
134 #if defined(__sparc)
135 			/*
136 			 * Since sparc instructions are fixed size, we
137 			 * always know the address of the next instruction
138 			 */
139 			(void) snprintf(buf, sizeof (buf),
140 			    "*** invalid opcode ***");
141 			db.db_nextaddr = addr + 4;
142 
143 #else
144 			off_t next;
145 
146 			(void) snprintf(buf, sizeof (buf),
147 			    "*** invalid opcode ***");
148 
149 			/*
150 			 * On architectures with variable sized instructions
151 			 * we have no way to figure out where the next
152 			 * instruction starts if we encounter an invalid
153 			 * instruction.  Instead we print the rest of the
154 			 * instruction stream as hex until we reach the
155 			 * next valid symbol in the section.
156 			 */
157 			if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
158 				db.db_nextaddr = db.db_addr + db.db_size;
159 			} else {
160 				if (next > db.db_size)
161 					db.db_nextaddr = db.db_addr +
162 					    db.db_size;
163 				else
164 					db.db_nextaddr = addr + next;
165 			}
166 #endif
167 		}
168 
169 		/*
170 		 * Print out the line as:
171 		 *
172 		 * 	address:	bytes	text
173 		 *
174 		 * If there are more than 6 bytes in any given instruction,
175 		 * spread the bytes across two lines.  We try to get symbolic
176 		 * information for the address, but if that fails we print out
177 		 * the numeric address instead.
178 		 *
179 		 * We try to keep the address portion of the text aligned at
180 		 * MINSYMWIDTH characters.  If we are disassembling a function
181 		 * with a long name, this can be annoying.  So we pick a width
182 		 * based on the maximum width that the current symbol can be.
183 		 * This at least produces text aligned within each function.
184 		 */
185 		last_symbol = symbol;
186 		symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
187 		    &isfunc);
188 		if (symbol == NULL) {
189 			symbol = dis_find_section(tgt, addr, &symoffset);
190 			symsize = symoffset;
191 		}
192 
193 		if (symbol != last_symbol)
194 			getsymname(addr, symbol, symsize, symbuf,
195 			    sizeof (symbuf));
196 
197 		symwidth = MAX(symwidth, strlen(symbuf));
198 		getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
199 
200 		/*
201 		 * If we've crossed a new function boundary, print out the
202 		 * function name on a blank line.
203 		 */
204 		if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
205 			(void) printf("%s()\n", symbol);
206 
207 		(void) printf("    %s:%*s ", symbuf,
208 		    symwidth - strlen(symbuf), "");
209 
210 		/* print bytes */
211 		for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
212 		    i++) {
213 			int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
214 			if (g_flags & DIS_OCTAL)
215 				(void) printf("%03o ", byte);
216 			else
217 				(void) printf("%02x ", byte);
218 		}
219 
220 		/* trailing spaces for missing bytes */
221 		for (; i < bytesperline; i++) {
222 			if (g_flags & DIS_OCTAL)
223 				(void) printf("    ");
224 			else
225 				(void) printf("   ");
226 		}
227 
228 		/* contents of disassembly */
229 		(void) printf(" %s", buf);
230 
231 		/* excess bytes that spill over onto subsequent lines */
232 		for (; i < db.db_nextaddr - addr; i++) {
233 			int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
234 			if (i % bytesperline == 0)
235 				(void) printf("\n    %*s  ", symwidth, "");
236 			if (g_flags & DIS_OCTAL)
237 				(void) printf("%03o ", byte);
238 			else
239 				(void) printf("%02x ", byte);
240 		}
241 
242 		(void) printf("\n");
243 
244 		addr = db.db_nextaddr;
245 	}
246 }
247 
248 /*
249  * libdisasm wrapper around symbol lookup.  Invoke the target-specific lookup
250  * function, and convert the result using getsymname().
251  */
252 int
253 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
254     size_t *symlen)
255 {
256 	dis_buffer_t *db = data;
257 	const char *symbol;
258 	off_t offset;
259 	size_t size;
260 
261 	/*
262 	 * If NULL symbol is returned, getsymname takes care of
263 	 * printing appropriate address in buf instead of symbol.
264 	 */
265 	symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
266 
267 	if (buf != NULL)
268 		getsymname(addr, symbol, offset, buf, buflen);
269 
270 	if (start != NULL)
271 		*start = addr - offset;
272 	if (symlen != NULL)
273 		*symlen = size;
274 
275 	if (symbol == NULL)
276 		return (-1);
277 
278 	return (0);
279 }
280 
281 /*
282  * libdisasm wrapper around target reading.  libdisasm will always read data
283  * in order, so update our current offset within the buffer appropriately.
284  * We only support reading from within the current object; libdisasm should
285  * never ask us to do otherwise.
286  */
287 int
288 do_read(void *data, uint64_t addr, void *buf, size_t len)
289 {
290 	dis_buffer_t *db = data;
291 	size_t offset;
292 
293 	if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
294 		return (-1);
295 
296 	offset = addr - db->db_addr;
297 	len = MIN(len, db->db_size - offset);
298 
299 	(void) memcpy(buf, (char *)db->db_data + offset, len);
300 
301 	db->db_nextaddr = addr + len;
302 
303 	return (len);
304 }
305 
/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 *	addr	address to display for the first byte of 'data'
 *	data	bytes to dump
 *	datalen	number of bytes in 'data'
 *
 * Each output line covers one 16-byte aligned row: the row address, the bytes
 * in hex grouped in pairs, and the printable characters.  Positions in the
 * first and last rows that fall outside [addr, addr + datalen) are left blank.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	/*
	 * Track the row address as a full 64-bit value.  A uintptr_t would
	 * truncate addresses of 64-bit objects when this program itself is
	 * built 32-bit.
	 */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	uint64_t end = addr + datalen;
	const uint8_t *bytes = data;
	int i;
	int width;

	/*
	 * Determine if the address given to us fits in 32-bit range, in which
	 * case use a 4-byte width (8 hex digits); otherwise use 16 digits.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address.  The conversion must be as wide as
		 * the value or the upper half of the address is lost.
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print spaces.
		 */
		for (i = 0; i < 16; i++) {
			uint64_t cur = curaddr + i;

			if (cur < addr || cur >= end)
				(void) printf("  ");
			else
				(void) printf("%02x", bytes[cur - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation
		 */
		for (i = 0; i < 16; i++) {
			uint64_t cur = curaddr + i;

			if (cur < addr || cur >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[cur - addr];

				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
373 
374 /*
375  * Disassemble a section implicitly specified as part of a file.  This function
376  * is called for all sections when no other flags are specified.  We ignore any
377  * data sections, and print out only those sections containing text.
378  */
379 void
380 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
381 {
382 	dis_handle_t *dhp = data;
383 
384 	/* ignore data sections */
385 	if (!dis_section_istext(scn))
386 		return;
387 
388 	if (!g_quiet)
389 		(void) printf("\nsection %s\n", dis_section_name(scn));
390 
391 	dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
392 	    dis_section_size(scn));
393 }
394 
/*
 * Structure passed to dis_named_{section,function} which keeps track of both
 * the target and the libdisasm handle.  dis_file() fills one in for each
 * target it processes.
 */
typedef struct callback_arg {
	dis_tgt_t	*ca_tgt;	/* target being disassembled */
	dis_handle_t	*ca_handle;	/* libdisasm handle for that target */
} callback_arg_t;
403 
404 /*
405  * Disassemble a section explicitly named with -s, -d, or -D.  The 'type'
406  * argument contains the type of argument given.  Pass the data onto the
407  * appropriate helper routine.
408  */
409 void
410 dis_named_section(dis_scn_t *scn, int type, void *data)
411 {
412 	callback_arg_t *ca = data;
413 
414 	if (!g_quiet)
415 		(void) printf("\nsection %s\n", dis_section_name(scn));
416 
417 	switch (type) {
418 	case DIS_DATA_RELATIVE:
419 		dump_data(0, dis_section_data(scn), dis_section_size(scn));
420 		break;
421 	case DIS_DATA_ABSOLUTE:
422 		dump_data(dis_section_addr(scn), dis_section_data(scn),
423 		    dis_section_size(scn));
424 		break;
425 	case DIS_TEXT:
426 		dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
427 		    dis_section_data(scn), dis_section_size(scn));
428 		break;
429 	}
430 }
431 
432 /*
433  * Disassemble a function explicitly specified with '-F'.  The 'type' argument
434  * is unused.
435  */
436 /* ARGSUSED */
437 void
438 dis_named_function(dis_func_t *func, int type, void *data)
439 {
440 	callback_arg_t *ca = data;
441 
442 	dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func),
443 	    dis_function_data(func), dis_function_size(func));
444 }
445 
446 /*
447  * Disassemble a complete file.  First, we determine the type of the file based
448  * on the ELF machine type, and instantiate a version of the disassembler
449  * appropriate for the file.  We then resolve any named sections or functions
450  * against the file, and iterate over the results (or all sections if no flags
451  * were specified).
452  */
453 void
454 dis_file(const char *filename)
455 {
456 	dis_tgt_t *tgt, *current;
457 	dis_scnlist_t *sections;
458 	dis_funclist_t *functions;
459 	dis_handle_t *dhp;
460 	GElf_Ehdr ehdr;
461 
462 	/*
463 	 * First, initialize the target
464 	 */
465 	if ((tgt = dis_tgt_create(filename)) == NULL)
466 		return;
467 
468 	if (!g_quiet)
469 		(void) printf("disassembly for %s\n\n",  filename);
470 
471 	/*
472 	 * A given file may contain multiple targets (if it is an archive, for
473 	 * example).  We iterate over all possible targets if this is the case.
474 	 */
475 	for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
476 		dis_tgt_ehdr(current, &ehdr);
477 
478 		/*
479 		 * Eventually, this should probably live within libdisasm, and
480 		 * we should be able to disassemble targets from different
481 		 * architectures.  For now, we only support objects as the
482 		 * native machine type.
483 		 */
484 		switch (ehdr.e_machine) {
485 #ifdef __sparc
486 		case EM_SPARC:
487 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
488 			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
489 				warn("invalid E_IDENT field for SPARC object");
490 				return;
491 			}
492 			g_flags |= DIS_SPARC_V8;
493 			break;
494 
495 		case EM_SPARC32PLUS:
496 		{
497 			uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK;
498 
499 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
500 			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
501 				warn("invalid E_IDENT field for SPARC object");
502 				return;
503 			}
504 
505 			if (flags != 0 &&
506 			    (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
507 			    EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS)
508 				g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
509 			else
510 				g_flags |= DIS_SPARC_V9;
511 			break;
512 		}
513 
514 		case EM_SPARCV9:
515 			if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
516 			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
517 				warn("invalid E_IDENT field for SPARC object");
518 				return;
519 			}
520 
521 			g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
522 			break;
523 #endif /* __sparc */
524 
525 #if defined(__i386) || defined(__amd64)
526 		case EM_386:
527 			g_flags |= DIS_X86_SIZE32;
528 			break;
529 
530 		case EM_AMD64:
531 			g_flags |= DIS_X86_SIZE64;
532 			break;
533 #endif /* __i386 || __amd64 */
534 
535 		default:
536 			die("%s: unsupported ELF machine 0x%x", filename,
537 			    ehdr.e_machine);
538 		}
539 
540 		/*
541 		 * If ET_REL (.o), printing immediate symbols is likely to
542 		 * result in garbage, as symbol lookups on unrelocated
543 		 * immediates find false and useless matches.
544 		 */
545 
546 		if (ehdr.e_type == ET_REL)
547 			g_flags |= DIS_NOIMMSYM;
548 
549 		if (!g_quiet && dis_tgt_member(current) != NULL)
550 			(void) printf("\narchive member %s\n",
551 			    dis_tgt_member(current));
552 
553 		/*
554 		 * Instantiate a libdisasm handle based on the file type.
555 		 */
556 		if ((dhp = dis_handle_create(g_flags, current, do_lookup,
557 		    do_read)) == NULL)
558 			die("%s: failed to initialize disassembler: %s",
559 			    filename, dis_strerror(dis_errno()));
560 
561 		if (g_doall) {
562 			/*
563 			 * With no arguments, iterate over all sections and
564 			 * disassemble only those that contain text.
565 			 */
566 			dis_tgt_section_iter(current, dis_text_section, dhp);
567 		} else {
568 			callback_arg_t ca;
569 
570 			ca.ca_tgt = current;
571 			ca.ca_handle = dhp;
572 
573 			/*
574 			 * If sections or functions were explicitly specified,
575 			 * resolve those names against the object, and iterate
576 			 * over just the resulting data.
577 			 */
578 			sections = dis_namelist_resolve_sections(g_seclist,
579 			    current);
580 			functions = dis_namelist_resolve_functions(g_funclist,
581 			    current);
582 
583 			dis_scnlist_iter(sections, dis_named_section, &ca);
584 			dis_funclist_iter(functions, dis_named_function, &ca);
585 
586 			dis_scnlist_destroy(sections);
587 			dis_funclist_destroy(functions);
588 		}
589 
590 		dis_handle_destroy(dhp);
591 	}
592 
593 	dis_tgt_destroy(tgt);
594 }
595 
/*
 * Print the command synopsis on stderr and exit with status 2.
 */
void
usage(void)
{
	(void) fputs("usage: dis [-CVoqn] [-d sec] \n", stderr);
	(void) fputs("\t[-D sec] [-F function] [-t sec] file ..\n", stderr);
	exit(2);
}
603 
/*
 * One library named with '-l'.  main() builds a singly-linked list of these
 * by pushing each new node on the front, then disassembles and frees them.
 */
typedef struct lib_node {
	char *path;		/* allocated path of the archive to disassemble */
	struct lib_node *next;	/* next node in the list, or NULL */
} lib_node_t;
608 
609 int
610 main(int argc, char **argv)
611 {
612 	int optchar;
613 	int i;
614 	lib_node_t *libs = NULL;
615 
616 	g_funclist = dis_namelist_create();
617 	g_seclist = dis_namelist_create();
618 
619 	while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
620 		switch (optchar) {
621 		case 'C':
622 			g_demangle = 1;
623 			break;
624 		case 'd':
625 			dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
626 			break;
627 		case 'D':
628 			dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
629 			break;
630 		case 'F':
631 			dis_namelist_add(g_funclist, optarg, 0);
632 			break;
633 		case 'l': {
634 			/*
635 			 * The '-l foo' option historically would attempt to
636 			 * disassemble '$LIBDIR/libfoo.a'.  The $LIBDIR
637 			 * environment variable has never been supported or
638 			 * documented for our linker.  However, until this
639 			 * option is formally EOLed, we have to support it.
640 			 */
641 			char *dir;
642 			lib_node_t *node;
643 			size_t len;
644 
645 			if ((dir = getenv("LIBDIR")) == NULL ||
646 			    dir[0] == '\0')
647 				dir = "/usr/lib";
648 			node = safe_malloc(sizeof (lib_node_t));
649 			len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a");
650 			node->path = safe_malloc(len);
651 
652 			(void) snprintf(node->path, len, "%s/lib%s.a", dir,
653 			    optarg);
654 			node->next = libs;
655 			libs = node;
656 			break;
657 		}
658 		case 'L':
659 			/*
660 			 * The '-L' option historically would attempt to read
661 			 * the .debug section of the target to determine source
662 			 * line information in order to annotate the output.
663 			 * No compiler has emitted these sections in many years,
664 			 * and the option has never done what it purported to
665 			 * do.  We silently consume the option for
666 			 * compatibility.
667 			 */
668 			break;
669 		case 'n':
670 			g_numeric = 1;
671 			break;
672 		case 'o':
673 			g_flags |= DIS_OCTAL;
674 			break;
675 		case 'q':
676 			g_quiet = 1;
677 			break;
678 		case 't':
679 			dis_namelist_add(g_seclist, optarg, DIS_TEXT);
680 			break;
681 		case 'V':
682 			(void) printf("Solaris disassembler version 1.0\n");
683 			return (0);
684 		default:
685 			usage();
686 			break;
687 		}
688 	}
689 
690 	argc -= optind;
691 	argv += optind;
692 
693 	if (argc == 0 && libs == NULL) {
694 		warn("no objects specified");
695 		usage();
696 	}
697 
698 	if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
699 		g_doall = 1;
700 
701 	/*
702 	 * See comment for 'l' option, above.
703 	 */
704 	while (libs != NULL) {
705 		lib_node_t *node = libs->next;
706 
707 		dis_file(libs->path);
708 		free(libs->path);
709 		free(libs);
710 		libs = node;
711 	}
712 
713 	for (i = 0; i < argc; i++)
714 		dis_file(argv[i]);
715 
716 	dis_namelist_destroy(g_funclist);
717 	dis_namelist_destroy(g_seclist);
718 
719 	return (g_error);
720 }
721