xref: /titanic_41/usr/src/cmd/dis/dis_main.c (revision d89fccd8788afe1e920f842edd883fe192a1b8fe)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  
22  /*
23   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24   * Use is subject to license terms.
25   */
26  
27  #pragma ident	"%Z%%M%	%I%	%E% SMI"
28  
29  #include <ctype.h>
30  #include <getopt.h>
31  #include <stdio.h>
32  #include <stdlib.h>
33  #include <string.h>
34  #include <sys/sysmacros.h>
35  #include <sys/elf_SPARC.h>
36  
37  #include <libdisasm.h>
38  
39  #include "dis_target.h"
40  #include "dis_util.h"
41  #include "dis_list.h"
42  
43  int g_demangle;		/* Demangle C++ names */
44  int g_quiet;		/* Quiet mode */
45  int g_numeric;		/* Numeric mode */
46  int g_flags;		/* libdisasm language flags */
47  int g_doall;		/* true if no functions or sections were given */
48  
49  dis_namelist_t *g_funclist;	/* list of functions to disassemble, if any */
50  dis_namelist_t *g_seclist;	/* list of sections to disassemble, if any */
51  
/*
 * Kinds of section request recorded in g_seclist.  The value is handed back
 * to dis_named_section() as its 'type' argument.
 */
#define	DIS_DATA_RELATIVE	1	/* -d: dump data, addresses from 0 */
#define	DIS_DATA_ABSOLUTE	2	/* -D: dump data, section addresses */
#define	DIS_TEXT		3	/* -t: disassemble section as text */
58  
59  /*
60   * libdisasm callback data.  Keeps track of current data (function or section)
61   * and offset within that data.
62   */
63  typedef struct dis_buffer {
64  	dis_tgt_t	*db_tgt;	/* current dis target */
65  	void		*db_data;	/* function or section data */
66  	uint64_t	db_addr;	/* address of function start */
67  	size_t		db_size;	/* size of data */
68  	uint64_t	db_nextaddr;	/* next address to be read */
69  } dis_buffer_t;
70  
/* Narrowest column used for the "symbol+offset:" part of an output line. */
#define	MINSYMWIDTH	22
72  
73  /*
74   * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
75   * formatted symbol, based on the offset and current setttings.
76   */
77  void
78  getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
79      size_t buflen)
80  {
81  	if (symbol == NULL || g_numeric)
82  		(void) snprintf(buf, buflen, "%llx", addr);
83  	else {
84  		if (g_demangle)
85  			symbol = dis_demangle(symbol);
86  
87  		if (offset == 0)
88  			(void) snprintf(buf, buflen, "%s", symbol);
89  		else if (g_flags & DIS_OCTAL)
90  			(void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
91  		else
92  			(void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
93  	}
94  }
95  
96  /*
97   * The main disassembly routine.  Given a fixed-sized buffer and starting
98   * address, disassemble the data using the supplied target and libdisasm handle.
99   */
100  void
101  dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
102      size_t datalen)
103  {
104  	dis_buffer_t db = { 0 };
105  	char buf[BUFSIZE];
106  	char symbuf[BUFSIZE];
107  	const char *symbol;
108  	off_t symoffset;
109  	int i;
110  	int bytesperline;
111  	size_t symsize;
112  	int isfunc;
113  	size_t symwidth = 0;
114  
115  	db.db_tgt = tgt;
116  	db.db_data = data;
117  	db.db_addr = addr;
118  	db.db_size = datalen;
119  
120  	dis_set_data(dhp, &db);
121  
122  	if ((bytesperline = dis_max_instrlen(dhp)) > 6)
123  		bytesperline = 6;
124  
125  	while (addr < db.db_addr + db.db_size) {
126  
127  		if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) {
128  			/*
129  			 * If we encounter an invalid opcode, we just
130  			 * print "*** invalid opcode ***" at that first bad
131  			 * instruction and continue with printing the rest
132  			 * of the instruction stream as hex data,
133  			 * We then find the next valid symbol in the section,
134  			 * and disassemble from there.
135  			 */
136  			off_t next;
137  
138  			(void) snprintf(buf, sizeof (buf),
139  			    "*** invalid opcode ***");
140  
141  			if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
142  				db.db_nextaddr = db.db_addr + db.db_size;
143  			} else {
144  				if (next > db.db_size)
145  					db.db_nextaddr = db.db_addr +
146  					    db.db_size;
147  				else
148  					db.db_nextaddr = addr + next;
149  			}
150  		}
151  
152  		/*
153  		 * Print out the line as:
154  		 *
155  		 * 	address:	bytes	text
156  		 *
157  		 * If there are more than 6 bytes in any given instruction,
158  		 * spread the bytes across two lines.  We try to get symbolic
159  		 * information for the address, but if that fails we print out
160  		 * the numeric address instead.
161  		 *
162  		 * We try to keep the address portion of the text aligned at
163  		 * MINSYMWIDTH characters.  If we are disassembling a function
164  		 * with a long name, this can be annoying.  So we pick a width
165  		 * based on the maximum width that the current symbol can be.
166  		 * This at least produces text aligned within each function.
167  		 */
168  		symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
169  		    &isfunc);
170  		/* Get the maximum length for this symbol */
171  		getsymname(addr, symbol, symsize, symbuf, sizeof (symbuf));
172  		symwidth = MAX(strlen(symbuf), MINSYMWIDTH);
173  
174  		getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
175  
176  		/*
177  		 * If we've crossed a new function boundary, print out the
178  		 * function name on a blank line.
179  		 */
180  		if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
181  			(void) printf("%s()\n", symbol);
182  
183  		(void) printf("    %s:%*s ", symbuf,
184  		    symwidth - strlen(symbuf), "");
185  
186  		/* print bytes */
187  		for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
188  		    i++) {
189  			int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
190  			if (g_flags & DIS_OCTAL)
191  				(void) printf("%03o ", byte);
192  			else
193  				(void) printf("%02x ", byte);
194  		}
195  
196  		/* trailing spaces for missing bytes */
197  		for (; i < bytesperline; i++) {
198  			if (g_flags & DIS_OCTAL)
199  				(void) printf("    ");
200  			else
201  				(void) printf("   ");
202  		}
203  
204  		/* contents of disassembly */
205  		(void) printf(" %s", buf);
206  
207  		/* excess bytes that spill over onto subsequent lines */
208  		for (; i < db.db_nextaddr - addr; i++) {
209  			int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
210  			if (i % bytesperline == 0)
211  				(void) printf("\n    %*s  ", symwidth, "");
212  			if (g_flags & DIS_OCTAL)
213  				(void) printf("%03o ", byte);
214  			else
215  				(void) printf("%02x ", byte);
216  		}
217  
218  		(void) printf("\n");
219  
220  		addr = db.db_nextaddr;
221  	}
222  }
223  
224  /*
225   * libdisasm wrapper around symbol lookup.  Invoke the target-specific lookup
226   * function, and convert the result using getsymname().
227   */
228  int
229  do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
230      size_t *symlen)
231  {
232  	dis_buffer_t *db = data;
233  	const char *symbol;
234  	off_t offset;
235  	size_t size;
236  
237  	/*
238  	 * If NULL symbol is returned, getsymname takes care of
239  	 * printing appropriate address in buf instead of symbol.
240  	 */
241  	symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
242  
243  	if (buf != NULL)
244  		getsymname(addr, symbol, offset, buf, buflen);
245  
246  	if (start != NULL)
247  		*start = addr - offset;
248  	if (symlen != NULL)
249  		*symlen = size;
250  
251  	return (0);
252  }
253  
254  /*
255   * libdisasm wrapper around target reading.  libdisasm will always read data
256   * in order, so update our current offset within the buffer appropriately.
257   * We only support reading from within the current object; libdisasm should
258   * never ask us to do otherwise.
259   */
260  int
261  do_read(void *data, uint64_t addr, void *buf, size_t len)
262  {
263  	dis_buffer_t *db = data;
264  	size_t offset;
265  
266  	if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
267  		return (-1);
268  
269  	offset = addr - db->db_addr;
270  	len = MIN(len, db->db_size - offset);
271  
272  	(void) memcpy(buf, (char *)db->db_data + offset, len);
273  
274  	db->db_nextaddr = addr + len;
275  
276  	return (len);
277  }
278  
/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 * 'addr' is the address to display for the first byte of 'data', and
 * 'datalen' is the number of bytes.  Output is written to stdout in rows of
 * 16 bytes aligned on 16-byte boundaries; positions in the first and last row
 * that fall outside [addr, addr + datalen) are left blank.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	/*
	 * The row address is kept as a full 64-bit value (not uintptr_t) so
	 * that 64-bit addresses are not truncated in a 32-bit build.
	 */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	uint64_t end = addr + datalen;
	const uint8_t *bytes = data;
	int i;
	int width;

	/*
	 * If the whole range fits in 32 bits, show addresses as 8 hex digits;
	 * otherwise use 16.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print spaces.
		 */
		for (i = 0; i < 16; i++) {
			uint64_t cur = curaddr + i;

			if (cur < addr || cur >= end)
				(void) printf("  ");
			else
				(void) printf("%02x", bytes[cur - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation
		 */
		for (i = 0; i < 16; i++) {
			uint64_t cur = curaddr + i;

			if (cur < addr || cur >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[cur - addr];
				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
346  
347  /*
348   * Disassemble a section implicitly specified as part of a file.  This function
349   * is called for all sections when no other flags are specified.  We ignore any
350   * data sections, and print out only those sections containing text.
351   */
352  void
353  dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
354  {
355  	dis_handle_t *dhp = data;
356  
357  	/* ignore data sections */
358  	if (!dis_section_istext(scn))
359  		return;
360  
361  	if (!g_quiet)
362  		(void) printf("\nsection %s\n", dis_section_name(scn));
363  
364  	dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
365  	    dis_section_size(scn));
366  }
367  
368  /*
369   * Structure passed to dis_named_{section,function} which keeps track of both
370   * the target and the libdisasm handle.
371   */
372  typedef struct callback_arg {
373  	dis_tgt_t	*ca_tgt;
374  	dis_handle_t	*ca_handle;
375  } callback_arg_t;
376  
377  /*
378   * Disassemble a section explicitly named with -s, -d, or -D.  The 'type'
379   * argument contains the type of argument given.  Pass the data onto the
380   * appropriate helper routine.
381   */
382  void
383  dis_named_section(dis_scn_t *scn, int type, void *data)
384  {
385  	callback_arg_t *ca = data;
386  
387  	if (!g_quiet)
388  		(void) printf("\nsection %s\n", dis_section_name(scn));
389  
390  	switch (type) {
391  	case DIS_DATA_RELATIVE:
392  		dump_data(0, dis_section_data(scn), dis_section_size(scn));
393  		break;
394  	case DIS_DATA_ABSOLUTE:
395  		dump_data(dis_section_addr(scn), dis_section_data(scn),
396  		    dis_section_size(scn));
397  		break;
398  	case DIS_TEXT:
399  		dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
400  		    dis_section_data(scn), dis_section_size(scn));
401  		break;
402  	}
403  }
404  
405  /*
406   * Disassemble a function explicitly specified with '-F'.  The 'type' argument
407   * is unused.
408   */
409  /* ARGSUSED */
410  void
411  dis_named_function(dis_func_t *func, int type, void *data)
412  {
413  	callback_arg_t *ca = data;
414  
415  	dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func),
416  	    dis_function_data(func), dis_function_size(func));
417  }
418  
419  /*
420   * Disassemble a complete file.  First, we determine the type of the file based
421   * on the ELF machine type, and instantiate a version of the disassembler
422   * appropriate for the file.  We then resolve any named sections or functions
423   * against the file, and iterate over the results (or all sections if no flags
424   * were specified).
425   */
426  void
427  dis_file(const char *filename)
428  {
429  	dis_tgt_t *tgt, *current;
430  	dis_scnlist_t *sections;
431  	dis_funclist_t *functions;
432  	dis_handle_t *dhp;
433  	GElf_Ehdr ehdr;
434  
435  	/*
436  	 * First, initialize the target
437  	 */
438  	if ((tgt = dis_tgt_create(filename)) == NULL)
439  		return;
440  
441  	if (!g_quiet)
442  		(void) printf("disassembly for %s\n\n",  filename);
443  
444  	/*
445  	 * A given file may contain multiple targets (if it is an archive, for
446  	 * example).  We iterate over all possible targets if this is the case.
447  	 */
448  	for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
449  		dis_tgt_ehdr(current, &ehdr);
450  
451  		/*
452  		 * Eventually, this should probably live within libdisasm, and
453  		 * we should be able to disassemble targets from different
454  		 * architectures.  For now, we only support objects as the
455  		 * native machine type.
456  		 */
457  		switch (ehdr.e_machine) {
458  #ifdef __sparc
459  		case EM_SPARC:
460  			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
461  			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
462  				warn("invalid E_IDENT field for SPARC object");
463  				return;
464  			}
465  			g_flags |= DIS_SPARC_V8;
466  			break;
467  
468  		case EM_SPARC32PLUS:
469  			if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
470  			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
471  				warn("invalid E_IDENT field for SPARC object");
472  				return;
473  			}
474  
475  			switch (ehdr.e_flags & EF_SPARC_32PLUS_MASK) {
476  			case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
477  			    EF_SPARC_SUN_US3):
478  			case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1):
479  				g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
480  			default:
481  				g_flags |= DIS_SPARC_V9;
482  			}
483  			break;
484  
485  		case EM_SPARCV9:
486  			if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
487  			    ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
488  				warn("invalid E_IDENT field for SPARC object");
489  				return;
490  			}
491  
492  			g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
493  			break;
494  #endif /* __sparc */
495  
496  #if defined(__i386) || defined(__amd64)
497  		case EM_386:
498  			g_flags |= DIS_X86_SIZE32;
499  			break;
500  
501  		case EM_AMD64:
502  			g_flags |= DIS_X86_SIZE64;
503  			break;
504  #endif /* __i386 || __amd64 */
505  
506  		default:
507  			die("%s: unsupported ELF machine 0x%x", filename,
508  			    ehdr.e_machine);
509  		}
510  
511  		if (!g_quiet && dis_tgt_member(current) != NULL)
512  			(void) printf("\narchive member %s\n",
513  			    dis_tgt_member(current));
514  
515  		/*
516  		 * Instantiate a libdisasm handle based on the file type.
517  		 */
518  		if ((dhp = dis_handle_create(g_flags, current, do_lookup,
519  		    do_read)) == NULL)
520  			die("%s: failed to initialize disassembler: %s",
521  			    filename, dis_strerror(dis_errno()));
522  
523  		if (g_doall) {
524  			/*
525  			 * With no arguments, iterate over all sections and
526  			 * disassemble only those that contain text.
527  			 */
528  			dis_tgt_section_iter(current, dis_text_section, dhp);
529  		} else {
530  			callback_arg_t ca;
531  
532  			ca.ca_tgt = current;
533  			ca.ca_handle = dhp;
534  
535  			/*
536  			 * If sections or functions were explicitly specified,
537  			 * resolve those names against the object, and iterate
538  			 * over just the resulting data.
539  			 */
540  			sections = dis_namelist_resolve_sections(g_seclist,
541  			    current);
542  			functions = dis_namelist_resolve_functions(g_funclist,
543  			    current);
544  
545  			dis_scnlist_iter(sections, dis_named_section, &ca);
546  			dis_funclist_iter(functions, dis_named_function, &ca);
547  
548  			dis_scnlist_destroy(sections);
549  			dis_funclist_destroy(functions);
550  		}
551  
552  		dis_handle_destroy(dhp);
553  	}
554  
555  	dis_tgt_destroy(tgt);
556  }
557  
/*
 * Print the usage message on stderr and exit with status 2.
 */
void
usage(void)
{
	(void) fputs("usage: dis [-CVoqn] [-d sec] \n", stderr);
	(void) fputs("\t[-D sec] [-F function] [-t sec] file ..\n", stderr);
	exit(2);
}
565  
/*
 * Singly-linked list of library paths built from '-l' options.
 */
typedef struct lib_node {
	/* allocated path of the archive to disassemble */
	char *path;
	/* next entry, or NULL at the end of the list */
	struct lib_node *next;
} lib_node_t;
570  
571  int
572  main(int argc, char **argv)
573  {
574  	int optchar;
575  	int i;
576  	lib_node_t *libs = NULL;
577  
578  	g_funclist = dis_namelist_create();
579  	g_seclist = dis_namelist_create();
580  
581  	while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
582  		switch (optchar) {
583  		case 'C':
584  			g_demangle = 1;
585  			break;
586  		case 'd':
587  			dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
588  			break;
589  		case 'D':
590  			dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
591  			break;
592  		case 'F':
593  			dis_namelist_add(g_funclist, optarg, 0);
594  			break;
595  		case 'l': {
596  			/*
597  			 * The '-l foo' option historically would attempt to
598  			 * disassemble '$LIBDIR/libfoo.a'.  The $LIBDIR
599  			 * environment variable has never been supported or
600  			 * documented for our linker.  However, until this
601  			 * option is formally EOLed, we have to support it.
602  			 */
603  			char *dir;
604  			lib_node_t *node;
605  			size_t len;
606  
607  			if ((dir = getenv("LIBDIR")) == NULL ||
608  			    dir[0] == '\0')
609  				dir = "/usr/lib";
610  			node = safe_malloc(sizeof (lib_node_t));
611  			len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a");
612  			node->path = safe_malloc(len);
613  
614  			(void) snprintf(node->path, len, "%s/lib%s.a", dir,
615  			    optarg);
616  			node->next = libs;
617  			libs = node;
618  			break;
619  		}
620  		case 'L':
621  			/*
622  			 * The '-L' option historically would attempt to read
623  			 * the .debug section of the target to determine source
624  			 * line information in order to annotate the output.
625  			 * No compiler has emitted these sections in many years,
626  			 * and the option has never done what it purported to
627  			 * do.  We silently consume the option for
628  			 * compatibility.
629  			 */
630  			break;
631  		case 'n':
632  			g_numeric = 1;
633  			break;
634  		case 'o':
635  			g_flags |= DIS_OCTAL;
636  			break;
637  		case 'q':
638  			g_quiet = 1;
639  			break;
640  		case 't':
641  			dis_namelist_add(g_seclist, optarg, DIS_TEXT);
642  			break;
643  		case 'V':
644  			(void) printf("Solaris disassembler version 1.0\n");
645  			return (0);
646  		default:
647  			usage();
648  			break;
649  		}
650  	}
651  
652  	argc -= optind;
653  	argv += optind;
654  
655  	if (argc == 0 && libs == NULL) {
656  		warn("no objects specified");
657  		usage();
658  	}
659  
660  	if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
661  		g_doall = 1;
662  
663  	/*
664  	 * See comment for 'l' option, above.
665  	 */
666  	while (libs != NULL) {
667  		lib_node_t *node = libs->next;
668  
669  		dis_file(libs->path);
670  		free(libs->path);
671  		free(libs);
672  		libs = node;
673  	}
674  
675  	for (i = 0; i < argc; i++)
676  		dis_file(argv[i]);
677  
678  	dis_namelist_destroy(g_funclist);
679  	dis_namelist_destroy(g_seclist);
680  
681  	return (g_error);
682  }
683