1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 *
26 * Copyright 2011 Jason King. All rights reserved.
27 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
28 */
29
30 #include <ctype.h>
31 #include <getopt.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <sys/sysmacros.h>
36 #include <sys/elf_SPARC.h>
37
38 #include <libdisasm.h>
39
40 #include "dis_target.h"
41 #include "dis_util.h"
42 #include "dis_list.h"
43
/*
 * Program-wide settings.  They are filled in by main() while parsing the
 * command line (g_flags is additionally extended by dis_file()) and are
 * consulted by the formatting and disassembly routines below.
 */
int g_demangle;		/* Demangle C++ names (-C) */
int g_quiet;		/* Quiet mode (-q): suppress headings */
int g_numeric;		/* Numeric mode (-n): print addresses, not symbols */
int g_flags;		/* libdisasm language flags (-o adds DIS_OCTAL) */
int g_doall;		/* true if no functions or sections were given */

dis_namelist_t *g_funclist;	/* functions to disassemble (-F), if any */
dis_namelist_t *g_seclist;	/* sections to process (-d, -D, -t), if any */
52
/*
 * Section options for -d, -D, and -t.  The value is stored alongside each
 * section name added to g_seclist and is handed back to dis_named_section()
 * as its 'type' argument:
 *
 *	DIS_DATA_RELATIVE (-d)	dump the section as data, addresses from 0
 *	DIS_DATA_ABSOLUTE (-D)	dump the section as data, real addresses
 *	DIS_TEXT (-t)		disassemble the section as text
 */
#define	DIS_DATA_RELATIVE	1
#define	DIS_DATA_ABSOLUTE	2
#define	DIS_TEXT		3
59
/*
 * libdisasm callback data.  Keeps track of current data (function or section)
 * and offset within that data.  One of these is set up by dis_data() and
 * registered with dis_set_data(); do_lookup() and do_read() receive it back
 * as their opaque 'data' argument.
 */
typedef struct dis_buffer {
	dis_tgt_t	*db_tgt;	/* current dis target */
	void		*db_data;	/* function or section data */
	uint64_t	db_addr;	/* address of the first byte of db_data */
	size_t		db_size;	/* size of data, in bytes */
	uint64_t	db_nextaddr;	/* next address to be read; advanced */
					/* by do_read() and by dis_data() */
					/* when an opcode is invalid */
} dis_buffer_t;
71
/*
 * Minimum width of symbol portion of line.
 * NOTE(review): nothing in the visible part of this file references this
 * macro; dis_data() starts its column width at 0 -- confirm whether it is
 * still needed.
 */
#define	MINSYMWIDTH	22
73
74 /*
75 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
76 * formatted symbol, based on the offset and current setttings.
77 */
78 void
getsymname(uint64_t addr,const char * symbol,off_t offset,char * buf,size_t buflen)79 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
80 size_t buflen)
81 {
82 if (symbol == NULL || g_numeric) {
83 if (g_flags & DIS_OCTAL)
84 (void) snprintf(buf, buflen, "0%llo", addr);
85 else
86 (void) snprintf(buf, buflen, "0x%llx", addr);
87 } else {
88 if (g_demangle)
89 symbol = dis_demangle(symbol);
90
91 if (offset == 0)
92 (void) snprintf(buf, buflen, "%s", symbol);
93 else if (g_flags & DIS_OCTAL)
94 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
95 else
96 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
97 }
98 }
99
100 /*
101 * Determine if we are on an architecture with fixed-size instructions,
102 * and if so, what size they are.
103 */
104 static int
insn_size(dis_handle_t * dhp)105 insn_size(dis_handle_t *dhp)
106 {
107 int min = dis_min_instrlen(dhp);
108 int max = dis_max_instrlen(dhp);
109
110 if (min == max)
111 return (min);
112
113 return (0);
114 }
115
116 /*
117 * The main disassembly routine. Given a fixed-sized buffer and starting
118 * address, disassemble the data using the supplied target and libdisasm handle.
119 */
120 void
dis_data(dis_tgt_t * tgt,dis_handle_t * dhp,uint64_t addr,void * data,size_t datalen)121 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
122 size_t datalen)
123 {
124 dis_buffer_t db = { 0 };
125 char buf[BUFSIZE];
126 char symbuf[BUFSIZE];
127 const char *symbol;
128 const char *last_symbol;
129 off_t symoffset;
130 int i;
131 int bytesperline;
132 size_t symsize;
133 int isfunc;
134 size_t symwidth = 0;
135 int ret;
136 int insz = insn_size(dhp);
137
138 db.db_tgt = tgt;
139 db.db_data = data;
140 db.db_addr = addr;
141 db.db_size = datalen;
142
143 dis_set_data(dhp, &db);
144
145 if ((bytesperline = dis_max_instrlen(dhp)) > 6)
146 bytesperline = 6;
147
148 symbol = NULL;
149
150 while (addr < db.db_addr + db.db_size) {
151
152 ret = dis_disassemble(dhp, addr, buf, BUFSIZE);
153 if (ret != 0 && insz > 0) {
154 /*
155 * Since we know instructions are fixed size, we
156 * always know the address of the next instruction
157 */
158 (void) snprintf(buf, sizeof (buf),
159 "*** invalid opcode ***");
160 db.db_nextaddr = addr + insz;
161
162 } else if (ret != 0) {
163 off_t next;
164
165 (void) snprintf(buf, sizeof (buf),
166 "*** invalid opcode ***");
167
168 /*
169 * On architectures with variable sized instructions
170 * we have no way to figure out where the next
171 * instruction starts if we encounter an invalid
172 * instruction. Instead we print the rest of the
173 * instruction stream as hex until we reach the
174 * next valid symbol in the section.
175 */
176 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
177 db.db_nextaddr = db.db_addr + db.db_size;
178 } else {
179 if (next > db.db_size)
180 db.db_nextaddr = db.db_addr +
181 db.db_size;
182 else
183 db.db_nextaddr = addr + next;
184 }
185 }
186
187 /*
188 * Print out the line as:
189 *
190 * address: bytes text
191 *
192 * If there are more than 6 bytes in any given instruction,
193 * spread the bytes across two lines. We try to get symbolic
194 * information for the address, but if that fails we print out
195 * the numeric address instead.
196 *
197 * We try to keep the address portion of the text aligned at
198 * MINSYMWIDTH characters. If we are disassembling a function
199 * with a long name, this can be annoying. So we pick a width
200 * based on the maximum width that the current symbol can be.
201 * This at least produces text aligned within each function.
202 */
203 last_symbol = symbol;
204 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
205 &isfunc);
206 if (symbol == NULL) {
207 symbol = dis_find_section(tgt, addr, &symoffset);
208 symsize = symoffset;
209 }
210
211 if (symbol != last_symbol)
212 getsymname(addr, symbol, symsize, symbuf,
213 sizeof (symbuf));
214
215 symwidth = MAX(symwidth, strlen(symbuf));
216 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
217
218 /*
219 * If we've crossed a new function boundary, print out the
220 * function name on a blank line.
221 */
222 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
223 (void) printf("%s()\n", symbol);
224
225 (void) printf(" %s:%*s ", symbuf,
226 symwidth - strlen(symbuf), "");
227
228 /* print bytes */
229 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
230 i++) {
231 int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
232 if (g_flags & DIS_OCTAL)
233 (void) printf("%03o ", byte);
234 else
235 (void) printf("%02x ", byte);
236 }
237
238 /* trailing spaces for missing bytes */
239 for (; i < bytesperline; i++) {
240 if (g_flags & DIS_OCTAL)
241 (void) printf(" ");
242 else
243 (void) printf(" ");
244 }
245
246 /* contents of disassembly */
247 (void) printf(" %s", buf);
248
249 /* excess bytes that spill over onto subsequent lines */
250 for (; i < db.db_nextaddr - addr; i++) {
251 int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
252 if (i % bytesperline == 0)
253 (void) printf("\n %*s ", symwidth, "");
254 if (g_flags & DIS_OCTAL)
255 (void) printf("%03o ", byte);
256 else
257 (void) printf("%02x ", byte);
258 }
259
260 (void) printf("\n");
261
262 addr = db.db_nextaddr;
263 }
264 }
265
266 /*
267 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup
268 * function, and convert the result using getsymname().
269 */
270 int
do_lookup(void * data,uint64_t addr,char * buf,size_t buflen,uint64_t * start,size_t * symlen)271 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
272 size_t *symlen)
273 {
274 dis_buffer_t *db = data;
275 const char *symbol;
276 off_t offset;
277 size_t size;
278
279 /*
280 * If NULL symbol is returned, getsymname takes care of
281 * printing appropriate address in buf instead of symbol.
282 */
283 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
284
285 if (buf != NULL)
286 getsymname(addr, symbol, offset, buf, buflen);
287
288 if (start != NULL)
289 *start = addr - offset;
290 if (symlen != NULL)
291 *symlen = size;
292
293 if (symbol == NULL)
294 return (-1);
295
296 return (0);
297 }
298
299 /*
300 * libdisasm wrapper around target reading. libdisasm will always read data
301 * in order, so update our current offset within the buffer appropriately.
302 * We only support reading from within the current object; libdisasm should
303 * never ask us to do otherwise.
304 */
305 int
do_read(void * data,uint64_t addr,void * buf,size_t len)306 do_read(void *data, uint64_t addr, void *buf, size_t len)
307 {
308 dis_buffer_t *db = data;
309 size_t offset;
310
311 if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
312 return (-1);
313
314 offset = addr - db->db_addr;
315 len = MIN(len, db->db_size - offset);
316
317 (void) memcpy(buf, (char *)db->db_data + offset, len);
318
319 db->db_nextaddr = addr + len;
320
321 return (len);
322 }
323
/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 *	addr	address to display for the first byte of 'data'
 *	data	the bytes to dump
 *	datalen	number of bytes in 'data'
 *
 * Each output line covers one 16-byte aligned window:
 *
 *	address: hex bytes in two-byte groups  ASCII
 *
 * Positions in the window that fall outside [addr, addr + datalen) are
 * printed as blanks so that the columns stay aligned.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	const uint8_t *bytes = data;
	const uint64_t end = addr + datalen;
	/*
	 * Keep the cursor 64 bits wide regardless of the host pointer size so
	 * that addresses from 64-bit objects are never truncated.
	 */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	int width;
	int i;

	/*
	 * Determine if the address given to us fits in 32-bit range, in which
	 * case print it with 8 hex digits rather than 16.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print two spaces in place of the two hex digits.
		 */
		for (i = 0; i < 16; i++) {
			uint64_t pos = curaddr + i;

			if (pos < addr || pos >= end)
				(void) printf("  ");
			else
				(void) printf("%02x", bytes[pos - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation
		 */
		for (i = 0; i < 16; i++) {
			uint64_t pos = curaddr + i;

			if (pos < addr || pos >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[pos - addr];
				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
391
392 /*
393 * Disassemble a section implicitly specified as part of a file. This function
394 * is called for all sections when no other flags are specified. We ignore any
395 * data sections, and print out only those sections containing text.
396 */
397 void
dis_text_section(dis_tgt_t * tgt,dis_scn_t * scn,void * data)398 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
399 {
400 dis_handle_t *dhp = data;
401
402 /* ignore data sections */
403 if (!dis_section_istext(scn))
404 return;
405
406 if (!g_quiet)
407 (void) printf("\nsection %s\n", dis_section_name(scn));
408
409 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
410 dis_section_size(scn));
411 }
412
/*
 * Structure passed to dis_named_{section,function} which keeps track of both
 * the target and the libdisasm handle.  dis_file() fills one in for the
 * target being processed and passes its address as the iterator's 'data'
 * argument.
 */
typedef struct callback_arg {
	dis_tgt_t	*ca_tgt;	/* target being disassembled */
	dis_handle_t	*ca_handle;	/* libdisasm handle for that target */
} callback_arg_t;
421
422 /*
423 * Disassemble a section explicitly named with -s, -d, or -D. The 'type'
424 * argument contains the type of argument given. Pass the data onto the
425 * appropriate helper routine.
426 */
427 void
dis_named_section(dis_scn_t * scn,int type,void * data)428 dis_named_section(dis_scn_t *scn, int type, void *data)
429 {
430 callback_arg_t *ca = data;
431
432 if (!g_quiet)
433 (void) printf("\nsection %s\n", dis_section_name(scn));
434
435 switch (type) {
436 case DIS_DATA_RELATIVE:
437 dump_data(0, dis_section_data(scn), dis_section_size(scn));
438 break;
439 case DIS_DATA_ABSOLUTE:
440 dump_data(dis_section_addr(scn), dis_section_data(scn),
441 dis_section_size(scn));
442 break;
443 case DIS_TEXT:
444 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
445 dis_section_data(scn), dis_section_size(scn));
446 break;
447 }
448 }
449
450 /*
451 * Disassemble a function explicitly specified with '-F'. The 'type' argument
452 * is unused.
453 */
454 /* ARGSUSED */
455 void
dis_named_function(dis_func_t * func,int type,void * data)456 dis_named_function(dis_func_t *func, int type, void *data)
457 {
458 callback_arg_t *ca = data;
459
460 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func),
461 dis_function_data(func), dis_function_size(func));
462 }
463
464 /*
465 * Disassemble a complete file. First, we determine the type of the file based
466 * on the ELF machine type, and instantiate a version of the disassembler
467 * appropriate for the file. We then resolve any named sections or functions
468 * against the file, and iterate over the results (or all sections if no flags
469 * were specified).
470 */
471 void
dis_file(const char * filename)472 dis_file(const char *filename)
473 {
474 dis_tgt_t *tgt, *current;
475 dis_scnlist_t *sections;
476 dis_funclist_t *functions;
477 dis_handle_t *dhp;
478 GElf_Ehdr ehdr;
479
480 /*
481 * First, initialize the target
482 */
483 if ((tgt = dis_tgt_create(filename)) == NULL)
484 return;
485
486 if (!g_quiet)
487 (void) printf("disassembly for %s\n\n", filename);
488
489 /*
490 * A given file may contain multiple targets (if it is an archive, for
491 * example). We iterate over all possible targets if this is the case.
492 */
493 for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
494 dis_tgt_ehdr(current, &ehdr);
495
496 /*
497 * Eventually, this should probably live within libdisasm, and
498 * we should be able to disassemble targets from different
499 * architectures. For now, we only support objects as the
500 * native machine type.
501 */
502 switch (ehdr.e_machine) {
503 case EM_SPARC:
504 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
505 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
506 warn("invalid E_IDENT field for SPARC object");
507 return;
508 }
509 g_flags |= DIS_SPARC_V8;
510 break;
511
512 case EM_SPARC32PLUS:
513 {
514 uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK;
515
516 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
517 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
518 warn("invalid E_IDENT field for SPARC object");
519 return;
520 }
521
522 if (flags != 0 &&
523 (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
524 EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS)
525 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
526 else
527 g_flags |= DIS_SPARC_V9;
528 break;
529 }
530
531 case EM_SPARCV9:
532 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
533 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
534 warn("invalid E_IDENT field for SPARC object");
535 return;
536 }
537
538 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
539 break;
540
541 case EM_386:
542 g_flags |= DIS_X86_SIZE32;
543 break;
544
545 case EM_AMD64:
546 g_flags |= DIS_X86_SIZE64;
547 break;
548
549 default:
550 die("%s: unsupported ELF machine 0x%x", filename,
551 ehdr.e_machine);
552 }
553
554 /*
555 * If ET_REL (.o), printing immediate symbols is likely to
556 * result in garbage, as symbol lookups on unrelocated
557 * immediates find false and useless matches.
558 */
559
560 if (ehdr.e_type == ET_REL)
561 g_flags |= DIS_NOIMMSYM;
562
563 if (!g_quiet && dis_tgt_member(current) != NULL)
564 (void) printf("\narchive member %s\n",
565 dis_tgt_member(current));
566
567 /*
568 * Instantiate a libdisasm handle based on the file type.
569 */
570 if ((dhp = dis_handle_create(g_flags, current, do_lookup,
571 do_read)) == NULL)
572 die("%s: failed to initialize disassembler: %s",
573 filename, dis_strerror(dis_errno()));
574
575 if (g_doall) {
576 /*
577 * With no arguments, iterate over all sections and
578 * disassemble only those that contain text.
579 */
580 dis_tgt_section_iter(current, dis_text_section, dhp);
581 } else {
582 callback_arg_t ca;
583
584 ca.ca_tgt = current;
585 ca.ca_handle = dhp;
586
587 /*
588 * If sections or functions were explicitly specified,
589 * resolve those names against the object, and iterate
590 * over just the resulting data.
591 */
592 sections = dis_namelist_resolve_sections(g_seclist,
593 current);
594 functions = dis_namelist_resolve_functions(g_funclist,
595 current);
596
597 dis_scnlist_iter(sections, dis_named_section, &ca);
598 dis_funclist_iter(functions, dis_named_function, &ca);
599
600 dis_scnlist_destroy(sections);
601 dis_funclist_destroy(functions);
602 }
603
604 dis_handle_destroy(dhp);
605 }
606
607 dis_tgt_destroy(tgt);
608 }
609
/*
 * Print the command synopsis to stderr and exit with status 2.
 */
void
usage(void)
{
	(void) fputs("usage: dis [-CVoqn] [-d sec] \n", stderr);
	(void) fputs("\t[-D sec] [-F function] [-t sec] file ..\n", stderr);
	exit(2);
}
617
/*
 * Singly-linked list of archive paths collected from '-l' options.  main()
 * pushes each new entry onto the head of the list, then disassembles and
 * frees the entries once option parsing is complete.
 */
typedef struct lib_node {
	char		*path;	/* allocated "<dir>/lib<name>.a" string */
	struct lib_node	*next;	/* next entry, or NULL at the end */
} lib_node_t;
622
623 int
main(int argc,char ** argv)624 main(int argc, char **argv)
625 {
626 int optchar;
627 int i;
628 lib_node_t *libs = NULL;
629
630 g_funclist = dis_namelist_create();
631 g_seclist = dis_namelist_create();
632
633 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
634 switch (optchar) {
635 case 'C':
636 g_demangle = 1;
637 break;
638 case 'd':
639 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
640 break;
641 case 'D':
642 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
643 break;
644 case 'F':
645 dis_namelist_add(g_funclist, optarg, 0);
646 break;
647 case 'l': {
648 /*
649 * The '-l foo' option historically would attempt to
650 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR
651 * environment variable has never been supported or
652 * documented for our linker. However, until this
653 * option is formally EOLed, we have to support it.
654 */
655 char *dir;
656 lib_node_t *node;
657 size_t len;
658
659 if ((dir = getenv("LIBDIR")) == NULL ||
660 dir[0] == '\0')
661 dir = "/usr/lib";
662 node = safe_malloc(sizeof (lib_node_t));
663 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a");
664 node->path = safe_malloc(len);
665
666 (void) snprintf(node->path, len, "%s/lib%s.a", dir,
667 optarg);
668 node->next = libs;
669 libs = node;
670 break;
671 }
672 case 'L':
673 /*
674 * The '-L' option historically would attempt to read
675 * the .debug section of the target to determine source
676 * line information in order to annotate the output.
677 * No compiler has emitted these sections in many years,
678 * and the option has never done what it purported to
679 * do. We silently consume the option for
680 * compatibility.
681 */
682 break;
683 case 'n':
684 g_numeric = 1;
685 break;
686 case 'o':
687 g_flags |= DIS_OCTAL;
688 break;
689 case 'q':
690 g_quiet = 1;
691 break;
692 case 't':
693 dis_namelist_add(g_seclist, optarg, DIS_TEXT);
694 break;
695 case 'V':
696 (void) printf("Solaris disassembler version 1.0\n");
697 return (0);
698 default:
699 usage();
700 break;
701 }
702 }
703
704 argc -= optind;
705 argv += optind;
706
707 if (argc == 0 && libs == NULL) {
708 warn("no objects specified");
709 usage();
710 }
711
712 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
713 g_doall = 1;
714
715 /*
716 * See comment for 'l' option, above.
717 */
718 while (libs != NULL) {
719 lib_node_t *node = libs->next;
720
721 dis_file(libs->path);
722 free(libs->path);
723 free(libs);
724 libs = node;
725 }
726
727 for (i = 0; i < argc; i++)
728 dis_file(argv[i]);
729
730 dis_namelist_destroy(g_funclist);
731 dis_namelist_destroy(g_seclist);
732
733 return (g_error);
734 }
735