1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * Copyright 2011 Jason King. All rights reserved. 27 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org> 28 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net> 29 */ 30 31 #include <ctype.h> 32 #include <getopt.h> 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <sys/sysmacros.h> 37 #include <sys/elf_SPARC.h> 38 39 #include <libdisasm.h> 40 41 #include "dis_target.h" 42 #include "dis_util.h" 43 #include "dis_list.h" 44 45 int g_demangle; /* Demangle C++ names */ 46 int g_quiet; /* Quiet mode */ 47 int g_numeric; /* Numeric mode */ 48 int g_flags; /* libdisasm language flags */ 49 int g_doall; /* true if no functions or sections were given */ 50 51 dis_namelist_t *g_funclist; /* list of functions to disassemble, if any */ 52 dis_namelist_t *g_seclist; /* list of sections to disassemble, if any */ 53 54 /* 55 * Section options for -d, -D, and -s 56 */ 57 #define DIS_DATA_RELATIVE 1 58 #define DIS_DATA_ABSOLUTE 2 59 #define DIS_TEXT 3 60 61 /* 62 * libdisasm callback data. Keeps track of current data (function or section) 63 * and offset within that data. 64 */ 65 typedef struct dis_buffer { 66 dis_tgt_t *db_tgt; /* current dis target */ 67 void *db_data; /* function or section data */ 68 uint64_t db_addr; /* address of function start */ 69 size_t db_size; /* size of data */ 70 uint64_t db_nextaddr; /* next address to be read */ 71 } dis_buffer_t; 72 73 #define MINSYMWIDTH 22 /* Minimum width of symbol portion of line */ 74 75 /* 76 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately 77 * formatted symbol, based on the offset and current setttings. 78 */ 79 void 80 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf, 81 size_t buflen) 82 { 83 if (symbol == NULL || g_numeric) { 84 if (g_flags & DIS_OCTAL) 85 (void) snprintf(buf, buflen, "0%llo", addr); 86 else 87 (void) snprintf(buf, buflen, "0x%llx", addr); 88 } else { 89 if (g_demangle) 90 symbol = dis_demangle(symbol); 91 92 if (offset == 0) 93 (void) snprintf(buf, buflen, "%s", symbol); 94 else if (g_flags & DIS_OCTAL) 95 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset); 96 else 97 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset); 98 } 99 } 100 101 /* 102 * Determine if we are on an architecture with fixed-size instructions, 103 * and if so, what size they are. 104 */ 105 static int 106 insn_size(dis_handle_t *dhp) 107 { 108 int min = dis_min_instrlen(dhp); 109 int max = dis_max_instrlen(dhp); 110 111 if (min == max) 112 return (min); 113 114 return (0); 115 } 116 117 /* 118 * The main disassembly routine. Given a fixed-sized buffer and starting 119 * address, disassemble the data using the supplied target and libdisasm handle. 120 */ 121 void 122 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data, 123 size_t datalen) 124 { 125 dis_buffer_t db = { 0 }; 126 char buf[BUFSIZE]; 127 char symbuf[BUFSIZE]; 128 const char *symbol; 129 const char *last_symbol; 130 off_t symoffset; 131 int i; 132 int bytesperline; 133 size_t symsize; 134 int isfunc; 135 size_t symwidth = 0; 136 int ret; 137 int insz = insn_size(dhp); 138 139 db.db_tgt = tgt; 140 db.db_data = data; 141 db.db_addr = addr; 142 db.db_size = datalen; 143 144 dis_set_data(dhp, &db); 145 146 if ((bytesperline = dis_max_instrlen(dhp)) > 6) 147 bytesperline = 6; 148 149 symbol = NULL; 150 151 while (addr < db.db_addr + db.db_size) { 152 153 ret = dis_disassemble(dhp, addr, buf, BUFSIZE); 154 if (ret != 0 && insz > 0) { 155 /* 156 * Since we know instructions are fixed size, we 157 * always know the address of the next instruction 158 */ 159 (void) snprintf(buf, sizeof (buf), 160 "*** invalid opcode ***"); 161 db.db_nextaddr = addr + insz; 162 163 } else if (ret != 0) { 164 off_t next; 165 166 (void) snprintf(buf, sizeof (buf), 167 "*** invalid opcode ***"); 168 169 /* 170 * On architectures with variable sized instructions 171 * we have no way to figure out where the next 172 * instruction starts if we encounter an invalid 173 * instruction. Instead we print the rest of the 174 * instruction stream as hex until we reach the 175 * next valid symbol in the section. 176 */ 177 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) { 178 db.db_nextaddr = db.db_addr + db.db_size; 179 } else { 180 if (next > db.db_size) 181 db.db_nextaddr = db.db_addr + 182 db.db_size; 183 else 184 db.db_nextaddr = addr + next; 185 } 186 } 187 188 /* 189 * Print out the line as: 190 * 191 * address: bytes text 192 * 193 * If there are more than 6 bytes in any given instruction, 194 * spread the bytes across two lines. We try to get symbolic 195 * information for the address, but if that fails we print out 196 * the numeric address instead. 197 * 198 * We try to keep the address portion of the text aligned at 199 * MINSYMWIDTH characters. If we are disassembling a function 200 * with a long name, this can be annoying. So we pick a width 201 * based on the maximum width that the current symbol can be. 202 * This at least produces text aligned within each function. 203 */ 204 last_symbol = symbol; 205 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize, 206 &isfunc); 207 if (symbol == NULL) { 208 symbol = dis_find_section(tgt, addr, &symoffset); 209 symsize = symoffset; 210 } 211 212 if (symbol != last_symbol) 213 getsymname(addr, symbol, symsize, symbuf, 214 sizeof (symbuf)); 215 216 symwidth = MAX(symwidth, strlen(symbuf)); 217 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf)); 218 219 /* 220 * If we've crossed a new function boundary, print out the 221 * function name on a blank line. 222 */ 223 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc) 224 (void) printf("%s()\n", symbol); 225 226 (void) printf(" %s:%*s ", symbuf, 227 symwidth - strlen(symbuf), ""); 228 229 /* print bytes */ 230 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr)); 231 i++) { 232 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 233 if (g_flags & DIS_OCTAL) 234 (void) printf("%03o ", byte); 235 else 236 (void) printf("%02x ", byte); 237 } 238 239 /* trailing spaces for missing bytes */ 240 for (; i < bytesperline; i++) { 241 if (g_flags & DIS_OCTAL) 242 (void) printf(" "); 243 else 244 (void) printf(" "); 245 } 246 247 /* contents of disassembly */ 248 (void) printf(" %s", buf); 249 250 /* excess bytes that spill over onto subsequent lines */ 251 for (; i < db.db_nextaddr - addr; i++) { 252 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 253 if (i % bytesperline == 0) 254 (void) printf("\n %*s ", symwidth, ""); 255 if (g_flags & DIS_OCTAL) 256 (void) printf("%03o ", byte); 257 else 258 (void) printf("%02x ", byte); 259 } 260 261 (void) printf("\n"); 262 263 addr = db.db_nextaddr; 264 } 265 } 266 267 /* 268 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup 269 * function, and convert the result using getsymname(). 270 */ 271 int 272 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start, 273 size_t *symlen) 274 { 275 dis_buffer_t *db = data; 276 const char *symbol; 277 off_t offset; 278 size_t size; 279 280 /* 281 * If NULL symbol is returned, getsymname takes care of 282 * printing appropriate address in buf instead of symbol. 283 */ 284 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL); 285 286 if (buf != NULL) 287 getsymname(addr, symbol, offset, buf, buflen); 288 289 if (start != NULL) 290 *start = addr - offset; 291 if (symlen != NULL) 292 *symlen = size; 293 294 if (symbol == NULL) 295 return (-1); 296 297 return (0); 298 } 299 300 /* 301 * libdisasm wrapper around target reading. libdisasm will always read data 302 * in order, so update our current offset within the buffer appropriately. 303 * We only support reading from within the current object; libdisasm should 304 * never ask us to do otherwise. 305 */ 306 int 307 do_read(void *data, uint64_t addr, void *buf, size_t len) 308 { 309 dis_buffer_t *db = data; 310 size_t offset; 311 312 if (addr < db->db_addr || addr >= db->db_addr + db->db_size) 313 return (-1); 314 315 offset = addr - db->db_addr; 316 len = MIN(len, db->db_size - offset); 317 318 (void) memcpy(buf, (char *)db->db_data + offset, len); 319 320 db->db_nextaddr = addr + len; 321 322 return (len); 323 } 324 325 /* 326 * Routine to dump raw data in a human-readable format. Used by the -d and -D 327 * options. We model our output after the xxd(1) program, which gives nicely 328 * formatted output, along with an ASCII translation of the result. 329 */ 330 void 331 dump_data(uint64_t addr, void *data, size_t datalen) 332 { 333 uintptr_t curaddr = addr & (~0xf); 334 uint8_t *bytes = data; 335 int i; 336 int width; 337 338 /* 339 * Determine if the address given to us fits in 32-bit range, in which 340 * case use a 4-byte width. 341 */ 342 if (((addr + datalen) & 0xffffffff00000000ULL) == 0ULL) 343 width = 8; 344 else 345 width = 16; 346 347 while (curaddr < addr + datalen) { 348 /* 349 * Display leading address 350 */ 351 (void) printf("%0*x: ", width, curaddr); 352 353 /* 354 * Print out data in two-byte chunks. If the current address 355 * is before the starting address or after the end of the 356 * section, print spaces. 357 */ 358 for (i = 0; i < 16; i++) { 359 if (curaddr + i < addr ||curaddr + i >= addr + datalen) 360 (void) printf(" "); 361 else 362 (void) printf("%02x", 363 bytes[curaddr + i - addr]); 364 365 if (i & 1) 366 (void) printf(" "); 367 } 368 369 (void) printf(" "); 370 371 /* 372 * Print out the ASCII representation 373 */ 374 for (i = 0; i < 16; i++) { 375 if (curaddr + i < addr || 376 curaddr + i >= addr + datalen) { 377 (void) printf(" "); 378 } else { 379 uint8_t byte = bytes[curaddr + i - addr]; 380 if (isprint(byte)) 381 (void) printf("%c", byte); 382 else 383 (void) printf("."); 384 } 385 } 386 387 (void) printf("\n"); 388 389 curaddr += 16; 390 } 391 } 392 393 /* 394 * Disassemble a section implicitly specified as part of a file. This function 395 * is called for all sections when no other flags are specified. We ignore any 396 * data sections, and print out only those sections containing text. 397 */ 398 void 399 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data) 400 { 401 dis_handle_t *dhp = data; 402 403 /* ignore data sections */ 404 if (!dis_section_istext(scn)) 405 return; 406 407 if (!g_quiet) 408 (void) printf("\nsection %s\n", dis_section_name(scn)); 409 410 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn), 411 dis_section_size(scn)); 412 } 413 414 /* 415 * Structure passed to dis_named_{section,function} which keeps track of both 416 * the target and the libdisasm handle. 417 */ 418 typedef struct callback_arg { 419 dis_tgt_t *ca_tgt; 420 dis_handle_t *ca_handle; 421 } callback_arg_t; 422 423 /* 424 * Disassemble a section explicitly named with -s, -d, or -D. The 'type' 425 * argument contains the type of argument given. Pass the data onto the 426 * appropriate helper routine. 427 */ 428 void 429 dis_named_section(dis_scn_t *scn, int type, void *data) 430 { 431 callback_arg_t *ca = data; 432 433 if (!g_quiet) 434 (void) printf("\nsection %s\n", dis_section_name(scn)); 435 436 switch (type) { 437 case DIS_DATA_RELATIVE: 438 dump_data(0, dis_section_data(scn), dis_section_size(scn)); 439 break; 440 case DIS_DATA_ABSOLUTE: 441 dump_data(dis_section_addr(scn), dis_section_data(scn), 442 dis_section_size(scn)); 443 break; 444 case DIS_TEXT: 445 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn), 446 dis_section_data(scn), dis_section_size(scn)); 447 break; 448 } 449 } 450 451 /* 452 * Disassemble a function explicitly specified with '-F'. The 'type' argument 453 * is unused. 454 */ 455 /* ARGSUSED */ 456 void 457 dis_named_function(dis_func_t *func, int type, void *data) 458 { 459 callback_arg_t *ca = data; 460 461 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func), 462 dis_function_data(func), dis_function_size(func)); 463 } 464 465 /* 466 * Disassemble a complete file. First, we determine the type of the file based 467 * on the ELF machine type, and instantiate a version of the disassembler 468 * appropriate for the file. We then resolve any named sections or functions 469 * against the file, and iterate over the results (or all sections if no flags 470 * were specified). 471 */ 472 void 473 dis_file(const char *filename) 474 { 475 dis_tgt_t *tgt, *current; 476 dis_scnlist_t *sections; 477 dis_funclist_t *functions; 478 dis_handle_t *dhp; 479 GElf_Ehdr ehdr; 480 481 /* 482 * First, initialize the target 483 */ 484 if ((tgt = dis_tgt_create(filename)) == NULL) 485 return; 486 487 if (!g_quiet) 488 (void) printf("disassembly for %s\n\n", filename); 489 490 /* 491 * A given file may contain multiple targets (if it is an archive, for 492 * example). We iterate over all possible targets if this is the case. 493 */ 494 for (current = tgt; current != NULL; current = dis_tgt_next(current)) { 495 dis_tgt_ehdr(current, &ehdr); 496 497 /* 498 * Eventually, this should probably live within libdisasm, and 499 * we should be able to disassemble targets from different 500 * architectures. For now, we only support objects as the 501 * native machine type. 502 */ 503 switch (ehdr.e_machine) { 504 case EM_SPARC: 505 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 506 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 507 warn("invalid E_IDENT field for SPARC object"); 508 return; 509 } 510 g_flags |= DIS_SPARC_V8; 511 break; 512 513 case EM_SPARC32PLUS: 514 { 515 uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK; 516 517 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 518 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 519 warn("invalid E_IDENT field for SPARC object"); 520 return; 521 } 522 523 if (flags != 0 && 524 (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | 525 EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS) 526 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 527 else 528 g_flags |= DIS_SPARC_V9; 529 break; 530 } 531 532 case EM_SPARCV9: 533 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 534 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 535 warn("invalid E_IDENT field for SPARC object"); 536 return; 537 } 538 539 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 540 break; 541 542 case EM_386: 543 g_flags |= DIS_X86_SIZE32; 544 break; 545 546 case EM_AMD64: 547 g_flags |= DIS_X86_SIZE64; 548 break; 549 550 case EM_S370: 551 g_flags |= DIS_S370; 552 553 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 554 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 555 warn("invalid E_IDENT field for S370 object"); 556 return; 557 } 558 break; 559 560 case EM_S390: 561 /* 562 * Both 390 and z/Architecture use EM_S390, the only 563 * differences is the class: ELFCLASS32 for plain 564 * old s390 and ELFCLASS64 for z/Architecture (aka. 565 * s390x). 566 */ 567 if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) { 568 g_flags |= DIS_S390_31; 569 } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { 570 g_flags |= DIS_S390_64; 571 } else { 572 warn("invalid E_IDENT field for S390 object"); 573 return; 574 } 575 576 if (ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 577 warn("invalid E_IDENT field for S390 object"); 578 return; 579 } 580 break; 581 582 default: 583 die("%s: unsupported ELF machine 0x%x", filename, 584 ehdr.e_machine); 585 } 586 587 /* 588 * If ET_REL (.o), printing immediate symbols is likely to 589 * result in garbage, as symbol lookups on unrelocated 590 * immediates find false and useless matches. 591 */ 592 593 if (ehdr.e_type == ET_REL) 594 g_flags |= DIS_NOIMMSYM; 595 596 if (!g_quiet && dis_tgt_member(current) != NULL) 597 (void) printf("\narchive member %s\n", 598 dis_tgt_member(current)); 599 600 /* 601 * Instantiate a libdisasm handle based on the file type. 602 */ 603 if ((dhp = dis_handle_create(g_flags, current, do_lookup, 604 do_read)) == NULL) 605 die("%s: failed to initialize disassembler: %s", 606 filename, dis_strerror(dis_errno())); 607 608 if (g_doall) { 609 /* 610 * With no arguments, iterate over all sections and 611 * disassemble only those that contain text. 612 */ 613 dis_tgt_section_iter(current, dis_text_section, dhp); 614 } else { 615 callback_arg_t ca; 616 617 ca.ca_tgt = current; 618 ca.ca_handle = dhp; 619 620 /* 621 * If sections or functions were explicitly specified, 622 * resolve those names against the object, and iterate 623 * over just the resulting data. 624 */ 625 sections = dis_namelist_resolve_sections(g_seclist, 626 current); 627 functions = dis_namelist_resolve_functions(g_funclist, 628 current); 629 630 dis_scnlist_iter(sections, dis_named_section, &ca); 631 dis_funclist_iter(functions, dis_named_function, &ca); 632 633 dis_scnlist_destroy(sections); 634 dis_funclist_destroy(functions); 635 } 636 637 dis_handle_destroy(dhp); 638 } 639 640 dis_tgt_destroy(tgt); 641 } 642 643 void 644 usage(void) 645 { 646 (void) fprintf(stderr, "usage: dis [-CVoqn] [-d sec] \n"); 647 (void) fprintf(stderr, "\t[-D sec] [-F function] [-t sec] file ..\n"); 648 exit(2); 649 } 650 651 typedef struct lib_node { 652 char *path; 653 struct lib_node *next; 654 } lib_node_t; 655 656 int 657 main(int argc, char **argv) 658 { 659 int optchar; 660 int i; 661 lib_node_t *libs = NULL; 662 663 g_funclist = dis_namelist_create(); 664 g_seclist = dis_namelist_create(); 665 666 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) { 667 switch (optchar) { 668 case 'C': 669 g_demangle = 1; 670 break; 671 case 'd': 672 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE); 673 break; 674 case 'D': 675 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE); 676 break; 677 case 'F': 678 dis_namelist_add(g_funclist, optarg, 0); 679 break; 680 case 'l': { 681 /* 682 * The '-l foo' option historically would attempt to 683 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR 684 * environment variable has never been supported or 685 * documented for our linker. However, until this 686 * option is formally EOLed, we have to support it. 687 */ 688 char *dir; 689 lib_node_t *node; 690 size_t len; 691 692 if ((dir = getenv("LIBDIR")) == NULL || 693 dir[0] == '\0') 694 dir = "/usr/lib"; 695 node = safe_malloc(sizeof (lib_node_t)); 696 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a"); 697 node->path = safe_malloc(len); 698 699 (void) snprintf(node->path, len, "%s/lib%s.a", dir, 700 optarg); 701 node->next = libs; 702 libs = node; 703 break; 704 } 705 case 'L': 706 /* 707 * The '-L' option historically would attempt to read 708 * the .debug section of the target to determine source 709 * line information in order to annotate the output. 710 * No compiler has emitted these sections in many years, 711 * and the option has never done what it purported to 712 * do. We silently consume the option for 713 * compatibility. 714 */ 715 break; 716 case 'n': 717 g_numeric = 1; 718 break; 719 case 'o': 720 g_flags |= DIS_OCTAL; 721 break; 722 case 'q': 723 g_quiet = 1; 724 break; 725 case 't': 726 dis_namelist_add(g_seclist, optarg, DIS_TEXT); 727 break; 728 case 'V': 729 (void) printf("Solaris disassembler version 1.0\n"); 730 return (0); 731 default: 732 usage(); 733 break; 734 } 735 } 736 737 argc -= optind; 738 argv += optind; 739 740 if (argc == 0 && libs == NULL) { 741 warn("no objects specified"); 742 usage(); 743 } 744 745 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist)) 746 g_doall = 1; 747 748 /* 749 * See comment for 'l' option, above. 750 */ 751 while (libs != NULL) { 752 lib_node_t *node = libs->next; 753 754 dis_file(libs->path); 755 free(libs->path); 756 free(libs); 757 libs = node; 758 } 759 760 for (i = 0; i < argc; i++) 761 dis_file(argv[i]); 762 763 dis_namelist_destroy(g_funclist); 764 dis_namelist_destroy(g_seclist); 765 766 return (g_error); 767 } 768