1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * Copyright 2011 Jason King. All rights reserved. 27 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org> 28 */ 29 30 #include <ctype.h> 31 #include <getopt.h> 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <string.h> 35 #include <sys/sysmacros.h> 36 #include <sys/elf_SPARC.h> 37 38 #include <libdisasm.h> 39 40 #include "dis_target.h" 41 #include "dis_util.h" 42 #include "dis_list.h" 43 44 int g_demangle; /* Demangle C++ names */ 45 int g_quiet; /* Quiet mode */ 46 int g_numeric; /* Numeric mode */ 47 int g_flags; /* libdisasm language flags */ 48 int g_doall; /* true if no functions or sections were given */ 49 50 dis_namelist_t *g_funclist; /* list of functions to disassemble, if any */ 51 dis_namelist_t *g_seclist; /* list of sections to disassemble, if any */ 52 53 /* 54 * Section options for -d, -D, and -s 55 */ 56 #define DIS_DATA_RELATIVE 1 57 #define DIS_DATA_ABSOLUTE 2 58 #define DIS_TEXT 3 59 60 /* 61 * libdisasm callback data. Keeps track of current data (function or section) 62 * and offset within that data. 63 */ 64 typedef struct dis_buffer { 65 dis_tgt_t *db_tgt; /* current dis target */ 66 void *db_data; /* function or section data */ 67 uint64_t db_addr; /* address of function start */ 68 size_t db_size; /* size of data */ 69 uint64_t db_nextaddr; /* next address to be read */ 70 } dis_buffer_t; 71 72 #define MINSYMWIDTH 22 /* Minimum width of symbol portion of line */ 73 74 /* 75 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately 76 * formatted symbol, based on the offset and current setttings. 77 */ 78 void 79 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf, 80 size_t buflen) 81 { 82 if (symbol == NULL || g_numeric) { 83 if (g_flags & DIS_OCTAL) 84 (void) snprintf(buf, buflen, "0%llo", addr); 85 else 86 (void) snprintf(buf, buflen, "0x%llx", addr); 87 } else { 88 if (g_demangle) 89 symbol = dis_demangle(symbol); 90 91 if (offset == 0) 92 (void) snprintf(buf, buflen, "%s", symbol); 93 else if (g_flags & DIS_OCTAL) 94 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset); 95 else 96 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset); 97 } 98 } 99 100 /* 101 * Determine if we are on an architecture with fixed-size instructions, 102 * and if so, what size they are. 103 */ 104 static int 105 insn_size(dis_handle_t *dhp) 106 { 107 int min = dis_min_instrlen(dhp); 108 int max = dis_max_instrlen(dhp); 109 110 if (min == max) 111 return (min); 112 113 return (0); 114 } 115 116 /* 117 * The main disassembly routine. Given a fixed-sized buffer and starting 118 * address, disassemble the data using the supplied target and libdisasm handle. 119 */ 120 void 121 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data, 122 size_t datalen) 123 { 124 dis_buffer_t db = { 0 }; 125 char buf[BUFSIZE]; 126 char symbuf[BUFSIZE]; 127 const char *symbol; 128 const char *last_symbol; 129 off_t symoffset; 130 int i; 131 int bytesperline; 132 size_t symsize; 133 int isfunc; 134 size_t symwidth = 0; 135 int ret; 136 int insz = insn_size(dhp); 137 138 db.db_tgt = tgt; 139 db.db_data = data; 140 db.db_addr = addr; 141 db.db_size = datalen; 142 143 dis_set_data(dhp, &db); 144 145 if ((bytesperline = dis_max_instrlen(dhp)) > 6) 146 bytesperline = 6; 147 148 symbol = NULL; 149 150 while (addr < db.db_addr + db.db_size) { 151 152 ret = dis_disassemble(dhp, addr, buf, BUFSIZE); 153 if (ret != 0 && insz > 0) { 154 /* 155 * Since we know instructions are fixed size, we 156 * always know the address of the next instruction 157 */ 158 (void) snprintf(buf, sizeof (buf), 159 "*** invalid opcode ***"); 160 db.db_nextaddr = addr + insz; 161 162 } else if (ret != 0) { 163 off_t next; 164 165 (void) snprintf(buf, sizeof (buf), 166 "*** invalid opcode ***"); 167 168 /* 169 * On architectures with variable sized instructions 170 * we have no way to figure out where the next 171 * instruction starts if we encounter an invalid 172 * instruction. Instead we print the rest of the 173 * instruction stream as hex until we reach the 174 * next valid symbol in the section. 175 */ 176 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) { 177 db.db_nextaddr = db.db_addr + db.db_size; 178 } else { 179 if (next > db.db_size) 180 db.db_nextaddr = db.db_addr + 181 db.db_size; 182 else 183 db.db_nextaddr = addr + next; 184 } 185 } 186 187 /* 188 * Print out the line as: 189 * 190 * address: bytes text 191 * 192 * If there are more than 6 bytes in any given instruction, 193 * spread the bytes across two lines. We try to get symbolic 194 * information for the address, but if that fails we print out 195 * the numeric address instead. 196 * 197 * We try to keep the address portion of the text aligned at 198 * MINSYMWIDTH characters. If we are disassembling a function 199 * with a long name, this can be annoying. So we pick a width 200 * based on the maximum width that the current symbol can be. 201 * This at least produces text aligned within each function. 202 */ 203 last_symbol = symbol; 204 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize, 205 &isfunc); 206 if (symbol == NULL) { 207 symbol = dis_find_section(tgt, addr, &symoffset); 208 symsize = symoffset; 209 } 210 211 if (symbol != last_symbol) 212 getsymname(addr, symbol, symsize, symbuf, 213 sizeof (symbuf)); 214 215 symwidth = MAX(symwidth, strlen(symbuf)); 216 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf)); 217 218 /* 219 * If we've crossed a new function boundary, print out the 220 * function name on a blank line. 221 */ 222 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc) 223 (void) printf("%s()\n", symbol); 224 225 (void) printf(" %s:%*s ", symbuf, 226 symwidth - strlen(symbuf), ""); 227 228 /* print bytes */ 229 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr)); 230 i++) { 231 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 232 if (g_flags & DIS_OCTAL) 233 (void) printf("%03o ", byte); 234 else 235 (void) printf("%02x ", byte); 236 } 237 238 /* trailing spaces for missing bytes */ 239 for (; i < bytesperline; i++) { 240 if (g_flags & DIS_OCTAL) 241 (void) printf(" "); 242 else 243 (void) printf(" "); 244 } 245 246 /* contents of disassembly */ 247 (void) printf(" %s", buf); 248 249 /* excess bytes that spill over onto subsequent lines */ 250 for (; i < db.db_nextaddr - addr; i++) { 251 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 252 if (i % bytesperline == 0) 253 (void) printf("\n %*s ", symwidth, ""); 254 if (g_flags & DIS_OCTAL) 255 (void) printf("%03o ", byte); 256 else 257 (void) printf("%02x ", byte); 258 } 259 260 (void) printf("\n"); 261 262 addr = db.db_nextaddr; 263 } 264 } 265 266 /* 267 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup 268 * function, and convert the result using getsymname(). 269 */ 270 int 271 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start, 272 size_t *symlen) 273 { 274 dis_buffer_t *db = data; 275 const char *symbol; 276 off_t offset; 277 size_t size; 278 279 /* 280 * If NULL symbol is returned, getsymname takes care of 281 * printing appropriate address in buf instead of symbol. 282 */ 283 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL); 284 285 if (buf != NULL) 286 getsymname(addr, symbol, offset, buf, buflen); 287 288 if (start != NULL) 289 *start = addr - offset; 290 if (symlen != NULL) 291 *symlen = size; 292 293 if (symbol == NULL) 294 return (-1); 295 296 return (0); 297 } 298 299 /* 300 * libdisasm wrapper around target reading. libdisasm will always read data 301 * in order, so update our current offset within the buffer appropriately. 302 * We only support reading from within the current object; libdisasm should 303 * never ask us to do otherwise. 304 */ 305 int 306 do_read(void *data, uint64_t addr, void *buf, size_t len) 307 { 308 dis_buffer_t *db = data; 309 size_t offset; 310 311 if (addr < db->db_addr || addr >= db->db_addr + db->db_size) 312 return (-1); 313 314 offset = addr - db->db_addr; 315 len = MIN(len, db->db_size - offset); 316 317 (void) memcpy(buf, (char *)db->db_data + offset, len); 318 319 db->db_nextaddr = addr + len; 320 321 return (len); 322 } 323 324 /* 325 * Routine to dump raw data in a human-readable format. Used by the -d and -D 326 * options. We model our output after the xxd(1) program, which gives nicely 327 * formatted output, along with an ASCII translation of the result. 328 */ 329 void 330 dump_data(uint64_t addr, void *data, size_t datalen) 331 { 332 uintptr_t curaddr = addr & (~0xf); 333 uint8_t *bytes = data; 334 int i; 335 int width; 336 337 /* 338 * Determine if the address given to us fits in 32-bit range, in which 339 * case use a 4-byte width. 340 */ 341 if (((addr + datalen) & 0xffffffff00000000ULL) == 0ULL) 342 width = 8; 343 else 344 width = 16; 345 346 while (curaddr < addr + datalen) { 347 /* 348 * Display leading address 349 */ 350 (void) printf("%0*x: ", width, curaddr); 351 352 /* 353 * Print out data in two-byte chunks. If the current address 354 * is before the starting address or after the end of the 355 * section, print spaces. 356 */ 357 for (i = 0; i < 16; i++) { 358 if (curaddr + i < addr ||curaddr + i >= addr + datalen) 359 (void) printf(" "); 360 else 361 (void) printf("%02x", 362 bytes[curaddr + i - addr]); 363 364 if (i & 1) 365 (void) printf(" "); 366 } 367 368 (void) printf(" "); 369 370 /* 371 * Print out the ASCII representation 372 */ 373 for (i = 0; i < 16; i++) { 374 if (curaddr + i < addr || 375 curaddr + i >= addr + datalen) { 376 (void) printf(" "); 377 } else { 378 uint8_t byte = bytes[curaddr + i - addr]; 379 if (isprint(byte)) 380 (void) printf("%c", byte); 381 else 382 (void) printf("."); 383 } 384 } 385 386 (void) printf("\n"); 387 388 curaddr += 16; 389 } 390 } 391 392 /* 393 * Disassemble a section implicitly specified as part of a file. This function 394 * is called for all sections when no other flags are specified. We ignore any 395 * data sections, and print out only those sections containing text. 396 */ 397 void 398 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data) 399 { 400 dis_handle_t *dhp = data; 401 402 /* ignore data sections */ 403 if (!dis_section_istext(scn)) 404 return; 405 406 if (!g_quiet) 407 (void) printf("\nsection %s\n", dis_section_name(scn)); 408 409 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn), 410 dis_section_size(scn)); 411 } 412 413 /* 414 * Structure passed to dis_named_{section,function} which keeps track of both 415 * the target and the libdisasm handle. 416 */ 417 typedef struct callback_arg { 418 dis_tgt_t *ca_tgt; 419 dis_handle_t *ca_handle; 420 } callback_arg_t; 421 422 /* 423 * Disassemble a section explicitly named with -s, -d, or -D. The 'type' 424 * argument contains the type of argument given. Pass the data onto the 425 * appropriate helper routine. 426 */ 427 void 428 dis_named_section(dis_scn_t *scn, int type, void *data) 429 { 430 callback_arg_t *ca = data; 431 432 if (!g_quiet) 433 (void) printf("\nsection %s\n", dis_section_name(scn)); 434 435 switch (type) { 436 case DIS_DATA_RELATIVE: 437 dump_data(0, dis_section_data(scn), dis_section_size(scn)); 438 break; 439 case DIS_DATA_ABSOLUTE: 440 dump_data(dis_section_addr(scn), dis_section_data(scn), 441 dis_section_size(scn)); 442 break; 443 case DIS_TEXT: 444 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn), 445 dis_section_data(scn), dis_section_size(scn)); 446 break; 447 } 448 } 449 450 /* 451 * Disassemble a function explicitly specified with '-F'. The 'type' argument 452 * is unused. 453 */ 454 /* ARGSUSED */ 455 void 456 dis_named_function(dis_func_t *func, int type, void *data) 457 { 458 callback_arg_t *ca = data; 459 460 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func), 461 dis_function_data(func), dis_function_size(func)); 462 } 463 464 /* 465 * Disassemble a complete file. First, we determine the type of the file based 466 * on the ELF machine type, and instantiate a version of the disassembler 467 * appropriate for the file. We then resolve any named sections or functions 468 * against the file, and iterate over the results (or all sections if no flags 469 * were specified). 470 */ 471 void 472 dis_file(const char *filename) 473 { 474 dis_tgt_t *tgt, *current; 475 dis_scnlist_t *sections; 476 dis_funclist_t *functions; 477 dis_handle_t *dhp; 478 GElf_Ehdr ehdr; 479 480 /* 481 * First, initialize the target 482 */ 483 if ((tgt = dis_tgt_create(filename)) == NULL) 484 return; 485 486 if (!g_quiet) 487 (void) printf("disassembly for %s\n\n", filename); 488 489 /* 490 * A given file may contain multiple targets (if it is an archive, for 491 * example). We iterate over all possible targets if this is the case. 492 */ 493 for (current = tgt; current != NULL; current = dis_tgt_next(current)) { 494 dis_tgt_ehdr(current, &ehdr); 495 496 /* 497 * Eventually, this should probably live within libdisasm, and 498 * we should be able to disassemble targets from different 499 * architectures. For now, we only support objects as the 500 * native machine type. 501 */ 502 switch (ehdr.e_machine) { 503 case EM_SPARC: 504 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 505 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 506 warn("invalid E_IDENT field for SPARC object"); 507 return; 508 } 509 g_flags |= DIS_SPARC_V8; 510 break; 511 512 case EM_SPARC32PLUS: 513 { 514 uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK; 515 516 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 517 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 518 warn("invalid E_IDENT field for SPARC object"); 519 return; 520 } 521 522 if (flags != 0 && 523 (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | 524 EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS) 525 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 526 else 527 g_flags |= DIS_SPARC_V9; 528 break; 529 } 530 531 case EM_SPARCV9: 532 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 533 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 534 warn("invalid E_IDENT field for SPARC object"); 535 return; 536 } 537 538 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 539 break; 540 541 case EM_386: 542 g_flags |= DIS_X86_SIZE32; 543 break; 544 545 case EM_AMD64: 546 g_flags |= DIS_X86_SIZE64; 547 break; 548 549 default: 550 die("%s: unsupported ELF machine 0x%x", filename, 551 ehdr.e_machine); 552 } 553 554 /* 555 * If ET_REL (.o), printing immediate symbols is likely to 556 * result in garbage, as symbol lookups on unrelocated 557 * immediates find false and useless matches. 558 */ 559 560 if (ehdr.e_type == ET_REL) 561 g_flags |= DIS_NOIMMSYM; 562 563 if (!g_quiet && dis_tgt_member(current) != NULL) 564 (void) printf("\narchive member %s\n", 565 dis_tgt_member(current)); 566 567 /* 568 * Instantiate a libdisasm handle based on the file type. 569 */ 570 if ((dhp = dis_handle_create(g_flags, current, do_lookup, 571 do_read)) == NULL) 572 die("%s: failed to initialize disassembler: %s", 573 filename, dis_strerror(dis_errno())); 574 575 if (g_doall) { 576 /* 577 * With no arguments, iterate over all sections and 578 * disassemble only those that contain text. 579 */ 580 dis_tgt_section_iter(current, dis_text_section, dhp); 581 } else { 582 callback_arg_t ca; 583 584 ca.ca_tgt = current; 585 ca.ca_handle = dhp; 586 587 /* 588 * If sections or functions were explicitly specified, 589 * resolve those names against the object, and iterate 590 * over just the resulting data. 591 */ 592 sections = dis_namelist_resolve_sections(g_seclist, 593 current); 594 functions = dis_namelist_resolve_functions(g_funclist, 595 current); 596 597 dis_scnlist_iter(sections, dis_named_section, &ca); 598 dis_funclist_iter(functions, dis_named_function, &ca); 599 600 dis_scnlist_destroy(sections); 601 dis_funclist_destroy(functions); 602 } 603 604 dis_handle_destroy(dhp); 605 } 606 607 dis_tgt_destroy(tgt); 608 } 609 610 void 611 usage(void) 612 { 613 (void) fprintf(stderr, "usage: dis [-CVoqn] [-d sec] \n"); 614 (void) fprintf(stderr, "\t[-D sec] [-F function] [-t sec] file ..\n"); 615 exit(2); 616 } 617 618 typedef struct lib_node { 619 char *path; 620 struct lib_node *next; 621 } lib_node_t; 622 623 int 624 main(int argc, char **argv) 625 { 626 int optchar; 627 int i; 628 lib_node_t *libs = NULL; 629 630 g_funclist = dis_namelist_create(); 631 g_seclist = dis_namelist_create(); 632 633 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) { 634 switch (optchar) { 635 case 'C': 636 g_demangle = 1; 637 break; 638 case 'd': 639 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE); 640 break; 641 case 'D': 642 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE); 643 break; 644 case 'F': 645 dis_namelist_add(g_funclist, optarg, 0); 646 break; 647 case 'l': { 648 /* 649 * The '-l foo' option historically would attempt to 650 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR 651 * environment variable has never been supported or 652 * documented for our linker. However, until this 653 * option is formally EOLed, we have to support it. 654 */ 655 char *dir; 656 lib_node_t *node; 657 size_t len; 658 659 if ((dir = getenv("LIBDIR")) == NULL || 660 dir[0] == '\0') 661 dir = "/usr/lib"; 662 node = safe_malloc(sizeof (lib_node_t)); 663 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a"); 664 node->path = safe_malloc(len); 665 666 (void) snprintf(node->path, len, "%s/lib%s.a", dir, 667 optarg); 668 node->next = libs; 669 libs = node; 670 break; 671 } 672 case 'L': 673 /* 674 * The '-L' option historically would attempt to read 675 * the .debug section of the target to determine source 676 * line information in order to annotate the output. 677 * No compiler has emitted these sections in many years, 678 * and the option has never done what it purported to 679 * do. We silently consume the option for 680 * compatibility. 681 */ 682 break; 683 case 'n': 684 g_numeric = 1; 685 break; 686 case 'o': 687 g_flags |= DIS_OCTAL; 688 break; 689 case 'q': 690 g_quiet = 1; 691 break; 692 case 't': 693 dis_namelist_add(g_seclist, optarg, DIS_TEXT); 694 break; 695 case 'V': 696 (void) printf("Solaris disassembler version 1.0\n"); 697 return (0); 698 default: 699 usage(); 700 break; 701 } 702 } 703 704 argc -= optind; 705 argv += optind; 706 707 if (argc == 0 && libs == NULL) { 708 warn("no objects specified"); 709 usage(); 710 } 711 712 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist)) 713 g_doall = 1; 714 715 /* 716 * See comment for 'l' option, above. 717 */ 718 while (libs != NULL) { 719 lib_node_t *node = libs->next; 720 721 dis_file(libs->path); 722 free(libs->path); 723 free(libs); 724 libs = node; 725 } 726 727 for (i = 0; i < argc; i++) 728 dis_file(argv[i]); 729 730 dis_namelist_destroy(g_funclist); 731 dis_namelist_destroy(g_seclist); 732 733 return (g_error); 734 } 735