/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright 2011 Jason King. All rights reserved.
 */

#include <ctype.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/sysmacros.h>
#include <sys/elf_SPARC.h>

#include <libdisasm.h>

#include "dis_target.h"
#include "dis_util.h"
#include "dis_list.h"

/*
 * Global state, set from the command line in main().
 */
int g_demangle;		/* Demangle C++ names */
int g_quiet;		/* Quiet mode */
int g_numeric;		/* Numeric mode */
int g_flags;		/* libdisasm language flags */
int g_doall;		/* true if no functions or sections were given */

dis_namelist_t *g_funclist;	/* list of functions to disassemble, if any */
dis_namelist_t *g_seclist;	/* list of sections to disassemble, if any */

/*
 * Section options for -d, -D, and -t.  These are the 'type' values stored
 * with each entry of g_seclist and later handed to dis_named_section().
 */
#define	DIS_DATA_RELATIVE	1	/* -d: dump data, addresses from 0 */
#define	DIS_DATA_ABSOLUTE	2	/* -D: dump data, section addresses */
#define	DIS_TEXT		3	/* -t: disassemble as text */

/*
 * libdisasm callback data. Keeps track of current data (function or section)
 * and offset within that data.
62 */ 63 typedef struct dis_buffer { 64 dis_tgt_t *db_tgt; /* current dis target */ 65 void *db_data; /* function or section data */ 66 uint64_t db_addr; /* address of function start */ 67 size_t db_size; /* size of data */ 68 uint64_t db_nextaddr; /* next address to be read */ 69 } dis_buffer_t; 70 71 #define MINSYMWIDTH 22 /* Minimum width of symbol portion of line */ 72 73 /* 74 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately 75 * formatted symbol, based on the offset and current setttings. 76 */ 77 void 78 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf, 79 size_t buflen) 80 { 81 if (symbol == NULL || g_numeric) { 82 if (g_flags & DIS_OCTAL) 83 (void) snprintf(buf, buflen, "0%llo", addr); 84 else 85 (void) snprintf(buf, buflen, "0x%llx", addr); 86 } else { 87 if (g_demangle) 88 symbol = dis_demangle(symbol); 89 90 if (offset == 0) 91 (void) snprintf(buf, buflen, "%s", symbol); 92 else if (g_flags & DIS_OCTAL) 93 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset); 94 else 95 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset); 96 } 97 } 98 99 /* 100 * The main disassembly routine. Given a fixed-sized buffer and starting 101 * address, disassemble the data using the supplied target and libdisasm handle. 
102 */ 103 void 104 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data, 105 size_t datalen) 106 { 107 dis_buffer_t db = { 0 }; 108 char buf[BUFSIZE]; 109 char symbuf[BUFSIZE]; 110 const char *symbol; 111 const char *last_symbol; 112 off_t symoffset; 113 int i; 114 int bytesperline; 115 size_t symsize; 116 int isfunc; 117 size_t symwidth = 0; 118 119 db.db_tgt = tgt; 120 db.db_data = data; 121 db.db_addr = addr; 122 db.db_size = datalen; 123 124 dis_set_data(dhp, &db); 125 126 if ((bytesperline = dis_max_instrlen(dhp)) > 6) 127 bytesperline = 6; 128 129 symbol = NULL; 130 131 while (addr < db.db_addr + db.db_size) { 132 133 if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) { 134 #if defined(__sparc) 135 /* 136 * Since sparc instructions are fixed size, we 137 * always know the address of the next instruction 138 */ 139 (void) snprintf(buf, sizeof (buf), 140 "*** invalid opcode ***"); 141 db.db_nextaddr = addr + 4; 142 143 #else 144 off_t next; 145 146 (void) snprintf(buf, sizeof (buf), 147 "*** invalid opcode ***"); 148 149 /* 150 * On architectures with variable sized instructions 151 * we have no way to figure out where the next 152 * instruction starts if we encounter an invalid 153 * instruction. Instead we print the rest of the 154 * instruction stream as hex until we reach the 155 * next valid symbol in the section. 156 */ 157 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) { 158 db.db_nextaddr = db.db_addr + db.db_size; 159 } else { 160 if (next > db.db_size) 161 db.db_nextaddr = db.db_addr + 162 db.db_size; 163 else 164 db.db_nextaddr = addr + next; 165 } 166 #endif 167 } 168 169 /* 170 * Print out the line as: 171 * 172 * address: bytes text 173 * 174 * If there are more than 6 bytes in any given instruction, 175 * spread the bytes across two lines. We try to get symbolic 176 * information for the address, but if that fails we print out 177 * the numeric address instead. 
178 * 179 * We try to keep the address portion of the text aligned at 180 * MINSYMWIDTH characters. If we are disassembling a function 181 * with a long name, this can be annoying. So we pick a width 182 * based on the maximum width that the current symbol can be. 183 * This at least produces text aligned within each function. 184 */ 185 last_symbol = symbol; 186 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize, 187 &isfunc); 188 if (symbol == NULL) { 189 symbol = dis_find_section(tgt, addr, &symoffset); 190 symsize = symoffset; 191 } 192 193 if (symbol != last_symbol) 194 getsymname(addr, symbol, symsize, symbuf, 195 sizeof (symbuf)); 196 197 symwidth = MAX(symwidth, strlen(symbuf)); 198 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf)); 199 200 /* 201 * If we've crossed a new function boundary, print out the 202 * function name on a blank line. 203 */ 204 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc) 205 (void) printf("%s()\n", symbol); 206 207 (void) printf(" %s:%*s ", symbuf, 208 symwidth - strlen(symbuf), ""); 209 210 /* print bytes */ 211 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr)); 212 i++) { 213 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 214 if (g_flags & DIS_OCTAL) 215 (void) printf("%03o ", byte); 216 else 217 (void) printf("%02x ", byte); 218 } 219 220 /* trailing spaces for missing bytes */ 221 for (; i < bytesperline; i++) { 222 if (g_flags & DIS_OCTAL) 223 (void) printf(" "); 224 else 225 (void) printf(" "); 226 } 227 228 /* contents of disassembly */ 229 (void) printf(" %s", buf); 230 231 /* excess bytes that spill over onto subsequent lines */ 232 for (; i < db.db_nextaddr - addr; i++) { 233 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 234 if (i % bytesperline == 0) 235 (void) printf("\n %*s ", symwidth, ""); 236 if (g_flags & DIS_OCTAL) 237 (void) printf("%03o ", byte); 238 else 239 (void) printf("%02x ", byte); 240 } 241 242 (void) printf("\n"); 243 244 addr = 
db.db_nextaddr; 245 } 246 } 247 248 /* 249 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup 250 * function, and convert the result using getsymname(). 251 */ 252 int 253 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start, 254 size_t *symlen) 255 { 256 dis_buffer_t *db = data; 257 const char *symbol; 258 off_t offset; 259 size_t size; 260 261 /* 262 * If NULL symbol is returned, getsymname takes care of 263 * printing appropriate address in buf instead of symbol. 264 */ 265 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL); 266 267 if (buf != NULL) 268 getsymname(addr, symbol, offset, buf, buflen); 269 270 if (start != NULL) 271 *start = addr - offset; 272 if (symlen != NULL) 273 *symlen = size; 274 275 if (symbol == NULL) 276 return (-1); 277 278 return (0); 279 } 280 281 /* 282 * libdisasm wrapper around target reading. libdisasm will always read data 283 * in order, so update our current offset within the buffer appropriately. 284 * We only support reading from within the current object; libdisasm should 285 * never ask us to do otherwise. 286 */ 287 int 288 do_read(void *data, uint64_t addr, void *buf, size_t len) 289 { 290 dis_buffer_t *db = data; 291 size_t offset; 292 293 if (addr < db->db_addr || addr >= db->db_addr + db->db_size) 294 return (-1); 295 296 offset = addr - db->db_addr; 297 len = MIN(len, db->db_size - offset); 298 299 (void) memcpy(buf, (char *)db->db_data + offset, len); 300 301 db->db_nextaddr = addr + len; 302 303 return (len); 304 } 305 306 /* 307 * Routine to dump raw data in a human-readable format. Used by the -d and -D 308 * options. We model our output after the xxd(1) program, which gives nicely 309 * formatted output, along with an ASCII translation of the result. 
310 */ 311 void 312 dump_data(uint64_t addr, void *data, size_t datalen) 313 { 314 uintptr_t curaddr = addr & (~0xf); 315 uint8_t *bytes = data; 316 int i; 317 int width; 318 319 /* 320 * Determine if the address given to us fits in 32-bit range, in which 321 * case use a 4-byte width. 322 */ 323 if (((addr + datalen) & 0xffffffff00000000ULL) == 0ULL) 324 width = 8; 325 else 326 width = 16; 327 328 while (curaddr < addr + datalen) { 329 /* 330 * Display leading address 331 */ 332 (void) printf("%0*x: ", width, curaddr); 333 334 /* 335 * Print out data in two-byte chunks. If the current address 336 * is before the starting address or after the end of the 337 * section, print spaces. 338 */ 339 for (i = 0; i < 16; i++) { 340 if (curaddr + i < addr ||curaddr + i >= addr + datalen) 341 (void) printf(" "); 342 else 343 (void) printf("%02x", 344 bytes[curaddr + i - addr]); 345 346 if (i & 1) 347 (void) printf(" "); 348 } 349 350 (void) printf(" "); 351 352 /* 353 * Print out the ASCII representation 354 */ 355 for (i = 0; i < 16; i++) { 356 if (curaddr + i < addr || 357 curaddr + i >= addr + datalen) { 358 (void) printf(" "); 359 } else { 360 uint8_t byte = bytes[curaddr + i - addr]; 361 if (isprint(byte)) 362 (void) printf("%c", byte); 363 else 364 (void) printf("."); 365 } 366 } 367 368 (void) printf("\n"); 369 370 curaddr += 16; 371 } 372 } 373 374 /* 375 * Disassemble a section implicitly specified as part of a file. This function 376 * is called for all sections when no other flags are specified. We ignore any 377 * data sections, and print out only those sections containing text. 
378 */ 379 void 380 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data) 381 { 382 dis_handle_t *dhp = data; 383 384 /* ignore data sections */ 385 if (!dis_section_istext(scn)) 386 return; 387 388 if (!g_quiet) 389 (void) printf("\nsection %s\n", dis_section_name(scn)); 390 391 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn), 392 dis_section_size(scn)); 393 } 394 395 /* 396 * Structure passed to dis_named_{section,function} which keeps track of both 397 * the target and the libdisasm handle. 398 */ 399 typedef struct callback_arg { 400 dis_tgt_t *ca_tgt; 401 dis_handle_t *ca_handle; 402 } callback_arg_t; 403 404 /* 405 * Disassemble a section explicitly named with -s, -d, or -D. The 'type' 406 * argument contains the type of argument given. Pass the data onto the 407 * appropriate helper routine. 408 */ 409 void 410 dis_named_section(dis_scn_t *scn, int type, void *data) 411 { 412 callback_arg_t *ca = data; 413 414 if (!g_quiet) 415 (void) printf("\nsection %s\n", dis_section_name(scn)); 416 417 switch (type) { 418 case DIS_DATA_RELATIVE: 419 dump_data(0, dis_section_data(scn), dis_section_size(scn)); 420 break; 421 case DIS_DATA_ABSOLUTE: 422 dump_data(dis_section_addr(scn), dis_section_data(scn), 423 dis_section_size(scn)); 424 break; 425 case DIS_TEXT: 426 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn), 427 dis_section_data(scn), dis_section_size(scn)); 428 break; 429 } 430 } 431 432 /* 433 * Disassemble a function explicitly specified with '-F'. The 'type' argument 434 * is unused. 435 */ 436 /* ARGSUSED */ 437 void 438 dis_named_function(dis_func_t *func, int type, void *data) 439 { 440 callback_arg_t *ca = data; 441 442 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func), 443 dis_function_data(func), dis_function_size(func)); 444 } 445 446 /* 447 * Disassemble a complete file. 
First, we determine the type of the file based 448 * on the ELF machine type, and instantiate a version of the disassembler 449 * appropriate for the file. We then resolve any named sections or functions 450 * against the file, and iterate over the results (or all sections if no flags 451 * were specified). 452 */ 453 void 454 dis_file(const char *filename) 455 { 456 dis_tgt_t *tgt, *current; 457 dis_scnlist_t *sections; 458 dis_funclist_t *functions; 459 dis_handle_t *dhp; 460 GElf_Ehdr ehdr; 461 462 /* 463 * First, initialize the target 464 */ 465 if ((tgt = dis_tgt_create(filename)) == NULL) 466 return; 467 468 if (!g_quiet) 469 (void) printf("disassembly for %s\n\n", filename); 470 471 /* 472 * A given file may contain multiple targets (if it is an archive, for 473 * example). We iterate over all possible targets if this is the case. 474 */ 475 for (current = tgt; current != NULL; current = dis_tgt_next(current)) { 476 dis_tgt_ehdr(current, &ehdr); 477 478 /* 479 * Eventually, this should probably live within libdisasm, and 480 * we should be able to disassemble targets from different 481 * architectures. For now, we only support objects as the 482 * native machine type. 
483 */ 484 switch (ehdr.e_machine) { 485 #ifdef __sparc 486 case EM_SPARC: 487 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 488 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 489 warn("invalid E_IDENT field for SPARC object"); 490 return; 491 } 492 g_flags |= DIS_SPARC_V8; 493 break; 494 495 case EM_SPARC32PLUS: 496 { 497 uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK; 498 499 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 500 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 501 warn("invalid E_IDENT field for SPARC object"); 502 return; 503 } 504 505 if (flags != 0 && 506 (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | 507 EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS) 508 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 509 else 510 g_flags |= DIS_SPARC_V9; 511 break; 512 } 513 514 case EM_SPARCV9: 515 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 516 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 517 warn("invalid E_IDENT field for SPARC object"); 518 return; 519 } 520 521 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 522 break; 523 #endif /* __sparc */ 524 525 #if defined(__i386) || defined(__amd64) 526 case EM_386: 527 g_flags |= DIS_X86_SIZE32; 528 break; 529 530 case EM_AMD64: 531 g_flags |= DIS_X86_SIZE64; 532 break; 533 #endif /* __i386 || __amd64 */ 534 535 default: 536 die("%s: unsupported ELF machine 0x%x", filename, 537 ehdr.e_machine); 538 } 539 540 /* 541 * If ET_REL (.o), printing immediate symbols is likely to 542 * result in garbage, as symbol lookups on unrelocated 543 * immediates find false and useless matches. 544 */ 545 546 if (ehdr.e_type == ET_REL) 547 g_flags |= DIS_NOIMMSYM; 548 549 if (!g_quiet && dis_tgt_member(current) != NULL) 550 (void) printf("\narchive member %s\n", 551 dis_tgt_member(current)); 552 553 /* 554 * Instantiate a libdisasm handle based on the file type. 
555 */ 556 if ((dhp = dis_handle_create(g_flags, current, do_lookup, 557 do_read)) == NULL) 558 die("%s: failed to initialize disassembler: %s", 559 filename, dis_strerror(dis_errno())); 560 561 if (g_doall) { 562 /* 563 * With no arguments, iterate over all sections and 564 * disassemble only those that contain text. 565 */ 566 dis_tgt_section_iter(current, dis_text_section, dhp); 567 } else { 568 callback_arg_t ca; 569 570 ca.ca_tgt = current; 571 ca.ca_handle = dhp; 572 573 /* 574 * If sections or functions were explicitly specified, 575 * resolve those names against the object, and iterate 576 * over just the resulting data. 577 */ 578 sections = dis_namelist_resolve_sections(g_seclist, 579 current); 580 functions = dis_namelist_resolve_functions(g_funclist, 581 current); 582 583 dis_scnlist_iter(sections, dis_named_section, &ca); 584 dis_funclist_iter(functions, dis_named_function, &ca); 585 586 dis_scnlist_destroy(sections); 587 dis_funclist_destroy(functions); 588 } 589 590 dis_handle_destroy(dhp); 591 } 592 593 dis_tgt_destroy(tgt); 594 } 595 596 void 597 usage(void) 598 { 599 (void) fprintf(stderr, "usage: dis [-CVoqn] [-d sec] \n"); 600 (void) fprintf(stderr, "\t[-D sec] [-F function] [-t sec] file ..\n"); 601 exit(2); 602 } 603 604 typedef struct lib_node { 605 char *path; 606 struct lib_node *next; 607 } lib_node_t; 608 609 int 610 main(int argc, char **argv) 611 { 612 int optchar; 613 int i; 614 lib_node_t *libs = NULL; 615 616 g_funclist = dis_namelist_create(); 617 g_seclist = dis_namelist_create(); 618 619 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) { 620 switch (optchar) { 621 case 'C': 622 g_demangle = 1; 623 break; 624 case 'd': 625 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE); 626 break; 627 case 'D': 628 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE); 629 break; 630 case 'F': 631 dis_namelist_add(g_funclist, optarg, 0); 632 break; 633 case 'l': { 634 /* 635 * The '-l foo' option historically would attempt 
to 636 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR 637 * environment variable has never been supported or 638 * documented for our linker. However, until this 639 * option is formally EOLed, we have to support it. 640 */ 641 char *dir; 642 lib_node_t *node; 643 size_t len; 644 645 if ((dir = getenv("LIBDIR")) == NULL || 646 dir[0] == '\0') 647 dir = "/usr/lib"; 648 node = safe_malloc(sizeof (lib_node_t)); 649 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a"); 650 node->path = safe_malloc(len); 651 652 (void) snprintf(node->path, len, "%s/lib%s.a", dir, 653 optarg); 654 node->next = libs; 655 libs = node; 656 break; 657 } 658 case 'L': 659 /* 660 * The '-L' option historically would attempt to read 661 * the .debug section of the target to determine source 662 * line information in order to annotate the output. 663 * No compiler has emitted these sections in many years, 664 * and the option has never done what it purported to 665 * do. We silently consume the option for 666 * compatibility. 667 */ 668 break; 669 case 'n': 670 g_numeric = 1; 671 break; 672 case 'o': 673 g_flags |= DIS_OCTAL; 674 break; 675 case 'q': 676 g_quiet = 1; 677 break; 678 case 't': 679 dis_namelist_add(g_seclist, optarg, DIS_TEXT); 680 break; 681 case 'V': 682 (void) printf("Solaris disassembler version 1.0\n"); 683 return (0); 684 default: 685 usage(); 686 break; 687 } 688 } 689 690 argc -= optind; 691 argv += optind; 692 693 if (argc == 0 && libs == NULL) { 694 warn("no objects specified"); 695 usage(); 696 } 697 698 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist)) 699 g_doall = 1; 700 701 /* 702 * See comment for 'l' option, above. 703 */ 704 while (libs != NULL) { 705 lib_node_t *node = libs->next; 706 707 dis_file(libs->path); 708 free(libs->path); 709 free(libs); 710 libs = node; 711 } 712 713 for (i = 0; i < argc; i++) 714 dis_file(argv[i]); 715 716 dis_namelist_destroy(g_funclist); 717 dis_namelist_destroy(g_seclist); 718 719 return (g_error); 720 } 721