1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <ctype.h> 30 #include <getopt.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <sys/sysmacros.h> 35 #include <sys/elf_SPARC.h> 36 37 #include <libdisasm.h> 38 39 #include "dis_target.h" 40 #include "dis_util.h" 41 #include "dis_list.h" 42 43 int g_demangle; /* Demangle C++ names */ 44 int g_quiet; /* Quiet mode */ 45 int g_numeric; /* Numeric mode */ 46 int g_flags; /* libdisasm language flags */ 47 int g_doall; /* true if no functions or sections were given */ 48 49 dis_namelist_t *g_funclist; /* list of functions to disassemble, if any */ 50 dis_namelist_t *g_seclist; /* list of sections to disassemble, if any */ 51 52 /* 53 * Section options for -d, -D, and -s 54 */ 55 #define DIS_DATA_RELATIVE 1 56 #define DIS_DATA_ABSOLUTE 2 57 #define DIS_TEXT 3 58 59 /* 60 * libdisasm callback data. Keeps track of current data (function or section) 61 * and offset within that data. 62 */ 63 typedef struct dis_buffer { 64 dis_tgt_t *db_tgt; /* current dis target */ 65 void *db_data; /* function or section data */ 66 uint64_t db_addr; /* address of function start */ 67 size_t db_size; /* size of data */ 68 uint64_t db_nextaddr; /* next address to be read */ 69 } dis_buffer_t; 70 71 #define MINSYMWIDTH 22 /* Minimum width of symbol portion of line */ 72 73 /* 74 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately 75 * formatted symbol, based on the offset and current setttings. 76 */ 77 void 78 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf, 79 size_t buflen) 80 { 81 if (symbol == NULL || g_numeric) { 82 if (g_flags & DIS_OCTAL) 83 (void) snprintf(buf, buflen, "0%llo", addr); 84 else 85 (void) snprintf(buf, buflen, "0x%llx", addr); 86 } else { 87 if (g_demangle) 88 symbol = dis_demangle(symbol); 89 90 if (offset == 0) 91 (void) snprintf(buf, buflen, "%s", symbol); 92 else if (g_flags & DIS_OCTAL) 93 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset); 94 else 95 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset); 96 } 97 } 98 99 /* 100 * The main disassembly routine. Given a fixed-sized buffer and starting 101 * address, disassemble the data using the supplied target and libdisasm handle. 102 */ 103 void 104 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data, 105 size_t datalen) 106 { 107 dis_buffer_t db = { 0 }; 108 char buf[BUFSIZE]; 109 char symbuf[BUFSIZE]; 110 const char *symbol; 111 off_t symoffset; 112 int i; 113 int bytesperline; 114 size_t symsize; 115 int isfunc; 116 size_t symwidth = 0; 117 118 db.db_tgt = tgt; 119 db.db_data = data; 120 db.db_addr = addr; 121 db.db_size = datalen; 122 123 dis_set_data(dhp, &db); 124 125 if ((bytesperline = dis_max_instrlen(dhp)) > 6) 126 bytesperline = 6; 127 128 while (addr < db.db_addr + db.db_size) { 129 130 if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) { 131 /* 132 * If we encounter an invalid opcode, we just 133 * print "*** invalid opcode ***" at that first bad 134 * instruction and continue with printing the rest 135 * of the instruction stream as hex data, 136 * We then find the next valid symbol in the section, 137 * and disassemble from there. 138 */ 139 off_t next; 140 141 (void) snprintf(buf, sizeof (buf), 142 "*** invalid opcode ***"); 143 144 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) { 145 db.db_nextaddr = db.db_addr + db.db_size; 146 } else { 147 if (next > db.db_size) 148 db.db_nextaddr = db.db_addr + 149 db.db_size; 150 else 151 db.db_nextaddr = addr + next; 152 } 153 } 154 155 /* 156 * Print out the line as: 157 * 158 * address: bytes text 159 * 160 * If there are more than 6 bytes in any given instruction, 161 * spread the bytes across two lines. We try to get symbolic 162 * information for the address, but if that fails we print out 163 * the numeric address instead. 164 * 165 * We try to keep the address portion of the text aligned at 166 * MINSYMWIDTH characters. If we are disassembling a function 167 * with a long name, this can be annoying. So we pick a width 168 * based on the maximum width that the current symbol can be. 169 * This at least produces text aligned within each function. 170 */ 171 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize, 172 &isfunc); 173 /* Get the maximum length for this symbol */ 174 getsymname(addr, symbol, symsize, symbuf, sizeof (symbuf)); 175 symwidth = MAX(strlen(symbuf), MINSYMWIDTH); 176 177 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf)); 178 179 /* 180 * If we've crossed a new function boundary, print out the 181 * function name on a blank line. 182 */ 183 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc) 184 (void) printf("%s()\n", symbol); 185 186 (void) printf(" %s:%*s ", symbuf, 187 symwidth - strlen(symbuf), ""); 188 189 /* print bytes */ 190 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr)); 191 i++) { 192 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 193 if (g_flags & DIS_OCTAL) 194 (void) printf("%03o ", byte); 195 else 196 (void) printf("%02x ", byte); 197 } 198 199 /* trailing spaces for missing bytes */ 200 for (; i < bytesperline; i++) { 201 if (g_flags & DIS_OCTAL) 202 (void) printf(" "); 203 else 204 (void) printf(" "); 205 } 206 207 /* contents of disassembly */ 208 (void) printf(" %s", buf); 209 210 /* excess bytes that spill over onto subsequent lines */ 211 for (; i < db.db_nextaddr - addr; i++) { 212 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 213 if (i % bytesperline == 0) 214 (void) printf("\n %*s ", symwidth, ""); 215 if (g_flags & DIS_OCTAL) 216 (void) printf("%03o ", byte); 217 else 218 (void) printf("%02x ", byte); 219 } 220 221 (void) printf("\n"); 222 223 addr = db.db_nextaddr; 224 } 225 } 226 227 /* 228 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup 229 * function, and convert the result using getsymname(). 230 */ 231 int 232 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start, 233 size_t *symlen) 234 { 235 dis_buffer_t *db = data; 236 const char *symbol; 237 off_t offset; 238 size_t size; 239 240 /* 241 * If NULL symbol is returned, getsymname takes care of 242 * printing appropriate address in buf instead of symbol. 243 */ 244 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL); 245 246 if (buf != NULL) 247 getsymname(addr, symbol, offset, buf, buflen); 248 249 if (start != NULL) 250 *start = addr - offset; 251 if (symlen != NULL) 252 *symlen = size; 253 254 if (symbol == NULL) 255 return (-1); 256 257 return (0); 258 } 259 260 /* 261 * libdisasm wrapper around target reading. libdisasm will always read data 262 * in order, so update our current offset within the buffer appropriately. 263 * We only support reading from within the current object; libdisasm should 264 * never ask us to do otherwise. 265 */ 266 int 267 do_read(void *data, uint64_t addr, void *buf, size_t len) 268 { 269 dis_buffer_t *db = data; 270 size_t offset; 271 272 if (addr < db->db_addr || addr >= db->db_addr + db->db_size) 273 return (-1); 274 275 offset = addr - db->db_addr; 276 len = MIN(len, db->db_size - offset); 277 278 (void) memcpy(buf, (char *)db->db_data + offset, len); 279 280 db->db_nextaddr = addr + len; 281 282 return (len); 283 } 284 285 /* 286 * Routine to dump raw data in a human-readable format. Used by the -d and -D 287 * options. We model our output after the xxd(1) program, which gives nicely 288 * formatted output, along with an ASCII translation of the result. 289 */ 290 void 291 dump_data(uint64_t addr, void *data, size_t datalen) 292 { 293 uintptr_t curaddr = addr & (~0xf); 294 uint8_t *bytes = data; 295 int i; 296 int width; 297 298 /* 299 * Determine if the address given to us fits in 32-bit range, in which 300 * case use a 4-byte width. 301 */ 302 if (((addr + datalen) & 0xffffffff00000000ULL) == 0ULL) 303 width = 8; 304 else 305 width = 16; 306 307 while (curaddr < addr + datalen) { 308 /* 309 * Display leading address 310 */ 311 (void) printf("%0*x: ", width, curaddr); 312 313 /* 314 * Print out data in two-byte chunks. If the current address 315 * is before the starting address or after the end of the 316 * section, print spaces. 317 */ 318 for (i = 0; i < 16; i++) { 319 if (curaddr + i < addr ||curaddr + i >= addr + datalen) 320 (void) printf(" "); 321 else 322 (void) printf("%02x", 323 bytes[curaddr + i - addr]); 324 325 if (i & 1) 326 (void) printf(" "); 327 } 328 329 (void) printf(" "); 330 331 /* 332 * Print out the ASCII representation 333 */ 334 for (i = 0; i < 16; i++) { 335 if (curaddr + i < addr || 336 curaddr + i >= addr + datalen) { 337 (void) printf(" "); 338 } else { 339 uint8_t byte = bytes[curaddr + i - addr]; 340 if (isprint(byte)) 341 (void) printf("%c", byte); 342 else 343 (void) printf("."); 344 } 345 } 346 347 (void) printf("\n"); 348 349 curaddr += 16; 350 } 351 } 352 353 /* 354 * Disassemble a section implicitly specified as part of a file. This function 355 * is called for all sections when no other flags are specified. We ignore any 356 * data sections, and print out only those sections containing text. 357 */ 358 void 359 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data) 360 { 361 dis_handle_t *dhp = data; 362 363 /* ignore data sections */ 364 if (!dis_section_istext(scn)) 365 return; 366 367 if (!g_quiet) 368 (void) printf("\nsection %s\n", dis_section_name(scn)); 369 370 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn), 371 dis_section_size(scn)); 372 } 373 374 /* 375 * Structure passed to dis_named_{section,function} which keeps track of both 376 * the target and the libdisasm handle. 377 */ 378 typedef struct callback_arg { 379 dis_tgt_t *ca_tgt; 380 dis_handle_t *ca_handle; 381 } callback_arg_t; 382 383 /* 384 * Disassemble a section explicitly named with -s, -d, or -D. The 'type' 385 * argument contains the type of argument given. Pass the data onto the 386 * appropriate helper routine. 387 */ 388 void 389 dis_named_section(dis_scn_t *scn, int type, void *data) 390 { 391 callback_arg_t *ca = data; 392 393 if (!g_quiet) 394 (void) printf("\nsection %s\n", dis_section_name(scn)); 395 396 switch (type) { 397 case DIS_DATA_RELATIVE: 398 dump_data(0, dis_section_data(scn), dis_section_size(scn)); 399 break; 400 case DIS_DATA_ABSOLUTE: 401 dump_data(dis_section_addr(scn), dis_section_data(scn), 402 dis_section_size(scn)); 403 break; 404 case DIS_TEXT: 405 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn), 406 dis_section_data(scn), dis_section_size(scn)); 407 break; 408 } 409 } 410 411 /* 412 * Disassemble a function explicitly specified with '-F'. The 'type' argument 413 * is unused. 414 */ 415 /* ARGSUSED */ 416 void 417 dis_named_function(dis_func_t *func, int type, void *data) 418 { 419 callback_arg_t *ca = data; 420 421 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func), 422 dis_function_data(func), dis_function_size(func)); 423 } 424 425 /* 426 * Disassemble a complete file. First, we determine the type of the file based 427 * on the ELF machine type, and instantiate a version of the disassembler 428 * appropriate for the file. We then resolve any named sections or functions 429 * against the file, and iterate over the results (or all sections if no flags 430 * were specified). 431 */ 432 void 433 dis_file(const char *filename) 434 { 435 dis_tgt_t *tgt, *current; 436 dis_scnlist_t *sections; 437 dis_funclist_t *functions; 438 dis_handle_t *dhp; 439 GElf_Ehdr ehdr; 440 441 /* 442 * First, initialize the target 443 */ 444 if ((tgt = dis_tgt_create(filename)) == NULL) 445 return; 446 447 if (!g_quiet) 448 (void) printf("disassembly for %s\n\n", filename); 449 450 /* 451 * A given file may contain multiple targets (if it is an archive, for 452 * example). We iterate over all possible targets if this is the case. 453 */ 454 for (current = tgt; current != NULL; current = dis_tgt_next(current)) { 455 dis_tgt_ehdr(current, &ehdr); 456 457 /* 458 * Eventually, this should probably live within libdisasm, and 459 * we should be able to disassemble targets from different 460 * architectures. For now, we only support objects as the 461 * native machine type. 462 */ 463 switch (ehdr.e_machine) { 464 #ifdef __sparc 465 case EM_SPARC: 466 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 467 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 468 warn("invalid E_IDENT field for SPARC object"); 469 return; 470 } 471 g_flags |= DIS_SPARC_V8; 472 break; 473 474 case EM_SPARC32PLUS: 475 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 476 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 477 warn("invalid E_IDENT field for SPARC object"); 478 return; 479 } 480 481 switch (ehdr.e_flags & EF_SPARC_32PLUS_MASK) { 482 case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | 483 EF_SPARC_SUN_US3): 484 case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1): 485 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 486 default: 487 g_flags |= DIS_SPARC_V9; 488 } 489 break; 490 491 case EM_SPARCV9: 492 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 493 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 494 warn("invalid E_IDENT field for SPARC object"); 495 return; 496 } 497 498 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 499 break; 500 #endif /* __sparc */ 501 502 #if defined(__i386) || defined(__amd64) 503 case EM_386: 504 g_flags |= DIS_X86_SIZE32; 505 break; 506 507 case EM_AMD64: 508 g_flags |= DIS_X86_SIZE64; 509 break; 510 #endif /* __i386 || __amd64 */ 511 512 default: 513 die("%s: unsupported ELF machine 0x%x", filename, 514 ehdr.e_machine); 515 } 516 517 if (!g_quiet && dis_tgt_member(current) != NULL) 518 (void) printf("\narchive member %s\n", 519 dis_tgt_member(current)); 520 521 /* 522 * Instantiate a libdisasm handle based on the file type. 523 */ 524 if ((dhp = dis_handle_create(g_flags, current, do_lookup, 525 do_read)) == NULL) 526 die("%s: failed to initialize disassembler: %s", 527 filename, dis_strerror(dis_errno())); 528 529 if (g_doall) { 530 /* 531 * With no arguments, iterate over all sections and 532 * disassemble only those that contain text. 533 */ 534 dis_tgt_section_iter(current, dis_text_section, dhp); 535 } else { 536 callback_arg_t ca; 537 538 ca.ca_tgt = current; 539 ca.ca_handle = dhp; 540 541 /* 542 * If sections or functions were explicitly specified, 543 * resolve those names against the object, and iterate 544 * over just the resulting data. 545 */ 546 sections = dis_namelist_resolve_sections(g_seclist, 547 current); 548 functions = dis_namelist_resolve_functions(g_funclist, 549 current); 550 551 dis_scnlist_iter(sections, dis_named_section, &ca); 552 dis_funclist_iter(functions, dis_named_function, &ca); 553 554 dis_scnlist_destroy(sections); 555 dis_funclist_destroy(functions); 556 } 557 558 dis_handle_destroy(dhp); 559 } 560 561 dis_tgt_destroy(tgt); 562 } 563 564 void 565 usage(void) 566 { 567 (void) fprintf(stderr, "usage: dis [-CVoqn] [-d sec] \n"); 568 (void) fprintf(stderr, "\t[-D sec] [-F function] [-t sec] file ..\n"); 569 exit(2); 570 } 571 572 typedef struct lib_node { 573 char *path; 574 struct lib_node *next; 575 } lib_node_t; 576 577 int 578 main(int argc, char **argv) 579 { 580 int optchar; 581 int i; 582 lib_node_t *libs = NULL; 583 584 g_funclist = dis_namelist_create(); 585 g_seclist = dis_namelist_create(); 586 587 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) { 588 switch (optchar) { 589 case 'C': 590 g_demangle = 1; 591 break; 592 case 'd': 593 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE); 594 break; 595 case 'D': 596 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE); 597 break; 598 case 'F': 599 dis_namelist_add(g_funclist, optarg, 0); 600 break; 601 case 'l': { 602 /* 603 * The '-l foo' option historically would attempt to 604 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR 605 * environment variable has never been supported or 606 * documented for our linker. However, until this 607 * option is formally EOLed, we have to support it. 608 */ 609 char *dir; 610 lib_node_t *node; 611 size_t len; 612 613 if ((dir = getenv("LIBDIR")) == NULL || 614 dir[0] == '\0') 615 dir = "/usr/lib"; 616 node = safe_malloc(sizeof (lib_node_t)); 617 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a"); 618 node->path = safe_malloc(len); 619 620 (void) snprintf(node->path, len, "%s/lib%s.a", dir, 621 optarg); 622 node->next = libs; 623 libs = node; 624 break; 625 } 626 case 'L': 627 /* 628 * The '-L' option historically would attempt to read 629 * the .debug section of the target to determine source 630 * line information in order to annotate the output. 631 * No compiler has emitted these sections in many years, 632 * and the option has never done what it purported to 633 * do. We silently consume the option for 634 * compatibility. 635 */ 636 break; 637 case 'n': 638 g_numeric = 1; 639 break; 640 case 'o': 641 g_flags |= DIS_OCTAL; 642 break; 643 case 'q': 644 g_quiet = 1; 645 break; 646 case 't': 647 dis_namelist_add(g_seclist, optarg, DIS_TEXT); 648 break; 649 case 'V': 650 (void) printf("Solaris disassembler version 1.0\n"); 651 return (0); 652 default: 653 usage(); 654 break; 655 } 656 } 657 658 argc -= optind; 659 argv += optind; 660 661 if (argc == 0 && libs == NULL) { 662 warn("no objects specified"); 663 usage(); 664 } 665 666 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist)) 667 g_doall = 1; 668 669 /* 670 * See comment for 'l' option, above. 671 */ 672 while (libs != NULL) { 673 lib_node_t *node = libs->next; 674 675 dis_file(libs->path); 676 free(libs->path); 677 free(libs); 678 libs = node; 679 } 680 681 for (i = 0; i < argc; i++) 682 dis_file(argv[i]); 683 684 dis_namelist_destroy(g_funclist); 685 dis_namelist_destroy(g_seclist); 686 687 return (g_error); 688 } 689