1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <ctype.h> 30 #include <getopt.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <sys/sysmacros.h> 35 #include <sys/elf_SPARC.h> 36 37 #include <libdisasm.h> 38 39 #include "dis_target.h" 40 #include "dis_util.h" 41 #include "dis_list.h" 42 43 int g_demangle; /* Demangle C++ names */ 44 int g_quiet; /* Quiet mode */ 45 int g_numeric; /* Numeric mode */ 46 int g_flags; /* libdisasm language flags */ 47 int g_doall; /* true if no functions or sections were given */ 48 49 dis_namelist_t *g_funclist; /* list of functions to disassemble, if any */ 50 dis_namelist_t *g_seclist; /* list of sections to disassemble, if any */ 51 52 /* 53 * Section options for -d, -D, and -s 54 */ 55 #define DIS_DATA_RELATIVE 1 56 #define DIS_DATA_ABSOLUTE 2 57 #define DIS_TEXT 3 58 59 /* 60 * libdisasm callback data. Keeps track of current data (function or section) 61 * and offset within that data. 62 */ 63 typedef struct dis_buffer { 64 dis_tgt_t *db_tgt; /* current dis target */ 65 void *db_data; /* function or section data */ 66 uint64_t db_addr; /* address of function start */ 67 size_t db_size; /* size of data */ 68 uint64_t db_nextaddr; /* next address to be read */ 69 } dis_buffer_t; 70 71 #define MINSYMWIDTH 22 /* Minimum width of symbol portion of line */ 72 73 /* 74 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately 75 * formatted symbol, based on the offset and current setttings. 76 */ 77 void 78 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf, 79 size_t buflen) 80 { 81 if (symbol == NULL || g_numeric) 82 (void) snprintf(buf, buflen, "%llx", addr); 83 else { 84 if (g_demangle) 85 symbol = dis_demangle(symbol); 86 87 if (offset == 0) 88 (void) snprintf(buf, buflen, "%s", symbol); 89 else if (g_flags & DIS_OCTAL) 90 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset); 91 else 92 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset); 93 } 94 } 95 96 /* 97 * The main disassembly routine. Given a fixed-sized buffer and starting 98 * address, disassemble the data using the supplied target and libdisasm handle. 99 */ 100 void 101 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data, 102 size_t datalen) 103 { 104 dis_buffer_t db = { 0 }; 105 char buf[BUFSIZE]; 106 char symbuf[BUFSIZE]; 107 const char *symbol; 108 off_t symoffset; 109 int i; 110 int bytesperline; 111 size_t symsize; 112 int isfunc; 113 size_t symwidth = 0; 114 115 db.db_tgt = tgt; 116 db.db_data = data; 117 db.db_addr = addr; 118 db.db_size = datalen; 119 120 dis_set_data(dhp, &db); 121 122 if ((bytesperline = dis_max_instrlen(dhp)) > 6) 123 bytesperline = 6; 124 125 while (addr < db.db_addr + db.db_size) { 126 127 if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) { 128 /* 129 * If we encounter an invalid opcode, we just 130 * print "*** invalid opcode ***" at that first bad 131 * instruction and continue with printing the rest 132 * of the instruction stream as hex data, 133 * We then find the next valid symbol in the section, 134 * and disassemble from there. 135 */ 136 off_t next; 137 138 (void) snprintf(buf, sizeof (buf), 139 "*** invalid opcode ***"); 140 141 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) { 142 db.db_nextaddr = db.db_addr + db.db_size; 143 } else { 144 if (next > db.db_size) 145 db.db_nextaddr = db.db_addr + 146 db.db_size; 147 else 148 db.db_nextaddr = addr + next; 149 } 150 } 151 152 /* 153 * Print out the line as: 154 * 155 * address: bytes text 156 * 157 * If there are more than 6 bytes in any given instruction, 158 * spread the bytes across two lines. We try to get symbolic 159 * information for the address, but if that fails we print out 160 * the numeric address instead. 161 * 162 * We try to keep the address portion of the text aligned at 163 * MINSYMWIDTH characters. If we are disassembling a function 164 * with a long name, this can be annoying. So we pick a width 165 * based on the maximum width that the current symbol can be. 166 * This at least produces text aligned within each function. 167 */ 168 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize, 169 &isfunc); 170 /* Get the maximum length for this symbol */ 171 getsymname(addr, symbol, symsize, symbuf, sizeof (symbuf)); 172 symwidth = MAX(strlen(symbuf), MINSYMWIDTH); 173 174 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf)); 175 176 /* 177 * If we've crossed a new function boundary, print out the 178 * function name on a blank line. 179 */ 180 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc) 181 (void) printf("%s()\n", symbol); 182 183 (void) printf(" %s:%*s ", symbuf, 184 symwidth - strlen(symbuf), ""); 185 186 /* print bytes */ 187 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr)); 188 i++) { 189 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 190 if (g_flags & DIS_OCTAL) 191 (void) printf("%03o ", byte); 192 else 193 (void) printf("%02x ", byte); 194 } 195 196 /* trailing spaces for missing bytes */ 197 for (; i < bytesperline; i++) { 198 if (g_flags & DIS_OCTAL) 199 (void) printf(" "); 200 else 201 (void) printf(" "); 202 } 203 204 /* contents of disassembly */ 205 (void) printf(" %s", buf); 206 207 /* excess bytes that spill over onto subsequent lines */ 208 for (; i < db.db_nextaddr - addr; i++) { 209 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 210 if (i % bytesperline == 0) 211 (void) printf("\n %*s ", symwidth, ""); 212 if (g_flags & DIS_OCTAL) 213 (void) printf("%03o ", byte); 214 else 215 (void) printf("%02x ", byte); 216 } 217 218 (void) printf("\n"); 219 220 addr = db.db_nextaddr; 221 } 222 } 223 224 /* 225 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup 226 * function, and convert the result using getsymname(). 227 */ 228 int 229 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start, 230 size_t *symlen) 231 { 232 dis_buffer_t *db = data; 233 const char *symbol; 234 off_t offset; 235 size_t size; 236 237 /* 238 * If NULL symbol is returned, getsymname takes care of 239 * printing appropriate address in buf instead of symbol. 240 */ 241 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL); 242 243 if (buf != NULL) 244 getsymname(addr, symbol, offset, buf, buflen); 245 246 if (start != NULL) 247 *start = addr - offset; 248 if (symlen != NULL) 249 *symlen = size; 250 251 return (0); 252 } 253 254 /* 255 * libdisasm wrapper around target reading. libdisasm will always read data 256 * in order, so update our current offset within the buffer appropriately. 257 * We only support reading from within the current object; libdisasm should 258 * never ask us to do otherwise. 259 */ 260 int 261 do_read(void *data, uint64_t addr, void *buf, size_t len) 262 { 263 dis_buffer_t *db = data; 264 size_t offset; 265 266 if (addr < db->db_addr || addr >= db->db_addr + db->db_size) 267 return (-1); 268 269 offset = addr - db->db_addr; 270 len = MIN(len, db->db_size - offset); 271 272 (void) memcpy(buf, (char *)db->db_data + offset, len); 273 274 db->db_nextaddr = addr + len; 275 276 return (len); 277 } 278 279 /* 280 * Routine to dump raw data in a human-readable format. Used by the -d and -D 281 * options. We model our output after the xxd(1) program, which gives nicely 282 * formatted output, along with an ASCII translation of the result. 283 */ 284 void 285 dump_data(uint64_t addr, void *data, size_t datalen) 286 { 287 uintptr_t curaddr = addr & (~0xf); 288 uint8_t *bytes = data; 289 int i; 290 int width; 291 292 /* 293 * Determine if the address given to us fits in 32-bit range, in which 294 * case use a 4-byte width. 295 */ 296 if (((addr + datalen) & 0xffffffff00000000ULL) == 0ULL) 297 width = 8; 298 else 299 width = 16; 300 301 while (curaddr < addr + datalen) { 302 /* 303 * Display leading address 304 */ 305 (void) printf("%0*x: ", width, curaddr); 306 307 /* 308 * Print out data in two-byte chunks. If the current address 309 * is before the starting address or after the end of the 310 * section, print spaces. 311 */ 312 for (i = 0; i < 16; i++) { 313 if (curaddr + i < addr ||curaddr + i >= addr + datalen) 314 (void) printf(" "); 315 else 316 (void) printf("%02x", 317 bytes[curaddr + i - addr]); 318 319 if (i & 1) 320 (void) printf(" "); 321 } 322 323 (void) printf(" "); 324 325 /* 326 * Print out the ASCII representation 327 */ 328 for (i = 0; i < 16; i++) { 329 if (curaddr + i < addr || 330 curaddr + i >= addr + datalen) { 331 (void) printf(" "); 332 } else { 333 uint8_t byte = bytes[curaddr + i - addr]; 334 if (isprint(byte)) 335 (void) printf("%c", byte); 336 else 337 (void) printf("."); 338 } 339 } 340 341 (void) printf("\n"); 342 343 curaddr += 16; 344 } 345 } 346 347 /* 348 * Disassemble a section implicitly specified as part of a file. This function 349 * is called for all sections when no other flags are specified. We ignore any 350 * data sections, and print out only those sections containing text. 351 */ 352 void 353 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data) 354 { 355 dis_handle_t *dhp = data; 356 357 /* ignore data sections */ 358 if (!dis_section_istext(scn)) 359 return; 360 361 if (!g_quiet) 362 (void) printf("\nsection %s\n", dis_section_name(scn)); 363 364 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn), 365 dis_section_size(scn)); 366 } 367 368 /* 369 * Structure passed to dis_named_{section,function} which keeps track of both 370 * the target and the libdisasm handle. 371 */ 372 typedef struct callback_arg { 373 dis_tgt_t *ca_tgt; 374 dis_handle_t *ca_handle; 375 } callback_arg_t; 376 377 /* 378 * Disassemble a section explicitly named with -s, -d, or -D. The 'type' 379 * argument contains the type of argument given. Pass the data onto the 380 * appropriate helper routine. 381 */ 382 void 383 dis_named_section(dis_scn_t *scn, int type, void *data) 384 { 385 callback_arg_t *ca = data; 386 387 if (!g_quiet) 388 (void) printf("\nsection %s\n", dis_section_name(scn)); 389 390 switch (type) { 391 case DIS_DATA_RELATIVE: 392 dump_data(0, dis_section_data(scn), dis_section_size(scn)); 393 break; 394 case DIS_DATA_ABSOLUTE: 395 dump_data(dis_section_addr(scn), dis_section_data(scn), 396 dis_section_size(scn)); 397 break; 398 case DIS_TEXT: 399 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn), 400 dis_section_data(scn), dis_section_size(scn)); 401 break; 402 } 403 } 404 405 /* 406 * Disassemble a function explicitly specified with '-F'. The 'type' argument 407 * is unused. 408 */ 409 /* ARGSUSED */ 410 void 411 dis_named_function(dis_func_t *func, int type, void *data) 412 { 413 callback_arg_t *ca = data; 414 415 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func), 416 dis_function_data(func), dis_function_size(func)); 417 } 418 419 /* 420 * Disassemble a complete file. First, we determine the type of the file based 421 * on the ELF machine type, and instantiate a version of the disassembler 422 * appropriate for the file. We then resolve any named sections or functions 423 * against the file, and iterate over the results (or all sections if no flags 424 * were specified). 425 */ 426 void 427 dis_file(const char *filename) 428 { 429 dis_tgt_t *tgt, *current; 430 dis_scnlist_t *sections; 431 dis_funclist_t *functions; 432 dis_handle_t *dhp; 433 GElf_Ehdr ehdr; 434 435 /* 436 * First, initialize the target 437 */ 438 if ((tgt = dis_tgt_create(filename)) == NULL) 439 return; 440 441 if (!g_quiet) 442 (void) printf("disassembly for %s\n\n", filename); 443 444 /* 445 * A given file may contain multiple targets (if it is an archive, for 446 * example). We iterate over all possible targets if this is the case. 447 */ 448 for (current = tgt; current != NULL; current = dis_tgt_next(current)) { 449 dis_tgt_ehdr(current, &ehdr); 450 451 /* 452 * Eventually, this should probably live within libdisasm, and 453 * we should be able to disassemble targets from different 454 * architectures. For now, we only support objects as the 455 * native machine type. 456 */ 457 switch (ehdr.e_machine) { 458 #ifdef __sparc 459 case EM_SPARC: 460 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 461 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 462 warn("invalid E_IDENT field for SPARC object"); 463 return; 464 } 465 g_flags |= DIS_SPARC_V8; 466 break; 467 468 case EM_SPARC32PLUS: 469 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 470 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 471 warn("invalid E_IDENT field for SPARC object"); 472 return; 473 } 474 475 switch (ehdr.e_flags & EF_SPARC_32PLUS_MASK) { 476 case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | 477 EF_SPARC_SUN_US3): 478 case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1): 479 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 480 default: 481 g_flags |= DIS_SPARC_V9; 482 } 483 break; 484 485 case EM_SPARCV9: 486 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 487 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 488 warn("invalid E_IDENT field for SPARC object"); 489 return; 490 } 491 492 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 493 break; 494 #endif /* __sparc */ 495 496 #if defined(__i386) || defined(__amd64) 497 case EM_386: 498 g_flags |= DIS_X86_SIZE32; 499 break; 500 501 case EM_AMD64: 502 g_flags |= DIS_X86_SIZE64; 503 break; 504 #endif /* __i386 || __amd64 */ 505 506 default: 507 die("%s: unsupported ELF machine 0x%x", filename, 508 ehdr.e_machine); 509 } 510 511 if (!g_quiet && dis_tgt_member(current) != NULL) 512 (void) printf("\narchive member %s\n", 513 dis_tgt_member(current)); 514 515 /* 516 * Instantiate a libdisasm handle based on the file type. 517 */ 518 if ((dhp = dis_handle_create(g_flags, current, do_lookup, 519 do_read)) == NULL) 520 die("%s: failed to initialize disassembler: %s", 521 filename, dis_strerror(dis_errno())); 522 523 if (g_doall) { 524 /* 525 * With no arguments, iterate over all sections and 526 * disassemble only those that contain text. 527 */ 528 dis_tgt_section_iter(current, dis_text_section, dhp); 529 } else { 530 callback_arg_t ca; 531 532 ca.ca_tgt = current; 533 ca.ca_handle = dhp; 534 535 /* 536 * If sections or functions were explicitly specified, 537 * resolve those names against the object, and iterate 538 * over just the resulting data. 539 */ 540 sections = dis_namelist_resolve_sections(g_seclist, 541 current); 542 functions = dis_namelist_resolve_functions(g_funclist, 543 current); 544 545 dis_scnlist_iter(sections, dis_named_section, &ca); 546 dis_funclist_iter(functions, dis_named_function, &ca); 547 548 dis_scnlist_destroy(sections); 549 dis_funclist_destroy(functions); 550 } 551 552 dis_handle_destroy(dhp); 553 } 554 555 dis_tgt_destroy(tgt); 556 } 557 558 void 559 usage(void) 560 { 561 (void) fprintf(stderr, "usage: dis [-CVoqn] [-d sec] \n"); 562 (void) fprintf(stderr, "\t[-D sec] [-F function] [-t sec] file ..\n"); 563 exit(2); 564 } 565 566 typedef struct lib_node { 567 char *path; 568 struct lib_node *next; 569 } lib_node_t; 570 571 int 572 main(int argc, char **argv) 573 { 574 int optchar; 575 int i; 576 lib_node_t *libs = NULL; 577 578 g_funclist = dis_namelist_create(); 579 g_seclist = dis_namelist_create(); 580 581 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) { 582 switch (optchar) { 583 case 'C': 584 g_demangle = 1; 585 break; 586 case 'd': 587 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE); 588 break; 589 case 'D': 590 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE); 591 break; 592 case 'F': 593 dis_namelist_add(g_funclist, optarg, 0); 594 break; 595 case 'l': { 596 /* 597 * The '-l foo' option historically would attempt to 598 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR 599 * environment variable has never been supported or 600 * documented for our linker. However, until this 601 * option is formally EOLed, we have to support it. 602 */ 603 char *dir; 604 lib_node_t *node; 605 size_t len; 606 607 if ((dir = getenv("LIBDIR")) == NULL || 608 dir[0] == '\0') 609 dir = "/usr/lib"; 610 node = safe_malloc(sizeof (lib_node_t)); 611 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a"); 612 node->path = safe_malloc(len); 613 614 (void) snprintf(node->path, len, "%s/lib%s.a", dir, 615 optarg); 616 node->next = libs; 617 libs = node; 618 break; 619 } 620 case 'L': 621 /* 622 * The '-L' option historically would attempt to read 623 * the .debug section of the target to determine source 624 * line information in order to annotate the output. 625 * No compiler has emitted these sections in many years, 626 * and the option has never done what it purported to 627 * do. We silently consume the option for 628 * compatibility. 629 */ 630 break; 631 case 'n': 632 g_numeric = 1; 633 break; 634 case 'o': 635 g_flags |= DIS_OCTAL; 636 break; 637 case 'q': 638 g_quiet = 1; 639 break; 640 case 't': 641 dis_namelist_add(g_seclist, optarg, DIS_TEXT); 642 break; 643 case 'V': 644 (void) printf("Solaris disassembler version 1.0\n"); 645 return (0); 646 default: 647 usage(); 648 break; 649 } 650 } 651 652 argc -= optind; 653 argv += optind; 654 655 if (argc == 0 && libs == NULL) { 656 warn("no objects specified"); 657 usage(); 658 } 659 660 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist)) 661 g_doall = 1; 662 663 /* 664 * See comment for 'l' option, above. 665 */ 666 while (libs != NULL) { 667 lib_node_t *node = libs->next; 668 669 dis_file(libs->path); 670 free(libs->path); 671 free(libs); 672 libs = node; 673 } 674 675 for (i = 0; i < argc; i++) 676 dis_file(argv[i]); 677 678 dis_namelist_destroy(g_funclist); 679 dis_namelist_destroy(g_seclist); 680 681 return (g_error); 682 } 683