1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * Copyright 2011 Jason King. All rights reserved. 27 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org> 28 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net> 29 * Copyright 2018, Joyent, Inc. 
 * Copyright 2024 Oxide Computer Company
 */

#include <ctype.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/hexdump.h>
#include <sys/sysmacros.h>
#include <sys/elf_SPARC.h>

#include <libdisasm.h>

#include "dis_target.h"
#include "dis_util.h"
#include "dis_list.h"

/*
 * Global option state.  These are set while parsing the command line in
 * main() and consulted by the output routines in this file.
 */
int g_demangle;		/* Demangle C++ names (-C) */
int g_quiet;		/* Quiet mode (-q) */
int g_numeric;		/* Numeric mode (-n): print addresses, not symbols */
int g_flags;		/* libdisasm flags: DIS_OCTAL (-o) plus arch flags */
int g_doall;		/* true if no functions or sections were given */

dis_namelist_t *g_funclist;	/* list of functions to disassemble, if any */
dis_namelist_t *g_seclist;	/* list of sections to disassemble, if any */

/*
 * Section options for -d, -D, and -t.  These are the 'type' values stored
 * with each name added to g_seclist and later handed to dis_named_section().
 */
#define	DIS_DATA_RELATIVE	1
#define	DIS_DATA_ABSOLUTE	2
#define	DIS_TEXT		3

/*
 * libdisasm callback data.  Keeps track of current data (function or section)
 * and offset within that data.  A pointer to this structure is registered
 * with dis_set_data() and handed back to do_lookup() and do_read().
 */
typedef struct dis_buffer {
	dis_tgt_t	*db_tgt;	/* current dis target */
	void		*db_data;	/* function or section data */
	uint64_t	db_addr;	/* address of first byte of db_data */
	size_t		db_size;	/* size of data */
	uint64_t	db_nextaddr;	/* next address to be read */
} dis_buffer_t;

/*
 * NOTE(review): no code visible in this file references MINSYMWIDTH; the
 * symbol column width is computed dynamically in dis_data().
 */
#define	MINSYMWIDTH	22	/* Minimum width of symbol portion of line */

/*
 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
 * formatted symbol, based on the offset and current settings.
81 */ 82 void 83 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf, 84 size_t buflen) 85 { 86 if (symbol == NULL || g_numeric) { 87 if (g_flags & DIS_OCTAL) 88 (void) snprintf(buf, buflen, "0%llo", addr); 89 else 90 (void) snprintf(buf, buflen, "0x%llx", addr); 91 } else { 92 if (g_demangle) 93 symbol = dis_demangle(symbol); 94 95 if (offset == 0) 96 (void) snprintf(buf, buflen, "%s", symbol); 97 else if (g_flags & DIS_OCTAL) 98 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset); 99 else 100 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset); 101 } 102 } 103 104 /* 105 * Determine if we are on an architecture with fixed-size instructions, 106 * and if so, what size they are. 107 */ 108 static int 109 insn_size(dis_handle_t *dhp) 110 { 111 int min = dis_min_instrlen(dhp); 112 int max = dis_max_instrlen(dhp); 113 114 if (min == max) 115 return (min); 116 117 return (0); 118 } 119 120 /* 121 * The main disassembly routine. Given a fixed-sized buffer and starting 122 * address, disassemble the data using the supplied target and libdisasm handle. 
123 */ 124 void 125 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data, 126 size_t datalen) 127 { 128 dis_buffer_t db = { 0 }; 129 char buf[BUFSIZE]; 130 char symbuf[BUFSIZE]; 131 const char *symbol; 132 const char *last_symbol; 133 off_t symoffset; 134 int i; 135 int bytesperline; 136 size_t symsize; 137 int isfunc; 138 size_t symwidth = 0; 139 int ret; 140 int insz = insn_size(dhp); 141 142 db.db_tgt = tgt; 143 db.db_data = data; 144 db.db_addr = addr; 145 db.db_size = datalen; 146 147 dis_set_data(dhp, &db); 148 149 if ((bytesperline = dis_max_instrlen(dhp)) > 6) 150 bytesperline = 6; 151 152 symbol = NULL; 153 154 while (addr < db.db_addr + db.db_size) { 155 156 ret = dis_disassemble(dhp, addr, buf, BUFSIZE); 157 if (ret != 0 && insz > 0) { 158 /* 159 * Since we know instructions are fixed size, we 160 * always know the address of the next instruction 161 */ 162 (void) snprintf(buf, sizeof (buf), 163 "*** invalid opcode ***"); 164 db.db_nextaddr = addr + insz; 165 166 } else if (ret != 0) { 167 off_t next; 168 169 (void) snprintf(buf, sizeof (buf), 170 "*** invalid opcode ***"); 171 172 /* 173 * On architectures with variable sized instructions 174 * we have no way to figure out where the next 175 * instruction starts if we encounter an invalid 176 * instruction. Instead we print the rest of the 177 * instruction stream as hex until we reach the 178 * next valid symbol in the section. 179 */ 180 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) { 181 db.db_nextaddr = db.db_addr + db.db_size; 182 } else { 183 if (next > db.db_size) 184 db.db_nextaddr = db.db_addr + 185 db.db_size; 186 else 187 db.db_nextaddr = addr + next; 188 } 189 } 190 191 /* 192 * Print out the line as: 193 * 194 * address: bytes text 195 * 196 * If there are more than 6 bytes in any given instruction, 197 * spread the bytes across two lines. We try to get symbolic 198 * information for the address, but if that fails we print out 199 * the numeric address instead. 
200 * 201 * We try to keep the address portion of the text aligned at 202 * MINSYMWIDTH characters. If we are disassembling a function 203 * with a long name, this can be annoying. So we pick a width 204 * based on the maximum width that the current symbol can be. 205 * This at least produces text aligned within each function. 206 */ 207 last_symbol = symbol; 208 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize, 209 &isfunc); 210 if (symbol == NULL) { 211 symbol = dis_find_section(tgt, addr, &symoffset); 212 symsize = symoffset; 213 } 214 215 if (symbol != last_symbol) 216 getsymname(addr, symbol, symsize, symbuf, 217 sizeof (symbuf)); 218 219 symwidth = MAX(symwidth, strlen(symbuf)); 220 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf)); 221 222 /* 223 * If we've crossed a new function boundary, print out the 224 * function name on a blank line. 225 */ 226 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc) 227 (void) printf("%s()\n", symbol); 228 229 (void) printf(" %s:%*s ", symbuf, 230 symwidth - strlen(symbuf), ""); 231 232 /* print bytes */ 233 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr)); 234 i++) { 235 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 236 if (g_flags & DIS_OCTAL) 237 (void) printf("%03o ", byte); 238 else 239 (void) printf("%02x ", byte); 240 } 241 242 /* trailing spaces for missing bytes */ 243 for (; i < bytesperline; i++) { 244 if (g_flags & DIS_OCTAL) 245 (void) printf(" "); 246 else 247 (void) printf(" "); 248 } 249 250 /* contents of disassembly */ 251 (void) printf(" %s", buf); 252 253 /* excess bytes that spill over onto subsequent lines */ 254 for (; i < db.db_nextaddr - addr; i++) { 255 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 256 if (i % bytesperline == 0) 257 (void) printf("\n %*s ", symwidth, ""); 258 if (g_flags & DIS_OCTAL) 259 (void) printf("%03o ", byte); 260 else 261 (void) printf("%02x ", byte); 262 } 263 264 (void) printf("\n"); 265 266 addr = 
db.db_nextaddr; 267 } 268 } 269 270 /* 271 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup 272 * function, and convert the result using getsymname(). 273 */ 274 int 275 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start, 276 size_t *symlen) 277 { 278 dis_buffer_t *db = data; 279 const char *symbol; 280 off_t offset; 281 size_t size; 282 283 /* 284 * If NULL symbol is returned, getsymname takes care of 285 * printing appropriate address in buf instead of symbol. 286 */ 287 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL); 288 289 if (buf != NULL) 290 getsymname(addr, symbol, offset, buf, buflen); 291 292 if (start != NULL) 293 *start = addr - offset; 294 if (symlen != NULL) 295 *symlen = size; 296 297 if (symbol == NULL) 298 return (-1); 299 300 return (0); 301 } 302 303 /* 304 * libdisasm wrapper around target reading. libdisasm will always read data 305 * in order, so update our current offset within the buffer appropriately. 306 * We only support reading from within the current object; libdisasm should 307 * never ask us to do otherwise. 308 */ 309 int 310 do_read(void *data, uint64_t addr, void *buf, size_t len) 311 { 312 dis_buffer_t *db = data; 313 size_t offset; 314 315 if (addr < db->db_addr || addr >= db->db_addr + db->db_size) 316 return (-1); 317 318 offset = addr - db->db_addr; 319 len = MIN(len, db->db_size - offset); 320 321 (void) memcpy(buf, (char *)db->db_data + offset, len); 322 323 db->db_nextaddr = addr + len; 324 325 return (len); 326 } 327 328 /* 329 * Routine to dump raw data in a human-readable format. Used by the -d and -D 330 * options. 331 */ 332 void 333 dump_data(uint64_t addr, void *data, size_t datalen) 334 { 335 hexdump_t h; 336 337 hexdump_init(&h); 338 /* Print out data in two-byte chunks. 
*/ 339 hexdump_set_grouping(&h, 2); 340 hexdump_set_addr(&h, addr); 341 342 /* 343 * Determine if the address given to us fits in 32-bit range, in which 344 * case use a 4-byte width. 345 */ 346 if (((addr + datalen) & 0xffffffff00000000ULL) == 0ULL) 347 hexdump_set_addrwidth(&h, 8); 348 else 349 hexdump_set_addrwidth(&h, 16); 350 351 352 (void) hexdump_fileh(&h, data, datalen, HDF_DEFAULT | HDF_ALIGN, 353 stdout); 354 355 hexdump_fini(&h); 356 } 357 358 /* 359 * Disassemble a section implicitly specified as part of a file. This function 360 * is called for all sections when no other flags are specified. We ignore any 361 * data sections, and print out only those sections containing text. 362 */ 363 void 364 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data) 365 { 366 dis_handle_t *dhp = data; 367 368 /* ignore data sections */ 369 if (!dis_section_istext(scn)) 370 return; 371 372 if (!g_quiet) 373 (void) printf("\nsection %s\n", dis_section_name(scn)); 374 375 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn), 376 dis_section_size(scn)); 377 } 378 379 /* 380 * Structure passed to dis_named_{section,function} which keeps track of both 381 * the target and the libdisasm handle. 382 */ 383 typedef struct callback_arg { 384 dis_tgt_t *ca_tgt; 385 dis_handle_t *ca_handle; 386 } callback_arg_t; 387 388 /* 389 * Disassemble a section explicitly named with -s, -d, or -D. The 'type' 390 * argument contains the type of argument given. Pass the data onto the 391 * appropriate helper routine. 
392 */ 393 void 394 dis_named_section(dis_scn_t *scn, int type, void *data) 395 { 396 callback_arg_t *ca = data; 397 398 if (!g_quiet) 399 (void) printf("\nsection %s\n", dis_section_name(scn)); 400 401 switch (type) { 402 case DIS_DATA_RELATIVE: 403 dump_data(0, dis_section_data(scn), dis_section_size(scn)); 404 break; 405 case DIS_DATA_ABSOLUTE: 406 dump_data(dis_section_addr(scn), dis_section_data(scn), 407 dis_section_size(scn)); 408 break; 409 case DIS_TEXT: 410 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn), 411 dis_section_data(scn), dis_section_size(scn)); 412 break; 413 } 414 } 415 416 /* 417 * Disassemble a function explicitly specified with '-F'. The 'type' argument 418 * is unused. 419 */ 420 /* ARGSUSED */ 421 void 422 dis_named_function(dis_func_t *func, int type, void *data) 423 { 424 callback_arg_t *ca = data; 425 426 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func), 427 dis_function_data(func), dis_function_size(func)); 428 } 429 430 /* 431 * Disassemble a complete file. First, we determine the type of the file based 432 * on the ELF machine type, and instantiate a version of the disassembler 433 * appropriate for the file. We then resolve any named sections or functions 434 * against the file, and iterate over the results (or all sections if no flags 435 * were specified). 436 */ 437 void 438 dis_file(const char *filename) 439 { 440 dis_tgt_t *tgt, *current; 441 dis_scnlist_t *sections; 442 dis_funclist_t *functions; 443 dis_handle_t *dhp; 444 GElf_Ehdr ehdr; 445 446 /* 447 * First, initialize the target 448 */ 449 if ((tgt = dis_tgt_create(filename)) == NULL) 450 return; 451 452 if (!g_quiet) 453 (void) printf("disassembly for %s\n\n", filename); 454 455 /* 456 * A given file may contain multiple targets (if it is an archive, for 457 * example). We iterate over all possible targets if this is the case. 
458 */ 459 for (current = tgt; current != NULL; current = dis_tgt_next(current)) { 460 dis_tgt_ehdr(current, &ehdr); 461 462 /* 463 * Eventually, this should probably live within libdisasm, and 464 * we should be able to disassemble targets from different 465 * architectures. For now, we only support objects as the 466 * native machine type. 467 */ 468 switch (ehdr.e_machine) { 469 case EM_SPARC: 470 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 471 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 472 warn("invalid E_IDENT field for SPARC object"); 473 return; 474 } 475 g_flags |= DIS_SPARC_V8; 476 break; 477 478 case EM_SPARC32PLUS: 479 { 480 uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK; 481 482 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 483 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 484 warn("invalid E_IDENT field for SPARC object"); 485 return; 486 } 487 488 if (flags != 0 && 489 (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | 490 EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS) 491 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 492 else 493 g_flags |= DIS_SPARC_V9; 494 break; 495 } 496 497 case EM_SPARCV9: 498 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 499 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 500 warn("invalid E_IDENT field for SPARC object"); 501 return; 502 } 503 504 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 505 break; 506 507 case EM_386: 508 g_flags |= DIS_X86_SIZE32; 509 break; 510 511 case EM_AMD64: 512 g_flags |= DIS_X86_SIZE64; 513 break; 514 515 case EM_S370: 516 g_flags |= DIS_S370; 517 518 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 519 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 520 warn("invalid E_IDENT field for S370 object"); 521 return; 522 } 523 break; 524 525 case EM_S390: 526 /* 527 * Both 390 and z/Architecture use EM_S390, the only 528 * differences is the class: ELFCLASS32 for plain 529 * old s390 and ELFCLASS64 for z/Architecture (aka. 530 * s390x). 
531 */ 532 if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) { 533 g_flags |= DIS_S390_31; 534 } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { 535 g_flags |= DIS_S390_64; 536 } else { 537 warn("invalid E_IDENT field for S390 object"); 538 return; 539 } 540 541 if (ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 542 warn("invalid E_IDENT field for S390 object"); 543 return; 544 } 545 break; 546 547 case EM_RISCV: 548 /* 549 * RISC-V is defined to be litle endian. The current ISA 550 * makes it clear that the 64-bit instructions can 551 * co-exist with the 32-bit ones and therefore we don't 552 * need a separate elf class at this time. 553 */ 554 if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { 555 warn("invalid EI_DATA field for RISC-V object"); 556 return; 557 } 558 559 if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) { 560 g_flags |= DIS_RISCV_32; 561 } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { 562 g_flags |= DIS_RISCV_64; 563 } else { 564 warn("invalid EI_CLASS field for RISC-V " 565 "object"); 566 return; 567 } 568 break; 569 570 default: 571 die("%s: unsupported ELF machine 0x%x", filename, 572 ehdr.e_machine); 573 } 574 575 /* 576 * If ET_REL (.o), printing immediate symbols is likely to 577 * result in garbage, as symbol lookups on unrelocated 578 * immediates find false and useless matches. 579 */ 580 581 if (ehdr.e_type == ET_REL) 582 g_flags |= DIS_NOIMMSYM; 583 584 if (!g_quiet && dis_tgt_member(current) != NULL) 585 (void) printf("\narchive member %s\n", 586 dis_tgt_member(current)); 587 588 /* 589 * Instantiate a libdisasm handle based on the file type. 590 */ 591 if ((dhp = dis_handle_create(g_flags, current, do_lookup, 592 do_read)) == NULL) 593 die("%s: failed to initialize disassembler: %s", 594 filename, dis_strerror(dis_errno())); 595 596 if (g_doall) { 597 /* 598 * With no arguments, iterate over all sections and 599 * disassemble only those that contain text. 
600 */ 601 dis_tgt_section_iter(current, dis_text_section, dhp); 602 } else { 603 callback_arg_t ca; 604 605 ca.ca_tgt = current; 606 ca.ca_handle = dhp; 607 608 /* 609 * If sections or functions were explicitly specified, 610 * resolve those names against the object, and iterate 611 * over just the resulting data. 612 */ 613 sections = dis_namelist_resolve_sections(g_seclist, 614 current); 615 functions = dis_namelist_resolve_functions(g_funclist, 616 current); 617 618 dis_scnlist_iter(sections, dis_named_section, &ca); 619 dis_funclist_iter(functions, dis_named_function, &ca); 620 621 dis_scnlist_destroy(sections); 622 dis_funclist_destroy(functions); 623 } 624 625 dis_handle_destroy(dhp); 626 } 627 628 dis_tgt_destroy(tgt); 629 } 630 631 void 632 usage(void) 633 { 634 (void) fprintf(stderr, "usage: dis [-CVoqn] [-d sec] \n"); 635 (void) fprintf(stderr, "\t[-D sec] [-F function] [-t sec] file ..\n"); 636 exit(2); 637 } 638 639 typedef struct lib_node { 640 char *path; 641 struct lib_node *next; 642 } lib_node_t; 643 644 int 645 main(int argc, char **argv) 646 { 647 int optchar; 648 int i; 649 lib_node_t *libs = NULL; 650 651 g_funclist = dis_namelist_create(); 652 g_seclist = dis_namelist_create(); 653 654 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) { 655 switch (optchar) { 656 case 'C': 657 g_demangle = 1; 658 break; 659 case 'd': 660 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE); 661 break; 662 case 'D': 663 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE); 664 break; 665 case 'F': 666 dis_namelist_add(g_funclist, optarg, 0); 667 break; 668 case 'l': { 669 /* 670 * The '-l foo' option historically would attempt to 671 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR 672 * environment variable has never been supported or 673 * documented for our linker. However, until this 674 * option is formally EOLed, we have to support it. 
675 */ 676 char *dir; 677 lib_node_t *node; 678 size_t len; 679 680 if ((dir = getenv("LIBDIR")) == NULL || 681 dir[0] == '\0') 682 dir = "/usr/lib"; 683 node = safe_malloc(sizeof (lib_node_t)); 684 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a"); 685 node->path = safe_malloc(len); 686 687 (void) snprintf(node->path, len, "%s/lib%s.a", dir, 688 optarg); 689 node->next = libs; 690 libs = node; 691 break; 692 } 693 case 'L': 694 /* 695 * The '-L' option historically would attempt to read 696 * the .debug section of the target to determine source 697 * line information in order to annotate the output. 698 * No compiler has emitted these sections in many years, 699 * and the option has never done what it purported to 700 * do. We silently consume the option for 701 * compatibility. 702 */ 703 break; 704 case 'n': 705 g_numeric = 1; 706 break; 707 case 'o': 708 g_flags |= DIS_OCTAL; 709 break; 710 case 'q': 711 g_quiet = 1; 712 break; 713 case 't': 714 dis_namelist_add(g_seclist, optarg, DIS_TEXT); 715 break; 716 case 'V': 717 (void) printf("Solaris disassembler version 1.0\n"); 718 return (0); 719 default: 720 usage(); 721 break; 722 } 723 } 724 725 argc -= optind; 726 argv += optind; 727 728 if (argc == 0 && libs == NULL) { 729 warn("no objects specified"); 730 usage(); 731 } 732 733 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist)) 734 g_doall = 1; 735 736 /* 737 * See comment for 'l' option, above. 738 */ 739 while (libs != NULL) { 740 lib_node_t *node = libs->next; 741 742 dis_file(libs->path); 743 free(libs->path); 744 free(libs); 745 libs = node; 746 } 747 748 for (i = 0; i < argc; i++) 749 dis_file(argv[i]); 750 751 dis_namelist_destroy(g_funclist); 752 dis_namelist_destroy(g_seclist); 753 754 return (g_error); 755 } 756