1*dc0093f4Seschrock /* 2*dc0093f4Seschrock * CDDL HEADER START 3*dc0093f4Seschrock * 4*dc0093f4Seschrock * The contents of this file are subject to the terms of the 5*dc0093f4Seschrock * Common Development and Distribution License (the "License"). 6*dc0093f4Seschrock * You may not use this file except in compliance with the License. 7*dc0093f4Seschrock * 8*dc0093f4Seschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*dc0093f4Seschrock * or http://www.opensolaris.org/os/licensing. 10*dc0093f4Seschrock * See the License for the specific language governing permissions 11*dc0093f4Seschrock * and limitations under the License. 12*dc0093f4Seschrock * 13*dc0093f4Seschrock * When distributing Covered Code, include this CDDL HEADER in each 14*dc0093f4Seschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*dc0093f4Seschrock * If applicable, add the following below this CDDL HEADER, with the 16*dc0093f4Seschrock * fields enclosed by brackets "[]" replaced with your own identifying 17*dc0093f4Seschrock * information: Portions Copyright [yyyy] [name of copyright owner] 18*dc0093f4Seschrock * 19*dc0093f4Seschrock * CDDL HEADER END 20*dc0093f4Seschrock */ 21*dc0093f4Seschrock 22*dc0093f4Seschrock /* 23*dc0093f4Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24*dc0093f4Seschrock * Use is subject to license terms. 25*dc0093f4Seschrock */ 26*dc0093f4Seschrock 27*dc0093f4Seschrock #pragma ident "%Z%%M% %I% %E% SMI" 28*dc0093f4Seschrock 29*dc0093f4Seschrock #include <assert.h> 30*dc0093f4Seschrock #include <errno.h> 31*dc0093f4Seschrock #include <fcntl.h> 32*dc0093f4Seschrock #include <gelf.h> 33*dc0093f4Seschrock #include <libelf.h> 34*dc0093f4Seschrock #include <stdlib.h> 35*dc0093f4Seschrock #include <string.h> 36*dc0093f4Seschrock #include <unistd.h> 37*dc0093f4Seschrock 38*dc0093f4Seschrock #include <sys/fcntl.h> 39*dc0093f4Seschrock #include <sys/stat.h> 40*dc0093f4Seschrock 41*dc0093f4Seschrock #include "dis_target.h" 42*dc0093f4Seschrock #include "dis_util.h" 43*dc0093f4Seschrock 44*dc0093f4Seschrock /* 45*dc0093f4Seschrock * Standard ELF disassembler target. 46*dc0093f4Seschrock * 47*dc0093f4Seschrock * We only support disassembly of ELF files, though this target interface could 48*dc0093f4Seschrock * be extended in the future. Each basic type (target, func, section) contains 49*dc0093f4Seschrock * enough information to uniquely identify the location within the file. The 50*dc0093f4Seschrock * interfaces use libelf(3LIB) to do the actual processing of the file. 51*dc0093f4Seschrock */ 52*dc0093f4Seschrock 53*dc0093f4Seschrock /* 54*dc0093f4Seschrock * Symbol table entry type. We maintain our own symbol table sorted by address, 55*dc0093f4Seschrock * with the symbol name already resolved against the ELF symbol table. 56*dc0093f4Seschrock */ 57*dc0093f4Seschrock typedef struct sym_entry { 58*dc0093f4Seschrock GElf_Sym se_sym; /* value of symbol */ 59*dc0093f4Seschrock char *se_name; /* name of symbol */ 60*dc0093f4Seschrock int se_shndx; /* section where symbol is located */ 61*dc0093f4Seschrock } sym_entry_t; 62*dc0093f4Seschrock 63*dc0093f4Seschrock /* 64*dc0093f4Seschrock * Target data structure. This structure keeps track of the ELF file 65*dc0093f4Seschrock * information, a few bits of pre-processed section index information, and 66*dc0093f4Seschrock * sorted versions of the symbol table. We also keep track of the last symbol 67*dc0093f4Seschrock * looked up, as the majority of lookups remain within the same symbol. 68*dc0093f4Seschrock */ 69*dc0093f4Seschrock struct dis_tgt { 70*dc0093f4Seschrock Elf *dt_elf; /* libelf handle */ 71*dc0093f4Seschrock Elf *dt_elf_root; /* main libelf handle (for archives) */ 72*dc0093f4Seschrock const char *dt_filename; /* name of file */ 73*dc0093f4Seschrock int dt_fd; /* underlying file descriptor */ 74*dc0093f4Seschrock size_t dt_shstrndx; /* section index of .shstrtab */ 75*dc0093f4Seschrock size_t dt_symidx; /* section index of symbol table */ 76*dc0093f4Seschrock sym_entry_t *dt_symcache; /* last symbol looked up */ 77*dc0093f4Seschrock sym_entry_t *dt_symtab; /* sorted symbol table */ 78*dc0093f4Seschrock int dt_symcount; /* # of symbol table entries */ 79*dc0093f4Seschrock struct dis_tgt *dt_next; /* next target (for archives) */ 80*dc0093f4Seschrock Elf_Arhdr *dt_arhdr; /* archive header (for archives) */ 81*dc0093f4Seschrock }; 82*dc0093f4Seschrock 83*dc0093f4Seschrock /* 84*dc0093f4Seschrock * Function data structure. We resolve the symbol and lookup the associated ELF 85*dc0093f4Seschrock * data when building this structure. The offset is calculated based on the 86*dc0093f4Seschrock * section's starting address. 87*dc0093f4Seschrock */ 88*dc0093f4Seschrock struct dis_func { 89*dc0093f4Seschrock sym_entry_t *df_sym; /* symbol table reference */ 90*dc0093f4Seschrock Elf_Data *df_data; /* associated ELF data */ 91*dc0093f4Seschrock size_t df_offset; /* offset within data */ 92*dc0093f4Seschrock }; 93*dc0093f4Seschrock 94*dc0093f4Seschrock /* 95*dc0093f4Seschrock * Section data structure. We store the entire section header so that we can 96*dc0093f4Seschrock * determine some properties (such as whether or not it contains text) after 97*dc0093f4Seschrock * building the structure. 98*dc0093f4Seschrock */ 99*dc0093f4Seschrock struct dis_scn { 100*dc0093f4Seschrock GElf_Shdr ds_shdr; 101*dc0093f4Seschrock const char *ds_name; 102*dc0093f4Seschrock Elf_Data *ds_data; 103*dc0093f4Seschrock }; 104*dc0093f4Seschrock 105*dc0093f4Seschrock /* Lifted from Psymtab.c */ 106*dc0093f4Seschrock #define DATA_TYPES \ 107*dc0093f4Seschrock ((1 << STT_OBJECT) | (1 << STT_FUNC) | \ 108*dc0093f4Seschrock (1 << STT_COMMON) | (1 << STT_TLS)) 109*dc0093f4Seschrock #define IS_DATA_TYPE(tp) (((1 << (tp)) & DATA_TYPES) != 0) 110*dc0093f4Seschrock 111*dc0093f4Seschrock /* 112*dc0093f4Seschrock * Pick out the best symbol to used based on the sections available in the 113*dc0093f4Seschrock * target. We prefer SHT_SYMTAB over SHT_DYNSYM. 114*dc0093f4Seschrock */ 115*dc0093f4Seschrock /* ARGSUSED */ 116*dc0093f4Seschrock static void 117*dc0093f4Seschrock get_symtab(dis_tgt_t *tgt, dis_scn_t *scn, void *data) 118*dc0093f4Seschrock { 119*dc0093f4Seschrock int *index = data; 120*dc0093f4Seschrock 121*dc0093f4Seschrock *index += 1; 122*dc0093f4Seschrock 123*dc0093f4Seschrock /* 124*dc0093f4Seschrock * Prefer SHT_SYMTAB over SHT_DYNSYM 125*dc0093f4Seschrock */ 126*dc0093f4Seschrock if (scn->ds_shdr.sh_type == SHT_DYNSYM && tgt->dt_symidx == 0) 127*dc0093f4Seschrock tgt->dt_symidx = *index; 128*dc0093f4Seschrock else if (scn->ds_shdr.sh_type == SHT_SYMTAB) 129*dc0093f4Seschrock tgt->dt_symidx = *index; 130*dc0093f4Seschrock } 131*dc0093f4Seschrock 132*dc0093f4Seschrock static int 133*dc0093f4Seschrock sym_compare(const void *a, const void *b) 134*dc0093f4Seschrock { 135*dc0093f4Seschrock const sym_entry_t *syma = a; 136*dc0093f4Seschrock const sym_entry_t *symb = b; 137*dc0093f4Seschrock const char *aname = syma->se_name; 138*dc0093f4Seschrock const char *bname = symb->se_name; 139*dc0093f4Seschrock 140*dc0093f4Seschrock if (syma->se_sym.st_value < symb->se_sym.st_value) 141*dc0093f4Seschrock return (-1); 142*dc0093f4Seschrock 143*dc0093f4Seschrock if (syma->se_sym.st_value > symb->se_sym.st_value) 144*dc0093f4Seschrock return (1); 145*dc0093f4Seschrock 146*dc0093f4Seschrock /* 147*dc0093f4Seschrock * Prefer functions over non-functions 148*dc0093f4Seschrock */ 149*dc0093f4Seschrock if (GELF_ST_TYPE(syma->se_sym.st_info) != 150*dc0093f4Seschrock GELF_ST_TYPE(symb->se_sym.st_info)) { 151*dc0093f4Seschrock if (GELF_ST_TYPE(syma->se_sym.st_info) == STT_FUNC) 152*dc0093f4Seschrock return (-1); 153*dc0093f4Seschrock if (GELF_ST_TYPE(symb->se_sym.st_info) == STT_FUNC) 154*dc0093f4Seschrock return (1); 155*dc0093f4Seschrock } 156*dc0093f4Seschrock 157*dc0093f4Seschrock /* 158*dc0093f4Seschrock * For symbols with the same address and type, we sort them according to 159*dc0093f4Seschrock * a hierarchy: 160*dc0093f4Seschrock * 161*dc0093f4Seschrock * 1. weak symbols (common name) 162*dc0093f4Seschrock * 2. global symbols (external name) 163*dc0093f4Seschrock * 3. local symbols 164*dc0093f4Seschrock */ 165*dc0093f4Seschrock if (GELF_ST_BIND(syma->se_sym.st_info) != 166*dc0093f4Seschrock GELF_ST_BIND(symb->se_sym.st_info)) { 167*dc0093f4Seschrock if (GELF_ST_BIND(syma->se_sym.st_info) == STB_WEAK) 168*dc0093f4Seschrock return (-1); 169*dc0093f4Seschrock if (GELF_ST_BIND(symb->se_sym.st_info) == STB_WEAK) 170*dc0093f4Seschrock return (1); 171*dc0093f4Seschrock 172*dc0093f4Seschrock if (GELF_ST_BIND(syma->se_sym.st_info) == STB_GLOBAL) 173*dc0093f4Seschrock return (-1); 174*dc0093f4Seschrock if (GELF_ST_BIND(symb->se_sym.st_info) == STB_GLOBAL) 175*dc0093f4Seschrock return (1); 176*dc0093f4Seschrock } 177*dc0093f4Seschrock 178*dc0093f4Seschrock /* 179*dc0093f4Seschrock * As a last resort, if we have multiple symbols of the same type at the 180*dc0093f4Seschrock * same address, prefer the version with the fewest leading underscores. 181*dc0093f4Seschrock */ 182*dc0093f4Seschrock if (aname == NULL) 183*dc0093f4Seschrock return (-1); 184*dc0093f4Seschrock if (bname == NULL) 185*dc0093f4Seschrock return (1); 186*dc0093f4Seschrock 187*dc0093f4Seschrock while (*aname == '_' && *bname == '_') { 188*dc0093f4Seschrock aname++; 189*dc0093f4Seschrock bname++; 190*dc0093f4Seschrock } 191*dc0093f4Seschrock 192*dc0093f4Seschrock if (*bname == '_') 193*dc0093f4Seschrock return (-1); 194*dc0093f4Seschrock if (*aname == '_') 195*dc0093f4Seschrock return (1); 196*dc0093f4Seschrock 197*dc0093f4Seschrock /* 198*dc0093f4Seschrock * Prefer the symbol with the smaller size. 199*dc0093f4Seschrock */ 200*dc0093f4Seschrock if (syma->se_sym.st_size < symb->se_sym.st_size) 201*dc0093f4Seschrock return (-1); 202*dc0093f4Seschrock if (syma->se_sym.st_size > symb->se_sym.st_size) 203*dc0093f4Seschrock return (1); 204*dc0093f4Seschrock 205*dc0093f4Seschrock /* 206*dc0093f4Seschrock * We really do have two identical symbols for some reason. Just report 207*dc0093f4Seschrock * them as equal, and to the lucky one go the spoils. 208*dc0093f4Seschrock */ 209*dc0093f4Seschrock return (0); 210*dc0093f4Seschrock } 211*dc0093f4Seschrock 212*dc0093f4Seschrock /* 213*dc0093f4Seschrock * Construct an optimized symbol table sorted by starting address. 214*dc0093f4Seschrock */ 215*dc0093f4Seschrock static void 216*dc0093f4Seschrock construct_symtab(dis_tgt_t *tgt) 217*dc0093f4Seschrock { 218*dc0093f4Seschrock Elf_Scn *scn; 219*dc0093f4Seschrock GElf_Shdr shdr; 220*dc0093f4Seschrock Elf_Data *symdata; 221*dc0093f4Seschrock int i; 222*dc0093f4Seschrock GElf_Word *symshndx = NULL; 223*dc0093f4Seschrock int symshndx_size; 224*dc0093f4Seschrock sym_entry_t *sym; 225*dc0093f4Seschrock sym_entry_t *p_symtab = NULL; 226*dc0093f4Seschrock int nsym = 0; /* count of symbols we're not interested in */ 227*dc0093f4Seschrock 228*dc0093f4Seschrock /* 229*dc0093f4Seschrock * Find the symshndx section, if any 230*dc0093f4Seschrock */ 231*dc0093f4Seschrock for (scn = elf_nextscn(tgt->dt_elf, NULL); scn != NULL; 232*dc0093f4Seschrock scn = elf_nextscn(tgt->dt_elf, scn)) { 233*dc0093f4Seschrock if (gelf_getshdr(scn, &shdr) == NULL) 234*dc0093f4Seschrock break; 235*dc0093f4Seschrock if (shdr.sh_type == SHT_SYMTAB_SHNDX && 236*dc0093f4Seschrock shdr.sh_link == tgt->dt_symidx) { 237*dc0093f4Seschrock Elf_Data *data; 238*dc0093f4Seschrock 239*dc0093f4Seschrock if ((data = elf_getdata(scn, NULL)) != NULL) { 240*dc0093f4Seschrock symshndx = (GElf_Word *)data->d_buf; 241*dc0093f4Seschrock symshndx_size = data->d_size / 242*dc0093f4Seschrock sizeof (GElf_Word); 243*dc0093f4Seschrock break; 244*dc0093f4Seschrock } 245*dc0093f4Seschrock } 246*dc0093f4Seschrock } 247*dc0093f4Seschrock 248*dc0093f4Seschrock if ((scn = elf_getscn(tgt->dt_elf, tgt->dt_symidx)) == NULL) 249*dc0093f4Seschrock die("%s: failed to get section information", tgt->dt_filename); 250*dc0093f4Seschrock if (gelf_getshdr(scn, &shdr) == NULL) 251*dc0093f4Seschrock die("%s: failed to get section header", tgt->dt_filename); 252*dc0093f4Seschrock if (shdr.sh_entsize == 0) 253*dc0093f4Seschrock die("%s: symbol table has zero size", tgt->dt_filename); 254*dc0093f4Seschrock 255*dc0093f4Seschrock if ((symdata = elf_getdata(scn, NULL)) == NULL) 256*dc0093f4Seschrock die("%s: failed to get symbol table", tgt->dt_filename); 257*dc0093f4Seschrock 258*dc0093f4Seschrock tgt->dt_symcount = symdata->d_size / gelf_fsize(tgt->dt_elf, ELF_T_SYM, 259*dc0093f4Seschrock 1, EV_CURRENT); 260*dc0093f4Seschrock 261*dc0093f4Seschrock p_symtab = safe_malloc(tgt->dt_symcount * sizeof (sym_entry_t)); 262*dc0093f4Seschrock 263*dc0093f4Seschrock for (i = 0, sym = p_symtab; i < tgt->dt_symcount; i++) { 264*dc0093f4Seschrock (void) memset(sym, sizeof (sym_entry_t), 0); 265*dc0093f4Seschrock if (gelf_getsym(symdata, i, &(sym->se_sym)) == NULL) { 266*dc0093f4Seschrock warn("%s: gelf_getsym returned NULL for %d", 267*dc0093f4Seschrock tgt->dt_filename, i); 268*dc0093f4Seschrock nsym++; 269*dc0093f4Seschrock continue; 270*dc0093f4Seschrock } 271*dc0093f4Seschrock 272*dc0093f4Seschrock /* 273*dc0093f4Seschrock * We're only interested in data symbols. 274*dc0093f4Seschrock */ 275*dc0093f4Seschrock if (!IS_DATA_TYPE(GELF_ST_TYPE(sym->se_sym.st_info))) { 276*dc0093f4Seschrock nsym++; 277*dc0093f4Seschrock continue; 278*dc0093f4Seschrock } 279*dc0093f4Seschrock 280*dc0093f4Seschrock if (sym->se_sym.st_shndx == SHN_XINDEX && symshndx != NULL) { 281*dc0093f4Seschrock if (i > symshndx_size) { 282*dc0093f4Seschrock warn("%s: bad SHNX_XINDEX %d", 283*dc0093f4Seschrock tgt->dt_filename, i); 284*dc0093f4Seschrock sym->se_shndx = -1; 285*dc0093f4Seschrock } else { 286*dc0093f4Seschrock sym->se_shndx = symshndx[i]; 287*dc0093f4Seschrock } 288*dc0093f4Seschrock } else { 289*dc0093f4Seschrock sym->se_shndx = sym->se_sym.st_shndx; 290*dc0093f4Seschrock } 291*dc0093f4Seschrock 292*dc0093f4Seschrock if ((sym->se_name = elf_strptr(tgt->dt_elf, shdr.sh_link, 293*dc0093f4Seschrock (size_t)sym->se_sym.st_name)) == NULL) { 294*dc0093f4Seschrock warn("%s: failed to lookup symbol %d name", 295*dc0093f4Seschrock tgt->dt_filename, i); 296*dc0093f4Seschrock nsym++; 297*dc0093f4Seschrock continue; 298*dc0093f4Seschrock } 299*dc0093f4Seschrock 300*dc0093f4Seschrock sym++; 301*dc0093f4Seschrock } 302*dc0093f4Seschrock 303*dc0093f4Seschrock tgt->dt_symcount -= nsym; 304*dc0093f4Seschrock tgt->dt_symtab = realloc(p_symtab, 305*dc0093f4Seschrock tgt->dt_symcount * sizeof (sym_entry_t)); 306*dc0093f4Seschrock 307*dc0093f4Seschrock qsort(tgt->dt_symtab, tgt->dt_symcount, sizeof (sym_entry_t), 308*dc0093f4Seschrock sym_compare); 309*dc0093f4Seschrock } 310*dc0093f4Seschrock 311*dc0093f4Seschrock /* 312*dc0093f4Seschrock * Create a target backed by an ELF file. 313*dc0093f4Seschrock */ 314*dc0093f4Seschrock dis_tgt_t * 315*dc0093f4Seschrock dis_tgt_create(const char *file) 316*dc0093f4Seschrock { 317*dc0093f4Seschrock dis_tgt_t *tgt, *current; 318*dc0093f4Seschrock int idx; 319*dc0093f4Seschrock Elf *elf; 320*dc0093f4Seschrock GElf_Ehdr ehdr; 321*dc0093f4Seschrock Elf_Arhdr *arhdr = NULL; 322*dc0093f4Seschrock int cmd; 323*dc0093f4Seschrock 324*dc0093f4Seschrock if (elf_version(EV_CURRENT) == EV_NONE) 325*dc0093f4Seschrock die("libelf(3ELF) out of date"); 326*dc0093f4Seschrock 327*dc0093f4Seschrock tgt = safe_malloc(sizeof (dis_tgt_t)); 328*dc0093f4Seschrock 329*dc0093f4Seschrock if ((tgt->dt_fd = open(file, O_RDONLY)) < 0) { 330*dc0093f4Seschrock warn("%s: failed opening file, reason: %s", file, 331*dc0093f4Seschrock strerror(errno)); 332*dc0093f4Seschrock free(tgt); 333*dc0093f4Seschrock return (NULL); 334*dc0093f4Seschrock } 335*dc0093f4Seschrock 336*dc0093f4Seschrock if ((tgt->dt_elf_root = 337*dc0093f4Seschrock elf_begin(tgt->dt_fd, ELF_C_READ, NULL)) == NULL) { 338*dc0093f4Seschrock warn("%s: invalid or corrupt ELF file", file); 339*dc0093f4Seschrock dis_tgt_destroy(tgt); 340*dc0093f4Seschrock return (NULL); 341*dc0093f4Seschrock } 342*dc0093f4Seschrock 343*dc0093f4Seschrock current = tgt; 344*dc0093f4Seschrock cmd = ELF_C_READ; 345*dc0093f4Seschrock while ((elf = elf_begin(tgt->dt_fd, cmd, tgt->dt_elf_root)) != NULL) { 346*dc0093f4Seschrock 347*dc0093f4Seschrock if (elf_kind(tgt->dt_elf_root) == ELF_K_AR && 348*dc0093f4Seschrock (arhdr = elf_getarhdr(elf)) == NULL) { 349*dc0093f4Seschrock warn("%s: malformed archive", file); 350*dc0093f4Seschrock dis_tgt_destroy(tgt); 351*dc0093f4Seschrock return (NULL); 352*dc0093f4Seschrock } 353*dc0093f4Seschrock 354*dc0093f4Seschrock /* 355*dc0093f4Seschrock * Make sure that this Elf file is sane 356*dc0093f4Seschrock */ 357*dc0093f4Seschrock if (gelf_getehdr(elf, &ehdr) == NULL) { 358*dc0093f4Seschrock if (arhdr != NULL) { 359*dc0093f4Seschrock /* 360*dc0093f4Seschrock * For archives, we drive on in the face of bad 361*dc0093f4Seschrock * members. The "/" and "//" members are 362*dc0093f4Seschrock * special, and should be silently ignored. 363*dc0093f4Seschrock */ 364*dc0093f4Seschrock if (strcmp(arhdr->ar_name, "/") != 0 && 365*dc0093f4Seschrock strcmp(arhdr->ar_name, "//") != 0) 366*dc0093f4Seschrock warn("%s[%s]: invalid file type", 367*dc0093f4Seschrock file, arhdr->ar_name); 368*dc0093f4Seschrock cmd = elf_next(elf); 369*dc0093f4Seschrock (void) elf_end(elf); 370*dc0093f4Seschrock continue; 371*dc0093f4Seschrock } 372*dc0093f4Seschrock 373*dc0093f4Seschrock warn("%s: invalid file type", file); 374*dc0093f4Seschrock dis_tgt_destroy(tgt); 375*dc0093f4Seschrock return (NULL); 376*dc0093f4Seschrock } 377*dc0093f4Seschrock 378*dc0093f4Seschrock /* 379*dc0093f4Seschrock * If we're seeing a new Elf object, then we have an 380*dc0093f4Seschrock * archive. In this case, we create a new target, and chain it 381*dc0093f4Seschrock * off the master target. We can later iterate over these 382*dc0093f4Seschrock * targets using dis_tgt_next(). 383*dc0093f4Seschrock */ 384*dc0093f4Seschrock if (current->dt_elf != NULL) { 385*dc0093f4Seschrock dis_tgt_t *next = safe_malloc(sizeof (dis_tgt_t)); 386*dc0093f4Seschrock next->dt_elf_root = tgt->dt_elf_root; 387*dc0093f4Seschrock next->dt_fd = -1; 388*dc0093f4Seschrock current->dt_next = next; 389*dc0093f4Seschrock current = next; 390*dc0093f4Seschrock } 391*dc0093f4Seschrock current->dt_elf = elf; 392*dc0093f4Seschrock current->dt_arhdr = arhdr; 393*dc0093f4Seschrock 394*dc0093f4Seschrock if (elf_getshstrndx(elf, ¤t->dt_shstrndx) == -1) { 395*dc0093f4Seschrock warn("%s: failed to get section string table for " 396*dc0093f4Seschrock "file", file); 397*dc0093f4Seschrock dis_tgt_destroy(tgt); 398*dc0093f4Seschrock return (NULL); 399*dc0093f4Seschrock } 400*dc0093f4Seschrock 401*dc0093f4Seschrock idx = 0; 402*dc0093f4Seschrock dis_tgt_section_iter(current, get_symtab, &idx); 403*dc0093f4Seschrock 404*dc0093f4Seschrock if (current->dt_symidx != 0) 405*dc0093f4Seschrock construct_symtab(current); 406*dc0093f4Seschrock 407*dc0093f4Seschrock current->dt_filename = file; 408*dc0093f4Seschrock 409*dc0093f4Seschrock cmd = elf_next(elf); 410*dc0093f4Seschrock } 411*dc0093f4Seschrock 412*dc0093f4Seschrock /* 413*dc0093f4Seschrock * Final sanity check. If we had an archive with no members, then bail 414*dc0093f4Seschrock * out with a nice message. 415*dc0093f4Seschrock */ 416*dc0093f4Seschrock if (tgt->dt_elf == NULL) { 417*dc0093f4Seschrock warn("%s: empty archive\n", file); 418*dc0093f4Seschrock dis_tgt_destroy(tgt); 419*dc0093f4Seschrock return (NULL); 420*dc0093f4Seschrock } 421*dc0093f4Seschrock 422*dc0093f4Seschrock return (tgt); 423*dc0093f4Seschrock } 424*dc0093f4Seschrock 425*dc0093f4Seschrock /* 426*dc0093f4Seschrock * Return the filename associated with the target. 427*dc0093f4Seschrock */ 428*dc0093f4Seschrock const char * 429*dc0093f4Seschrock dis_tgt_name(dis_tgt_t *tgt) 430*dc0093f4Seschrock { 431*dc0093f4Seschrock return (tgt->dt_filename); 432*dc0093f4Seschrock } 433*dc0093f4Seschrock 434*dc0093f4Seschrock /* 435*dc0093f4Seschrock * Return the archive member name, if any. 436*dc0093f4Seschrock */ 437*dc0093f4Seschrock const char * 438*dc0093f4Seschrock dis_tgt_member(dis_tgt_t *tgt) 439*dc0093f4Seschrock { 440*dc0093f4Seschrock if (tgt->dt_arhdr) 441*dc0093f4Seschrock return (tgt->dt_arhdr->ar_name); 442*dc0093f4Seschrock else 443*dc0093f4Seschrock return (NULL); 444*dc0093f4Seschrock } 445*dc0093f4Seschrock 446*dc0093f4Seschrock /* 447*dc0093f4Seschrock * Return the Elf_Ehdr associated with this target. Needed to determine which 448*dc0093f4Seschrock * disassembler to use. 449*dc0093f4Seschrock */ 450*dc0093f4Seschrock void 451*dc0093f4Seschrock dis_tgt_ehdr(dis_tgt_t *tgt, GElf_Ehdr *ehdr) 452*dc0093f4Seschrock { 453*dc0093f4Seschrock (void) gelf_getehdr(tgt->dt_elf, ehdr); 454*dc0093f4Seschrock } 455*dc0093f4Seschrock 456*dc0093f4Seschrock /* 457*dc0093f4Seschrock * Return the next target in the list, if this is an archive. 458*dc0093f4Seschrock */ 459*dc0093f4Seschrock dis_tgt_t * 460*dc0093f4Seschrock dis_tgt_next(dis_tgt_t *tgt) 461*dc0093f4Seschrock { 462*dc0093f4Seschrock return (tgt->dt_next); 463*dc0093f4Seschrock } 464*dc0093f4Seschrock 465*dc0093f4Seschrock /* 466*dc0093f4Seschrock * Destroy a target and free up any associated memory. 467*dc0093f4Seschrock */ 468*dc0093f4Seschrock void 469*dc0093f4Seschrock dis_tgt_destroy(dis_tgt_t *tgt) 470*dc0093f4Seschrock { 471*dc0093f4Seschrock dis_tgt_t *current, *next; 472*dc0093f4Seschrock 473*dc0093f4Seschrock current = tgt->dt_next; 474*dc0093f4Seschrock while (current != NULL) { 475*dc0093f4Seschrock next = current->dt_next; 476*dc0093f4Seschrock if (current->dt_elf) 477*dc0093f4Seschrock (void) elf_end(current->dt_elf); 478*dc0093f4Seschrock if (current->dt_symtab) 479*dc0093f4Seschrock free(current->dt_symtab); 480*dc0093f4Seschrock free(current); 481*dc0093f4Seschrock current = next; 482*dc0093f4Seschrock } 483*dc0093f4Seschrock 484*dc0093f4Seschrock if (tgt->dt_elf) 485*dc0093f4Seschrock (void) elf_end(tgt->dt_elf); 486*dc0093f4Seschrock if (tgt->dt_elf_root) 487*dc0093f4Seschrock (void) elf_end(tgt->dt_elf_root); 488*dc0093f4Seschrock 489*dc0093f4Seschrock if (tgt->dt_symtab) 490*dc0093f4Seschrock free(tgt->dt_symtab); 491*dc0093f4Seschrock 492*dc0093f4Seschrock free(tgt); 493*dc0093f4Seschrock } 494*dc0093f4Seschrock 495*dc0093f4Seschrock /* 496*dc0093f4Seschrock * Given an address, returns the name of the corresponding symbol, as well as 497*dc0093f4Seschrock * the offset within that symbol. If no matching symbol is found, then NULL is 498*dc0093f4Seschrock * returned. 499*dc0093f4Seschrock * 500*dc0093f4Seschrock * If 'cache_result' is specified, then we keep track of the resulting symbol. 501*dc0093f4Seschrock * This cached result is consulted first on subsequent lookups in order to avoid 502*dc0093f4Seschrock * unecessary lookups. This flag should be used for resolving the current PC, 503*dc0093f4Seschrock * as the majority of addresses stay within the current function. 504*dc0093f4Seschrock */ 505*dc0093f4Seschrock const char * 506*dc0093f4Seschrock dis_tgt_lookup(dis_tgt_t *tgt, uint64_t addr, off_t *offset, int cache_result, 507*dc0093f4Seschrock size_t *size, int *isfunc) 508*dc0093f4Seschrock { 509*dc0093f4Seschrock int lo, hi, mid; 510*dc0093f4Seschrock sym_entry_t *sym, *osym, *match; 511*dc0093f4Seschrock int found; 512*dc0093f4Seschrock 513*dc0093f4Seschrock if (tgt->dt_symcache != NULL && 514*dc0093f4Seschrock addr >= tgt->dt_symcache->se_sym.st_value && 515*dc0093f4Seschrock addr < tgt->dt_symcache->se_sym.st_value + 516*dc0093f4Seschrock tgt->dt_symcache->se_sym.st_size) { 517*dc0093f4Seschrock *offset = addr - tgt->dt_symcache->se_sym.st_value; 518*dc0093f4Seschrock *size = tgt->dt_symcache->se_sym.st_size; 519*dc0093f4Seschrock return (tgt->dt_symcache->se_name); 520*dc0093f4Seschrock } 521*dc0093f4Seschrock 522*dc0093f4Seschrock lo = 0; 523*dc0093f4Seschrock hi = (tgt->dt_symcount - 1); 524*dc0093f4Seschrock found = 0; 525*dc0093f4Seschrock match = osym = NULL; 526*dc0093f4Seschrock while (lo <= hi) { 527*dc0093f4Seschrock mid = (lo + hi) / 2; 528*dc0093f4Seschrock 529*dc0093f4Seschrock sym = &tgt->dt_symtab[mid]; 530*dc0093f4Seschrock 531*dc0093f4Seschrock if (addr >= sym->se_sym.st_value && 532*dc0093f4Seschrock addr < sym->se_sym.st_value + sym->se_sym.st_size && 533*dc0093f4Seschrock (!found || sym->se_sym.st_value > osym->se_sym.st_value)) { 534*dc0093f4Seschrock osym = sym; 535*dc0093f4Seschrock found = 1; 536*dc0093f4Seschrock } else if (addr == sym->se_sym.st_value) { 537*dc0093f4Seschrock /* 538*dc0093f4Seschrock * Particularly for .plt objects, it's possible to have 539*dc0093f4Seschrock * a zero sized object. We want to return this, but we 540*dc0093f4Seschrock * want it to be a last resort. 541*dc0093f4Seschrock */ 542*dc0093f4Seschrock match = sym; 543*dc0093f4Seschrock } 544*dc0093f4Seschrock 545*dc0093f4Seschrock if (addr < sym->se_sym.st_value) 546*dc0093f4Seschrock hi = mid - 1; 547*dc0093f4Seschrock else 548*dc0093f4Seschrock lo = mid + 1; 549*dc0093f4Seschrock } 550*dc0093f4Seschrock 551*dc0093f4Seschrock if (!found) { 552*dc0093f4Seschrock if (match) 553*dc0093f4Seschrock osym = match; 554*dc0093f4Seschrock else 555*dc0093f4Seschrock return (NULL); 556*dc0093f4Seschrock } 557*dc0093f4Seschrock 558*dc0093f4Seschrock /* 559*dc0093f4Seschrock * Walk backwards to find the best match. 560*dc0093f4Seschrock */ 561*dc0093f4Seschrock do { 562*dc0093f4Seschrock sym = osym; 563*dc0093f4Seschrock 564*dc0093f4Seschrock if (osym == tgt->dt_symtab) 565*dc0093f4Seschrock break; 566*dc0093f4Seschrock 567*dc0093f4Seschrock osym = osym - 1; 568*dc0093f4Seschrock } while ((sym->se_sym.st_value == osym->se_sym.st_value) && 569*dc0093f4Seschrock (addr >= osym->se_sym.st_value) && 570*dc0093f4Seschrock (addr < osym->se_sym.st_value + osym->se_sym.st_size)); 571*dc0093f4Seschrock 572*dc0093f4Seschrock if (cache_result) 573*dc0093f4Seschrock tgt->dt_symcache = sym; 574*dc0093f4Seschrock 575*dc0093f4Seschrock *offset = addr - sym->se_sym.st_value; 576*dc0093f4Seschrock *size = sym->se_sym.st_size; 577*dc0093f4Seschrock if (isfunc) 578*dc0093f4Seschrock *isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) == STT_FUNC); 579*dc0093f4Seschrock 580*dc0093f4Seschrock return (sym->se_name); 581*dc0093f4Seschrock } 582*dc0093f4Seschrock 583*dc0093f4Seschrock /* 584*dc0093f4Seschrock * Given an address, return the starting offset of the next symbol in the file. 585*dc0093f4Seschrock * Relies on the fact that this is only used when we encounter a bad instruction 586*dc0093f4Seschrock * in the input stream, so we know that the last symbol looked up will be in the 587*dc0093f4Seschrock * cache. 588*dc0093f4Seschrock */ 589*dc0093f4Seschrock off_t 590*dc0093f4Seschrock dis_tgt_next_symbol(dis_tgt_t *tgt, uint64_t addr) 591*dc0093f4Seschrock { 592*dc0093f4Seschrock sym_entry_t *sym = tgt->dt_symcache; 593*dc0093f4Seschrock uint64_t start; 594*dc0093f4Seschrock 595*dc0093f4Seschrock /* make sure the cached symbol and address are valid */ 596*dc0093f4Seschrock if (sym == NULL || addr < sym->se_sym.st_value || 597*dc0093f4Seschrock addr >= sym->se_sym.st_value + sym->se_sym.st_size) 598*dc0093f4Seschrock return (0); 599*dc0093f4Seschrock 600*dc0093f4Seschrock start = sym->se_sym.st_value; 601*dc0093f4Seschrock 602*dc0093f4Seschrock /* find the next symbol */ 603*dc0093f4Seschrock while (sym != tgt->dt_symtab + tgt->dt_symcount && 604*dc0093f4Seschrock sym->se_sym.st_value == start) 605*dc0093f4Seschrock sym++; 606*dc0093f4Seschrock 607*dc0093f4Seschrock return (sym->se_sym.st_value - addr); 608*dc0093f4Seschrock } 609*dc0093f4Seschrock 610*dc0093f4Seschrock /* 611*dc0093f4Seschrock * Iterate over all sections in the target, executing the given callback for 612*dc0093f4Seschrock * each. 613*dc0093f4Seschrock */ 614*dc0093f4Seschrock void 615*dc0093f4Seschrock dis_tgt_section_iter(dis_tgt_t *tgt, section_iter_f func, void *data) 616*dc0093f4Seschrock { 617*dc0093f4Seschrock dis_scn_t sdata; 618*dc0093f4Seschrock Elf_Scn *scn; 619*dc0093f4Seschrock int idx; 620*dc0093f4Seschrock 621*dc0093f4Seschrock for (scn = elf_nextscn(tgt->dt_elf, NULL), idx = 1; scn != NULL; 622*dc0093f4Seschrock scn = elf_nextscn(tgt->dt_elf, scn), idx++) { 623*dc0093f4Seschrock 624*dc0093f4Seschrock if (gelf_getshdr(scn, &sdata.ds_shdr) == NULL) { 625*dc0093f4Seschrock warn("%s: failed to get section %d header", 626*dc0093f4Seschrock tgt->dt_filename, idx); 627*dc0093f4Seschrock continue; 628*dc0093f4Seschrock } 629*dc0093f4Seschrock 630*dc0093f4Seschrock if ((sdata.ds_name = elf_strptr(tgt->dt_elf, tgt->dt_shstrndx, 631*dc0093f4Seschrock sdata.ds_shdr.sh_name)) == NULL) { 632*dc0093f4Seschrock warn("%s: failed to get section %d name", 633*dc0093f4Seschrock tgt->dt_filename, idx); 634*dc0093f4Seschrock continue; 635*dc0093f4Seschrock } 636*dc0093f4Seschrock 637*dc0093f4Seschrock if ((sdata.ds_data = elf_getdata(scn, NULL)) == NULL) { 638*dc0093f4Seschrock warn("%s: failed to get data for section '%s'", 639*dc0093f4Seschrock tgt->dt_filename, sdata.ds_name); 640*dc0093f4Seschrock continue; 641*dc0093f4Seschrock } 642*dc0093f4Seschrock 643*dc0093f4Seschrock func(tgt, &sdata, data); 644*dc0093f4Seschrock } 645*dc0093f4Seschrock } 646*dc0093f4Seschrock 647*dc0093f4Seschrock /* 648*dc0093f4Seschrock * Return 1 if the given section contains text, 0 otherwise. 649*dc0093f4Seschrock */ 650*dc0093f4Seschrock int 651*dc0093f4Seschrock dis_section_istext(dis_scn_t *scn) 652*dc0093f4Seschrock { 653*dc0093f4Seschrock return ((scn->ds_shdr.sh_type == SHT_PROGBITS) && 654*dc0093f4Seschrock (scn->ds_shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR))); 655*dc0093f4Seschrock } 656*dc0093f4Seschrock 657*dc0093f4Seschrock /* 658*dc0093f4Seschrock * Return a pointer to the section data. 659*dc0093f4Seschrock */ 660*dc0093f4Seschrock void * 661*dc0093f4Seschrock dis_section_data(dis_scn_t *scn) 662*dc0093f4Seschrock { 663*dc0093f4Seschrock return (scn->ds_data->d_buf); 664*dc0093f4Seschrock } 665*dc0093f4Seschrock 666*dc0093f4Seschrock /* 667*dc0093f4Seschrock * Return the size of the section data. 668*dc0093f4Seschrock */ 669*dc0093f4Seschrock size_t 670*dc0093f4Seschrock dis_section_size(dis_scn_t *scn) 671*dc0093f4Seschrock { 672*dc0093f4Seschrock return (scn->ds_data->d_size); 673*dc0093f4Seschrock } 674*dc0093f4Seschrock 675*dc0093f4Seschrock /* 676*dc0093f4Seschrock * Return the address for the given section. 677*dc0093f4Seschrock */ 678*dc0093f4Seschrock uint64_t 679*dc0093f4Seschrock dis_section_addr(dis_scn_t *scn) 680*dc0093f4Seschrock { 681*dc0093f4Seschrock return (scn->ds_shdr.sh_addr); 682*dc0093f4Seschrock } 683*dc0093f4Seschrock 684*dc0093f4Seschrock /* 685*dc0093f4Seschrock * Return the name of the current section. 686*dc0093f4Seschrock */ 687*dc0093f4Seschrock const char * 688*dc0093f4Seschrock dis_section_name(dis_scn_t *scn) 689*dc0093f4Seschrock { 690*dc0093f4Seschrock return (scn->ds_name); 691*dc0093f4Seschrock } 692*dc0093f4Seschrock 693*dc0093f4Seschrock /* 694*dc0093f4Seschrock * Create an allocated copy of the given section 695*dc0093f4Seschrock */ 696*dc0093f4Seschrock dis_scn_t * 697*dc0093f4Seschrock dis_section_copy(dis_scn_t *scn) 698*dc0093f4Seschrock { 699*dc0093f4Seschrock dis_scn_t *new; 700*dc0093f4Seschrock 701*dc0093f4Seschrock new = safe_malloc(sizeof (dis_scn_t)); 702*dc0093f4Seschrock (void) memcpy(new, scn, sizeof (dis_scn_t)); 703*dc0093f4Seschrock 704*dc0093f4Seschrock return (new); 705*dc0093f4Seschrock } 706*dc0093f4Seschrock 707*dc0093f4Seschrock /* 708*dc0093f4Seschrock * Free section memory 709*dc0093f4Seschrock */ 710*dc0093f4Seschrock void 711*dc0093f4Seschrock dis_section_free(dis_scn_t *scn) 712*dc0093f4Seschrock { 713*dc0093f4Seschrock free(scn); 714*dc0093f4Seschrock } 715*dc0093f4Seschrock 716*dc0093f4Seschrock /* 717*dc0093f4Seschrock * Iterate over all functions in the target, executing the given callback for 718*dc0093f4Seschrock * each one. 719*dc0093f4Seschrock */ 720*dc0093f4Seschrock void 721*dc0093f4Seschrock dis_tgt_function_iter(dis_tgt_t *tgt, function_iter_f func, void *data) 722*dc0093f4Seschrock { 723*dc0093f4Seschrock int i; 724*dc0093f4Seschrock sym_entry_t *sym; 725*dc0093f4Seschrock dis_func_t df; 726*dc0093f4Seschrock Elf_Scn *scn; 727*dc0093f4Seschrock GElf_Shdr shdr; 728*dc0093f4Seschrock 729*dc0093f4Seschrock for (i = 0, sym = tgt->dt_symtab; i < tgt->dt_symcount; i++, sym++) { 730*dc0093f4Seschrock 731*dc0093f4Seschrock /* ignore non-functions */ 732*dc0093f4Seschrock if ((GELF_ST_TYPE(sym->se_sym.st_info) != STT_FUNC) || 733*dc0093f4Seschrock sym->se_name == NULL || 734*dc0093f4Seschrock sym->se_sym.st_size == 0 || 735*dc0093f4Seschrock sym->se_shndx == -1) 736*dc0093f4Seschrock continue; 737*dc0093f4Seschrock 738*dc0093f4Seschrock /* get the ELF data associated with this function */ 739*dc0093f4Seschrock if ((scn = elf_getscn(tgt->dt_elf, sym->se_shndx)) == NULL || 740*dc0093f4Seschrock gelf_getshdr(scn, &shdr) == NULL || 741*dc0093f4Seschrock (df.df_data = elf_getdata(scn, NULL)) == NULL || 742*dc0093f4Seschrock df.df_data->d_size == 0) { 743*dc0093f4Seschrock warn("%s: failed to read section %d", 744*dc0093f4Seschrock tgt->dt_filename, sym->se_shndx); 745*dc0093f4Seschrock continue; 746*dc0093f4Seschrock } 747*dc0093f4Seschrock 748*dc0093f4Seschrock /* 749*dc0093f4Seschrock * Verify that the address lies within the section that we think 750*dc0093f4Seschrock * it does. 751*dc0093f4Seschrock */ 752*dc0093f4Seschrock if (sym->se_sym.st_value < shdr.sh_addr || 753*dc0093f4Seschrock (sym->se_sym.st_value + sym->se_sym.st_size) > 754*dc0093f4Seschrock (shdr.sh_addr + shdr.sh_size)) { 755*dc0093f4Seschrock warn("%s: bad section %d for address %p", 756*dc0093f4Seschrock tgt->dt_filename, sym->se_sym.st_shndx, 757*dc0093f4Seschrock sym->se_sym.st_value); 758*dc0093f4Seschrock continue; 759*dc0093f4Seschrock } 760*dc0093f4Seschrock 761*dc0093f4Seschrock df.df_sym = sym; 762*dc0093f4Seschrock df.df_offset = sym->se_sym.st_value - shdr.sh_addr; 763*dc0093f4Seschrock 764*dc0093f4Seschrock func(tgt, &df, data); 765*dc0093f4Seschrock } 766*dc0093f4Seschrock } 767*dc0093f4Seschrock 768*dc0093f4Seschrock /* 769*dc0093f4Seschrock * Return the data associated with a given function. 770*dc0093f4Seschrock */ 771*dc0093f4Seschrock void * 772*dc0093f4Seschrock dis_function_data(dis_func_t *func) 773*dc0093f4Seschrock { 774*dc0093f4Seschrock return ((char *)func->df_data->d_buf + func->df_offset); 775*dc0093f4Seschrock } 776*dc0093f4Seschrock 777*dc0093f4Seschrock /* 778*dc0093f4Seschrock * Return the size of a function. 779*dc0093f4Seschrock */ 780*dc0093f4Seschrock size_t 781*dc0093f4Seschrock dis_function_size(dis_func_t *func) 782*dc0093f4Seschrock { 783*dc0093f4Seschrock return (func->df_sym->se_sym.st_size); 784*dc0093f4Seschrock } 785*dc0093f4Seschrock 786*dc0093f4Seschrock /* 787*dc0093f4Seschrock * Return the address of a function. 788*dc0093f4Seschrock */ 789*dc0093f4Seschrock uint64_t 790*dc0093f4Seschrock dis_function_addr(dis_func_t *func) 791*dc0093f4Seschrock { 792*dc0093f4Seschrock return (func->df_sym->se_sym.st_value); 793*dc0093f4Seschrock } 794*dc0093f4Seschrock 795*dc0093f4Seschrock /* 796*dc0093f4Seschrock * Return the name of the function 797*dc0093f4Seschrock */ 798*dc0093f4Seschrock const char * 799*dc0093f4Seschrock dis_function_name(dis_func_t *func) 800*dc0093f4Seschrock { 801*dc0093f4Seschrock return (func->df_sym->se_name); 802*dc0093f4Seschrock } 803*dc0093f4Seschrock 804*dc0093f4Seschrock /* 805*dc0093f4Seschrock * Return a copy of a function. 806*dc0093f4Seschrock */ 807*dc0093f4Seschrock dis_func_t * 808*dc0093f4Seschrock dis_function_copy(dis_func_t *func) 809*dc0093f4Seschrock { 810*dc0093f4Seschrock dis_func_t *new; 811*dc0093f4Seschrock 812*dc0093f4Seschrock new = safe_malloc(sizeof (dis_func_t)); 813*dc0093f4Seschrock (void) memcpy(new, func, sizeof (dis_func_t)); 814*dc0093f4Seschrock 815*dc0093f4Seschrock return (new); 816*dc0093f4Seschrock } 817*dc0093f4Seschrock 818*dc0093f4Seschrock /* 819*dc0093f4Seschrock * Free function memory 820*dc0093f4Seschrock */ 821*dc0093f4Seschrock void 822*dc0093f4Seschrock dis_function_free(dis_func_t *func) 823*dc0093f4Seschrock { 824*dc0093f4Seschrock free(func); 825*dc0093f4Seschrock } 826