xref: /illumos-gate/usr/src/cmd/sgs/gprof/common/readelf.c (revision 7a15b0ec33c685e4e6b096454b077a52604acf9b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include	"gprof.h"
28 #include	<stdlib.h>
29 #include	<sys/file.h>
30 #include	<fcntl.h>
31 #include	<unistd.h>
32 #include	<string.h>
33 #include	<sysexits.h>
34 #include	<libelf.h>
35 #include 	"gelf.h"
36 
#ifdef DEBUG
static void	debug_dup_del(nltype *, nltype *);

/*
 * Debug tracing helpers. Each one acts only when the ELFDEBUG bit is set in
 * the global debug mask. They expand to a single do { } while (0) statement
 * so that "MACRO(...);" is exactly one statement and is safe as the body of
 * an unbraced if/else.
 */
#define	DPRINTF(msg, file) \
	do { \
		if (debug & ELFDEBUG) \
			(void) printf(msg, file); \
	} while (0)

#define	PRINTF(msg) \
	do { \
		if (debug & ELFDEBUG) \
			(void) printf(msg); \
	} while (0)

#define	DEBUG_DUP_DEL(keeper, louser) \
	do { \
		if (debug & ELFDEBUG) \
			debug_dup_del(keeper, louser); \
	} while (0)

#else
/* Non-debug builds: the helpers evaluate nothing and do nothing. */
#define	DPRINTF(msg, file)		((void)0)
#define	PRINTF(msg)			((void)0)
#define	DEBUG_DUP_DEL(keeper, louser)	((void)0)
#endif
54 
55 size_t	textbegin, textsize;
56 
57 /* Prototype definitions first */
58 
59 static void	process(char *filename, int fd);
60 static void	get_symtab(Elf *elf, mod_info_t *module);
61 static void	get_textseg(Elf *elf, int fd);
62 static void	save_aout_info(char *);
63 
/*
 * Report an unrecoverable ELF processing failure and terminate.
 *
 * Prints "error" to stderr, followed in parentheses by the string libelf
 * returns for elf_errmsg(-1), then exits with status EX_SOFTWARE.
 * This function does not return.
 */
static void
fatal_error(char *error)
{
	const char	*elf_detail = elf_errmsg(-1);

	(void) fprintf(stderr, "Fatal ELF error: %s (%s)\n", error,
	    elf_detail);
	exit(EX_SOFTWARE);
}
71 
72 bool
73 is_shared_obj(char *name)
74 {
75 	int		fd;
76 	Elf		*elf;
77 	GElf_Ehdr	ehdr;
78 
79 	if ((fd = open(name, O_RDONLY)) == -1) {
80 		(void) fprintf(stderr, "%s: can't open `%s'\n", whoami, name);
81 		exit(EX_NOINPUT);
82 	}
83 
84 	if (elf_version(EV_CURRENT) == EV_NONE)
85 		fatal_error("libelf is out of date");
86 
87 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
88 		fatal_error("can't read as ELF file");
89 
90 	if (gelf_getehdr(elf, &ehdr) == NULL)
91 		fatal_error("can't read ehdr");
92 
93 	(void) elf_end(elf);
94 	(void) close(fd);
95 
96 	if (ehdr.e_type == ET_DYN)
97 		return (TRUE);
98 	else
99 		return (FALSE);
100 }
101 
102 static void
103 save_aout_info(char *aoutname)
104 {
105 	struct stat		buf;
106 	extern fl_info_t	aout_info;
107 
108 	if (stat(aoutname, &buf) == -1) {
109 		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
110 							whoami, aoutname);
111 		exit(EX_NOINPUT);
112 	}
113 
114 	aout_info.dev = buf.st_dev;
115 	aout_info.ino = buf.st_ino;
116 	aout_info.mtime = buf.st_mtime;
117 	aout_info.size = buf.st_size;
118 }
119 
120 void
121 getnfile(char *aoutname)
122 {
123 	int	fd;
124 
125 	DPRINTF(" Attempting to open %s  \n", aoutname);
126 	if ((fd = open((aoutname), O_RDONLY)) == -1) {
127 		(void) fprintf(stderr, "%s: can't open `%s'\n",
128 							whoami, aoutname);
129 		exit(EX_NOINPUT);
130 	}
131 	process(aoutname, fd);
132 	save_aout_info(aoutname);
133 
134 	(void) close(fd);
135 }
136 
137 static GElf_Addr
138 get_txtorigin(Elf *elf)
139 {
140 	GElf_Ehdr	ehdr;
141 	GElf_Phdr	phdr;
142 	GElf_Half	ndx;
143 	GElf_Addr	txt_origin = 0;
144 	bool		first_load_seg = TRUE;
145 
146 	if (gelf_getehdr(elf, &ehdr) == NULL)
147 		fatal_error("can't read ehdr");
148 
149 	for (ndx = 0; ndx < ehdr.e_phnum; ndx++) {
150 		if (gelf_getphdr(elf, ndx, &phdr) == NULL)
151 			continue;
152 
153 		if ((phdr.p_type == PT_LOAD) && !(phdr.p_flags & PF_W)) {
154 			if (first_load_seg || phdr.p_vaddr < txt_origin)
155 				txt_origin = phdr.p_vaddr;
156 
157 			if (first_load_seg)
158 				first_load_seg = FALSE;
159 		}
160 	}
161 
162 	return (txt_origin);
163 }
164 
165 void
166 process_namelist(mod_info_t *module)
167 {
168 	int		fd;
169 	Elf		*elf;
170 
171 	if ((fd = open(module->name, O_RDONLY)) == -1) {
172 		(void) fprintf(stderr, "%s: can't read %s\n",
173 							whoami, module->name);
174 		(void) fprintf(stderr, "Exiting due to error(s)...\n");
175 		exit(EX_NOINPUT);
176 	}
177 
178 	/*
179 	 * libelf's version already verified in processing a.out,
180 	 * so directly do elf_begin()
181 	 */
182 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
183 		fatal_error("can't read as ELF file");
184 
185 	module->next = NULL;
186 	module->txt_origin = get_txtorigin(elf);
187 	get_symtab(elf, module);
188 	module->active = TRUE;
189 }
190 
191 /*
192  * Get the ELF header and,  if it exists, call get_symtab()
193  * to begin processing of the file; otherwise, return from
194  * processing the file with a warning.
195  */
196 static void
197 process(char *filename, int fd)
198 {
199 	Elf			*elf;
200 	extern bool		cflag;
201 	extern bool		Bflag;
202 
203 	if (elf_version(EV_CURRENT) == EV_NONE)
204 		fatal_error("libelf is out of date");
205 
206 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
207 		fatal_error("can't read as ELF file");
208 
209 	if (gelf_getclass(elf) == ELFCLASS64)
210 		Bflag = TRUE;
211 
212 	/*
213 	 * Initialize active modules list. Note that we set the end
214 	 * address while reading the symbol table, in get_symtab
215 	 */
216 	modules.id = 1;
217 	modules.next = NULL;
218 	modules.txt_origin = get_txtorigin(elf);
219 	modules.load_base = modules.txt_origin;
220 	if ((modules.name = malloc(strlen(filename) + 1)) == NULL) {
221 		(void) fprintf(stderr, "%s: can't malloc %d bytes",
222 					    whoami, strlen(filename) + 1);
223 		exit(EX_UNAVAILABLE);
224 	}
225 	(void) strcpy(modules.name, filename);
226 
227 	get_symtab(elf, &modules);
228 
229 	modules.load_end = modules.data_end;
230 	modules.active = TRUE;
231 	n_modules = 1;
232 
233 	if (cflag)
234 		get_textseg(elf, fd);
235 }
236 
237 static void
238 get_textseg(Elf *elf, int fd)
239 {
240 	GElf_Ehdr ehdr;
241 	GElf_Phdr phdr;
242 	GElf_Half i;
243 
244 	if (gelf_getehdr(elf, &ehdr) == NULL)
245 		fatal_error("can't read ehdr");
246 
247 	for (i = 0; i < ehdr.e_phnum; i++) {
248 
249 		if (gelf_getphdr(elf, i, &phdr) == NULL)
250 			continue;
251 
252 		if (!(phdr.p_flags & PF_W) && (phdr.p_filesz > textsize)) {
253 			size_t chk;
254 
255 			/*
256 			 * We could have multiple loadable text segments;
257 			 * keep the largest we find.
258 			 */
259 			if (textspace)
260 				free(textspace);
261 
262 			/*
263 			 * gprof is a 32-bit program;  if this text segment
264 			 * has a > 32-bit offset or length, it's too big.
265 			 */
266 			chk = (size_t)phdr.p_vaddr + (size_t)phdr.p_filesz;
267 			if (phdr.p_vaddr + phdr.p_filesz != (GElf_Xword)chk)
268 				fatal_error("text segment too large for -c");
269 
270 			textbegin = (size_t)phdr.p_vaddr;
271 			textsize = (size_t)phdr.p_filesz;
272 
273 			textspace = malloc(textsize);
274 
275 			if (lseek(fd, (off_t)phdr.p_offset, SEEK_SET) !=
276 			    (off_t)phdr.p_offset)
277 				fatal_error("cannot seek to text section");
278 
279 			if (read(fd, textspace, textsize) != textsize)
280 				fatal_error("cannot read text");
281 		}
282 	}
283 
284 	if (textsize == 0)
285 		fatal_error("can't find text segment");
286 }
287 
#ifdef DEBUG
/*
 * Debug trace for remove_dup_syms(): print the name of the symbol being
 * discarded ("louser") and the name of the one kept in its place
 * ("keeper").
 */
static void
debug_dup_del(nltype * keeper, nltype * louser)
{
	const char	*dropped = louser->name;
	const char	*kept = keeper->name;

	(void) printf("remove_dup_syms: discarding sym %s over sym %s\n",
	    dropped, kept);
}
#endif /* DEBUG */
296 
297 static void
298 remove_dup_syms(nltype *nl, sztype *sym_count)
299 {
300 	int	i;
301 	int	index;
302 	int	nextsym;
303 
304 	nltype *	orig_list;
305 	if ((orig_list = malloc(sizeof (nltype) * *sym_count)) == NULL) {
306 		(void) fprintf(stderr,
307 		    "gprof: remove_dup_syms: malloc failed\n");
308 		(void) fprintf(stderr, "Exiting due to error(s)...\n");
309 		exit(EX_UNAVAILABLE);
310 	}
311 	(void) memcpy(orig_list, nl, sizeof (nltype) * *sym_count);
312 
313 	for (i = 0, index = 0, nextsym = 1; nextsym < *sym_count; nextsym++) {
314 		int	i_type;
315 		int	n_bind;
316 		int	n_type;
317 
318 		/*
319 		 * If orig_list[nextsym] points to a new symvalue, then we
320 		 * will copy our keeper and move on to the next symbol.
321 		 */
322 		if ((orig_list + i)->value < (orig_list + nextsym)->value) {
323 			*(nl + index++) = *(orig_list +i);
324 			i = nextsym;
325 			continue;
326 		}
327 
328 		/*
329 		 * If these two symbols have the same info, then we
330 		 * keep the first and keep checking for dups.
331 		 */
332 		if ((orig_list + i)->syminfo ==
333 		    (orig_list + nextsym)->syminfo) {
334 			DEBUG_DUP_DEL(orig_list + i, orig_list + nextsym);
335 			continue;
336 		}
337 		n_bind = ELF32_ST_BIND((orig_list + nextsym)->syminfo);
338 		i_type = ELF32_ST_TYPE((orig_list + i)->syminfo);
339 		n_type = ELF32_ST_TYPE((orig_list + nextsym)->syminfo);
340 
341 		/*
342 		 * If they have the same type we take the stronger
343 		 * bound function.
344 		 */
345 		if (i_type == n_type) {
346 			if (n_bind == STB_WEAK) {
347 				DEBUG_DUP_DEL((orig_list + i),
348 				    (orig_list + nextsym));
349 				continue;
350 			}
351 			DEBUG_DUP_DEL((orig_list + nextsym),
352 			    (orig_list + i));
353 			i = nextsym;
354 			continue;
355 		}
356 
357 		/*
358 		 * If the first symbol isn't of type NOTYPE then it must
359 		 * be the keeper.
360 		 */
361 		if (i_type != STT_NOTYPE) {
362 			DEBUG_DUP_DEL((orig_list + i),
363 			    (orig_list + nextsym));
364 			continue;
365 		}
366 
367 		/*
368 		 * Throw away the first one and take the new
369 		 * symbol
370 		 */
371 		DEBUG_DUP_DEL((orig_list + nextsym), (orig_list + i));
372 		i = nextsym;
373 	}
374 
375 	if ((orig_list + i)->value > (nl + index - 1)->value)
376 		*(nl + index++) = *(orig_list +i);
377 
378 	*sym_count = index;
379 }
380 
381 /*
382  * compare either by name or by value for sorting.
383  * This is the comparison function called by qsort to
384  * sort the symbols either by name or value when requested.
385  */
386 static int
387 compare(const void *arg1, const void *arg2)
388 {
389 	nltype *a = (nltype *)arg1;
390 	nltype *b = (nltype *)arg2;
391 
392 	if (a->value > b->value)
393 		return (1);
394 	else
395 		return ((a->value == b->value) - 1);
396 }
397 
398 static int
399 is_function(Elf *elf, GElf_Sym *sym)
400 {
401 	Elf_Scn *scn;
402 	GElf_Shdr shdr;
403 
404 	/*
405 	 * With shared objects, it is possible we come across a function
406 	 * that's global, but is undefined. The definition is probably
407 	 * elsewhere, so we'll have to skip it as far as this object is
408 	 * concerned.
409 	 */
410 	if (sym->st_shndx == SHN_UNDEF)
411 		return (0);
412 
413 	if (GELF_ST_TYPE(sym->st_info) == STT_FUNC) {
414 		if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL)
415 			return (1);
416 
417 		if (GELF_ST_BIND(sym->st_info) == STB_WEAK)
418 			return (1);
419 
420 		if (!aflag && GELF_ST_BIND(sym->st_info) == STB_LOCAL)
421 			return (1);
422 	}
423 
424 	/*
425 	 * It's not a function; determine if it's in an executable section.
426 	 */
427 	if (GELF_ST_TYPE(sym->st_info) != STT_NOTYPE)
428 		return (0);
429 
430 	/*
431 	 * If it isn't global, and it isn't weak, and it either isn't
432 	 * local or the "all flag" isn't set, then get out.
433 	 */
434 	if (GELF_ST_BIND(sym->st_info) != STB_GLOBAL &&
435 	    GELF_ST_BIND(sym->st_info) != STB_WEAK &&
436 	    (GELF_ST_BIND(sym->st_info) != STB_LOCAL || aflag))
437 		return (0);
438 
439 	if (sym->st_shndx >= SHN_LORESERVE)
440 		return (0);
441 
442 	scn = elf_getscn(elf, sym->st_shndx);
443 	(void) gelf_getshdr(scn, &shdr);
444 
445 	if (!(shdr.sh_flags & SHF_EXECINSTR))
446 		return (0);
447 
448 	return (1);
449 }
450 
451 static void
452 get_symtab(Elf *elf, mod_info_t *module)
453 {
454 	Elf_Scn		*scn = NULL, *sym_pri = NULL, *sym_aux = NULL;
455 	GElf_Word	strndx = 0;
456 	sztype		nsyms, i;
457 	Elf_Data	*symdata_pri;
458 	Elf_Data	*symdata_aux = NULL;
459 	GElf_Xword	nsyms_pri = 0, nsyms_aux = 0;
460 	nltype		*etext = NULL;
461 	nltype		*l_nl, *l_npe;
462 	sztype		l_nname;
463 	extern sztype	total_names;
464 	int		symtab_found = 0;
465 
466 
467 	/*
468 	 * Scan the section headers looking for a symbol table. Our
469 	 * preference is to use .symtab, because it contains the full
470 	 * set of symbols. If we find it, we stop looking immediately
471 	 * and use it. In the absence of a .symtab section, we are
472 	 * willing to use the dynamic symbol table (.dynsym), possibly
473 	 * augmented by the .SUNW_ldynsym, which contains local symbols.
474 	 */
475 	while ((symtab_found == 0) && ((scn = elf_nextscn(elf, scn)) != NULL)) {
476 		GElf_Shdr shdr;
477 
478 		if (gelf_getshdr(scn, &shdr) == NULL)
479 			continue;
480 
481 		switch (shdr.sh_type) {
482 		case SHT_SYMTAB:
483 			nsyms_pri = shdr.sh_size / shdr.sh_entsize;
484 			strndx = shdr.sh_link;
485 			sym_pri = scn;
486 			/* Throw away .SUNW_ldynsym. It is for .dynsym only */
487 			nsyms_aux = 0;
488 			sym_aux = NULL;
489 			/* We have found the best symbol table. Stop looking */
490 			symtab_found = 1;
491 			break;
492 
493 		case SHT_DYNSYM:
494 			/* We will use .dynsym if no .symtab is found */
495 			nsyms_pri = shdr.sh_size / shdr.sh_entsize;
496 			strndx = shdr.sh_link;
497 			sym_pri = scn;
498 			break;
499 
500 		case SHT_SUNW_LDYNSYM:
501 			/* Auxiliary table, used with .dynsym */
502 			nsyms_aux = shdr.sh_size / shdr.sh_entsize;
503 			sym_aux = scn;
504 			break;
505 		}
506 	}
507 
508 	if (sym_pri == NULL || strndx == 0)
509 		fatal_error("can't find symbol table.\n");
510 
511 	nsyms = (sztype)(nsyms_pri + nsyms_aux);
512 	if ((nsyms_pri + nsyms_aux) != (GElf_Xword)nsyms)
513 		fatal_error(
514 		    "32-bit gprof cannot handle more than 2^32 symbols");
515 
516 	if ((symdata_pri = elf_getdata(sym_pri, NULL)) == NULL)
517 		fatal_error("can't read symbol data.\n");
518 
519 	if ((sym_aux != NULL) &&
520 	    ((symdata_aux = elf_getdata(sym_aux, NULL)) == NULL))
521 		fatal_error("can't read .SUNW_ldynsym symbol data.\n");
522 
523 	if ((l_nl = l_npe = (nltype *)calloc(nsyms + PRF_SYMCNT,
524 	    sizeof (nltype))) == NULL)
525 		fatal_error("cannot allocate symbol data.\n");
526 
527 	/*
528 	 * Now we need to cruise through the symbol table eliminating
529 	 * all non-functions from consideration, and making strings
530 	 * real.
531 	 */
532 	l_nname = 0;
533 
534 	for (i = 1; i < nsyms; i++) {
535 		GElf_Sym gsym;
536 		char *name;
537 
538 		/*
539 		 * Look up the symbol. In the case where we have a
540 		 * .SUNW_ldynsym/.dynsym pair, we treat them as a single
541 		 * logical table, with the data from .SUNW_ldynsym coming
542 		 * before the data in .dynsym.
543 		 */
544 		if (i >= nsyms_aux)
545 			(void) gelf_getsym(symdata_pri, i - nsyms_aux, &gsym);
546 		else
547 			(void) gelf_getsym(symdata_aux, i, &gsym);
548 
549 		name = elf_strptr(elf, strndx, gsym.st_name);
550 
551 		/*
552 		 * We're interested in this symbol if it's a function or
553 		 * if it's the symbol "_etext"
554 		 */
555 		if (is_function(elf, &gsym) || strcmp(name, PRF_ETEXT) == 0) {
556 
557 			l_npe->name = name;
558 			l_npe->value = gsym.st_value;
559 			l_npe->sz = gsym.st_size;
560 			l_npe->syminfo = gsym.st_info;
561 			l_npe->module = module;
562 
563 			if (strcmp(name, PRF_ETEXT) == 0)
564 				etext = l_npe;
565 
566 			if (lflag == TRUE &&
567 			    GELF_ST_BIND(gsym.st_info) == STB_LOCAL) {
568 				/*
569 				 * If the "locals only" flag is on, then
570 				 * we add the local symbols to the
571 				 * exclusion lists.
572 				 */
573 				addlist(Elist, name);
574 				addlist(elist, name);
575 			}
576 			DPRINTF("Index %lld:", l_nname);
577 			DPRINTF("\tValue: 0x%llx\t", l_npe->value);
578 			DPRINTF("Name: %s \n", l_npe->name);
579 			l_npe++;
580 			l_nname++;
581 		}
582 
583 		if (strcmp(name, PRF_END) == 0)
584 			module->data_end = gsym.st_value;
585 	}
586 
587 	if (l_npe == l_nl)
588 		fatal_error("no valid functions found");
589 
590 	/*
591 	 * Finally, we need to construct some dummy entries.
592 	 */
593 	if (etext) {
594 		l_npe->name = PRF_EXTSYM;
595 		l_npe->value = etext->value + 1;
596 		l_npe->syminfo = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
597 		l_npe->module = module;
598 		l_npe++;
599 		l_nname++;
600 	}
601 
602 	l_npe->name = PRF_MEMTERM;
603 	l_npe->value = (pctype)-1;
604 	l_npe->syminfo = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
605 	l_npe->module = module;
606 	l_npe++;
607 	l_nname++;
608 
609 	/*
610 	 * We're almost done;  all we need to do is sort the symbols
611 	 * and then remove the duplicates.
612 	 */
613 	qsort(l_nl, (size_t)l_nname, sizeof (nltype), compare);
614 	remove_dup_syms(l_nl, &l_nname);
615 
616 	module->nl = l_nl;
617 	module->npe = l_npe;
618 	module->nname = l_nname;
619 
620 	total_names += l_nname;
621 }
622