xref: /illumos-gate/usr/src/cmd/sgs/gprof/common/readelf.c (revision 94bc75770001bfdc49b11467deff2235fc9927f9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include	"gprof.h"
31 #include	<stdlib.h>
32 #include	<sys/file.h>
33 #include	<fcntl.h>
34 #include	<unistd.h>
35 #include	<string.h>
36 #include	<sysexits.h>
37 #include	<libelf.h>
38 #include 	"gelf.h"
39 
#ifdef DEBUG
static void	debug_dup_del(nltype *, nltype *);

/*
 * Debug tracing helpers.  Each one produces output only when the ELFDEBUG
 * bit is set in `debug'.  The bodies are wrapped in do { } while (0) so
 * that an invocation followed by a semicolon is exactly one statement;
 * this keeps them safe to use as the unbraced body of an if/else.
 */
#define	DPRINTF(msg, file)	do { \
					if (debug & ELFDEBUG) \
						(void) printf(msg, file); \
				} while (0)

#define	PRINTF(msg)		do { \
					if (debug & ELFDEBUG) \
						(void) printf(msg); \
				} while (0)

#define	DEBUG_DUP_DEL(keeper, louser)	do { \
					if (debug & ELFDEBUG) \
						debug_dup_del(keeper, louser); \
				} while (0)

#else
/* Without DEBUG the tracing helpers expand to nothing. */
#define	DPRINTF(msg, file)
#define	PRINTF(msg)
#define	DEBUG_DUP_DEL(keeper, louser)
#endif
57 
58 size_t	textbegin, textsize;
59 
60 /* Prototype definitions first */
61 
62 static void	process(char *filename, int fd);
63 static void	get_symtab(Elf *elf, mod_info_t *module);
64 static void	get_textseg(Elf *elf, int fd);
65 static void	save_aout_info(char *);
66 
/*
 * Print an unrecoverable error to stderr, followed by the message that
 * elf_errmsg(-1) returns, and terminate the program with EX_SOFTWARE.
 * This function does not return.
 */
static void
fatal_error(char *error)
{
	const char	*elf_msg = elf_errmsg(-1);

	(void) fprintf(stderr, "Fatal ELF error: %s (%s)\n", error, elf_msg);
	exit(EX_SOFTWARE);
}
74 
75 bool
76 is_shared_obj(char *name)
77 {
78 	int		fd;
79 	Elf		*elf;
80 	GElf_Ehdr	ehdr;
81 
82 	if ((fd = open(name, O_RDONLY)) == -1) {
83 		(void) fprintf(stderr, "%s: can't open `%s'\n", whoami, name);
84 		exit(EX_NOINPUT);
85 	}
86 
87 	if (elf_version(EV_CURRENT) == EV_NONE)
88 		fatal_error("libelf is out of date");
89 
90 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
91 		fatal_error("can't read as ELF file");
92 
93 	if (gelf_getehdr(elf, &ehdr) == NULL)
94 		fatal_error("can't read ehdr");
95 
96 	(void) elf_end(elf);
97 	(void) close(fd);
98 
99 	if (ehdr.e_type == ET_DYN)
100 		return (TRUE);
101 	else
102 		return (FALSE);
103 }
104 
105 static void
106 save_aout_info(char *aoutname)
107 {
108 	struct stat		buf;
109 	extern fl_info_t	aout_info;
110 
111 	if (stat(aoutname, &buf) == -1) {
112 		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
113 							whoami, aoutname);
114 		exit(EX_NOINPUT);
115 	}
116 
117 	aout_info.dev = buf.st_dev;
118 	aout_info.ino = buf.st_ino;
119 	aout_info.mtime = buf.st_mtime;
120 	aout_info.size = buf.st_size;
121 }
122 
123 void
124 getnfile(char *aoutname)
125 {
126 	int	fd;
127 
128 	DPRINTF(" Attempting to open %s  \n", aoutname);
129 	if ((fd = open((aoutname), O_RDONLY)) == -1) {
130 		(void) fprintf(stderr, "%s: can't open `%s'\n",
131 							whoami, aoutname);
132 		exit(EX_NOINPUT);
133 	}
134 	process(aoutname, fd);
135 	save_aout_info(aoutname);
136 
137 	(void) close(fd);
138 }
139 
140 static GElf_Addr
141 get_txtorigin(Elf *elf)
142 {
143 	GElf_Ehdr	ehdr;
144 	GElf_Phdr	phdr;
145 	GElf_Half	ndx;
146 	GElf_Addr	txt_origin = 0;
147 	bool		first_load_seg = TRUE;
148 
149 	if (gelf_getehdr(elf, &ehdr) == NULL)
150 		fatal_error("can't read ehdr");
151 
152 	for (ndx = 0; ndx < ehdr.e_phnum; ndx++) {
153 		if (gelf_getphdr(elf, ndx, &phdr) == NULL)
154 			continue;
155 
156 		if ((phdr.p_type == PT_LOAD) && !(phdr.p_flags & PF_W)) {
157 			if (first_load_seg || phdr.p_vaddr < txt_origin)
158 				txt_origin = phdr.p_vaddr;
159 
160 			if (first_load_seg)
161 				first_load_seg = FALSE;
162 		}
163 	}
164 
165 	return (txt_origin);
166 }
167 
168 void
169 process_namelist(mod_info_t *module)
170 {
171 	int		fd;
172 	Elf		*elf;
173 
174 	if ((fd = open(module->name, O_RDONLY)) == -1) {
175 		(void) fprintf(stderr, "%s: can't read %s\n",
176 							whoami, module->name);
177 		(void) fprintf(stderr, "Exiting due to error(s)...\n");
178 		exit(EX_NOINPUT);
179 	}
180 
181 	/*
182 	 * libelf's version already verified in processing a.out,
183 	 * so directly do elf_begin()
184 	 */
185 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
186 		fatal_error("can't read as ELF file");
187 
188 	module->next = NULL;
189 	module->txt_origin = get_txtorigin(elf);
190 	get_symtab(elf, module);
191 	module->active = TRUE;
192 }
193 
194 /*
195  * Get the ELF header and,  if it exists, call get_symtab()
196  * to begin processing of the file; otherwise, return from
197  * processing the file with a warning.
198  */
199 static void
200 process(char *filename, int fd)
201 {
202 	Elf			*elf;
203 	extern bool		cflag;
204 	extern bool		Bflag;
205 
206 	if (elf_version(EV_CURRENT) == EV_NONE)
207 		fatal_error("libelf is out of date");
208 
209 	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
210 		fatal_error("can't read as ELF file");
211 
212 	if (gelf_getclass(elf) == ELFCLASS64)
213 		Bflag = TRUE;
214 
215 	/*
216 	 * Initialize active modules list. Note that we set the end
217 	 * address while reading the symbol table, in get_symtab
218 	 */
219 	modules.id = 1;
220 	modules.next = NULL;
221 	modules.txt_origin = get_txtorigin(elf);
222 	modules.load_base = modules.txt_origin;
223 	if ((modules.name = malloc(strlen(filename) + 1)) == NULL) {
224 		(void) fprintf(stderr, "%s: can't malloc %d bytes",
225 					    whoami, strlen(filename) + 1);
226 		exit(EX_UNAVAILABLE);
227 	}
228 	(void) strcpy(modules.name, filename);
229 
230 	get_symtab(elf, &modules);
231 
232 	modules.load_end = modules.data_end;
233 	modules.active = TRUE;
234 	n_modules = 1;
235 
236 	if (cflag)
237 		get_textseg(elf, fd);
238 }
239 
240 static void
241 get_textseg(Elf *elf, int fd)
242 {
243 	GElf_Ehdr ehdr;
244 	GElf_Phdr phdr;
245 	GElf_Half i;
246 
247 	if (gelf_getehdr(elf, &ehdr) == NULL)
248 		fatal_error("can't read ehdr");
249 
250 	for (i = 0; i < ehdr.e_phnum; i++) {
251 
252 		if (gelf_getphdr(elf, i, &phdr) == NULL)
253 			continue;
254 
255 		if (!(phdr.p_flags & PF_W) && (phdr.p_filesz > textsize)) {
256 			size_t chk;
257 
258 			/*
259 			 * We could have multiple loadable text segments;
260 			 * keep the largest we find.
261 			 */
262 			if (textspace)
263 				free(textspace);
264 
265 			/*
266 			 * gprof is a 32-bit program;  if this text segment
267 			 * has a > 32-bit offset or length, it's too big.
268 			 */
269 			chk = (size_t)phdr.p_vaddr + (size_t)phdr.p_filesz;
270 			if (phdr.p_vaddr + phdr.p_filesz != (GElf_Xword)chk)
271 				fatal_error("text segment too large for -c");
272 
273 			textbegin = (size_t)phdr.p_vaddr;
274 			textsize = (size_t)phdr.p_filesz;
275 
276 			textspace = malloc(textsize);
277 
278 			if (lseek(fd, (off_t)phdr.p_offset, SEEK_SET) !=
279 			    (off_t)phdr.p_offset)
280 				fatal_error("cannot seek to text section");
281 
282 			if (read(fd, textspace, textsize) != textsize)
283 				fatal_error("cannot read text");
284 		}
285 	}
286 
287 	if (textsize == 0)
288 		fatal_error("can't find text segment");
289 }
290 
#ifdef DEBUG
/*
 * Debug trace for remove_dup_syms(): print the name of the symbol being
 * dropped (`louser') and of the symbol kept in its place (`keeper').
 */
static void
debug_dup_del(nltype *keeper, nltype *louser)
{
	const char	*dropped = louser->name;
	const char	*kept = keeper->name;

	(void) printf("remove_dup_syms: discarding sym %s over sym %s\n",
	    dropped, kept);
}
#endif /* DEBUG */
299 
300 static void
301 remove_dup_syms(nltype *nl, sztype *sym_count)
302 {
303 	int	i;
304 	int	index;
305 	int	nextsym;
306 
307 	nltype *	orig_list;
308 	if ((orig_list = malloc(sizeof (nltype) * *sym_count)) == NULL) {
309 		(void) fprintf(stderr,
310 		    "gprof: remove_dup_syms: malloc failed\n");
311 		(void) fprintf(stderr, "Exiting due to error(s)...\n");
312 		exit(EX_UNAVAILABLE);
313 	}
314 	(void) memcpy(orig_list, nl, sizeof (nltype) * *sym_count);
315 
316 	for (i = 0, index = 0, nextsym = 1; nextsym < *sym_count; nextsym++) {
317 		int	i_type;
318 		int	n_bind;
319 		int	n_type;
320 
321 		/*
322 		 * If orig_list[nextsym] points to a new symvalue, then we
323 		 * will copy our keeper and move on to the next symbol.
324 		 */
325 		if ((orig_list + i)->value < (orig_list + nextsym)->value) {
326 			*(nl + index++) = *(orig_list +i);
327 			i = nextsym;
328 			continue;
329 		}
330 
331 		/*
332 		 * If these two symbols have the same info, then we
333 		 * keep the first and keep checking for dups.
334 		 */
335 		if ((orig_list + i)->syminfo ==
336 		    (orig_list + nextsym)->syminfo) {
337 			DEBUG_DUP_DEL(orig_list + i, orig_list + nextsym);
338 			continue;
339 		}
340 		n_bind = ELF32_ST_BIND((orig_list + nextsym)->syminfo);
341 		i_type = ELF32_ST_TYPE((orig_list + i)->syminfo);
342 		n_type = ELF32_ST_TYPE((orig_list + nextsym)->syminfo);
343 
344 		/*
345 		 * If they have the same type we take the stronger
346 		 * bound function.
347 		 */
348 		if (i_type == n_type) {
349 			if (n_bind == STB_WEAK) {
350 				DEBUG_DUP_DEL((orig_list + i),
351 				    (orig_list + nextsym));
352 				continue;
353 			}
354 			DEBUG_DUP_DEL((orig_list + nextsym),
355 			    (orig_list + i));
356 			i = nextsym;
357 			continue;
358 		}
359 
360 		/*
361 		 * If the first symbol isn't of type NOTYPE then it must
362 		 * be the keeper.
363 		 */
364 		if (i_type != STT_NOTYPE) {
365 			DEBUG_DUP_DEL((orig_list + i),
366 			    (orig_list + nextsym));
367 			continue;
368 		}
369 
370 		/*
371 		 * Throw away the first one and take the new
372 		 * symbol
373 		 */
374 		DEBUG_DUP_DEL((orig_list + nextsym), (orig_list + i));
375 		i = nextsym;
376 	}
377 
378 	if ((orig_list + i)->value > (nl + index - 1)->value)
379 		*(nl + index++) = *(orig_list +i);
380 
381 	*sym_count = index;
382 }
383 
384 /*
385  * compare either by name or by value for sorting.
386  * This is the comparison function called by qsort to
387  * sort the symbols either by name or value when requested.
388  */
389 static int
390 compare(const void *arg1, const void *arg2)
391 {
392 	nltype *a = (nltype *)arg1;
393 	nltype *b = (nltype *)arg2;
394 
395 	if (a->value > b->value)
396 		return (1);
397 	else
398 		return ((a->value == b->value) - 1);
399 }
400 
401 static int
402 is_function(Elf *elf, GElf_Sym *sym)
403 {
404 	Elf_Scn *scn;
405 	GElf_Shdr shdr;
406 
407 	/*
408 	 * With shared objects, it is possible we come across a function
409 	 * that's global, but is undefined. The definition is probably
410 	 * elsewhere, so we'll have to skip it as far as this object is
411 	 * concerned.
412 	 */
413 	if (sym->st_shndx == SHN_UNDEF)
414 		return (0);
415 
416 	if (GELF_ST_TYPE(sym->st_info) == STT_FUNC) {
417 		if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL)
418 			return (1);
419 
420 		if (GELF_ST_BIND(sym->st_info) == STB_WEAK)
421 			return (1);
422 
423 		if (!aflag && GELF_ST_BIND(sym->st_info) == STB_LOCAL)
424 			return (1);
425 	}
426 
427 	/*
428 	 * It's not a function; determine if it's in an executable section.
429 	 */
430 	if (GELF_ST_TYPE(sym->st_info) != STT_NOTYPE)
431 		return (0);
432 
433 	/*
434 	 * If it isn't global, and it isn't weak, and it either isn't
435 	 * local or the "all flag" isn't set, then get out.
436 	 */
437 	if (GELF_ST_BIND(sym->st_info) != STB_GLOBAL &&
438 	    GELF_ST_BIND(sym->st_info) != STB_WEAK &&
439 	    (GELF_ST_BIND(sym->st_info) != STB_LOCAL || aflag))
440 		return (0);
441 
442 	if (sym->st_shndx >= SHN_LORESERVE)
443 		return (0);
444 
445 	scn = elf_getscn(elf, sym->st_shndx);
446 	(void) gelf_getshdr(scn, &shdr);
447 
448 	if (!(shdr.sh_flags & SHF_EXECINSTR))
449 		return (0);
450 
451 	return (1);
452 }
453 
454 static void
455 get_symtab(Elf *elf, mod_info_t *module)
456 {
457 	Elf_Scn		*scn = NULL, *sym = NULL;
458 	GElf_Word	strndx = 0;
459 	sztype		nsyms, i;
460 	Elf_Data	*symdata;
461 	nltype		*etext = NULL;
462 
463 	nltype			*l_nl, *l_npe;
464 	sztype			l_nname;
465 	extern sztype		total_names;
466 
467 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
468 		GElf_Shdr shdr;
469 
470 		if (gelf_getshdr(scn, &shdr) == NULL)
471 			continue;
472 
473 		if (shdr.sh_type == SHT_SYMTAB || shdr.sh_type == SHT_DYNSYM) {
474 			GElf_Xword chk = shdr.sh_size / shdr.sh_entsize;
475 
476 			nsyms = (sztype)(shdr.sh_size / shdr.sh_entsize);
477 
478 			if (chk != (GElf_Xword)nsyms)
479 				fatal_error("32-bit gprof cannot handle"
480 				    "more than 2^32 symbols");
481 
482 			strndx = shdr.sh_link;
483 			sym = scn;
484 		}
485 
486 		/*
487 		 * If we've found a real symbol table, we're done.
488 		 */
489 		if (shdr.sh_type == SHT_SYMTAB)
490 			break;
491 	}
492 
493 	if (sym == NULL || strndx == 0)
494 		fatal_error("can't find symbol table.\n");
495 
496 	if ((symdata = elf_getdata(scn, NULL)) == NULL)
497 		fatal_error("can't read symbol data.\n");
498 
499 	if ((l_nl = l_npe = (nltype *)calloc(nsyms + PRF_SYMCNT,
500 	    sizeof (nltype))) == NULL)
501 		fatal_error("cannot allocate symbol data.\n");
502 
503 	/*
504 	 * Now we need to cruise through the symbol table eliminating
505 	 * all non-functions from consideration, and making strings
506 	 * real.
507 	 */
508 	l_nname = 0;
509 
510 	for (i = 1; i < nsyms; i++) {
511 		GElf_Sym gsym;
512 		char *name;
513 
514 		(void) gelf_getsym(symdata, i, &gsym);
515 
516 		name = elf_strptr(elf, strndx, gsym.st_name);
517 
518 		/*
519 		 * We're interested in this symbol if it's a function or
520 		 * if it's the symbol "_etext"
521 		 */
522 		if (is_function(elf, &gsym) || strcmp(name, PRF_ETEXT) == 0) {
523 
524 			l_npe->name = name;
525 			l_npe->value = gsym.st_value;
526 			l_npe->sz = gsym.st_size;
527 			l_npe->syminfo = gsym.st_info;
528 			l_npe->module = module;
529 
530 			if (strcmp(name, PRF_ETEXT) == 0)
531 				etext = l_npe;
532 
533 			if (lflag == TRUE &&
534 			    GELF_ST_BIND(gsym.st_info) == STB_LOCAL) {
535 				/*
536 				 * If the "locals only" flag is on, then
537 				 * we add the local symbols to the
538 				 * exclusion lists.
539 				 */
540 				addlist(Elist, name);
541 				addlist(elist, name);
542 			}
543 			DPRINTF("Index %lld:", l_nname);
544 			DPRINTF("\tValue: 0x%llx\t", l_npe->value);
545 			DPRINTF("Name: %s \n", l_npe->name);
546 			l_npe++;
547 			l_nname++;
548 		}
549 
550 		if (strcmp(name, PRF_END) == 0)
551 			module->data_end = gsym.st_value;
552 	}
553 
554 	if (l_npe == l_nl)
555 		fatal_error("no valid functions found");
556 
557 	/*
558 	 * Finally, we need to construct some dummy entries.
559 	 */
560 	if (etext) {
561 		l_npe->name = PRF_EXTSYM;
562 		l_npe->value = etext->value + 1;
563 		l_npe->syminfo = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
564 		l_npe->module = module;
565 		l_npe++;
566 		l_nname++;
567 	}
568 
569 	l_npe->name = PRF_MEMTERM;
570 	l_npe->value = (pctype)-1;
571 	l_npe->syminfo = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
572 	l_npe->module = module;
573 	l_npe++;
574 	l_nname++;
575 
576 	/*
577 	 * We're almost done;  all we need to do is sort the symbols
578 	 * and then remove the duplicates.
579 	 */
580 	qsort(l_nl, (size_t)l_nname, sizeof (nltype), compare);
581 	remove_dup_syms(l_nl, &l_nname);
582 
583 	module->nl = l_nl;
584 	module->npe = l_npe;
585 	module->nname = l_nname;
586 
587 	total_names += l_nname;
588 }
589