xref: /freebsd/usr.sbin/pmcstat/pmcstat_log.c (revision bd18fd57db1df29da1a3adf94d47924a977a29c2)
1 /*-
2  * Copyright (c) 2005-2007, Joseph Koshy
3  * Copyright (c) 2007 The FreeBSD Foundation
4  * All rights reserved.
5  *
6  * Portions of this software were developed by A. Joseph Koshy under
7  * sponsorship from the FreeBSD Foundation and Google, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * Transform a hwpmc(4) log into human readable form, and into
33  * gprof(1) compatible profiles.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include <sys/param.h>
40 #include <sys/endian.h>
41 #include <sys/cpuset.h>
42 #include <sys/gmon.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/mman.h>
46 #include <sys/pmc.h>
47 #include <sys/queue.h>
48 #include <sys/socket.h>
49 #include <sys/stat.h>
50 #include <sys/wait.h>
51 
52 #include <netinet/in.h>
53 
54 #include <assert.h>
55 #include <curses.h>
56 #include <err.h>
57 #include <errno.h>
58 #include <fcntl.h>
59 #include <gelf.h>
60 #include <libgen.h>
61 #include <limits.h>
62 #include <netdb.h>
63 #include <pmc.h>
64 #include <pmclog.h>
65 #include <sysexits.h>
66 #include <stdint.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <string.h>
70 #include <unistd.h>
71 
72 #include "pmcstat.h"
73 #include "pmcstat_log.h"
74 #include "pmcstat_top.h"
75 
76 #define	PMCSTAT_ALLOCATE		1
77 
78 /*
79  * PUBLIC INTERFACES
80  *
81  * pmcstat_initialize_logging()	initialize this module, called first
82  * pmcstat_shutdown_logging()		orderly shutdown, called last
83  * pmcstat_open_log()			open an eventlog for processing
84  * pmcstat_process_log()		print/convert an event log
85  * pmcstat_display_log()		top mode display for the log
86  * pmcstat_close_log()			finish processing an event log
87  *
88  * IMPLEMENTATION NOTES
89  *
90  * We correlate each 'callchain' or 'sample' entry seen in the event
91  * log back to an executable object in the system. Executable objects
92  * include:
93  * 	- program executables,
94  *	- shared libraries loaded by the runtime loader,
95  *	- dlopen()'ed objects loaded by the program,
96  *	- the runtime loader itself,
97  *	- the kernel and kernel modules.
98  *
99  * Each process that we know about is treated as a set of regions that
100  * map to executable objects.  Processes are described by
101  * 'pmcstat_process' structures.  Executable objects are tracked by
102  * 'pmcstat_image' structures.  The kernel and kernel modules are
103  * common to all processes (they reside at the same virtual addresses
104  * for all processes).  Individual processes can have their text
105  * segments and shared libraries loaded at process-specific locations.
106  *
107  * A given executable object can be in use by multiple processes
108  * (e.g., libc.so) and loaded at a different address in each.
109  * pmcstat_pcmap structures track per-image mappings.
110  *
111  * The sample log could have samples from multiple PMCs; we
112  * generate one 'gmon.out' profile per PMC.
113  *
114  * IMPLEMENTATION OF GMON OUTPUT
115  *
116  * Each executable object gets one 'gmon.out' profile, per PMC in
117  * use.  Creation of 'gmon.out' profiles is done lazily.  The
118  * 'gmon.out' profiles generated for a given sampling PMC are
119  * aggregates of all the samples for that particular executable
120  * object.
121  *
122  * IMPLEMENTATION OF SYSTEM-WIDE CALLGRAPH OUTPUT
123  *
124  * Each active pmcid has its own callgraph structure, described by a
125  * 'struct pmcstat_callgraph'.  Given a process id and a list of pc
126  * values, we map each pc value to a tuple (image, symbol), where
127  * 'image' denotes an executable object and 'symbol' is the closest
128  * symbol that precedes the pc value.  Each pc value in the list is
129  * also given a 'rank' that reflects its depth in the call stack.
130  */
131 
/*
 * List of every PMC record known to this module; new records are
 * inserted at the head by pmcstat_pmcid_add().
 */
struct pmcstat_pmcs pmcstat_pmcs = LIST_HEAD_INITIALIZER(pmcstat_pmcs);

/*
 * All image descriptors are kept in a hash table.  The bucket is
 * selected by the hash of the image's interned executable path.
 */
struct pmcstat_image_hash_list pmcstat_image_hash[PMCSTAT_NHASH];

/*
 * All process descriptors are kept in a hash table.
 */
struct pmcstat_process_hash_list pmcstat_process_hash[PMCSTAT_NHASH];

struct pmcstat_stats pmcstat_stats; /* statistics */
static int ps_samples_period; /* samples count between top refresh. */

struct pmcstat_process *pmcstat_kernproc; /* kernel 'process' */
148 
149 #include "pmcpl_gprof.h"
150 #include "pmcpl_callgraph.h"
151 #include "pmcpl_annotate.h"
152 #include "pmcpl_annotate_cg.h"
153 #include "pmcpl_calltree.h"
154 
/*
 * Table of post-processing plugins.  Entries are selected by index
 * (args.pa_plugin and args.pa_pplugin); every hook is optional and
 * callers test for NULL before invoking it.  The table is terminated
 * by an entry whose pl_name is NULL.
 */
static struct pmc_plugins  {
	const char 	*pl_name;	/* name */

	/* configure */
	int (*pl_configure)(char *opt);

	/* init and shutdown */
	int (*pl_init)(void);
	void (*pl_shutdown)(FILE *mf);

	/* sample processing */
	void (*pl_process)(struct pmcstat_process *pp,
	    struct pmcstat_pmcrecord *pmcr, uint32_t nsamples,
	    uintfptr_t *cc, int usermode, uint32_t cpu);

	/* image: pl_initimage is called for each new image descriptor */
	void (*pl_initimage)(struct pmcstat_image *pi);
	void (*pl_shutdownimage)(struct pmcstat_image *pi);

	/* pmc: called after a new PMC record has been allocated */
	void (*pl_newpmc)(pmcstat_interned_string ps,
		struct pmcstat_pmcrecord *pr);

	/* top display */
	void (*pl_topdisplay)(void);

	/* top keypress */
	int (*pl_topkeypress)(int c, WINDOW *w);

} plugins[] = {
	{
		/* Placeholder entry with no hooks. */
		.pl_name		= "none",
	},
	{
		.pl_name		= "callgraph",
		.pl_init		= pmcpl_cg_init,
		.pl_shutdown		= pmcpl_cg_shutdown,
		.pl_process		= pmcpl_cg_process,
		.pl_topkeypress		= pmcpl_cg_topkeypress,
		.pl_topdisplay		= pmcpl_cg_topdisplay
	},
	{
		.pl_name		= "gprof",
		.pl_shutdown		= pmcpl_gmon_shutdown,
		.pl_process		= pmcpl_gmon_process,
		.pl_initimage		= pmcpl_gmon_initimage,
		.pl_shutdownimage	= pmcpl_gmon_shutdownimage,
		.pl_newpmc		= pmcpl_gmon_newpmc
	},
	{
		.pl_name		= "annotate",
		.pl_process		= pmcpl_annotate_process
	},
	{
		.pl_name		= "calltree",
		.pl_configure		= pmcpl_ct_configure,
		.pl_init		= pmcpl_ct_init,
		.pl_shutdown		= pmcpl_ct_shutdown,
		.pl_process		= pmcpl_ct_process,
		.pl_topkeypress		= pmcpl_ct_topkeypress,
		.pl_topdisplay		= pmcpl_ct_topdisplay
	},
	{
		.pl_name		= "annotate_cg",
		.pl_process		= pmcpl_annotate_cg_process
	},

	{
		/* End-of-table marker. */
		.pl_name		= NULL
	}
};
226 
/* NOTE(review): presumably enables the PMC merge policy -- confirm. */
static int pmcstat_mergepmc;

int pmcstat_pmcinfilter = 0; /* PMC filter for top mode. */
float pmcstat_threshold = 0.5; /* Cost filter for top mode. */

/*
 * Prototypes
 */

/* Image descriptors. */
static struct pmcstat_image *pmcstat_image_from_path(pmcstat_interned_string
    _path, int _iskernelmodule);
static void pmcstat_image_get_aout_params(struct pmcstat_image *_image);
static void pmcstat_image_get_elf_params(struct pmcstat_image *_image);
static void	pmcstat_image_link(struct pmcstat_process *_pp,
    struct pmcstat_image *_i, uintfptr_t _lpc);

/* PMC id to name mapping. */
static void	pmcstat_pmcid_add(pmc_id_t _pmcid,
    pmcstat_interned_string _name);

/* Process descriptors. */
static void	pmcstat_process_aout_exec(struct pmcstat_process *_pp,
    struct pmcstat_image *_image, uintfptr_t _entryaddr);
static void	pmcstat_process_elf_exec(struct pmcstat_process *_pp,
    struct pmcstat_image *_image, uintfptr_t _entryaddr);
static void	pmcstat_process_exec(struct pmcstat_process *_pp,
    pmcstat_interned_string _path, uintfptr_t _entryaddr);
static struct pmcstat_process *pmcstat_process_lookup(pid_t _pid,
    int _allocate);

/* String interning and statistics. */
static int	pmcstat_string_compute_hash(const char *_string);
static void pmcstat_string_initialize(void);
static int	pmcstat_string_lookup_hash(pmcstat_interned_string _is);
static void pmcstat_string_shutdown(void);
static void pmcstat_stats_reset(int _reset_global);
259 
/*
 * A simple implementation of interned strings.  Each interned string
 * is assigned a unique address, so that subsequent string compares
 * can be done by a simple pointer comparison instead of using
 * strcmp().  This speeds up hash table lookups and saves memory if
 * duplicate strings are the norm.
 */
struct pmcstat_string {
	LIST_ENTRY(pmcstat_string)	ps_next;	/* hash link */
	int		ps_len;		/* strlen() of ps_string */
	int		ps_hash;	/* bucket index in pmcstat_string_hash */
	char		*ps_string;	/* private copy, freed at shutdown */
};

/* Hash table of interned strings, indexed by ps_hash. */
static LIST_HEAD(,pmcstat_string)	pmcstat_string_hash[PMCSTAT_NHASH];

/*
 * PMC count.  Also supplies the index (pr_pmcin) given to each newly
 * allocated PMC record.
 */
int pmcstat_npmcs;

/*
 * PMC Top mode pause state.
 */
static int pmcstat_pause;
285 
286 static void
287 pmcstat_stats_reset(int reset_global)
288 {
289 	struct pmcstat_pmcrecord *pr;
290 
291 	/* Flush PMCs stats. */
292 	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) {
293 		pr->pr_samples = 0;
294 		pr->pr_dubious_frames = 0;
295 	}
296 	ps_samples_period = 0;
297 
298 	/* Flush global stats. */
299 	if (reset_global)
300 		bzero(&pmcstat_stats, sizeof(struct pmcstat_stats));
301 }
302 
303 /*
304  * Compute a 'hash' value for a string.
305  */
306 
307 static int
308 pmcstat_string_compute_hash(const char *s)
309 {
310 	unsigned hash;
311 
312 	for (hash = 2166136261; *s; s++)
313 		hash = (hash ^ *s) * 16777619;
314 
315 	return (hash & PMCSTAT_HASH_MASK);
316 }
317 
318 /*
319  * Intern a copy of string 's', and return a pointer to the
320  * interned structure.
321  */
322 
323 pmcstat_interned_string
324 pmcstat_string_intern(const char *s)
325 {
326 	struct pmcstat_string *ps;
327 	const struct pmcstat_string *cps;
328 	int hash, len;
329 
330 	if ((cps = pmcstat_string_lookup(s)) != NULL)
331 		return (cps);
332 
333 	hash = pmcstat_string_compute_hash(s);
334 	len  = strlen(s);
335 
336 	if ((ps = malloc(sizeof(*ps))) == NULL)
337 		err(EX_OSERR, "ERROR: Could not intern string");
338 	ps->ps_len = len;
339 	ps->ps_hash = hash;
340 	ps->ps_string = strdup(s);
341 	LIST_INSERT_HEAD(&pmcstat_string_hash[hash], ps, ps_next);
342 	return ((pmcstat_interned_string) ps);
343 }
344 
345 const char *
346 pmcstat_string_unintern(pmcstat_interned_string str)
347 {
348 	const char *s;
349 
350 	s = ((const struct pmcstat_string *) str)->ps_string;
351 	return (s);
352 }
353 
354 pmcstat_interned_string
355 pmcstat_string_lookup(const char *s)
356 {
357 	struct pmcstat_string *ps;
358 	int hash, len;
359 
360 	hash = pmcstat_string_compute_hash(s);
361 	len = strlen(s);
362 
363 	LIST_FOREACH(ps, &pmcstat_string_hash[hash], ps_next)
364 	    if (ps->ps_len == len && ps->ps_hash == hash &&
365 		strcmp(ps->ps_string, s) == 0)
366 		    return (ps);
367 	return (NULL);
368 }
369 
370 static int
371 pmcstat_string_lookup_hash(pmcstat_interned_string s)
372 {
373 	const struct pmcstat_string *ps;
374 
375 	ps = (const struct pmcstat_string *) s;
376 	return (ps->ps_hash);
377 }
378 
379 /*
380  * Initialize the string interning facility.
381  */
382 
383 static void
384 pmcstat_string_initialize(void)
385 {
386 	int i;
387 
388 	for (i = 0; i < PMCSTAT_NHASH; i++)
389 		LIST_INIT(&pmcstat_string_hash[i]);
390 }
391 
392 /*
393  * Destroy the string table, free'ing up space.
394  */
395 
396 static void
397 pmcstat_string_shutdown(void)
398 {
399 	int i;
400 	struct pmcstat_string *ps, *pstmp;
401 
402 	for (i = 0; i < PMCSTAT_NHASH; i++)
403 		LIST_FOREACH_SAFE(ps, &pmcstat_string_hash[i], ps_next,
404 		    pstmp) {
405 			LIST_REMOVE(ps, ps_next);
406 			free(ps->ps_string);
407 			free(ps);
408 		}
409 }
410 
411 /*
412  * Determine whether a given executable image is an A.OUT object, and
413  * if so, fill in its parameters from the text file.
414  * Sets image->pi_type.
415  */
416 
417 static void
418 pmcstat_image_get_aout_params(struct pmcstat_image *image)
419 {
420 	int fd;
421 	ssize_t nbytes;
422 	struct exec ex;
423 	const char *path;
424 	char buffer[PATH_MAX];
425 
426 	path = pmcstat_string_unintern(image->pi_execpath);
427 	assert(path != NULL);
428 
429 	if (image->pi_iskernelmodule)
430 		errx(EX_SOFTWARE,
431 		    "ERROR: a.out kernel modules are unsupported \"%s\"", path);
432 
433 	(void) snprintf(buffer, sizeof(buffer), "%s%s",
434 	    args.pa_fsroot, path);
435 
436 	if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
437 	    (nbytes = read(fd, &ex, sizeof(ex))) < 0) {
438 		if (args.pa_verbosity >= 2)
439 			warn("WARNING: Cannot determine type of \"%s\"",
440 			    path);
441 		image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
442 		if (fd != -1)
443 			(void) close(fd);
444 		return;
445 	}
446 
447 	(void) close(fd);
448 
449 	if ((unsigned) nbytes != sizeof(ex) ||
450 	    N_BADMAG(ex))
451 		return;
452 
453 	image->pi_type = PMCSTAT_IMAGE_AOUT;
454 
455 	/* TODO: the rest of a.out processing */
456 
457 	return;
458 }
459 
460 /*
461  * Helper function.
462  */
463 
464 static int
465 pmcstat_symbol_compare(const void *a, const void *b)
466 {
467 	const struct pmcstat_symbol *sym1, *sym2;
468 
469 	sym1 = (const struct pmcstat_symbol *) a;
470 	sym2 = (const struct pmcstat_symbol *) b;
471 
472 	if (sym1->ps_end <= sym2->ps_start)
473 		return (-1);
474 	if (sym1->ps_start >= sym2->ps_end)
475 		return (1);
476 	return (0);
477 }
478 
479 /*
480  * Map an address to a symbol in an image.
481  */
482 
483 struct pmcstat_symbol *
484 pmcstat_symbol_search(struct pmcstat_image *image, uintfptr_t addr)
485 {
486 	struct pmcstat_symbol sym;
487 
488 	if (image->pi_symbols == NULL)
489 		return (NULL);
490 
491 	sym.ps_name  = NULL;
492 	sym.ps_start = addr;
493 	sym.ps_end   = addr + 1;
494 
495 	return (bsearch((void *) &sym, image->pi_symbols,
496 		    image->pi_symcount, sizeof(struct pmcstat_symbol),
497 		    pmcstat_symbol_compare));
498 }
499 
500 /*
501  * Add the list of symbols in the given section to the list associated
502  * with the object.
503  */
504 static void
505 pmcstat_image_add_symbols(struct pmcstat_image *image, Elf *e,
506     Elf_Scn *scn, GElf_Shdr *sh)
507 {
508 	int firsttime;
509 	size_t n, newsyms, nshsyms, nfuncsyms;
510 	struct pmcstat_symbol *symptr;
511 	char *fnname;
512 	GElf_Sym sym;
513 	Elf_Data *data;
514 
515 	if ((data = elf_getdata(scn, NULL)) == NULL)
516 		return;
517 
518 	/*
519 	 * Determine the number of functions named in this
520 	 * section.
521 	 */
522 
523 	nshsyms = sh->sh_size / sh->sh_entsize;
524 	for (n = nfuncsyms = 0; n < nshsyms; n++) {
525 		if (gelf_getsym(data, (int) n, &sym) != &sym)
526 			return;
527 		if (GELF_ST_TYPE(sym.st_info) == STT_FUNC)
528 			nfuncsyms++;
529 	}
530 
531 	if (nfuncsyms == 0)
532 		return;
533 
534 	/*
535 	 * Allocate space for the new entries.
536 	 */
537 	firsttime = image->pi_symbols == NULL;
538 	symptr = realloc(image->pi_symbols,
539 	    sizeof(*symptr) * (image->pi_symcount + nfuncsyms));
540 	if (symptr == image->pi_symbols) /* realloc() failed. */
541 		return;
542 	image->pi_symbols = symptr;
543 
544 	/*
545 	 * Append new symbols to the end of the current table.
546 	 */
547 	symptr += image->pi_symcount;
548 
549 	for (n = newsyms = 0; n < nshsyms; n++) {
550 		if (gelf_getsym(data, (int) n, &sym) != &sym)
551 			return;
552 		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
553 			continue;
554 		if (sym.st_shndx == STN_UNDEF)
555 			continue;
556 
557 		if (!firsttime && pmcstat_symbol_search(image, sym.st_value))
558 			continue; /* We've seen this symbol already. */
559 
560 		if ((fnname = elf_strptr(e, sh->sh_link, sym.st_name))
561 		    == NULL)
562 			continue;
563 #ifdef __arm__
564 		/* Remove spurious ARM function name. */
565 		if (fnname[0] == '$' &&
566 		    (fnname[1] == 'a' || fnname[1] == 't' ||
567 		    fnname[1] == 'd') &&
568 		    fnname[2] == '\0')
569 			continue;
570 #endif
571 
572 		symptr->ps_name  = pmcstat_string_intern(fnname);
573 		symptr->ps_start = sym.st_value - image->pi_vaddr;
574 		symptr->ps_end   = symptr->ps_start + sym.st_size;
575 		symptr++;
576 
577 		newsyms++;
578 	}
579 
580 	image->pi_symcount += newsyms;
581 	if (image->pi_symcount == 0)
582 		return;
583 
584 	assert(newsyms <= nfuncsyms);
585 
586 	/*
587 	 * Return space to the system if there were duplicates.
588 	 */
589 	if (newsyms < nfuncsyms)
590 		image->pi_symbols = realloc(image->pi_symbols,
591 		    sizeof(*symptr) * image->pi_symcount);
592 
593 	/*
594 	 * Keep the list of symbols sorted.
595 	 */
596 	qsort(image->pi_symbols, image->pi_symcount, sizeof(*symptr),
597 	    pmcstat_symbol_compare);
598 
599 	/*
600 	 * Deal with function symbols that have a size of 'zero' by
601 	 * making them extend to the next higher address.  These
602 	 * symbols are usually defined in assembly code.
603 	 */
604 	for (symptr = image->pi_symbols;
605 	     symptr < image->pi_symbols + (image->pi_symcount - 1);
606 	     symptr++)
607 		if (symptr->ps_start == symptr->ps_end)
608 			symptr->ps_end = (symptr+1)->ps_start;
609 }
610 
611 /*
612  * Examine an ELF file to determine the size of its text segment.
613  * Sets image->pi_type if anything conclusive can be determined about
614  * this image.
615  */
616 
617 static void
618 pmcstat_image_get_elf_params(struct pmcstat_image *image)
619 {
620 	int fd;
621 	size_t i, nph, nsh;
622 	const char *path, *elfbase;
623 	char *p, *endp;
624 	uintfptr_t minva, maxva;
625 	Elf *e;
626 	Elf_Scn *scn;
627 	GElf_Ehdr eh;
628 	GElf_Phdr ph;
629 	GElf_Shdr sh;
630 	enum pmcstat_image_type image_type;
631 	char buffer[PATH_MAX];
632 
633 	assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
634 
635 	image->pi_start = minva = ~(uintfptr_t) 0;
636 	image->pi_end = maxva = (uintfptr_t) 0;
637 	image->pi_type = image_type = PMCSTAT_IMAGE_INDETERMINABLE;
638 	image->pi_isdynamic = 0;
639 	image->pi_dynlinkerpath = NULL;
640 	image->pi_vaddr = 0;
641 
642 	path = pmcstat_string_unintern(image->pi_execpath);
643 	assert(path != NULL);
644 
645 	/*
646 	 * Look for kernel modules under FSROOT/KERNELPATH/NAME,
647 	 * and user mode executable objects under FSROOT/PATHNAME.
648 	 */
649 	if (image->pi_iskernelmodule)
650 		(void) snprintf(buffer, sizeof(buffer), "%s%s/%s",
651 		    args.pa_fsroot, args.pa_kernel, path);
652 	else
653 		(void) snprintf(buffer, sizeof(buffer), "%s%s",
654 		    args.pa_fsroot, path);
655 
656 	e = NULL;
657 	if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
658 	    (e = elf_begin(fd, ELF_C_READ, NULL)) == NULL ||
659 	    (elf_kind(e) != ELF_K_ELF)) {
660 		if (args.pa_verbosity >= 2)
661 			warnx("WARNING: Cannot determine the type of \"%s\".",
662 			    buffer);
663 		goto done;
664 	}
665 
666 	if (gelf_getehdr(e, &eh) != &eh) {
667 		warnx(
668 		    "WARNING: Cannot retrieve the ELF Header for \"%s\": %s.",
669 		    buffer, elf_errmsg(-1));
670 		goto done;
671 	}
672 
673 	if (eh.e_type != ET_EXEC && eh.e_type != ET_DYN &&
674 	    !(image->pi_iskernelmodule && eh.e_type == ET_REL)) {
675 		warnx("WARNING: \"%s\" is of an unsupported ELF type.",
676 		    buffer);
677 		goto done;
678 	}
679 
680 	image_type = eh.e_ident[EI_CLASS] == ELFCLASS32 ?
681 	    PMCSTAT_IMAGE_ELF32 : PMCSTAT_IMAGE_ELF64;
682 
683 	/*
684 	 * Determine the virtual address where an executable would be
685 	 * loaded.  Additionally, for dynamically linked executables,
686 	 * save the pathname to the runtime linker.
687 	 */
688 	if (eh.e_type == ET_EXEC) {
689 		if (elf_getphnum(e, &nph) == 0) {
690 			warnx(
691 "WARNING: Could not determine the number of program headers in \"%s\": %s.",
692 			    buffer,
693 			    elf_errmsg(-1));
694 			goto done;
695 		}
696 		for (i = 0; i < eh.e_phnum; i++) {
697 			if (gelf_getphdr(e, i, &ph) != &ph) {
698 				warnx(
699 "WARNING: Retrieval of PHDR entry #%ju in \"%s\" failed: %s.",
700 				    (uintmax_t) i, buffer, elf_errmsg(-1));
701 				goto done;
702 			}
703 			switch (ph.p_type) {
704 			case PT_DYNAMIC:
705 				image->pi_isdynamic = 1;
706 				break;
707 			case PT_INTERP:
708 				if ((elfbase = elf_rawfile(e, NULL)) == NULL) {
709 					warnx(
710 "WARNING: Cannot retrieve the interpreter for \"%s\": %s.",
711 					    buffer, elf_errmsg(-1));
712 					goto done;
713 				}
714 				image->pi_dynlinkerpath =
715 				    pmcstat_string_intern(elfbase +
716 				        ph.p_offset);
717 				break;
718 			case PT_LOAD:
719 				if ((ph.p_flags & PF_X) != 0 &&
720 				    (ph.p_offset & (-ph.p_align)) == 0)
721 					image->pi_vaddr = ph.p_vaddr & (-ph.p_align);
722 				break;
723 			}
724 		}
725 	}
726 
727 	/*
728 	 * Get the min and max VA associated with this ELF object.
729 	 */
730 	if (elf_getshnum(e, &nsh) == 0) {
731 		warnx(
732 "WARNING: Could not determine the number of sections for \"%s\": %s.",
733 		    buffer, elf_errmsg(-1));
734 		goto done;
735 	}
736 
737 	for (i = 0; i < nsh; i++) {
738 		if ((scn = elf_getscn(e, i)) == NULL ||
739 		    gelf_getshdr(scn, &sh) != &sh) {
740 			warnx(
741 "WARNING: Could not retrieve section header #%ju in \"%s\": %s.",
742 			    (uintmax_t) i, buffer, elf_errmsg(-1));
743 			goto done;
744 		}
745 		if (sh.sh_flags & SHF_EXECINSTR) {
746 			minva = min(minva, sh.sh_addr);
747 			maxva = max(maxva, sh.sh_addr + sh.sh_size);
748 		}
749 		if (sh.sh_type == SHT_SYMTAB || sh.sh_type == SHT_DYNSYM)
750 			pmcstat_image_add_symbols(image, e, scn, &sh);
751 	}
752 
753 	image->pi_start = minva;
754 	image->pi_end   = maxva;
755 	image->pi_type  = image_type;
756 	image->pi_fullpath = pmcstat_string_intern(buffer);
757 
758 	/* Build display name
759 	 */
760 	endp = buffer;
761 	for (p = buffer; *p; p++)
762 		if (*p == '/')
763 			endp = p+1;
764 	image->pi_name = pmcstat_string_intern(endp);
765 
766  done:
767 	(void) elf_end(e);
768 	if (fd >= 0)
769 		(void) close(fd);
770 	return;
771 }
772 
773 /*
774  * Given an image descriptor, determine whether it is an ELF, or AOUT.
775  * If no handler claims the image, set its type to 'INDETERMINABLE'.
776  */
777 
778 void
779 pmcstat_image_determine_type(struct pmcstat_image *image)
780 {
781 	assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
782 
783 	/* Try each kind of handler in turn */
784 	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
785 		pmcstat_image_get_elf_params(image);
786 	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
787 		pmcstat_image_get_aout_params(image);
788 
789 	/*
790 	 * Otherwise, remember that we tried to determine
791 	 * the object's type and had failed.
792 	 */
793 	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
794 		image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
795 }
796 
797 /*
798  * Locate an image descriptor given an interned path, adding a fresh
799  * descriptor to the cache if necessary.  This function also finds a
800  * suitable name for this image's sample file.
801  *
802  * We defer filling in the file format specific parts of the image
803  * structure till the time we actually see a sample that would fall
804  * into this image.
805  */
806 
807 static struct pmcstat_image *
808 pmcstat_image_from_path(pmcstat_interned_string internedpath,
809     int iskernelmodule)
810 {
811 	int hash;
812 	struct pmcstat_image *pi;
813 
814 	hash = pmcstat_string_lookup_hash(internedpath);
815 
816 	/* First, look for an existing entry. */
817 	LIST_FOREACH(pi, &pmcstat_image_hash[hash], pi_next)
818 	    if (pi->pi_execpath == internedpath &&
819 		  pi->pi_iskernelmodule == iskernelmodule)
820 		    return (pi);
821 
822 	/*
823 	 * Allocate a new entry and place it at the head of the hash
824 	 * and LRU lists.
825 	 */
826 	pi = malloc(sizeof(*pi));
827 	if (pi == NULL)
828 		return (NULL);
829 
830 	pi->pi_type = PMCSTAT_IMAGE_UNKNOWN;
831 	pi->pi_execpath = internedpath;
832 	pi->pi_start = ~0;
833 	pi->pi_end = 0;
834 	pi->pi_entry = 0;
835 	pi->pi_vaddr = 0;
836 	pi->pi_isdynamic = 0;
837 	pi->pi_iskernelmodule = iskernelmodule;
838 	pi->pi_dynlinkerpath = NULL;
839 	pi->pi_symbols = NULL;
840 	pi->pi_symcount = 0;
841 	pi->pi_addr2line = NULL;
842 
843 	if (plugins[args.pa_pplugin].pl_initimage != NULL)
844 		plugins[args.pa_pplugin].pl_initimage(pi);
845 	if (plugins[args.pa_plugin].pl_initimage != NULL)
846 		plugins[args.pa_plugin].pl_initimage(pi);
847 
848 	LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next);
849 
850 	return (pi);
851 }
852 
853 /*
854  * Record the fact that PC values from 'start' to 'end' come from
855  * image 'image'.
856  */
857 
858 static void
859 pmcstat_image_link(struct pmcstat_process *pp, struct pmcstat_image *image,
860     uintfptr_t start)
861 {
862 	struct pmcstat_pcmap *pcm, *pcmnew;
863 	uintfptr_t offset;
864 
865 	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN &&
866 	    image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE);
867 
868 	if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
869 		err(EX_OSERR, "ERROR: Cannot create a map entry");
870 
871 	/*
872 	 * Adjust the map entry to only cover the text portion
873 	 * of the object.
874 	 */
875 
876 	offset = start - image->pi_vaddr;
877 	pcmnew->ppm_lowpc  = image->pi_start + offset;
878 	pcmnew->ppm_highpc = image->pi_end + offset;
879 	pcmnew->ppm_image  = image;
880 
881 	assert(pcmnew->ppm_lowpc < pcmnew->ppm_highpc);
882 
883 	/* Overlapped mmap()'s are assumed to never occur. */
884 	TAILQ_FOREACH(pcm, &pp->pp_map, ppm_next)
885 	    if (pcm->ppm_lowpc >= pcmnew->ppm_highpc)
886 		    break;
887 
888 	if (pcm == NULL)
889 		TAILQ_INSERT_TAIL(&pp->pp_map, pcmnew, ppm_next);
890 	else
891 		TAILQ_INSERT_BEFORE(pcm, pcmnew, ppm_next);
892 }
893 
894 /*
895  * Unmap images in the range [start..end) associated with process
896  * 'pp'.
897  */
898 
899 static void
900 pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start,
901     uintfptr_t end)
902 {
903 	struct pmcstat_pcmap *pcm, *pcmtmp, *pcmnew;
904 
905 	assert(pp != NULL);
906 	assert(start < end);
907 
908 	/*
909 	 * Cases:
910 	 * - we could have the range completely in the middle of an
911 	 *   existing pcmap; in this case we have to split the pcmap
912 	 *   structure into two (i.e., generate a 'hole').
913 	 * - we could have the range covering multiple pcmaps; these
914 	 *   will have to be removed.
915 	 * - we could have either 'start' or 'end' falling in the
916 	 *   middle of a pcmap; in this case shorten the entry.
917 	 */
918 	TAILQ_FOREACH_SAFE(pcm, &pp->pp_map, ppm_next, pcmtmp) {
919 		assert(pcm->ppm_lowpc < pcm->ppm_highpc);
920 		if (pcm->ppm_highpc <= start)
921 			continue;
922 		if (pcm->ppm_lowpc >= end)
923 			return;
924 		if (pcm->ppm_lowpc >= start && pcm->ppm_highpc <= end) {
925 			/*
926 			 * The current pcmap is completely inside the
927 			 * unmapped range: remove it entirely.
928 			 */
929 			TAILQ_REMOVE(&pp->pp_map, pcm, ppm_next);
930 			free(pcm);
931 		} else if (pcm->ppm_lowpc < start && pcm->ppm_highpc > end) {
932 			/*
933 			 * Split this pcmap into two; curtail the
934 			 * current map to end at [start-1], and start
935 			 * the new one at [end].
936 			 */
937 			if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
938 				err(EX_OSERR,
939 				    "ERROR: Cannot split a map entry");
940 
941 			pcmnew->ppm_image = pcm->ppm_image;
942 
943 			pcmnew->ppm_lowpc = end;
944 			pcmnew->ppm_highpc = pcm->ppm_highpc;
945 
946 			pcm->ppm_highpc = start;
947 
948 			TAILQ_INSERT_AFTER(&pp->pp_map, pcm, pcmnew, ppm_next);
949 
950 			return;
951 		} else if (pcm->ppm_lowpc < start && pcm->ppm_highpc <= end)
952 			pcm->ppm_highpc = start;
953 		else if (pcm->ppm_lowpc >= start && pcm->ppm_highpc > end)
954 			pcm->ppm_lowpc = end;
955 		else
956 			assert(0);
957 	}
958 }
959 
960 /*
961  * Resolve file name and line number for the given address.
962  */
963 int
964 pmcstat_image_addr2line(struct pmcstat_image *image, uintfptr_t addr,
965     char *sourcefile, size_t sourcefile_len, unsigned *sourceline,
966     char *funcname, size_t funcname_len)
967 {
968 	static int addr2line_warn = 0;
969 
970 	char *sep, cmdline[PATH_MAX], imagepath[PATH_MAX];
971 	unsigned l;
972 	int fd;
973 
974 	if (image->pi_addr2line == NULL) {
975 		/* Try default debug file location. */
976 		snprintf(imagepath, sizeof(imagepath),
977 		    "/usr/lib/debug/%s%s.debug",
978 		    args.pa_fsroot,
979 		    pmcstat_string_unintern(image->pi_fullpath));
980 		fd = open(imagepath, O_RDONLY);
981 		if (fd < 0) {
982 			/* Old kernel symbol path. */
983 			snprintf(imagepath, sizeof(imagepath), "%s%s.symbols",
984 			    args.pa_fsroot,
985 			    pmcstat_string_unintern(image->pi_fullpath));
986 			fd = open(imagepath, O_RDONLY);
987 			if (fd < 0) {
988 				snprintf(imagepath, sizeof(imagepath), "%s%s",
989 				    args.pa_fsroot,
990 				    pmcstat_string_unintern(
991 				        image->pi_fullpath));
992 			}
993 		}
994 		if (fd >= 0)
995 			close(fd);
996 		/*
997 		 * New addr2line support recursive inline function with -i
998 		 * but the format does not add a marker when no more entries
999 		 * are available.
1000 		 */
1001 		snprintf(cmdline, sizeof(cmdline), "addr2line -Cfe \"%s\"",
1002 		    imagepath);
1003 		image->pi_addr2line = popen(cmdline, "r+");
1004 		if (image->pi_addr2line == NULL) {
1005 			if (!addr2line_warn) {
1006 				addr2line_warn = 1;
1007 				warnx(
1008 "WARNING: addr2line is needed for source code information."
1009 				    );
1010 			}
1011 			return (0);
1012 		}
1013 	}
1014 
1015 	if (feof(image->pi_addr2line) || ferror(image->pi_addr2line)) {
1016 		warnx("WARNING: addr2line pipe error");
1017 		pclose(image->pi_addr2line);
1018 		image->pi_addr2line = NULL;
1019 		return (0);
1020 	}
1021 
1022 	fprintf(image->pi_addr2line, "%p\n", (void *)addr);
1023 
1024 	if (fgets(funcname, funcname_len, image->pi_addr2line) == NULL) {
1025 		warnx("WARNING: addr2line function name read error");
1026 		return (0);
1027 	}
1028 	sep = strchr(funcname, '\n');
1029 	if (sep != NULL)
1030 		*sep = '\0';
1031 
1032 	if (fgets(sourcefile, sourcefile_len, image->pi_addr2line) == NULL) {
1033 		warnx("WARNING: addr2line source file read error");
1034 		return (0);
1035 	}
1036 	sep = strchr(sourcefile, ':');
1037 	if (sep == NULL) {
1038 		warnx("WARNING: addr2line source line separator missing");
1039 		return (0);
1040 	}
1041 	*sep = '\0';
1042 	l = atoi(sep+1);
1043 	if (l == 0)
1044 		return (0);
1045 	*sourceline = l;
1046 	return (1);
1047 }
1048 
1049 /*
1050  * Add a {pmcid,name} mapping.
1051  */
1052 
1053 static void
1054 pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps)
1055 {
1056 	struct pmcstat_pmcrecord *pr, *prm;
1057 
1058 	/* Replace an existing name for the PMC. */
1059 	prm = NULL;
1060 	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1061 		if (pr->pr_pmcid == pmcid) {
1062 			pr->pr_pmcname = ps;
1063 			return;
1064 		} else if (pr->pr_pmcname == ps)
1065 			prm = pr;
1066 
1067 	/*
1068 	 * Otherwise, allocate a new descriptor and call the
1069 	 * plugins hook.
1070 	 */
1071 	if ((pr = malloc(sizeof(*pr))) == NULL)
1072 		err(EX_OSERR, "ERROR: Cannot allocate pmc record");
1073 
1074 	pr->pr_pmcid = pmcid;
1075 	pr->pr_pmcname = ps;
1076 	pr->pr_pmcin = pmcstat_npmcs++;
1077 	pr->pr_samples = 0;
1078 	pr->pr_dubious_frames = 0;
1079 	pr->pr_merge = prm == NULL ? pr : prm;
1080 
1081 	LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next);
1082 
1083 	if (plugins[args.pa_pplugin].pl_newpmc != NULL)
1084 		plugins[args.pa_pplugin].pl_newpmc(ps, pr);
1085 	if (plugins[args.pa_plugin].pl_newpmc != NULL)
1086 		plugins[args.pa_plugin].pl_newpmc(ps, pr);
1087 }
1088 
1089 /*
1090  * Given a pmcid in use, find its human-readable name.
1091  */
1092 
1093 const char *
1094 pmcstat_pmcid_to_name(pmc_id_t pmcid)
1095 {
1096 	struct pmcstat_pmcrecord *pr;
1097 
1098 	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1099 	    if (pr->pr_pmcid == pmcid)
1100 		    return (pmcstat_string_unintern(pr->pr_pmcname));
1101 
1102 	return NULL;
1103 }
1104 
1105 /*
1106  * Convert PMC index to name.
1107  */
1108 
1109 const char *
1110 pmcstat_pmcindex_to_name(int pmcin)
1111 {
1112 	struct pmcstat_pmcrecord *pr;
1113 
1114 	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1115 		if (pr->pr_pmcin == pmcin)
1116 			return pmcstat_string_unintern(pr->pr_pmcname);
1117 
1118 	return NULL;
1119 }
1120 
1121 /*
1122  * Return PMC record with given index.
1123  */
1124 
1125 struct pmcstat_pmcrecord *
1126 pmcstat_pmcindex_to_pmcr(int pmcin)
1127 {
1128 	struct pmcstat_pmcrecord *pr;
1129 
1130 	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1131 		if (pr->pr_pmcin == pmcin)
1132 			return pr;
1133 
1134 	return NULL;
1135 }
1136 
1137 /*
1138  * Get PMC record by id, apply merge policy.
1139  */
1140 
1141 static struct pmcstat_pmcrecord *
1142 pmcstat_lookup_pmcid(pmc_id_t pmcid)
1143 {
1144 	struct pmcstat_pmcrecord *pr;
1145 
1146 	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) {
1147 		if (pr->pr_pmcid == pmcid) {
1148 			if (pmcstat_mergepmc)
1149 				return pr->pr_merge;
1150 			return pr;
1151 		}
1152 	}
1153 
1154 	return NULL;
1155 }
1156 
1157 /*
1158  * Associate an AOUT image with a process.
1159  */
1160 
1161 static void
1162 pmcstat_process_aout_exec(struct pmcstat_process *pp,
1163     struct pmcstat_image *image, uintfptr_t entryaddr)
1164 {
1165 	(void) pp;
1166 	(void) image;
1167 	(void) entryaddr;
1168 	/* TODO Implement a.out handling */
1169 }
1170 
1171 /*
1172  * Associate an ELF image with a process.
1173  */
1174 
1175 static void
1176 pmcstat_process_elf_exec(struct pmcstat_process *pp,
1177     struct pmcstat_image *image, uintfptr_t entryaddr)
1178 {
1179 	uintmax_t libstart;
1180 	struct pmcstat_image *rtldimage;
1181 
1182 	assert(image->pi_type == PMCSTAT_IMAGE_ELF32 ||
1183 	    image->pi_type == PMCSTAT_IMAGE_ELF64);
1184 
1185 	/* Create a map entry for the base executable. */
1186 	pmcstat_image_link(pp, image, image->pi_vaddr);
1187 
1188 	/*
1189 	 * For dynamically linked executables we need to determine
1190 	 * where the dynamic linker was mapped to for this process,
1191 	 * Subsequent executable objects that are mapped in by the
1192 	 * dynamic linker will be tracked by log events of type
1193 	 * PMCLOG_TYPE_MAP_IN.
1194 	 */
1195 
1196 	if (image->pi_isdynamic) {
1197 
1198 		/*
1199 		 * The runtime loader gets loaded just after the maximum
1200 		 * possible heap address.  Like so:
1201 		 *
1202 		 * [  TEXT DATA BSS HEAP -->*RTLD  SHLIBS   <--STACK]
1203 		 * ^					            ^
1204 		 * 0				   VM_MAXUSER_ADDRESS
1205 
1206 		 *
1207 		 * The exact address where the loader gets mapped in
1208 		 * will vary according to the size of the executable
1209 		 * and the limits on the size of the process'es data
1210 		 * segment at the time of exec().  The entry address
1211 		 * recorded at process exec time corresponds to the
1212 		 * 'start' address inside the dynamic linker.  From
1213 		 * this we can figure out the address where the
1214 		 * runtime loader's file object had been mapped to.
1215 		 */
1216 		rtldimage = pmcstat_image_from_path(image->pi_dynlinkerpath, 0);
1217 		if (rtldimage == NULL) {
1218 			warnx("WARNING: Cannot find image for \"%s\".",
1219 			    pmcstat_string_unintern(image->pi_dynlinkerpath));
1220 			pmcstat_stats.ps_exec_errors++;
1221 			return;
1222 		}
1223 
1224 		if (rtldimage->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1225 			pmcstat_image_get_elf_params(rtldimage);
1226 
1227 		if (rtldimage->pi_type != PMCSTAT_IMAGE_ELF32 &&
1228 		    rtldimage->pi_type != PMCSTAT_IMAGE_ELF64) {
1229 			warnx("WARNING: rtld not an ELF object \"%s\".",
1230 			    pmcstat_string_unintern(image->pi_dynlinkerpath));
1231 			return;
1232 		}
1233 
1234 		libstart = entryaddr - rtldimage->pi_entry;
1235 		pmcstat_image_link(pp, rtldimage, libstart);
1236 	}
1237 }
1238 
1239 /*
1240  * Find the process descriptor corresponding to a PID.  If 'allocate'
1241  * is zero, we return a NULL if a pid descriptor could not be found or
1242  * a process descriptor process.  If 'allocate' is non-zero, then we
1243  * will attempt to allocate a fresh process descriptor.  Zombie
1244  * process descriptors are only removed if a fresh allocation for the
1245  * same PID is requested.
1246  */
1247 
1248 static struct pmcstat_process *
1249 pmcstat_process_lookup(pid_t pid, int allocate)
1250 {
1251 	uint32_t hash;
1252 	struct pmcstat_pcmap *ppm, *ppmtmp;
1253 	struct pmcstat_process *pp, *pptmp;
1254 
1255 	hash = (uint32_t) pid & PMCSTAT_HASH_MASK;	/* simplicity wins */
1256 
1257 	LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[hash], pp_next, pptmp)
1258 		if (pp->pp_pid == pid) {
1259 			/* Found a descriptor, check and process zombies */
1260 			if (allocate && pp->pp_isactive == 0) {
1261 				/* remove maps */
1262 				TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next,
1263 				    ppmtmp) {
1264 					TAILQ_REMOVE(&pp->pp_map, ppm,
1265 					    ppm_next);
1266 					free(ppm);
1267 				}
1268 				/* remove process entry */
1269 				LIST_REMOVE(pp, pp_next);
1270 				free(pp);
1271 				break;
1272 			}
1273 			return (pp);
1274 		}
1275 
1276 	if (!allocate)
1277 		return (NULL);
1278 
1279 	if ((pp = malloc(sizeof(*pp))) == NULL)
1280 		err(EX_OSERR, "ERROR: Cannot allocate pid descriptor");
1281 
1282 	pp->pp_pid = pid;
1283 	pp->pp_isactive = 1;
1284 
1285 	TAILQ_INIT(&pp->pp_map);
1286 
1287 	LIST_INSERT_HEAD(&pmcstat_process_hash[hash], pp, pp_next);
1288 	return (pp);
1289 }
1290 
1291 /*
1292  * Associate an image and a process.
1293  */
1294 
1295 static void
1296 pmcstat_process_exec(struct pmcstat_process *pp,
1297     pmcstat_interned_string path, uintfptr_t entryaddr)
1298 {
1299 	struct pmcstat_image *image;
1300 
1301 	if ((image = pmcstat_image_from_path(path, 0)) == NULL) {
1302 		pmcstat_stats.ps_exec_errors++;
1303 		return;
1304 	}
1305 
1306 	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1307 		pmcstat_image_determine_type(image);
1308 
1309 	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN);
1310 
1311 	switch (image->pi_type) {
1312 	case PMCSTAT_IMAGE_ELF32:
1313 	case PMCSTAT_IMAGE_ELF64:
1314 		pmcstat_stats.ps_exec_elf++;
1315 		pmcstat_process_elf_exec(pp, image, entryaddr);
1316 		break;
1317 
1318 	case PMCSTAT_IMAGE_AOUT:
1319 		pmcstat_stats.ps_exec_aout++;
1320 		pmcstat_process_aout_exec(pp, image, entryaddr);
1321 		break;
1322 
1323 	case PMCSTAT_IMAGE_INDETERMINABLE:
1324 		pmcstat_stats.ps_exec_indeterminable++;
1325 		break;
1326 
1327 	default:
1328 		err(EX_SOFTWARE,
1329 		    "ERROR: Unsupported executable type for \"%s\"",
1330 		    pmcstat_string_unintern(path));
1331 	}
1332 }
1333 
1334 
1335 /*
1336  * Find the map entry associated with process 'p' at PC value 'pc'.
1337  */
1338 
1339 struct pmcstat_pcmap *
1340 pmcstat_process_find_map(struct pmcstat_process *p, uintfptr_t pc)
1341 {
1342 	struct pmcstat_pcmap *ppm;
1343 
1344 	TAILQ_FOREACH(ppm, &p->pp_map, ppm_next) {
1345 		if (pc >= ppm->ppm_lowpc && pc < ppm->ppm_highpc)
1346 			return (ppm);
1347 		if (pc < ppm->ppm_lowpc)
1348 			return (NULL);
1349 	}
1350 
1351 	return (NULL);
1352 }
1353 
1354 /*
1355  * Convert a hwpmc(4) log to profile information.  A system-wide
1356  * callgraph is generated if FLAG_DO_CALLGRAPHS is set.  gmon.out
1357  * files usable by gprof(1) are created if FLAG_DO_GPROF is set.
1358  */
1359 static int
1360 pmcstat_analyze_log(void)
1361 {
1362 	uint32_t cpu, cpuflags;
1363 	uintfptr_t pc;
1364 	pid_t pid;
1365 	struct pmcstat_image *image;
1366 	struct pmcstat_process *pp, *ppnew;
1367 	struct pmcstat_pcmap *ppm, *ppmtmp;
1368 	struct pmclog_ev ev;
1369 	struct pmcstat_pmcrecord *pmcr;
1370 	pmcstat_interned_string image_path;
1371 
1372 	assert(args.pa_flags & FLAG_DO_ANALYSIS);
1373 
1374 	if (elf_version(EV_CURRENT) == EV_NONE)
1375 		err(EX_UNAVAILABLE, "Elf library intialization failed");
1376 
1377 	while (pmclog_read(args.pa_logparser, &ev) == 0) {
1378 		assert(ev.pl_state == PMCLOG_OK);
1379 
1380 		switch (ev.pl_type) {
1381 		case PMCLOG_TYPE_INITIALIZE:
1382 			if ((ev.pl_u.pl_i.pl_version & 0xFF000000) !=
1383 			    PMC_VERSION_MAJOR << 24 && args.pa_verbosity > 0)
1384 				warnx(
1385 "WARNING: Log version 0x%x does not match compiled version 0x%x.",
1386 				    ev.pl_u.pl_i.pl_version, PMC_VERSION_MAJOR);
1387 			break;
1388 
1389 		case PMCLOG_TYPE_MAP_IN:
1390 			/*
1391 			 * Introduce an address range mapping for a
1392 			 * userland process or the kernel (pid == -1).
1393 			 *
1394 			 * We always allocate a process descriptor so
1395 			 * that subsequent samples seen for this
1396 			 * address range are mapped to the current
1397 			 * object being mapped in.
1398 			 */
1399 			pid = ev.pl_u.pl_mi.pl_pid;
1400 			if (pid == -1)
1401 				pp = pmcstat_kernproc;
1402 			else
1403 				pp = pmcstat_process_lookup(pid,
1404 				    PMCSTAT_ALLOCATE);
1405 
1406 			assert(pp != NULL);
1407 
1408 			image_path = pmcstat_string_intern(ev.pl_u.pl_mi.
1409 			    pl_pathname);
1410 			image = pmcstat_image_from_path(image_path, pid == -1);
1411 			if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1412 				pmcstat_image_determine_type(image);
1413 			if (image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE)
1414 				pmcstat_image_link(pp, image,
1415 				    ev.pl_u.pl_mi.pl_start);
1416 			break;
1417 
1418 		case PMCLOG_TYPE_MAP_OUT:
1419 			/*
1420 			 * Remove an address map.
1421 			 */
1422 			pid = ev.pl_u.pl_mo.pl_pid;
1423 			if (pid == -1)
1424 				pp = pmcstat_kernproc;
1425 			else
1426 				pp = pmcstat_process_lookup(pid, 0);
1427 
1428 			if (pp == NULL)	/* unknown process */
1429 				break;
1430 
1431 			pmcstat_image_unmap(pp, ev.pl_u.pl_mo.pl_start,
1432 			    ev.pl_u.pl_mo.pl_end);
1433 			break;
1434 
1435 		case PMCLOG_TYPE_PCSAMPLE:
1436 			/*
1437 			 * Note: the `PCSAMPLE' log entry is not
1438 			 * generated by hpwmc(4) after version 2.
1439 			 */
1440 
1441 			/*
1442 			 * We bring in the gmon file for the image
1443 			 * currently associated with the PMC & pid
1444 			 * pair and increment the appropriate entry
1445 			 * bin inside this.
1446 			 */
1447 			pmcstat_stats.ps_samples_total++;
1448 			ps_samples_period++;
1449 
1450 			pc = ev.pl_u.pl_s.pl_pc;
1451 			pp = pmcstat_process_lookup(ev.pl_u.pl_s.pl_pid,
1452 			    PMCSTAT_ALLOCATE);
1453 
1454 			/* Get PMC record. */
1455 			pmcr = pmcstat_lookup_pmcid(ev.pl_u.pl_s.pl_pmcid);
1456 			assert(pmcr != NULL);
1457 			pmcr->pr_samples++;
1458 
1459 			/*
1460 			 * Call the plugins processing
1461 			 * TODO: move pmcstat_process_find_map inside plugins
1462 			 */
1463 
1464 			if (plugins[args.pa_pplugin].pl_process != NULL)
1465 				plugins[args.pa_pplugin].pl_process(
1466 				    pp, pmcr, 1, &pc,
1467 				    pmcstat_process_find_map(pp, pc) != NULL, 0);
1468 			plugins[args.pa_plugin].pl_process(
1469 			    pp, pmcr, 1, &pc,
1470 			    pmcstat_process_find_map(pp, pc) != NULL, 0);
1471 			break;
1472 
1473 		case PMCLOG_TYPE_CALLCHAIN:
1474 			pmcstat_stats.ps_samples_total++;
1475 			ps_samples_period++;
1476 
1477 			cpuflags = ev.pl_u.pl_cc.pl_cpuflags;
1478 			cpu = PMC_CALLCHAIN_CPUFLAGS_TO_CPU(cpuflags);
1479 
1480 			/* Filter on the CPU id. */
1481 			if (!CPU_ISSET(cpu, &(args.pa_cpumask))) {
1482 				pmcstat_stats.ps_samples_skipped++;
1483 				break;
1484 			}
1485 
1486 			pp = pmcstat_process_lookup(ev.pl_u.pl_cc.pl_pid,
1487 			    PMCSTAT_ALLOCATE);
1488 
1489 			/* Get PMC record. */
1490 			pmcr = pmcstat_lookup_pmcid(ev.pl_u.pl_cc.pl_pmcid);
1491 			assert(pmcr != NULL);
1492 			pmcr->pr_samples++;
1493 
1494 			/*
1495 			 * Call the plugins processing
1496 			 */
1497 
1498 			if (plugins[args.pa_pplugin].pl_process != NULL)
1499 				plugins[args.pa_pplugin].pl_process(
1500 				    pp, pmcr,
1501 				    ev.pl_u.pl_cc.pl_npc,
1502 				    ev.pl_u.pl_cc.pl_pc,
1503 				    PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags),
1504 				    cpu);
1505 			plugins[args.pa_plugin].pl_process(
1506 			    pp, pmcr,
1507 			    ev.pl_u.pl_cc.pl_npc,
1508 			    ev.pl_u.pl_cc.pl_pc,
1509 			    PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags),
1510 			    cpu);
1511 			break;
1512 
1513 		case PMCLOG_TYPE_PMCALLOCATE:
1514 			/*
1515 			 * Record the association pmc id between this
1516 			 * PMC and its name.
1517 			 */
1518 			pmcstat_pmcid_add(ev.pl_u.pl_a.pl_pmcid,
1519 			    pmcstat_string_intern(ev.pl_u.pl_a.pl_evname));
1520 			break;
1521 
1522 		case PMCLOG_TYPE_PMCALLOCATEDYN:
1523 			/*
1524 			 * Record the association pmc id between this
1525 			 * PMC and its name.
1526 			 */
1527 			pmcstat_pmcid_add(ev.pl_u.pl_ad.pl_pmcid,
1528 			    pmcstat_string_intern(ev.pl_u.pl_ad.pl_evname));
1529 			break;
1530 
1531 		case PMCLOG_TYPE_PROCEXEC:
1532 
1533 			/*
1534 			 * Change the executable image associated with
1535 			 * a process.
1536 			 */
1537 			pp = pmcstat_process_lookup(ev.pl_u.pl_x.pl_pid,
1538 			    PMCSTAT_ALLOCATE);
1539 
1540 			/* delete the current process map */
1541 			TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) {
1542 				TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
1543 				free(ppm);
1544 			}
1545 
1546 			/*
1547 			 * Associate this process image.
1548 			 */
1549 			image_path = pmcstat_string_intern(
1550 				ev.pl_u.pl_x.pl_pathname);
1551 			assert(image_path != NULL);
1552 			pmcstat_process_exec(pp, image_path,
1553 			    ev.pl_u.pl_x.pl_entryaddr);
1554 			break;
1555 
1556 		case PMCLOG_TYPE_PROCEXIT:
1557 
1558 			/*
1559 			 * Due to the way the log is generated, the
1560 			 * last few samples corresponding to a process
1561 			 * may appear in the log after the process
1562 			 * exit event is recorded.  Thus we keep the
1563 			 * process' descriptor and associated data
1564 			 * structures around, but mark the process as
1565 			 * having exited.
1566 			 */
1567 			pp = pmcstat_process_lookup(ev.pl_u.pl_e.pl_pid, 0);
1568 			if (pp == NULL)
1569 				break;
1570 			pp->pp_isactive = 0;	/* mark as a zombie */
1571 			break;
1572 
1573 		case PMCLOG_TYPE_SYSEXIT:
1574 			pp = pmcstat_process_lookup(ev.pl_u.pl_se.pl_pid, 0);
1575 			if (pp == NULL)
1576 				break;
1577 			pp->pp_isactive = 0;	/* make a zombie */
1578 			break;
1579 
1580 		case PMCLOG_TYPE_PROCFORK:
1581 
1582 			/*
1583 			 * Allocate a process descriptor for the new
1584 			 * (child) process.
1585 			 */
1586 			ppnew =
1587 			    pmcstat_process_lookup(ev.pl_u.pl_f.pl_newpid,
1588 				PMCSTAT_ALLOCATE);
1589 
1590 			/*
1591 			 * If we had been tracking the parent, clone
1592 			 * its address maps.
1593 			 */
1594 			pp = pmcstat_process_lookup(ev.pl_u.pl_f.pl_oldpid, 0);
1595 			if (pp == NULL)
1596 				break;
1597 			TAILQ_FOREACH(ppm, &pp->pp_map, ppm_next)
1598 			    pmcstat_image_link(ppnew, ppm->ppm_image,
1599 				ppm->ppm_lowpc);
1600 			break;
1601 
1602 		default:	/* other types of entries are not relevant */
1603 			break;
1604 		}
1605 	}
1606 
1607 	if (ev.pl_state == PMCLOG_EOF)
1608 		return (PMCSTAT_FINISHED);
1609 	else if (ev.pl_state == PMCLOG_REQUIRE_DATA)
1610 		return (PMCSTAT_RUNNING);
1611 
1612 	err(EX_DATAERR,
1613 	    "ERROR: event parsing failed (record %jd, offset 0x%jx)",
1614 	    (uintmax_t) ev.pl_count + 1, ev.pl_offset);
1615 }
1616 
1617 /*
1618  * Print log entries as text.
1619  */
1620 
1621 static int
1622 pmcstat_print_log(void)
1623 {
1624 	struct pmclog_ev ev;
1625 	uint32_t npc;
1626 
1627 	while (pmclog_read(args.pa_logparser, &ev) == 0) {
1628 		assert(ev.pl_state == PMCLOG_OK);
1629 		switch (ev.pl_type) {
1630 		case PMCLOG_TYPE_CALLCHAIN:
1631 			PMCSTAT_PRINT_ENTRY("callchain",
1632 			    "%d 0x%x %d %d %c", ev.pl_u.pl_cc.pl_pid,
1633 			    ev.pl_u.pl_cc.pl_pmcid,
1634 			    PMC_CALLCHAIN_CPUFLAGS_TO_CPU(ev.pl_u.pl_cc. \
1635 				pl_cpuflags), ev.pl_u.pl_cc.pl_npc,
1636 			    PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(ev.pl_u.pl_cc.\
1637 			        pl_cpuflags) ? 'u' : 's');
1638 			for (npc = 0; npc < ev.pl_u.pl_cc.pl_npc; npc++)
1639 				PMCSTAT_PRINT_ENTRY("...", "%p",
1640 				    (void *) ev.pl_u.pl_cc.pl_pc[npc]);
1641 			break;
1642 		case PMCLOG_TYPE_CLOSELOG:
1643 			PMCSTAT_PRINT_ENTRY("closelog",);
1644 			break;
1645 		case PMCLOG_TYPE_DROPNOTIFY:
1646 			PMCSTAT_PRINT_ENTRY("drop",);
1647 			break;
1648 		case PMCLOG_TYPE_INITIALIZE:
1649 			PMCSTAT_PRINT_ENTRY("initlog","0x%x \"%s\"",
1650 			    ev.pl_u.pl_i.pl_version,
1651 			    pmc_name_of_cputype(ev.pl_u.pl_i.pl_arch));
1652 			if ((ev.pl_u.pl_i.pl_version & 0xFF000000) !=
1653 			    PMC_VERSION_MAJOR << 24 && args.pa_verbosity > 0)
1654 				warnx(
1655 "WARNING: Log version 0x%x != expected version 0x%x.",
1656 				    ev.pl_u.pl_i.pl_version, PMC_VERSION);
1657 			break;
1658 		case PMCLOG_TYPE_MAP_IN:
1659 			PMCSTAT_PRINT_ENTRY("map-in","%d %p \"%s\"",
1660 			    ev.pl_u.pl_mi.pl_pid,
1661 			    (void *) ev.pl_u.pl_mi.pl_start,
1662 			    ev.pl_u.pl_mi.pl_pathname);
1663 			break;
1664 		case PMCLOG_TYPE_MAP_OUT:
1665 			PMCSTAT_PRINT_ENTRY("map-out","%d %p %p",
1666 			    ev.pl_u.pl_mo.pl_pid,
1667 			    (void *) ev.pl_u.pl_mo.pl_start,
1668 			    (void *) ev.pl_u.pl_mo.pl_end);
1669 			break;
1670 		case PMCLOG_TYPE_PCSAMPLE:
1671 			PMCSTAT_PRINT_ENTRY("sample","0x%x %d %p %c",
1672 			    ev.pl_u.pl_s.pl_pmcid,
1673 			    ev.pl_u.pl_s.pl_pid,
1674 			    (void *) ev.pl_u.pl_s.pl_pc,
1675 			    ev.pl_u.pl_s.pl_usermode ? 'u' : 's');
1676 			break;
1677 		case PMCLOG_TYPE_PMCALLOCATE:
1678 			PMCSTAT_PRINT_ENTRY("allocate","0x%x \"%s\" 0x%x",
1679 			    ev.pl_u.pl_a.pl_pmcid,
1680 			    ev.pl_u.pl_a.pl_evname,
1681 			    ev.pl_u.pl_a.pl_flags);
1682 			break;
1683 		case PMCLOG_TYPE_PMCALLOCATEDYN:
1684 			PMCSTAT_PRINT_ENTRY("allocatedyn","0x%x \"%s\" 0x%x",
1685 			    ev.pl_u.pl_ad.pl_pmcid,
1686 			    ev.pl_u.pl_ad.pl_evname,
1687 			    ev.pl_u.pl_ad.pl_flags);
1688 			break;
1689 		case PMCLOG_TYPE_PMCATTACH:
1690 			PMCSTAT_PRINT_ENTRY("attach","0x%x %d \"%s\"",
1691 			    ev.pl_u.pl_t.pl_pmcid,
1692 			    ev.pl_u.pl_t.pl_pid,
1693 			    ev.pl_u.pl_t.pl_pathname);
1694 			break;
1695 		case PMCLOG_TYPE_PMCDETACH:
1696 			PMCSTAT_PRINT_ENTRY("detach","0x%x %d",
1697 			    ev.pl_u.pl_d.pl_pmcid,
1698 			    ev.pl_u.pl_d.pl_pid);
1699 			break;
1700 		case PMCLOG_TYPE_PROCCSW:
1701 			PMCSTAT_PRINT_ENTRY("cswval","0x%x %d %jd",
1702 			    ev.pl_u.pl_c.pl_pmcid,
1703 			    ev.pl_u.pl_c.pl_pid,
1704 			    ev.pl_u.pl_c.pl_value);
1705 			break;
1706 		case PMCLOG_TYPE_PROCEXEC:
1707 			PMCSTAT_PRINT_ENTRY("exec","0x%x %d %p \"%s\"",
1708 			    ev.pl_u.pl_x.pl_pmcid,
1709 			    ev.pl_u.pl_x.pl_pid,
1710 			    (void *) ev.pl_u.pl_x.pl_entryaddr,
1711 			    ev.pl_u.pl_x.pl_pathname);
1712 			break;
1713 		case PMCLOG_TYPE_PROCEXIT:
1714 			PMCSTAT_PRINT_ENTRY("exitval","0x%x %d %jd",
1715 			    ev.pl_u.pl_e.pl_pmcid,
1716 			    ev.pl_u.pl_e.pl_pid,
1717 			    ev.pl_u.pl_e.pl_value);
1718 			break;
1719 		case PMCLOG_TYPE_PROCFORK:
1720 			PMCSTAT_PRINT_ENTRY("fork","%d %d",
1721 			    ev.pl_u.pl_f.pl_oldpid,
1722 			    ev.pl_u.pl_f.pl_newpid);
1723 			break;
1724 		case PMCLOG_TYPE_USERDATA:
1725 			PMCSTAT_PRINT_ENTRY("userdata","0x%x",
1726 			    ev.pl_u.pl_u.pl_userdata);
1727 			break;
1728 		case PMCLOG_TYPE_SYSEXIT:
1729 			PMCSTAT_PRINT_ENTRY("exit","%d",
1730 			    ev.pl_u.pl_se.pl_pid);
1731 			break;
1732 		default:
1733 			fprintf(args.pa_printfile, "unknown event (type %d).\n",
1734 			    ev.pl_type);
1735 		}
1736 	}
1737 
1738 	if (ev.pl_state == PMCLOG_EOF)
1739 		return (PMCSTAT_FINISHED);
1740 	else if (ev.pl_state ==  PMCLOG_REQUIRE_DATA)
1741 		return (PMCSTAT_RUNNING);
1742 
1743 	errx(EX_DATAERR,
1744 	    "ERROR: event parsing failed (record %jd, offset 0x%jx).",
1745 	    (uintmax_t) ev.pl_count + 1, ev.pl_offset);
1746 	/*NOTREACHED*/
1747 }
1748 
1749 /*
1750  * Public Interfaces.
1751  */
1752 
1753 /*
1754  * Close a logfile, after first flushing all in-module queued data.
1755  */
1756 
1757 int
1758 pmcstat_close_log(void)
1759 {
1760 	/* If a local logfile is configured ask the kernel to stop
1761 	 * and flush data. Kernel will close the file when data is flushed
1762 	 * so keep the status to EXITING.
1763 	 */
1764 	if (args.pa_logfd != -1) {
1765 		if (pmc_close_logfile() < 0)
1766 			err(EX_OSERR, "ERROR: logging failed");
1767 	}
1768 
1769 	return (args.pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING :
1770 	    PMCSTAT_FINISHED);
1771 }
1772 
1773 
1774 
/*
 * Open a log file, for reading or writing.
 *
 * 'path' is interpreted as follows:
 *  - "-" selects stdin (reading) or stdout (writing);
 *  - a path that contains a ':' and does not start with '/' or '.'
 *    is a "host:port" specification, split at the last ':'.  For
 *    reading, a socket is bound to that address and a single
 *    connection is accepted; for writing, a connection is made to it;
 *  - anything else is a file name.
 *
 * Returns the descriptor of the opened log.  If an error is recorded
 * while opening, the program exits with EX_OSERR.
 */

int
pmcstat_open_log(const char *path, int mode)
{
	int error, fd, cfd;
	size_t hlen;
	const char *p, *errstr;
	struct addrinfo hints, *res, *res0;
	char hostname[MAXHOSTNAMELEN];

	errstr = NULL;
	fd = -1;

	if (path[0] == '-' && path[1] == '\0')
		fd = (mode == PMCSTAT_OPEN_FOR_READ) ? 0 : 1;
	else if (path[0] != '/' &&
	    path[0] != '.' && strchr(path, ':') != NULL) {

		/* Split "host:port" at the last colon. */
		p = strrchr(path, ':');
		hlen = p - path;
		if (p == path || hlen >= sizeof(hostname)) {
			errstr = strerror(EINVAL);
			goto done;
		}

		/* hlen is bounded above, so a plain copy is sufficient. */
		assert(hlen < sizeof(hostname));
		(void) memcpy(hostname, path, hlen);
		hostname[hlen] = '\0';

		(void) memset(&hints, 0, sizeof(hints));
		hints.ai_family = AF_UNSPEC;
		hints.ai_socktype = SOCK_STREAM;
		if ((error = getaddrinfo(hostname, p+1, &hints, &res0)) != 0) {
			errstr = gai_strerror(error);
			goto done;
		}

		/* Try each returned address until one works. */
		fd = -1;
		for (res = res0; res; res = res->ai_next) {
			if ((fd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol)) < 0) {
				errstr = strerror(errno);
				continue;
			}
			if (mode == PMCSTAT_OPEN_FOR_READ) {
				/* A failed listen() is handled like bind(). */
				if (bind(fd, res->ai_addr,
				    res->ai_addrlen) < 0 ||
				    listen(fd, 1) < 0) {
					errstr = strerror(errno);
					(void) close(fd);
					fd = -1;
					continue;
				}
				cfd = accept(fd, NULL, NULL);
				/*
				 * Fetch the error text before close(),
				 * which may overwrite errno.
				 */
				if (cfd < 0)
					errstr = strerror(errno);
				(void) close(fd);
				if (cfd < 0) {
					fd = -1;
					break;
				}
				fd = cfd;
			} else {
				if (connect(fd, res->ai_addr, res->ai_addrlen) < 0) {
					errstr = strerror(errno);
					(void) close(fd);
					fd = -1;
					continue;
				}
			}
			/* Success: forget errors from earlier addresses. */
			errstr = NULL;
			break;
		}
		freeaddrinfo(res0);

	} else if ((fd = open(path, mode == PMCSTAT_OPEN_FOR_READ ?
		    O_RDONLY : (O_WRONLY|O_CREAT|O_TRUNC),
		    S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0)
			errstr = strerror(errno);

  done:
	if (errstr)
		errx(EX_OSERR, "ERROR: Cannot open \"%s\" for %s: %s.", path,
		    (mode == PMCSTAT_OPEN_FOR_READ ? "reading" : "writing"),
		    errstr);

	return (fd);
}
1877 
1878 /*
1879  * Process a log file in offline analysis mode.
1880  */
1881 
1882 int
1883 pmcstat_process_log(void)
1884 {
1885 
1886 	/*
1887 	 * If analysis has not been asked for, just print the log to
1888 	 * the current output file.
1889 	 */
1890 	if (args.pa_flags & FLAG_DO_PRINT)
1891 		return (pmcstat_print_log());
1892 	else
1893 		return (pmcstat_analyze_log());
1894 }
1895 
1896 /*
1897  * Refresh top display.
1898  */
1899 
1900 static void
1901 pmcstat_refresh_top(void)
1902 {
1903 	int v_attrs;
1904 	float v;
1905 	char pmcname[40];
1906 	struct pmcstat_pmcrecord *pmcpr;
1907 
1908 	/* If in pause mode do not refresh display. */
1909 	if (pmcstat_pause)
1910 		return;
1911 
1912 	/* Wait until PMC pop in the log. */
1913 	pmcpr = pmcstat_pmcindex_to_pmcr(pmcstat_pmcinfilter);
1914 	if (pmcpr == NULL)
1915 		return;
1916 
1917 	/* Format PMC name. */
1918 	if (pmcstat_mergepmc)
1919 		snprintf(pmcname, sizeof(pmcname), "[%s]",
1920 		    pmcstat_string_unintern(pmcpr->pr_pmcname));
1921 	else
1922 		snprintf(pmcname, sizeof(pmcname), "%s.%d",
1923 		    pmcstat_string_unintern(pmcpr->pr_pmcname),
1924 		    pmcstat_pmcinfilter);
1925 
1926 	/* Format samples count. */
1927 	if (ps_samples_period > 0)
1928 		v = (pmcpr->pr_samples * 100.0) / ps_samples_period;
1929 	else
1930 		v = 0.;
1931 	v_attrs = PMCSTAT_ATTRPERCENT(v);
1932 
1933 	PMCSTAT_PRINTBEGIN();
1934 	PMCSTAT_PRINTW("PMC: %s Samples: %u ",
1935 	    pmcname,
1936 	    pmcpr->pr_samples);
1937 	PMCSTAT_ATTRON(v_attrs);
1938 	PMCSTAT_PRINTW("(%.1f%%) ", v);
1939 	PMCSTAT_ATTROFF(v_attrs);
1940 	PMCSTAT_PRINTW(", %u unresolved\n\n",
1941 	    pmcpr->pr_dubious_frames);
1942 	if (plugins[args.pa_plugin].pl_topdisplay != NULL)
1943 		plugins[args.pa_plugin].pl_topdisplay();
1944 	PMCSTAT_PRINTEND();
1945 }
1946 
1947 /*
1948  * Find the next pmc index to display.
1949  */
1950 
1951 static void
1952 pmcstat_changefilter(void)
1953 {
1954 	int pmcin;
1955 	struct pmcstat_pmcrecord *pmcr;
1956 
1957 	/*
1958 	 * Find the next merge target.
1959 	 */
1960 	if (pmcstat_mergepmc) {
1961 		pmcin = pmcstat_pmcinfilter;
1962 
1963 		do {
1964 			pmcr = pmcstat_pmcindex_to_pmcr(pmcstat_pmcinfilter);
1965 			if (pmcr == NULL || pmcr == pmcr->pr_merge)
1966 				break;
1967 
1968 			pmcstat_pmcinfilter++;
1969 			if (pmcstat_pmcinfilter >= pmcstat_npmcs)
1970 				pmcstat_pmcinfilter = 0;
1971 
1972 		} while (pmcstat_pmcinfilter != pmcin);
1973 	}
1974 }
1975 
1976 /*
1977  * Top mode keypress.
1978  */
1979 
1980 int
1981 pmcstat_keypress_log(void)
1982 {
1983 	int c, ret = 0;
1984 	WINDOW *w;
1985 
1986 	w = newwin(1, 0, 1, 0);
1987 	c = wgetch(w);
1988 	wprintw(w, "Key: %c => ", c);
1989 	switch (c) {
1990 	case 'c':
1991 		wprintw(w, "enter mode 'd' or 'a' => ");
1992 		c = wgetch(w);
1993 		if (c == 'd') {
1994 			args.pa_topmode = PMCSTAT_TOP_DELTA;
1995 			wprintw(w, "switching to delta mode");
1996 		} else {
1997 			args.pa_topmode = PMCSTAT_TOP_ACCUM;
1998 			wprintw(w, "switching to accumulation mode");
1999 		}
2000 		break;
2001 	case 'm':
2002 		pmcstat_mergepmc = !pmcstat_mergepmc;
2003 		/*
2004 		 * Changing merge state require data reset.
2005 		 */
2006 		if (plugins[args.pa_plugin].pl_shutdown != NULL)
2007 			plugins[args.pa_plugin].pl_shutdown(NULL);
2008 		pmcstat_stats_reset(0);
2009 		if (plugins[args.pa_plugin].pl_init != NULL)
2010 			plugins[args.pa_plugin].pl_init();
2011 
2012 		/* Update filter to be on a merge target. */
2013 		pmcstat_changefilter();
2014 		wprintw(w, "merge PMC %s", pmcstat_mergepmc ? "on" : "off");
2015 		break;
2016 	case 'n':
2017 		/* Close current plugin. */
2018 		if (plugins[args.pa_plugin].pl_shutdown != NULL)
2019 			plugins[args.pa_plugin].pl_shutdown(NULL);
2020 
2021 		/* Find next top display available. */
2022 		do {
2023 			args.pa_plugin++;
2024 			if (plugins[args.pa_plugin].pl_name == NULL)
2025 				args.pa_plugin = 0;
2026 		} while (plugins[args.pa_plugin].pl_topdisplay == NULL);
2027 
2028 		/* Open new plugin. */
2029 		pmcstat_stats_reset(0);
2030 		if (plugins[args.pa_plugin].pl_init != NULL)
2031 			plugins[args.pa_plugin].pl_init();
2032 		wprintw(w, "switching to plugin %s",
2033 		    plugins[args.pa_plugin].pl_name);
2034 		break;
2035 	case 'p':
2036 		pmcstat_pmcinfilter++;
2037 		if (pmcstat_pmcinfilter >= pmcstat_npmcs)
2038 			pmcstat_pmcinfilter = 0;
2039 		pmcstat_changefilter();
2040 		wprintw(w, "switching to PMC %s.%d",
2041 		    pmcstat_pmcindex_to_name(pmcstat_pmcinfilter),
2042 		    pmcstat_pmcinfilter);
2043 		break;
2044 	case ' ':
2045 		pmcstat_pause = !pmcstat_pause;
2046 		if (pmcstat_pause)
2047 			wprintw(w, "pause => press space again to continue");
2048 		break;
2049 	case 'q':
2050 		wprintw(w, "exiting...");
2051 		ret = 1;
2052 		break;
2053 	default:
2054 		if (plugins[args.pa_plugin].pl_topkeypress != NULL)
2055 			if (plugins[args.pa_plugin].pl_topkeypress(c, w))
2056 				ret = 1;
2057 	}
2058 
2059 	wrefresh(w);
2060 	delwin(w);
2061 	return ret;
2062 }
2063 
2064 
2065 /*
2066  * Top mode display.
2067  */
2068 
2069 void
2070 pmcstat_display_log(void)
2071 {
2072 
2073 	pmcstat_refresh_top();
2074 
2075 	/* Reset everythings if delta mode. */
2076 	if (args.pa_topmode == PMCSTAT_TOP_DELTA) {
2077 		if (plugins[args.pa_plugin].pl_shutdown != NULL)
2078 			plugins[args.pa_plugin].pl_shutdown(NULL);
2079 		pmcstat_stats_reset(0);
2080 		if (plugins[args.pa_plugin].pl_init != NULL)
2081 			plugins[args.pa_plugin].pl_init();
2082 	}
2083 
2084 }
2085 
2086 /*
2087  * Configure a plugins.
2088  */
2089 
2090 void
2091 pmcstat_pluginconfigure_log(char *opt)
2092 {
2093 
2094 	if (strncmp(opt, "threshold=", 10) == 0) {
2095 		pmcstat_threshold = atof(opt+10);
2096 	} else {
2097 		if (plugins[args.pa_plugin].pl_configure != NULL) {
2098 			if (!plugins[args.pa_plugin].pl_configure(opt))
2099 				err(EX_USAGE,
2100 				    "ERROR: unknown option <%s>.", opt);
2101 		}
2102 	}
2103 }
2104 
2105 /*
2106  * Initialize module.
2107  */
2108 
2109 void
2110 pmcstat_initialize_logging(void)
2111 {
2112 	int i;
2113 
2114 	/* use a convenient format for 'ldd' output */
2115 	if (setenv("LD_TRACE_LOADED_OBJECTS_FMT1","%o \"%p\" %x\n",1) != 0)
2116 		err(EX_OSERR, "ERROR: Cannot setenv");
2117 
2118 	/* Initialize hash tables */
2119 	pmcstat_string_initialize();
2120 	for (i = 0; i < PMCSTAT_NHASH; i++) {
2121 		LIST_INIT(&pmcstat_image_hash[i]);
2122 		LIST_INIT(&pmcstat_process_hash[i]);
2123 	}
2124 
2125 	/*
2126 	 * Create a fake 'process' entry for the kernel with pid -1.
2127 	 * hwpmc(4) will subsequently inform us about where the kernel
2128 	 * and any loaded kernel modules are mapped.
2129 	 */
2130 	if ((pmcstat_kernproc = pmcstat_process_lookup((pid_t) -1,
2131 		 PMCSTAT_ALLOCATE)) == NULL)
2132 		err(EX_OSERR, "ERROR: Cannot initialize logging");
2133 
2134 	/* PMC count. */
2135 	pmcstat_npmcs = 0;
2136 
2137 	/* Merge PMC with same name. */
2138 	pmcstat_mergepmc = args.pa_mergepmc;
2139 
2140 	/*
2141 	 * Initialize plugins
2142 	 */
2143 
2144 	if (plugins[args.pa_pplugin].pl_init != NULL)
2145 		plugins[args.pa_pplugin].pl_init();
2146 	if (plugins[args.pa_plugin].pl_init != NULL)
2147 		plugins[args.pa_plugin].pl_init();
2148 }
2149 
2150 /*
2151  * Shutdown module.
2152  */
2153 
2154 void
2155 pmcstat_shutdown_logging(void)
2156 {
2157 	int i;
2158 	FILE *mf;
2159 	struct pmcstat_image *pi, *pitmp;
2160 	struct pmcstat_process *pp, *pptmp;
2161 	struct pmcstat_pcmap *ppm, *ppmtmp;
2162 
2163 	/* determine where to send the map file */
2164 	mf = NULL;
2165 	if (args.pa_mapfilename != NULL)
2166 		mf = (strcmp(args.pa_mapfilename, "-") == 0) ?
2167 		    args.pa_printfile : fopen(args.pa_mapfilename, "w");
2168 
2169 	if (mf == NULL && args.pa_flags & FLAG_DO_GPROF &&
2170 	    args.pa_verbosity >= 2)
2171 		mf = args.pa_printfile;
2172 
2173 	if (mf)
2174 		(void) fprintf(mf, "MAP:\n");
2175 
2176 	/*
2177 	 * Shutdown the plugins
2178 	 */
2179 
2180 	if (plugins[args.pa_plugin].pl_shutdown != NULL)
2181 		plugins[args.pa_plugin].pl_shutdown(mf);
2182 	if (plugins[args.pa_pplugin].pl_shutdown != NULL)
2183 		plugins[args.pa_pplugin].pl_shutdown(mf);
2184 
2185 	for (i = 0; i < PMCSTAT_NHASH; i++) {
2186 		LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next,
2187 		    pitmp) {
2188 			if (plugins[args.pa_plugin].pl_shutdownimage != NULL)
2189 				plugins[args.pa_plugin].pl_shutdownimage(pi);
2190 			if (plugins[args.pa_pplugin].pl_shutdownimage != NULL)
2191 				plugins[args.pa_pplugin].pl_shutdownimage(pi);
2192 
2193 			free(pi->pi_symbols);
2194 			if (pi->pi_addr2line != NULL)
2195 				pclose(pi->pi_addr2line);
2196 			LIST_REMOVE(pi, pi_next);
2197 			free(pi);
2198 		}
2199 
2200 		LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[i], pp_next,
2201 		    pptmp) {
2202 			TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) {
2203 				TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
2204 				free(ppm);
2205 			}
2206 			LIST_REMOVE(pp, pp_next);
2207 			free(pp);
2208 		}
2209 	}
2210 
2211 	pmcstat_string_shutdown();
2212 
2213 	/*
2214 	 * Print errors unless -q was specified.  Print all statistics
2215 	 * if verbosity > 1.
2216 	 */
2217 #define	PRINT(N,V) do {							\
2218 		if (pmcstat_stats.ps_##V || args.pa_verbosity >= 2)	\
2219 			(void) fprintf(args.pa_printfile, " %-40s %d\n",\
2220 			    N, pmcstat_stats.ps_##V);			\
2221 	} while (0)
2222 
2223 	if (args.pa_verbosity >= 1 && (args.pa_flags & FLAG_DO_ANALYSIS)) {
2224 		(void) fprintf(args.pa_printfile, "CONVERSION STATISTICS:\n");
2225 		PRINT("#exec/a.out", exec_aout);
2226 		PRINT("#exec/elf", exec_elf);
2227 		PRINT("#exec/unknown", exec_indeterminable);
2228 		PRINT("#exec handling errors", exec_errors);
2229 		PRINT("#samples/total", samples_total);
2230 		PRINT("#samples/unclaimed", samples_unknown_offset);
2231 		PRINT("#samples/unknown-object", samples_indeterminable);
2232 		PRINT("#samples/unknown-function", samples_unknown_function);
2233 		PRINT("#callchain/dubious-frames", callchain_dubious_frames);
2234 	}
2235 
2236 	if (mf)
2237 		(void) fclose(mf);
2238 }
2239