xref: /freebsd/usr.sbin/pmcstat/pmcstat_log.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /*-
2  * Copyright (c) 2005-2007, Joseph Koshy
3  * Copyright (c) 2007 The FreeBSD Foundation
4  * All rights reserved.
5  *
6  * Portions of this software were developed by A. Joseph Koshy under
7  * sponsorship from the FreeBSD Foundation and Google, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * Transform a hwpmc(4) log into human readable form, and into
33  * gprof(1) compatible profiles.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include <sys/param.h>
40 #include <sys/endian.h>
41 #include <sys/gmon.h>
42 #include <sys/imgact_aout.h>
43 #include <sys/imgact_elf.h>
44 #include <sys/mman.h>
45 #include <sys/pmc.h>
46 #include <sys/queue.h>
47 #include <sys/socket.h>
48 #include <sys/stat.h>
49 #include <sys/wait.h>
50 
51 #include <netinet/in.h>
52 
53 #include <assert.h>
54 #include <err.h>
55 #include <errno.h>
56 #include <fcntl.h>
57 #include <gelf.h>
58 #include <libgen.h>
59 #include <limits.h>
60 #include <netdb.h>
61 #include <pmc.h>
62 #include <pmclog.h>
63 #include <sysexits.h>
64 #include <stdint.h>
65 #include <stdio.h>
66 #include <stdlib.h>
67 #include <string.h>
68 #include <unistd.h>
69 
70 #include "pmcstat.h"
71 
/* Smaller of A and B.  Each argument may be evaluated twice, so avoid side effects. */
72 #define	min(A,B)		((A) < (B) ? (A) : (B))
/* Larger of A and B.  Each argument may be evaluated twice, so avoid side effects. */
73 #define	max(A,B)		((A) > (B) ? (A) : (B))
74 
/* NOTE(review): presumably the '_allocate' argument of pmcstat_process_lookup() -- confirm against callers. */
75 #define	PMCSTAT_ALLOCATE		1
76 
77 /*
78  * PUBLIC INTERFACES
79  *
80  * pmcstat_initialize_logging()	initialize this module, called first
81  * pmcstat_shutdown_logging()		orderly shutdown, called last
82  * pmcstat_open_log()			open an eventlog for processing
83  * pmcstat_process_log()		print/convert an event log
84  * pmcstat_close_log()			finish processing an event log
85  *
86  * IMPLEMENTATION NOTES
87  *
88  * We correlate each 'callchain' or 'sample' entry seen in the event
89  * log back to an executable object in the system. Executable objects
90  * include:
91  * 	- program executables,
92  *	- shared libraries loaded by the runtime loader,
93  *	- dlopen()'ed objects loaded by the program,
94  *	- the runtime loader itself,
95  *	- the kernel and kernel modules.
96  *
97  * Each process that we know about is treated as a set of regions that
98  * map to executable objects.  Processes are described by
99  * 'pmcstat_process' structures.  Executable objects are tracked by
100  * 'pmcstat_image' structures.  The kernel and kernel modules are
101  * common to all processes (they reside at the same virtual addresses
102  * for all processes).  Individual processes can have their text
103  * segments and shared libraries loaded at process-specific locations.
104  *
105  * A given executable object can be in use by multiple processes
106  * (e.g., libc.so) and loaded at a different address in each.
107  * pmcstat_pcmap structures track per-image mappings.
108  *
109  * The sample log could have samples from multiple PMCs; we
110  * generate one 'gmon.out' profile per PMC.
111  *
112  * IMPLEMENTATION OF GMON OUTPUT
113  *
114  * Each executable object gets one 'gmon.out' profile, per PMC in
115  * use.  Creation of 'gmon.out' profiles is done lazily.  The
116  * 'gmon.out' profiles generated for a given sampling PMC are
117  * aggregates of all the samples for that particular executable
118  * object.
119  *
120  * IMPLEMENTATION OF SYSTEM-WIDE CALLGRAPH OUTPUT
121  *
122  * Each active pmcid has its own callgraph structure, described by a
123  * 'struct pmcstat_callgraph'.  Given a process id and a list of pc
124  * values, we map each pc value to a tuple (image, symbol), where
125  * 'image' denotes an executable object and 'symbol' is the closest
126  * symbol that precedes the pc value.  Each pc value in the list is
127  * also given a 'rank' that reflects its depth in the call stack.
128  */
129 
/*
 * Opaque handle for an interned string.  The string routines in this
 * file cast it to and from 'struct pmcstat_string *'; two handles are
 * equal exactly when pmcstat_string_intern() was given equal strings.
 */
130 typedef const void *pmcstat_interned_string;
131 
132 /*
133  * 'pmcstat_pmcrecord' is a mapping from PMC ids to human-readable
134  * names.
135  */
136 
137 struct pmcstat_pmcrecord {
138 	LIST_ENTRY(pmcstat_pmcrecord)	pr_next;	/* linkage on pmcstat_pmcs */
139 	pmc_id_t			pr_pmcid;	/* PMC id (the lookup key) */
140 	pmcstat_interned_string	pr_pmcname;	/* interned human-readable name */
141 };
142 
/* List of all known PMC id -> name records. */
143 static LIST_HEAD(,pmcstat_pmcrecord)	pmcstat_pmcs =
144 	LIST_HEAD_INITIALIZER(&pmcstat_pmcs);
145 
146 
147 /*
148  * struct pmcstat_gmonfile tracks a given 'gmon.out' file.  These
149  * files are mmap()'ed in as needed.
150  */
151 
152 struct pmcstat_gmonfile {
153 	LIST_ENTRY(pmcstat_gmonfile)	pgf_next; /* list of entries */
154 	int		pgf_overflow;	/* whether a count overflowed */
155 	pmc_id_t	pgf_pmcid;	/* id of the associated pmc */
156 	size_t		pgf_nbuckets;	/* #buckets in this gmon.out */
157 	unsigned int	pgf_nsamples;	/* #samples in this gmon.out */
158 	pmcstat_interned_string pgf_name;	/* pathname of gmon.out file */
159 	size_t		pgf_ndatabytes;	/* number of bytes mapped */
	/* Set by pmcstat_gmon_map_file(); reset to NULL by pmcstat_gmon_unmap_file(). */
160 	void		*pgf_gmondata;	/* pointer to mmap'ed data */
	/* Opened lazily, in append mode, by pmcstat_gmon_append_arc(). */
161 	FILE		*pgf_file;	/* used when writing gmon arcs */
162 };
163 
164 /*
165  * A 'pmcstat_image' structure describes an executable program on
166  * disk.  'pi_execpath' is a cookie representing the pathname of
167  * the executable.  'pi_start' and 'pi_end' are the least and greatest
168  * virtual addresses for the text segments in the executable.
169  * 'pi_gmlist' contains a linked list of gmon.out files associated
170  * with this image.
171  */
172 
/*
 * Classification of an image.  A fresh descriptor starts out as UNKNOWN;
 * pmcstat_image_determine_type() and its helpers replace that with one
 * of the other values.
 */
173 enum pmcstat_image_type {
174 	PMCSTAT_IMAGE_UNKNOWN = 0,	/* never looked at the image */
175 	PMCSTAT_IMAGE_INDETERMINABLE,	/* can't tell what the image is */
176 	PMCSTAT_IMAGE_ELF32,		/* ELF 32 bit object */
177 	PMCSTAT_IMAGE_ELF64,		/* ELF 64 bit object */
178 	PMCSTAT_IMAGE_AOUT		/* AOUT object */
179 };
180 
181 struct pmcstat_image {
182 	LIST_ENTRY(pmcstat_image) pi_next;	/* hash link */
183 	TAILQ_ENTRY(pmcstat_image) pi_lru;	/* LRU list */
184 	pmcstat_interned_string	pi_execpath;    /* cookie */
185 	pmcstat_interned_string pi_samplename;  /* sample path name */
186 	pmcstat_interned_string pi_fullpath;    /* path to FS object */
187 
188 	enum pmcstat_image_type pi_type;	/* executable type */
189 
190 	/*
191 	 * Executables have pi_start and pi_end; these are zero
192 	 * for shared libraries.
193 	 */
194 	uintfptr_t	pi_start;	/* start address (inclusive) */
195 	uintfptr_t	pi_end;		/* end address (exclusive) */
196 	uintfptr_t	pi_entry;	/* entry address */
197 	uintfptr_t	pi_vaddr;	/* virtual address where loaded */
198 	int		pi_isdynamic;	/* whether a dynamic object */
199 	int		pi_iskernelmodule;	/* nonzero: looked up under the kernel directory */
200 	pmcstat_interned_string pi_dynlinkerpath; /* path in .interp */
201 
202 	/* All symbols associated with this object. */
203 	struct pmcstat_symbol *pi_symbols;	/* kept sorted by address; NULL if none */
204 	size_t		pi_symcount;	/* number of entries in pi_symbols */
205 
206 	/*
207 	 * An image can be associated with one or more gmon.out files;
208 	 * one per PMC.
209 	 */
210 	LIST_HEAD(,pmcstat_gmonfile) pi_gmlist;
211 };
212 
213 /*
214  * All image descriptors are kept in a hash table.
215  */
/* Buckets are selected by the hash stored with the image's interned exec path. */
216 static LIST_HEAD(,pmcstat_image)	pmcstat_image_hash[PMCSTAT_NHASH];
217 
218 /*
219  * A 'pmcstat_pcmap' structure maps a virtual address range to an
220  * underlying 'pmcstat_image' descriptor.
221  */
222 struct pmcstat_pcmap {
223 	TAILQ_ENTRY(pmcstat_pcmap) ppm_next;	/* linkage on a process' pp_map */
224 	uintfptr_t	ppm_lowpc;	/* NOTE(review): presumably lowest mapped address -- confirm */
225 	uintfptr_t	ppm_highpc;	/* NOTE(review): presumably end of the range; inclusive/exclusive not visible here */
226 	struct pmcstat_image *ppm_image;	/* image backing this address range */
227 };
228 
229 /*
230  * A 'pmcstat_process' structure models processes.  Each process is
231  * associated with a set of pmcstat_pcmap structures that map
232  * addresses inside it to executable objects.  This set is implemented
233  * as a list, kept sorted in ascending order of mapped addresses.
234  *
235  * 'pp_pid' holds the pid of the process.  When a process exits, the
236  * 'pp_isactive' field is set to zero, but the process structure is
237  * not immediately reclaimed because there may still be samples in the
238  * log for this process.
239  */
240 
/* One descriptor per process; descriptors live in pmcstat_process_hash. */
241 struct pmcstat_process {
242 	LIST_ENTRY(pmcstat_process) pp_next;	/* hash-next */
243 	pid_t			pp_pid;		/* associated pid */
244 	int			pp_isactive;	/* whether active */
245 	uintfptr_t		pp_entryaddr;	/* entry address */
246 	TAILQ_HEAD(,pmcstat_pcmap) pp_map;	/* address range map */
247 };
248 
249 /*
250  * All process descriptors are kept in a hash table.
251  */
/* NOTE(review): presumably hashed by pid; the lookup code is not visible here -- confirm. */
252 static LIST_HEAD(,pmcstat_process) pmcstat_process_hash[PMCSTAT_NHASH];
253 
/* Stand-in process for the kernel and its modules, which are common to all processes. */
254 static struct pmcstat_process *pmcstat_kernproc; /* kernel 'process' */
255 
256 /*
257  * Each function symbol tracked by pmcstat(8).
258  */
259 
/*
 * Address ranges are half-open, [ps_start, ps_end), and are stored
 * relative to the owning image's pi_vaddr (see
 * pmcstat_image_add_symbols()).
 */
260 struct pmcstat_symbol {
261 	pmcstat_interned_string ps_name;	/* interned symbol name */
262 	uint64_t	ps_start;	/* first address of the symbol */
263 	uint64_t	ps_end;		/* one past the last address */
264 };
265 
266 /*
267  * Each call graph node is tracked by a pmcstat_cgnode struct.
268  */
269 
/* NOTE(review): field meanings below are inferred from names; the code using them is not visible here. */
270 struct pmcstat_cgnode {
271 	struct pmcstat_image	*pcg_image;	/* image this node belongs to */
272 	uintfptr_t		pcg_func;	/* presumably the function's address -- confirm */
273 	uint32_t		pcg_count;	/* presumably a sample count -- confirm */
274 	uint32_t		pcg_nchildren;	/* presumably length of pcg_children -- confirm */
275 	LIST_ENTRY(pmcstat_cgnode) pcg_sibling;	/* linkage on the parent's pcg_children */
276 	LIST_HEAD(,pmcstat_cgnode) pcg_children;	/* child nodes */
277 };
278 
/* Hash-table wrapper around a call graph node. */
279 struct pmcstat_cgnode_hash {
280 	struct pmcstat_cgnode  *pch_cgnode;	/* the wrapped node */
281 	uint32_t		pch_pmcid;	/* NOTE(review): presumably the owning PMC id -- confirm */
282 	LIST_ENTRY(pmcstat_cgnode_hash) pch_next;	/* hash chain linkage */
283 };
284 
/* NOTE(review): presumably the number of entries in pmcstat_cgnode_hash -- confirm. */
285 static int pmcstat_cgnode_hash_count;
/* NOTE(review): presumably remembers the last image name emitted by the callgraph printer -- confirm. */
286 static pmcstat_interned_string pmcstat_previous_filename_printed;
287 
288 /*
289  * The toplevel CG nodes (i.e., with rank == 0) are placed in a hash table.
290  */
291 
292 static LIST_HEAD(,pmcstat_cgnode_hash) pmcstat_cgnode_hash[PMCSTAT_NHASH];
293 
294 /* Misc. statistics */
/* NOTE(review): none of these counters is updated in the portion of the file reviewed here. */
295 static struct pmcstat_stats {
296 	int ps_exec_aout;	/* # a.out executables seen */
297 	int ps_exec_elf;	/* # elf executables seen */
298 	int ps_exec_errors;	/* # errors processing executables */
299 	int ps_exec_indeterminable; /* # unknown executables seen */
300 	int ps_samples_total;	/* total number of samples processed */
301 	int ps_samples_skipped; /* #samples filtered out for any reason */
302 	int ps_samples_unknown_offset;	/* #samples of rank 0 not in a map */
303 	int ps_samples_indeterminable;	/* #samples in indeterminable images */
304 	int ps_callchain_dubious_frames;/* #dubious frame pointers seen */
305 } pmcstat_stats;
306 
307 
308 /*
309  * Prototypes
310  */
311 
/* gmon.out file handling. */
312 static void	pmcstat_gmon_create_file(struct pmcstat_gmonfile *_pgf,
313     struct pmcstat_image *_image);
314 static pmcstat_interned_string pmcstat_gmon_create_name(const char *_sd,
315     struct pmcstat_image *_img, pmc_id_t _pmcid);
316 static void	pmcstat_gmon_map_file(struct pmcstat_gmonfile *_pgf);
317 static void	pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *_pgf);
318 
/* Executable image descriptors. */
319 static void pmcstat_image_determine_type(struct pmcstat_image *_image,
320     struct pmcstat_args *_a);
321 static struct pmcstat_gmonfile *pmcstat_image_find_gmonfile(struct
322     pmcstat_image *_i, pmc_id_t _id);
323 static struct pmcstat_image *pmcstat_image_from_path(pmcstat_interned_string
324     _path, int _iskernelmodule);
325 static void pmcstat_image_get_aout_params(struct pmcstat_image *_image,
326     struct pmcstat_args *_a);
327 static void pmcstat_image_get_elf_params(struct pmcstat_image *_image,
328     struct pmcstat_args *_a);
329 static void	pmcstat_image_increment_bucket(struct pmcstat_pcmap *_pcm,
330     uintfptr_t _pc, pmc_id_t _pmcid, struct pmcstat_args *_a);
331 static void	pmcstat_image_link(struct pmcstat_process *_pp,
332     struct pmcstat_image *_i, uintfptr_t _lpc);
333 
/* PMC id <-> name mapping. */
334 static void	pmcstat_pmcid_add(pmc_id_t _pmcid,
335     pmcstat_interned_string _name, struct pmcstat_args *_a);
336 static const char *pmcstat_pmcid_to_name(pmc_id_t _pmcid);
337 
/* Process descriptors. */
338 static void	pmcstat_process_aout_exec(struct pmcstat_process *_pp,
339     struct pmcstat_image *_image, uintfptr_t _entryaddr,
340     struct pmcstat_args *_a);
341 static void	pmcstat_process_elf_exec(struct pmcstat_process *_pp,
342     struct pmcstat_image *_image, uintfptr_t _entryaddr,
343     struct pmcstat_args *_a);
344 static void	pmcstat_process_exec(struct pmcstat_process *_pp,
345     pmcstat_interned_string _path, uintfptr_t _entryaddr,
346     struct pmcstat_args *_ao);
347 static struct pmcstat_process *pmcstat_process_lookup(pid_t _pid,
348     int _allocate);
349 static struct pmcstat_pcmap *pmcstat_process_find_map(
350     struct pmcstat_process *_p, uintfptr_t _pc);
351 
/* String interning. */
352 static int	pmcstat_string_compute_hash(const char *_string);
353 static void pmcstat_string_initialize(void);
354 static pmcstat_interned_string pmcstat_string_intern(const char *_s);
355 static pmcstat_interned_string pmcstat_string_lookup(const char *_s);
356 static int	pmcstat_string_lookup_hash(pmcstat_interned_string _is);
357 static void pmcstat_string_shutdown(void);
358 static const char *pmcstat_string_unintern(pmcstat_interned_string _is);
359 
360 
361 /*
362  * A simple implementation of interned strings.  Each interned string
363  * is assigned a unique address, so that subsequent string compares
364  * can be done by a simple pointer comparison instead of using
365  * strcmp().  This speeds up hash table lookups and saves memory if
366  * duplicate strings are the norm.
367  */
368 struct pmcstat_string {
369 	LIST_ENTRY(pmcstat_string)	ps_next;	/* hash link */
370 	int		ps_len;		/* strlen() of ps_string */
371 	int		ps_hash;	/* bucket index in pmcstat_string_hash[] */
372 	char		*ps_string;	/* strdup()'ed copy; freed at shutdown */
373 };
374 
/* The intern table: one chain of pmcstat_string entries per hash bucket. */
375 static LIST_HEAD(,pmcstat_string)	pmcstat_string_hash[PMCSTAT_NHASH];
376 
377 /*
378  * Compute a 'hash' value for a string.
379  */
380 
381 static int
382 pmcstat_string_compute_hash(const char *s)
383 {
384 	int hash;
385 
386 	for (hash = 0; *s; s++)
387 		hash ^= *s;
388 
389 	return (hash & PMCSTAT_HASH_MASK);
390 }
391 
392 /*
393  * Intern a copy of string 's', and return a pointer to the
394  * interned structure.
395  */
396 
397 static pmcstat_interned_string
398 pmcstat_string_intern(const char *s)
399 {
400 	struct pmcstat_string *ps;
401 	const struct pmcstat_string *cps;
402 	int hash, len;
403 
404 	if ((cps = pmcstat_string_lookup(s)) != NULL)
405 		return (cps);
406 
407 	hash = pmcstat_string_compute_hash(s);
408 	len  = strlen(s);
409 
410 	if ((ps = malloc(sizeof(*ps))) == NULL)
411 		err(EX_OSERR, "ERROR: Could not intern string");
412 	ps->ps_len = len;
413 	ps->ps_hash = hash;
414 	ps->ps_string = strdup(s);
415 	LIST_INSERT_HEAD(&pmcstat_string_hash[hash], ps, ps_next);
416 	return ((pmcstat_interned_string) ps);
417 }
418 
419 static const char *
420 pmcstat_string_unintern(pmcstat_interned_string str)
421 {
422 	const char *s;
423 
424 	s = ((const struct pmcstat_string *) str)->ps_string;
425 	return (s);
426 }
427 
428 static pmcstat_interned_string
429 pmcstat_string_lookup(const char *s)
430 {
431 	struct pmcstat_string *ps;
432 	int hash, len;
433 
434 	hash = pmcstat_string_compute_hash(s);
435 	len = strlen(s);
436 
437 	LIST_FOREACH(ps, &pmcstat_string_hash[hash], ps_next)
438 	    if (ps->ps_len == len && ps->ps_hash == hash &&
439 		strcmp(ps->ps_string, s) == 0)
440 		    return (ps);
441 	return (NULL);
442 }
443 
444 static int
445 pmcstat_string_lookup_hash(pmcstat_interned_string s)
446 {
447 	const struct pmcstat_string *ps;
448 
449 	ps = (const struct pmcstat_string *) s;
450 	return (ps->ps_hash);
451 }
452 
453 /*
454  * Initialize the string interning facility.
455  */
456 
457 static void
458 pmcstat_string_initialize(void)
459 {
460 	int i;
461 
462 	for (i = 0; i < PMCSTAT_NHASH; i++)
463 		LIST_INIT(&pmcstat_string_hash[i]);
464 }
465 
466 /*
467  * Destroy the string table, free'ing up space.
468  */
469 
470 static void
471 pmcstat_string_shutdown(void)
472 {
473 	int i;
474 	struct pmcstat_string *ps, *pstmp;
475 
476 	for (i = 0; i < PMCSTAT_NHASH; i++)
477 		LIST_FOREACH_SAFE(ps, &pmcstat_string_hash[i], ps_next,
478 		    pstmp) {
479 			LIST_REMOVE(ps, ps_next);
480 			free(ps->ps_string);
481 			free(ps);
482 		}
483 }
484 
485 /*
486  * Create a gmon.out file and size it.
487  */
488 
489 static void
490 pmcstat_gmon_create_file(struct pmcstat_gmonfile *pgf,
491     struct pmcstat_image *image)
492 {
493 	int fd;
494 	size_t count;
495 	struct gmonhdr gm;
496 	const char *pathname;
497 	char buffer[DEFAULT_BUFFER_SIZE];
498 
499 	pathname = pmcstat_string_unintern(pgf->pgf_name);
500 	if ((fd = open(pathname, O_RDWR|O_NOFOLLOW|O_CREAT,
501 		 S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0)
502 		err(EX_OSERR, "ERROR: Cannot open \"%s\"", pathname);
503 
504 	gm.lpc = image->pi_start;
505 	gm.hpc = image->pi_end;
506 	gm.ncnt = (pgf->pgf_nbuckets * sizeof(HISTCOUNTER)) +
507 	    sizeof(struct gmonhdr);
508 	gm.version = GMONVERSION;
509 	gm.profrate = 0;		/* use ticks */
510 	gm.histcounter_type = 0;	/* compatibility with moncontrol() */
511 	gm.spare[0] = gm.spare[1] = 0;
512 
513 	/* Write out the gmon header */
514 	if (write(fd, &gm, sizeof(gm)) < 0)
515 		goto error;
516 
517 	/* Zero fill the samples[] array */
518 	(void) memset(buffer, 0, sizeof(buffer));
519 
520 	count = pgf->pgf_ndatabytes - sizeof(struct gmonhdr);
521 	while (count > sizeof(buffer)) {
522 		if (write(fd, &buffer, sizeof(buffer)) < 0)
523 			goto error;
524 		count -= sizeof(buffer);
525 	}
526 
527 	if (write(fd, &buffer, count) < 0)
528 		goto error;
529 
530 	(void) close(fd);
531 
532 	return;
533 
534  error:
535 	err(EX_OSERR, "ERROR: Cannot write \"%s\"", pathname);
536 }
537 
538 /*
539  * Determine the full pathname of a gmon.out file for a given
540  * (image,pmcid) combination.  Return the interned string.
541  */
542 
543 pmcstat_interned_string
544 pmcstat_gmon_create_name(const char *samplesdir, struct pmcstat_image *image,
545     pmc_id_t pmcid)
546 {
547 	const char *pmcname;
548 	char fullpath[PATH_MAX];
549 
550 	pmcname = pmcstat_pmcid_to_name(pmcid);
551 
552 	(void) snprintf(fullpath, sizeof(fullpath),
553 	    "%s/%s/%s", samplesdir, pmcname,
554 	    pmcstat_string_unintern(image->pi_samplename));
555 
556 	return (pmcstat_string_intern(fullpath));
557 }
558 
559 
560 /*
561  * Mmap in a gmon.out file for processing.
562  */
563 
564 static void
565 pmcstat_gmon_map_file(struct pmcstat_gmonfile *pgf)
566 {
567 	int fd;
568 	const char *pathname;
569 
570 	pathname = pmcstat_string_unintern(pgf->pgf_name);
571 
572 	/* the gmon.out file must already exist */
573 	if ((fd = open(pathname, O_RDWR | O_NOFOLLOW, 0)) < 0)
574 		err(EX_OSERR, "ERROR: cannot open \"%s\"", pathname);
575 
576 	pgf->pgf_gmondata = mmap(NULL, pgf->pgf_ndatabytes,
577 	    PROT_READ|PROT_WRITE, MAP_NOSYNC|MAP_SHARED, fd, 0);
578 
579 	if (pgf->pgf_gmondata == MAP_FAILED)
580 		err(EX_OSERR, "ERROR: cannot map \"%s\"", pathname);
581 
582 	(void) close(fd);
583 }
584 
585 /*
586  * Unmap a gmon.out file after sync'ing its data to disk.
587  */
588 
589 static void
590 pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *pgf)
591 {
592 	(void) msync(pgf->pgf_gmondata, pgf->pgf_ndatabytes,
593 	    MS_SYNC);
594 	(void) munmap(pgf->pgf_gmondata, pgf->pgf_ndatabytes);
595 	pgf->pgf_gmondata = NULL;
596 }
597 
598 static void
599 pmcstat_gmon_append_arc(struct pmcstat_image *image, pmc_id_t pmcid,
600     uintptr_t rawfrom, uintptr_t rawto, uint32_t count)
601 {
602 	struct rawarc arc;	/* from <sys/gmon.h> */
603 	const char *pathname;
604 	struct pmcstat_gmonfile *pgf;
605 
606 	if ((pgf = pmcstat_image_find_gmonfile(image, pmcid)) == NULL)
607 		return;
608 
609 	if (pgf->pgf_file == NULL) {
610 		pathname = pmcstat_string_unintern(pgf->pgf_name);
611 		if ((pgf->pgf_file = fopen(pathname, "a")) == NULL)
612 			return;
613 	}
614 
615 	arc.raw_frompc = rawfrom + image->pi_vaddr;
616 	arc.raw_selfpc = rawto + image->pi_vaddr;
617 	arc.raw_count = count;
618 
619 	(void) fwrite(&arc, sizeof(arc), 1, pgf->pgf_file);
620 
621 }
622 
623 static struct pmcstat_gmonfile *
624 pmcstat_image_find_gmonfile(struct pmcstat_image *image, pmc_id_t pmcid)
625 {
626 	struct pmcstat_gmonfile *pgf;
627 	LIST_FOREACH(pgf, &image->pi_gmlist, pgf_next)
628 	    if (pgf->pgf_pmcid == pmcid)
629 		    return (pgf);
630 	return (NULL);
631 }
632 
633 
634 /*
635  * Determine whether a given executable image is an A.OUT object, and
636  * if so, fill in its parameters from the text file.
637  * Sets image->pi_type.
638  */
639 
640 static void
641 pmcstat_image_get_aout_params(struct pmcstat_image *image,
642     struct pmcstat_args *a)
643 {
644 	int fd;
645 	ssize_t nbytes;
646 	struct exec ex;
647 	const char *path;
648 	char buffer[PATH_MAX];
649 
650 	path = pmcstat_string_unintern(image->pi_execpath);
651 	assert(path != NULL);
652 
653 	if (image->pi_iskernelmodule)
654 		errx(EX_SOFTWARE, "ERROR: a.out kernel modules are "
655 		    "unsupported \"%s\"", path);
656 
657 	(void) snprintf(buffer, sizeof(buffer), "%s%s",
658 	    a->pa_fsroot, path);
659 
660 	if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
661 	    (nbytes = read(fd, &ex, sizeof(ex))) < 0) {
662 		warn("WARNING: Cannot determine type of \"%s\"", path);
663 		image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
664 		if (fd != -1)
665 			(void) close(fd);
666 		return;
667 	}
668 
669 	(void) close(fd);
670 
671 	if ((unsigned) nbytes != sizeof(ex) ||
672 	    N_BADMAG(ex))
673 		return;
674 
675 	image->pi_type = PMCSTAT_IMAGE_AOUT;
676 
677 	/* TODO: the rest of a.out processing */
678 
679 	return;
680 }
681 
682 /*
683  * Helper function.
684  */
685 
686 static int
687 pmcstat_symbol_compare(const void *a, const void *b)
688 {
689 	const struct pmcstat_symbol *sym1, *sym2;
690 
691 	sym1 = (const struct pmcstat_symbol *) a;
692 	sym2 = (const struct pmcstat_symbol *) b;
693 
694 	if (sym1->ps_end <= sym2->ps_start)
695 		return (-1);
696 	if (sym1->ps_start >= sym2->ps_end)
697 		return (1);
698 	return (0);
699 }
700 
701 /*
702  * Map an address to a symbol in an image.
703  */
704 
705 static struct pmcstat_symbol *
706 pmcstat_symbol_search(struct pmcstat_image *image, uintfptr_t addr)
707 {
708 	struct pmcstat_symbol sym;
709 
710 	if (image->pi_symbols == NULL)
711 		return (NULL);
712 
713 	sym.ps_name  = NULL;
714 	sym.ps_start = addr;
715 	sym.ps_end   = addr + 1;
716 
717 	return (bsearch((void *) &sym, image->pi_symbols,
718 		    image->pi_symcount, sizeof(struct pmcstat_symbol),
719 		    pmcstat_symbol_compare));
720 }
721 
722 /*
723  * Add the list of symbols in the given section to the list associated
724  * with the object.
725  */
726 static void
727 pmcstat_image_add_symbols(struct pmcstat_image *image, Elf *e,
728     Elf_Scn *scn, GElf_Shdr *sh)
729 {
730 	int firsttime;
731 	size_t n, newsyms, nshsyms, nfuncsyms;
732 	struct pmcstat_symbol *symptr;
733 	char *fnname;
734 	GElf_Sym sym;
735 	Elf_Data *data;
736 
737 	if ((data = elf_getdata(scn, NULL)) == NULL)
738 		return;
739 
740 	/*
741 	 * Determine the number of functions named in this
742 	 * section.
743 	 */
744 
745 	nshsyms = sh->sh_size / sh->sh_entsize;
746 	for (n = nfuncsyms = 0; n < nshsyms; n++) {
747 		if (gelf_getsym(data, (int) n, &sym) != &sym)
748 			return;
749 		if (GELF_ST_TYPE(sym.st_info) == STT_FUNC)
750 			nfuncsyms++;
751 	}
752 
753 	if (nfuncsyms == 0)
754 		return;
755 
756 	/*
757 	 * Allocate space for the new entries.
758 	 */
759 	firsttime = image->pi_symbols == NULL;
760 	symptr = realloc(image->pi_symbols,
761 	    sizeof(*symptr) * (image->pi_symcount + nfuncsyms));
762 	if (symptr == image->pi_symbols) /* realloc() failed. */
763 		return;
764 	image->pi_symbols = symptr;
765 
766 	/*
767 	 * Append new symbols to the end of the current table.
768 	 */
769 	symptr += image->pi_symcount;
770 
771 	for (n = newsyms = 0; n < nshsyms; n++) {
772 		if (gelf_getsym(data, (int) n, &sym) != &sym)
773 			return;
774 		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
775 			continue;
776 
777 		if (!firsttime && pmcstat_symbol_search(image, sym.st_value))
778 			continue; /* We've seen this symbol already. */
779 
780 		if ((fnname = elf_strptr(e, sh->sh_link, sym.st_name))
781 		    == NULL)
782 			continue;
783 
784 		symptr->ps_name  = pmcstat_string_intern(fnname);
785 		symptr->ps_start = sym.st_value - image->pi_vaddr;
786 		symptr->ps_end   = symptr->ps_start + sym.st_size;
787 		symptr++;
788 
789 		newsyms++;
790 	}
791 
792 	image->pi_symcount += newsyms;
793 
794 	assert(newsyms <= nfuncsyms);
795 
796 	/*
797 	 * Return space to the system if there were duplicates.
798 	 */
799 	if (newsyms < nfuncsyms)
800 		image->pi_symbols = realloc(image->pi_symbols,
801 		    sizeof(*symptr) * image->pi_symcount);
802 
803 	/*
804 	 * Keep the list of symbols sorted.
805 	 */
806 	qsort(image->pi_symbols, image->pi_symcount, sizeof(*symptr),
807 	    pmcstat_symbol_compare);
808 
809 	/*
810 	 * Deal with function symbols that have a size of 'zero' by
811 	 * making them extend to the next higher address.  These
812 	 * symbols are usually defined in assembly code.
813 	 */
814 	for (symptr = image->pi_symbols;
815 	     symptr < image->pi_symbols + (image->pi_symcount - 1);
816 	     symptr++)
817 		if (symptr->ps_start == symptr->ps_end)
818 			symptr->ps_end = (symptr+1)->ps_start;
819 }
820 
821 /*
822  * Examine an ELF file to determine the size of its text segment.
823  * Sets image->pi_type if anything conclusive can be determined about
824  * this image.
825  */
826 
827 static void
828 pmcstat_image_get_elf_params(struct pmcstat_image *image,
829     struct pmcstat_args *a)
830 {
831 	int fd;
832 	size_t i, nph, nsh;
833 	const char *path, *elfbase;
834 	uintfptr_t minva, maxva;
835 	Elf *e;
836 	Elf_Scn *scn;
837 	GElf_Ehdr eh;
838 	GElf_Phdr ph;
839 	GElf_Shdr sh;
840 	enum pmcstat_image_type image_type;
841 	char buffer[PATH_MAX];
842 
843 	assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
844 
845 	image->pi_start = minva = ~(uintfptr_t) 0;
846 	image->pi_end = maxva = (uintfptr_t) 0;
847 	image->pi_type = image_type = PMCSTAT_IMAGE_INDETERMINABLE;
848 	image->pi_isdynamic = 0;
849 	image->pi_dynlinkerpath = NULL;
850 	image->pi_vaddr = 0;
851 
852 	path = pmcstat_string_unintern(image->pi_execpath);
853 	assert(path != NULL);
854 
855 	/*
856 	 * Look for kernel modules under FSROOT/KERNELPATH/NAME,
857 	 * and user mode executable objects under FSROOT/PATHNAME.
858 	 */
859 	if (image->pi_iskernelmodule)
860 		(void) snprintf(buffer, sizeof(buffer), "%s%s/%s",
861 		    a->pa_fsroot, a->pa_kernel, path);
862 	else
863 		(void) snprintf(buffer, sizeof(buffer), "%s%s",
864 		    a->pa_fsroot, path);
865 
866 	e = NULL;
867 	if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
868 	    (e = elf_begin(fd, ELF_C_READ, NULL)) == NULL ||
869 	    (elf_kind(e) != ELF_K_ELF)) {
870 		warnx("WARNING: Cannot determine the type of \"%s\".",
871 		    buffer);
872 		goto done;
873 	}
874 
875 	if (gelf_getehdr(e, &eh) != &eh) {
876 		warnx("WARNING: Cannot retrieve the ELF Header for "
877 		    "\"%s\": %s.", buffer, elf_errmsg(-1));
878 		goto done;
879 	}
880 
881 	if (eh.e_type != ET_EXEC && eh.e_type != ET_DYN &&
882 	    !(image->pi_iskernelmodule && eh.e_type == ET_REL)) {
883 		warnx("WARNING: \"%s\" is of an unsupported ELF type.",
884 		    buffer);
885 		goto done;
886 	}
887 
888 	image_type = eh.e_ident[EI_CLASS] == ELFCLASS32 ?
889 	    PMCSTAT_IMAGE_ELF32 : PMCSTAT_IMAGE_ELF64;
890 
891 	/*
892 	 * Determine the virtual address where an executable would be
893 	 * loaded.  Additionally, for dynamically linked executables,
894 	 * save the pathname to the runtime linker.
895 	 */
896 	if (eh.e_type == ET_EXEC) {
897 		if (elf_getphnum(e, &nph) == 0) {
898 			warnx("WARNING: Could not determine the number of "
899 			    "program headers in \"%s\": %s.", buffer,
900 			    elf_errmsg(-1));
901 			goto done;
902 		}
903 		for (i = 0; i < eh.e_phnum; i++) {
904 			if (gelf_getphdr(e, i, &ph) != &ph) {
905 				warnx("WARNING: Retrieval of PHDR entry #%ju "
906 				    "in \"%s\" failed: %s.", (uintmax_t) i,
907 				    buffer, elf_errmsg(-1));
908 				goto done;
909 			}
910 			switch (ph.p_type) {
911 			case PT_DYNAMIC:
912 				image->pi_isdynamic = 1;
913 				break;
914 			case PT_INTERP:
915 				if ((elfbase = elf_rawfile(e, NULL)) == NULL) {
916 					warnx("WARNING: Cannot retrieve the "
917 					    "interpreter for \"%s\": %s.",
918 					    buffer, elf_errmsg(-1));
919 					goto done;
920 				}
921 				image->pi_dynlinkerpath =
922 				    pmcstat_string_intern(elfbase +
923 					ph.p_offset);
924 				break;
925 			case PT_LOAD:
926 				if (ph.p_offset == 0)
927 					image->pi_vaddr = ph.p_vaddr;
928 				break;
929 			}
930 		}
931 	}
932 
933 	/*
934 	 * Get the min and max VA associated with this ELF object.
935 	 */
936 	if (elf_getshnum(e, &nsh) == 0) {
937 		warnx("WARNING: Could not determine the number of sections "
938 		    "for \"%s\": %s.", buffer, elf_errmsg(-1));
939 		goto done;
940 	}
941 
942 	for (i = 0; i < nsh; i++) {
943 		if ((scn = elf_getscn(e, i)) == NULL ||
944 		    gelf_getshdr(scn, &sh) != &sh) {
945 			warnx("WARNING: Could not retrieve section header "
946 			    "#%ju in \"%s\": %s.", (uintmax_t) i, buffer,
947 			    elf_errmsg(-1));
948 			goto done;
949 		}
950 		if (sh.sh_flags & SHF_EXECINSTR) {
951 			minva = min(minva, sh.sh_addr);
952 			maxva = max(maxva, sh.sh_addr + sh.sh_size);
953 		}
954 		if (sh.sh_type == SHT_SYMTAB || sh.sh_type == SHT_DYNSYM)
955 			pmcstat_image_add_symbols(image, e, scn, &sh);
956 	}
957 
958 	image->pi_start = minva;
959 	image->pi_end   = maxva;
960 	image->pi_type  = image_type;
961 	image->pi_fullpath = pmcstat_string_intern(buffer);
962 
963  done:
964 	(void) elf_end(e);
965 	if (fd >= 0)
966 		(void) close(fd);
967 	return;
968 }
969 
970 /*
971  * Given an image descriptor, determine whether it is an ELF, or AOUT.
972  * If no handler claims the image, set its type to 'INDETERMINABLE'.
973  */
974 
975 static void
976 pmcstat_image_determine_type(struct pmcstat_image *image,
977     struct pmcstat_args *a)
978 {
979 	assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
980 
981 	/* Try each kind of handler in turn */
982 	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
983 		pmcstat_image_get_elf_params(image, a);
984 	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
985 		pmcstat_image_get_aout_params(image, a);
986 
987 	/*
988 	 * Otherwise, remember that we tried to determine
989 	 * the object's type and had failed.
990 	 */
991 	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
992 		image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
993 }
994 
995 /*
996  * Locate an image descriptor given an interned path, adding a fresh
997  * descriptor to the cache if necessary.  This function also finds a
998  * suitable name for this image's sample file.
999  *
1000  * We defer filling in the file format specific parts of the image
1001  * structure till the time we actually see a sample that would fall
1002  * into this image.
1003  */
1004 
1005 static struct pmcstat_image *
1006 pmcstat_image_from_path(pmcstat_interned_string internedpath,
1007     int iskernelmodule)
1008 {
1009 	int count, hash, nlen;
1010 	struct pmcstat_image *pi;
1011 	char *sn;
1012 	char name[NAME_MAX];
1013 
1014 	hash = pmcstat_string_lookup_hash(internedpath);
1015 
1016 	/* First, look for an existing entry. */
1017 	LIST_FOREACH(pi, &pmcstat_image_hash[hash], pi_next)
1018 	    if (pi->pi_execpath == internedpath &&
1019 		  pi->pi_iskernelmodule == iskernelmodule)
1020 		    return (pi);
1021 
1022 	/*
1023 	 * Allocate a new entry and place it at the head of the hash
1024 	 * and LRU lists.
1025 	 */
1026 	pi = malloc(sizeof(*pi));
1027 	if (pi == NULL)
1028 		return (NULL);
1029 
1030 	pi->pi_type = PMCSTAT_IMAGE_UNKNOWN;
1031 	pi->pi_execpath = internedpath;
1032 	pi->pi_start = ~0;
1033 	pi->pi_end = 0;
1034 	pi->pi_entry = 0;
1035 	pi->pi_vaddr = 0;
1036 	pi->pi_isdynamic = 0;
1037 	pi->pi_iskernelmodule = iskernelmodule;
1038 	pi->pi_dynlinkerpath = NULL;
1039 	pi->pi_symbols = NULL;
1040 	pi->pi_symcount = 0;
1041 
1042 	/*
1043 	 * Look for a suitable name for the sample files associated
1044 	 * with this image: if `basename(path)`+".gmon" is available,
1045 	 * we use that, otherwise we try iterating through
1046 	 * `basename(path)`+ "~" + NNN + ".gmon" till we get a free
1047 	 * entry.
1048 	 */
1049 	if ((sn = basename(pmcstat_string_unintern(internedpath))) == NULL)
1050 		err(EX_OSERR, "ERROR: Cannot process \"%s\"",
1051 		    pmcstat_string_unintern(internedpath));
1052 
1053 	nlen = strlen(sn);
1054 	nlen = min(nlen, (int) (sizeof(name) - sizeof(".gmon")));
1055 
1056 	snprintf(name, sizeof(name), "%.*s.gmon", nlen, sn);
1057 
1058 	/* try use the unabridged name first */
1059 	if (pmcstat_string_lookup(name) == NULL)
1060 		pi->pi_samplename = pmcstat_string_intern(name);
1061 	else {
1062 		/*
1063 		 * Otherwise use a prefix from the original name and
1064 		 * upto 3 digits.
1065 		 */
1066 		nlen = strlen(sn);
1067 		nlen = min(nlen, (int) (sizeof(name)-sizeof("~NNN.gmon")));
1068 		count = 0;
1069 		do {
1070 			if (++count > 999)
1071 				errx(EX_CANTCREAT, "ERROR: cannot create a "
1072 				    "gmon file for \"%s\"", name);
1073 			snprintf(name, sizeof(name), "%.*s~%3.3d.gmon",
1074 			    nlen, sn, count);
1075 			if (pmcstat_string_lookup(name) == NULL) {
1076 				pi->pi_samplename =
1077 				    pmcstat_string_intern(name);
1078 				count = 0;
1079 			}
1080 		} while (count > 0);
1081 	}
1082 
1083 
1084 	LIST_INIT(&pi->pi_gmlist);
1085 
1086 	LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next);
1087 
1088 	return (pi);
1089 }
1090 
1091 /*
1092  * Increment the bucket in the gmon.out file corresponding to 'pmcid'
1093  * and 'pc'.
1094  */
1095 
1096 static void
1097 pmcstat_image_increment_bucket(struct pmcstat_pcmap *map, uintfptr_t pc,
1098     pmc_id_t pmcid, struct pmcstat_args *a)
1099 {
1100 	struct pmcstat_image *image;
1101 	struct pmcstat_gmonfile *pgf;
1102 	uintfptr_t bucket;
1103 	HISTCOUNTER *hc;
1104 
1105 	assert(pc >= map->ppm_lowpc && pc < map->ppm_highpc);
1106 
1107 	image = map->ppm_image;
1108 
1109 	/*
1110 	 * If this is the first time we are seeing a sample for
1111 	 * this executable image, try determine its parameters.
1112 	 */
1113 	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1114 		pmcstat_image_determine_type(image, a);
1115 
1116 	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN);
1117 
1118 	/* Ignore samples in images that we know nothing about. */
1119 	if (image->pi_type == PMCSTAT_IMAGE_INDETERMINABLE) {
1120 		pmcstat_stats.ps_samples_indeterminable++;
1121 		return;
1122 	}
1123 
1124 	/*
1125 	 * Find the gmon file corresponding to 'pmcid', creating it if
1126 	 * needed.
1127 	 */
1128 	pgf = pmcstat_image_find_gmonfile(image, pmcid);
1129 	if (pgf == NULL) {
1130 		if ((pgf = calloc(1, sizeof(*pgf))) == NULL)
1131 			err(EX_OSERR, "ERROR:");
1132 
1133 		pgf->pgf_gmondata = NULL;	/* mark as unmapped */
1134 		pgf->pgf_name = pmcstat_gmon_create_name(a->pa_samplesdir,
1135 		    image, pmcid);
1136 		pgf->pgf_pmcid = pmcid;
1137 		assert(image->pi_end > image->pi_start);
1138 		pgf->pgf_nbuckets = (image->pi_end - image->pi_start) /
1139 		    FUNCTION_ALIGNMENT;	/* see <machine/profile.h> */
1140 		pgf->pgf_ndatabytes = sizeof(struct gmonhdr) +
1141 		    pgf->pgf_nbuckets * sizeof(HISTCOUNTER);
1142 		pgf->pgf_nsamples = 0;
1143 		pgf->pgf_file = NULL;
1144 
1145 		pmcstat_gmon_create_file(pgf, image);
1146 
1147 		LIST_INSERT_HEAD(&image->pi_gmlist, pgf, pgf_next);
1148 	}
1149 
1150 	/*
1151 	 * Map the gmon file in if needed.  It may have been mapped
1152 	 * out under memory pressure.
1153 	 */
1154 	if (pgf->pgf_gmondata == NULL)
1155 		pmcstat_gmon_map_file(pgf);
1156 
1157 	assert(pgf->pgf_gmondata != NULL);
1158 
1159 	/*
1160 	 *
1161 	 */
1162 
1163 	bucket = (pc - map->ppm_lowpc) / FUNCTION_ALIGNMENT;
1164 
1165 	assert(bucket < pgf->pgf_nbuckets);
1166 
1167 	hc = (HISTCOUNTER *) ((uintptr_t) pgf->pgf_gmondata +
1168 	    sizeof(struct gmonhdr));
1169 
1170 	/* saturating add */
1171 	if (hc[bucket] < 0xFFFFU)  /* XXX tie this to sizeof(HISTCOUNTER) */
1172 		hc[bucket]++;
1173 	else /* mark that an overflow occurred */
1174 		pgf->pgf_overflow = 1;
1175 
1176 	pgf->pgf_nsamples++;
1177 }
1178 
1179 /*
1180  * Record the fact that PC values from 'start' to 'end' come from
1181  * image 'image'.
1182  */
1183 
1184 static void
1185 pmcstat_image_link(struct pmcstat_process *pp, struct pmcstat_image *image,
1186     uintfptr_t start)
1187 {
1188 	struct pmcstat_pcmap *pcm, *pcmnew;
1189 	uintfptr_t offset;
1190 
1191 	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN &&
1192 	    image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE);
1193 
1194 	if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
1195 		err(EX_OSERR, "ERROR: Cannot create a map entry");
1196 
1197 	/*
1198 	 * Adjust the map entry to only cover the text portion
1199 	 * of the object.
1200 	 */
1201 
1202 	offset = start - image->pi_vaddr;
1203 	pcmnew->ppm_lowpc  = image->pi_start + offset;
1204 	pcmnew->ppm_highpc = image->pi_end + offset;
1205 	pcmnew->ppm_image  = image;
1206 
1207 	assert(pcmnew->ppm_lowpc < pcmnew->ppm_highpc);
1208 
1209 	/* Overlapped mmap()'s are assumed to never occur. */
1210 	TAILQ_FOREACH(pcm, &pp->pp_map, ppm_next)
1211 	    if (pcm->ppm_lowpc >= pcmnew->ppm_highpc)
1212 		    break;
1213 
1214 	if (pcm == NULL)
1215 		TAILQ_INSERT_TAIL(&pp->pp_map, pcmnew, ppm_next);
1216 	else
1217 		TAILQ_INSERT_BEFORE(pcm, pcmnew, ppm_next);
1218 }
1219 
1220 /*
1221  * Unmap images in the range [start..end) associated with process
1222  * 'pp'.
1223  */
1224 
1225 static void
1226 pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start,
1227     uintfptr_t end)
1228 {
1229 	struct pmcstat_pcmap *pcm, *pcmtmp, *pcmnew;
1230 
1231 	assert(pp != NULL);
1232 	assert(start < end);
1233 
1234 	/*
1235 	 * Cases:
1236 	 * - we could have the range completely in the middle of an
1237 	 *   existing pcmap; in this case we have to split the pcmap
1238 	 *   structure into two (i.e., generate a 'hole').
1239 	 * - we could have the range covering multiple pcmaps; these
1240 	 *   will have to be removed.
1241 	 * - we could have either 'start' or 'end' falling in the
1242 	 *   middle of a pcmap; in this case shorten the entry.
1243 	 */
1244 	TAILQ_FOREACH_SAFE(pcm, &pp->pp_map, ppm_next, pcmtmp) {
1245 		assert(pcm->ppm_lowpc < pcm->ppm_highpc);
1246 		if (pcm->ppm_highpc <= start)
1247 			continue;
1248 		if (pcm->ppm_lowpc >= end)
1249 			return;
1250 		if (pcm->ppm_lowpc >= start && pcm->ppm_highpc <= end) {
1251 			/*
1252 			 * The current pcmap is completely inside the
1253 			 * unmapped range: remove it entirely.
1254 			 */
1255 			TAILQ_REMOVE(&pp->pp_map, pcm, ppm_next);
1256 			free(pcm);
1257 		} else if (pcm->ppm_lowpc < start && pcm->ppm_highpc > end) {
1258 			/*
1259 			 * Split this pcmap into two; curtail the
1260 			 * current map to end at [start-1], and start
1261 			 * the new one at [end].
1262 			 */
1263 			if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
1264 				err(EX_OSERR, "ERROR: Cannot split a map "
1265 				    "entry");
1266 
1267 			pcmnew->ppm_image = pcm->ppm_image;
1268 
1269 			pcmnew->ppm_lowpc = end;
1270 			pcmnew->ppm_highpc = pcm->ppm_highpc;
1271 
1272 			pcm->ppm_highpc = start;
1273 
1274 			TAILQ_INSERT_AFTER(&pp->pp_map, pcm, pcmnew, ppm_next);
1275 
1276 			return;
1277 		} else if (pcm->ppm_lowpc < start && pcm->ppm_highpc <= end)
1278 			pcm->ppm_highpc = start;
1279 		else if (pcm->ppm_lowpc >= start && pcm->ppm_highpc > end)
1280 			pcm->ppm_lowpc = end;
1281 		else
1282 			assert(0);
1283 	}
1284 }
1285 
1286 /*
1287  * Add a {pmcid,name} mapping.
1288  */
1289 
1290 static void
1291 pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps,
1292     struct pmcstat_args *a)
1293 {
1294 	struct pmcstat_pmcrecord *pr;
1295 	struct stat st;
1296 	char fullpath[PATH_MAX];
1297 
1298 	/* Replace an existing name for the PMC. */
1299 	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1300 	    if (pr->pr_pmcid == pmcid) {
1301 		    pr->pr_pmcname = ps;
1302 		    return;
1303 	    }
1304 
1305 	/*
1306 	 * Otherwise, allocate a new descriptor and create the
1307 	 * appropriate directory to hold gmon.out files.
1308 	 */
1309 	if ((pr = malloc(sizeof(*pr))) == NULL)
1310 		err(EX_OSERR, "ERROR: Cannot allocate pmc record");
1311 
1312 	pr->pr_pmcid = pmcid;
1313 	pr->pr_pmcname = ps;
1314 	LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next);
1315 
1316 	(void) snprintf(fullpath, sizeof(fullpath), "%s/%s", a->pa_samplesdir,
1317 	    pmcstat_string_unintern(ps));
1318 
1319 	/* If the path name exists, it should be a directory */
1320 	if (stat(fullpath, &st) == 0 && S_ISDIR(st.st_mode))
1321 		return;
1322 
1323 	if (mkdir(fullpath, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) < 0)
1324 		err(EX_OSERR, "ERROR: Cannot create directory \"%s\"",
1325 		    fullpath);
1326 }
1327 
1328 /*
1329  * Given a pmcid in use, find its human-readable name.
1330  */
1331 
1332 static const char *
1333 pmcstat_pmcid_to_name(pmc_id_t pmcid)
1334 {
1335 	struct pmcstat_pmcrecord *pr;
1336 	char fullpath[PATH_MAX];
1337 
1338 	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1339 	    if (pr->pr_pmcid == pmcid)
1340 		    return (pmcstat_string_unintern(pr->pr_pmcname));
1341 
1342 	/* create a default name and add this entry */
1343 	if ((pr = malloc(sizeof(*pr))) == NULL)
1344 		err(EX_OSERR, "ERROR: ");
1345 	pr->pr_pmcid = pmcid;
1346 
1347 	(void) snprintf(fullpath, sizeof(fullpath), "%X", (unsigned int) pmcid);
1348 	pr->pr_pmcname = pmcstat_string_intern(fullpath);
1349 
1350 	LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next);
1351 
1352 	return (pmcstat_string_unintern(pr->pr_pmcname));
1353 }
1354 
1355 /*
1356  * Associate an AOUT image with a process.
1357  */
1358 
1359 static void
1360 pmcstat_process_aout_exec(struct pmcstat_process *pp,
1361     struct pmcstat_image *image, uintfptr_t entryaddr,
1362     struct pmcstat_args *a)
1363 {
1364 	(void) pp;
1365 	(void) image;
1366 	(void) entryaddr;
1367 	(void) a;
1368 	/* TODO Implement a.out handling */
1369 }
1370 
1371 /*
1372  * Associate an ELF image with a process.
1373  */
1374 
1375 static void
1376 pmcstat_process_elf_exec(struct pmcstat_process *pp,
1377     struct pmcstat_image *image, uintfptr_t entryaddr,
1378     struct pmcstat_args *a)
1379 {
1380 	uintmax_t libstart;
1381 	struct pmcstat_image *rtldimage;
1382 
1383 	assert(image->pi_type == PMCSTAT_IMAGE_ELF32 ||
1384 	    image->pi_type == PMCSTAT_IMAGE_ELF64);
1385 
1386 	/* Create a map entry for the base executable. */
1387 	pmcstat_image_link(pp, image, image->pi_vaddr);
1388 
1389 	/*
1390 	 * For dynamically linked executables we need to determine
1391 	 * where the dynamic linker was mapped to for this process,
1392 	 * Subsequent executable objects that are mapped in by the
1393 	 * dynamic linker will be tracked by log events of type
1394 	 * PMCLOG_TYPE_MAP_IN.
1395 	 */
1396 
1397 	if (image->pi_isdynamic) {
1398 
1399 		/*
1400 		 * The runtime loader gets loaded just after the maximum
1401 		 * possible heap address.  Like so:
1402 		 *
1403 		 * [  TEXT DATA BSS HEAP -->*RTLD  SHLIBS   <--STACK]
1404 		 * ^					            ^
1405 		 * 0				   VM_MAXUSER_ADDRESS
1406 
1407 		 *
1408 		 * The exact address where the loader gets mapped in
1409 		 * will vary according to the size of the executable
1410 		 * and the limits on the size of the process'es data
1411 		 * segment at the time of exec().  The entry address
1412 		 * recorded at process exec time corresponds to the
1413 		 * 'start' address inside the dynamic linker.  From
1414 		 * this we can figure out the address where the
1415 		 * runtime loader's file object had been mapped to.
1416 		 */
1417 		rtldimage = pmcstat_image_from_path(image->pi_dynlinkerpath,
1418 		    0);
1419 		if (rtldimage == NULL) {
1420 			warnx("WARNING: Cannot find image for \"%s\".",
1421 			    pmcstat_string_unintern(image->pi_dynlinkerpath));
1422 			pmcstat_stats.ps_exec_errors++;
1423 			return;
1424 		}
1425 
1426 		if (rtldimage->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1427 			pmcstat_image_get_elf_params(rtldimage, a);
1428 
1429 		if (rtldimage->pi_type != PMCSTAT_IMAGE_ELF32 &&
1430 		    rtldimage->pi_type != PMCSTAT_IMAGE_ELF64) {
1431 			warnx("WARNING: rtld not an ELF object \"%s\".",
1432 			    pmcstat_string_unintern(image->pi_dynlinkerpath));
1433 			return;
1434 		}
1435 
1436 		libstart = entryaddr - rtldimage->pi_entry;
1437 		pmcstat_image_link(pp, rtldimage, libstart);
1438 	}
1439 }
1440 
1441 /*
1442  * Find the process descriptor corresponding to a PID.  If 'allocate'
1443  * is zero, we return a NULL if a pid descriptor could not be found or
1444  * a process descriptor process.  If 'allocate' is non-zero, then we
1445  * will attempt to allocate a fresh process descriptor.  Zombie
1446  * process descriptors are only removed if a fresh allocation for the
1447  * same PID is requested.
1448  */
1449 
1450 static struct pmcstat_process *
1451 pmcstat_process_lookup(pid_t pid, int allocate)
1452 {
1453 	uint32_t hash;
1454 	struct pmcstat_pcmap *ppm, *ppmtmp;
1455 	struct pmcstat_process *pp, *pptmp;
1456 
1457 	hash = (uint32_t) pid & PMCSTAT_HASH_MASK;	/* simplicity wins */
1458 
1459 	LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[hash], pp_next, pptmp)
1460 	    if (pp->pp_pid == pid) {
1461 		    /* Found a descriptor, check and process zombies */
1462 		    if (allocate && pp->pp_isactive == 0) {
1463 			    /* remove maps */
1464 			    TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next,
1465 				ppmtmp) {
1466 				    TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
1467 				    free(ppm);
1468 			    }
1469 			    /* remove process entry */
1470 			    LIST_REMOVE(pp, pp_next);
1471 			    free(pp);
1472 			    break;
1473 		    }
1474 		    return (pp);
1475 	    }
1476 
1477 	if (!allocate)
1478 		return (NULL);
1479 
1480 	if ((pp = malloc(sizeof(*pp))) == NULL)
1481 		err(EX_OSERR, "ERROR: Cannot allocate pid descriptor");
1482 
1483 	pp->pp_pid = pid;
1484 	pp->pp_isactive = 1;
1485 
1486 	TAILQ_INIT(&pp->pp_map);
1487 
1488 	LIST_INSERT_HEAD(&pmcstat_process_hash[hash], pp, pp_next);
1489 	return (pp);
1490 }
1491 
1492 /*
1493  * Associate an image and a process.
1494  */
1495 
1496 static void
1497 pmcstat_process_exec(struct pmcstat_process *pp,
1498     pmcstat_interned_string path, uintfptr_t entryaddr,
1499     struct pmcstat_args *a)
1500 {
1501 	struct pmcstat_image *image;
1502 
1503 	if ((image = pmcstat_image_from_path(path, 0)) == NULL) {
1504 		pmcstat_stats.ps_exec_errors++;
1505 		return;
1506 	}
1507 
1508 	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1509 		pmcstat_image_determine_type(image, a);
1510 
1511 	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN);
1512 
1513 	switch (image->pi_type) {
1514 	case PMCSTAT_IMAGE_ELF32:
1515 	case PMCSTAT_IMAGE_ELF64:
1516 		pmcstat_stats.ps_exec_elf++;
1517 		pmcstat_process_elf_exec(pp, image, entryaddr, a);
1518 		break;
1519 
1520 	case PMCSTAT_IMAGE_AOUT:
1521 		pmcstat_stats.ps_exec_aout++;
1522 		pmcstat_process_aout_exec(pp, image, entryaddr, a);
1523 		break;
1524 
1525 	case PMCSTAT_IMAGE_INDETERMINABLE:
1526 		pmcstat_stats.ps_exec_indeterminable++;
1527 		break;
1528 
1529 	default:
1530 		err(EX_SOFTWARE, "ERROR: Unsupported executable type for "
1531 		    "\"%s\"", pmcstat_string_unintern(path));
1532 	}
1533 }
1534 
1535 
1536 /*
1537  * Find the map entry associated with process 'p' at PC value 'pc'.
1538  */
1539 
1540 static struct pmcstat_pcmap *
1541 pmcstat_process_find_map(struct pmcstat_process *p, uintfptr_t pc)
1542 {
1543 	struct pmcstat_pcmap *ppm;
1544 
1545 	TAILQ_FOREACH(ppm, &p->pp_map, ppm_next) {
1546 		if (pc >= ppm->ppm_lowpc && pc < ppm->ppm_highpc)
1547 			return (ppm);
1548 		if (pc < ppm->ppm_lowpc)
1549 			return (NULL);
1550 	}
1551 
1552 	return (NULL);
1553 }
1554 
1555 static struct pmcstat_cgnode *
1556 pmcstat_cgnode_allocate(struct pmcstat_image *image, uintfptr_t pc)
1557 {
1558 	struct pmcstat_cgnode *cg;
1559 
1560 	if ((cg = malloc(sizeof(*cg))) == NULL)
1561 		err(EX_OSERR, "ERROR: Cannot allocate callgraph node");
1562 
1563 	cg->pcg_image = image;
1564 	cg->pcg_func = pc;
1565 
1566 	cg->pcg_count = 0;
1567 	cg->pcg_nchildren = 0;
1568 	LIST_INIT(&cg->pcg_children);
1569 
1570 	return (cg);
1571 }
1572 
1573 /*
1574  * Free a node and its children.
1575  */
1576 static void
1577 pmcstat_cgnode_free(struct pmcstat_cgnode *cg)
1578 {
1579 	struct pmcstat_cgnode *cgc, *cgtmp;
1580 
1581 	LIST_FOREACH_SAFE(cgc, &cg->pcg_children, pcg_sibling, cgtmp)
1582 		pmcstat_cgnode_free(cgc);
1583 	free(cg);
1584 }
1585 
1586 /*
1587  * Look for a callgraph node associated with pmc `pmcid' in the global
1588  * hash table that corresponds to the given `pc' value in the process
1589  * `pp'.
1590  */
1591 static struct pmcstat_cgnode *
1592 pmcstat_cgnode_hash_lookup_pc(struct pmcstat_process *pp, uint32_t pmcid,
1593     uintfptr_t pc, int usermode)
1594 {
1595 	struct pmcstat_pcmap *ppm;
1596 	struct pmcstat_symbol *sym;
1597 	struct pmcstat_image *image;
1598 	struct pmcstat_cgnode *cg;
1599 	struct pmcstat_cgnode_hash *h;
1600 	uintfptr_t loadaddress;
1601 	unsigned int i, hash;
1602 
1603 	ppm = pmcstat_process_find_map(usermode ? pp : pmcstat_kernproc, pc);
1604 	if (ppm == NULL)
1605 		return (NULL);
1606 
1607 	image = ppm->ppm_image;
1608 
1609 	loadaddress = ppm->ppm_lowpc + image->pi_vaddr - image->pi_start;
1610 	pc -= loadaddress;	/* Convert to an offset in the image. */
1611 
1612 	/*
1613 	 * Try determine the function at this offset.  If we can't
1614 	 * find a function round leave the `pc' value alone.
1615 	 */
1616 	if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
1617 		pc = sym->ps_start;
1618 
1619 	for (hash = i = 0; i < sizeof(uintfptr_t); i++)
1620 		hash += (pc >> i) & 0xFF;
1621 
1622 	hash &= PMCSTAT_HASH_MASK;
1623 
1624 	cg = NULL;
1625 	LIST_FOREACH(h, &pmcstat_cgnode_hash[hash], pch_next)
1626 	{
1627 		if (h->pch_pmcid != pmcid)
1628 			continue;
1629 
1630 		cg = h->pch_cgnode;
1631 
1632 		assert(cg != NULL);
1633 
1634 		if (cg->pcg_image == image && cg->pcg_func == pc)
1635 			return (cg);
1636 	}
1637 
1638 	/*
1639 	 * We haven't seen this (pmcid, pc) tuple yet, so allocate a
1640 	 * new callgraph node and a new hash table entry for it.
1641 	 */
1642 	cg = pmcstat_cgnode_allocate(image, pc);
1643 	if ((h = malloc(sizeof(*h))) == NULL)
1644 		err(EX_OSERR, "ERROR: Could not allocate callgraph node");
1645 
1646 	h->pch_pmcid = pmcid;
1647 	h->pch_cgnode = cg;
1648 	LIST_INSERT_HEAD(&pmcstat_cgnode_hash[hash], h, pch_next);
1649 
1650 	pmcstat_cgnode_hash_count++;
1651 
1652 	return (cg);
1653 }
1654 
1655 /*
1656  * Compare two callgraph nodes for sorting.
1657  */
1658 static int
1659 pmcstat_cgnode_compare(const void *a, const void *b)
1660 {
1661 	const struct pmcstat_cgnode *const *pcg1, *const *pcg2, *cg1, *cg2;
1662 
1663 	pcg1 = (const struct pmcstat_cgnode *const *) a;
1664 	cg1 = *pcg1;
1665 	pcg2 = (const struct pmcstat_cgnode *const *) b;
1666 	cg2 = *pcg2;
1667 
1668 	/* Sort in reverse order */
1669 	if (cg1->pcg_count < cg2->pcg_count)
1670 		return (1);
1671 	if (cg1->pcg_count > cg2->pcg_count)
1672 		return (-1);
1673 	return (0);
1674 }
1675 
1676 /*
1677  * Find (allocating if a needed) a callgraph node in the given
1678  * parent with the same (image, pcoffset) pair.
1679  */
1680 
1681 static struct pmcstat_cgnode *
1682 pmcstat_cgnode_find(struct pmcstat_cgnode *parent, struct pmcstat_image *image,
1683     uintfptr_t pcoffset)
1684 {
1685 	struct pmcstat_cgnode *child;
1686 
1687 	LIST_FOREACH(child, &parent->pcg_children, pcg_sibling) {
1688 		if (child->pcg_image == image &&
1689 		    child->pcg_func == pcoffset)
1690 			return (child);
1691 	}
1692 
1693 	/*
1694 	 * Allocate a new structure.
1695 	 */
1696 
1697 	child = pmcstat_cgnode_allocate(image, pcoffset);
1698 
1699 	/*
1700 	 * Link it into the parent.
1701 	 */
1702 	LIST_INSERT_HEAD(&parent->pcg_children, child, pcg_sibling);
1703 	parent->pcg_nchildren++;
1704 
1705 	return (child);
1706 }
1707 
1708 /*
1709  * Print one callgraph node.  The output format is:
1710  *
1711  * indentation %(parent's samples) #nsamples function@object
1712  */
1713 static void
1714 pmcstat_cgnode_print(struct pmcstat_args *a, struct pmcstat_cgnode *cg,
1715     int depth, uint32_t total)
1716 {
1717 	uint32_t n;
1718 	const char *space;
1719 	struct pmcstat_symbol *sym;
1720 	struct pmcstat_cgnode **sortbuffer, **cgn, *pcg;
1721 
1722 	space = " ";
1723 
1724 	if (depth > 0)
1725 		(void) fprintf(a->pa_graphfile, "%*s", depth, space);
1726 
1727 	if (cg->pcg_count == total)
1728 		(void) fprintf(a->pa_graphfile, "100.0%% ");
1729 	else
1730 		(void) fprintf(a->pa_graphfile, "%05.2f%% ",
1731 		    100.0 * cg->pcg_count / total);
1732 
1733 	n = fprintf(a->pa_graphfile, " [%u] ", cg->pcg_count);
1734 
1735 	/* #samples is a 12 character wide field. */
1736 	if (n < 12)
1737 		(void) fprintf(a->pa_graphfile, "%*s", 12 - n, space);
1738 
1739 	if (depth > 0)
1740 		(void) fprintf(a->pa_graphfile, "%*s", depth, space);
1741 
1742 	sym = pmcstat_symbol_search(cg->pcg_image, cg->pcg_func);
1743 	if (sym)
1744 		(void) fprintf(a->pa_graphfile, "%s",
1745 		    pmcstat_string_unintern(sym->ps_name));
1746 	else
1747 		(void) fprintf(a->pa_graphfile, "%p",
1748 		    (void *) (cg->pcg_image->pi_vaddr + cg->pcg_func));
1749 
1750 	if (pmcstat_previous_filename_printed !=
1751 	    cg->pcg_image->pi_fullpath) {
1752 		pmcstat_previous_filename_printed = cg->pcg_image->pi_fullpath;
1753 		(void) fprintf(a->pa_graphfile, " @ %s\n",
1754 		    pmcstat_string_unintern(
1755 		    pmcstat_previous_filename_printed));
1756 	} else
1757 		(void) fprintf(a->pa_graphfile, "\n");
1758 
1759 	if (cg->pcg_nchildren == 0)
1760 		return;
1761 
1762 	if ((sortbuffer = (struct pmcstat_cgnode **)
1763 		malloc(sizeof(struct pmcstat_cgnode *) *
1764 		    cg->pcg_nchildren)) == NULL)
1765 		err(EX_OSERR, "ERROR: Cannot print callgraph");
1766 	cgn = sortbuffer;
1767 
1768 	LIST_FOREACH(pcg, &cg->pcg_children, pcg_sibling)
1769 	    *cgn++ = pcg;
1770 
1771 	assert(cgn - sortbuffer == (int) cg->pcg_nchildren);
1772 
1773 	qsort(sortbuffer, cg->pcg_nchildren, sizeof(struct pmcstat_cgnode *),
1774 	    pmcstat_cgnode_compare);
1775 
1776 	for (cgn = sortbuffer, n = 0; n < cg->pcg_nchildren; n++, cgn++)
1777 		pmcstat_cgnode_print(a, *cgn, depth+1, cg->pcg_count);
1778 
1779 	free(sortbuffer);
1780 }
1781 
1782 /*
1783  * Record a callchain.
1784  */
1785 
1786 static void
1787 pmcstat_record_callchain(struct pmcstat_process *pp, uint32_t pmcid,
1788     uint32_t nsamples, uintfptr_t *cc, int usermode, struct pmcstat_args *a)
1789 {
1790 	uintfptr_t pc, loadaddress;
1791 	uint32_t n;
1792 	struct pmcstat_image *image;
1793 	struct pmcstat_pcmap *ppm;
1794 	struct pmcstat_symbol *sym;
1795 	struct pmcstat_cgnode *parent, *child;
1796 
1797 	/*
1798 	 * Find the callgraph node recorded in the global hash table
1799 	 * for this (pmcid, pc).
1800 	 */
1801 
1802 	pc = cc[0];
1803 	parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode);
1804 	if (parent == NULL) {
1805 		pmcstat_stats.ps_callchain_dubious_frames++;
1806 		return;
1807 	}
1808 
1809 	parent->pcg_count++;
1810 
1811 	/*
1812 	 * For each return address in the call chain record, subject
1813 	 * to the maximum depth desired.
1814 	 * - Find the image associated with the sample.  Stop if there
1815 	 *   there is no valid image at that address.
1816 	 * - Find the function that overlaps the return address.
1817 	 * - If found: use the start address of the function.
1818 	 *   If not found (say an object's symbol table is not present or
1819 	 *   is incomplete), round down to th gprof bucket granularity.
1820 	 * - Convert return virtual address to an offset in the image.
1821 	 * - Look for a child with the same {offset,image} tuple,
1822 	 *   inserting one if needed.
1823 	 * - Increment the count of occurrences of the child.
1824 	 */
1825 
1826 	for (n = 1; n < (uint32_t) a->pa_graphdepth && n < nsamples; n++,
1827 	    parent = child) {
1828 		pc = cc[n];
1829 
1830 		ppm = pmcstat_process_find_map(usermode ? pp :
1831 		    pmcstat_kernproc, pc);
1832 		if (ppm == NULL)
1833 			return;
1834 
1835 		image = ppm->ppm_image;
1836 		loadaddress = ppm->ppm_lowpc + image->pi_vaddr -
1837 		    image->pi_start;
1838 		pc -= loadaddress;
1839 
1840 		if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
1841 			pc = sym->ps_start;
1842 
1843 		child = pmcstat_cgnode_find(parent, image, pc);
1844 		child->pcg_count++;
1845 	}
1846 }
1847 
1848 /*
1849  * Printing a callgraph for a PMC.
1850  */
1851 static void
1852 pmcstat_callgraph_print_for_pmcid(struct pmcstat_args *a,
1853     struct pmcstat_pmcrecord *pmcr)
1854 {
1855 	int n, nentries;
1856 	uint32_t nsamples, pmcid;
1857 	struct pmcstat_cgnode **sortbuffer, **cgn;
1858 	struct pmcstat_cgnode_hash *pch;
1859 
1860 	/*
1861 	 * We pull out all callgraph nodes in the top-level hash table
1862 	 * with a matching PMC id.  We then sort these based on the
1863 	 * frequency of occurrence.  Each callgraph node is then
1864 	 * printed.
1865 	 */
1866 
1867 	nsamples = 0;
1868 	pmcid = pmcr->pr_pmcid;
1869 	if ((sortbuffer = (struct pmcstat_cgnode **)
1870 	    malloc(sizeof(struct pmcstat_cgnode *) *
1871 	    pmcstat_cgnode_hash_count)) == NULL)
1872 		err(EX_OSERR, "ERROR: Cannot sort callgraph");
1873 	cgn = sortbuffer;
1874 
1875 	memset(sortbuffer, 0xFF, pmcstat_cgnode_hash_count *
1876 	    sizeof(struct pmcstat_cgnode **));
1877 
1878 	for (n = 0; n < PMCSTAT_NHASH; n++)
1879 		LIST_FOREACH(pch, &pmcstat_cgnode_hash[n], pch_next)
1880 		    if (pch->pch_pmcid == pmcid) {
1881 			    nsamples += pch->pch_cgnode->pcg_count;
1882 			    *cgn++ = pch->pch_cgnode;
1883 		    }
1884 
1885 	nentries = cgn - sortbuffer;
1886 	assert(nentries <= pmcstat_cgnode_hash_count);
1887 
1888 	if (nentries == 0)
1889 		return;
1890 
1891 	qsort(sortbuffer, nentries, sizeof(struct pmcstat_cgnode *),
1892 	    pmcstat_cgnode_compare);
1893 
1894 	(void) fprintf(a->pa_graphfile,
1895 	    "@ %s [%u samples]\n\n",
1896 	    pmcstat_string_unintern(pmcr->pr_pmcname),
1897 	    nsamples);
1898 
1899 	for (cgn = sortbuffer, n = 0; n < nentries; n++, cgn++) {
1900 		pmcstat_previous_filename_printed = NULL;
1901 		pmcstat_cgnode_print(a, *cgn, 0, nsamples);
1902 		(void) fprintf(a->pa_graphfile, "\n");
1903 	}
1904 
1905 	free(sortbuffer);
1906 }
1907 
1908 /*
1909  * Print out callgraphs.
1910  */
1911 
1912 static void
1913 pmcstat_callgraph_print(struct pmcstat_args *a)
1914 {
1915 	struct pmcstat_pmcrecord *pmcr;
1916 
1917 	LIST_FOREACH(pmcr, &pmcstat_pmcs, pr_next)
1918 	    pmcstat_callgraph_print_for_pmcid(a, pmcr);
1919 }
1920 
1921 static void
1922 pmcstat_cgnode_do_gmon_arcs(struct pmcstat_cgnode *cg, pmc_id_t pmcid)
1923 {
1924 	struct pmcstat_cgnode *cgc;
1925 
1926 	/*
1927 	 * Look for child nodes that belong to the same image.
1928 	 */
1929 
1930 	LIST_FOREACH(cgc, &cg->pcg_children, pcg_sibling) {
1931 		if (cgc->pcg_image == cg->pcg_image)
1932 			pmcstat_gmon_append_arc(cg->pcg_image, pmcid,
1933 			    cgc->pcg_func, cg->pcg_func, cgc->pcg_count);
1934 		if (cgc->pcg_nchildren > 0)
1935 			pmcstat_cgnode_do_gmon_arcs(cgc, pmcid);
1936 	}
1937 }
1938 
1939 static void
1940 pmcstat_callgraph_do_gmon_arcs_for_pmcid(pmc_id_t pmcid)
1941 {
1942 	int n;
1943 	struct pmcstat_cgnode_hash *pch;
1944 
1945 	for (n = 0; n < PMCSTAT_NHASH; n++)
1946 		LIST_FOREACH(pch, &pmcstat_cgnode_hash[n], pch_next)
1947 			if (pch->pch_pmcid == pmcid &&
1948 			    pch->pch_cgnode->pcg_nchildren > 1)
1949 				pmcstat_cgnode_do_gmon_arcs(pch->pch_cgnode,
1950 				    pmcid);
1951 }
1952 
1953 
1954 static void
1955 pmcstat_callgraph_do_gmon_arcs(void)
1956 {
1957 	struct pmcstat_pmcrecord *pmcr;
1958 
1959 	LIST_FOREACH(pmcr, &pmcstat_pmcs, pr_next)
1960 		pmcstat_callgraph_do_gmon_arcs_for_pmcid(pmcr->pr_pmcid);
1961 }
1962 
1963 /*
1964  * Convert a hwpmc(4) log to profile information.  A system-wide
1965  * callgraph is generated if FLAG_DO_CALLGRAPHS is set.  gmon.out
1966  * files usable by gprof(1) are created if FLAG_DO_GPROF is set.
1967  */
1968 static int
1969 pmcstat_analyze_log(struct pmcstat_args *a)
1970 {
1971 	uint32_t cpu, cpuflags;
1972 	uintfptr_t pc;
1973 	pid_t pid;
1974 	struct pmcstat_image *image;
1975 	struct pmcstat_process *pp, *ppnew;
1976 	struct pmcstat_pcmap *ppm, *ppmtmp;
1977 	struct pmclog_ev ev;
1978 	pmcstat_interned_string image_path;
1979 
1980 	assert(a->pa_flags & FLAG_DO_ANALYSIS);
1981 
1982 	if (elf_version(EV_CURRENT) == EV_NONE)
1983 		err(EX_UNAVAILABLE, "Elf library intialization failed");
1984 
1985 	while (pmclog_read(a->pa_logparser, &ev) == 0) {
1986 		assert(ev.pl_state == PMCLOG_OK);
1987 
1988 		switch (ev.pl_type) {
1989 		case PMCLOG_TYPE_INITIALIZE:
1990 			if ((ev.pl_u.pl_i.pl_version & 0xFF000000) !=
1991 			    PMC_VERSION_MAJOR << 24 && a->pa_verbosity > 0)
1992 				warnx("WARNING: Log version 0x%x does not "
1993 				    "match compiled version 0x%x.",
1994 				    ev.pl_u.pl_i.pl_version,
1995 				    PMC_VERSION_MAJOR);
1996 			break;
1997 
1998 		case PMCLOG_TYPE_MAP_IN:
1999 			/*
2000 			 * Introduce an address range mapping for a
2001 			 * userland process or the kernel (pid == -1).
2002 			 *
2003 			 * We always allocate a process descriptor so
2004 			 * that subsequent samples seen for this
2005 			 * address range are mapped to the current
2006 			 * object being mapped in.
2007 			 */
2008 			pid = ev.pl_u.pl_mi.pl_pid;
2009 			if (pid == -1)
2010 				pp = pmcstat_kernproc;
2011 			else
2012 				pp = pmcstat_process_lookup(pid,
2013 				    PMCSTAT_ALLOCATE);
2014 
2015 			assert(pp != NULL);
2016 
2017 			image_path = pmcstat_string_intern(ev.pl_u.pl_mi.
2018 			    pl_pathname);
2019 			image = pmcstat_image_from_path(image_path, pid == -1);
2020 			if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
2021 				pmcstat_image_determine_type(image, a);
2022 			if (image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE)
2023 				pmcstat_image_link(pp, image,
2024 				    ev.pl_u.pl_mi.pl_start);
2025 			break;
2026 
2027 		case PMCLOG_TYPE_MAP_OUT:
2028 			/*
2029 			 * Remove an address map.
2030 			 */
2031 			pid = ev.pl_u.pl_mo.pl_pid;
2032 			if (pid == -1)
2033 				pp = pmcstat_kernproc;
2034 			else
2035 				pp = pmcstat_process_lookup(pid, 0);
2036 
2037 			if (pp == NULL)	/* unknown process */
2038 				break;
2039 
2040 			pmcstat_image_unmap(pp, ev.pl_u.pl_mo.pl_start,
2041 			    ev.pl_u.pl_mo.pl_end);
2042 			break;
2043 
2044 		case PMCLOG_TYPE_PCSAMPLE:
2045 			/*
2046 			 * Note: the `PCSAMPLE' log entry is not
2047 			 * generated by hpwmc(4) after version 2.
2048 			 */
2049 
2050 			/*
2051 			 * We bring in the gmon file for the image
2052 			 * currently associated with the PMC & pid
2053 			 * pair and increment the appropriate entry
2054 			 * bin inside this.
2055 			 */
2056 			pmcstat_stats.ps_samples_total++;
2057 
2058 			pc = ev.pl_u.pl_s.pl_pc;
2059 			pp = pmcstat_process_lookup(ev.pl_u.pl_s.pl_pid,
2060 			    PMCSTAT_ALLOCATE);
2061 			if ((ppm = pmcstat_process_find_map(pp, pc)) == NULL &&
2062 			    (ppm = pmcstat_process_find_map(pmcstat_kernproc,
2063 				pc)) == NULL) {	/* unknown process,offset pair */
2064 				pmcstat_stats.ps_samples_unknown_offset++;
2065 				break;
2066 			}
2067 
2068 			pmcstat_image_increment_bucket(ppm, pc,
2069 			    ev.pl_u.pl_s.pl_pmcid, a);
2070 
2071 			break;
2072 
2073 		case PMCLOG_TYPE_CALLCHAIN:
2074 			pmcstat_stats.ps_samples_total++;
2075 
2076 			cpuflags = ev.pl_u.pl_cc.pl_cpuflags;
2077 			cpu = PMC_CALLCHAIN_CPUFLAGS_TO_CPU(cpuflags);
2078 
2079 			/* Filter on the CPU id. */
2080 			if ((a->pa_cpumask & (1 << cpu)) == 0) {
2081 				pmcstat_stats.ps_samples_skipped++;
2082 				break;
2083 			}
2084 
2085 			pp = pmcstat_process_lookup(ev.pl_u.pl_cc.pl_pid,
2086 			    PMCSTAT_ALLOCATE);
2087 
2088 			pmcstat_record_callchain(pp,
2089 			    ev.pl_u.pl_cc.pl_pmcid, ev.pl_u.pl_cc.pl_npc,
2090 			    ev.pl_u.pl_cc.pl_pc,
2091 			    PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags), a);
2092 
2093 			if ((a->pa_flags & FLAG_DO_GPROF) == 0)
2094 				break;
2095 
2096 			pc = ev.pl_u.pl_cc.pl_pc[0];
2097 			if ((ppm = pmcstat_process_find_map(pp, pc)) == NULL &&
2098 			    (ppm = pmcstat_process_find_map(pmcstat_kernproc,
2099 				pc)) == NULL) { /* unknown offset */
2100 				pmcstat_stats.ps_samples_unknown_offset++;
2101 				break;
2102 			}
2103 
2104 			pmcstat_image_increment_bucket(ppm, pc,
2105 			    ev.pl_u.pl_cc.pl_pmcid, a);
2106 
2107 			break;
2108 
2109 		case PMCLOG_TYPE_PMCALLOCATE:
2110 			/*
2111 			 * Record the association pmc id between this
2112 			 * PMC and its name.
2113 			 */
2114 			pmcstat_pmcid_add(ev.pl_u.pl_a.pl_pmcid,
2115 			    pmcstat_string_intern(ev.pl_u.pl_a.pl_evname), a);
2116 			break;
2117 
2118 		case PMCLOG_TYPE_PROCEXEC:
2119 
2120 			/*
2121 			 * Change the executable image associated with
2122 			 * a process.
2123 			 */
2124 			pp = pmcstat_process_lookup(ev.pl_u.pl_x.pl_pid,
2125 			    PMCSTAT_ALLOCATE);
2126 
2127 			/* delete the current process map */
2128 			TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) {
2129 				TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
2130 				free(ppm);
2131 			}
2132 
2133 			/* associate this process  image */
2134 			image_path = pmcstat_string_intern(
2135 				ev.pl_u.pl_x.pl_pathname);
2136 			assert(image_path != NULL);
2137 			pmcstat_process_exec(pp, image_path,
2138 			    ev.pl_u.pl_x.pl_entryaddr, a);
2139 			break;
2140 
2141 		case PMCLOG_TYPE_PROCEXIT:
2142 
2143 			/*
2144 			 * Due to the way the log is generated, the
2145 			 * last few samples corresponding to a process
2146 			 * may appear in the log after the process
2147 			 * exit event is recorded.  Thus we keep the
2148 			 * process' descriptor and associated data
2149 			 * structures around, but mark the process as
2150 			 * having exited.
2151 			 */
2152 			pp = pmcstat_process_lookup(ev.pl_u.pl_e.pl_pid, 0);
2153 			if (pp == NULL)
2154 				break;
2155 			pp->pp_isactive = 0;	/* mark as a zombie */
2156 			break;
2157 
2158 		case PMCLOG_TYPE_SYSEXIT:
2159 			pp = pmcstat_process_lookup(ev.pl_u.pl_se.pl_pid, 0);
2160 			if (pp == NULL)
2161 				break;
2162 			pp->pp_isactive = 0;	/* make a zombie */
2163 			break;
2164 
2165 		case PMCLOG_TYPE_PROCFORK:
2166 
2167 			/*
2168 			 * Allocate a process descriptor for the new
2169 			 * (child) process.
2170 			 */
2171 			ppnew =
2172 			    pmcstat_process_lookup(ev.pl_u.pl_f.pl_newpid,
2173 				PMCSTAT_ALLOCATE);
2174 
2175 			/*
2176 			 * If we had been tracking the parent, clone
2177 			 * its address maps.
2178 			 */
2179 			pp = pmcstat_process_lookup(ev.pl_u.pl_f.pl_oldpid, 0);
2180 			if (pp == NULL)
2181 				break;
2182 			TAILQ_FOREACH(ppm, &pp->pp_map, ppm_next)
2183 			    pmcstat_image_link(ppnew, ppm->ppm_image,
2184 				ppm->ppm_lowpc);
2185 			break;
2186 
2187 		default:	/* other types of entries are not relevant */
2188 			break;
2189 		}
2190 	}
2191 
2192 	if (ev.pl_state == PMCLOG_EOF)
2193 		return (PMCSTAT_FINISHED);
2194 	else if (ev.pl_state == PMCLOG_REQUIRE_DATA)
2195 		return (PMCSTAT_RUNNING);
2196 
2197 	err(EX_DATAERR, "ERROR: event parsing failed (record %jd, "
2198 	    "offset 0x%jx)", (uintmax_t) ev.pl_count + 1, ev.pl_offset);
2199 }
2200 
2201 /*
2202  * Print log entries as text.
2203  */
2204 
2205 static int
2206 pmcstat_print_log(struct pmcstat_args *a)
2207 {
2208 	struct pmclog_ev ev;
2209 	uint32_t npc;
2210 
2211 	while (pmclog_read(a->pa_logparser, &ev) == 0) {
2212 		assert(ev.pl_state == PMCLOG_OK);
2213 		switch (ev.pl_type) {
2214 		case PMCLOG_TYPE_CALLCHAIN:
2215 			PMCSTAT_PRINT_ENTRY(a, "callchain",
2216 			    "%d 0x%x %d %d %c", ev.pl_u.pl_cc.pl_pid,
2217 			    ev.pl_u.pl_cc.pl_pmcid,
2218 			    PMC_CALLCHAIN_CPUFLAGS_TO_CPU(ev.pl_u.pl_cc. \
2219 				pl_cpuflags), ev.pl_u.pl_cc.pl_npc,
2220 			    PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(ev.pl_u.pl_cc.\
2221 			        pl_cpuflags) ? 'u' : 's');
2222 			for (npc = 0; npc < ev.pl_u.pl_cc.pl_npc; npc++)
2223 				PMCSTAT_PRINT_ENTRY(a, "...", "%p",
2224 				    (void *) ev.pl_u.pl_cc.pl_pc[npc]);
2225 			break;
2226 		case PMCLOG_TYPE_CLOSELOG:
2227 			PMCSTAT_PRINT_ENTRY(a,"closelog",);
2228 			break;
2229 		case PMCLOG_TYPE_DROPNOTIFY:
2230 			PMCSTAT_PRINT_ENTRY(a,"drop",);
2231 			break;
2232 		case PMCLOG_TYPE_INITIALIZE:
2233 			PMCSTAT_PRINT_ENTRY(a,"initlog","0x%x \"%s\"",
2234 			    ev.pl_u.pl_i.pl_version,
2235 			    pmc_name_of_cputype(ev.pl_u.pl_i.pl_arch));
2236 			if ((ev.pl_u.pl_i.pl_version & 0xFF000000) !=
2237 			    PMC_VERSION_MAJOR << 24 && a->pa_verbosity > 0)
2238 				warnx("WARNING: Log version 0x%x != expected "
2239 				    "version 0x%x.", ev.pl_u.pl_i.pl_version,
2240 				    PMC_VERSION);
2241 			break;
2242 		case PMCLOG_TYPE_MAP_IN:
2243 			PMCSTAT_PRINT_ENTRY(a,"map-in","%d %p \"%s\"",
2244 			    ev.pl_u.pl_mi.pl_pid,
2245 			    (void *) ev.pl_u.pl_mi.pl_start,
2246 			    ev.pl_u.pl_mi.pl_pathname);
2247 			break;
2248 		case PMCLOG_TYPE_MAP_OUT:
2249 			PMCSTAT_PRINT_ENTRY(a,"map-out","%d %p %p",
2250 			    ev.pl_u.pl_mo.pl_pid,
2251 			    (void *) ev.pl_u.pl_mo.pl_start,
2252 			    (void *) ev.pl_u.pl_mo.pl_end);
2253 			break;
2254 		case PMCLOG_TYPE_PCSAMPLE:
2255 			PMCSTAT_PRINT_ENTRY(a,"sample","0x%x %d %p %c",
2256 			    ev.pl_u.pl_s.pl_pmcid,
2257 			    ev.pl_u.pl_s.pl_pid,
2258 			    (void *) ev.pl_u.pl_s.pl_pc,
2259 			    ev.pl_u.pl_s.pl_usermode ? 'u' : 's');
2260 			break;
2261 		case PMCLOG_TYPE_PMCALLOCATE:
2262 			PMCSTAT_PRINT_ENTRY(a,"allocate","0x%x \"%s\" 0x%x",
2263 			    ev.pl_u.pl_a.pl_pmcid,
2264 			    ev.pl_u.pl_a.pl_evname,
2265 			    ev.pl_u.pl_a.pl_flags);
2266 			break;
2267 		case PMCLOG_TYPE_PMCATTACH:
2268 			PMCSTAT_PRINT_ENTRY(a,"attach","0x%x %d \"%s\"",
2269 			    ev.pl_u.pl_t.pl_pmcid,
2270 			    ev.pl_u.pl_t.pl_pid,
2271 			    ev.pl_u.pl_t.pl_pathname);
2272 			break;
2273 		case PMCLOG_TYPE_PMCDETACH:
2274 			PMCSTAT_PRINT_ENTRY(a,"detach","0x%x %d",
2275 			    ev.pl_u.pl_d.pl_pmcid,
2276 			    ev.pl_u.pl_d.pl_pid);
2277 			break;
2278 		case PMCLOG_TYPE_PROCCSW:
2279 			PMCSTAT_PRINT_ENTRY(a,"cswval","0x%x %d %jd",
2280 			    ev.pl_u.pl_c.pl_pmcid,
2281 			    ev.pl_u.pl_c.pl_pid,
2282 			    ev.pl_u.pl_c.pl_value);
2283 			break;
2284 		case PMCLOG_TYPE_PROCEXEC:
2285 			PMCSTAT_PRINT_ENTRY(a,"exec","0x%x %d %p \"%s\"",
2286 			    ev.pl_u.pl_x.pl_pmcid,
2287 			    ev.pl_u.pl_x.pl_pid,
2288 			    (void *) ev.pl_u.pl_x.pl_entryaddr,
2289 			    ev.pl_u.pl_x.pl_pathname);
2290 			break;
2291 		case PMCLOG_TYPE_PROCEXIT:
2292 			PMCSTAT_PRINT_ENTRY(a,"exitval","0x%x %d %jd",
2293 			    ev.pl_u.pl_e.pl_pmcid,
2294 			    ev.pl_u.pl_e.pl_pid,
2295 			    ev.pl_u.pl_e.pl_value);
2296 			break;
2297 		case PMCLOG_TYPE_PROCFORK:
2298 			PMCSTAT_PRINT_ENTRY(a,"fork","%d %d",
2299 			    ev.pl_u.pl_f.pl_oldpid,
2300 			    ev.pl_u.pl_f.pl_newpid);
2301 			break;
2302 		case PMCLOG_TYPE_USERDATA:
2303 			PMCSTAT_PRINT_ENTRY(a,"userdata","0x%x",
2304 			    ev.pl_u.pl_u.pl_userdata);
2305 			break;
2306 		case PMCLOG_TYPE_SYSEXIT:
2307 			PMCSTAT_PRINT_ENTRY(a,"exit","%d",
2308 			    ev.pl_u.pl_se.pl_pid);
2309 			break;
2310 		default:
2311 			fprintf(a->pa_printfile, "unknown event (type %d).\n",
2312 			    ev.pl_type);
2313 		}
2314 	}
2315 
2316 	if (ev.pl_state == PMCLOG_EOF)
2317 		return (PMCSTAT_FINISHED);
2318 	else if (ev.pl_state ==  PMCLOG_REQUIRE_DATA)
2319 		return (PMCSTAT_RUNNING);
2320 
2321 	errx(EX_DATAERR, "ERROR: event parsing failed "
2322 	    "(record %jd, offset 0x%jx).",
2323 	    (uintmax_t) ev.pl_count + 1, ev.pl_offset);
2324 	/*NOTREACHED*/
2325 }
2326 
2327 /*
2328  * Public Interfaces.
2329  */
2330 
2331 /*
2332  * Close a logfile, after first flushing all in-module queued data.
2333  */
2334 
2335 int
2336 pmcstat_close_log(struct pmcstat_args *a)
2337 {
2338 	if (pmc_flush_logfile() < 0 ||
2339 	    pmc_configure_logfile(-1) < 0)
2340 		err(EX_OSERR, "ERROR: logging failed");
2341 	a->pa_flags &= ~(FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE);
2342 	return (a->pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING :
2343 	    PMCSTAT_FINISHED);
2344 }
2345 
2346 
2347 
/*
 * Open a log file, for reading or writing.
 *
 * 'path' is interpreted as follows:
 *   - "-" selects descriptor 0 when 'mode' is PMCSTAT_OPEN_FOR_READ
 *     and descriptor 1 otherwise;
 *   - when opening for writing, a name that contains a ':' and does
 *     not begin with '/' or '.' is taken as "host:port" (split at the
 *     last ':') and a stream socket is connected to it;
 *   - anything else is opened as a file name.
 *
 * Returns the descriptor of the opened log.  A failure that yields an
 * error description terminates the program through errx(EX_OSERR).
 */

int
pmcstat_open_log(const char *path, int mode)
{
	int fd, gai_rc, oflags;
	size_t hostlen;
	const char *colon, *failure;
	struct addrinfo hints, *ai, *ailist;
	char host[MAXHOSTNAMELEN];
	const int reading = (mode == PMCSTAT_OPEN_FOR_READ);

	failure = NULL;
	fd = -1;

	if (strcmp(path, "-") == 0) {
		/* standard input or standard output */
		fd = reading ? 0 : 1;
	} else if (mode == PMCSTAT_OPEN_FOR_WRITE && path[0] != '/' &&
	    path[0] != '.' && (colon = strrchr(path, ':')) != NULL) {

		/* the host part must be non-empty and fit in 'host' */
		hostlen = colon - path;
		if (hostlen == 0 || hostlen >= sizeof(host)) {
			failure = strerror(EINVAL);
			goto out;
		}

		assert(hostlen < sizeof(host));
		(void) memcpy(host, path, hostlen);
		host[hostlen] = '\0';

		(void) memset(&hints, 0, sizeof(hints));
		hints.ai_family = AF_UNSPEC;
		hints.ai_socktype = SOCK_STREAM;
		gai_rc = getaddrinfo(host, colon + 1, &hints, &ailist);
		if (gai_rc != 0) {
			failure = gai_strerror(gai_rc);
			goto out;
		}

		/* use the first address that accepts a connection */
		for (ai = ailist; ai != NULL; ai = ai->ai_next) {
			fd = socket(ai->ai_family, ai->ai_socktype,
			    ai->ai_protocol);
			if (fd < 0) {
				failure = strerror(errno);
				continue;
			}
			if (connect(fd, ai->ai_addr, ai->ai_addrlen) >= 0) {
				failure = NULL;
				break;
			}
			failure = strerror(errno);
			(void) close(fd);
			fd = -1;
		}
		freeaddrinfo(ailist);

	} else {
		oflags = reading ? O_RDONLY : (O_WRONLY | O_CREAT | O_TRUNC);
		fd = open(path, oflags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
		if (fd < 0)
			failure = strerror(errno);
	}

  out:
	if (failure != NULL)
		errx(EX_OSERR, "ERROR: Cannot open \"%s\" for %s: %s.", path,
		    (reading ? "reading" : "writing"), failure);

	return (fd);
}
2432 
2433 /*
2434  * Process a log file in offline analysis mode.
2435  */
2436 
2437 int
2438 pmcstat_process_log(struct pmcstat_args *a)
2439 {
2440 
2441 	/*
2442 	 * If analysis has not been asked for, just print the log to
2443 	 * the current output file.
2444 	 */
2445 	if (a->pa_flags & FLAG_DO_PRINT)
2446 		return (pmcstat_print_log(a));
2447 	else
2448 		return (pmcstat_analyze_log(a));
2449 }
2450 
2451 /*
2452  * Initialize module.
2453  */
2454 
2455 void
2456 pmcstat_initialize_logging(struct pmcstat_args *a)
2457 {
2458 	int i;
2459 
2460 	(void) a;
2461 
2462 	/* use a convenient format for 'ldd' output */
2463 	if (setenv("LD_TRACE_LOADED_OBJECTS_FMT1","%o \"%p\" %x\n",1) != 0)
2464 		err(EX_OSERR, "ERROR: Cannot setenv");
2465 
2466 	/* Initialize hash tables */
2467 	pmcstat_string_initialize();
2468 	for (i = 0; i < PMCSTAT_NHASH; i++) {
2469 		LIST_INIT(&pmcstat_image_hash[i]);
2470 		LIST_INIT(&pmcstat_process_hash[i]);
2471 	}
2472 
2473 	/*
2474 	 * Create a fake 'process' entry for the kernel with pid -1.
2475 	 * hwpmc(4) will subsequently inform us about where the kernel
2476 	 * and any loaded kernel modules are mapped.
2477 	 */
2478 	if ((pmcstat_kernproc = pmcstat_process_lookup((pid_t) -1,
2479 		 PMCSTAT_ALLOCATE)) == NULL)
2480 		err(EX_OSERR, "ERROR: Cannot initialize logging");
2481 }
2482 
2483 /*
2484  * Shutdown module.
2485  */
2486 
2487 void
2488 pmcstat_shutdown_logging(struct pmcstat_args *a)
2489 {
2490 	int i;
2491 	FILE *mf;
2492 	struct pmcstat_gmonfile *pgf, *pgftmp;
2493 	struct pmcstat_image *pi, *pitmp;
2494 	struct pmcstat_process *pp, *pptmp;
2495 	struct pmcstat_cgnode_hash *pch, *pchtmp;
2496 
2497 	/* determine where to send the map file */
2498 	mf = NULL;
2499 	if (a->pa_mapfilename != NULL)
2500 		mf = (strcmp(a->pa_mapfilename, "-") == 0) ?
2501 		    a->pa_printfile : fopen(a->pa_mapfilename, "w");
2502 
2503 	if (mf == NULL && a->pa_flags & FLAG_DO_GPROF &&
2504 	    a->pa_verbosity >= 2)
2505 		mf = a->pa_printfile;
2506 
2507 	if (mf)
2508 		(void) fprintf(mf, "MAP:\n");
2509 
2510 
2511 	if (a->pa_flags & FLAG_DO_CALLGRAPHS)
2512 		pmcstat_callgraph_print(a);
2513 
2514 	/*
2515 	 * Sync back all gprof flat profile data.
2516 	 */
2517 	for (i = 0; i < PMCSTAT_NHASH; i++) {
2518 		LIST_FOREACH(pi, &pmcstat_image_hash[i], pi_next) {
2519 			if (mf)
2520 				(void) fprintf(mf, " \"%s\" => \"%s\"",
2521 				    pmcstat_string_unintern(pi->pi_execpath),
2522 				    pmcstat_string_unintern(
2523 				    pi->pi_samplename));
2524 
2525 			/* flush gmon.out data to disk */
2526 			LIST_FOREACH(pgf, &pi->pi_gmlist, pgf_next) {
2527 				pmcstat_gmon_unmap_file(pgf);
2528 				if (mf)
2529 					(void) fprintf(mf, " %s/%d",
2530 					    pmcstat_pmcid_to_name(
2531 					    pgf->pgf_pmcid),
2532 					    pgf->pgf_nsamples);
2533 				if (pgf->pgf_overflow && a->pa_verbosity >= 1)
2534 					warnx("WARNING: profile \"%s\" "
2535 					    "overflowed.",
2536 					    pmcstat_string_unintern(
2537 					        pgf->pgf_name));
2538 			}
2539 
2540 			if (mf)
2541 				(void) fprintf(mf, "\n");
2542 		}
2543 	}
2544 
2545 	/*
2546 	 * Compute arcs and add these to the gprof files.
2547 	 */
2548 	if (a->pa_flags & FLAG_DO_GPROF && a->pa_graphdepth > 1)
2549 		pmcstat_callgraph_do_gmon_arcs();
2550 
2551 	/*
2552 	 * Free memory.
2553 	 */
2554 	for (i = 0; i < PMCSTAT_NHASH; i++) {
2555 		LIST_FOREACH_SAFE(pch, &pmcstat_cgnode_hash[i], pch_next,
2556 		    pchtmp) {
2557 			pmcstat_cgnode_free(pch->pch_cgnode);
2558 			free(pch);
2559 		}
2560 	}
2561 
2562 	for (i = 0; i < PMCSTAT_NHASH; i++) {
2563 		LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next, pitmp)
2564 		{
2565 			LIST_FOREACH_SAFE(pgf, &pi->pi_gmlist, pgf_next,
2566 			    pgftmp) {
2567 				if (pgf->pgf_file)
2568 					(void) fclose(pgf->pgf_file);
2569 				LIST_REMOVE(pgf, pgf_next);
2570 				free(pgf);
2571 			}
2572 			if (pi->pi_symbols)
2573 				free(pi->pi_symbols);
2574 
2575 			LIST_REMOVE(pi, pi_next);
2576 			free(pi);
2577 		}
2578 
2579 		LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[i], pp_next,
2580 		    pptmp) {
2581 			LIST_REMOVE(pp, pp_next);
2582 			free(pp);
2583 		}
2584 	}
2585 
2586 	pmcstat_string_shutdown();
2587 
2588 	/*
2589 	 * Print errors unless -q was specified.  Print all statistics
2590 	 * if verbosity > 1.
2591 	 */
2592 #define	PRINT(N,V,A) do {						\
2593 		if (pmcstat_stats.ps_##V || (A)->pa_verbosity >= 2)	\
2594 			(void) fprintf((A)->pa_printfile, " %-40s %d\n",\
2595 			    N, pmcstat_stats.ps_##V);			\
2596 	} while (0)
2597 
2598 	if (a->pa_verbosity >= 1 && a->pa_flags & FLAG_DO_GPROF) {
2599 		(void) fprintf(a->pa_printfile, "CONVERSION STATISTICS:\n");
2600 		PRINT("#exec/a.out", exec_aout, a);
2601 		PRINT("#exec/elf", exec_elf, a);
2602 		PRINT("#exec/unknown", exec_indeterminable, a);
2603 		PRINT("#exec handling errors", exec_errors, a);
2604 		PRINT("#samples/total", samples_total, a);
2605 		PRINT("#samples/unclaimed", samples_unknown_offset, a);
2606 		PRINT("#samples/unknown-object", samples_indeterminable, a);
2607 		PRINT("#callchain/dubious-frames", callchain_dubious_frames,
2608 		    a);
2609 	}
2610 
2611 	if (mf)
2612 		(void) fclose(mf);
2613 }
2614