xref: /freebsd/usr.sbin/pmcstat/pmcpl_calltree.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2012, Fabien Thomas
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Process hwpmc(4) samples as calltree.
31  *
32  * Output file format compatible with Kcachegrind (kdesdk).
33  * Handle top mode with a sorted tree display.
34  */
35 
36 #include <sys/cdefs.h>
37 #include <sys/param.h>
38 #include <sys/endian.h>
39 #include <sys/queue.h>
40 
41 #include <assert.h>
42 #include <curses.h>
43 #include <ctype.h>
44 #include <err.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <pmc.h>
48 #include <pmclog.h>
49 #include <stdint.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <unistd.h>
54 #include <sysexits.h>
55 
56 #include "pmcstat.h"
57 #include "pmcstat_log.h"
58 #include "pmcstat_top.h"
59 #include "pmcpl_calltree.h"
60 
61 #define	min(A,B)		((A) < (B) ? (A) : (B))
62 #define	max(A,B)		((A) > (B) ? (A) : (B))
63 
64 #define	PMCPL_CT_GROWSIZE	4
65 
66 static int pmcstat_skiplink = 0;
67 
68 struct pmcpl_ct_node;
69 
70 /* Get the sample value for PMC a. */
71 #define	PMCPL_CT_SAMPLE(a, b) \
72 	((a) < (b)->npmcs ? (b)->sb[a] : 0)
73 
74 /* Get the sample value in percent related to rsamples. */
75 #define	PMCPL_CT_SAMPLEP(a, b) \
76 	(PMCPL_CT_SAMPLE(a, b) * 100.0 / rsamples->sb[a])
77 
78 struct pmcpl_ct_sample {
79 	int		npmcs;		/* Max pmc index available. */
80 	unsigned	*sb;		/* Sample buffer for 0..npmcs. */
81 };
82 
83 struct pmcpl_ct_arc {
84 	struct pmcpl_ct_sample	pcta_samples;
85 	struct pmcpl_ct_sample	pcta_callid;
86 	unsigned		pcta_call;
87 	struct pmcpl_ct_node	*pcta_child;
88 };
89 
90 struct pmcpl_ct_instr {
91 	uintfptr_t		pctf_func;
92 	struct pmcpl_ct_sample	pctf_samples;
93 };
94 
95 /*
96  * Each calltree node is tracked by a pmcpl_ct_node struct.
97  */
98 struct pmcpl_ct_node {
99 	struct pmcstat_image	*pct_image;
100 	uintfptr_t		pct_func;
101 
102 	struct pmcstat_symbol	*pct_sym;
103 	pmcstat_interned_string	pct_ifl;
104 	pmcstat_interned_string	pct_ifn;
105 
106 	struct pmcpl_ct_sample	pct_samples;
107 
108 	int			pct_narc;
109 	int			pct_arc_c;
110 	struct pmcpl_ct_arc 	*pct_arc;
111 
112 	/* TODO: optimize for large number of items. */
113 	int			pct_ninstr;
114 	int			pct_instr_c;
115 	struct pmcpl_ct_instr	*pct_instr;
116 
117 #define PMCPL_PCT_ADDR	0
118 #define PMCPL_PCT_NAME	1
119 	char			pct_type;
120 #define	PMCPL_PCT_WHITE	0
121 #define	PMCPL_PCT_GREY	1
122 #define	PMCPL_PCT_BLACK	2
123 	char			pct_color;
124 };
125 
126 struct pmcpl_ct_node_hash {
127 	struct pmcpl_ct_node  *pch_ctnode;
128 	STAILQ_ENTRY(pmcpl_ct_node_hash) pch_next;
129 };
130 
131 static struct pmcpl_ct_sample pmcpl_ct_callid;
132 
133 #define	PMCPL_CT_MAXCOL		PMC_CALLCHAIN_DEPTH_MAX
134 #define	PMCPL_CT_MAXLINE	1024	/* TODO: dynamic. */
135 
136 struct pmcpl_ct_line {
137 	unsigned	ln_sum;
138 	unsigned	ln_index;
139 };
140 
141 static struct pmcpl_ct_line	pmcpl_ct_topmax[PMCPL_CT_MAXLINE+1];
142 static struct pmcpl_ct_node
143     *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL+1][PMCPL_CT_MAXLINE+1];
144 
145 /*
146  * All nodes indexed by function/image name are placed in a hash table.
147  */
148 static STAILQ_HEAD(,pmcpl_ct_node_hash) pmcpl_ct_node_hash[PMCSTAT_NHASH];
149 
150 /*
151  * Root node for the graph.
152  */
153 static struct pmcpl_ct_node *pmcpl_ct_root;
154 
155 /*
156  * Prototypes
157  */
158 
159 /*
160  * Initialize a samples.
161  */
162 
163 static void
164 pmcpl_ct_samples_init(struct pmcpl_ct_sample *samples)
165 {
166 
167 	samples->npmcs = 0;
168 	samples->sb = NULL;
169 }
170 
171 /*
172  * Free a samples.
173  */
174 
175 static void
176 pmcpl_ct_samples_free(struct pmcpl_ct_sample *samples)
177 {
178 
179 	samples->npmcs = 0;
180 	free(samples->sb);
181 	samples->sb = NULL;
182 }
183 
184 /*
185  * Grow a sample block to store pmcstat_npmcs PMCs.
186  */
187 
188 static void
189 pmcpl_ct_samples_grow(struct pmcpl_ct_sample *samples)
190 {
191 	unsigned int npmcs;
192 
193 	/* Enough storage. */
194 	if (pmcstat_npmcs <= samples->npmcs)
195                 return;
196 
197 	npmcs = samples->npmcs +
198 	    max(pmcstat_npmcs - samples->npmcs, PMCPL_CT_GROWSIZE);
199 	samples->sb = reallocarray(samples->sb, npmcs, sizeof(unsigned));
200 	if (samples->sb == NULL)
201 		errx(EX_SOFTWARE, "ERROR: out of memory");
202 	bzero((char *)samples->sb + samples->npmcs * sizeof(unsigned),
203 	    (npmcs - samples->npmcs) * sizeof(unsigned));
204 	samples->npmcs = npmcs;
205 }
206 
207 /*
208  * Compute the sum of all root arcs.
209  */
210 
211 static void
212 pmcpl_ct_samples_root(struct pmcpl_ct_sample *samples)
213 {
214 	int i, pmcin;
215 
216 	pmcpl_ct_samples_init(samples);
217 	pmcpl_ct_samples_grow(samples);
218 
219 	for (i = 0; i < pmcpl_ct_root->pct_narc; i++)
220 		for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++)
221 			samples->sb[pmcin] += PMCPL_CT_SAMPLE(pmcin,
222 			    &pmcpl_ct_root->pct_arc[i].pcta_samples);
223 }
224 
225 /*
226  * Grow the arc table.
227  */
228 
229 static void
230 pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items)
231 {
232 	unsigned int nmaxsize;
233 
234 	if (cursize < *maxsize)
235 		return;
236 
237 	nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
238 	*items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_arc));
239 	if (*items == NULL)
240 		errx(EX_SOFTWARE, "ERROR: out of memory");
241 	bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_arc),
242 	    (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_arc));
243 	*maxsize = nmaxsize;
244 }
245 
246 /*
247  * Grow the instr table.
248  */
249 
250 static void
251 pmcpl_ct_instr_grow(int cursize, int *maxsize, struct pmcpl_ct_instr **items)
252 {
253 	unsigned int nmaxsize;
254 
255 	if (cursize < *maxsize)
256 		return;
257 
258 	nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
259 	*items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_instr));
260 	if (*items == NULL)
261 		errx(EX_SOFTWARE, "ERROR: out of memory");
262 	bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_instr),
263 	    (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_instr));
264 	*maxsize = nmaxsize;
265 }
266 
267 /*
268  * Add a new instruction sample to given node.
269  */
270 
271 static void
272 pmcpl_ct_instr_add(struct pmcpl_ct_node *ct, int pmcin,
273     uintfptr_t pc, unsigned v)
274 {
275 	int i;
276 	struct pmcpl_ct_instr *in;
277 
278 	for (i = 0; i<ct->pct_ninstr; i++) {
279 		if (ct->pct_instr[i].pctf_func == pc) {
280 			in = &ct->pct_instr[i];
281 			pmcpl_ct_samples_grow(&in->pctf_samples);
282 			in->pctf_samples.sb[pmcin] += v;
283 			return;
284 		}
285 	}
286 
287 	pmcpl_ct_instr_grow(ct->pct_ninstr, &ct->pct_instr_c, &ct->pct_instr);
288 	in = &ct->pct_instr[ct->pct_ninstr];
289 	in->pctf_func = pc;
290 	pmcpl_ct_samples_init(&in->pctf_samples);
291 	pmcpl_ct_samples_grow(&in->pctf_samples);
292 	in->pctf_samples.sb[pmcin] = v;
293 	ct->pct_ninstr++;
294 }
295 
296 /*
297  * Allocate a new node.
298  */
299 
300 static struct pmcpl_ct_node *
301 pmcpl_ct_node_allocate(void)
302 {
303 	struct pmcpl_ct_node *ct;
304 
305 	if ((ct = malloc(sizeof(*ct))) == NULL)
306 		err(EX_OSERR, "ERROR: Cannot allocate callgraph node");
307 
308 	pmcpl_ct_samples_init(&ct->pct_samples);
309 
310 	ct->pct_sym	= NULL;
311 	ct->pct_image	= NULL;
312 	ct->pct_func	= 0;
313 
314 	ct->pct_narc	= 0;
315 	ct->pct_arc_c	= 0;
316 	ct->pct_arc	= NULL;
317 
318 	ct->pct_ninstr	= 0;
319 	ct->pct_instr_c	= 0;
320 	ct->pct_instr	= NULL;
321 
322 	ct->pct_color   = PMCPL_PCT_WHITE;
323 
324 	return (ct);
325 }
326 
327 /*
328  * Free a node.
329  */
330 
331 static void
332 pmcpl_ct_node_free(struct pmcpl_ct_node *ct)
333 {
334 	int i;
335 
336 	for (i = 0; i < ct->pct_narc; i++) {
337 		pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_samples);
338 		pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_callid);
339 	}
340 
341 	pmcpl_ct_samples_free(&ct->pct_samples);
342 	free(ct->pct_arc);
343 	free(ct->pct_instr);
344 	free(ct);
345 }
346 
347 /*
348  * Clear the graph tag on each node.
349  */
350 static void
351 pmcpl_ct_node_cleartag(void)
352 {
353 	int i;
354 	struct pmcpl_ct_node_hash *pch;
355 
356 	for (i = 0; i < PMCSTAT_NHASH; i++)
357 		STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next)
358 			pch->pch_ctnode->pct_color = PMCPL_PCT_WHITE;
359 
360 	pmcpl_ct_root->pct_color = PMCPL_PCT_WHITE;
361 }
362 
363 /*
364  * Print the callchain line by line with maximum cost at top.
365  */
366 
367 static int
368 pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct,
369     struct pmcpl_ct_sample *rsamples, int x, int *y)
370 {
371 	int i, terminal;
372 	struct pmcpl_ct_arc *arc;
373 
374 	if (ct->pct_color == PMCPL_PCT_GREY)
375 		return 0;
376 
377 	if (x >= PMCPL_CT_MAXCOL) {
378 		pmcpl_ct_topscreen[x][*y] = NULL;
379 		return 1;
380 	}
381 	pmcpl_ct_topscreen[x][*y] = ct;
382 
383 	/*
384 	 * Check if this is a terminal node.
385 	 * We need to check that some samples exist
386 	 * for at least one arc for that PMC.
387 	 */
388 	terminal = 1;
389 	for (i = 0; i < ct->pct_narc; i++) {
390 		arc = &ct->pct_arc[i];
391 		if (arc->pcta_child->pct_color != PMCPL_PCT_GREY &&
392 		    PMCPL_CT_SAMPLE(pmcin,
393 		    &arc->pcta_samples) != 0 &&
394 		    PMCPL_CT_SAMPLEP(pmcin,
395 		    &arc->pcta_samples) > pmcstat_threshold) {
396 			terminal = 0;
397 			break;
398 		}
399 	}
400 
401 	if (ct->pct_narc == 0 || terminal) {
402 		pmcpl_ct_topscreen[x+1][*y] = NULL;
403 		if (*y >= PMCPL_CT_MAXLINE)
404 			return 1;
405 		*y = *y + 1;
406 		for (i=0; i < x; i++)
407 			pmcpl_ct_topscreen[i][*y] =
408 			    pmcpl_ct_topscreen[i][*y - 1];
409 		return 0;
410 	}
411 
412 	ct->pct_color = PMCPL_PCT_GREY;
413 	for (i = 0; i < ct->pct_narc; i++) {
414 		if (PMCPL_CT_SAMPLE(pmcin,
415 		    &ct->pct_arc[i].pcta_samples) == 0)
416 			continue;
417 		if (PMCPL_CT_SAMPLEP(pmcin,
418 		    &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) {
419 			if (pmcpl_ct_node_dumptop(pmcin,
420 			        ct->pct_arc[i].pcta_child,
421 			        rsamples, x+1, y)) {
422 				ct->pct_color = PMCPL_PCT_BLACK;
423 				return 1;
424 			}
425 		}
426 	}
427 	ct->pct_color = PMCPL_PCT_BLACK;
428 
429 	return 0;
430 }
431 
432 /*
433  * Compare two top line by sum.
434  */
435 static int
436 pmcpl_ct_line_compare(const void *a, const void *b)
437 {
438 	const struct pmcpl_ct_line *ct1, *ct2;
439 
440 	ct1 = (const struct pmcpl_ct_line *) a;
441 	ct2 = (const struct pmcpl_ct_line *) b;
442 
443 	/* Sort in reverse order */
444 	if (ct1->ln_sum < ct2->ln_sum)
445 		return (1);
446 	if (ct1->ln_sum > ct2->ln_sum)
447 		return (-1);
448 	return (0);
449 }
450 
451 /*
452  * Format and display given PMC index.
453  */
454 
455 static void
456 pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy)
457 {
458 #undef	TS
459 #undef	TSI
460 #define	TS(x, y)	(pmcpl_ct_topscreen[x][y])
461 #define	TSI(x, y)	(pmcpl_ct_topscreen[x][pmcpl_ct_topmax[y].ln_index])
462 
463 	int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y;
464 	float v;
465 	char ns[30], vs[10], is[20];
466 	struct pmcpl_ct_node *ct;
467 	const char *space = " ";
468 
469 	/*
470 	 * Sort by line cost.
471 	 */
472 	for (y = 0; ; y++) {
473 		ct = TS(1, y);
474 		if (ct == NULL)
475 			break;
476 
477 		pmcpl_ct_topmax[y].ln_sum = 0;
478 		pmcpl_ct_topmax[y].ln_index = y;
479 		for (x = 1; TS(x, y) != NULL; x++) {
480 			pmcpl_ct_topmax[y].ln_sum +=
481 			    PMCPL_CT_SAMPLE(pmcin, &TS(x, y)->pct_samples);
482 		}
483 	}
484 	qsort(pmcpl_ct_topmax, y, sizeof(pmcpl_ct_topmax[0]),
485 	    pmcpl_ct_line_compare);
486 	pmcpl_ct_topmax[y].ln_index = y;
487 
488 	for (y = 0; y < maxy; y++) {
489 		ct = TSI(1, y);
490 		if (ct == NULL)
491 			break;
492 
493 		if (y > 0)
494 			PMCSTAT_PRINTW("\n");
495 
496 		/* Output sum. */
497 		v = pmcpl_ct_topmax[y].ln_sum * 100.0 /
498 		    rsamples->sb[pmcin];
499 		snprintf(vs, sizeof(vs), "%.1f", v);
500 		v_attrs = PMCSTAT_ATTRPERCENT(v);
501 		PMCSTAT_ATTRON(v_attrs);
502 		PMCSTAT_PRINTW("%5.5s ", vs);
503 		PMCSTAT_ATTROFF(v_attrs);
504 
505 		width = indentwidth = 5 + 1;
506 
507 		for (x = 1; (ct = TSI(x, y)) != NULL; x++) {
508 
509 			vs[0] = '\0'; vs_len = 0;
510 			is[0] = '\0'; is_len = 0;
511 
512 			/* Format value. */
513 			v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples);
514 			if (v > pmcstat_threshold)
515 				vs_len  = snprintf(vs, sizeof(vs),
516 				    "(%.1f%%)", v);
517 			v_attrs = PMCSTAT_ATTRPERCENT(v);
518 
519 			if (pmcstat_skiplink && v <= pmcstat_threshold) {
520 				strlcpy(ns, ".", sizeof(ns));
521 				ns_len = 1;
522 			} else {
523 			if (ct->pct_sym != NULL) {
524 				ns_len = snprintf(ns, sizeof(ns), "%s",
525 				    pmcstat_string_unintern(ct->pct_sym->ps_name));
526 			} else
527 				ns_len = snprintf(ns, sizeof(ns), "%p",
528 				    (void *)ct->pct_func);
529 
530 			/* Format image. */
531 			if (x == 1 ||
532 			    TSI(x-1, y)->pct_image != ct->pct_image)
533 				is_len = snprintf(is, sizeof(is), "@%s",
534 				    pmcstat_string_unintern(ct->pct_image->pi_name));
535 
536 			/* Check for line wrap. */
537 			width += ns_len + is_len + vs_len + 1;
538 			}
539 			if (width >= pmcstat_displaywidth) {
540 				maxy--;
541 				if (y >= maxy)
542 					break;
543 				PMCSTAT_PRINTW("\n%*s", indentwidth, space);
544 				width = indentwidth + ns_len + is_len + vs_len;
545 			}
546 
547 			PMCSTAT_ATTRON(v_attrs);
548 			PMCSTAT_PRINTW("%s%s%s ", ns, is, vs);
549 			PMCSTAT_ATTROFF(v_attrs);
550 		}
551 	}
552 }
553 
554 /*
555  * Output top mode snapshot.
556  */
557 
558 void
559 pmcpl_ct_topdisplay(void)
560 {
561 	int y;
562 	struct pmcpl_ct_sample r, *rsamples;
563 
564 	rsamples = &r;
565 	pmcpl_ct_samples_root(rsamples);
566 	pmcpl_ct_node_cleartag();
567 
568 	PMCSTAT_PRINTW("%5.5s %s\n", "%SAMP", "CALLTREE");
569 
570 	y = 0;
571 	if (pmcpl_ct_node_dumptop(pmcstat_pmcinfilter,
572 	    pmcpl_ct_root, rsamples, 0, &y))
573 		PMCSTAT_PRINTW("...\n");
574 	pmcpl_ct_topscreen[1][y] = NULL;
575 
576 	pmcpl_ct_node_printtop(rsamples,
577 	    pmcstat_pmcinfilter, pmcstat_displayheight - 2);
578 
579 	pmcpl_ct_samples_free(rsamples);
580 }
581 
582 /*
583  * Handle top mode keypress.
584  */
585 
586 int
587 pmcpl_ct_topkeypress(int c, void *arg)
588 {
589 	WINDOW *w;
590 
591 	w = (WINDOW *)arg;
592 
593 	switch (c) {
594 	case 'f':
595 		pmcstat_skiplink = !pmcstat_skiplink;
596 		wprintw(w, "skip empty link %s",
597 		    pmcstat_skiplink ? "on" : "off");
598 		break;
599 	}
600 
601 	return 0;
602 }
603 
604 /*
605  * Look for a callgraph node associated with pmc `pmcid' in the global
606  * hash table that corresponds to the given `pc' value in the process map
607  * `ppm'.
608  */
609 
610 static void
611 pmcpl_ct_node_update(struct pmcpl_ct_node *parent,
612     struct pmcpl_ct_node *child, int pmcin, unsigned v, int cd)
613 {
614 	struct pmcpl_ct_arc *arc;
615 	int i;
616 
617 	assert(parent != NULL);
618 
619 	/*
620 	 * Find related arc in parent node and
621 	 * increment the sample count.
622 	 */
623 	for (i = 0; i < parent->pct_narc; i++) {
624 		if (parent->pct_arc[i].pcta_child == child) {
625 			arc = &parent->pct_arc[i];
626 			pmcpl_ct_samples_grow(&arc->pcta_samples);
627 			arc->pcta_samples.sb[pmcin] += v;
628 			/* Estimate call count. */
629 			if (cd) {
630 			pmcpl_ct_samples_grow(&arc->pcta_callid);
631 			if (pmcpl_ct_callid.sb[pmcin] -
632 			    arc->pcta_callid.sb[pmcin] > 1)
633 				arc->pcta_call++;
634 			arc->pcta_callid.sb[pmcin] =
635 			    pmcpl_ct_callid.sb[pmcin];
636 			}
637 			return;
638 		}
639 	}
640 
641 	/*
642 	 * No arc found for us, add ourself to the parent.
643 	 */
644 	pmcpl_ct_arc_grow(parent->pct_narc,
645 	    &parent->pct_arc_c, &parent->pct_arc);
646 	arc = &parent->pct_arc[parent->pct_narc];
647 	pmcpl_ct_samples_grow(&arc->pcta_samples);
648 	arc->pcta_samples.sb[pmcin] = v;
649 	arc->pcta_call = 1;
650 	if (cd) {
651 		pmcpl_ct_samples_grow(&arc->pcta_callid);
652 		arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin];
653 	}
654 	arc->pcta_child = child;
655 	parent->pct_narc++;
656 }
657 
658 /*
659  * Lookup by image/pc.
660  */
661 
662 static struct pmcpl_ct_node *
663 pmcpl_ct_node_hash_lookup(struct pmcstat_image *image, uintfptr_t pc,
664     struct pmcstat_symbol *sym, char *fl, char *fn)
665 {
666 	int i;
667 	unsigned int hash;
668 	struct pmcpl_ct_node *ct;
669 	struct pmcpl_ct_node_hash *h;
670 	pmcstat_interned_string	ifl, ifn;
671 
672 	if (fn != NULL) {
673 		ifl = pmcstat_string_intern(fl);
674 		ifn = pmcstat_string_intern(fn);
675 	} else {
676 		ifl = 0;
677 		ifn = 0;
678 	}
679 
680 	for (hash = i = 0; i < (int)sizeof(uintfptr_t); i++)
681 		hash += (pc >> i) & 0xFF;
682 
683 	hash &= PMCSTAT_HASH_MASK;
684 
685 	STAILQ_FOREACH(h, &pmcpl_ct_node_hash[hash], pch_next) {
686 		ct = h->pch_ctnode;
687 
688 		assert(ct != NULL);
689 
690 		if (ct->pct_image == image && ct->pct_func == pc) {
691 			if (fn == NULL)
692 				return (ct);
693 			if (ct->pct_type == PMCPL_PCT_NAME &&
694 			    ct->pct_ifl == ifl && ct->pct_ifn == ifn)
695 				return (ct);
696 		}
697 	}
698 
699 	/*
700 	 * We haven't seen this (pmcid, pc) tuple yet, so allocate a
701 	 * new callgraph node and a new hash table entry for it.
702 	 */
703 	ct = pmcpl_ct_node_allocate();
704 	if ((h = malloc(sizeof(*h))) == NULL)
705 		err(EX_OSERR, "ERROR: Could not allocate callgraph node");
706 
707 	if (fn != NULL) {
708 		ct->pct_type = PMCPL_PCT_NAME;
709 		ct->pct_ifl = ifl;
710 		ct->pct_ifn = ifn;
711 	} else
712 		ct->pct_type = PMCPL_PCT_ADDR;
713 	ct->pct_image = image;
714 	ct->pct_func = pc;
715 	ct->pct_sym = sym;
716 
717 	h->pch_ctnode = ct;
718 	STAILQ_INSERT_HEAD(&pmcpl_ct_node_hash[hash], h, pch_next);
719 	return (ct);
720 }
721 
722 /*
723  * Record a callchain.
724  */
725 
726 void
727 pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr,
728     uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu)
729 {
730 	int i, n, pmcin;
731 	uintfptr_t pc, loadaddress;
732 	struct pmcstat_image *image;
733 	struct pmcstat_symbol *sym;
734 	struct pmcstat_pcmap *ppm[PMC_CALLCHAIN_DEPTH_MAX];
735 	struct pmcstat_process *km;
736 	struct pmcpl_ct_node *ct;
737 	struct pmcpl_ct_node *ctl[PMC_CALLCHAIN_DEPTH_MAX+1];
738 
739 	(void) cpu;
740 
741 	assert(nsamples>0 && nsamples<=PMC_CALLCHAIN_DEPTH_MAX);
742 
743 	/* Get the PMC index. */
744 	pmcin = pmcr->pr_pmcin;
745 
746 	/*
747 	 * Validate mapping for the callchain.
748 	 * Go from bottom to first invalid entry.
749 	 */
750 	km = pmcstat_kernproc;
751 	for (n = 0; n < (int)nsamples; n++) {
752 		ppm[n] = pmcstat_process_find_map(usermode ?
753 		    pp : km, cc[n]);
754 		if (ppm[n] == NULL) {
755 			/* Detect full frame capture (kernel + user). */
756 			if (!usermode) {
757 				ppm[n] = pmcstat_process_find_map(pp, cc[n]);
758 				if (ppm[n] != NULL)
759 					km = pp;
760 			}
761 		}
762 		if (ppm[n] == NULL)
763 			break;
764 	}
765 	if (n-- == 0) {
766 		pmcstat_stats.ps_callchain_dubious_frames++;
767 		pmcr->pr_dubious_frames++;
768 		return;
769 	}
770 
771 	/* Increase the call generation counter. */
772 	pmcpl_ct_samples_grow(&pmcpl_ct_callid);
773 	pmcpl_ct_callid.sb[pmcin]++;
774 
775 	/*
776 	 * Build node list.
777 	 */
778 	ctl[0] = pmcpl_ct_root;
779 	for (i = 1; n >= 0; n--) {
780 		image = ppm[n]->ppm_image;
781 		loadaddress = ppm[n]->ppm_lowpc +
782 		    image->pi_vaddr - image->pi_start;
783 		/* Convert to an offset in the image. */
784 		pc = cc[n] - loadaddress;
785 		/*
786 		 * Try determine the function at this offset.  If we can't
787 		 * find a function round leave the `pc' value alone.
788 		 */
789 		if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
790 			pc = sym->ps_start;
791 		else
792 			pmcstat_stats.ps_samples_unknown_function++;
793 
794 		ct = pmcpl_ct_node_hash_lookup(image, pc, sym, NULL, NULL);
795 		if (ct == NULL) {
796 			pmcstat_stats.ps_callchain_dubious_frames++;
797 			continue;
798 		}
799 		ctl[i++] = ct;
800 	}
801 	/* No valid node found. */
802 	if (i == 1)
803 		return;
804 	n = i;
805 
806 	ct = ctl[0];
807 	for (i = 1; i < n; i++)
808 		pmcpl_ct_node_update(ctl[i-1], ctl[i], pmcin, 1, 1);
809 
810 	/*
811 	 * Increment the sample count for this PMC.
812 	 */
813 	pmcpl_ct_samples_grow(&ctl[n-1]->pct_samples);
814 	ctl[n-1]->pct_samples.sb[pmcin]++;
815 
816 	/* Update per instruction sample if required. */
817 	if (args.pa_ctdumpinstr)
818 		pmcpl_ct_instr_add(ctl[n-1], pmcin, cc[0] -
819 		    (ppm[0]->ppm_lowpc + ppm[0]->ppm_image->pi_vaddr -
820 		     ppm[0]->ppm_image->pi_start), 1);
821 }
822 
823 /*
824  * Print node child cost.
825  */
826 
827 static void
828 pmcpl_ct_node_printchild(struct pmcpl_ct_node *ct, uintfptr_t paddr,
829     int pline)
830 {
831 	int i, j, line;
832 	uintfptr_t addr;
833 	struct pmcpl_ct_node *child;
834 	char sourcefile[PATH_MAX];
835 	char funcname[PATH_MAX];
836 
837 	/*
838 	 * Child cost.
839 	 * TODO: attach child cost to the real position in the function.
840 	 * TODO: cfn=<fn> / call <ncall> addr(<fn>) / addr(call <fn>) <arccost>
841 	 */
842 	for (i=0 ; i<ct->pct_narc; i++) {
843 		child = ct->pct_arc[i].pcta_child;
844 		/* Object binary. */
845 		fprintf(args.pa_graphfile, "cob=%s\n",
846 		    pmcstat_string_unintern(child->pct_image->pi_fullpath));
847 		/* Child function name. */
848 		addr = child->pct_image->pi_vaddr + child->pct_func;
849 		line = 0;
850 		/* Child function source file. */
851 		if (child->pct_type == PMCPL_PCT_NAME) {
852 			fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n",
853 			    pmcstat_string_unintern(child->pct_ifl),
854 			    pmcstat_string_unintern(child->pct_ifn));
855 		} else if (pmcstat_image_addr2line(child->pct_image, addr,
856 		    sourcefile, sizeof(sourcefile), &line,
857 		    funcname, sizeof(funcname))) {
858 			fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n",
859 				sourcefile, funcname);
860 		} else {
861 			if (child->pct_sym != NULL)
862 				fprintf(args.pa_graphfile,
863 				    "cfi=???\ncfn=%s\n",
864 				    pmcstat_string_unintern(
865 				        child->pct_sym->ps_name));
866 			else
867 				fprintf(args.pa_graphfile,
868 				    "cfi=???\ncfn=%p\n", (void *)addr);
869 		}
870 
871 		/* Child function address, line and call count. */
872 		fprintf(args.pa_graphfile, "calls=%u %p %u\n",
873 		    ct->pct_arc[i].pcta_call, (void *)addr, line);
874 
875 		/*
876 		 * Call address, line, sample.
877 		 * TODO: Associate call address to the right location.
878 		 */
879 		fprintf(args.pa_graphfile, "%p %u", (void *)paddr, pline);
880 		for (j = 0; j<pmcstat_npmcs; j++)
881 			fprintf(args.pa_graphfile, " %u",
882 			    PMCPL_CT_SAMPLE(j, &ct->pct_arc[i].pcta_samples));
883 		fprintf(args.pa_graphfile, "\n");
884 	}
885 }
886 
887 /*
888  * Print node self cost.
889  */
890 
891 static void
892 pmcpl_ct_node_printself(struct pmcpl_ct_node *ct)
893 {
894 	int i, j, fline, line;
895 	uintfptr_t faddr, addr;
896 	char sourcefile[PATH_MAX];
897 	char funcname[PATH_MAX];
898 
899 	/*
900 	 * Object binary.
901 	 */
902 	fprintf(args.pa_graphfile, "ob=%s\n",
903 	    pmcstat_string_unintern(ct->pct_image->pi_fullpath));
904 
905 	/*
906 	 * Function name.
907 	 */
908 	faddr = ct->pct_image->pi_vaddr + ct->pct_func;
909 	fline = 0;
910 	if (ct->pct_type == PMCPL_PCT_NAME) {
911 		fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n",
912 		    pmcstat_string_unintern(ct->pct_ifl),
913 		    pmcstat_string_unintern(ct->pct_ifn));
914 	} else if (pmcstat_image_addr2line(ct->pct_image, faddr,
915 	    sourcefile, sizeof(sourcefile), &fline,
916 	    funcname, sizeof(funcname))) {
917 		fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n",
918 		    sourcefile, funcname);
919 	} else {
920 		if (ct->pct_sym != NULL)
921 			fprintf(args.pa_graphfile, "fl=???\nfn=%s\n",
922 			    pmcstat_string_unintern(ct->pct_sym->ps_name));
923 		else
924 			fprintf(args.pa_graphfile, "fl=???\nfn=%p\n",
925 			    (void *)(ct->pct_image->pi_vaddr + ct->pct_func));
926 	}
927 
928 	/*
929 	 * Self cost.
930 	 */
931 	if (ct->pct_ninstr > 0) {
932 		/*
933 		 * Per location cost.
934 		 */
935 		for (i = 0; i < ct->pct_ninstr; i++) {
936 			addr = ct->pct_image->pi_vaddr +
937 			    ct->pct_instr[i].pctf_func;
938 			line = 0;
939 			pmcstat_image_addr2line(ct->pct_image, addr,
940 			    sourcefile, sizeof(sourcefile), &line,
941 			    funcname, sizeof(funcname));
942 			fprintf(args.pa_graphfile, "%p %u",
943 			    (void *)addr, line);
944 			for (j = 0; j<pmcstat_npmcs; j++)
945 				fprintf(args.pa_graphfile, " %u",
946 				    PMCPL_CT_SAMPLE(j,
947 				    &ct->pct_instr[i].pctf_samples));
948 			fprintf(args.pa_graphfile, "\n");
949 		}
950 	} else {
951 		/* Global cost function cost. */
952 		fprintf(args.pa_graphfile, "%p %u", (void *)faddr, fline);
953 		for (i = 0; i<pmcstat_npmcs ; i++)
954 			fprintf(args.pa_graphfile, " %u",
955 			    PMCPL_CT_SAMPLE(i, &ct->pct_samples));
956 		fprintf(args.pa_graphfile, "\n");
957 	}
958 
959 	pmcpl_ct_node_printchild(ct, faddr, fline);
960 }
961 
962 static void
963 pmcpl_ct_printnode(struct pmcpl_ct_node *ct)
964 {
965 	int i;
966 
967 	if (ct == pmcpl_ct_root) {
968 		fprintf(args.pa_graphfile, "fn=root\n");
969 		fprintf(args.pa_graphfile, "0x0 1");
970 		for (i = 0; i<pmcstat_npmcs ; i++)
971 			fprintf(args.pa_graphfile, " 0");
972 		fprintf(args.pa_graphfile, "\n");
973 		pmcpl_ct_node_printchild(ct, 0, 0);
974 	} else
975 		pmcpl_ct_node_printself(ct);
976 }
977 
978 /*
979  * Breadth first traversal.
980  */
981 
982 static void
983 pmcpl_ct_bfs(struct pmcpl_ct_node *ct)
984 {
985 	int i;
986 	struct pmcpl_ct_node_hash *pch, *pchc;
987 	struct pmcpl_ct_node *child;
988 	STAILQ_HEAD(,pmcpl_ct_node_hash) q;
989 
990 	STAILQ_INIT(&q);
991 	if ((pch = malloc(sizeof(*pch))) == NULL)
992 		err(EX_OSERR, "ERROR: Cannot allocate queue");
993 	pch->pch_ctnode = ct;
994 	STAILQ_INSERT_TAIL(&q, pch, pch_next);
995 	ct->pct_color = PMCPL_PCT_BLACK;
996 
997 	while (!STAILQ_EMPTY(&q)) {
998 		pch = STAILQ_FIRST(&q);
999 		STAILQ_REMOVE_HEAD(&q, pch_next);
1000 		pmcpl_ct_printnode(pch->pch_ctnode);
1001 		for (i = 0; i<pch->pch_ctnode->pct_narc; i++) {
1002 			child = pch->pch_ctnode->pct_arc[i].pcta_child;
1003 			if (child->pct_color == PMCPL_PCT_WHITE) {
1004 				child->pct_color = PMCPL_PCT_BLACK;
1005 				if ((pchc = malloc(sizeof(*pchc))) == NULL)
1006 					err(EX_OSERR,
1007 					    "ERROR: Cannot allocate queue");
1008 				pchc->pch_ctnode = child;
1009 				STAILQ_INSERT_TAIL(&q, pchc, pch_next);
1010 			}
1011 		}
1012 		free(pch);
1013 	}
1014 }
1015 
1016 /*
1017  * Detect and fix inlined location.
1018  */
1019 
1020 static void
1021 _pmcpl_ct_expand_inline(struct pmcpl_ct_node *ct)
1022 {
1023 	int i, j;
1024 	unsigned fline, line, v;
1025 	uintfptr_t faddr, addr, pc;
1026 	char sourcefile[PATH_MAX];
1027 	char ffuncname[PATH_MAX], funcname[PATH_MAX];
1028 	char buffer[PATH_MAX];
1029 	struct pmcpl_ct_node *child;
1030 
1031 	/*
1032 	 * Resolve parent and compare to each instr location.
1033 	 */
1034 	faddr = ct->pct_image->pi_vaddr + ct->pct_func;
1035 	fline = 0;
1036 	if (!pmcstat_image_addr2line(ct->pct_image, faddr,
1037 	    sourcefile, sizeof(sourcefile), &fline,
1038 	    ffuncname, sizeof(ffuncname)))
1039 		return;
1040 
1041 	for (i = 0; i < ct->pct_ninstr; i++) {
1042 		addr = ct->pct_image->pi_vaddr +
1043 		    ct->pct_instr[i].pctf_func;
1044 		line = 0;
1045 		if (!pmcstat_image_addr2line(ct->pct_image, addr,
1046 		    sourcefile, sizeof(sourcefile), &line,
1047 		    funcname, sizeof(funcname)))
1048 			continue;
1049 
1050 		if (strcmp(funcname, ffuncname) == 0)
1051 			continue;
1052 
1053 		/*
1054 		 * - Lookup/create inline node by function name.
1055 		 * - Move instr PMCs to the inline node.
1056 		 * - Link nodes.
1057 		 * The lookup create a specific node per image/pc.
1058 		 */
1059 		if (args.pa_verbosity >= 2)
1060 			fprintf(args.pa_printfile,
1061 			    "WARNING: inlined function at %p %s in %s\n",
1062 			    (void *)addr, funcname, ffuncname);
1063 
1064 		snprintf(buffer, sizeof(buffer), "%s@%s",
1065 			funcname, ffuncname);
1066 		child = pmcpl_ct_node_hash_lookup(ct->pct_image,
1067 		    ct->pct_func, ct->pct_sym, sourcefile, buffer);
1068 		assert(child != NULL);
1069 		pc = ct->pct_instr[i].pctf_func;
1070 		for (j = 0; j<pmcstat_npmcs; j++) {
1071 			v = PMCPL_CT_SAMPLE(j,
1072 			    &ct->pct_instr[i].pctf_samples);
1073 			if (v == 0)
1074 				continue;
1075 			pmcpl_ct_instr_add(child, j, pc, v);
1076 			pmcpl_ct_node_update(ct, child, j, v, 0);
1077 			if (j < ct->pct_samples.npmcs)
1078 				ct->pct_samples.sb[j] -=
1079 				    ct->pct_instr[i].pctf_samples.sb[j];
1080 			ct->pct_instr[i].pctf_samples.sb[j] = 0;
1081 		}
1082 	}
1083 }
1084 
1085 static void
1086 pmcpl_ct_expand_inline(void)
1087 {
1088 	int i;
1089 	struct pmcpl_ct_node_hash *pch;
1090 
1091 	if (!args.pa_ctdumpinstr)
1092 		return;
1093 
1094 	for (i = 0; i < PMCSTAT_NHASH; i++)
1095 		STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next)
1096 			if (pch->pch_ctnode->pct_type == PMCPL_PCT_ADDR)
1097 				_pmcpl_ct_expand_inline(pch->pch_ctnode);
1098 }
1099 
1100 /*
1101  * Clean the PMC name for Kcachegrind formula
1102  */
1103 
1104 static void
1105 pmcpl_ct_fixup_pmcname(char *s)
1106 {
1107 	char *p;
1108 
1109 	for (p = s; *p; p++)
1110 		if (!isalnum(*p))
1111 			*p = '_';
1112 }
1113 
1114 /*
1115  * Print a calltree (KCachegrind) for all PMCs.
1116  */
1117 
1118 static void
1119 pmcpl_ct_print(void)
1120 {
1121 	int i;
1122 	char name[40];
1123 	struct pmcpl_ct_sample rsamples;
1124 
1125 	pmcpl_ct_samples_root(&rsamples);
1126 	pmcpl_ct_expand_inline();
1127 
1128 	fprintf(args.pa_graphfile,
1129 		"version: 1\n"
1130 		"creator: pmcstat\n"
1131 		"positions: instr line\n"
1132 		"events:");
1133 	for (i=0; i<pmcstat_npmcs; i++) {
1134 		snprintf(name, sizeof(name), "%s_%d",
1135 		    pmcstat_pmcindex_to_name(i), i);
1136 		pmcpl_ct_fixup_pmcname(name);
1137 		fprintf(args.pa_graphfile, " %s", name);
1138 	}
1139 	fprintf(args.pa_graphfile, "\nsummary:");
1140 	for (i=0; i<pmcstat_npmcs ; i++)
1141 		fprintf(args.pa_graphfile, " %u",
1142 		    PMCPL_CT_SAMPLE(i, &rsamples));
1143 	fprintf(args.pa_graphfile, "\n");
1144 	pmcpl_ct_bfs(pmcpl_ct_root);
1145 	pmcpl_ct_samples_free(&rsamples);
1146 }
1147 
1148 int
1149 pmcpl_ct_configure(char *opt)
1150 {
1151 
1152 	if (strncmp(opt, "skiplink=", 9) == 0) {
1153 		pmcstat_skiplink = atoi(opt+9);
1154 	} else
1155 		return (0);
1156 
1157 	return (1);
1158 }
1159 
1160 int
1161 pmcpl_ct_init(void)
1162 {
1163 	int i;
1164 
1165 	pmcpl_ct_root = pmcpl_ct_node_allocate();
1166 
1167 	for (i = 0; i < PMCSTAT_NHASH; i++)
1168 		STAILQ_INIT(&pmcpl_ct_node_hash[i]);
1169 
1170 	pmcpl_ct_samples_init(&pmcpl_ct_callid);
1171 
1172 	return (0);
1173 }
1174 
1175 void
1176 pmcpl_ct_shutdown(FILE *mf)
1177 {
1178 	int i;
1179 	struct pmcpl_ct_node_hash *pch, *pchtmp;
1180 
1181 	(void) mf;
1182 
1183 	if (args.pa_flags & FLAG_DO_CALLGRAPHS)
1184 		pmcpl_ct_print();
1185 
1186 	/*
1187 	 * Free memory.
1188 	 */
1189 
1190 	for (i = 0; i < PMCSTAT_NHASH; i++) {
1191 		STAILQ_FOREACH_SAFE(pch, &pmcpl_ct_node_hash[i], pch_next,
1192 		    pchtmp) {
1193 			pmcpl_ct_node_free(pch->pch_ctnode);
1194 			free(pch);
1195 		}
1196 	}
1197 
1198 	pmcpl_ct_node_free(pmcpl_ct_root);
1199 	pmcpl_ct_root = NULL;
1200 
1201 	pmcpl_ct_samples_free(&pmcpl_ct_callid);
1202 }
1203 
1204