xref: /freebsd/usr.sbin/pmcstat/pmcpl_calltree.c (revision 53120fbb68952b7d620c2c0e1cf05c5017fc1b27)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2012, Fabien Thomas
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Process hwpmc(4) samples as calltree.
31  *
32  * Output file format compatible with Kcachegrind (kdesdk).
33  * Handle top mode with a sorted tree display.
34  */
35 
36 #include <sys/param.h>
37 #include <sys/endian.h>
38 #include <sys/queue.h>
39 
40 #include <assert.h>
41 #include <curses.h>
42 #include <ctype.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fcntl.h>
46 #include <pmc.h>
47 #include <pmclog.h>
48 #include <stdint.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <unistd.h>
53 #include <sysexits.h>
54 
55 #include "pmcstat.h"
56 #include "pmcstat_log.h"
57 #include "pmcstat_top.h"
58 #include "pmcpl_calltree.h"
59 
60 #define	min(A,B)		((A) < (B) ? (A) : (B))
61 #define	max(A,B)		((A) > (B) ? (A) : (B))
62 
63 #define	PMCPL_CT_GROWSIZE	4
64 
65 static int pmcstat_skiplink = 0;
66 
67 struct pmcpl_ct_node;
68 
69 /* Get the sample value for PMC a. */
70 #define	PMCPL_CT_SAMPLE(a, b) \
71 	((a) < (b)->npmcs ? (b)->sb[a] : 0)
72 
73 /* Get the sample value in percent related to rsamples. */
74 #define	PMCPL_CT_SAMPLEP(a, b) \
75 	(PMCPL_CT_SAMPLE(a, b) * 100.0 / rsamples->sb[a])
76 
77 struct pmcpl_ct_sample {
78 	int		npmcs;		/* Max pmc index available. */
79 	unsigned	*sb;		/* Sample buffer for 0..npmcs. */
80 };
81 
82 struct pmcpl_ct_arc {
83 	struct pmcpl_ct_sample	pcta_samples;
84 	struct pmcpl_ct_sample	pcta_callid;
85 	unsigned		pcta_call;
86 	struct pmcpl_ct_node	*pcta_child;
87 };
88 
89 struct pmcpl_ct_instr {
90 	uintfptr_t		pctf_func;
91 	struct pmcpl_ct_sample	pctf_samples;
92 };
93 
94 /*
95  * Each calltree node is tracked by a pmcpl_ct_node struct.
96  */
97 struct pmcpl_ct_node {
98 	struct pmcstat_image	*pct_image;
99 	uintfptr_t		pct_func;
100 
101 	struct pmcstat_symbol	*pct_sym;
102 	pmcstat_interned_string	pct_ifl;
103 	pmcstat_interned_string	pct_ifn;
104 
105 	struct pmcpl_ct_sample	pct_samples;
106 
107 	int			pct_narc;
108 	int			pct_arc_c;
109 	struct pmcpl_ct_arc 	*pct_arc;
110 
111 	/* TODO: optimize for large number of items. */
112 	int			pct_ninstr;
113 	int			pct_instr_c;
114 	struct pmcpl_ct_instr	*pct_instr;
115 
116 #define PMCPL_PCT_ADDR	0
117 #define PMCPL_PCT_NAME	1
118 	char			pct_type;
119 #define	PMCPL_PCT_WHITE	0
120 #define	PMCPL_PCT_GREY	1
121 #define	PMCPL_PCT_BLACK	2
122 	char			pct_color;
123 };
124 
125 struct pmcpl_ct_node_hash {
126 	struct pmcpl_ct_node  *pch_ctnode;
127 	STAILQ_ENTRY(pmcpl_ct_node_hash) pch_next;
128 };
129 
130 static struct pmcpl_ct_sample pmcpl_ct_callid;
131 
132 #define	PMCPL_CT_MAXCOL		PMC_CALLCHAIN_DEPTH_MAX
133 #define	PMCPL_CT_MAXLINE	1024	/* TODO: dynamic. */
134 
135 struct pmcpl_ct_line {
136 	unsigned	ln_sum;
137 	unsigned	ln_index;
138 };
139 
140 static struct pmcpl_ct_line	pmcpl_ct_topmax[PMCPL_CT_MAXLINE+1];
141 static struct pmcpl_ct_node
142     *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL+1][PMCPL_CT_MAXLINE+1];
143 
144 /*
145  * All nodes indexed by function/image name are placed in a hash table.
146  */
147 static STAILQ_HEAD(,pmcpl_ct_node_hash) pmcpl_ct_node_hash[PMCSTAT_NHASH];
148 
149 /*
150  * Root node for the graph.
151  */
152 static struct pmcpl_ct_node *pmcpl_ct_root;
153 
154 /*
155  * Prototypes
156  */
157 
158 /*
159  * Initialize a samples.
160  */
161 
162 static void
163 pmcpl_ct_samples_init(struct pmcpl_ct_sample *samples)
164 {
165 
166 	samples->npmcs = 0;
167 	samples->sb = NULL;
168 }
169 
170 /*
171  * Free a samples.
172  */
173 
174 static void
175 pmcpl_ct_samples_free(struct pmcpl_ct_sample *samples)
176 {
177 
178 	samples->npmcs = 0;
179 	free(samples->sb);
180 	samples->sb = NULL;
181 }
182 
183 /*
184  * Grow a sample block to store pmcstat_npmcs PMCs.
185  */
186 
187 static void
188 pmcpl_ct_samples_grow(struct pmcpl_ct_sample *samples)
189 {
190 	unsigned int npmcs;
191 
192 	/* Enough storage. */
193 	if (pmcstat_npmcs <= samples->npmcs)
194                 return;
195 
196 	npmcs = samples->npmcs +
197 	    max(pmcstat_npmcs - samples->npmcs, PMCPL_CT_GROWSIZE);
198 	samples->sb = reallocarray(samples->sb, npmcs, sizeof(unsigned));
199 	if (samples->sb == NULL)
200 		errx(EX_SOFTWARE, "ERROR: out of memory");
201 	bzero((char *)samples->sb + samples->npmcs * sizeof(unsigned),
202 	    (npmcs - samples->npmcs) * sizeof(unsigned));
203 	samples->npmcs = npmcs;
204 }
205 
206 /*
207  * Compute the sum of all root arcs.
208  */
209 
210 static void
211 pmcpl_ct_samples_root(struct pmcpl_ct_sample *samples)
212 {
213 	int i, pmcin;
214 
215 	pmcpl_ct_samples_init(samples);
216 	pmcpl_ct_samples_grow(samples);
217 
218 	for (i = 0; i < pmcpl_ct_root->pct_narc; i++)
219 		for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++)
220 			samples->sb[pmcin] += PMCPL_CT_SAMPLE(pmcin,
221 			    &pmcpl_ct_root->pct_arc[i].pcta_samples);
222 }
223 
224 /*
225  * Grow the arc table.
226  */
227 
228 static void
229 pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items)
230 {
231 	unsigned int nmaxsize;
232 
233 	if (cursize < *maxsize)
234 		return;
235 
236 	nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
237 	*items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_arc));
238 	if (*items == NULL)
239 		errx(EX_SOFTWARE, "ERROR: out of memory");
240 	bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_arc),
241 	    (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_arc));
242 	*maxsize = nmaxsize;
243 }
244 
245 /*
246  * Grow the instr table.
247  */
248 
249 static void
250 pmcpl_ct_instr_grow(int cursize, int *maxsize, struct pmcpl_ct_instr **items)
251 {
252 	unsigned int nmaxsize;
253 
254 	if (cursize < *maxsize)
255 		return;
256 
257 	nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
258 	*items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_instr));
259 	if (*items == NULL)
260 		errx(EX_SOFTWARE, "ERROR: out of memory");
261 	bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_instr),
262 	    (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_instr));
263 	*maxsize = nmaxsize;
264 }
265 
266 /*
267  * Add a new instruction sample to given node.
268  */
269 
270 static void
271 pmcpl_ct_instr_add(struct pmcpl_ct_node *ct, int pmcin,
272     uintfptr_t pc, unsigned v)
273 {
274 	int i;
275 	struct pmcpl_ct_instr *in;
276 
277 	for (i = 0; i<ct->pct_ninstr; i++) {
278 		if (ct->pct_instr[i].pctf_func == pc) {
279 			in = &ct->pct_instr[i];
280 			pmcpl_ct_samples_grow(&in->pctf_samples);
281 			in->pctf_samples.sb[pmcin] += v;
282 			return;
283 		}
284 	}
285 
286 	pmcpl_ct_instr_grow(ct->pct_ninstr, &ct->pct_instr_c, &ct->pct_instr);
287 	in = &ct->pct_instr[ct->pct_ninstr];
288 	in->pctf_func = pc;
289 	pmcpl_ct_samples_init(&in->pctf_samples);
290 	pmcpl_ct_samples_grow(&in->pctf_samples);
291 	in->pctf_samples.sb[pmcin] = v;
292 	ct->pct_ninstr++;
293 }
294 
295 /*
296  * Allocate a new node.
297  */
298 
299 static struct pmcpl_ct_node *
300 pmcpl_ct_node_allocate(void)
301 {
302 	struct pmcpl_ct_node *ct;
303 
304 	if ((ct = malloc(sizeof(*ct))) == NULL)
305 		err(EX_OSERR, "ERROR: Cannot allocate callgraph node");
306 
307 	pmcpl_ct_samples_init(&ct->pct_samples);
308 
309 	ct->pct_sym	= NULL;
310 	ct->pct_image	= NULL;
311 	ct->pct_func	= 0;
312 
313 	ct->pct_narc	= 0;
314 	ct->pct_arc_c	= 0;
315 	ct->pct_arc	= NULL;
316 
317 	ct->pct_ninstr	= 0;
318 	ct->pct_instr_c	= 0;
319 	ct->pct_instr	= NULL;
320 
321 	ct->pct_color   = PMCPL_PCT_WHITE;
322 
323 	return (ct);
324 }
325 
326 /*
327  * Free a node.
328  */
329 
330 static void
331 pmcpl_ct_node_free(struct pmcpl_ct_node *ct)
332 {
333 	int i;
334 
335 	for (i = 0; i < ct->pct_narc; i++) {
336 		pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_samples);
337 		pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_callid);
338 	}
339 
340 	pmcpl_ct_samples_free(&ct->pct_samples);
341 	free(ct->pct_arc);
342 	free(ct->pct_instr);
343 	free(ct);
344 }
345 
346 /*
347  * Clear the graph tag on each node.
348  */
349 static void
350 pmcpl_ct_node_cleartag(void)
351 {
352 	int i;
353 	struct pmcpl_ct_node_hash *pch;
354 
355 	for (i = 0; i < PMCSTAT_NHASH; i++)
356 		STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next)
357 			pch->pch_ctnode->pct_color = PMCPL_PCT_WHITE;
358 
359 	pmcpl_ct_root->pct_color = PMCPL_PCT_WHITE;
360 }
361 
362 /*
363  * Print the callchain line by line with maximum cost at top.
364  */
365 
366 static int
367 pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct,
368     struct pmcpl_ct_sample *rsamples, int x, int *y)
369 {
370 	int i, terminal;
371 	struct pmcpl_ct_arc *arc;
372 
373 	if (ct->pct_color == PMCPL_PCT_GREY)
374 		return 0;
375 
376 	if (x >= PMCPL_CT_MAXCOL) {
377 		pmcpl_ct_topscreen[x][*y] = NULL;
378 		return 1;
379 	}
380 	pmcpl_ct_topscreen[x][*y] = ct;
381 
382 	/*
383 	 * Check if this is a terminal node.
384 	 * We need to check that some samples exist
385 	 * for at least one arc for that PMC.
386 	 */
387 	terminal = 1;
388 	for (i = 0; i < ct->pct_narc; i++) {
389 		arc = &ct->pct_arc[i];
390 		if (arc->pcta_child->pct_color != PMCPL_PCT_GREY &&
391 		    PMCPL_CT_SAMPLE(pmcin,
392 		    &arc->pcta_samples) != 0 &&
393 		    PMCPL_CT_SAMPLEP(pmcin,
394 		    &arc->pcta_samples) > pmcstat_threshold) {
395 			terminal = 0;
396 			break;
397 		}
398 	}
399 
400 	if (ct->pct_narc == 0 || terminal) {
401 		pmcpl_ct_topscreen[x+1][*y] = NULL;
402 		if (*y >= PMCPL_CT_MAXLINE)
403 			return 1;
404 		*y = *y + 1;
405 		for (i=0; i < x; i++)
406 			pmcpl_ct_topscreen[i][*y] =
407 			    pmcpl_ct_topscreen[i][*y - 1];
408 		return 0;
409 	}
410 
411 	ct->pct_color = PMCPL_PCT_GREY;
412 	for (i = 0; i < ct->pct_narc; i++) {
413 		if (PMCPL_CT_SAMPLE(pmcin,
414 		    &ct->pct_arc[i].pcta_samples) == 0)
415 			continue;
416 		if (PMCPL_CT_SAMPLEP(pmcin,
417 		    &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) {
418 			if (pmcpl_ct_node_dumptop(pmcin,
419 			        ct->pct_arc[i].pcta_child,
420 			        rsamples, x+1, y)) {
421 				ct->pct_color = PMCPL_PCT_BLACK;
422 				return 1;
423 			}
424 		}
425 	}
426 	ct->pct_color = PMCPL_PCT_BLACK;
427 
428 	return 0;
429 }
430 
431 /*
432  * Compare two top line by sum.
433  */
434 static int
435 pmcpl_ct_line_compare(const void *a, const void *b)
436 {
437 	const struct pmcpl_ct_line *ct1, *ct2;
438 
439 	ct1 = (const struct pmcpl_ct_line *) a;
440 	ct2 = (const struct pmcpl_ct_line *) b;
441 
442 	/* Sort in reverse order */
443 	if (ct1->ln_sum < ct2->ln_sum)
444 		return (1);
445 	if (ct1->ln_sum > ct2->ln_sum)
446 		return (-1);
447 	return (0);
448 }
449 
450 /*
451  * Format and display given PMC index.
452  */
453 
454 static void
455 pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy)
456 {
457 #undef	TS
458 #undef	TSI
459 #define	TS(x, y)	(pmcpl_ct_topscreen[x][y])
460 #define	TSI(x, y)	(pmcpl_ct_topscreen[x][pmcpl_ct_topmax[y].ln_index])
461 
462 	int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y;
463 	float v;
464 	char ns[30], vs[10], is[20];
465 	struct pmcpl_ct_node *ct;
466 	const char *space = " ";
467 
468 	/*
469 	 * Sort by line cost.
470 	 */
471 	for (y = 0; ; y++) {
472 		ct = TS(1, y);
473 		if (ct == NULL)
474 			break;
475 
476 		pmcpl_ct_topmax[y].ln_sum = 0;
477 		pmcpl_ct_topmax[y].ln_index = y;
478 		for (x = 1; TS(x, y) != NULL; x++) {
479 			pmcpl_ct_topmax[y].ln_sum +=
480 			    PMCPL_CT_SAMPLE(pmcin, &TS(x, y)->pct_samples);
481 		}
482 	}
483 	qsort(pmcpl_ct_topmax, y, sizeof(pmcpl_ct_topmax[0]),
484 	    pmcpl_ct_line_compare);
485 	pmcpl_ct_topmax[y].ln_index = y;
486 
487 	for (y = 0; y < maxy; y++) {
488 		ct = TSI(1, y);
489 		if (ct == NULL)
490 			break;
491 
492 		if (y > 0)
493 			PMCSTAT_PRINTW("\n");
494 
495 		/* Output sum. */
496 		v = pmcpl_ct_topmax[y].ln_sum * 100.0 /
497 		    rsamples->sb[pmcin];
498 		snprintf(vs, sizeof(vs), "%.1f", v);
499 		v_attrs = PMCSTAT_ATTRPERCENT(v);
500 		PMCSTAT_ATTRON(v_attrs);
501 		PMCSTAT_PRINTW("%5.5s ", vs);
502 		PMCSTAT_ATTROFF(v_attrs);
503 
504 		width = indentwidth = 5 + 1;
505 
506 		for (x = 1; (ct = TSI(x, y)) != NULL; x++) {
507 
508 			vs[0] = '\0'; vs_len = 0;
509 			is[0] = '\0'; is_len = 0;
510 
511 			/* Format value. */
512 			v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples);
513 			if (v > pmcstat_threshold)
514 				vs_len  = snprintf(vs, sizeof(vs),
515 				    "(%.1f%%)", v);
516 			v_attrs = PMCSTAT_ATTRPERCENT(v);
517 
518 			if (pmcstat_skiplink && v <= pmcstat_threshold) {
519 				strlcpy(ns, ".", sizeof(ns));
520 				ns_len = 1;
521 			} else {
522 			if (ct->pct_sym != NULL) {
523 				ns_len = snprintf(ns, sizeof(ns), "%s",
524 				    pmcstat_string_unintern(ct->pct_sym->ps_name));
525 			} else
526 				ns_len = snprintf(ns, sizeof(ns), "%p",
527 				    (void *)ct->pct_func);
528 
529 			/* Format image. */
530 			if (x == 1 ||
531 			    TSI(x-1, y)->pct_image != ct->pct_image)
532 				is_len = snprintf(is, sizeof(is), "@%s",
533 				    pmcstat_string_unintern(ct->pct_image->pi_name));
534 
535 			/* Check for line wrap. */
536 			width += ns_len + is_len + vs_len + 1;
537 			}
538 			if (width >= pmcstat_displaywidth) {
539 				maxy--;
540 				if (y >= maxy)
541 					break;
542 				PMCSTAT_PRINTW("\n%*s", indentwidth, space);
543 				width = indentwidth + ns_len + is_len + vs_len;
544 			}
545 
546 			PMCSTAT_ATTRON(v_attrs);
547 			PMCSTAT_PRINTW("%s%s%s ", ns, is, vs);
548 			PMCSTAT_ATTROFF(v_attrs);
549 		}
550 	}
551 }
552 
553 /*
554  * Output top mode snapshot.
555  */
556 
557 void
558 pmcpl_ct_topdisplay(void)
559 {
560 	int y;
561 	struct pmcpl_ct_sample r, *rsamples;
562 
563 	rsamples = &r;
564 	pmcpl_ct_samples_root(rsamples);
565 	pmcpl_ct_node_cleartag();
566 
567 	PMCSTAT_PRINTW("%5.5s %s\n", "%SAMP", "CALLTREE");
568 
569 	y = 0;
570 	if (pmcpl_ct_node_dumptop(pmcstat_pmcinfilter,
571 	    pmcpl_ct_root, rsamples, 0, &y))
572 		PMCSTAT_PRINTW("...\n");
573 	pmcpl_ct_topscreen[1][y] = NULL;
574 
575 	pmcpl_ct_node_printtop(rsamples,
576 	    pmcstat_pmcinfilter, pmcstat_displayheight - 2);
577 
578 	pmcpl_ct_samples_free(rsamples);
579 }
580 
581 /*
582  * Handle top mode keypress.
583  */
584 
585 int
586 pmcpl_ct_topkeypress(int c, void *arg)
587 {
588 	WINDOW *w;
589 
590 	w = (WINDOW *)arg;
591 
592 	switch (c) {
593 	case 'f':
594 		pmcstat_skiplink = !pmcstat_skiplink;
595 		wprintw(w, "skip empty link %s",
596 		    pmcstat_skiplink ? "on" : "off");
597 		break;
598 	}
599 
600 	return 0;
601 }
602 
603 /*
604  * Look for a callgraph node associated with pmc `pmcid' in the global
605  * hash table that corresponds to the given `pc' value in the process map
606  * `ppm'.
607  */
608 
609 static void
610 pmcpl_ct_node_update(struct pmcpl_ct_node *parent,
611     struct pmcpl_ct_node *child, int pmcin, unsigned v, int cd)
612 {
613 	struct pmcpl_ct_arc *arc;
614 	int i;
615 
616 	assert(parent != NULL);
617 
618 	/*
619 	 * Find related arc in parent node and
620 	 * increment the sample count.
621 	 */
622 	for (i = 0; i < parent->pct_narc; i++) {
623 		if (parent->pct_arc[i].pcta_child == child) {
624 			arc = &parent->pct_arc[i];
625 			pmcpl_ct_samples_grow(&arc->pcta_samples);
626 			arc->pcta_samples.sb[pmcin] += v;
627 			/* Estimate call count. */
628 			if (cd) {
629 			pmcpl_ct_samples_grow(&arc->pcta_callid);
630 			if (pmcpl_ct_callid.sb[pmcin] -
631 			    arc->pcta_callid.sb[pmcin] > 1)
632 				arc->pcta_call++;
633 			arc->pcta_callid.sb[pmcin] =
634 			    pmcpl_ct_callid.sb[pmcin];
635 			}
636 			return;
637 		}
638 	}
639 
640 	/*
641 	 * No arc found for us, add ourself to the parent.
642 	 */
643 	pmcpl_ct_arc_grow(parent->pct_narc,
644 	    &parent->pct_arc_c, &parent->pct_arc);
645 	arc = &parent->pct_arc[parent->pct_narc];
646 	pmcpl_ct_samples_grow(&arc->pcta_samples);
647 	arc->pcta_samples.sb[pmcin] = v;
648 	arc->pcta_call = 1;
649 	if (cd) {
650 		pmcpl_ct_samples_grow(&arc->pcta_callid);
651 		arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin];
652 	}
653 	arc->pcta_child = child;
654 	parent->pct_narc++;
655 }
656 
657 /*
658  * Lookup by image/pc.
659  */
660 
661 static struct pmcpl_ct_node *
662 pmcpl_ct_node_hash_lookup(struct pmcstat_image *image, uintfptr_t pc,
663     struct pmcstat_symbol *sym, char *fl, char *fn)
664 {
665 	int i;
666 	unsigned int hash;
667 	struct pmcpl_ct_node *ct;
668 	struct pmcpl_ct_node_hash *h;
669 	pmcstat_interned_string	ifl, ifn;
670 
671 	if (fn != NULL) {
672 		ifl = pmcstat_string_intern(fl);
673 		ifn = pmcstat_string_intern(fn);
674 	} else {
675 		ifl = 0;
676 		ifn = 0;
677 	}
678 
679 	for (hash = i = 0; i < (int)sizeof(uintfptr_t); i++)
680 		hash += (pc >> i) & 0xFF;
681 
682 	hash &= PMCSTAT_HASH_MASK;
683 
684 	STAILQ_FOREACH(h, &pmcpl_ct_node_hash[hash], pch_next) {
685 		ct = h->pch_ctnode;
686 
687 		assert(ct != NULL);
688 
689 		if (ct->pct_image == image && ct->pct_func == pc) {
690 			if (fn == NULL)
691 				return (ct);
692 			if (ct->pct_type == PMCPL_PCT_NAME &&
693 			    ct->pct_ifl == ifl && ct->pct_ifn == ifn)
694 				return (ct);
695 		}
696 	}
697 
698 	/*
699 	 * We haven't seen this (pmcid, pc) tuple yet, so allocate a
700 	 * new callgraph node and a new hash table entry for it.
701 	 */
702 	ct = pmcpl_ct_node_allocate();
703 	if ((h = malloc(sizeof(*h))) == NULL)
704 		err(EX_OSERR, "ERROR: Could not allocate callgraph node");
705 
706 	if (fn != NULL) {
707 		ct->pct_type = PMCPL_PCT_NAME;
708 		ct->pct_ifl = ifl;
709 		ct->pct_ifn = ifn;
710 	} else
711 		ct->pct_type = PMCPL_PCT_ADDR;
712 	ct->pct_image = image;
713 	ct->pct_func = pc;
714 	ct->pct_sym = sym;
715 
716 	h->pch_ctnode = ct;
717 	STAILQ_INSERT_HEAD(&pmcpl_ct_node_hash[hash], h, pch_next);
718 	return (ct);
719 }
720 
721 /*
722  * Record a callchain.
723  */
724 
725 void
726 pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr,
727     uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu)
728 {
729 	int i, n, pmcin;
730 	uintfptr_t pc, loadaddress;
731 	struct pmcstat_image *image;
732 	struct pmcstat_symbol *sym;
733 	struct pmcstat_pcmap *ppm[PMC_CALLCHAIN_DEPTH_MAX];
734 	struct pmcstat_process *km;
735 	struct pmcpl_ct_node *ct;
736 	struct pmcpl_ct_node *ctl[PMC_CALLCHAIN_DEPTH_MAX+1];
737 
738 	(void) cpu;
739 
740 	assert(nsamples>0 && nsamples<=PMC_CALLCHAIN_DEPTH_MAX);
741 
742 	/* Get the PMC index. */
743 	pmcin = pmcr->pr_pmcin;
744 
745 	/*
746 	 * Validate mapping for the callchain.
747 	 * Go from bottom to first invalid entry.
748 	 */
749 	km = pmcstat_kernproc;
750 	for (n = 0; n < (int)nsamples; n++) {
751 		ppm[n] = pmcstat_process_find_map(usermode ?
752 		    pp : km, cc[n]);
753 		if (ppm[n] == NULL) {
754 			/* Detect full frame capture (kernel + user). */
755 			if (!usermode) {
756 				ppm[n] = pmcstat_process_find_map(pp, cc[n]);
757 				if (ppm[n] != NULL)
758 					km = pp;
759 			}
760 		}
761 		if (ppm[n] == NULL)
762 			break;
763 	}
764 	if (n-- == 0) {
765 		pmcstat_stats.ps_callchain_dubious_frames++;
766 		pmcr->pr_dubious_frames++;
767 		return;
768 	}
769 
770 	/* Increase the call generation counter. */
771 	pmcpl_ct_samples_grow(&pmcpl_ct_callid);
772 	pmcpl_ct_callid.sb[pmcin]++;
773 
774 	/*
775 	 * Build node list.
776 	 */
777 	ctl[0] = pmcpl_ct_root;
778 	for (i = 1; n >= 0; n--) {
779 		image = ppm[n]->ppm_image;
780 		loadaddress = ppm[n]->ppm_lowpc +
781 		    image->pi_vaddr - image->pi_start;
782 		/* Convert to an offset in the image. */
783 		pc = cc[n] - loadaddress;
784 		/*
785 		 * Try determine the function at this offset.  If we can't
786 		 * find a function round leave the `pc' value alone.
787 		 */
788 		if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
789 			pc = sym->ps_start;
790 		else
791 			pmcstat_stats.ps_samples_unknown_function++;
792 
793 		ct = pmcpl_ct_node_hash_lookup(image, pc, sym, NULL, NULL);
794 		if (ct == NULL) {
795 			pmcstat_stats.ps_callchain_dubious_frames++;
796 			continue;
797 		}
798 		ctl[i++] = ct;
799 	}
800 	/* No valid node found. */
801 	if (i == 1)
802 		return;
803 	n = i;
804 
805 	ct = ctl[0];
806 	for (i = 1; i < n; i++)
807 		pmcpl_ct_node_update(ctl[i-1], ctl[i], pmcin, 1, 1);
808 
809 	/*
810 	 * Increment the sample count for this PMC.
811 	 */
812 	pmcpl_ct_samples_grow(&ctl[n-1]->pct_samples);
813 	ctl[n-1]->pct_samples.sb[pmcin]++;
814 
815 	/* Update per instruction sample if required. */
816 	if (args.pa_ctdumpinstr)
817 		pmcpl_ct_instr_add(ctl[n-1], pmcin, cc[0] -
818 		    (ppm[0]->ppm_lowpc + ppm[0]->ppm_image->pi_vaddr -
819 		     ppm[0]->ppm_image->pi_start), 1);
820 }
821 
822 /*
823  * Print node child cost.
824  */
825 
826 static void
827 pmcpl_ct_node_printchild(struct pmcpl_ct_node *ct, uintfptr_t paddr,
828     int pline)
829 {
830 	int i, j, line;
831 	uintfptr_t addr;
832 	struct pmcpl_ct_node *child;
833 	char sourcefile[PATH_MAX];
834 	char funcname[PATH_MAX];
835 
836 	/*
837 	 * Child cost.
838 	 * TODO: attach child cost to the real position in the function.
839 	 * TODO: cfn=<fn> / call <ncall> addr(<fn>) / addr(call <fn>) <arccost>
840 	 */
841 	for (i=0 ; i<ct->pct_narc; i++) {
842 		child = ct->pct_arc[i].pcta_child;
843 		/* Object binary. */
844 		fprintf(args.pa_graphfile, "cob=%s\n",
845 		    pmcstat_string_unintern(child->pct_image->pi_fullpath));
846 		/* Child function name. */
847 		addr = child->pct_image->pi_vaddr + child->pct_func;
848 		line = 0;
849 		/* Child function source file. */
850 		if (child->pct_type == PMCPL_PCT_NAME) {
851 			fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n",
852 			    pmcstat_string_unintern(child->pct_ifl),
853 			    pmcstat_string_unintern(child->pct_ifn));
854 		} else if (pmcstat_image_addr2line(child->pct_image, addr,
855 		    sourcefile, sizeof(sourcefile), &line,
856 		    funcname, sizeof(funcname))) {
857 			fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n",
858 				sourcefile, funcname);
859 		} else {
860 			if (child->pct_sym != NULL)
861 				fprintf(args.pa_graphfile,
862 				    "cfi=???\ncfn=%s\n",
863 				    pmcstat_string_unintern(
864 				        child->pct_sym->ps_name));
865 			else
866 				fprintf(args.pa_graphfile,
867 				    "cfi=???\ncfn=%p\n", (void *)addr);
868 		}
869 
870 		/* Child function address, line and call count. */
871 		fprintf(args.pa_graphfile, "calls=%u %p %u\n",
872 		    ct->pct_arc[i].pcta_call, (void *)addr, line);
873 
874 		/*
875 		 * Call address, line, sample.
876 		 * TODO: Associate call address to the right location.
877 		 */
878 		fprintf(args.pa_graphfile, "%p %u", (void *)paddr, pline);
879 		for (j = 0; j<pmcstat_npmcs; j++)
880 			fprintf(args.pa_graphfile, " %u",
881 			    PMCPL_CT_SAMPLE(j, &ct->pct_arc[i].pcta_samples));
882 		fprintf(args.pa_graphfile, "\n");
883 	}
884 }
885 
886 /*
887  * Print node self cost.
888  */
889 
890 static void
891 pmcpl_ct_node_printself(struct pmcpl_ct_node *ct)
892 {
893 	int i, j, fline, line;
894 	uintfptr_t faddr, addr;
895 	char sourcefile[PATH_MAX];
896 	char funcname[PATH_MAX];
897 
898 	/*
899 	 * Object binary.
900 	 */
901 	fprintf(args.pa_graphfile, "ob=%s\n",
902 	    pmcstat_string_unintern(ct->pct_image->pi_fullpath));
903 
904 	/*
905 	 * Function name.
906 	 */
907 	faddr = ct->pct_image->pi_vaddr + ct->pct_func;
908 	fline = 0;
909 	if (ct->pct_type == PMCPL_PCT_NAME) {
910 		fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n",
911 		    pmcstat_string_unintern(ct->pct_ifl),
912 		    pmcstat_string_unintern(ct->pct_ifn));
913 	} else if (pmcstat_image_addr2line(ct->pct_image, faddr,
914 	    sourcefile, sizeof(sourcefile), &fline,
915 	    funcname, sizeof(funcname))) {
916 		fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n",
917 		    sourcefile, funcname);
918 	} else {
919 		if (ct->pct_sym != NULL)
920 			fprintf(args.pa_graphfile, "fl=???\nfn=%s\n",
921 			    pmcstat_string_unintern(ct->pct_sym->ps_name));
922 		else
923 			fprintf(args.pa_graphfile, "fl=???\nfn=%p\n",
924 			    (void *)(ct->pct_image->pi_vaddr + ct->pct_func));
925 	}
926 
927 	/*
928 	 * Self cost.
929 	 */
930 	if (ct->pct_ninstr > 0) {
931 		/*
932 		 * Per location cost.
933 		 */
934 		for (i = 0; i < ct->pct_ninstr; i++) {
935 			addr = ct->pct_image->pi_vaddr +
936 			    ct->pct_instr[i].pctf_func;
937 			line = 0;
938 			pmcstat_image_addr2line(ct->pct_image, addr,
939 			    sourcefile, sizeof(sourcefile), &line,
940 			    funcname, sizeof(funcname));
941 			fprintf(args.pa_graphfile, "%p %u",
942 			    (void *)addr, line);
943 			for (j = 0; j<pmcstat_npmcs; j++)
944 				fprintf(args.pa_graphfile, " %u",
945 				    PMCPL_CT_SAMPLE(j,
946 				    &ct->pct_instr[i].pctf_samples));
947 			fprintf(args.pa_graphfile, "\n");
948 		}
949 	} else {
950 		/* Global cost function cost. */
951 		fprintf(args.pa_graphfile, "%p %u", (void *)faddr, fline);
952 		for (i = 0; i<pmcstat_npmcs ; i++)
953 			fprintf(args.pa_graphfile, " %u",
954 			    PMCPL_CT_SAMPLE(i, &ct->pct_samples));
955 		fprintf(args.pa_graphfile, "\n");
956 	}
957 
958 	pmcpl_ct_node_printchild(ct, faddr, fline);
959 }
960 
961 static void
962 pmcpl_ct_printnode(struct pmcpl_ct_node *ct)
963 {
964 	int i;
965 
966 	if (ct == pmcpl_ct_root) {
967 		fprintf(args.pa_graphfile, "fn=root\n");
968 		fprintf(args.pa_graphfile, "0x0 1");
969 		for (i = 0; i<pmcstat_npmcs ; i++)
970 			fprintf(args.pa_graphfile, " 0");
971 		fprintf(args.pa_graphfile, "\n");
972 		pmcpl_ct_node_printchild(ct, 0, 0);
973 	} else
974 		pmcpl_ct_node_printself(ct);
975 }
976 
977 /*
978  * Breadth first traversal.
979  */
980 
981 static void
982 pmcpl_ct_bfs(struct pmcpl_ct_node *ct)
983 {
984 	int i;
985 	struct pmcpl_ct_node_hash *pch, *pchc;
986 	struct pmcpl_ct_node *child;
987 	STAILQ_HEAD(,pmcpl_ct_node_hash) q;
988 
989 	STAILQ_INIT(&q);
990 	if ((pch = malloc(sizeof(*pch))) == NULL)
991 		err(EX_OSERR, "ERROR: Cannot allocate queue");
992 	pch->pch_ctnode = ct;
993 	STAILQ_INSERT_TAIL(&q, pch, pch_next);
994 	ct->pct_color = PMCPL_PCT_BLACK;
995 
996 	while (!STAILQ_EMPTY(&q)) {
997 		pch = STAILQ_FIRST(&q);
998 		STAILQ_REMOVE_HEAD(&q, pch_next);
999 		pmcpl_ct_printnode(pch->pch_ctnode);
1000 		for (i = 0; i<pch->pch_ctnode->pct_narc; i++) {
1001 			child = pch->pch_ctnode->pct_arc[i].pcta_child;
1002 			if (child->pct_color == PMCPL_PCT_WHITE) {
1003 				child->pct_color = PMCPL_PCT_BLACK;
1004 				if ((pchc = malloc(sizeof(*pchc))) == NULL)
1005 					err(EX_OSERR,
1006 					    "ERROR: Cannot allocate queue");
1007 				pchc->pch_ctnode = child;
1008 				STAILQ_INSERT_TAIL(&q, pchc, pch_next);
1009 			}
1010 		}
1011 		free(pch);
1012 	}
1013 }
1014 
1015 /*
1016  * Detect and fix inlined location.
1017  */
1018 
1019 static void
1020 _pmcpl_ct_expand_inline(struct pmcpl_ct_node *ct)
1021 {
1022 	int i, j;
1023 	unsigned fline, line, v;
1024 	uintfptr_t faddr, addr, pc;
1025 	char sourcefile[PATH_MAX];
1026 	char ffuncname[PATH_MAX], funcname[PATH_MAX];
1027 	char buffer[PATH_MAX];
1028 	struct pmcpl_ct_node *child;
1029 
1030 	/*
1031 	 * Resolve parent and compare to each instr location.
1032 	 */
1033 	faddr = ct->pct_image->pi_vaddr + ct->pct_func;
1034 	fline = 0;
1035 	if (!pmcstat_image_addr2line(ct->pct_image, faddr,
1036 	    sourcefile, sizeof(sourcefile), &fline,
1037 	    ffuncname, sizeof(ffuncname)))
1038 		return;
1039 
1040 	for (i = 0; i < ct->pct_ninstr; i++) {
1041 		addr = ct->pct_image->pi_vaddr +
1042 		    ct->pct_instr[i].pctf_func;
1043 		line = 0;
1044 		if (!pmcstat_image_addr2line(ct->pct_image, addr,
1045 		    sourcefile, sizeof(sourcefile), &line,
1046 		    funcname, sizeof(funcname)))
1047 			continue;
1048 
1049 		if (strcmp(funcname, ffuncname) == 0)
1050 			continue;
1051 
1052 		/*
1053 		 * - Lookup/create inline node by function name.
1054 		 * - Move instr PMCs to the inline node.
1055 		 * - Link nodes.
1056 		 * The lookup create a specific node per image/pc.
1057 		 */
1058 		if (args.pa_verbosity >= 2)
1059 			fprintf(args.pa_printfile,
1060 			    "WARNING: inlined function at %p %s in %s\n",
1061 			    (void *)addr, funcname, ffuncname);
1062 
1063 		snprintf(buffer, sizeof(buffer), "%s@%s",
1064 			funcname, ffuncname);
1065 		child = pmcpl_ct_node_hash_lookup(ct->pct_image,
1066 		    ct->pct_func, ct->pct_sym, sourcefile, buffer);
1067 		assert(child != NULL);
1068 		pc = ct->pct_instr[i].pctf_func;
1069 		for (j = 0; j<pmcstat_npmcs; j++) {
1070 			v = PMCPL_CT_SAMPLE(j,
1071 			    &ct->pct_instr[i].pctf_samples);
1072 			if (v == 0)
1073 				continue;
1074 			pmcpl_ct_instr_add(child, j, pc, v);
1075 			pmcpl_ct_node_update(ct, child, j, v, 0);
1076 			if (j < ct->pct_samples.npmcs)
1077 				ct->pct_samples.sb[j] -=
1078 				    ct->pct_instr[i].pctf_samples.sb[j];
1079 			ct->pct_instr[i].pctf_samples.sb[j] = 0;
1080 		}
1081 	}
1082 }
1083 
1084 static void
1085 pmcpl_ct_expand_inline(void)
1086 {
1087 	int i;
1088 	struct pmcpl_ct_node_hash *pch;
1089 
1090 	if (!args.pa_ctdumpinstr)
1091 		return;
1092 
1093 	for (i = 0; i < PMCSTAT_NHASH; i++)
1094 		STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next)
1095 			if (pch->pch_ctnode->pct_type == PMCPL_PCT_ADDR)
1096 				_pmcpl_ct_expand_inline(pch->pch_ctnode);
1097 }
1098 
1099 /*
1100  * Clean the PMC name for Kcachegrind formula
1101  */
1102 
1103 static void
1104 pmcpl_ct_fixup_pmcname(char *s)
1105 {
1106 	char *p;
1107 
1108 	for (p = s; *p; p++)
1109 		if (!isalnum(*p))
1110 			*p = '_';
1111 }
1112 
1113 /*
1114  * Print a calltree (KCachegrind) for all PMCs.
1115  */
1116 
1117 static void
1118 pmcpl_ct_print(void)
1119 {
1120 	int i;
1121 	char name[40];
1122 	struct pmcpl_ct_sample rsamples;
1123 
1124 	pmcpl_ct_samples_root(&rsamples);
1125 	pmcpl_ct_expand_inline();
1126 
1127 	fprintf(args.pa_graphfile,
1128 		"version: 1\n"
1129 		"creator: pmcstat\n"
1130 		"positions: instr line\n"
1131 		"events:");
1132 	for (i=0; i<pmcstat_npmcs; i++) {
1133 		snprintf(name, sizeof(name), "%s_%d",
1134 		    pmcstat_pmcindex_to_name(i), i);
1135 		pmcpl_ct_fixup_pmcname(name);
1136 		fprintf(args.pa_graphfile, " %s", name);
1137 	}
1138 	fprintf(args.pa_graphfile, "\nsummary:");
1139 	for (i=0; i<pmcstat_npmcs ; i++)
1140 		fprintf(args.pa_graphfile, " %u",
1141 		    PMCPL_CT_SAMPLE(i, &rsamples));
1142 	fprintf(args.pa_graphfile, "\n");
1143 	pmcpl_ct_bfs(pmcpl_ct_root);
1144 	pmcpl_ct_samples_free(&rsamples);
1145 }
1146 
1147 int
1148 pmcpl_ct_configure(char *opt)
1149 {
1150 
1151 	if (strncmp(opt, "skiplink=", 9) == 0) {
1152 		pmcstat_skiplink = atoi(opt+9);
1153 	} else
1154 		return (0);
1155 
1156 	return (1);
1157 }
1158 
1159 int
1160 pmcpl_ct_init(void)
1161 {
1162 	int i;
1163 
1164 	pmcpl_ct_root = pmcpl_ct_node_allocate();
1165 
1166 	for (i = 0; i < PMCSTAT_NHASH; i++)
1167 		STAILQ_INIT(&pmcpl_ct_node_hash[i]);
1168 
1169 	pmcpl_ct_samples_init(&pmcpl_ct_callid);
1170 
1171 	return (0);
1172 }
1173 
1174 void
1175 pmcpl_ct_shutdown(FILE *mf)
1176 {
1177 	int i;
1178 	struct pmcpl_ct_node_hash *pch, *pchtmp;
1179 
1180 	(void) mf;
1181 
1182 	if (args.pa_flags & FLAG_DO_CALLGRAPHS)
1183 		pmcpl_ct_print();
1184 
1185 	/*
1186 	 * Free memory.
1187 	 */
1188 
1189 	for (i = 0; i < PMCSTAT_NHASH; i++) {
1190 		STAILQ_FOREACH_SAFE(pch, &pmcpl_ct_node_hash[i], pch_next,
1191 		    pchtmp) {
1192 			pmcpl_ct_node_free(pch->pch_ctnode);
1193 			free(pch);
1194 		}
1195 	}
1196 
1197 	pmcpl_ct_node_free(pmcpl_ct_root);
1198 	pmcpl_ct_root = NULL;
1199 
1200 	pmcpl_ct_samples_free(&pmcpl_ct_callid);
1201 }
1202 
1203