xref: /linux/tools/perf/util/callchain.c (revision bf4afc53b77aeaa48b5409da5c8da6bb4eff7f43)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
4  *
5  * Handle the callchains from the stream in an ad-hoc radix tree and then
6  * sort them in an rbtree.
7  *
8  * Using a radix for code path provides a fast retrieval and factorizes
9  * memory use. Also that lets us use the paths in a hierarchical graph view.
10  *
11  */
12 
13 #include <inttypes.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <stdbool.h>
17 #include <errno.h>
18 #include <math.h>
19 #include <linux/string.h>
20 #include <linux/zalloc.h>
21 
22 #include "asm/bug.h"
23 
24 #include "debug.h"
25 #include "dso.h"
26 #include "event.h"
27 #include "hist.h"
28 #include "sort.h"
29 #include "machine.h"
30 #include "map.h"
31 #include "callchain.h"
32 #include "branch.h"
33 #include "symbol.h"
34 #include "thread.h"
35 #include "util.h"
36 #include "../perf.h"
37 
/*
 * Shared default initializer so that callchain_param and
 * callchain_param_default always start out identical.
 */
#define CALLCHAIN_PARAM_DEFAULT			\
	.mode		= CHAIN_GRAPH_ABS,	\
	.min_percent	= 0.5,			\
	.order		= ORDER_CALLEE,		\
	.key		= CCKEY_FUNCTION,	\
	.value		= CCVAL_PERCENT,	\

/* Live parameters, mutated in place by the option parsers below. */
struct callchain_param callchain_param = {
	CALLCHAIN_PARAM_DEFAULT
};

/*
 * Are there any events using DWARF callchains?
 *
 * I.e.
 *
 * -e cycles/call-graph=dwarf/
 */
bool dwarf_callchain_users;

/* Pristine defaults, kept for resetting/compare purposes. */
struct callchain_param callchain_param_default = {
	CALLCHAIN_PARAM_DEFAULT
};

/* Used for thread-local struct callchain_cursor. */
static pthread_key_t callchain_cursor;
64 
/* Public wrapper so option-parsing callers can reach parse_callchain_record(). */
int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
{
	return parse_callchain_record(arg, param);
}
69 
70 static int parse_callchain_mode(const char *value)
71 {
72 	if (!strncmp(value, "graph", strlen(value))) {
73 		callchain_param.mode = CHAIN_GRAPH_ABS;
74 		return 0;
75 	}
76 	if (!strncmp(value, "flat", strlen(value))) {
77 		callchain_param.mode = CHAIN_FLAT;
78 		return 0;
79 	}
80 	if (!strncmp(value, "fractal", strlen(value))) {
81 		callchain_param.mode = CHAIN_GRAPH_REL;
82 		return 0;
83 	}
84 	if (!strncmp(value, "folded", strlen(value))) {
85 		callchain_param.mode = CHAIN_FOLDED;
86 		return 0;
87 	}
88 	return -1;
89 }
90 
91 static int parse_callchain_order(const char *value)
92 {
93 	if (!strncmp(value, "caller", strlen(value))) {
94 		callchain_param.order = ORDER_CALLER;
95 		callchain_param.order_set = true;
96 		return 0;
97 	}
98 	if (!strncmp(value, "callee", strlen(value))) {
99 		callchain_param.order = ORDER_CALLEE;
100 		callchain_param.order_set = true;
101 		return 0;
102 	}
103 	return -1;
104 }
105 
106 static int parse_callchain_sort_key(const char *value)
107 {
108 	if (!strncmp(value, "function", strlen(value))) {
109 		callchain_param.key = CCKEY_FUNCTION;
110 		return 0;
111 	}
112 	if (!strncmp(value, "address", strlen(value))) {
113 		callchain_param.key = CCKEY_ADDRESS;
114 		return 0;
115 	}
116 	if (!strncmp(value, "srcline", strlen(value))) {
117 		callchain_param.key = CCKEY_SRCLINE;
118 		return 0;
119 	}
120 	if (!strncmp(value, "branch", strlen(value))) {
121 		callchain_param.branch_callstack = 1;
122 		return 0;
123 	}
124 	return -1;
125 }
126 
127 static int parse_callchain_value(const char *value)
128 {
129 	if (!strncmp(value, "percent", strlen(value))) {
130 		callchain_param.value = CCVAL_PERCENT;
131 		return 0;
132 	}
133 	if (!strncmp(value, "period", strlen(value))) {
134 		callchain_param.value = CCVAL_PERIOD;
135 		return 0;
136 	}
137 	if (!strncmp(value, "count", strlen(value))) {
138 		callchain_param.value = CCVAL_COUNT;
139 		return 0;
140 	}
141 	return -1;
142 }
143 
144 static int get_stack_size(const char *str, unsigned long *_size)
145 {
146 	char *endptr;
147 	unsigned long size;
148 	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
149 
150 	size = strtoul(str, &endptr, 0);
151 
152 	do {
153 		if (*endptr)
154 			break;
155 
156 		size = round_up(size, sizeof(u64));
157 		if (!size || size > max_size)
158 			break;
159 
160 		*_size = size;
161 		return 0;
162 
163 	} while (0);
164 
165 	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
166 	       max_size, str);
167 	return -1;
168 }
169 
/*
 * Parse a report-side --call-graph option string: a comma-separated mix of
 * print mode, order, sort key, value style, "none", optionally a record
 * option (when @allow_record_opt), a minimum percent and a print limit.
 * Returns 0 on success, -1 on error.
 */
static int
__parse_callchain_report_opt(const char *arg, bool allow_record_opt)
{
	char *tok;
	char *endptr, *saveptr = NULL;
	bool minpcnt_set = false;
	bool record_opt_set = false;
	bool try_stack_size = false;

	/* Enabled by default; the "none" token below turns it back off. */
	callchain_param.enabled = true;
	symbol_conf.use_callchain = true;

	if (!arg)
		return 0;

	/*
	 * NOTE(review): strtok_r() writes NUL bytes into the buffer, so the
	 * caller's string is modified despite the const parameter — confirm
	 * all callers pass disposable, writable strings.
	 */
	while ((tok = strtok_r((char *)arg, ",", &saveptr)) != NULL) {
		if (!strncmp(tok, "none", strlen(tok))) {
			callchain_param.mode = CHAIN_NONE;
			callchain_param.enabled = false;
			symbol_conf.use_callchain = false;
			return 0;
		}

		/* Try each keyword class in turn; first parser that accepts wins. */
		if (!parse_callchain_mode(tok) ||
		    !parse_callchain_order(tok) ||
		    !parse_callchain_sort_key(tok) ||
		    !parse_callchain_value(tok)) {
			/* parsing ok - move on to the next */
			try_stack_size = false;
			goto next;
		} else if (allow_record_opt && !record_opt_set) {
			if (parse_callchain_record(tok, &callchain_param))
				goto try_numbers;

			/* assume that number followed by 'dwarf' is stack size */
			if (callchain_param.record_mode == CALLCHAIN_DWARF)
				try_stack_size = true;

			record_opt_set = true;
			goto next;
		}

try_numbers:
		if (try_stack_size) {
			unsigned long size = 0;

			if (get_stack_size(tok, &size) < 0)
				return -1;
			callchain_param.dump_size = size;
			try_stack_size = false;
		} else if (!minpcnt_set) {
			/* try to get the min percent */
			callchain_param.min_percent = strtod(tok, &endptr);
			if (tok == endptr)
				return -1;
			minpcnt_set = true;
		} else {
			/* try print limit at last */
			callchain_param.print_limit = strtoul(tok, &endptr, 0);
			if (tok == endptr)
				return -1;
		}
next:
		/* strtok_r() continues from saved state on later iterations. */
		arg = NULL;
	}

	if (callchain_register_param(&callchain_param) < 0) {
		pr_err("Can't register callchain params\n");
		return -1;
	}
	return 0;
}
242 
/* perf report: parse --call-graph, record options not allowed. */
int parse_callchain_report_opt(const char *arg)
{
	return __parse_callchain_report_opt(arg, false);
}
247 
/* perf top: parse --call-graph, record options allowed. */
int parse_callchain_top_opt(const char *arg)
{
	return __parse_callchain_report_opt(arg, true);
}
252 
253 int parse_callchain_record(const char *arg, struct callchain_param *param)
254 {
255 	char *tok, *name, *saveptr = NULL;
256 	char *buf;
257 	int ret = -1;
258 
259 	/* We need buffer that we know we can write to. */
260 	buf = malloc(strlen(arg) + 1);
261 	if (!buf)
262 		return -ENOMEM;
263 
264 	strcpy(buf, arg);
265 
266 	tok = strtok_r((char *)buf, ",", &saveptr);
267 	name = tok ? : (char *)buf;
268 
269 	do {
270 		/* Framepointer style */
271 		if (!strncmp(name, "fp", sizeof("fp"))) {
272 			ret = 0;
273 			param->record_mode = CALLCHAIN_FP;
274 
275 			tok = strtok_r(NULL, ",", &saveptr);
276 			if (tok) {
277 				unsigned long size;
278 
279 				if (!strncmp(tok, "defer", sizeof("defer"))) {
280 					param->defer = true;
281 				} else {
282 					size = strtoul(tok, &name, 0);
283 					if (size < (unsigned) sysctl__max_stack())
284 						param->max_stack = size;
285 				}
286 			}
287 			break;
288 
289 		/* Dwarf style */
290 		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
291 			const unsigned long default_stack_dump_size = 8192;
292 
293 			ret = 0;
294 			param->record_mode = CALLCHAIN_DWARF;
295 			param->dump_size = default_stack_dump_size;
296 			dwarf_callchain_users = true;
297 
298 			tok = strtok_r(NULL, ",", &saveptr);
299 			if (tok) {
300 				unsigned long size = 0;
301 
302 				ret = get_stack_size(tok, &size);
303 				param->dump_size = size;
304 			}
305 		} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
306 			if (!strtok_r(NULL, ",", &saveptr)) {
307 				param->record_mode = CALLCHAIN_LBR;
308 				ret = 0;
309 			} else
310 				pr_err("callchain: No more arguments "
311 					"needed for --call-graph lbr\n");
312 			break;
313 		} else {
314 			pr_err("callchain: Unknown --call-graph option "
315 			       "value: %s\n", arg);
316 			break;
317 		}
318 
319 	} while (0);
320 
321 	free(buf);
322 
323 	if (param->defer && param->record_mode != CALLCHAIN_FP) {
324 		pr_err("callchain: deferred callchain only works with FP\n");
325 		return -EINVAL;
326 	}
327 
328 	return ret;
329 }
330 
/*
 * Handle "call-graph.*" entries from perfconfig, mapping each key onto the
 * matching parser/field of the global callchain_param.  Returns 0 when the
 * variable is not ours or was applied successfully, negative on a
 * malformed value.
 */
int perf_callchain_config(const char *var, const char *value)
{
	char *endptr;

	if (!strstarts(var, "call-graph."))
		return 0;
	/* Skip past the "call-graph." prefix. */
	var += sizeof("call-graph.") - 1;

	if (!strcmp(var, "record-mode"))
		return parse_callchain_record_opt(value, &callchain_param);
	if (!strcmp(var, "dump-size")) {
		unsigned long size = 0;
		int ret;

		ret = get_stack_size(value, &size);
		callchain_param.dump_size = size;

		return ret;
	}
	if (!strcmp(var, "print-type")){
		int ret;
		ret = parse_callchain_mode(value);
		if (ret == -1)
			pr_err("Invalid callchain mode: %s\n", value);
		return ret;
	}
	if (!strcmp(var, "order")){
		int ret;
		ret = parse_callchain_order(value);
		if (ret == -1)
			pr_err("Invalid callchain order: %s\n", value);
		return ret;
	}
	if (!strcmp(var, "sort-key")){
		int ret;
		ret = parse_callchain_sort_key(value);
		if (ret == -1)
			pr_err("Invalid callchain sort key: %s\n", value);
		return ret;
	}
	if (!strcmp(var, "threshold")) {
		callchain_param.min_percent = strtod(value, &endptr);
		if (value == endptr) {
			pr_err("Invalid callchain threshold: %s\n", value);
			return -1;
		}
	}
	if (!strcmp(var, "print-limit")) {
		/*
		 * NOTE(review): parsed with strtod() although the limit is
		 * integral — confirm this is intentional.
		 */
		callchain_param.print_limit = strtod(value, &endptr);
		if (value == endptr) {
			pr_err("Invalid callchain print limit: %s\n", value);
			return -1;
		}
	}

	return 0;
}
388 
/*
 * Insert @chain into the sorted output rbtree @root.  Bigger values go
 * left so that rb_first() yields the hottest node.  Flat/folded modes
 * order by the node's own hits; graph modes by the cumulated hits
 * (self + children).
 */
static void
rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
		    enum chain_mode mode)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct callchain_node *rnode;
	u64 chain_cumul = callchain_cumul_hits(chain);

	while (*p) {
		u64 rnode_cumul;

		parent = *p;
		rnode = rb_entry(parent, struct callchain_node, rb_node);
		rnode_cumul = callchain_cumul_hits(rnode);

		switch (mode) {
		case CHAIN_FLAT:
		case CHAIN_FOLDED:
			/* Order by the node's own hit count. */
			if (rnode->hit < chain->hit)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
			break;
		case CHAIN_GRAPH_ABS: /* Falldown */
		case CHAIN_GRAPH_REL:
			/* Order by cumulated (self + children) hits. */
			if (rnode_cumul < chain_cumul)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
			break;
		case CHAIN_NONE:
		default:
			break;
		}
	}

	rb_link_node(&chain->rb_node, parent, p);
	rb_insert_color(&chain->rb_node, root);
}
429 
430 static void
431 __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
432 		  u64 min_hit)
433 {
434 	struct rb_node *n;
435 	struct callchain_node *child;
436 
437 	n = rb_first(&node->rb_root_in);
438 	while (n) {
439 		child = rb_entry(n, struct callchain_node, rb_node_in);
440 		n = rb_next(n);
441 
442 		__sort_chain_flat(rb_root, child, min_hit);
443 	}
444 
445 	if (node->hit && node->hit >= min_hit)
446 		rb_insert_callchain(rb_root, node, CHAIN_FLAT);
447 }
448 
/*
 * Once we have every callchain from the stream, we can now
 * sort them by hit
 */
static void
sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
		u64 min_hit, struct callchain_param *param __maybe_unused)
{
	/* Rebuild the output tree from scratch on every sort. */
	*rb_root = RB_ROOT;
	__sort_chain_flat(rb_root, &root->node, min_hit);
}
460 
461 static void __sort_chain_graph_abs(struct callchain_node *node,
462 				   u64 min_hit)
463 {
464 	struct rb_node *n;
465 	struct callchain_node *child;
466 
467 	node->rb_root = RB_ROOT;
468 	n = rb_first(&node->rb_root_in);
469 
470 	while (n) {
471 		child = rb_entry(n, struct callchain_node, rb_node_in);
472 		n = rb_next(n);
473 
474 		__sort_chain_graph_abs(child, min_hit);
475 		if (callchain_cumul_hits(child) >= min_hit)
476 			rb_insert_callchain(&node->rb_root, child,
477 					    CHAIN_GRAPH_ABS);
478 	}
479 }
480 
/* Sort the graph by absolute cumulated hits, pruning below @min_hit. */
static void
sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
		     u64 min_hit, struct callchain_param *param __maybe_unused)
{
	__sort_chain_graph_abs(&chain_root->node, min_hit);
	/* Expose the root node's sorted children as the result tree. */
	rb_root->rb_node = chain_root->node.rb_root.rb_node;
}
488 
489 static void __sort_chain_graph_rel(struct callchain_node *node,
490 				   double min_percent)
491 {
492 	struct rb_node *n;
493 	struct callchain_node *child;
494 	u64 min_hit;
495 
496 	node->rb_root = RB_ROOT;
497 	min_hit = ceil(node->children_hit * min_percent);
498 
499 	n = rb_first(&node->rb_root_in);
500 	while (n) {
501 		child = rb_entry(n, struct callchain_node, rb_node_in);
502 		n = rb_next(n);
503 
504 		__sort_chain_graph_rel(child, min_percent);
505 		if (callchain_cumul_hits(child) >= min_hit)
506 			rb_insert_callchain(&node->rb_root, child,
507 					    CHAIN_GRAPH_REL);
508 	}
509 }
510 
/* Sort the graph by relative cumulated hits; threshold comes from param. */
static void
sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
		     u64 min_hit __maybe_unused, struct callchain_param *param)
{
	/* min_percent is a percentage; convert to a 0..1 fraction. */
	__sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0);
	rb_root->rb_node = chain_root->node.rb_root.rb_node;
}
518 
519 int callchain_register_param(struct callchain_param *param)
520 {
521 	switch (param->mode) {
522 	case CHAIN_GRAPH_ABS:
523 		param->sort = sort_chain_graph_abs;
524 		break;
525 	case CHAIN_GRAPH_REL:
526 		param->sort = sort_chain_graph_rel;
527 		break;
528 	case CHAIN_FLAT:
529 	case CHAIN_FOLDED:
530 		param->sort = sort_chain_flat;
531 		break;
532 	case CHAIN_NONE:
533 	default:
534 		return -1;
535 	}
536 	return 0;
537 }
538 
/*
 * Create a child for a parent. If inherit_children, then the new child
 * will become the new parent of its parent's children
 */
static struct callchain_node *
create_child(struct callchain_node *parent, bool inherit_children)
{
	struct callchain_node *new;

	new = zalloc(sizeof(*new));
	if (!new) {
		perror("not enough memory to create child for code path tree");
		return NULL;
	}
	new->parent = parent;
	INIT_LIST_HEAD(&new->val);
	INIT_LIST_HEAD(&new->parent_val);

	if (inherit_children) {
		struct rb_node *n;
		struct callchain_node *child;

		/* Steal the parent's entire child tree... */
		new->rb_root_in = parent->rb_root_in;
		parent->rb_root_in = RB_ROOT;

		/* ...and reparent every stolen child to the new node. */
		n = rb_first(&new->rb_root_in);
		while (n) {
			child = rb_entry(n, struct callchain_node, rb_node_in);
			child->parent = new;
			n = rb_next(n);
		}

		/* make it the first child */
		rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
	}

	return new;
}
578 
579 
580 /*
581  * Fill the node with callchain values
582  */
583 static int
584 fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
585 {
586 	struct callchain_cursor_node *cursor_node;
587 
588 	node->val_nr = cursor->nr - cursor->pos;
589 	if (!node->val_nr)
590 		pr_warning("Warning: empty node in callchain tree\n");
591 
592 	cursor_node = callchain_cursor_current(cursor);
593 
594 	while (cursor_node) {
595 		struct callchain_list *call;
596 
597 		call = zalloc(sizeof(*call));
598 		if (!call) {
599 			perror("not enough memory for the code path tree");
600 			return -ENOMEM;
601 		}
602 		call->ip = cursor_node->ip;
603 		map_symbol__copy(&call->ms, &cursor_node->ms);
604 		call->srcline = cursor_node->srcline;
605 
606 		if (cursor_node->branch) {
607 			call->branch_count = 1;
608 
609 			if (cursor_node->branch_from) {
610 				/*
611 				 * branch_from is set with value somewhere else
612 				 * to imply it's "to" of a branch.
613 				 */
614 				if (!call->brtype_stat) {
615 					call->brtype_stat = zalloc(sizeof(*call->brtype_stat));
616 					if (!call->brtype_stat) {
617 						perror("not enough memory for the code path branch statistics");
618 						zfree(&call->brtype_stat);
619 						return -ENOMEM;
620 					}
621 				}
622 				call->brtype_stat->branch_to = true;
623 
624 				if (cursor_node->branch_flags.predicted)
625 					call->predicted_count = 1;
626 
627 				if (cursor_node->branch_flags.abort)
628 					call->abort_count = 1;
629 
630 				branch_type_count(call->brtype_stat,
631 						  &cursor_node->branch_flags,
632 						  cursor_node->branch_from,
633 						  cursor_node->ip);
634 			} else {
635 				/*
636 				 * It's "from" of a branch
637 				 */
638 				if (call->brtype_stat && call->brtype_stat->branch_to)
639 					call->brtype_stat->branch_to = false;
640 				call->cycles_count =
641 					cursor_node->branch_flags.cycles;
642 				call->iter_count = cursor_node->nr_loop_iter;
643 				call->iter_cycles = cursor_node->iter_cycles;
644 			}
645 		}
646 
647 		list_add_tail(&call->list, &node->val);
648 
649 		callchain_cursor_advance(cursor);
650 		cursor_node = callchain_cursor_current(cursor);
651 	}
652 	return 0;
653 }
654 
/*
 * Allocate a new child of @parent and fill it with the rest of @cursor.
 * On fill failure every partially-built list entry is torn down (list
 * unlink, map/symbol references, branch stats) before the node is freed.
 * Returns the new node, or NULL on allocation failure.
 */
static struct callchain_node *
add_child(struct callchain_node *parent,
	  struct callchain_cursor *cursor,
	  u64 period)
{
	struct callchain_node *new;

	new = create_child(parent, false);
	if (new == NULL)
		return NULL;

	if (fill_node(new, cursor) < 0) {
		struct callchain_list *call, *tmp;

		list_for_each_entry_safe(call, tmp, &new->val, list) {
			list_del_init(&call->list);
			map_symbol__exit(&call->ms);
			zfree(&call->brtype_stat);
			free(call);
		}
		free(new);
		return NULL;
	}

	/* A fresh leaf: all hits and counts are its own. */
	new->children_hit = 0;
	new->hit = period;
	new->children_count = 0;
	new->count = 1;
	return new;
}
685 
enum match_result {
	MATCH_ERROR  = -1,
	MATCH_EQ,
	MATCH_LT,
	MATCH_GT,
};

/*
 * Three-way comparison of two possibly-NULL strings.  A NULL string
 * orders after a non-NULL one; two NULLs cannot be compared and yield
 * MATCH_ERROR.
 */
static enum match_result match_chain_strings(const char *left,
					     const char *right)
{
	int cmp;

	if (!left && !right)
		return MATCH_ERROR;
	if (!left)
		return MATCH_GT;
	if (!right)
		return MATCH_LT;

	cmp = strcmp(left, right);
	if (cmp < 0)
		return MATCH_LT;
	if (cmp > 0)
		return MATCH_GT;
	return MATCH_EQ;
}
713 
714 /*
715  * We need to always use relative addresses because we're aggregating
716  * callchains from multiple threads, i.e. different address spaces, so
717  * comparing absolute addresses make no sense as a symbol in a DSO may end up
718  * in a different address when used in a different binary or even the same
719  * binary but with some sort of address randomization technique, thus we need
720  * to compare just relative addresses. -acme
721  */
722 static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip,
723 						   struct map *right_map, u64 right_ip)
724 {
725 	struct dso *left_dso = left_map ? map__dso(left_map) : NULL;
726 	struct dso *right_dso = right_map ? map__dso(right_map) : NULL;
727 
728 	if (left_dso != right_dso)
729 		return left_dso < right_dso ? MATCH_LT : MATCH_GT;
730 
731 	if (left_ip != right_ip)
732  		return left_ip < right_ip ? MATCH_LT : MATCH_GT;
733 
734 	return MATCH_EQ;
735 }
736 
/*
 * Compare a cursor entry against an existing callchain_list entry using
 * the configured comparison key (srcline, function or raw address),
 * falling through to a weaker key when the stronger one has no data.
 * On an exact match with branch data, fold the entry's branch statistics
 * into @cnode as a side effect.
 */
static enum match_result match_chain(struct callchain_cursor_node *node,
				     struct callchain_list *cnode)
{
	enum match_result match = MATCH_ERROR;

	switch (callchain_param.key) {
	case CCKEY_SRCLINE:
		match = match_chain_strings(cnode->srcline, node->srcline);
		if (match != MATCH_ERROR)
			break;
		/* otherwise fall-back to symbol-based comparison below */
		fallthrough;
	case CCKEY_FUNCTION:
		if (node->ms.sym && cnode->ms.sym) {
			/*
			 * Compare inlined frames based on their symbol name
			 * because different inlined frames will have the same
			 * symbol start. Otherwise do a faster comparison based
			 * on the symbol start address.
			 */
			if (cnode->ms.sym->inlined || node->ms.sym->inlined) {
				match = match_chain_strings(cnode->ms.sym->name,
							    node->ms.sym->name);
				if (match != MATCH_ERROR)
					break;
			} else {
				match = match_chain_dso_addresses(cnode->ms.map, cnode->ms.sym->start,
								  node->ms.map, node->ms.sym->start);
				break;
			}
		}
		/* otherwise fall-back to IP-based comparison below */
		fallthrough;
	case CCKEY_ADDRESS:
	default:
		match = match_chain_dso_addresses(cnode->ms.map, cnode->ip, node->ms.map, node->ip);
		break;
	}

	if (match == MATCH_EQ && node->branch) {
		cnode->branch_count++;

		if (node->branch_from) {
			/*
			 * It's "to" of a branch
			 */
			/* Branch-type stats are allocated lazily, on first use. */
			if (!cnode->brtype_stat) {
				cnode->brtype_stat = zalloc(sizeof(*cnode->brtype_stat));
				if (!cnode->brtype_stat) {
					perror("not enough memory for the code path branch statistics");
					return MATCH_ERROR;
				}
			}
			cnode->brtype_stat->branch_to = true;

			if (node->branch_flags.predicted)
				cnode->predicted_count++;

			if (node->branch_flags.abort)
				cnode->abort_count++;

			branch_type_count(cnode->brtype_stat,
					  &node->branch_flags,
					  node->branch_from,
					  node->ip);
		} else {
			/*
			 * It's "from" of a branch
			 */
			if (cnode->brtype_stat && cnode->brtype_stat->branch_to)
				cnode->brtype_stat->branch_to = false;
			cnode->cycles_count += node->branch_flags.cycles;
			cnode->iter_count += node->nr_loop_iter;
			cnode->iter_cycles += node->iter_cycles;
			cnode->from_count++;
		}
	}

	return match;
}
817 
/*
 * Split the parent in two parts (a new child is created) and
 * give a part of its callchain to the created child.
 * Then create another child to host the given callchain of new branch
 */
static int
split_add_child(struct callchain_node *parent,
		struct callchain_cursor *cursor,
		struct callchain_list *to_split,
		u64 idx_parents, u64 idx_local, u64 period)
{
	struct callchain_node *new;
	struct list_head *old_tail;
	unsigned int idx_total = idx_parents + idx_local;

	/* split */
	new = create_child(parent, true);
	if (new == NULL)
		return -1;

	/*
	 * Split the callchain and move a part to the new child: splice the
	 * [to_split .. old tail] sublist out of parent->val and make it
	 * new->val by hand-stitching the four boundary pointers.
	 */
	old_tail = parent->val.prev;
	list_del_range(&to_split->list, old_tail);
	new->val.next = &to_split->list;
	new->val.prev = old_tail;
	to_split->list.prev = &new->val;
	old_tail->next = &new->val;

	/* split the hits */
	new->hit = parent->hit;
	new->children_hit = parent->children_hit;
	parent->children_hit = callchain_cumul_hits(new);
	new->val_nr = parent->val_nr - idx_local;
	parent->val_nr = idx_local;
	new->count = parent->count;
	new->children_count = parent->children_count;
	parent->children_count = callchain_cumul_counts(new);

	/* create a new child for the new branch if any */
	if (idx_total < cursor->nr) {
		struct callchain_node *first;
		struct callchain_list *cnode;
		struct callchain_cursor_node *node;
		struct rb_node *p, **pp;

		/* Parent is now purely internal: all hits belong to children. */
		parent->hit = 0;
		parent->children_hit += period;
		parent->count = 0;
		parent->children_count += 1;

		node = callchain_cursor_current(cursor);
		new = add_child(parent, cursor, period);
		if (new == NULL)
			return -1;

		/*
		 * This is second child since we moved parent's children
		 * to new (first) child above.
		 */
		p = parent->rb_root_in.rb_node;
		first = rb_entry(p, struct callchain_node, rb_node_in);
		cnode = list_first_entry(&first->val, struct callchain_list,
					 list);

		if (match_chain(node, cnode) == MATCH_LT)
			pp = &p->rb_left;
		else
			pp = &p->rb_right;

		rb_link_node(&new->rb_node_in, p, pp);
		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
	} else {
		/* Entire chain consumed: the split point takes the hit. */
		parent->hit = period;
		parent->count = 1;
	}
	return 0;
}
895 
896 static enum match_result
897 append_chain(struct callchain_node *root,
898 	     struct callchain_cursor *cursor,
899 	     u64 period);
900 
/*
 * Find the child of @root matching the cursor's current entry and append
 * the remaining chain to it, creating a new child when nothing matches.
 * @period is then accounted into @root's children totals.
 * Returns 0 on success, -1 on error.
 */
static int
append_chain_children(struct callchain_node *root,
		      struct callchain_cursor *cursor,
		      u64 period)
{
	struct callchain_node *rnode;
	struct callchain_cursor_node *node;
	struct rb_node **p = &root->rb_root_in.rb_node;
	struct rb_node *parent = NULL;

	node = callchain_cursor_current(cursor);
	if (!node)
		return -1;

	/* lookup in children */
	while (*p) {
		enum match_result ret;

		parent = *p;
		rnode = rb_entry(parent, struct callchain_node, rb_node_in);

		/* If at least first entry matches, rely to children */
		ret = append_chain(rnode, cursor, period);
		if (ret == MATCH_EQ)
			goto inc_children_hit;
		if (ret == MATCH_ERROR)
			return -1;

		/* No match: descend following the comparison result. */
		if (ret == MATCH_LT)
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	/* nothing in children, add to the current node */
	rnode = add_child(root, cursor, period);
	if (rnode == NULL)
		return -1;

	rb_link_node(&rnode->rb_node_in, parent, p);
	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);

inc_children_hit:
	root->children_hit += period;
	root->children_count++;
	return 0;
}
947 
/*
 * Try to append the cursor's remaining chain onto @root: consume the
 * longest matching prefix of root's value list, then either split the
 * node, bump its hit count, or recurse into its children.
 */
static enum match_result
append_chain(struct callchain_node *root,
	     struct callchain_cursor *cursor,
	     u64 period)
{
	struct callchain_list *cnode;
	u64 start = cursor->pos;
	bool found = false;
	u64 matches;
	enum match_result cmp = MATCH_ERROR;

	/*
	 * Lookup in the current node
	 * If we have a symbol, then compare the start to match
	 * anywhere inside a function, unless function
	 * mode is disabled.
	 */
	list_for_each_entry(cnode, &root->val, list) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (!node)
			break;

		cmp = match_chain(node, cnode);
		if (cmp != MATCH_EQ)
			break;

		found = true;

		callchain_cursor_advance(cursor);
	}

	/* matched nothing, relay to the parent */
	if (!found) {
		WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n");
		return cmp;
	}

	matches = cursor->pos - start;

	/* we match only a part of the node. Split it and add the new chain */
	if (matches < root->val_nr) {
		if (split_add_child(root, cursor, cnode, start, matches,
				    period) < 0)
			return MATCH_ERROR;

		return MATCH_EQ;
	}

	/* we match 100% of the path, increment the hit */
	if (matches == root->val_nr && cursor->pos == cursor->nr) {
		root->hit += period;
		root->count++;
		return MATCH_EQ;
	}

	/* We match the node and still have a part remaining */
	if (append_chain_children(root, cursor, period) < 0)
		return MATCH_ERROR;

	return MATCH_EQ;
}
1011 
1012 int callchain_append(struct callchain_root *root,
1013 		     struct callchain_cursor *cursor,
1014 		     u64 period)
1015 {
1016 	if (cursor == NULL)
1017 		return -1;
1018 
1019 	if (!cursor->nr)
1020 		return 0;
1021 
1022 	callchain_cursor_commit(cursor);
1023 
1024 	if (append_chain_children(&root->node, cursor, period) < 0)
1025 		return -1;
1026 
1027 	if (cursor->nr > root->max_depth)
1028 		root->max_depth = cursor->nr;
1029 
1030 	return 0;
1031 }
1032 
/*
 * Recursively move every chain stored under @src into @dst: each node's
 * value list is replayed through @cursor and re-appended to @dst, then
 * the node's children are merged the same way.  @src's list entries and
 * child nodes are freed as they are consumed; the cursor is rewound to
 * its entry state before returning.
 */
static int
merge_chain_branch(struct callchain_cursor *cursor,
		   struct callchain_node *dst, struct callchain_node *src)
{
	struct callchain_cursor_node **old_last = cursor->last;
	struct callchain_node *child;
	struct callchain_list *list, *next_list;
	struct rb_node *n;
	int old_pos = cursor->nr;
	int err = 0;

	/* Replay src's own value list into the cursor, freeing as we go. */
	list_for_each_entry_safe(list, next_list, &src->val, list) {
		struct map_symbol ms = {
			.thread = thread__get(list->ms.thread),
			.map = map__get(list->ms.map),
		};
		callchain_cursor_append(cursor, list->ip, &ms, false, NULL, 0, 0, 0, list->srcline);
		list_del_init(&list->list);
		map_symbol__exit(&ms);
		map_symbol__exit(&list->ms);
		zfree(&list->brtype_stat);
		free(list);
	}

	if (src->hit) {
		callchain_cursor_commit(cursor);
		if (append_chain_children(dst, cursor, src->hit) < 0)
			return -1;
	}

	n = rb_first(&src->rb_root_in);
	while (n) {
		child = container_of(n, struct callchain_node, rb_node_in);
		n = rb_next(n);
		rb_erase(&child->rb_node_in, &src->rb_root_in);

		err = merge_chain_branch(cursor, dst, child);
		if (err)
			/* NOTE(review): @child is not freed on error — confirm intended. */
			break;

		free(child);
	}

	/* Rewind the cursor so the caller's view is unchanged. */
	cursor->nr = old_pos;
	cursor->last = old_last;

	return err;
}
1081 
/* Merge the whole radix tree under @src into @dst, consuming @src's nodes. */
int callchain_merge(struct callchain_cursor *cursor,
		    struct callchain_root *dst, struct callchain_root *src)
{
	return merge_chain_branch(cursor, &dst->node, &src->node);
}
1087 
1088 int callchain_cursor_append(struct callchain_cursor *cursor,
1089 			    u64 ip, struct map_symbol *ms,
1090 			    bool branch, struct branch_flags *flags,
1091 			    int nr_loop_iter, u64 iter_cycles, u64 branch_from,
1092 			    const char *srcline)
1093 {
1094 	struct callchain_cursor_node *node = *cursor->last;
1095 
1096 	if (!node) {
1097 		node = calloc(1, sizeof(*node));
1098 		if (!node)
1099 			return -ENOMEM;
1100 
1101 		*cursor->last = node;
1102 	}
1103 
1104 	node->ip = ip;
1105 	map_symbol__exit(&node->ms);
1106 	map_symbol__copy(&node->ms, ms);
1107 	node->branch = branch;
1108 	node->nr_loop_iter = nr_loop_iter;
1109 	node->iter_cycles = iter_cycles;
1110 	node->srcline = srcline;
1111 
1112 	if (flags)
1113 		memcpy(&node->branch_flags, flags,
1114 			sizeof(struct branch_flags));
1115 
1116 	node->branch_from = branch_from;
1117 	cursor->nr++;
1118 
1119 	cursor->last = &node->next;
1120 
1121 	return 0;
1122 }
1123 
1124 int sample__resolve_callchain(struct perf_sample *sample,
1125 			      struct callchain_cursor *cursor, struct symbol **parent,
1126 			      struct evsel *evsel, struct addr_location *al,
1127 			      int max_stack)
1128 {
1129 	if (sample->callchain == NULL && !symbol_conf.show_branchflag_count)
1130 		return 0;
1131 
1132 	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
1133 	    perf_hpp_list.parent || symbol_conf.show_branchflag_count) {
1134 		return thread__resolve_callchain(al->thread, cursor, evsel, sample,
1135 						 parent, al, max_stack);
1136 	}
1137 	return 0;
1138 }
1139 
1140 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample)
1141 {
1142 	if ((!symbol_conf.use_callchain || sample->callchain == NULL) &&
1143 		!symbol_conf.show_branchflag_count)
1144 		return 0;
1145 	return callchain_append(he->callchain, get_tls_callchain_cursor(), sample->period);
1146 }
1147 
/*
 * Fill @al from a resolved cursor entry @node so generic addr_location
 * consumers can use it.  Returns 0 when the entry should be skipped
 * (symbol unresolved and @hide_unresolved set), 1 otherwise.
 */
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
			bool hide_unresolved)
{
	struct machine *machine = NULL;

	if (node->ms.thread)
		machine = maps__machine(thread__maps(node->ms.thread));

	/* Replace whatever map reference @al already holds. */
	map__put(al->map);
	al->map = map__get(node->ms.map);
	al->sym = node->ms.sym;
	al->srcline = node->srcline;
	al->addr = node->ip;

	if (al->sym == NULL) {
		if (hide_unresolved)
			return 0;
		if (al->map == NULL)
			goto out;
	}
	/*
	 * NOTE(review): machine stays NULL when node->ms.thread is unset;
	 * presumably the thread is always present by this point — confirm,
	 * since machine__kernel_maps()/machine__is_host() receive it below.
	 */
	if (maps__equal(thread__maps(al->thread), machine__kernel_maps(machine))) {
		/* Kernel address: host kernel vs. guest kernel. */
		if (machine__is_host(machine)) {
			al->cpumode = PERF_RECORD_MISC_KERNEL;
			al->level = 'k';
		} else {
			al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
			al->level = 'g';
		}
	} else {
		/* User address: host user, guest user, or hypervisor. */
		if (machine__is_host(machine)) {
			al->cpumode = PERF_RECORD_MISC_USER;
			al->level = '.';
		} else if (perf_guest) {
			al->cpumode = PERF_RECORD_MISC_GUEST_USER;
			al->level = 'u';
		} else {
			al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
			al->level = 'H';
		}
	}

out:
	return 1;
}
1192 
1193 char *callchain_list__sym_name(struct callchain_list *cl,
1194 			       char *bf, size_t bfsize, bool show_dso)
1195 {
1196 	bool show_addr = callchain_param.key == CCKEY_ADDRESS;
1197 	bool show_srcline = show_addr || callchain_param.key == CCKEY_SRCLINE;
1198 	int printed;
1199 
1200 	if (cl->ms.sym) {
1201 		const char *inlined = cl->ms.sym->inlined ? " (inlined)" : "";
1202 
1203 		if (show_srcline && cl->srcline)
1204 			printed = scnprintf(bf, bfsize, "%s %s%s",
1205 					    cl->ms.sym->name, cl->srcline,
1206 					    inlined);
1207 		else
1208 			printed = scnprintf(bf, bfsize, "%s%s",
1209 					    cl->ms.sym->name, inlined);
1210 	} else
1211 		printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
1212 
1213 	if (show_dso)
1214 		scnprintf(bf + printed, bfsize - printed, " %s",
1215 			  cl->ms.map ?
1216 			  dso__short_name(map__dso(cl->ms.map)) :
1217 			  "unknown");
1218 
1219 	return bf;
1220 }
1221 
1222 char *callchain_node__scnprintf_value(struct callchain_node *node,
1223 				      char *bf, size_t bfsize, u64 total)
1224 {
1225 	double percent = 0.0;
1226 	u64 period = callchain_cumul_hits(node);
1227 	unsigned count = callchain_cumul_counts(node);
1228 
1229 	if (callchain_param.mode == CHAIN_FOLDED) {
1230 		period = node->hit;
1231 		count = node->count;
1232 	}
1233 
1234 	switch (callchain_param.value) {
1235 	case CCVAL_PERIOD:
1236 		scnprintf(bf, bfsize, "%"PRIu64, period);
1237 		break;
1238 	case CCVAL_COUNT:
1239 		scnprintf(bf, bfsize, "%u", count);
1240 		break;
1241 	case CCVAL_PERCENT:
1242 	default:
1243 		if (total)
1244 			percent = period * 100.0 / total;
1245 		scnprintf(bf, bfsize, "%.2f%%", percent);
1246 		break;
1247 	}
1248 	return bf;
1249 }
1250 
1251 int callchain_node__fprintf_value(struct callchain_node *node,
1252 				 FILE *fp, u64 total)
1253 {
1254 	double percent = 0.0;
1255 	u64 period = callchain_cumul_hits(node);
1256 	unsigned count = callchain_cumul_counts(node);
1257 
1258 	if (callchain_param.mode == CHAIN_FOLDED) {
1259 		period = node->hit;
1260 		count = node->count;
1261 	}
1262 
1263 	switch (callchain_param.value) {
1264 	case CCVAL_PERIOD:
1265 		return fprintf(fp, "%"PRIu64, period);
1266 	case CCVAL_COUNT:
1267 		return fprintf(fp, "%u", count);
1268 	case CCVAL_PERCENT:
1269 	default:
1270 		if (total)
1271 			percent = period * 100.0 / total;
1272 		return percent_color_fprintf(fp, "%.2f%%", percent);
1273 	}
1274 	return 0;
1275 }
1276 
1277 static void callchain_counts_value(struct callchain_node *node,
1278 				   u64 *branch_count, u64 *predicted_count,
1279 				   u64 *abort_count, u64 *cycles_count)
1280 {
1281 	struct callchain_list *clist;
1282 
1283 	list_for_each_entry(clist, &node->val, list) {
1284 		if (branch_count)
1285 			*branch_count += clist->branch_count;
1286 
1287 		if (predicted_count)
1288 			*predicted_count += clist->predicted_count;
1289 
1290 		if (abort_count)
1291 			*abort_count += clist->abort_count;
1292 
1293 		if (cycles_count)
1294 			*cycles_count += clist->cycles_count;
1295 	}
1296 }
1297 
1298 static int callchain_node_branch_counts_cumul(struct callchain_node *node,
1299 					      u64 *branch_count,
1300 					      u64 *predicted_count,
1301 					      u64 *abort_count,
1302 					      u64 *cycles_count)
1303 {
1304 	struct callchain_node *child;
1305 	struct rb_node *n;
1306 
1307 	n = rb_first(&node->rb_root_in);
1308 	while (n) {
1309 		child = rb_entry(n, struct callchain_node, rb_node_in);
1310 		n = rb_next(n);
1311 
1312 		callchain_node_branch_counts_cumul(child, branch_count,
1313 						   predicted_count,
1314 						   abort_count,
1315 						   cycles_count);
1316 
1317 		callchain_counts_value(child, branch_count,
1318 				       predicted_count, abort_count,
1319 				       cycles_count);
1320 	}
1321 
1322 	return 0;
1323 }
1324 
1325 int callchain_branch_counts(struct callchain_root *root,
1326 			    u64 *branch_count, u64 *predicted_count,
1327 			    u64 *abort_count, u64 *cycles_count)
1328 {
1329 	if (branch_count)
1330 		*branch_count = 0;
1331 
1332 	if (predicted_count)
1333 		*predicted_count = 0;
1334 
1335 	if (abort_count)
1336 		*abort_count = 0;
1337 
1338 	if (cycles_count)
1339 		*cycles_count = 0;
1340 
1341 	return callchain_node_branch_counts_cumul(&root->node,
1342 						  branch_count,
1343 						  predicted_count,
1344 						  abort_count,
1345 						  cycles_count);
1346 }
1347 
1348 static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize)
1349 {
1350 	return scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value);
1351 }
1352 
/*
 * Print one "name:value%" pair for a percentage into @bf, suppressing
 * values below a non-zero @threshold.  The first pair of a group
 * (idx == 0) opens the " (" bracket.  Returns characters written (0
 * when suppressed).
 */
static int count_float_printf(int idx, const char *str, float value,
			      char *bf, int bfsize, float threshold)
{
	if (value < threshold && threshold != 0.0)
		return 0;

	return scnprintf(bf, bfsize, "%s%s:%.1f%%", idx ? " " : " (", str, value);
}
1361 
1362 static int branch_to_str(char *bf, int bfsize,
1363 			 u64 branch_count, u64 predicted_count,
1364 			 u64 abort_count,
1365 			 const struct branch_type_stat *brtype_stat)
1366 {
1367 	int printed, i = 0;
1368 
1369 	printed = branch_type_str(brtype_stat, bf, bfsize);
1370 	if (printed)
1371 		i++;
1372 
1373 	if (predicted_count < branch_count) {
1374 		printed += count_float_printf(i++, "predicted",
1375 				predicted_count * 100.0 / branch_count,
1376 				bf + printed, bfsize - printed, 0.0);
1377 	}
1378 
1379 	if (abort_count) {
1380 		printed += count_float_printf(i++, "abort",
1381 				abort_count * 100.0 / branch_count,
1382 				bf + printed, bfsize - printed, 0.1);
1383 	}
1384 
1385 	if (i)
1386 		printed += scnprintf(bf + printed, bfsize - printed, ")");
1387 
1388 	return printed;
1389 }
1390 
1391 static int branch_from_str(char *bf, int bfsize,
1392 			   u64 branch_count,
1393 			   u64 cycles_count, u64 iter_count,
1394 			   u64 iter_cycles, u64 from_count)
1395 {
1396 	int printed = 0, i = 0;
1397 	u64 cycles, v = 0;
1398 
1399 	cycles = cycles_count / branch_count;
1400 	if (cycles) {
1401 		printed += count_pri64_printf(i++, "cycles",
1402 				cycles,
1403 				bf + printed, bfsize - printed);
1404 	}
1405 
1406 	if (iter_count && from_count) {
1407 		v = iter_count / from_count;
1408 		if (v) {
1409 			printed += count_pri64_printf(i++, "iter",
1410 					v, bf + printed, bfsize - printed);
1411 
1412 			printed += count_pri64_printf(i++, "avg_cycles",
1413 					iter_cycles / iter_count,
1414 					bf + printed, bfsize - printed);
1415 		}
1416 	}
1417 
1418 	if (i)
1419 		printed += scnprintf(bf + printed, bfsize - printed, ")");
1420 
1421 	return printed;
1422 }
1423 
1424 static int counts_str_build(char *bf, int bfsize,
1425 			     u64 branch_count, u64 predicted_count,
1426 			     u64 abort_count, u64 cycles_count,
1427 			     u64 iter_count, u64 iter_cycles,
1428 			     u64 from_count,
1429 			     const struct branch_type_stat *brtype_stat)
1430 {
1431 	int printed;
1432 
1433 	if (branch_count == 0)
1434 		return scnprintf(bf, bfsize, " (calltrace)");
1435 
1436 	if (brtype_stat->branch_to) {
1437 		printed = branch_to_str(bf, bfsize, branch_count,
1438 				predicted_count, abort_count, brtype_stat);
1439 	} else {
1440 		printed = branch_from_str(bf, bfsize, branch_count,
1441 				cycles_count, iter_count, iter_cycles,
1442 				from_count);
1443 	}
1444 
1445 	if (!printed)
1446 		bf[0] = 0;
1447 
1448 	return printed;
1449 }
1450 
1451 static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
1452 				   u64 branch_count, u64 predicted_count,
1453 				   u64 abort_count, u64 cycles_count,
1454 				   u64 iter_count, u64 iter_cycles,
1455 				   u64 from_count,
1456 				   const struct branch_type_stat *brtype_stat)
1457 {
1458 	char str[256];
1459 
1460 	counts_str_build(str, sizeof(str), branch_count,
1461 			 predicted_count, abort_count, cycles_count,
1462 			 iter_count, iter_cycles, from_count, brtype_stat);
1463 
1464 	if (fp)
1465 		return fprintf(fp, "%s", str);
1466 
1467 	return scnprintf(bf, bfsize, "%s", str);
1468 }
1469 
1470 int callchain_list_counts__printf_value(struct callchain_list *clist,
1471 					FILE *fp, char *bf, int bfsize)
1472 {
1473 	static const struct branch_type_stat empty_brtype_stat = {};
1474 	const struct branch_type_stat *brtype_stat;
1475 	u64 branch_count, predicted_count;
1476 	u64 abort_count, cycles_count;
1477 	u64 iter_count, iter_cycles;
1478 	u64 from_count;
1479 
1480 	brtype_stat = clist->brtype_stat ?: &empty_brtype_stat;
1481 	branch_count = clist->branch_count;
1482 	predicted_count = clist->predicted_count;
1483 	abort_count = clist->abort_count;
1484 	cycles_count = clist->cycles_count;
1485 	iter_count = clist->iter_count;
1486 	iter_cycles = clist->iter_cycles;
1487 	from_count = clist->from_count;
1488 
1489 	return callchain_counts_printf(fp, bf, bfsize, branch_count,
1490 				       predicted_count, abort_count,
1491 				       cycles_count, iter_count, iter_cycles,
1492 				       from_count, brtype_stat);
1493 }
1494 
1495 static void free_callchain_node(struct callchain_node *node)
1496 {
1497 	struct callchain_list *list, *tmp;
1498 	struct callchain_node *child;
1499 	struct rb_node *n;
1500 
1501 	list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
1502 		list_del_init(&list->list);
1503 		map_symbol__exit(&list->ms);
1504 		zfree(&list->brtype_stat);
1505 		free(list);
1506 	}
1507 
1508 	list_for_each_entry_safe(list, tmp, &node->val, list) {
1509 		list_del_init(&list->list);
1510 		map_symbol__exit(&list->ms);
1511 		zfree(&list->brtype_stat);
1512 		free(list);
1513 	}
1514 
1515 	n = rb_first(&node->rb_root_in);
1516 	while (n) {
1517 		child = container_of(n, struct callchain_node, rb_node_in);
1518 		n = rb_next(n);
1519 		rb_erase(&child->rb_node_in, &node->rb_root_in);
1520 
1521 		free_callchain_node(child);
1522 		free(child);
1523 	}
1524 }
1525 
1526 void free_callchain(struct callchain_root *root)
1527 {
1528 	if (!symbol_conf.use_callchain)
1529 		return;
1530 
1531 	free_callchain_node(&root->node);
1532 }
1533 
1534 static u64 decay_callchain_node(struct callchain_node *node)
1535 {
1536 	struct callchain_node *child;
1537 	struct rb_node *n;
1538 	u64 child_hits = 0;
1539 
1540 	n = rb_first(&node->rb_root_in);
1541 	while (n) {
1542 		child = container_of(n, struct callchain_node, rb_node_in);
1543 
1544 		child_hits += decay_callchain_node(child);
1545 		n = rb_next(n);
1546 	}
1547 
1548 	node->hit = (node->hit * 7) / 8;
1549 	node->children_hit = child_hits;
1550 
1551 	return node->hit;
1552 }
1553 
1554 void decay_callchain(struct callchain_root *root)
1555 {
1556 	if (!symbol_conf.use_callchain)
1557 		return;
1558 
1559 	decay_callchain_node(&root->node);
1560 }
1561 
/*
 * Build @node->parent_val: copies of every callchain_list entry on the
 * path from the root down to (but not including) @node, in top-down
 * order.  Returns 0 on success, -ENOMEM on allocation failure (in which
 * case nothing is published to @node).
 */
int callchain_node__make_parent_list(struct callchain_node *node)
{
	struct callchain_node *parent = node->parent;
	struct callchain_list *chain, *new;
	LIST_HEAD(head);

	/* Walk ancestors bottom-up, copying each one's entries in reverse. */
	while (parent) {
		list_for_each_entry_reverse(chain, &parent->val, list) {
			new = malloc(sizeof(*new));
			if (new == NULL)
				goto out;
			/*
			 * NOTE(review): this struct copy duplicates the
			 * brtype_stat pointer of the original entry, and the
			 * error path below zfree()s it — presumably
			 * brtype_stat is NULL here; confirm, as otherwise
			 * memory still owned by the parent node would be
			 * freed.
			 */
			*new = *chain;
			new->has_children = false;
			map_symbol__copy(&new->ms, &chain->ms);
			list_add_tail(&new->list, &head);
		}
		parent = parent->parent;
	}

	/* Reverse the bottom-up temporary list so parent_val is root-first. */
	list_for_each_entry_safe_reverse(chain, new, &head, list)
		list_move_tail(&chain->list, &node->parent_val);

	if (!list_empty(&node->parent_val)) {
		/* The topmost copied entry folds in sibling information. */
		chain = list_first_entry(&node->parent_val, struct callchain_list, list);
		chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);

		chain = list_first_entry(&node->val, struct callchain_list, list);
		chain->has_children = false;
	}
	return 0;

out:
	/* Allocation failed: undo the partially built copy list. */
	list_for_each_entry_safe(chain, new, &head, list) {
		list_del_init(&chain->list);
		map_symbol__exit(&chain->ms);
		zfree(&chain->brtype_stat);
		free(chain);
	}
	return -ENOMEM;
}
1602 
1603 static void callchain_cursor__delete(void *vcursor)
1604 {
1605 	struct callchain_cursor *cursor = vcursor;
1606 	struct callchain_cursor_node *node, *next;
1607 
1608 	callchain_cursor_reset(cursor);
1609 	for (node = cursor->first; node != NULL; node = next) {
1610 		next = node->next;
1611 		free(node);
1612 	}
1613 	free(cursor);
1614 }
1615 
1616 static void init_callchain_cursor_key(void)
1617 {
1618 	if (pthread_key_create(&callchain_cursor, callchain_cursor__delete)) {
1619 		pr_err("callchain cursor creation failed");
1620 		abort();
1621 	}
1622 }
1623 
1624 struct callchain_cursor *get_tls_callchain_cursor(void)
1625 {
1626 	static pthread_once_t once_control = PTHREAD_ONCE_INIT;
1627 	struct callchain_cursor *cursor;
1628 
1629 	pthread_once(&once_control, init_callchain_cursor_key);
1630 	cursor = pthread_getspecific(callchain_cursor);
1631 	if (!cursor) {
1632 		cursor = zalloc(sizeof(*cursor));
1633 		if (!cursor)
1634 			pr_debug3("%s: not enough memory\n", __func__);
1635 		pthread_setspecific(callchain_cursor, cursor);
1636 	}
1637 	return cursor;
1638 }
1639 
1640 int callchain_cursor__copy(struct callchain_cursor *dst,
1641 			   struct callchain_cursor *src)
1642 {
1643 	int rc = 0;
1644 
1645 	callchain_cursor_reset(dst);
1646 	callchain_cursor_commit(src);
1647 
1648 	while (true) {
1649 		struct callchain_cursor_node *node;
1650 
1651 		node = callchain_cursor_current(src);
1652 		if (node == NULL)
1653 			break;
1654 
1655 		rc = callchain_cursor_append(dst, node->ip, &node->ms,
1656 					     node->branch, &node->branch_flags,
1657 					     node->nr_loop_iter,
1658 					     node->iter_cycles,
1659 					     node->branch_from, node->srcline);
1660 		if (rc)
1661 			break;
1662 
1663 		callchain_cursor_advance(src);
1664 	}
1665 
1666 	return rc;
1667 }
1668 
1669 /*
1670  * Initialize a cursor before adding entries inside, but keep
1671  * the previously allocated entries as a cache.
1672  */
1673 void callchain_cursor_reset(struct callchain_cursor *cursor)
1674 {
1675 	struct callchain_cursor_node *node;
1676 
1677 	cursor->nr = 0;
1678 	cursor->last = &cursor->first;
1679 
1680 	for (node = cursor->first; node != NULL; node = node->next)
1681 		map_symbol__exit(&node->ms);
1682 }
1683 
1684 void callchain_param_setup(u64 sample_type, uint16_t e_machine)
1685 {
1686 	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
1687 		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
1688 		    (sample_type & PERF_SAMPLE_STACK_USER)) {
1689 			callchain_param.record_mode = CALLCHAIN_DWARF;
1690 			dwarf_callchain_users = true;
1691 		} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
1692 			callchain_param.record_mode = CALLCHAIN_LBR;
1693 		else
1694 			callchain_param.record_mode = CALLCHAIN_FP;
1695 	}
1696 
1697 	/*
1698 	 * It's necessary to use libunwind to reliably determine the caller of
1699 	 * a leaf function on aarch64, as otherwise we cannot know whether to
1700 	 * start from the LR or FP.
1701 	 *
1702 	 * Always starting from the LR can result in duplicate or entirely
1703 	 * erroneous entries. Always skipping the LR and starting from the FP
1704 	 * can result in missing entries.
1705 	 */
1706 	if (callchain_param.record_mode == CALLCHAIN_FP && e_machine == EM_AARCH64)
1707 		dwarf_callchain_users = true;
1708 }
1709 
1710 static bool chain_match(struct callchain_list *base_chain,
1711 			struct callchain_list *pair_chain)
1712 {
1713 	enum match_result match;
1714 
1715 	match = match_chain_strings(base_chain->srcline,
1716 				    pair_chain->srcline);
1717 	if (match != MATCH_ERROR)
1718 		return match == MATCH_EQ;
1719 
1720 	match = match_chain_dso_addresses(base_chain->ms.map,
1721 					  base_chain->ip,
1722 					  pair_chain->ms.map,
1723 					  pair_chain->ip);
1724 
1725 	return match == MATCH_EQ;
1726 }
1727 
/*
 * Walk the entry lists of two callchain nodes in lockstep and report
 * whether they match entry for entry.  Entries where either side lacks
 * a srcline are skipped (both sides still advance).  Chains of unequal
 * length (e.g. ABC vs ABCD) do not match.
 */
bool callchain_cnode_matched(struct callchain_node *base_cnode,
			     struct callchain_node *pair_cnode)
{
	struct callchain_list *base_chain, *pair_chain;
	bool match = false;

	pair_chain = list_first_entry(&pair_cnode->val,
				      struct callchain_list,
				      list);

	list_for_each_entry(base_chain, &base_cnode->val, list) {
		/* pair list exhausted before base list: no match */
		if (&pair_chain->list == &pair_cnode->val)
			return false;

		if (!base_chain->srcline || !pair_chain->srcline) {
			pair_chain = list_next_entry(pair_chain, list);
			continue;
		}

		match = chain_match(base_chain, pair_chain);
		if (!match)
			return false;

		pair_chain = list_next_entry(pair_chain, list);
	}

	/*
	 * Say chain1 is ABC, chain2 is ABCD, we consider they are
	 * not fully matched.
	 */
	if (pair_chain && (&pair_chain->list != &pair_cnode->val))
		return false;

	return match;
}
1763 
1764 static u64 count_callchain_hits(struct hist_entry *he)
1765 {
1766 	struct rb_root *root = &he->sorted_chain;
1767 	struct rb_node *rb_node = rb_first(root);
1768 	struct callchain_node *node;
1769 	u64 chain_hits = 0;
1770 
1771 	while (rb_node) {
1772 		node = rb_entry(rb_node, struct callchain_node, rb_node);
1773 		chain_hits += node->hit;
1774 		rb_node = rb_next(rb_node);
1775 	}
1776 
1777 	return chain_hits;
1778 }
1779 
1780 u64 callchain_total_hits(struct hists *hists)
1781 {
1782 	struct rb_node *next = rb_first_cached(&hists->entries);
1783 	u64 chain_hits = 0;
1784 
1785 	while (next) {
1786 		struct hist_entry *he = rb_entry(next, struct hist_entry,
1787 						 rb_node);
1788 
1789 		chain_hits += count_callchain_hits(he);
1790 		next = rb_next(&he->rb_node);
1791 	}
1792 
1793 	return chain_hits;
1794 }
1795 
1796 s64 callchain_avg_cycles(struct callchain_node *cnode)
1797 {
1798 	struct callchain_list *chain;
1799 	s64 cycles = 0;
1800 
1801 	list_for_each_entry(chain, &cnode->val, list) {
1802 		if (chain->srcline && chain->branch_count)
1803 			cycles += chain->cycles_count / chain->branch_count;
1804 	}
1805 
1806 	return cycles;
1807 }
1808 
1809 int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
1810 				    struct perf_sample *sample, int max_stack,
1811 				    bool symbols, callchain_iter_fn cb, void *data)
1812 {
1813 	struct callchain_cursor *cursor = get_tls_callchain_cursor();
1814 	int ret;
1815 
1816 	if (!cursor)
1817 		return -ENOMEM;
1818 
1819 	/* Fill in the callchain. */
1820 	ret = __thread__resolve_callchain(thread, cursor, evsel, sample,
1821 					  /*parent=*/NULL, /*root_al=*/NULL,
1822 					  max_stack, symbols);
1823 	if (ret)
1824 		return ret;
1825 
1826 	/* Switch from writing the callchain to reading it. */
1827 	callchain_cursor_commit(cursor);
1828 
1829 	while (1) {
1830 		struct callchain_cursor_node *node = callchain_cursor_current(cursor);
1831 
1832 		if (!node)
1833 			break;
1834 
1835 		ret = cb(node, data);
1836 		if (ret)
1837 			return ret;
1838 
1839 		callchain_cursor_advance(cursor);
1840 	}
1841 	return 0;
1842 }
1843 
1844 /*
1845  * This function merges earlier samples (@sample_orig) waiting for deferred
1846  * user callchains with the matching callchain record (@sample_callchain)
1847  * which is delivered now.  The @sample_orig->callchain should be released
1848  * after use if ->deferred_callchain is set.
1849  */
1850 int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
1851 				     struct perf_sample *sample_callchain)
1852 {
1853 	u64 nr_orig = sample_orig->callchain->nr - 1;
1854 	u64 nr_deferred = sample_callchain->callchain->nr;
1855 	struct ip_callchain *callchain;
1856 
1857 	if (sample_orig->callchain->nr < 2) {
1858 		sample_orig->deferred_callchain = false;
1859 		return -EINVAL;
1860 	}
1861 
1862 	callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64));
1863 	if (callchain == NULL) {
1864 		sample_orig->deferred_callchain = false;
1865 		return -ENOMEM;
1866 	}
1867 
1868 	callchain->nr = nr_orig + nr_deferred;
1869 	/* copy original including PERF_CONTEXT_USER_DEFERRED (but the cookie) */
1870 	memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64));
1871 	/* copy deferred user callchains */
1872 	memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips,
1873 	       nr_deferred * sizeof(u64));
1874 
1875 	sample_orig->callchain = callchain;
1876 	return 0;
1877 }
1878