xref: /linux/tools/perf/util/callchain.c (revision ad5ceacd48e9ea36bd12e778071561290adb0154)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
4  *
5  * Handle the callchains from the stream in an ad-hoc radix tree and then
6  * sort them in an rbtree.
7  *
8  * Using a radix for code path provides a fast retrieval and factorizes
9  * memory use. Also that lets us use the paths in a hierarchical graph view.
10  *
11  */
12 
13 #include <inttypes.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <stdbool.h>
17 #include <errno.h>
18 #include <math.h>
19 #include <linux/string.h>
20 #include <linux/zalloc.h>
21 
22 #include "asm/bug.h"
23 
24 #include "debug.h"
25 #include "dso.h"
26 #include "event.h"
27 #include "hist.h"
28 #include "sort.h"
29 #include "machine.h"
30 #include "map.h"
31 #include "callchain.h"
32 #include "branch.h"
33 #include "record.h"
34 #include "symbol.h"
35 #include "thread.h"
36 #include "util.h"
37 #include "../perf.h"
38 
/*
 * Shared initializer so that callchain_param and callchain_param_default
 * below start out with identical settings.
 */
#define CALLCHAIN_PARAM_DEFAULT			\
	.mode		= CHAIN_GRAPH_ABS,	\
	.min_percent	= 0.5,			\
	.order		= ORDER_CALLEE,		\
	.key		= CCKEY_FUNCTION,	\
	.value		= CCVAL_PERCENT,	\

/* Global callchain parameters, adjusted by the option parsers below. */
struct callchain_param callchain_param = {
	CALLCHAIN_PARAM_DEFAULT
};
49 
50 /*
51  * Are there any events using DWARF callchains?
52  *
53  * I.e.
54  *
55  * -e cycles/call-graph=dwarf/
56  */
57 bool dwarf_callchain_users;
58 
/* Pristine copy of the defaults (see CALLCHAIN_PARAM_DEFAULT). */
struct callchain_param callchain_param_default = {
	CALLCHAIN_PARAM_DEFAULT
};

/* Used for thread-local struct callchain_cursor. */
static pthread_key_t callchain_cursor;
65 
/* Parse a record-time callchain option string into *param. */
int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
{
	return parse_callchain_record(arg, param);
}
70 
71 static int parse_callchain_mode(const char *value)
72 {
73 	if (!strncmp(value, "graph", strlen(value))) {
74 		callchain_param.mode = CHAIN_GRAPH_ABS;
75 		return 0;
76 	}
77 	if (!strncmp(value, "flat", strlen(value))) {
78 		callchain_param.mode = CHAIN_FLAT;
79 		return 0;
80 	}
81 	if (!strncmp(value, "fractal", strlen(value))) {
82 		callchain_param.mode = CHAIN_GRAPH_REL;
83 		return 0;
84 	}
85 	if (!strncmp(value, "folded", strlen(value))) {
86 		callchain_param.mode = CHAIN_FOLDED;
87 		return 0;
88 	}
89 	return -1;
90 }
91 
92 static int parse_callchain_order(const char *value)
93 {
94 	if (!strncmp(value, "caller", strlen(value))) {
95 		callchain_param.order = ORDER_CALLER;
96 		callchain_param.order_set = true;
97 		return 0;
98 	}
99 	if (!strncmp(value, "callee", strlen(value))) {
100 		callchain_param.order = ORDER_CALLEE;
101 		callchain_param.order_set = true;
102 		return 0;
103 	}
104 	return -1;
105 }
106 
107 static int parse_callchain_sort_key(const char *value)
108 {
109 	if (!strncmp(value, "function", strlen(value))) {
110 		callchain_param.key = CCKEY_FUNCTION;
111 		return 0;
112 	}
113 	if (!strncmp(value, "address", strlen(value))) {
114 		callchain_param.key = CCKEY_ADDRESS;
115 		return 0;
116 	}
117 	if (!strncmp(value, "srcline", strlen(value))) {
118 		callchain_param.key = CCKEY_SRCLINE;
119 		return 0;
120 	}
121 	if (!strncmp(value, "branch", strlen(value))) {
122 		callchain_param.branch_callstack = 1;
123 		return 0;
124 	}
125 	return -1;
126 }
127 
128 static int parse_callchain_value(const char *value)
129 {
130 	if (!strncmp(value, "percent", strlen(value))) {
131 		callchain_param.value = CCVAL_PERCENT;
132 		return 0;
133 	}
134 	if (!strncmp(value, "period", strlen(value))) {
135 		callchain_param.value = CCVAL_PERIOD;
136 		return 0;
137 	}
138 	if (!strncmp(value, "count", strlen(value))) {
139 		callchain_param.value = CCVAL_COUNT;
140 		return 0;
141 	}
142 	return -1;
143 }
144 
145 static int get_stack_size(const char *str, unsigned long *_size)
146 {
147 	char *endptr;
148 	unsigned long size;
149 	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
150 
151 	size = strtoul(str, &endptr, 0);
152 
153 	do {
154 		if (*endptr)
155 			break;
156 
157 		size = round_up(size, sizeof(u64));
158 		if (!size || size > max_size)
159 			break;
160 
161 		*_size = size;
162 		return 0;
163 
164 	} while (0);
165 
166 	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
167 	       max_size, str);
168 	return -1;
169 }
170 
/*
 * Parse a report-side --call-graph option: a comma separated list of
 * mode/order/sort-key/value keywords, optionally followed by a minimum
 * percent and a print limit.  When @allow_record_opt is true (perf top),
 * record options such as "dwarf,<size>" are accepted as well.
 * Returns 0 on success, -ENOMEM or -1 on failure.
 */
static int
__parse_callchain_report_opt(const char *arg, bool allow_record_opt)
{
	char *tok, *arg_copy;
	char *endptr, *saveptr = NULL;
	bool minpcnt_set = false;
	bool record_opt_set = false;
	bool try_stack_size = false;

	callchain_param.enabled = true;
	symbol_conf.use_callchain = true;

	/* No argument at all just enables callchains with the defaults. */
	if (!arg)
		return 0;

	arg_copy = strdup(arg);
	if (!arg_copy)
		return -ENOMEM;

	tok = strtok_r(arg_copy, ",", &saveptr);
	while (tok) {
		if (!strncmp(tok, "none", strlen(tok))) {
			callchain_param.mode = CHAIN_NONE;
			callchain_param.enabled = false;
			symbol_conf.use_callchain = false;
			goto out;
		}

		if (!parse_callchain_mode(tok) ||
		    !parse_callchain_order(tok) ||
		    !parse_callchain_sort_key(tok) ||
		    !parse_callchain_value(tok)) {
			/* parsing ok - move on to the next */
			try_stack_size = false;
			goto next;
		} else if (allow_record_opt && !record_opt_set) {
			if (parse_callchain_record(tok, &callchain_param))
				goto try_numbers;

			/* assume that number followed by 'dwarf' is stack size */
			if (callchain_param.record_mode == CALLCHAIN_DWARF)
				try_stack_size = true;

			record_opt_set = true;
			goto next;
		}

try_numbers:
		if (try_stack_size) {
			unsigned long size = 0;

			if (get_stack_size(tok, &size) < 0)
				goto err_out;
			callchain_param.dump_size = size;
			try_stack_size = false;
		} else if (!minpcnt_set) {
			/* try to get the min percent */
			callchain_param.min_percent = strtod(tok, &endptr);
			if (tok == endptr)
				goto err_out;
			minpcnt_set = true;
		} else {
			/* try print limit at last */
			callchain_param.print_limit = strtoul(tok, &endptr, 0);
			if (tok == endptr)
				goto err_out;
		}
next:
		tok = strtok_r(NULL, ",", &saveptr);
	}

	if (callchain_register_param(&callchain_param) < 0) {
		pr_err("Can't register callchain params\n");
		goto err_out;
	}
out:
	free(arg_copy);
	return 0;
err_out:
	free(arg_copy);
	return -1;
}
253 
/* Parse --call-graph for perf report: record options are not allowed. */
int parse_callchain_report_opt(const char *arg)
{
	return __parse_callchain_report_opt(arg, false);
}
258 
/* Parse --call-graph for perf top: record options are allowed too. */
int parse_callchain_top_opt(const char *arg)
{
	return __parse_callchain_report_opt(arg, true);
}
263 
/*
 * Parse a record-side callchain option: "fp[,defer|,<max-stack>]",
 * "dwarf[,<dump-size>]" or "lbr".  Returns 0 on success, -1 for an
 * unknown or invalid option, -ENOMEM/-EINVAL on other errors.
 */
int parse_callchain_record(const char *arg, struct callchain_param *param)
{
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* We need buffer that we know we can write to. */
	buf = strdup(arg);
	if (!buf)
		return -ENOMEM;

	tok = strtok_r(buf, ",", &saveptr);
	name = tok ? : buf;

	do {
		/* Framepointer style */
		/* Note: sizeof("fp") counts the NUL, so this is an exact match. */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			ret = 0;
			param->record_mode = CALLCHAIN_FP;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size;

				if (!strncmp(tok, "defer", sizeof("defer"))) {
					param->defer = true;
				} else {
					/* A numeric "fp" argument is the max stack depth. */
					size = strtoul(tok, &name, 0);
					if (size < (unsigned) sysctl__max_stack())
						param->max_stack = size;
				}
			}
			break;

		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			param->record_mode = CALLCHAIN_DWARF;
			param->dump_size = default_stack_dump_size;
			dwarf_callchain_users = true;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				param->dump_size = size;
			}
		} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
			/* "lbr" takes no further arguments. */
			if (!strtok_r(NULL, ",", &saveptr)) {
				param->record_mode = CALLCHAIN_LBR;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
					"needed for --call-graph lbr\n");
			break;
		} else {
			pr_err("callchain: Unknown --call-graph option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);

	/* "defer" is only meaningful for frame-pointer callchains. */
	if (param->defer && param->record_mode != CALLCHAIN_FP) {
		pr_err("callchain: deferred callchain only works with FP\n");
		return -EINVAL;
	}

	return ret;
}
339 
/* Dump the chosen record mode (and dump size for DWARF) at debug level. */
static void callchain_debug(const struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		/* NOTE(review): %d assumes dump_size fits an int — confirm its type. */
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}
350 
/*
 * Parse the --call-graph option for perf record/top into @callchain.
 * @unset handles --no-call-graph.  On success with DWARF unwinding,
 * data mmap recording is enabled unless the user set it explicitly.
 */
int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;

	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF &&
		    !record->record_data_mmap_set)
			record->record_data_mmap = true;
		callchain_debug(callchain);
	}

	return ret;
}
377 
378 int perf_callchain_config(const char *var, const char *value)
379 {
380 	char *endptr;
381 
382 	if (!strstarts(var, "call-graph."))
383 		return 0;
384 	var += sizeof("call-graph.") - 1;
385 
386 	if (!strcmp(var, "record-mode"))
387 		return parse_callchain_record_opt(value, &callchain_param);
388 	if (!strcmp(var, "dump-size")) {
389 		unsigned long size = 0;
390 		int ret;
391 
392 		ret = get_stack_size(value, &size);
393 		callchain_param.dump_size = size;
394 
395 		return ret;
396 	}
397 	if (!strcmp(var, "print-type")){
398 		int ret;
399 		ret = parse_callchain_mode(value);
400 		if (ret == -1)
401 			pr_err("Invalid callchain mode: %s\n", value);
402 		return ret;
403 	}
404 	if (!strcmp(var, "order")){
405 		int ret;
406 		ret = parse_callchain_order(value);
407 		if (ret == -1)
408 			pr_err("Invalid callchain order: %s\n", value);
409 		return ret;
410 	}
411 	if (!strcmp(var, "sort-key")){
412 		int ret;
413 		ret = parse_callchain_sort_key(value);
414 		if (ret == -1)
415 			pr_err("Invalid callchain sort key: %s\n", value);
416 		return ret;
417 	}
418 	if (!strcmp(var, "threshold")) {
419 		callchain_param.min_percent = strtod(value, &endptr);
420 		if (value == endptr) {
421 			pr_err("Invalid callchain threshold: %s\n", value);
422 			return -1;
423 		}
424 	}
425 	if (!strcmp(var, "print-limit")) {
426 		callchain_param.print_limit = strtod(value, &endptr);
427 		if (value == endptr) {
428 			pr_err("Invalid callchain print limit: %s\n", value);
429 			return -1;
430 		}
431 	}
432 
433 	return 0;
434 }
435 
/*
 * Insert @chain into the sorted rbtree @root, in descending order:
 * flat/folded modes compare the node's own hits, graph modes compare
 * cumulated (self + children) hits.
 */
static void
rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
		    enum chain_mode mode)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct callchain_node *rnode;
	u64 chain_cumul = callchain_cumul_hits(chain);

	while (*p) {
		u64 rnode_cumul;

		parent = *p;
		rnode = rb_entry(parent, struct callchain_node, rb_node);
		rnode_cumul = callchain_cumul_hits(rnode);

		switch (mode) {
		case CHAIN_FLAT:
		case CHAIN_FOLDED:
			if (rnode->hit < chain->hit)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
			break;
		case CHAIN_GRAPH_ABS: /* Fall through */
		case CHAIN_GRAPH_REL:
			if (rnode_cumul < chain_cumul)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
			break;
		case CHAIN_NONE:
		default:
			break;
		}
	}

	rb_link_node(&chain->rb_node, parent, p);
	rb_insert_color(&chain->rb_node, root);
}
476 
/*
 * Recursively flatten the whole subtree under @node into @rb_root,
 * keeping only nodes with at least @min_hit hits of their own.
 */
static void
__sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
		  u64 min_hit)
{
	struct rb_node *n;
	struct callchain_node *child;

	n = rb_first(&node->rb_root_in);
	while (n) {
		child = rb_entry(n, struct callchain_node, rb_node_in);
		n = rb_next(n);

		__sort_chain_flat(rb_root, child, min_hit);
	}

	if (node->hit && node->hit >= min_hit)
		rb_insert_callchain(rb_root, node, CHAIN_FLAT);
}
495 
/*
 * Once we got every callchain from the stream, we can now sort them
 * by hit (flat/folded modes entry point).
 */
static void
sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
		u64 min_hit, struct callchain_param *param __maybe_unused)
{
	*rb_root = RB_ROOT;
	__sort_chain_flat(rb_root, &root->node, min_hit);
}
507 
/*
 * Rebuild each level's output rbtree sorted by cumulated hits, pruning
 * children whose cumulated hits fall below the absolute @min_hit.
 */
static void __sort_chain_graph_abs(struct callchain_node *node,
				   u64 min_hit)
{
	struct rb_node *n;
	struct callchain_node *child;

	node->rb_root = RB_ROOT;
	n = rb_first(&node->rb_root_in);

	while (n) {
		child = rb_entry(n, struct callchain_node, rb_node_in);
		n = rb_next(n);

		__sort_chain_graph_abs(child, min_hit);
		if (callchain_cumul_hits(child) >= min_hit)
			rb_insert_callchain(&node->rb_root, child,
					    CHAIN_GRAPH_ABS);
	}
}
527 
/* CHAIN_GRAPH_ABS entry point: sort the whole tree by absolute hits. */
static void
sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
		     u64 min_hit, struct callchain_param *param __maybe_unused)
{
	__sort_chain_graph_abs(&chain_root->node, min_hit);
	rb_root->rb_node = chain_root->node.rb_root.rb_node;
}
535 
/*
 * Like __sort_chain_graph_abs() but the threshold is relative: at each
 * level, children below @min_percent of that node's children hits are
 * pruned.
 */
static void __sort_chain_graph_rel(struct callchain_node *node,
				   double min_percent)
{
	struct rb_node *n;
	struct callchain_node *child;
	u64 min_hit;

	node->rb_root = RB_ROOT;
	min_hit = ceil(node->children_hit * min_percent);

	n = rb_first(&node->rb_root_in);
	while (n) {
		child = rb_entry(n, struct callchain_node, rb_node_in);
		n = rb_next(n);

		__sort_chain_graph_rel(child, min_percent);
		if (callchain_cumul_hits(child) >= min_hit)
			rb_insert_callchain(&node->rb_root, child,
					    CHAIN_GRAPH_REL);
	}
}
557 
/* CHAIN_GRAPH_REL entry point: min_percent is given in percent (0-100). */
static void
sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
		     u64 min_hit __maybe_unused, struct callchain_param *param)
{
	__sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0);
	rb_root->rb_node = chain_root->node.rb_root.rb_node;
}
565 
566 int callchain_register_param(struct callchain_param *param)
567 {
568 	switch (param->mode) {
569 	case CHAIN_GRAPH_ABS:
570 		param->sort = sort_chain_graph_abs;
571 		break;
572 	case CHAIN_GRAPH_REL:
573 		param->sort = sort_chain_graph_rel;
574 		break;
575 	case CHAIN_FLAT:
576 	case CHAIN_FOLDED:
577 		param->sort = sort_chain_flat;
578 		break;
579 	case CHAIN_NONE:
580 	default:
581 		return -1;
582 	}
583 	return 0;
584 }
585 
/*
 * Create a child for a parent. If inherit_children, then the new child
 * will become the new parent of its parent's children
 */
static struct callchain_node *
create_child(struct callchain_node *parent, bool inherit_children)
{
	struct callchain_node *new;

	new = zalloc(sizeof(*new));
	if (!new) {
		perror("not enough memory to create child for code path tree");
		return NULL;
	}
	new->parent = parent;
	INIT_LIST_HEAD(&new->val);
	INIT_LIST_HEAD(&new->parent_val);

	if (inherit_children) {
		struct rb_node *n;
		struct callchain_node *child;

		/* Steal the whole children rbtree from the parent... */
		new->rb_root_in = parent->rb_root_in;
		parent->rb_root_in = RB_ROOT;

		/* ...and reparent every stolen child to the new node. */
		n = rb_first(&new->rb_root_in);
		while (n) {
			child = rb_entry(n, struct callchain_node, rb_node_in);
			child->parent = new;
			n = rb_next(n);
		}

		/* make it the first child */
		rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
	}

	return new;
}
625 
626 
627 /*
628  * Fill the node with callchain values
629  */
630 static int
631 fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
632 {
633 	struct callchain_cursor_node *cursor_node;
634 
635 	node->val_nr = cursor->nr - cursor->pos;
636 	if (!node->val_nr)
637 		pr_warning("Warning: empty node in callchain tree\n");
638 
639 	cursor_node = callchain_cursor_current(cursor);
640 
641 	while (cursor_node) {
642 		struct callchain_list *call;
643 
644 		call = zalloc(sizeof(*call));
645 		if (!call) {
646 			perror("not enough memory for the code path tree");
647 			return -ENOMEM;
648 		}
649 		call->ip = cursor_node->ip;
650 		map_symbol__copy(&call->ms, &cursor_node->ms);
651 		call->srcline = cursor_node->srcline;
652 
653 		if (cursor_node->branch) {
654 			call->branch_count = 1;
655 
656 			if (cursor_node->branch_from) {
657 				/*
658 				 * branch_from is set with value somewhere else
659 				 * to imply it's "to" of a branch.
660 				 */
661 				if (!call->brtype_stat) {
662 					call->brtype_stat = zalloc(sizeof(*call->brtype_stat));
663 					if (!call->brtype_stat) {
664 						perror("not enough memory for the code path branch statistics");
665 						zfree(&call->brtype_stat);
666 						return -ENOMEM;
667 					}
668 				}
669 				call->brtype_stat->branch_to = true;
670 
671 				if (cursor_node->branch_flags.predicted)
672 					call->predicted_count = 1;
673 
674 				if (cursor_node->branch_flags.abort)
675 					call->abort_count = 1;
676 
677 				branch_type_count(call->brtype_stat,
678 						  &cursor_node->branch_flags,
679 						  cursor_node->branch_from,
680 						  cursor_node->ip);
681 			} else {
682 				/*
683 				 * It's "from" of a branch
684 				 */
685 				if (call->brtype_stat && call->brtype_stat->branch_to)
686 					call->brtype_stat->branch_to = false;
687 				call->cycles_count =
688 					cursor_node->branch_flags.cycles;
689 				call->iter_count = cursor_node->nr_loop_iter;
690 				call->iter_cycles = cursor_node->iter_cycles;
691 			}
692 		}
693 
694 		list_add_tail(&call->list, &node->val);
695 
696 		callchain_cursor_advance(cursor);
697 		cursor_node = callchain_cursor_current(cursor);
698 	}
699 	return 0;
700 }
701 
/*
 * Allocate a new child of @parent and fill it with the remaining cursor
 * entries, accounting @period hits.  On fill failure every partially
 * created callchain_list is released and NULL is returned.
 */
static struct callchain_node *
add_child(struct callchain_node *parent,
	  struct callchain_cursor *cursor,
	  u64 period)
{
	struct callchain_node *new;

	new = create_child(parent, false);
	if (new == NULL)
		return NULL;

	if (fill_node(new, cursor) < 0) {
		struct callchain_list *call, *tmp;

		/* Undo the partial fill: drop refs and free each entry. */
		list_for_each_entry_safe(call, tmp, &new->val, list) {
			list_del_init(&call->list);
			map_symbol__exit(&call->ms);
			zfree(&call->brtype_stat);
			free(call);
		}
		free(new);
		return NULL;
	}

	new->children_hit = 0;
	new->hit = period;
	new->children_count = 0;
	new->count = 1;
	return new;
}
732 
enum match_result {
	MATCH_ERROR  = -1,
	MATCH_EQ,
	MATCH_LT,
	MATCH_GT,
};

/*
 * Three-way compare of two possibly-NULL strings.  A NULL string orders
 * after a non-NULL one; two NULLs cannot be compared (MATCH_ERROR).
 */
static enum match_result match_chain_strings(const char *left,
					     const char *right)
{
	int cmp;

	if (!left && !right)
		return MATCH_ERROR;

	if (!left)
		cmp = 1;
	else if (!right)
		cmp = -1;
	else
		cmp = strcmp(left, right);

	if (cmp == 0)
		return MATCH_EQ;

	return cmp < 0 ? MATCH_LT : MATCH_GT;
}
760 
761 /*
762  * We need to always use relative addresses because we're aggregating
763  * callchains from multiple threads, i.e. different address spaces, so
764  * comparing absolute addresses make no sense as a symbol in a DSO may end up
765  * in a different address when used in a different binary or even the same
766  * binary but with some sort of address randomization technique, thus we need
767  * to compare just relative addresses. -acme
768  */
769 static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip,
770 						   struct map *right_map, u64 right_ip)
771 {
772 	struct dso *left_dso = left_map ? map__dso(left_map) : NULL;
773 	struct dso *right_dso = right_map ? map__dso(right_map) : NULL;
774 
775 	if (left_dso != right_dso)
776 		return left_dso < right_dso ? MATCH_LT : MATCH_GT;
777 
778 	if (left_ip != right_ip)
779  		return left_ip < right_ip ? MATCH_LT : MATCH_GT;
780 
781 	return MATCH_EQ;
782 }
783 
/*
 * Compare a cursor entry against a stored callchain entry according to
 * callchain_param.key, falling back from srcline to symbol to address
 * comparison as data is missing.  On an exact match, fold the entry's
 * branch statistics into @cnode.
 */
static enum match_result match_chain(struct callchain_cursor_node *node,
				     struct callchain_list *cnode)
{
	enum match_result match = MATCH_ERROR;

	switch (callchain_param.key) {
	case CCKEY_SRCLINE:
		match = match_chain_strings(cnode->srcline, node->srcline);
		if (match != MATCH_ERROR)
			break;
		/* otherwise fall-back to symbol-based comparison below */
		fallthrough;
	case CCKEY_FUNCTION:
		if (node->ms.sym && cnode->ms.sym) {
			/*
			 * Compare inlined frames based on their symbol name
			 * because different inlined frames will have the same
			 * symbol start. Otherwise do a faster comparison based
			 * on the symbol start address.
			 */
			if (cnode->ms.sym->inlined || node->ms.sym->inlined) {
				match = match_chain_strings(cnode->ms.sym->name,
							    node->ms.sym->name);
				if (match != MATCH_ERROR)
					break;
			} else {
				match = match_chain_dso_addresses(cnode->ms.map, cnode->ms.sym->start,
								  node->ms.map, node->ms.sym->start);
				break;
			}
		}
		/* otherwise fall-back to IP-based comparison below */
		fallthrough;
	case CCKEY_ADDRESS:
	default:
		match = match_chain_dso_addresses(cnode->ms.map, cnode->ip, node->ms.map, node->ip);
		break;
	}

	if (match == MATCH_EQ && node->branch) {
		cnode->branch_count++;

		if (node->branch_from) {
			/*
			 * It's "to" of a branch
			 */
			if (!cnode->brtype_stat) {
				/* Branch stats are allocated lazily on first use. */
				cnode->brtype_stat = zalloc(sizeof(*cnode->brtype_stat));
				if (!cnode->brtype_stat) {
					perror("not enough memory for the code path branch statistics");
					return MATCH_ERROR;
				}
			}
			cnode->brtype_stat->branch_to = true;

			if (node->branch_flags.predicted)
				cnode->predicted_count++;

			if (node->branch_flags.abort)
				cnode->abort_count++;

			branch_type_count(cnode->brtype_stat,
					  &node->branch_flags,
					  node->branch_from,
					  node->ip);
		} else {
			/*
			 * It's "from" of a branch
			 */
			if (cnode->brtype_stat && cnode->brtype_stat->branch_to)
				cnode->brtype_stat->branch_to = false;
			cnode->cycles_count += node->branch_flags.cycles;
			cnode->iter_count += node->nr_loop_iter;
			cnode->iter_cycles += node->iter_cycles;
			cnode->from_count++;
		}
	}

	return match;
}
864 
/*
 * Split the parent in two parts (a new child is created) and
 * give a part of its callchain to the created child.
 * Then create another child to host the given callchain of new branch
 *
 * @to_split:    first callchain_list entry that did NOT match
 * @idx_parents: cursor entries consumed by the ancestors
 * @idx_local:   entries of @parent that did match
 */
static int
split_add_child(struct callchain_node *parent,
		struct callchain_cursor *cursor,
		struct callchain_list *to_split,
		u64 idx_parents, u64 idx_local, u64 period)
{
	struct callchain_node *new;
	struct list_head *old_tail;
	unsigned int idx_total = idx_parents + idx_local;

	/* split */
	new = create_child(parent, true);
	if (new == NULL)
		return -1;

	/* split the callchain and move a part to the new child */
	old_tail = parent->val.prev;
	list_del_range(&to_split->list, old_tail);
	new->val.next = &to_split->list;
	new->val.prev = old_tail;
	to_split->list.prev = &new->val;
	old_tail->next = &new->val;

	/* split the hits */
	new->hit = parent->hit;
	new->children_hit = parent->children_hit;
	parent->children_hit = callchain_cumul_hits(new);
	new->val_nr = parent->val_nr - idx_local;
	parent->val_nr = idx_local;
	new->count = parent->count;
	new->children_count = parent->children_count;
	parent->children_count = callchain_cumul_counts(new);

	/* create a new child for the new branch if any */
	if (idx_total < cursor->nr) {
		struct callchain_node *first;
		struct callchain_list *cnode;
		struct callchain_cursor_node *node;
		struct rb_node *p, **pp;

		/* The split parent itself no longer ends a chain. */
		parent->hit = 0;
		parent->children_hit += period;
		parent->count = 0;
		parent->children_count += 1;

		node = callchain_cursor_current(cursor);
		new = add_child(parent, cursor, period);
		if (new == NULL)
			return -1;

		/*
		 * This is second child since we moved parent's children
		 * to new (first) child above.
		 */
		p = parent->rb_root_in.rb_node;
		first = rb_entry(p, struct callchain_node, rb_node_in);
		cnode = list_first_entry(&first->val, struct callchain_list,
					 list);

		if (match_chain(node, cnode) == MATCH_LT)
			pp = &p->rb_left;
		else
			pp = &p->rb_right;

		rb_link_node(&new->rb_node_in, p, pp);
		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
	} else {
		/* The whole cursor was consumed: the split parent takes the hit. */
		parent->hit = period;
		parent->count = 1;
	}
	return 0;
}
942 
static enum match_result
append_chain(struct callchain_node *root,
	     struct callchain_cursor *cursor,
	     u64 period);

/*
 * Find (via the children rbtree) or create the child of @root matching
 * the current cursor entry and append the remaining chain to it,
 * accounting @period into root's cumulated hits.
 */
static int
append_chain_children(struct callchain_node *root,
		      struct callchain_cursor *cursor,
		      u64 period)
{
	struct callchain_node *rnode;
	struct callchain_cursor_node *node;
	struct rb_node **p = &root->rb_root_in.rb_node;
	struct rb_node *parent = NULL;

	node = callchain_cursor_current(cursor);
	if (!node)
		return -1;

	/* lookup in children */
	while (*p) {
		enum match_result ret;

		parent = *p;
		rnode = rb_entry(parent, struct callchain_node, rb_node_in);

		/* If at least first entry matches, rely to children */
		ret = append_chain(rnode, cursor, period);
		if (ret == MATCH_EQ)
			goto inc_children_hit;
		if (ret == MATCH_ERROR)
			return -1;

		if (ret == MATCH_LT)
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	/* nothing in children, add to the current node */
	rnode = add_child(root, cursor, period);
	if (rnode == NULL)
		return -1;

	rb_link_node(&rnode->rb_node_in, parent, p);
	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);

inc_children_hit:
	root->children_hit += period;
	root->children_count++;
	return 0;
}
994 
/*
 * Try to append the cursor's remaining entries to @root: consume the
 * entries matching root's own values, then either split the node
 * (partial match), account a full hit (complete match), or recurse into
 * the children (match with leftover entries).
 */
static enum match_result
append_chain(struct callchain_node *root,
	     struct callchain_cursor *cursor,
	     u64 period)
{
	struct callchain_list *cnode;
	u64 start = cursor->pos;
	bool found = false;
	u64 matches;
	enum match_result cmp = MATCH_ERROR;

	/*
	 * Lookup in the current node
	 * If we have a symbol, then compare the start to match
	 * anywhere inside a function, unless function
	 * mode is disabled.
	 */
	list_for_each_entry(cnode, &root->val, list) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (!node)
			break;

		cmp = match_chain(node, cnode);
		if (cmp != MATCH_EQ)
			break;

		found = true;

		callchain_cursor_advance(cursor);
	}

	/* no entry matched: relay the comparison result to the parent */
	if (!found) {
		WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n");
		return cmp;
	}

	matches = cursor->pos - start;

	/* we match only a part of the node. Split it and add the new chain */
	if (matches < root->val_nr) {
		if (split_add_child(root, cursor, cnode, start, matches,
				    period) < 0)
			return MATCH_ERROR;

		return MATCH_EQ;
	}

	/* we match 100% of the path, increment the hit */
	if (matches == root->val_nr && cursor->pos == cursor->nr) {
		root->hit += period;
		root->count++;
		return MATCH_EQ;
	}

	/* We match the node and still have a part remaining */
	if (append_chain_children(root, cursor, period) < 0)
		return MATCH_ERROR;

	return MATCH_EQ;
}
1058 
/*
 * Append the cursor's callchain to @root, accounting @period, and keep
 * track of the deepest chain seen.  Returns 0 on success (including an
 * empty cursor), -1 on error.
 */
int callchain_append(struct callchain_root *root,
		     struct callchain_cursor *cursor,
		     u64 period)
{
	if (cursor == NULL)
		return -1;

	if (!cursor->nr)
		return 0;

	/* Rewind the cursor to its first entry before walking it. */
	callchain_cursor_commit(cursor);

	if (append_chain_children(&root->node, cursor, period) < 0)
		return -1;

	if (cursor->nr > root->max_depth)
		root->max_depth = cursor->nr;

	return 0;
}
1079 
/*
 * Move every callchain stored under @src into @dst: replay src's own
 * entries through @cursor, re-append them below @dst, then recurse into
 * src's children.  src's lists and children are freed along the way;
 * the cursor position is restored before returning.
 */
static int
merge_chain_branch(struct callchain_cursor *cursor,
		   struct callchain_node *dst, struct callchain_node *src)
{
	struct callchain_cursor_node **old_last = cursor->last;
	struct callchain_node *child;
	struct callchain_list *list, *next_list;
	struct rb_node *n;
	int old_pos = cursor->nr;
	int err = 0;

	list_for_each_entry_safe(list, next_list, &src->val, list) {
		struct map_symbol ms = {
			.thread = thread__get(list->ms.thread),
			.map = map__get(list->ms.map),
		};
		callchain_cursor_append(cursor, list->ip, &ms, false, NULL, 0, 0, 0, list->srcline);
		list_del_init(&list->list);
		map_symbol__exit(&ms);
		map_symbol__exit(&list->ms);
		zfree(&list->brtype_stat);
		free(list);
	}

	if (src->hit) {
		callchain_cursor_commit(cursor);
		if (append_chain_children(dst, cursor, src->hit) < 0)
			return -1;
	}

	n = rb_first(&src->rb_root_in);
	while (n) {
		child = container_of(n, struct callchain_node, rb_node_in);
		n = rb_next(n);
		rb_erase(&child->rb_node_in, &src->rb_root_in);

		err = merge_chain_branch(cursor, dst, child);
		if (err)
			break;

		free(child);
	}

	/* Restore the cursor so callers see it unchanged. */
	cursor->nr = old_pos;
	cursor->last = old_last;

	return err;
}
1128 
/* Merge all of @src's callchains into @dst, using @cursor as scratch. */
int callchain_merge(struct callchain_cursor *cursor,
		    struct callchain_root *dst, struct callchain_root *src)
{
	return merge_chain_branch(cursor, &dst->node, &src->node);
}
1134 
/*
 * Append one entry at the cursor's current end, reusing a previously
 * allocated node when one is available (the node chain is kept across
 * cursor resets).  Returns 0 on success, -ENOMEM on allocation failure.
 */
int callchain_cursor_append(struct callchain_cursor *cursor,
			    u64 ip, struct map_symbol *ms,
			    bool branch, struct branch_flags *flags,
			    int nr_loop_iter, u64 iter_cycles, u64 branch_from,
			    const char *srcline)
{
	struct callchain_cursor_node *node = *cursor->last;

	if (!node) {
		node = calloc(1, sizeof(*node));
		if (!node)
			return -ENOMEM;

		*cursor->last = node;
	}

	node->ip = ip;
	/* Drop references held by a recycled node before copying over it. */
	map_symbol__exit(&node->ms);
	map_symbol__copy(&node->ms, ms);
	node->branch = branch;
	node->nr_loop_iter = nr_loop_iter;
	node->iter_cycles = iter_cycles;
	node->srcline = srcline;

	if (flags)
		memcpy(&node->branch_flags, flags,
			sizeof(struct branch_flags));

	node->branch_from = branch_from;
	cursor->nr++;

	cursor->last = &node->next;

	return 0;
}
1170 
1171 int sample__resolve_callchain(struct perf_sample *sample,
1172 			      struct callchain_cursor *cursor, struct symbol **parent,
1173 			      struct evsel *evsel, struct addr_location *al,
1174 			      int max_stack)
1175 {
1176 	if (sample->callchain == NULL && !symbol_conf.show_branchflag_count)
1177 		return 0;
1178 
1179 	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
1180 	    perf_hpp_list.parent || symbol_conf.show_branchflag_count) {
1181 		return thread__resolve_callchain(al->thread, cursor, evsel, sample,
1182 						 parent, al, max_stack);
1183 	}
1184 	return 0;
1185 }
1186 
1187 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample)
1188 {
1189 	if ((!symbol_conf.use_callchain || sample->callchain == NULL) &&
1190 		!symbol_conf.show_branchflag_count)
1191 		return 0;
1192 	return callchain_append(he->callchain, get_tls_callchain_cursor(), sample->period);
1193 }
1194 
/*
 * Translate one callchain cursor node into an addr_location for output.
 *
 * Takes a new reference on the node's map (dropping any map @al held).
 * Returns 0 when the entry should be skipped (unresolved symbol while
 * @hide_unresolved is set), 1 otherwise.
 */
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
			bool hide_unresolved)
{
	struct machine *machine = NULL;

	if (node->ms.thread)
		machine = maps__machine(thread__maps(node->ms.thread));

	map__put(al->map);
	al->map = map__get(node->ms.map);
	al->sym = node->ms.sym;
	al->srcline = node->srcline;
	al->addr = node->ip;

	if (al->sym == NULL) {
		if (hide_unresolved)
			return 0;
		/* Without a map there is nothing to classify cpumode from. */
		if (al->map == NULL)
			goto out;
	}
	/*
	 * Classify cpumode/level from whether the thread's maps are the
	 * machine's kernel maps and whether this is the host machine.
	 * NOTE(review): machine stays NULL if node->ms.thread was NULL —
	 * confirm callers always provide a thread here.
	 */
	if (maps__equal(thread__maps(al->thread), machine__kernel_maps(machine))) {
		if (machine__is_host(machine)) {
			al->cpumode = PERF_RECORD_MISC_KERNEL;
			al->level = 'k';
		} else {
			al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
			al->level = 'g';
		}
	} else {
		if (machine__is_host(machine)) {
			al->cpumode = PERF_RECORD_MISC_USER;
			al->level = '.';
		} else if (perf_guest) {
			al->cpumode = PERF_RECORD_MISC_GUEST_USER;
			al->level = 'u';
		} else {
			al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
			al->level = 'H';
		}
	}

out:
	return 1;
}
1239 
1240 char *callchain_list__sym_name(struct callchain_list *cl,
1241 			       char *bf, size_t bfsize, bool show_dso)
1242 {
1243 	bool show_addr = callchain_param.key == CCKEY_ADDRESS;
1244 	bool show_srcline = show_addr || callchain_param.key == CCKEY_SRCLINE;
1245 	int printed;
1246 
1247 	if (cl->ms.sym) {
1248 		const char *inlined = cl->ms.sym->inlined ? " (inlined)" : "";
1249 
1250 		if (show_srcline && cl->srcline)
1251 			printed = scnprintf(bf, bfsize, "%s %s%s",
1252 					    cl->ms.sym->name, cl->srcline,
1253 					    inlined);
1254 		else
1255 			printed = scnprintf(bf, bfsize, "%s%s",
1256 					    cl->ms.sym->name, inlined);
1257 	} else
1258 		printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
1259 
1260 	if (show_dso)
1261 		scnprintf(bf + printed, bfsize - printed, " %s",
1262 			  cl->ms.map ?
1263 			  dso__short_name(map__dso(cl->ms.map)) :
1264 			  "unknown");
1265 
1266 	return bf;
1267 }
1268 
1269 char *callchain_node__scnprintf_value(struct callchain_node *node,
1270 				      char *bf, size_t bfsize, u64 total)
1271 {
1272 	double percent = 0.0;
1273 	u64 period = callchain_cumul_hits(node);
1274 	unsigned count = callchain_cumul_counts(node);
1275 
1276 	if (callchain_param.mode == CHAIN_FOLDED) {
1277 		period = node->hit;
1278 		count = node->count;
1279 	}
1280 
1281 	switch (callchain_param.value) {
1282 	case CCVAL_PERIOD:
1283 		scnprintf(bf, bfsize, "%"PRIu64, period);
1284 		break;
1285 	case CCVAL_COUNT:
1286 		scnprintf(bf, bfsize, "%u", count);
1287 		break;
1288 	case CCVAL_PERCENT:
1289 	default:
1290 		if (total)
1291 			percent = period * 100.0 / total;
1292 		scnprintf(bf, bfsize, "%.2f%%", percent);
1293 		break;
1294 	}
1295 	return bf;
1296 }
1297 
1298 int callchain_node__fprintf_value(struct callchain_node *node,
1299 				 FILE *fp, u64 total)
1300 {
1301 	double percent = 0.0;
1302 	u64 period = callchain_cumul_hits(node);
1303 	unsigned count = callchain_cumul_counts(node);
1304 
1305 	if (callchain_param.mode == CHAIN_FOLDED) {
1306 		period = node->hit;
1307 		count = node->count;
1308 	}
1309 
1310 	switch (callchain_param.value) {
1311 	case CCVAL_PERIOD:
1312 		return fprintf(fp, "%"PRIu64, period);
1313 	case CCVAL_COUNT:
1314 		return fprintf(fp, "%u", count);
1315 	case CCVAL_PERCENT:
1316 	default:
1317 		if (total)
1318 			percent = period * 100.0 / total;
1319 		return percent_color_fprintf(fp, "%.2f%%", percent);
1320 	}
1321 	return 0;
1322 }
1323 
1324 static void callchain_counts_value(struct callchain_node *node,
1325 				   u64 *branch_count, u64 *predicted_count,
1326 				   u64 *abort_count, u64 *cycles_count)
1327 {
1328 	struct callchain_list *clist;
1329 
1330 	list_for_each_entry(clist, &node->val, list) {
1331 		if (branch_count)
1332 			*branch_count += clist->branch_count;
1333 
1334 		if (predicted_count)
1335 			*predicted_count += clist->predicted_count;
1336 
1337 		if (abort_count)
1338 			*abort_count += clist->abort_count;
1339 
1340 		if (cycles_count)
1341 			*cycles_count += clist->cycles_count;
1342 	}
1343 }
1344 
1345 static int callchain_node_branch_counts_cumul(struct callchain_node *node,
1346 					      u64 *branch_count,
1347 					      u64 *predicted_count,
1348 					      u64 *abort_count,
1349 					      u64 *cycles_count)
1350 {
1351 	struct callchain_node *child;
1352 	struct rb_node *n;
1353 
1354 	n = rb_first(&node->rb_root_in);
1355 	while (n) {
1356 		child = rb_entry(n, struct callchain_node, rb_node_in);
1357 		n = rb_next(n);
1358 
1359 		callchain_node_branch_counts_cumul(child, branch_count,
1360 						   predicted_count,
1361 						   abort_count,
1362 						   cycles_count);
1363 
1364 		callchain_counts_value(child, branch_count,
1365 				       predicted_count, abort_count,
1366 				       cycles_count);
1367 	}
1368 
1369 	return 0;
1370 }
1371 
1372 int callchain_branch_counts(struct callchain_root *root,
1373 			    u64 *branch_count, u64 *predicted_count,
1374 			    u64 *abort_count, u64 *cycles_count)
1375 {
1376 	if (branch_count)
1377 		*branch_count = 0;
1378 
1379 	if (predicted_count)
1380 		*predicted_count = 0;
1381 
1382 	if (abort_count)
1383 		*abort_count = 0;
1384 
1385 	if (cycles_count)
1386 		*cycles_count = 0;
1387 
1388 	return callchain_node_branch_counts_cumul(&root->node,
1389 						  branch_count,
1390 						  predicted_count,
1391 						  abort_count,
1392 						  cycles_count);
1393 }
1394 
1395 static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize)
1396 {
1397 	return scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value);
1398 }
1399 
/*
 * Append one "name:value%" percentage counter to @bf, following the
 * same " (" group convention as count_pri64_printf().  Values below a
 * non-zero @threshold are suppressed (returns 0, nothing written).
 */
static int count_float_printf(int idx, const char *str, float value,
			      char *bf, int bfsize, float threshold)
{
	if (threshold != 0.0 && value < threshold)
		return 0;

	return scnprintf(bf, bfsize, "%s%s:%.1f%%", idx ? " " : " (", str, value);
}
1408 
1409 static int branch_to_str(char *bf, int bfsize,
1410 			 u64 branch_count, u64 predicted_count,
1411 			 u64 abort_count,
1412 			 const struct branch_type_stat *brtype_stat)
1413 {
1414 	int printed, i = 0;
1415 
1416 	printed = branch_type_str(brtype_stat, bf, bfsize);
1417 	if (printed)
1418 		i++;
1419 
1420 	if (predicted_count < branch_count) {
1421 		printed += count_float_printf(i++, "predicted",
1422 				predicted_count * 100.0 / branch_count,
1423 				bf + printed, bfsize - printed, 0.0);
1424 	}
1425 
1426 	if (abort_count) {
1427 		printed += count_float_printf(i++, "abort",
1428 				abort_count * 100.0 / branch_count,
1429 				bf + printed, bfsize - printed, 0.1);
1430 	}
1431 
1432 	if (i)
1433 		printed += scnprintf(bf + printed, bfsize - printed, ")");
1434 
1435 	return printed;
1436 }
1437 
1438 static int branch_from_str(char *bf, int bfsize,
1439 			   u64 branch_count,
1440 			   u64 cycles_count, u64 iter_count,
1441 			   u64 iter_cycles, u64 from_count)
1442 {
1443 	int printed = 0, i = 0;
1444 	u64 cycles, v = 0;
1445 
1446 	cycles = cycles_count / branch_count;
1447 	if (cycles) {
1448 		printed += count_pri64_printf(i++, "cycles",
1449 				cycles,
1450 				bf + printed, bfsize - printed);
1451 	}
1452 
1453 	if (iter_count && from_count) {
1454 		v = iter_count / from_count;
1455 		if (v) {
1456 			printed += count_pri64_printf(i++, "iter",
1457 					v, bf + printed, bfsize - printed);
1458 
1459 			printed += count_pri64_printf(i++, "avg_cycles",
1460 					iter_cycles / iter_count,
1461 					bf + printed, bfsize - printed);
1462 		}
1463 	}
1464 
1465 	if (i)
1466 		printed += scnprintf(bf + printed, bfsize - printed, ")");
1467 
1468 	return printed;
1469 }
1470 
1471 static int counts_str_build(char *bf, int bfsize,
1472 			     u64 branch_count, u64 predicted_count,
1473 			     u64 abort_count, u64 cycles_count,
1474 			     u64 iter_count, u64 iter_cycles,
1475 			     u64 from_count,
1476 			     const struct branch_type_stat *brtype_stat)
1477 {
1478 	int printed;
1479 
1480 	if (branch_count == 0)
1481 		return scnprintf(bf, bfsize, " (calltrace)");
1482 
1483 	if (brtype_stat->branch_to) {
1484 		printed = branch_to_str(bf, bfsize, branch_count,
1485 				predicted_count, abort_count, brtype_stat);
1486 	} else {
1487 		printed = branch_from_str(bf, bfsize, branch_count,
1488 				cycles_count, iter_count, iter_cycles,
1489 				from_count);
1490 	}
1491 
1492 	if (!printed)
1493 		bf[0] = 0;
1494 
1495 	return printed;
1496 }
1497 
1498 static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
1499 				   u64 branch_count, u64 predicted_count,
1500 				   u64 abort_count, u64 cycles_count,
1501 				   u64 iter_count, u64 iter_cycles,
1502 				   u64 from_count,
1503 				   const struct branch_type_stat *brtype_stat)
1504 {
1505 	char str[256];
1506 
1507 	counts_str_build(str, sizeof(str), branch_count,
1508 			 predicted_count, abort_count, cycles_count,
1509 			 iter_count, iter_cycles, from_count, brtype_stat);
1510 
1511 	if (fp)
1512 		return fprintf(fp, "%s", str);
1513 
1514 	return scnprintf(bf, bfsize, "%s", str);
1515 }
1516 
1517 int callchain_list_counts__printf_value(struct callchain_list *clist,
1518 					FILE *fp, char *bf, int bfsize)
1519 {
1520 	static const struct branch_type_stat empty_brtype_stat = {};
1521 	const struct branch_type_stat *brtype_stat;
1522 	u64 branch_count, predicted_count;
1523 	u64 abort_count, cycles_count;
1524 	u64 iter_count, iter_cycles;
1525 	u64 from_count;
1526 
1527 	brtype_stat = clist->brtype_stat ?: &empty_brtype_stat;
1528 	branch_count = clist->branch_count;
1529 	predicted_count = clist->predicted_count;
1530 	abort_count = clist->abort_count;
1531 	cycles_count = clist->cycles_count;
1532 	iter_count = clist->iter_count;
1533 	iter_cycles = clist->iter_cycles;
1534 	from_count = clist->from_count;
1535 
1536 	return callchain_counts_printf(fp, bf, bfsize, branch_count,
1537 				       predicted_count, abort_count,
1538 				       cycles_count, iter_count, iter_cycles,
1539 				       from_count, brtype_stat);
1540 }
1541 
1542 static void free_callchain_node(struct callchain_node *node)
1543 {
1544 	struct callchain_list *list, *tmp;
1545 	struct callchain_node *child;
1546 	struct rb_node *n;
1547 
1548 	list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
1549 		list_del_init(&list->list);
1550 		map_symbol__exit(&list->ms);
1551 		zfree(&list->brtype_stat);
1552 		free(list);
1553 	}
1554 
1555 	list_for_each_entry_safe(list, tmp, &node->val, list) {
1556 		list_del_init(&list->list);
1557 		map_symbol__exit(&list->ms);
1558 		zfree(&list->brtype_stat);
1559 		free(list);
1560 	}
1561 
1562 	n = rb_first(&node->rb_root_in);
1563 	while (n) {
1564 		child = container_of(n, struct callchain_node, rb_node_in);
1565 		n = rb_next(n);
1566 		rb_erase(&child->rb_node_in, &node->rb_root_in);
1567 
1568 		free_callchain_node(child);
1569 		free(child);
1570 	}
1571 }
1572 
1573 void free_callchain(struct callchain_root *root)
1574 {
1575 	if (!symbol_conf.use_callchain)
1576 		return;
1577 
1578 	free_callchain_node(&root->node);
1579 }
1580 
1581 static u64 decay_callchain_node(struct callchain_node *node)
1582 {
1583 	struct callchain_node *child;
1584 	struct rb_node *n;
1585 	u64 child_hits = 0;
1586 
1587 	n = rb_first(&node->rb_root_in);
1588 	while (n) {
1589 		child = container_of(n, struct callchain_node, rb_node_in);
1590 
1591 		child_hits += decay_callchain_node(child);
1592 		n = rb_next(n);
1593 	}
1594 
1595 	node->hit = (node->hit * 7) / 8;
1596 	node->children_hit = child_hits;
1597 
1598 	return node->hit;
1599 }
1600 
1601 void decay_callchain(struct callchain_root *root)
1602 {
1603 	if (!symbol_conf.use_callchain)
1604 		return;
1605 
1606 	decay_callchain_node(&root->node);
1607 }
1608 
/*
 * Build node->parent_val: a flattened copy of every callchain_list entry
 * on the path from @node's parent up to the root, in root-first order.
 * Returns 0 on success, -ENOMEM on allocation failure (partial list is
 * torn down).
 */
int callchain_node__make_parent_list(struct callchain_node *node)
{
	struct callchain_node *parent = node->parent;
	struct callchain_list *chain, *new;
	LIST_HEAD(head);

	/*
	 * Walk ancestors bottom-up; each parent's entries are taken in
	 * reverse, and the final reversed splice below restores
	 * root-first order.
	 */
	while (parent) {
		list_for_each_entry_reverse(chain, &parent->val, list) {
			new = malloc(sizeof(*new));
			if (new == NULL)
				goto out;
			/*
			 * Shallow struct copy, then take fresh references
			 * for the map_symbol.  NOTE(review): brtype_stat is
			 * copied as a pointer shared with the source entry,
			 * and the error path below zfree()s it — verify
			 * ownership is as intended.
			 */
			*new = *chain;
			new->has_children = false;
			map_symbol__copy(&new->ms, &chain->ms);
			list_add_tail(&new->list, &head);
		}
		parent = parent->parent;
	}

	list_for_each_entry_safe_reverse(chain, new, &head, list)
		list_move_tail(&chain->list, &node->parent_val);

	if (!list_empty(&node->parent_val)) {
		/* Top entry is expandable only if the node has siblings. */
		chain = list_first_entry(&node->parent_val, struct callchain_list, list);
		chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);

		chain = list_first_entry(&node->val, struct callchain_list, list);
		chain->has_children = false;
	}
	return 0;

out:
	/* Allocation failed: undo the partially built list. */
	list_for_each_entry_safe(chain, new, &head, list) {
		list_del_init(&chain->list);
		map_symbol__exit(&chain->ms);
		zfree(&chain->brtype_stat);
		free(chain);
	}
	return -ENOMEM;
}
1649 
1650 static void callchain_cursor__delete(void *vcursor)
1651 {
1652 	struct callchain_cursor *cursor = vcursor;
1653 	struct callchain_cursor_node *node, *next;
1654 
1655 	callchain_cursor_reset(cursor);
1656 	for (node = cursor->first; node != NULL; node = next) {
1657 		next = node->next;
1658 		free(node);
1659 	}
1660 	free(cursor);
1661 }
1662 
1663 static void init_callchain_cursor_key(void)
1664 {
1665 	if (pthread_key_create(&callchain_cursor, callchain_cursor__delete)) {
1666 		pr_err("callchain cursor creation failed");
1667 		abort();
1668 	}
1669 }
1670 
1671 struct callchain_cursor *get_tls_callchain_cursor(void)
1672 {
1673 	static pthread_once_t once_control = PTHREAD_ONCE_INIT;
1674 	struct callchain_cursor *cursor;
1675 
1676 	pthread_once(&once_control, init_callchain_cursor_key);
1677 	cursor = pthread_getspecific(callchain_cursor);
1678 	if (!cursor) {
1679 		cursor = zalloc(sizeof(*cursor));
1680 		if (!cursor)
1681 			pr_debug3("%s: not enough memory\n", __func__);
1682 		pthread_setspecific(callchain_cursor, cursor);
1683 	}
1684 	return cursor;
1685 }
1686 
1687 int callchain_cursor__copy(struct callchain_cursor *dst,
1688 			   struct callchain_cursor *src)
1689 {
1690 	int rc = 0;
1691 
1692 	callchain_cursor_reset(dst);
1693 	callchain_cursor_commit(src);
1694 
1695 	while (true) {
1696 		struct callchain_cursor_node *node;
1697 
1698 		node = callchain_cursor_current(src);
1699 		if (node == NULL)
1700 			break;
1701 
1702 		rc = callchain_cursor_append(dst, node->ip, &node->ms,
1703 					     node->branch, &node->branch_flags,
1704 					     node->nr_loop_iter,
1705 					     node->iter_cycles,
1706 					     node->branch_from, node->srcline);
1707 		if (rc)
1708 			break;
1709 
1710 		callchain_cursor_advance(src);
1711 	}
1712 
1713 	return rc;
1714 }
1715 
1716 /*
1717  * Initialize a cursor before adding entries inside, but keep
1718  * the previously allocated entries as a cache.
1719  */
1720 void callchain_cursor_reset(struct callchain_cursor *cursor)
1721 {
1722 	struct callchain_cursor_node *node;
1723 
1724 	cursor->nr = 0;
1725 	cursor->last = &cursor->first;
1726 
1727 	for (node = cursor->first; node != NULL; node = node->next)
1728 		map_symbol__exit(&node->ms);
1729 }
1730 
1731 void callchain_param_setup(u64 sample_type, uint16_t e_machine)
1732 {
1733 	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
1734 		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
1735 		    (sample_type & PERF_SAMPLE_STACK_USER)) {
1736 			callchain_param.record_mode = CALLCHAIN_DWARF;
1737 			dwarf_callchain_users = true;
1738 		} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
1739 			callchain_param.record_mode = CALLCHAIN_LBR;
1740 		else
1741 			callchain_param.record_mode = CALLCHAIN_FP;
1742 	}
1743 
1744 	/*
1745 	 * It's necessary to use libunwind to reliably determine the caller of
1746 	 * a leaf function on aarch64, as otherwise we cannot know whether to
1747 	 * start from the LR or FP.
1748 	 *
1749 	 * Always starting from the LR can result in duplicate or entirely
1750 	 * erroneous entries. Always skipping the LR and starting from the FP
1751 	 * can result in missing entries.
1752 	 */
1753 	if (callchain_param.record_mode == CALLCHAIN_FP && e_machine == EM_AARCH64)
1754 		dwarf_callchain_users = true;
1755 }
1756 
1757 static bool chain_match(struct callchain_list *base_chain,
1758 			struct callchain_list *pair_chain)
1759 {
1760 	enum match_result match;
1761 
1762 	match = match_chain_strings(base_chain->srcline,
1763 				    pair_chain->srcline);
1764 	if (match != MATCH_ERROR)
1765 		return match == MATCH_EQ;
1766 
1767 	match = match_chain_dso_addresses(base_chain->ms.map,
1768 					  base_chain->ip,
1769 					  pair_chain->ms.map,
1770 					  pair_chain->ip);
1771 
1772 	return match == MATCH_EQ;
1773 }
1774 
1775 bool callchain_cnode_matched(struct callchain_node *base_cnode,
1776 			     struct callchain_node *pair_cnode)
1777 {
1778 	struct callchain_list *base_chain, *pair_chain;
1779 	bool match = false;
1780 
1781 	pair_chain = list_first_entry(&pair_cnode->val,
1782 				      struct callchain_list,
1783 				      list);
1784 
1785 	list_for_each_entry(base_chain, &base_cnode->val, list) {
1786 		if (&pair_chain->list == &pair_cnode->val)
1787 			return false;
1788 
1789 		if (!base_chain->srcline || !pair_chain->srcline) {
1790 			pair_chain = list_next_entry(pair_chain, list);
1791 			continue;
1792 		}
1793 
1794 		match = chain_match(base_chain, pair_chain);
1795 		if (!match)
1796 			return false;
1797 
1798 		pair_chain = list_next_entry(pair_chain, list);
1799 	}
1800 
1801 	/*
1802 	 * Say chain1 is ABC, chain2 is ABCD, we consider they are
1803 	 * not fully matched.
1804 	 */
1805 	if (pair_chain && (&pair_chain->list != &pair_cnode->val))
1806 		return false;
1807 
1808 	return match;
1809 }
1810 
1811 static u64 count_callchain_hits(struct hist_entry *he)
1812 {
1813 	struct rb_root *root = &he->sorted_chain;
1814 	struct rb_node *rb_node = rb_first(root);
1815 	struct callchain_node *node;
1816 	u64 chain_hits = 0;
1817 
1818 	while (rb_node) {
1819 		node = rb_entry(rb_node, struct callchain_node, rb_node);
1820 		chain_hits += node->hit;
1821 		rb_node = rb_next(rb_node);
1822 	}
1823 
1824 	return chain_hits;
1825 }
1826 
1827 u64 callchain_total_hits(struct hists *hists)
1828 {
1829 	struct rb_node *next = rb_first_cached(&hists->entries);
1830 	u64 chain_hits = 0;
1831 
1832 	while (next) {
1833 		struct hist_entry *he = rb_entry(next, struct hist_entry,
1834 						 rb_node);
1835 
1836 		chain_hits += count_callchain_hits(he);
1837 		next = rb_next(&he->rb_node);
1838 	}
1839 
1840 	return chain_hits;
1841 }
1842 
1843 s64 callchain_avg_cycles(struct callchain_node *cnode)
1844 {
1845 	struct callchain_list *chain;
1846 	s64 cycles = 0;
1847 
1848 	list_for_each_entry(chain, &cnode->val, list) {
1849 		if (chain->srcline && chain->branch_count)
1850 			cycles += chain->cycles_count / chain->branch_count;
1851 	}
1852 
1853 	return cycles;
1854 }
1855 
1856 int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
1857 				    struct perf_sample *sample, int max_stack,
1858 				    bool symbols, callchain_iter_fn cb, void *data)
1859 {
1860 	struct callchain_cursor *cursor = get_tls_callchain_cursor();
1861 	int ret;
1862 
1863 	if (!cursor)
1864 		return -ENOMEM;
1865 
1866 	/* Fill in the callchain. */
1867 	ret = __thread__resolve_callchain(thread, cursor, evsel, sample,
1868 					  /*parent=*/NULL, /*root_al=*/NULL,
1869 					  max_stack, symbols);
1870 	if (ret)
1871 		return ret;
1872 
1873 	/* Switch from writing the callchain to reading it. */
1874 	callchain_cursor_commit(cursor);
1875 
1876 	while (1) {
1877 		struct callchain_cursor_node *node = callchain_cursor_current(cursor);
1878 
1879 		if (!node)
1880 			break;
1881 
1882 		ret = cb(node, data);
1883 		if (ret)
1884 			return ret;
1885 
1886 		callchain_cursor_advance(cursor);
1887 	}
1888 	return 0;
1889 }
1890 
1891 /*
1892  * This function merges earlier samples (@sample_orig) waiting for deferred
1893  * user callchains with the matching callchain record (@sample_callchain)
1894  * which is delivered now.  The @sample_orig->callchain should be released
1895  * after use if ->deferred_callchain is set.
1896  */
1897 int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
1898 				     struct perf_sample *sample_callchain)
1899 {
1900 	u64 nr_orig = sample_orig->callchain->nr - 1;
1901 	u64 nr_deferred = sample_callchain->callchain->nr;
1902 	struct ip_callchain *callchain;
1903 
1904 	if (sample_orig->merged_callchain) {
1905 		/* Already merged. */
1906 		return -EINVAL;
1907 	}
1908 
1909 	if (sample_orig->callchain->nr < 2) {
1910 		sample_orig->deferred_callchain = false;
1911 		return -EINVAL;
1912 	}
1913 
1914 	callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64));
1915 	if (callchain == NULL)
1916 		return -ENOMEM;
1917 
1918 	callchain->nr = nr_orig + nr_deferred;
1919 	/* copy original including PERF_CONTEXT_USER_DEFERRED (but the cookie) */
1920 	memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64));
1921 	/* copy deferred user callchains */
1922 	memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips,
1923 	       nr_deferred * sizeof(u64));
1924 
1925 	sample_orig->merged_callchain = true;
1926 	sample_orig->callchain = callchain;
1927 	return 0;
1928 }
1929