xref: /linux/tools/perf/util/thread-stack.c (revision e0bf6c5ca2d3281f231c5f0c9bf145e9513644de)
1 /*
2  * thread-stack.c: Synthesize a thread's stack using call / return events
3  * Copyright (c) 2014, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  */
15 
16 #include <linux/rbtree.h>
17 #include <linux/list.h>
18 #include "thread.h"
19 #include "event.h"
20 #include "machine.h"
21 #include "util.h"
22 #include "debug.h"
23 #include "symbol.h"
24 #include "comm.h"
25 #include "thread-stack.h"
26 
/* Call paths are allocated in fixed-size blocks of CALL_PATH_BLOCK_SIZE */
#define CALL_PATH_BLOCK_SHIFT 8
#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)

/**
 * struct call_path_block - a block of pre-allocated call paths.
 * @cp: array of call paths
 * @node: entry on the call path root's list of blocks
 */
struct call_path_block {
	struct call_path cp[CALL_PATH_BLOCK_SIZE];
	struct list_head node;
};
35 
/**
 * struct call_path_root - root of all call paths.
 * @call_path: root call path
 * @blocks: list of blocks to store call paths
 * @next: index of the next free space; the low bits (CALL_PATH_BLOCK_MASK)
 *        index into the most recently added block
 * @sz: number of spaces allocated so far (grows by CALL_PATH_BLOCK_SIZE)
 */
struct call_path_root {
	struct call_path call_path;
	struct list_head blocks;
	size_t next;
	size_t sz;
};
49 
/**
 * struct call_return_processor - provides a call-back to consume call-return
 *                                information.
 * @cpr: call path root
 * @process: call-back that accepts call/return information
 * @data: anonymous data passed through unchanged to @process
 */
struct call_return_processor {
	struct call_path_root *cpr;
	int (*process)(struct call_return *cr, void *data);
	void *data;
};
62 
/* Number of entries by which the thread stack array grows at a time */
#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
};
82 
/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor; NULL when only thread_stack__event() is used
 * @comm: current comm
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
};
107 
108 static int thread_stack__grow(struct thread_stack *ts)
109 {
110 	struct thread_stack_entry *new_stack;
111 	size_t sz, new_sz;
112 
113 	new_sz = ts->sz + STACK_GROWTH;
114 	sz = new_sz * sizeof(struct thread_stack_entry);
115 
116 	new_stack = realloc(ts->stack, sz);
117 	if (!new_stack)
118 		return -ENOMEM;
119 
120 	ts->stack = new_stack;
121 	ts->sz = new_sz;
122 
123 	return 0;
124 }
125 
126 static struct thread_stack *thread_stack__new(struct thread *thread,
127 					      struct call_return_processor *crp)
128 {
129 	struct thread_stack *ts;
130 
131 	ts = zalloc(sizeof(struct thread_stack));
132 	if (!ts)
133 		return NULL;
134 
135 	if (thread_stack__grow(ts)) {
136 		free(ts);
137 		return NULL;
138 	}
139 
140 	if (thread->mg && thread->mg->machine)
141 		ts->kernel_start = machine__kernel_start(thread->mg->machine);
142 	else
143 		ts->kernel_start = 1ULL << 63;
144 	ts->crp = crp;
145 
146 	return ts;
147 }
148 
149 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
150 {
151 	int err = 0;
152 
153 	if (ts->cnt == ts->sz) {
154 		err = thread_stack__grow(ts);
155 		if (err) {
156 			pr_warning("Out of memory: discarding thread stack\n");
157 			ts->cnt = 0;
158 		}
159 	}
160 
161 	ts->stack[ts->cnt++].ret_addr = ret_addr;
162 
163 	return err;
164 }
165 
166 static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
167 {
168 	size_t i;
169 
170 	/*
171 	 * In some cases there may be functions which are not seen to return.
172 	 * For example when setjmp / longjmp has been used.  Or the perf context
173 	 * switch in the kernel which doesn't stop and start tracing in exactly
174 	 * the same code path.  When that happens the return address will be
175 	 * further down the stack.  If the return address is not found at all,
176 	 * we assume the opposite (i.e. this is a return for a call that wasn't
177 	 * seen for some reason) and leave the stack alone.
178 	 */
179 	for (i = ts->cnt; i; ) {
180 		if (ts->stack[--i].ret_addr == ret_addr) {
181 			ts->cnt = i;
182 			return;
183 		}
184 	}
185 }
186 
187 static bool thread_stack__in_kernel(struct thread_stack *ts)
188 {
189 	if (!ts->cnt)
190 		return false;
191 
192 	return ts->stack[ts->cnt - 1].cp->in_kernel;
193 }
194 
195 static int thread_stack__call_return(struct thread *thread,
196 				     struct thread_stack *ts, size_t idx,
197 				     u64 timestamp, u64 ref, bool no_return)
198 {
199 	struct call_return_processor *crp = ts->crp;
200 	struct thread_stack_entry *tse;
201 	struct call_return cr = {
202 		.thread = thread,
203 		.comm = ts->comm,
204 		.db_id = 0,
205 	};
206 
207 	tse = &ts->stack[idx];
208 	cr.cp = tse->cp;
209 	cr.call_time = tse->timestamp;
210 	cr.return_time = timestamp;
211 	cr.branch_count = ts->branch_count - tse->branch_count;
212 	cr.call_ref = tse->ref;
213 	cr.return_ref = ref;
214 	if (tse->no_call)
215 		cr.flags |= CALL_RETURN_NO_CALL;
216 	if (no_return)
217 		cr.flags |= CALL_RETURN_NO_RETURN;
218 
219 	return crp->process(&cr, crp->data);
220 }
221 
222 static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
223 {
224 	struct call_return_processor *crp = ts->crp;
225 	int err;
226 
227 	if (!crp) {
228 		ts->cnt = 0;
229 		return 0;
230 	}
231 
232 	while (ts->cnt) {
233 		err = thread_stack__call_return(thread, ts, --ts->cnt,
234 						ts->last_time, 0, true);
235 		if (err) {
236 			pr_err("Error flushing thread stack!\n");
237 			ts->cnt = 0;
238 			return err;
239 		}
240 	}
241 
242 	return 0;
243 }
244 
/**
 * thread_stack__event - add a call/return branch event to the thread stack.
 * @thread: the thread; the stack is allocated lazily on first use
 * @flags: branch flags (PERF_IP_FLAG_CALL / PERF_IP_FLAG_RETURN)
 * @from_ip: branch source address
 * @to_ip: branch target address
 * @insn_len: length of the branch instruction (used to compute return address)
 * @trace_nr: trace number; a change indicates a trace discontinuity
 *
 * Return: 0 on success, negative error code otherwise.
 */
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	if (!thread)
		return -EINVAL;

	/* Allocate the thread stack lazily on the first event */
	if (!thread->ts) {
		thread->ts = thread_stack__new(thread, NULL);
		if (!thread->ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		thread->ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes.  In that case
	 * the stack might be completely invalid.  Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (thread->ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		/* A call with no known target cannot be tracked */
		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(thread->ts, ret_addr);
	} else if (flags & PERF_IP_FLAG_RETURN) {
		/* A return with no known source cannot be matched */
		if (!from_ip)
			return 0;
		thread_stack__pop(thread->ts, to_ip);
	}

	return 0;
}
292 
293 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
294 {
295 	if (!thread || !thread->ts)
296 		return;
297 
298 	if (trace_nr != thread->ts->trace_nr) {
299 		if (thread->ts->trace_nr)
300 			thread_stack__flush(thread, thread->ts);
301 		thread->ts->trace_nr = trace_nr;
302 	}
303 }
304 
305 void thread_stack__free(struct thread *thread)
306 {
307 	if (thread->ts) {
308 		thread_stack__flush(thread, thread->ts);
309 		zfree(&thread->ts->stack);
310 		zfree(&thread->ts);
311 	}
312 }
313 
314 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
315 			  size_t sz, u64 ip)
316 {
317 	size_t i;
318 
319 	if (!thread || !thread->ts)
320 		chain->nr = 1;
321 	else
322 		chain->nr = min(sz, thread->ts->cnt + 1);
323 
324 	chain->ips[0] = ip;
325 
326 	for (i = 1; i < chain->nr; i++)
327 		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
328 }
329 
330 static void call_path__init(struct call_path *cp, struct call_path *parent,
331 			    struct symbol *sym, u64 ip, bool in_kernel)
332 {
333 	cp->parent = parent;
334 	cp->sym = sym;
335 	cp->ip = sym ? 0 : ip;
336 	cp->db_id = 0;
337 	cp->in_kernel = in_kernel;
338 	RB_CLEAR_NODE(&cp->rb_node);
339 	cp->children = RB_ROOT;
340 }
341 
342 static struct call_path_root *call_path_root__new(void)
343 {
344 	struct call_path_root *cpr;
345 
346 	cpr = zalloc(sizeof(struct call_path_root));
347 	if (!cpr)
348 		return NULL;
349 	call_path__init(&cpr->call_path, NULL, NULL, 0, false);
350 	INIT_LIST_HEAD(&cpr->blocks);
351 	return cpr;
352 }
353 
354 static void call_path_root__free(struct call_path_root *cpr)
355 {
356 	struct call_path_block *pos, *n;
357 
358 	list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
359 		list_del(&pos->node);
360 		free(pos);
361 	}
362 	free(cpr);
363 }
364 
365 static struct call_path *call_path__new(struct call_path_root *cpr,
366 					struct call_path *parent,
367 					struct symbol *sym, u64 ip,
368 					bool in_kernel)
369 {
370 	struct call_path_block *cpb;
371 	struct call_path *cp;
372 	size_t n;
373 
374 	if (cpr->next < cpr->sz) {
375 		cpb = list_last_entry(&cpr->blocks, struct call_path_block,
376 				      node);
377 	} else {
378 		cpb = zalloc(sizeof(struct call_path_block));
379 		if (!cpb)
380 			return NULL;
381 		list_add_tail(&cpb->node, &cpr->blocks);
382 		cpr->sz += CALL_PATH_BLOCK_SIZE;
383 	}
384 
385 	n = cpr->next++ & CALL_PATH_BLOCK_MASK;
386 	cp = &cpb->cp[n];
387 
388 	call_path__init(cp, parent, sym, ip, in_kernel);
389 
390 	return cp;
391 }
392 
393 static struct call_path *call_path__findnew(struct call_path_root *cpr,
394 					    struct call_path *parent,
395 					    struct symbol *sym, u64 ip, u64 ks)
396 {
397 	struct rb_node **p;
398 	struct rb_node *node_parent = NULL;
399 	struct call_path *cp;
400 	bool in_kernel = ip >= ks;
401 
402 	if (sym)
403 		ip = 0;
404 
405 	if (!parent)
406 		return call_path__new(cpr, parent, sym, ip, in_kernel);
407 
408 	p = &parent->children.rb_node;
409 	while (*p != NULL) {
410 		node_parent = *p;
411 		cp = rb_entry(node_parent, struct call_path, rb_node);
412 
413 		if (cp->sym == sym && cp->ip == ip)
414 			return cp;
415 
416 		if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
417 			p = &(*p)->rb_left;
418 		else
419 			p = &(*p)->rb_right;
420 	}
421 
422 	cp = call_path__new(cpr, parent, sym, ip, in_kernel);
423 	if (!cp)
424 		return NULL;
425 
426 	rb_link_node(&cp->rb_node, node_parent, p);
427 	rb_insert_color(&cp->rb_node, &parent->children);
428 
429 	return cp;
430 }
431 
432 struct call_return_processor *
433 call_return_processor__new(int (*process)(struct call_return *cr, void *data),
434 			   void *data)
435 {
436 	struct call_return_processor *crp;
437 
438 	crp = zalloc(sizeof(struct call_return_processor));
439 	if (!crp)
440 		return NULL;
441 	crp->cpr = call_path_root__new();
442 	if (!crp->cpr)
443 		goto out_free;
444 	crp->process = process;
445 	crp->data = data;
446 	return crp;
447 
448 out_free:
449 	free(crp);
450 	return NULL;
451 }
452 
453 void call_return_processor__free(struct call_return_processor *crp)
454 {
455 	if (crp) {
456 		call_path_root__free(crp->cpr);
457 		free(crp);
458 	}
459 }
460 
461 static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
462 				 u64 timestamp, u64 ref, struct call_path *cp,
463 				 bool no_call)
464 {
465 	struct thread_stack_entry *tse;
466 	int err;
467 
468 	if (ts->cnt == ts->sz) {
469 		err = thread_stack__grow(ts);
470 		if (err)
471 			return err;
472 	}
473 
474 	tse = &ts->stack[ts->cnt++];
475 	tse->ret_addr = ret_addr;
476 	tse->timestamp = timestamp;
477 	tse->ref = ref;
478 	tse->branch_count = ts->branch_count;
479 	tse->cp = cp;
480 	tse->no_call = no_call;
481 
482 	return 0;
483 }
484 
/*
 * Pop the stack entry matching a return.
 *
 * Return: 0 on success, a negative error code from the processor call-back,
 * or 1 when no matching entry exists (so the caller can treat the sample as a
 * return without a call).
 */
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	/* With a single entry, matching the symbol alone is enough */
	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		/*
		 * The return address may match an entry further down the stack
		 * (e.g. after longjmp).  If so, report the intervening entries
		 * as 'no return' and then pop the matching entry normally.
		 */
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}
527 
528 static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
529 				struct perf_sample *sample,
530 				struct addr_location *from_al,
531 				struct addr_location *to_al, u64 ref)
532 {
533 	struct call_path_root *cpr = ts->crp->cpr;
534 	struct call_path *cp;
535 	struct symbol *sym;
536 	u64 ip;
537 
538 	if (sample->ip) {
539 		ip = sample->ip;
540 		sym = from_al->sym;
541 	} else if (sample->addr) {
542 		ip = sample->addr;
543 		sym = to_al->sym;
544 	} else {
545 		return 0;
546 	}
547 
548 	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
549 				ts->kernel_start);
550 	if (!cp)
551 		return -ENOMEM;
552 
553 	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
554 				     true);
555 }
556 
/*
 * Handle a 'return' for which no matching entry was found on the stack:
 * synthesize the missing 'call' (push a 'no_call' entry) and pop it again,
 * first unwinding any kernel addresses when the return crosses back into
 * userspace.
 */
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	int err;

	if (sample->ip >= ks && sample->addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, &cpr->call_path,
						to_al->sym, sample->addr,
						ts->kernel_start);
			if (!cp)
				return -ENOMEM;
			return thread_stack__push_cp(ts, 0, sample->time, ref,
						     cp, true);
		}
	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}
	}

	/* Attach the synthesized call path under the current top of stack */
	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = &cpr->call_path;

	/* This 'return' had no 'call', so push and pop top of stack */
	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
				    true);
	if (err)
		return err;

	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
				    to_al->sym);
}
618 
619 static int thread_stack__trace_begin(struct thread *thread,
620 				     struct thread_stack *ts, u64 timestamp,
621 				     u64 ref)
622 {
623 	struct thread_stack_entry *tse;
624 	int err;
625 
626 	if (!ts->cnt)
627 		return 0;
628 
629 	/* Pop trace end */
630 	tse = &ts->stack[ts->cnt - 1];
631 	if (tse->cp->sym == NULL && tse->cp->ip == 0) {
632 		err = thread_stack__call_return(thread, ts, --ts->cnt,
633 						timestamp, ref, false);
634 		if (err)
635 			return err;
636 	}
637 
638 	return 0;
639 }
640 
641 static int thread_stack__trace_end(struct thread_stack *ts,
642 				   struct perf_sample *sample, u64 ref)
643 {
644 	struct call_path_root *cpr = ts->crp->cpr;
645 	struct call_path *cp;
646 	u64 ret_addr;
647 
648 	/* No point having 'trace end' on the bottom of the stack */
649 	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
650 		return 0;
651 
652 	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
653 				ts->kernel_start);
654 	if (!cp)
655 		return -ENOMEM;
656 
657 	ret_addr = sample->ip + sample->insn_len;
658 
659 	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
660 				     false);
661 }
662 
/**
 * thread_stack__process - add a branch sample to the call/return processor.
 * @thread: the thread
 * @comm: current comm; a change with pid == tid is treated as exec and flushes
 * @sample: the branch sample (ip = source, addr = target)
 * @from_al: resolved source address location
 * @to_al: resolved target address location
 * @ref: external reference for this sample (e.g. db_id)
 * @crp: call/return processor to attach to a newly created stack
 *
 * Return: 0 on success, negative error code otherwise.
 */
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts;
	int err = 0;

	if (ts) {
		if (!ts->crp) {
			/* Supersede thread_stack__event() */
			thread_stack__free(thread);
			thread->ts = thread_stack__new(thread, crp);
			if (!thread->ts)
				return -ENOMEM;
			ts = thread->ts;
			ts->comm = comm;
		}
	} else {
		thread->ts = thread_stack__new(thread, crp);
		if (!thread->ts)
			return -ENOMEM;
		ts = thread->ts;
		ts->comm = comm;
	}

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
					   ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		/* A call needs both source and target to be tracked */
		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		if (!cp)
			return -ENOMEM;
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false);
	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			/* err > 0 means no matching call was on the stack */
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	}

	return err;
}
748