xref: /linux/tools/perf/util/thread-stack.c (revision 0c874100108f03401cb3154801d2671bbad40ad4)
1 /*
2  * thread-stack.c: Synthesize a thread's stack using call / return events
3  * Copyright (c) 2014, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  */
15 
16 #include <linux/rbtree.h>
17 #include <linux/list.h>
18 #include <errno.h>
19 #include "thread.h"
20 #include "event.h"
21 #include "machine.h"
22 #include "util.h"
23 #include "debug.h"
24 #include "symbol.h"
25 #include "comm.h"
26 #include "call-path.h"
27 #include "thread-stack.h"
28 
29 #define STACK_GROWTH 2048
30 
31 /**
32  * struct thread_stack_entry - thread stack entry.
33  * @ret_addr: return address
34  * @timestamp: timestamp (if known)
35  * @ref: external reference (e.g. db_id of sample)
36  * @branch_count: the branch count when the entry was created
37  * @cp: call path
38  * @no_call: a 'call' was not seen
39  * @trace_end: a 'call' but trace ended
40  */
41 struct thread_stack_entry {
42 	u64 ret_addr;
43 	u64 timestamp;
44 	u64 ref;
45 	u64 branch_count;
46 	struct call_path *cp;
47 	bool no_call;
48 	bool trace_end;
49 };
50 
51 /**
52  * struct thread_stack - thread stack constructed from 'call' and 'return'
53  *                       branch samples.
54  * @stack: array that holds the stack
55  * @cnt: number of entries in the stack
56  * @sz: current maximum stack size
57  * @trace_nr: current trace number
58  * @branch_count: running branch count
59  * @kernel_start: kernel start address
60  * @last_time: last timestamp
61  * @crp: call/return processor
62  * @comm: current comm
63  */
64 struct thread_stack {
65 	struct thread_stack_entry *stack;
66 	size_t cnt;
67 	size_t sz;
68 	u64 trace_nr;
69 	u64 branch_count;
70 	u64 kernel_start;
71 	u64 last_time;
72 	struct call_return_processor *crp;
73 	struct comm *comm;
74 };
75 
76 static int thread_stack__grow(struct thread_stack *ts)
77 {
78 	struct thread_stack_entry *new_stack;
79 	size_t sz, new_sz;
80 
81 	new_sz = ts->sz + STACK_GROWTH;
82 	sz = new_sz * sizeof(struct thread_stack_entry);
83 
84 	new_stack = realloc(ts->stack, sz);
85 	if (!new_stack)
86 		return -ENOMEM;
87 
88 	ts->stack = new_stack;
89 	ts->sz = new_sz;
90 
91 	return 0;
92 }
93 
94 static struct thread_stack *thread_stack__new(struct thread *thread,
95 					      struct call_return_processor *crp)
96 {
97 	struct thread_stack *ts;
98 
99 	ts = zalloc(sizeof(struct thread_stack));
100 	if (!ts)
101 		return NULL;
102 
103 	if (thread_stack__grow(ts)) {
104 		free(ts);
105 		return NULL;
106 	}
107 
108 	if (thread->mg && thread->mg->machine)
109 		ts->kernel_start = machine__kernel_start(thread->mg->machine);
110 	else
111 		ts->kernel_start = 1ULL << 63;
112 	ts->crp = crp;
113 
114 	return ts;
115 }
116 
117 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
118 			      bool trace_end)
119 {
120 	int err = 0;
121 
122 	if (ts->cnt == ts->sz) {
123 		err = thread_stack__grow(ts);
124 		if (err) {
125 			pr_warning("Out of memory: discarding thread stack\n");
126 			ts->cnt = 0;
127 		}
128 	}
129 
130 	ts->stack[ts->cnt].trace_end = trace_end;
131 	ts->stack[ts->cnt++].ret_addr = ret_addr;
132 
133 	return err;
134 }
135 
136 static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
137 {
138 	size_t i;
139 
140 	/*
141 	 * In some cases there may be functions which are not seen to return.
142 	 * For example when setjmp / longjmp has been used.  Or the perf context
143 	 * switch in the kernel which doesn't stop and start tracing in exactly
144 	 * the same code path.  When that happens the return address will be
145 	 * further down the stack.  If the return address is not found at all,
146 	 * we assume the opposite (i.e. this is a return for a call that wasn't
147 	 * seen for some reason) and leave the stack alone.
148 	 */
149 	for (i = ts->cnt; i; ) {
150 		if (ts->stack[--i].ret_addr == ret_addr) {
151 			ts->cnt = i;
152 			return;
153 		}
154 	}
155 }
156 
157 static void thread_stack__pop_trace_end(struct thread_stack *ts)
158 {
159 	size_t i;
160 
161 	for (i = ts->cnt; i; ) {
162 		if (ts->stack[--i].trace_end)
163 			ts->cnt = i;
164 		else
165 			return;
166 	}
167 }
168 
169 static bool thread_stack__in_kernel(struct thread_stack *ts)
170 {
171 	if (!ts->cnt)
172 		return false;
173 
174 	return ts->stack[ts->cnt - 1].cp->in_kernel;
175 }
176 
177 static int thread_stack__call_return(struct thread *thread,
178 				     struct thread_stack *ts, size_t idx,
179 				     u64 timestamp, u64 ref, bool no_return)
180 {
181 	struct call_return_processor *crp = ts->crp;
182 	struct thread_stack_entry *tse;
183 	struct call_return cr = {
184 		.thread = thread,
185 		.comm = ts->comm,
186 		.db_id = 0,
187 	};
188 
189 	tse = &ts->stack[idx];
190 	cr.cp = tse->cp;
191 	cr.call_time = tse->timestamp;
192 	cr.return_time = timestamp;
193 	cr.branch_count = ts->branch_count - tse->branch_count;
194 	cr.call_ref = tse->ref;
195 	cr.return_ref = ref;
196 	if (tse->no_call)
197 		cr.flags |= CALL_RETURN_NO_CALL;
198 	if (no_return)
199 		cr.flags |= CALL_RETURN_NO_RETURN;
200 
201 	return crp->process(&cr, crp->data);
202 }
203 
204 static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
205 {
206 	struct call_return_processor *crp = ts->crp;
207 	int err;
208 
209 	if (!crp) {
210 		ts->cnt = 0;
211 		return 0;
212 	}
213 
214 	while (ts->cnt) {
215 		err = thread_stack__call_return(thread, ts, --ts->cnt,
216 						ts->last_time, 0, true);
217 		if (err) {
218 			pr_err("Error flushing thread stack!\n");
219 			ts->cnt = 0;
220 			return err;
221 		}
222 	}
223 
224 	return 0;
225 }
226 
227 int thread_stack__flush(struct thread *thread)
228 {
229 	if (thread->ts)
230 		return __thread_stack__flush(thread, thread->ts);
231 
232 	return 0;
233 }
234 
235 int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
236 			u64 to_ip, u16 insn_len, u64 trace_nr)
237 {
238 	if (!thread)
239 		return -EINVAL;
240 
241 	if (!thread->ts) {
242 		thread->ts = thread_stack__new(thread, NULL);
243 		if (!thread->ts) {
244 			pr_warning("Out of memory: no thread stack\n");
245 			return -ENOMEM;
246 		}
247 		thread->ts->trace_nr = trace_nr;
248 	}
249 
250 	/*
251 	 * When the trace is discontinuous, the trace_nr changes.  In that case
252 	 * the stack might be completely invalid.  Better to report nothing than
253 	 * to report something misleading, so flush the stack.
254 	 */
255 	if (trace_nr != thread->ts->trace_nr) {
256 		if (thread->ts->trace_nr)
257 			__thread_stack__flush(thread, thread->ts);
258 		thread->ts->trace_nr = trace_nr;
259 	}
260 
261 	/* Stop here if thread_stack__process() is in use */
262 	if (thread->ts->crp)
263 		return 0;
264 
265 	if (flags & PERF_IP_FLAG_CALL) {
266 		u64 ret_addr;
267 
268 		if (!to_ip)
269 			return 0;
270 		ret_addr = from_ip + insn_len;
271 		if (ret_addr == to_ip)
272 			return 0; /* Zero-length calls are excluded */
273 		return thread_stack__push(thread->ts, ret_addr,
274 					  flags & PERF_IP_FLAG_TRACE_END);
275 	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
276 		/*
277 		 * If the caller did not change the trace number (which would
278 		 * have flushed the stack) then try to make sense of the stack.
279 		 * Possibly, tracing began after returning to the current
280 		 * address, so try to pop that. Also, do not expect a call made
281 		 * when the trace ended, to return, so pop that.
282 		 */
283 		thread_stack__pop(thread->ts, to_ip);
284 		thread_stack__pop_trace_end(thread->ts);
285 	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
286 		thread_stack__pop(thread->ts, to_ip);
287 	}
288 
289 	return 0;
290 }
291 
292 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
293 {
294 	if (!thread || !thread->ts)
295 		return;
296 
297 	if (trace_nr != thread->ts->trace_nr) {
298 		if (thread->ts->trace_nr)
299 			__thread_stack__flush(thread, thread->ts);
300 		thread->ts->trace_nr = trace_nr;
301 	}
302 }
303 
304 void thread_stack__free(struct thread *thread)
305 {
306 	if (thread->ts) {
307 		__thread_stack__flush(thread, thread->ts);
308 		zfree(&thread->ts->stack);
309 		zfree(&thread->ts);
310 	}
311 }
312 
313 static inline u64 callchain_context(u64 ip, u64 kernel_start)
314 {
315 	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
316 }
317 
318 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
319 			  size_t sz, u64 ip, u64 kernel_start)
320 {
321 	u64 context = callchain_context(ip, kernel_start);
322 	u64 last_context;
323 	size_t i, j;
324 
325 	if (sz < 2) {
326 		chain->nr = 0;
327 		return;
328 	}
329 
330 	chain->ips[0] = context;
331 	chain->ips[1] = ip;
332 
333 	if (!thread || !thread->ts) {
334 		chain->nr = 2;
335 		return;
336 	}
337 
338 	last_context = context;
339 
340 	for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
341 		ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
342 		context = callchain_context(ip, kernel_start);
343 		if (context != last_context) {
344 			if (i >= sz - 1)
345 				break;
346 			chain->ips[i++] = context;
347 			last_context = context;
348 		}
349 		chain->ips[i] = ip;
350 	}
351 
352 	chain->nr = i;
353 }
354 
355 struct call_return_processor *
356 call_return_processor__new(int (*process)(struct call_return *cr, void *data),
357 			   void *data)
358 {
359 	struct call_return_processor *crp;
360 
361 	crp = zalloc(sizeof(struct call_return_processor));
362 	if (!crp)
363 		return NULL;
364 	crp->cpr = call_path_root__new();
365 	if (!crp->cpr)
366 		goto out_free;
367 	crp->process = process;
368 	crp->data = data;
369 	return crp;
370 
371 out_free:
372 	free(crp);
373 	return NULL;
374 }
375 
376 void call_return_processor__free(struct call_return_processor *crp)
377 {
378 	if (crp) {
379 		call_path_root__free(crp->cpr);
380 		free(crp);
381 	}
382 }
383 
384 static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
385 				 u64 timestamp, u64 ref, struct call_path *cp,
386 				 bool no_call, bool trace_end)
387 {
388 	struct thread_stack_entry *tse;
389 	int err;
390 
391 	if (ts->cnt == ts->sz) {
392 		err = thread_stack__grow(ts);
393 		if (err)
394 			return err;
395 	}
396 
397 	tse = &ts->stack[ts->cnt++];
398 	tse->ret_addr = ret_addr;
399 	tse->timestamp = timestamp;
400 	tse->ref = ref;
401 	tse->branch_count = ts->branch_count;
402 	tse->cp = cp;
403 	tse->no_call = no_call;
404 	tse->trace_end = trace_end;
405 
406 	return 0;
407 }
408 
409 static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
410 				u64 ret_addr, u64 timestamp, u64 ref,
411 				struct symbol *sym)
412 {
413 	int err;
414 
415 	if (!ts->cnt)
416 		return 1;
417 
418 	if (ts->cnt == 1) {
419 		struct thread_stack_entry *tse = &ts->stack[0];
420 
421 		if (tse->cp->sym == sym)
422 			return thread_stack__call_return(thread, ts, --ts->cnt,
423 							 timestamp, ref, false);
424 	}
425 
426 	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
427 		return thread_stack__call_return(thread, ts, --ts->cnt,
428 						 timestamp, ref, false);
429 	} else {
430 		size_t i = ts->cnt - 1;
431 
432 		while (i--) {
433 			if (ts->stack[i].ret_addr != ret_addr)
434 				continue;
435 			i += 1;
436 			while (ts->cnt > i) {
437 				err = thread_stack__call_return(thread, ts,
438 								--ts->cnt,
439 								timestamp, ref,
440 								true);
441 				if (err)
442 					return err;
443 			}
444 			return thread_stack__call_return(thread, ts, --ts->cnt,
445 							 timestamp, ref, false);
446 		}
447 	}
448 
449 	return 1;
450 }
451 
452 static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
453 				struct perf_sample *sample,
454 				struct addr_location *from_al,
455 				struct addr_location *to_al, u64 ref)
456 {
457 	struct call_path_root *cpr = ts->crp->cpr;
458 	struct call_path *cp;
459 	struct symbol *sym;
460 	u64 ip;
461 
462 	if (sample->ip) {
463 		ip = sample->ip;
464 		sym = from_al->sym;
465 	} else if (sample->addr) {
466 		ip = sample->addr;
467 		sym = to_al->sym;
468 	} else {
469 		return 0;
470 	}
471 
472 	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
473 				ts->kernel_start);
474 	if (!cp)
475 		return -ENOMEM;
476 
477 	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
478 				     true, false);
479 }
480 
481 static int thread_stack__no_call_return(struct thread *thread,
482 					struct thread_stack *ts,
483 					struct perf_sample *sample,
484 					struct addr_location *from_al,
485 					struct addr_location *to_al, u64 ref)
486 {
487 	struct call_path_root *cpr = ts->crp->cpr;
488 	struct call_path *cp, *parent;
489 	u64 ks = ts->kernel_start;
490 	int err;
491 
492 	if (sample->ip >= ks && sample->addr < ks) {
493 		/* Return to userspace, so pop all kernel addresses */
494 		while (thread_stack__in_kernel(ts)) {
495 			err = thread_stack__call_return(thread, ts, --ts->cnt,
496 							sample->time, ref,
497 							true);
498 			if (err)
499 				return err;
500 		}
501 
502 		/* If the stack is empty, push the userspace address */
503 		if (!ts->cnt) {
504 			cp = call_path__findnew(cpr, &cpr->call_path,
505 						to_al->sym, sample->addr,
506 						ts->kernel_start);
507 			if (!cp)
508 				return -ENOMEM;
509 			return thread_stack__push_cp(ts, 0, sample->time, ref,
510 						     cp, true, false);
511 		}
512 	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
513 		/* Return to userspace, so pop all kernel addresses */
514 		while (thread_stack__in_kernel(ts)) {
515 			err = thread_stack__call_return(thread, ts, --ts->cnt,
516 							sample->time, ref,
517 							true);
518 			if (err)
519 				return err;
520 		}
521 	}
522 
523 	if (ts->cnt)
524 		parent = ts->stack[ts->cnt - 1].cp;
525 	else
526 		parent = &cpr->call_path;
527 
528 	/* This 'return' had no 'call', so push and pop top of stack */
529 	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
530 				ts->kernel_start);
531 	if (!cp)
532 		return -ENOMEM;
533 
534 	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
535 				    true, false);
536 	if (err)
537 		return err;
538 
539 	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
540 				    to_al->sym);
541 }
542 
543 static int thread_stack__trace_begin(struct thread *thread,
544 				     struct thread_stack *ts, u64 timestamp,
545 				     u64 ref)
546 {
547 	struct thread_stack_entry *tse;
548 	int err;
549 
550 	if (!ts->cnt)
551 		return 0;
552 
553 	/* Pop trace end */
554 	tse = &ts->stack[ts->cnt - 1];
555 	if (tse->trace_end) {
556 		err = thread_stack__call_return(thread, ts, --ts->cnt,
557 						timestamp, ref, false);
558 		if (err)
559 			return err;
560 	}
561 
562 	return 0;
563 }
564 
565 static int thread_stack__trace_end(struct thread_stack *ts,
566 				   struct perf_sample *sample, u64 ref)
567 {
568 	struct call_path_root *cpr = ts->crp->cpr;
569 	struct call_path *cp;
570 	u64 ret_addr;
571 
572 	/* No point having 'trace end' on the bottom of the stack */
573 	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
574 		return 0;
575 
576 	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
577 				ts->kernel_start);
578 	if (!cp)
579 		return -ENOMEM;
580 
581 	ret_addr = sample->ip + sample->insn_len;
582 
583 	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
584 				     false, true);
585 }
586 
587 int thread_stack__process(struct thread *thread, struct comm *comm,
588 			  struct perf_sample *sample,
589 			  struct addr_location *from_al,
590 			  struct addr_location *to_al, u64 ref,
591 			  struct call_return_processor *crp)
592 {
593 	struct thread_stack *ts = thread->ts;
594 	int err = 0;
595 
596 	if (ts) {
597 		if (!ts->crp) {
598 			/* Supersede thread_stack__event() */
599 			thread_stack__free(thread);
600 			thread->ts = thread_stack__new(thread, crp);
601 			if (!thread->ts)
602 				return -ENOMEM;
603 			ts = thread->ts;
604 			ts->comm = comm;
605 		}
606 	} else {
607 		thread->ts = thread_stack__new(thread, crp);
608 		if (!thread->ts)
609 			return -ENOMEM;
610 		ts = thread->ts;
611 		ts->comm = comm;
612 	}
613 
614 	/* Flush stack on exec */
615 	if (ts->comm != comm && thread->pid_ == thread->tid) {
616 		err = __thread_stack__flush(thread, ts);
617 		if (err)
618 			return err;
619 		ts->comm = comm;
620 	}
621 
622 	/* If the stack is empty, put the current symbol on the stack */
623 	if (!ts->cnt) {
624 		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
625 					   ref);
626 		if (err)
627 			return err;
628 	}
629 
630 	ts->branch_count += 1;
631 	ts->last_time = sample->time;
632 
633 	if (sample->flags & PERF_IP_FLAG_CALL) {
634 		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
635 		struct call_path_root *cpr = ts->crp->cpr;
636 		struct call_path *cp;
637 		u64 ret_addr;
638 
639 		if (!sample->ip || !sample->addr)
640 			return 0;
641 
642 		ret_addr = sample->ip + sample->insn_len;
643 		if (ret_addr == sample->addr)
644 			return 0; /* Zero-length calls are excluded */
645 
646 		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
647 					to_al->sym, sample->addr,
648 					ts->kernel_start);
649 		if (!cp)
650 			return -ENOMEM;
651 		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
652 					    cp, false, trace_end);
653 	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
654 		if (!sample->ip || !sample->addr)
655 			return 0;
656 
657 		err = thread_stack__pop_cp(thread, ts, sample->addr,
658 					   sample->time, ref, from_al->sym);
659 		if (err) {
660 			if (err < 0)
661 				return err;
662 			err = thread_stack__no_call_return(thread, ts, sample,
663 							   from_al, to_al, ref);
664 		}
665 	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
666 		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
667 	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
668 		err = thread_stack__trace_end(ts, sample, ref);
669 	}
670 
671 	return err;
672 }
673 
674 size_t thread_stack__depth(struct thread *thread)
675 {
676 	if (!thread->ts)
677 		return 0;
678 	return thread->ts->cnt;
679 }
680