xref: /linux/tools/perf/util/thread-stack.c (revision 238997e5e63ab6a655c73d6a65c81cfb7f452f25)
1 /*
2  * thread-stack.c: Synthesize a thread's stack using call / return events
3  * Copyright (c) 2014, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  */
15 
16 #include <linux/rbtree.h>
17 #include <linux/list.h>
18 #include <errno.h>
19 #include "thread.h"
20 #include "event.h"
21 #include "machine.h"
22 #include "util.h"
23 #include "debug.h"
24 #include "symbol.h"
25 #include "comm.h"
26 #include "call-path.h"
27 #include "thread-stack.h"
28 
29 #define STACK_GROWTH 2048
30 
31 /**
32  * struct thread_stack_entry - thread stack entry.
33  * @ret_addr: return address
34  * @timestamp: timestamp (if known)
35  * @ref: external reference (e.g. db_id of sample)
36  * @branch_count: the branch count when the entry was created
37  * @cp: call path
38  * @no_call: a 'call' was not seen
39  * @trace_end: a 'call' but trace ended
40  */
41 struct thread_stack_entry {
42 	u64 ret_addr;
43 	u64 timestamp;
44 	u64 ref;
45 	u64 branch_count;
46 	struct call_path *cp;
47 	bool no_call;
48 	bool trace_end;
49 };
50 
51 /**
52  * struct thread_stack - thread stack constructed from 'call' and 'return'
53  *                       branch samples.
54  * @stack: array that holds the stack
55  * @cnt: number of entries in the stack
56  * @sz: current maximum stack size
57  * @trace_nr: current trace number
58  * @branch_count: running branch count
59  * @kernel_start: kernel start address
60  * @last_time: last timestamp
61  * @crp: call/return processor
62  * @comm: current comm
63  */
64 struct thread_stack {
65 	struct thread_stack_entry *stack;
66 	size_t cnt;
67 	size_t sz;
68 	u64 trace_nr;
69 	u64 branch_count;
70 	u64 kernel_start;
71 	u64 last_time;
72 	struct call_return_processor *crp;
73 	struct comm *comm;
74 };
75 
76 static int thread_stack__grow(struct thread_stack *ts)
77 {
78 	struct thread_stack_entry *new_stack;
79 	size_t sz, new_sz;
80 
81 	new_sz = ts->sz + STACK_GROWTH;
82 	sz = new_sz * sizeof(struct thread_stack_entry);
83 
84 	new_stack = realloc(ts->stack, sz);
85 	if (!new_stack)
86 		return -ENOMEM;
87 
88 	ts->stack = new_stack;
89 	ts->sz = new_sz;
90 
91 	return 0;
92 }
93 
94 static struct thread_stack *thread_stack__new(struct thread *thread,
95 					      struct call_return_processor *crp)
96 {
97 	struct thread_stack *ts;
98 
99 	ts = zalloc(sizeof(struct thread_stack));
100 	if (!ts)
101 		return NULL;
102 
103 	if (thread_stack__grow(ts)) {
104 		free(ts);
105 		return NULL;
106 	}
107 
108 	if (thread->mg && thread->mg->machine)
109 		ts->kernel_start = machine__kernel_start(thread->mg->machine);
110 	else
111 		ts->kernel_start = 1ULL << 63;
112 	ts->crp = crp;
113 
114 	return ts;
115 }
116 
117 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
118 			      bool trace_end)
119 {
120 	int err = 0;
121 
122 	if (ts->cnt == ts->sz) {
123 		err = thread_stack__grow(ts);
124 		if (err) {
125 			pr_warning("Out of memory: discarding thread stack\n");
126 			ts->cnt = 0;
127 		}
128 	}
129 
130 	ts->stack[ts->cnt].trace_end = trace_end;
131 	ts->stack[ts->cnt++].ret_addr = ret_addr;
132 
133 	return err;
134 }
135 
136 static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
137 {
138 	size_t i;
139 
140 	/*
141 	 * In some cases there may be functions which are not seen to return.
142 	 * For example when setjmp / longjmp has been used.  Or the perf context
143 	 * switch in the kernel which doesn't stop and start tracing in exactly
144 	 * the same code path.  When that happens the return address will be
145 	 * further down the stack.  If the return address is not found at all,
146 	 * we assume the opposite (i.e. this is a return for a call that wasn't
147 	 * seen for some reason) and leave the stack alone.
148 	 */
149 	for (i = ts->cnt; i; ) {
150 		if (ts->stack[--i].ret_addr == ret_addr) {
151 			ts->cnt = i;
152 			return;
153 		}
154 	}
155 }
156 
157 static void thread_stack__pop_trace_end(struct thread_stack *ts)
158 {
159 	size_t i;
160 
161 	for (i = ts->cnt; i; ) {
162 		if (ts->stack[--i].trace_end)
163 			ts->cnt = i;
164 		else
165 			return;
166 	}
167 }
168 
169 static bool thread_stack__in_kernel(struct thread_stack *ts)
170 {
171 	if (!ts->cnt)
172 		return false;
173 
174 	return ts->stack[ts->cnt - 1].cp->in_kernel;
175 }
176 
177 static int thread_stack__call_return(struct thread *thread,
178 				     struct thread_stack *ts, size_t idx,
179 				     u64 timestamp, u64 ref, bool no_return)
180 {
181 	struct call_return_processor *crp = ts->crp;
182 	struct thread_stack_entry *tse;
183 	struct call_return cr = {
184 		.thread = thread,
185 		.comm = ts->comm,
186 		.db_id = 0,
187 	};
188 
189 	tse = &ts->stack[idx];
190 	cr.cp = tse->cp;
191 	cr.call_time = tse->timestamp;
192 	cr.return_time = timestamp;
193 	cr.branch_count = ts->branch_count - tse->branch_count;
194 	cr.call_ref = tse->ref;
195 	cr.return_ref = ref;
196 	if (tse->no_call)
197 		cr.flags |= CALL_RETURN_NO_CALL;
198 	if (no_return)
199 		cr.flags |= CALL_RETURN_NO_RETURN;
200 
201 	return crp->process(&cr, crp->data);
202 }
203 
204 static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
205 {
206 	struct call_return_processor *crp = ts->crp;
207 	int err;
208 
209 	if (!crp) {
210 		ts->cnt = 0;
211 		return 0;
212 	}
213 
214 	while (ts->cnt) {
215 		err = thread_stack__call_return(thread, ts, --ts->cnt,
216 						ts->last_time, 0, true);
217 		if (err) {
218 			pr_err("Error flushing thread stack!\n");
219 			ts->cnt = 0;
220 			return err;
221 		}
222 	}
223 
224 	return 0;
225 }
226 
227 int thread_stack__flush(struct thread *thread)
228 {
229 	if (thread->ts)
230 		return __thread_stack__flush(thread, thread->ts);
231 
232 	return 0;
233 }
234 
235 int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
236 			u64 to_ip, u16 insn_len, u64 trace_nr)
237 {
238 	if (!thread)
239 		return -EINVAL;
240 
241 	if (!thread->ts) {
242 		thread->ts = thread_stack__new(thread, NULL);
243 		if (!thread->ts) {
244 			pr_warning("Out of memory: no thread stack\n");
245 			return -ENOMEM;
246 		}
247 		thread->ts->trace_nr = trace_nr;
248 	}
249 
250 	/*
251 	 * When the trace is discontinuous, the trace_nr changes.  In that case
252 	 * the stack might be completely invalid.  Better to report nothing than
253 	 * to report something misleading, so flush the stack.
254 	 */
255 	if (trace_nr != thread->ts->trace_nr) {
256 		if (thread->ts->trace_nr)
257 			__thread_stack__flush(thread, thread->ts);
258 		thread->ts->trace_nr = trace_nr;
259 	}
260 
261 	/* Stop here if thread_stack__process() is in use */
262 	if (thread->ts->crp)
263 		return 0;
264 
265 	if (flags & PERF_IP_FLAG_CALL) {
266 		u64 ret_addr;
267 
268 		if (!to_ip)
269 			return 0;
270 		ret_addr = from_ip + insn_len;
271 		if (ret_addr == to_ip)
272 			return 0; /* Zero-length calls are excluded */
273 		return thread_stack__push(thread->ts, ret_addr,
274 					  flags & PERF_IP_FLAG_TRACE_END);
275 	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
276 		/*
277 		 * If the caller did not change the trace number (which would
278 		 * have flushed the stack) then try to make sense of the stack.
279 		 * Possibly, tracing began after returning to the current
280 		 * address, so try to pop that. Also, do not expect a call made
281 		 * when the trace ended, to return, so pop that.
282 		 */
283 		thread_stack__pop(thread->ts, to_ip);
284 		thread_stack__pop_trace_end(thread->ts);
285 	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
286 		thread_stack__pop(thread->ts, to_ip);
287 	}
288 
289 	return 0;
290 }
291 
292 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
293 {
294 	if (!thread || !thread->ts)
295 		return;
296 
297 	if (trace_nr != thread->ts->trace_nr) {
298 		if (thread->ts->trace_nr)
299 			__thread_stack__flush(thread, thread->ts);
300 		thread->ts->trace_nr = trace_nr;
301 	}
302 }
303 
304 void thread_stack__free(struct thread *thread)
305 {
306 	if (thread->ts) {
307 		__thread_stack__flush(thread, thread->ts);
308 		zfree(&thread->ts->stack);
309 		zfree(&thread->ts);
310 	}
311 }
312 
313 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
314 			  size_t sz, u64 ip)
315 {
316 	size_t i;
317 
318 	if (!thread || !thread->ts)
319 		chain->nr = 1;
320 	else
321 		chain->nr = min(sz, thread->ts->cnt + 1);
322 
323 	chain->ips[0] = ip;
324 
325 	for (i = 1; i < chain->nr; i++)
326 		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
327 }
328 
329 struct call_return_processor *
330 call_return_processor__new(int (*process)(struct call_return *cr, void *data),
331 			   void *data)
332 {
333 	struct call_return_processor *crp;
334 
335 	crp = zalloc(sizeof(struct call_return_processor));
336 	if (!crp)
337 		return NULL;
338 	crp->cpr = call_path_root__new();
339 	if (!crp->cpr)
340 		goto out_free;
341 	crp->process = process;
342 	crp->data = data;
343 	return crp;
344 
345 out_free:
346 	free(crp);
347 	return NULL;
348 }
349 
350 void call_return_processor__free(struct call_return_processor *crp)
351 {
352 	if (crp) {
353 		call_path_root__free(crp->cpr);
354 		free(crp);
355 	}
356 }
357 
358 static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
359 				 u64 timestamp, u64 ref, struct call_path *cp,
360 				 bool no_call, bool trace_end)
361 {
362 	struct thread_stack_entry *tse;
363 	int err;
364 
365 	if (ts->cnt == ts->sz) {
366 		err = thread_stack__grow(ts);
367 		if (err)
368 			return err;
369 	}
370 
371 	tse = &ts->stack[ts->cnt++];
372 	tse->ret_addr = ret_addr;
373 	tse->timestamp = timestamp;
374 	tse->ref = ref;
375 	tse->branch_count = ts->branch_count;
376 	tse->cp = cp;
377 	tse->no_call = no_call;
378 	tse->trace_end = trace_end;
379 
380 	return 0;
381 }
382 
383 static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
384 				u64 ret_addr, u64 timestamp, u64 ref,
385 				struct symbol *sym)
386 {
387 	int err;
388 
389 	if (!ts->cnt)
390 		return 1;
391 
392 	if (ts->cnt == 1) {
393 		struct thread_stack_entry *tse = &ts->stack[0];
394 
395 		if (tse->cp->sym == sym)
396 			return thread_stack__call_return(thread, ts, --ts->cnt,
397 							 timestamp, ref, false);
398 	}
399 
400 	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
401 		return thread_stack__call_return(thread, ts, --ts->cnt,
402 						 timestamp, ref, false);
403 	} else {
404 		size_t i = ts->cnt - 1;
405 
406 		while (i--) {
407 			if (ts->stack[i].ret_addr != ret_addr)
408 				continue;
409 			i += 1;
410 			while (ts->cnt > i) {
411 				err = thread_stack__call_return(thread, ts,
412 								--ts->cnt,
413 								timestamp, ref,
414 								true);
415 				if (err)
416 					return err;
417 			}
418 			return thread_stack__call_return(thread, ts, --ts->cnt,
419 							 timestamp, ref, false);
420 		}
421 	}
422 
423 	return 1;
424 }
425 
426 static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
427 				struct perf_sample *sample,
428 				struct addr_location *from_al,
429 				struct addr_location *to_al, u64 ref)
430 {
431 	struct call_path_root *cpr = ts->crp->cpr;
432 	struct call_path *cp;
433 	struct symbol *sym;
434 	u64 ip;
435 
436 	if (sample->ip) {
437 		ip = sample->ip;
438 		sym = from_al->sym;
439 	} else if (sample->addr) {
440 		ip = sample->addr;
441 		sym = to_al->sym;
442 	} else {
443 		return 0;
444 	}
445 
446 	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
447 				ts->kernel_start);
448 	if (!cp)
449 		return -ENOMEM;
450 
451 	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
452 				     true, false);
453 }
454 
455 static int thread_stack__no_call_return(struct thread *thread,
456 					struct thread_stack *ts,
457 					struct perf_sample *sample,
458 					struct addr_location *from_al,
459 					struct addr_location *to_al, u64 ref)
460 {
461 	struct call_path_root *cpr = ts->crp->cpr;
462 	struct call_path *cp, *parent;
463 	u64 ks = ts->kernel_start;
464 	int err;
465 
466 	if (sample->ip >= ks && sample->addr < ks) {
467 		/* Return to userspace, so pop all kernel addresses */
468 		while (thread_stack__in_kernel(ts)) {
469 			err = thread_stack__call_return(thread, ts, --ts->cnt,
470 							sample->time, ref,
471 							true);
472 			if (err)
473 				return err;
474 		}
475 
476 		/* If the stack is empty, push the userspace address */
477 		if (!ts->cnt) {
478 			cp = call_path__findnew(cpr, &cpr->call_path,
479 						to_al->sym, sample->addr,
480 						ts->kernel_start);
481 			if (!cp)
482 				return -ENOMEM;
483 			return thread_stack__push_cp(ts, 0, sample->time, ref,
484 						     cp, true, false);
485 		}
486 	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
487 		/* Return to userspace, so pop all kernel addresses */
488 		while (thread_stack__in_kernel(ts)) {
489 			err = thread_stack__call_return(thread, ts, --ts->cnt,
490 							sample->time, ref,
491 							true);
492 			if (err)
493 				return err;
494 		}
495 	}
496 
497 	if (ts->cnt)
498 		parent = ts->stack[ts->cnt - 1].cp;
499 	else
500 		parent = &cpr->call_path;
501 
502 	/* This 'return' had no 'call', so push and pop top of stack */
503 	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
504 				ts->kernel_start);
505 	if (!cp)
506 		return -ENOMEM;
507 
508 	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
509 				    true, false);
510 	if (err)
511 		return err;
512 
513 	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
514 				    to_al->sym);
515 }
516 
517 static int thread_stack__trace_begin(struct thread *thread,
518 				     struct thread_stack *ts, u64 timestamp,
519 				     u64 ref)
520 {
521 	struct thread_stack_entry *tse;
522 	int err;
523 
524 	if (!ts->cnt)
525 		return 0;
526 
527 	/* Pop trace end */
528 	tse = &ts->stack[ts->cnt - 1];
529 	if (tse->trace_end) {
530 		err = thread_stack__call_return(thread, ts, --ts->cnt,
531 						timestamp, ref, false);
532 		if (err)
533 			return err;
534 	}
535 
536 	return 0;
537 }
538 
539 static int thread_stack__trace_end(struct thread_stack *ts,
540 				   struct perf_sample *sample, u64 ref)
541 {
542 	struct call_path_root *cpr = ts->crp->cpr;
543 	struct call_path *cp;
544 	u64 ret_addr;
545 
546 	/* No point having 'trace end' on the bottom of the stack */
547 	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
548 		return 0;
549 
550 	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
551 				ts->kernel_start);
552 	if (!cp)
553 		return -ENOMEM;
554 
555 	ret_addr = sample->ip + sample->insn_len;
556 
557 	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
558 				     false, true);
559 }
560 
561 int thread_stack__process(struct thread *thread, struct comm *comm,
562 			  struct perf_sample *sample,
563 			  struct addr_location *from_al,
564 			  struct addr_location *to_al, u64 ref,
565 			  struct call_return_processor *crp)
566 {
567 	struct thread_stack *ts = thread->ts;
568 	int err = 0;
569 
570 	if (ts) {
571 		if (!ts->crp) {
572 			/* Supersede thread_stack__event() */
573 			thread_stack__free(thread);
574 			thread->ts = thread_stack__new(thread, crp);
575 			if (!thread->ts)
576 				return -ENOMEM;
577 			ts = thread->ts;
578 			ts->comm = comm;
579 		}
580 	} else {
581 		thread->ts = thread_stack__new(thread, crp);
582 		if (!thread->ts)
583 			return -ENOMEM;
584 		ts = thread->ts;
585 		ts->comm = comm;
586 	}
587 
588 	/* Flush stack on exec */
589 	if (ts->comm != comm && thread->pid_ == thread->tid) {
590 		err = __thread_stack__flush(thread, ts);
591 		if (err)
592 			return err;
593 		ts->comm = comm;
594 	}
595 
596 	/* If the stack is empty, put the current symbol on the stack */
597 	if (!ts->cnt) {
598 		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
599 					   ref);
600 		if (err)
601 			return err;
602 	}
603 
604 	ts->branch_count += 1;
605 	ts->last_time = sample->time;
606 
607 	if (sample->flags & PERF_IP_FLAG_CALL) {
608 		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
609 		struct call_path_root *cpr = ts->crp->cpr;
610 		struct call_path *cp;
611 		u64 ret_addr;
612 
613 		if (!sample->ip || !sample->addr)
614 			return 0;
615 
616 		ret_addr = sample->ip + sample->insn_len;
617 		if (ret_addr == sample->addr)
618 			return 0; /* Zero-length calls are excluded */
619 
620 		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
621 					to_al->sym, sample->addr,
622 					ts->kernel_start);
623 		if (!cp)
624 			return -ENOMEM;
625 		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
626 					    cp, false, trace_end);
627 	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
628 		if (!sample->ip || !sample->addr)
629 			return 0;
630 
631 		err = thread_stack__pop_cp(thread, ts, sample->addr,
632 					   sample->time, ref, from_al->sym);
633 		if (err) {
634 			if (err < 0)
635 				return err;
636 			err = thread_stack__no_call_return(thread, ts, sample,
637 							   from_al, to_al, ref);
638 		}
639 	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
640 		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
641 	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
642 		err = thread_stack__trace_end(ts, sample, ref);
643 	}
644 
645 	return err;
646 }
647 
648 size_t thread_stack__depth(struct thread *thread)
649 {
650 	if (!thread->ts)
651 		return 0;
652 	return thread->ts->cnt;
653 }
654