/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
};

static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

static struct thread_stack *thread_stack__new(struct thread *thread,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts;

	ts = zalloc(sizeof(struct thread_stack));
	if (!ts)
		return NULL;

	if (thread_stack__grow(ts)) {
		free(ts);
		return NULL;
	}

	if (thread->mg && thread->mg->machine)
		ts->kernel_start = machine__kernel_start(thread->mg->machine);
	else
		/* Assume a 64-bit kernel in the upper half of the address space */
		ts->kernel_start = 1ULL << 63;
	ts->crp = crp;

	return ts;
}

static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			/* Discard the stack but keep pushing into the existing buffer */
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used, or when the perf
	 * context switch in the kernel does not stop and start tracing in
	 * exactly the same code path.  When that happens, the return address
	 * will be further down the stack.  If the return address is not found
	 * at all, assume the opposite (i.e. this is a return for a call that
	 * was not seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}
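
/*
 * Illustrative sketch, not part of the original file: the helper below is
 * hypothetical and only demonstrates the longjmp-style behaviour described
 * above.  After nested calls push two return addresses, popping with the
 * outer return address unwinds both entries in one go.  The addresses are
 * made-up values.
 */
static void __maybe_unused thread_stack__pop_example(struct thread_stack *ts)
{
	thread_stack__push(ts, 0x1005);	/* call from A: A's return address */
	thread_stack__push(ts, 0x2008);	/* nested call from B */
	thread_stack__pop(ts, 0x1005);	/* longjmp past B: cnt drops to 0 */
}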

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;

	return crp->process(&cr, crp->data);
}

static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	if (thread->ts)
		return __thread_stack__flush(thread, thread->ts);

	return 0;
}

int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	if (!thread)
		return -EINVAL;

	if (!thread->ts) {
		thread->ts = thread_stack__new(thread, NULL);
		if (!thread->ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		thread->ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes.  In that case
	 * the stack might be completely invalid.  Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			__thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (thread->ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(thread->ts, ret_addr);
	} else if (flags & PERF_IP_FLAG_RETURN) {
		if (!from_ip)
			return 0;
		thread_stack__pop(thread->ts, to_ip);
	}

	return 0;
}
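
/*
 * Illustrative sketch, not part of the original file: a hypothetical helper
 * showing how a branch decoder might drive thread_stack__event().  The
 * addresses and instruction length are made-up values.
 */
static int __maybe_unused thread_stack__event_example(struct thread *thread)
{
	int err;

	/* A 5-byte call at 0x1000 to 0x2000 pushes ret_addr 0x1005 */
	err = thread_stack__event(thread, PERF_IP_FLAG_CALL, 0x1000, 0x2000,
				  5, 1);
	if (err)
		return err;

	/* A return branching back to 0x1005 pops the matching entry */
	return thread_stack__event(thread, PERF_IP_FLAG_RETURN, 0x2040,
				   0x1005, 1, 1);
}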

void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
	if (!thread || !thread->ts)
		return;

	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			__thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}
}

void thread_stack__free(struct thread *thread)
{
	if (thread->ts) {
		__thread_stack__flush(thread, thread->ts);
		zfree(&thread->ts->stack);
		zfree(&thread->ts);
	}
}

void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
			  size_t sz, u64 ip)
{
	size_t i;

	if (!thread || !thread->ts)
		chain->nr = 1;
	else
		chain->nr = min(sz, thread->ts->cnt + 1);

	chain->ips[0] = ip;

	for (i = 1; i < chain->nr; i++)
		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
}
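
/*
 * Illustrative sketch, not part of the original file: a hypothetical helper
 * showing how a caller might size and fill an ip_callchain from the
 * synthesized stack.  The current ip comes first, followed by the return
 * addresses, innermost first.
 */
static __maybe_unused struct ip_callchain *
thread_stack__sample_example(struct thread *thread, u64 ip)
{
	size_t sz = 64; /* maximum number of entries, including ip itself */
	struct ip_callchain *chain;

	chain = zalloc(sizeof(*chain) + sz * sizeof(chain->ips[0]));
	if (!chain)
		return NULL;

	thread_stack__sample(thread, chain, sz, ip);

	return chain; /* caller frees with free() */
}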

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}
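
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * process() callback for call_return_processor__new().  It counts the
 * call/return pairs for which both the call and the return were actually
 * seen, using the data pointer passed at creation time.
 */
static int __maybe_unused example_process(struct call_return *cr, void *data)
{
	u64 *nr_call_returns = data;

	/* Skip pairs where the call or the return was never seen */
	if (cr->flags & (CALL_RETURN_NO_CALL | CALL_RETURN_NO_RETURN))
		return 0;

	*nr_call_returns += 1;

	return 0;
}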

static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call)
{
	struct thread_stack_entry *tse;
	int err;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;

	return 0;
}

/*
 * Pop entries up to and including the one matching 'ret_addr' (or, for a
 * lone entry, 'sym').  Entries skipped over on the way down are reported
 * with 'no_return' set.  Returns 0 on success, 1 if no matching entry was
 * found, or a negative error code.
 */
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true);
}

static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	int err;

	if (sample->ip >= ks && sample->addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, &cpr->call_path,
						to_al->sym, sample->addr,
						ts->kernel_start);
			if (!cp)
				return -ENOMEM;
			return thread_stack__push_cp(ts, 0, sample->time, ref,
						     cp, true);
		}
	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
		/* Already in userspace, so pop all stale kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = &cpr->call_path;

	/* This 'return' had no 'call', so push and pop top of stack */
	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
				    true);
	if (err)
		return err;

	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
				    to_al->sym);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->cp->sym == NULL && tse->cp->ip == 0) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false);
}

int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts;
	int err = 0;

	if (ts) {
		if (!ts->crp) {
			/* Supersede thread_stack__event() */
			thread_stack__free(thread);
			thread->ts = thread_stack__new(thread, crp);
			if (!thread->ts)
				return -ENOMEM;
			ts = thread->ts;
			ts->comm = comm;
		}
	} else {
		thread->ts = thread_stack__new(thread, crp);
		if (!thread->ts)
			return -ENOMEM;
		ts = thread->ts;
		ts->comm = comm;
	}

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		if (!cp)
			return -ENOMEM;
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false);
	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	}

	return err;
}
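
/*
 * Illustrative lifecycle sketch, not part of the original file: a
 * hypothetical helper tying the pieces together.  It creates a processor
 * with the example_process() callback shown earlier, feeds it one branch
 * sample, flushes the remaining entries and frees the processor.  A real
 * user (e.g. a trace decoder) would keep the processor for the whole
 * session and call thread_stack__process() once per branch sample.
 */
static int __maybe_unused example_lifecycle(struct thread *thread,
					    struct comm *comm,
					    struct perf_sample *sample,
					    struct addr_location *from_al,
					    struct addr_location *to_al)
{
	struct call_return_processor *crp;
	u64 nr_call_returns = 0;
	int err;

	crp = call_return_processor__new(example_process, &nr_call_returns);
	if (!crp)
		return -ENOMEM;

	err = thread_stack__process(thread, comm, sample, from_al, to_al,
				    0 /* ref */, crp);
	if (!err)
		err = thread_stack__flush(thread);

	call_return_processor__free(crp);
	return err;
}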

size_t thread_stack__depth(struct thread *thread)
{
	if (!thread->ts)
		return 0;
	return thread->ts->cnt;
}