xref: /linux/tools/tracing/rtla/src/timerlat_aa.c (revision 4fa118e5b79fcc537dcb1a860ed319a6106935eb)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #include <stdlib.h>
7 #include <errno.h>
8 #include "timerlat.h"
9 #include <unistd.h>
10 
/*
 * Per-cpu parser state: which timerlat event is expected next.
 * (The "timelat" spelling is kept as-is to avoid churn.)
 */
enum timelat_state {
	TIMERLAT_INIT = 0,		/* nothing parsed yet for this CPU */
	TIMERLAT_WAITING_IRQ,		/* next expected event: timerlat IRQ */
	TIMERLAT_WAITING_THREAD,	/* next expected event: timerlat thread */
};
16 
/* Used to fill spaces in the output */
static const char *spaces  = "                                                         ";

/* Size of the buffers used to store task comm names from trace events */
#define MAX_COMM		24
21 
/*
 * Per-cpu data statistics and data.
 *
 * One instance exists per CPU; it accumulates everything needed to explain
 * a single timerlat activation (IRQ + thread) on that CPU.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int			curr_state;

	/* timerlat IRQ latency (note: "timstamp" spelling kept as-is) */
	unsigned long long	tlat_irq_seqnum;
	unsigned long long	tlat_irq_latency;
	unsigned long long	tlat_irq_timstamp;

	/* timerlat Thread latency */
	unsigned long long	tlat_thread_seqnum;
	unsigned long long	tlat_thread_latency;
	unsigned long long	tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetch using tep_get_field_val();
	 */
	unsigned long long	run_thread_pid;
	char			run_thread_comm[MAX_COMM];
	unsigned long long	thread_blocking_duration;
	unsigned long long	max_exit_idle_latency;

	/* Information about the timerlat timer irq */
	unsigned long long	timer_irq_start_time;
	unsigned long long	timer_irq_start_delay;
	unsigned long long	timer_irq_duration;
	unsigned long long	timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long	prev_irq_duration;
	unsigned long long	prev_irq_timstamp;

	/*
	 * Interference sum, in ns, split per source.
	 */
	unsigned long long	thread_nmi_sum;
	unsigned long long	thread_irq_sum;
	unsigned long long	thread_softirq_sum;
	unsigned long long	thread_thread_sum;

	/*
	 * Interference task information: pre-formatted report lines,
	 * stored as text instead of raw records.
	 */
	struct trace_seq	*prev_irqs_seq;
	struct trace_seq	*nmi_seq;
	struct trace_seq	*irqs_seq;
	struct trace_seq	*softirqs_seq;
	struct trace_seq	*threads_seq;
	struct trace_seq	*stack_seq;

	/*
	 * Current thread (tracked via sched:sched_switch).
	 */
	char			current_comm[MAX_COMM];
	unsigned long long	current_pid;

	/*
	 * Is the system running a kworker? (work struct address and
	 * function address; 0 when no kworker is known to be running)
	 */
	unsigned long long	kworker;
	unsigned long long	kworker_func;
};
100 
/*
 * The analysis context and system wide view
 */
struct timerlat_aa_context {
	int nr_cpus;		/* number of entries in taa_data */
	int dump_tasks;		/* if set, print per-CPU tasks at the end */

	/* per CPU data, indexed by cpu id */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
117 
/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/*
 * timerlat_aa_get_ctx - Return the process-wide analysis context
 */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
133 
/*
 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
 *
 * No bounds checking is done: callers must pass cpu < taa_ctx->nr_cpus.
 */
static struct timerlat_aa_data
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
{
	return &taa_ctx->taa_data[cpu];
}
142 
/*
 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 *
 * @taa_data: per-cpu data of the CPU that generated the record
 * @s: trace_seq used by the tep field accessors for error output
 * @record: raw trace record
 * @event: parsed event format
 *
 * Resets the per-activation accumulators, stores the IRQ latency values,
 * and tracks the largest IRQ latency observed when returning from idle.
 *
 * Always returns 0.
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				   struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_thread_sum = 0;
	taa_data->thread_blocking_duration = 0;
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks.
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Get exit from idle case.
	 *
	 * If it is not idle thread (pid 0):
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * if the latency is shorter than the known exit from idle:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

	/* New maximum exit-from-idle latency for this CPU. */
	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}
210 
211 /*
212  * timerlat_aa_thread_latency - Handles timerlat thread event
213  */
timerlat_aa_thread_latency(struct timerlat_aa_data * taa_data,struct trace_seq * s,struct tep_record * record,struct tep_event * event)214 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
215 				      struct trace_seq *s, struct tep_record *record,
216 				      struct tep_event *event)
217 {
218 	/*
219 	 * For interference, we start now looking for things that can delay
220 	 * the IRQ of the next cycle.
221 	 */
222 	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
223 	taa_data->tlat_thread_timstamp = record->ts;
224 
225 	/* Thread latency values */
226 	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
227 	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
228 
229 	return 0;
230 }
231 
232 /*
233  * timerlat_aa_handler - Handle timerlat events
234  *
235  * This function is called to handle timerlat events recording statistics.
236  *
237  * Returns 0 on success, -1 otherwise.
238  */
timerlat_aa_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)239 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
240 			struct tep_event *event, void *context)
241 {
242 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
243 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
244 	unsigned long long thread;
245 
246 	if (!taa_data)
247 		return -1;
248 
249 	tep_get_field_val(s, event, "context", record, &thread, 1);
250 	if (!thread)
251 		return timerlat_aa_irq_latency(taa_data, s, record, event);
252 	else
253 		return timerlat_aa_thread_latency(taa_data, s, record, event);
254 }
255 
256 /*
257  * timerlat_aa_nmi_handler - Handles NMI noise
258  *
259  * It is used to collect information about interferences from NMI. It is
260  * hooked to the osnoise:nmi_noise event.
261  */
timerlat_aa_nmi_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)262 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
263 				   struct tep_event *event, void *context)
264 {
265 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
266 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
267 	unsigned long long duration;
268 	unsigned long long start;
269 
270 	tep_get_field_val(s, event, "duration", record, &duration, 1);
271 	tep_get_field_val(s, event, "start", record, &start, 1);
272 
273 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
274 		taa_data->prev_irq_duration = duration;
275 		taa_data->prev_irq_timstamp = start;
276 
277 		trace_seq_reset(taa_data->prev_irqs_seq);
278 		trace_seq_printf(taa_data->prev_irqs_seq, "  %24s %.*s %9.2f us\n",
279 				 "nmi",
280 				 24, spaces,
281 				 ns_to_usf(duration));
282 		return 0;
283 	}
284 
285 	taa_data->thread_nmi_sum += duration;
286 	trace_seq_printf(taa_data->nmi_seq, "  %24s %.*s %9.2f us\n",
287 			 "nmi",
288 			 24, spaces, ns_to_usf(duration));
289 
290 	return 0;
291 }
292 
/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other because it measures:
 *	- The IRQs that can delay the timer IRQ before it happened.
 *	- The Timerlat IRQ handler
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	/* NOTE(review): desc may be NULL on a malformed event; printf "%s" of
	 * NULL is undefined — confirm libtraceevent always provides it here. */
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ: record as the candidate "previous IRQ".
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "  %24s:%-3llu %.*s %9.2f us\n",
				 desc, vector,
				 15, spaces,
				 ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler, so the first irq_noise seen afterwards
	 * is the timerlat timer IRQ itself.
	 */
	if (!taa_data->timer_irq_start_time) {
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time reading the clock and the timer in
		 * which the event is placed in the buffer (the trace
		 * event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set IRQ start delay as 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * not exit from idle: a thread (non-zero pid) was running.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* Only count as exit-from-idle if no prior IRQ overlapped it. */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference: an IRQ between the timerlat IRQ and the thread.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, "  %24s:%-3llu %.*s %9.2f us\n",
			 desc, vector,
			 24, spaces,
			 ns_to_usf(duration));

	return 0;
}
388 
389 static char *softirq_name[] = { "HI", "TIMER",	"NET_TX", "NET_RX", "BLOCK",
390 				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
391 
392 
393 /*
394  * timerlat_aa_softirq_handler - Handles Softirq noise
395  *
396  * It is used to collect information about interferences from Softirq. It is
397  * hooked to the osnoise:softirq_noise event.
398  *
399  * It is only printed in the non-rt kernel, as softirqs become thread on RT.
400  */
timerlat_aa_softirq_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)401 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
402 				       struct tep_event *event, void *context)
403 {
404 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
405 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
406 	unsigned long long duration;
407 	unsigned long long vector;
408 	unsigned long long start;
409 
410 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
411 		return 0;
412 
413 	tep_get_field_val(s, event, "duration", record, &duration, 1);
414 	tep_get_field_val(s, event, "start", record, &start, 1);
415 	tep_get_field_val(s, event, "vector", record, &vector, 1);
416 
417 	taa_data->thread_softirq_sum += duration;
418 
419 	trace_seq_printf(taa_data->softirqs_seq, "  %24s:%-3llu %.*s %9.2f us\n",
420 			 softirq_name[vector], vector,
421 			 24, spaces,
422 			 ns_to_usf(duration));
423 	return 0;
424 }
425 
426 /*
427  * timerlat_aa_softirq_handler - Handles thread noise
428  *
429  * It is used to collect information about interferences from threads. It is
430  * hooked to the osnoise:thread_noise event.
431  *
432  * Note: if you see thread noise, your timerlat thread was not the highest prio one.
433  */
timerlat_aa_thread_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)434 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
435 				      struct tep_event *event, void *context)
436 {
437 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
438 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
439 	unsigned long long duration;
440 	unsigned long long start;
441 	unsigned long long pid;
442 	const char *comm;
443 	int val;
444 
445 	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
446 		return 0;
447 
448 	tep_get_field_val(s, event, "duration", record, &duration, 1);
449 	tep_get_field_val(s, event, "start", record, &start, 1);
450 
451 	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
452 	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
453 
454 	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
455 		taa_data->thread_blocking_duration = duration;
456 
457 		if (comm)
458 			strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
459 		else
460 			sprintf(taa_data->run_thread_comm, "<...>");
461 
462 	} else {
463 		taa_data->thread_thread_sum += duration;
464 
465 		trace_seq_printf(taa_data->threads_seq, "  %24s:%-12llu %.*s %9.2f us\n",
466 				 comm, pid,
467 				 15, spaces,
468 				 ns_to_usf(duration));
469 	}
470 
471 	return 0;
472 }
473 
/*
 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
 *
 * Saves and parse the stack trace generated by the timerlat IRQ.
 *
 * The formatted trace is stored in stack_seq and printed later by
 * timerlat_thread_analysis(). Always returns 0.
 */
static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
			      struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long *caller;
	const char *function;
	int val, i;

	trace_seq_reset(taa_data->stack_seq);

	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
	if (caller) {
		/*
		 * Walk the caller array, translating addresses to names.
		 * The loop stops at the first address that does not resolve
		 * to a known kernel function.
		 */
		for (i = 0; ; i++) {
			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
			if (!function)
				break;
			trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n",
					 14, spaces, function);
		}
	}
	return 0;
}
503 
504 /*
505  * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
506  *
507  * Handles the sched:sched_switch event to trace the current thread running on the
508  * CPU. It is used to display the threads running on the other CPUs when the trace
509  * stops.
510  */
timerlat_aa_sched_switch_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)511 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
512 					    struct tep_event *event, void *context)
513 {
514 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
515 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
516 	const char *comm;
517 	int val;
518 
519 	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
520 	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
521 
522 	strncpy(taa_data->current_comm, comm, MAX_COMM);
523 
524 	/*
525 	 * If this was a kworker, clean the last kworkers that ran.
526 	 */
527 	taa_data->kworker = 0;
528 	taa_data->kworker_func = 0;
529 
530 	return 0;
531 }
532 
533 /*
534  * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
535  *
536  * Handles workqueue:workqueue_execute_start event, keeping track of
537  * the job that a kworker could be doing in the CPU.
538  *
539  * We already catch problems of hardware related latencies caused by work queues
540  * running driver code that causes hardware stall. For example, with DRM drivers.
541  */
timerlat_aa_kworker_start_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)542 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
543 					     struct tep_event *event, void *context)
544 {
545 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
546 	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
547 
548 	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
549 	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
550 	return 0;
551 }
552 
/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * @taa_data: per-cpu data of the CPU that hit the threshold
 * @cpu: the CPU id (for reporting only)
 * @irq_thresh: IRQ stop-tracing threshold, in ns
 * @thread_thresh: thread stop-tracing threshold, in ns
 *
 * This is the core of the analysis: it decides whether the stop was
 * caused by the IRQ or the thread latency, then prints each noise source
 * (previous IRQ, IRQ handler delay, blocking thread, NMI/IRQ/softirq/thread
 * interference) with its contribution to the total latency.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency? The seqnum of the event that stopped
	 * the trace is the most recent one.
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 *
	 * TODO: Add a list of previous IRQ, and then run the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf("  Previous IRQ interference: %.*s up to  %9.2f us\n",
			       16, spaces,
			       ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf("  IRQ handler delay: %.*s %16s  %9.2f us (%.2f %%)\n", 16, spaces,
	       (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
	       ns_to_usf(taa_data->timer_irq_start_delay),
	       ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf("  IRQ latency: %.*s %9.2f us\n", 40, spaces,
	       ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all folks, the stack trace was printed before the stop,
		 * so it will be displayed, it is the key.
		 */
		printf("  Blocking thread:\n");
		printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm,
		       taa_data->run_thread_pid);
	} else  {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf("  Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n",
		       30, spaces,
		       ns_to_usf(taa_data->timer_irq_duration),
		       ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
		 * is no need to compute values here.
		 */
		printf("  Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_blocking_duration),
		       ns_to_per(total, taa_data->thread_blocking_duration));

		printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces,
		       taa_data->run_thread_comm, taa_data->run_thread_pid,
		       12, spaces, ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf("  NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_nmi_sum),
		       ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf("  IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_irq_sum),
		       ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf("  Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces,
		       ns_to_usf(taa_data->thread_softirq_sum),
		       ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat is not the highest prio. OK, migration
	 * thread can happen. But otherwise, you are not measuring the "scheduling
	 * latency" only, and here is the difference from scheduling latency and
	 * timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf("  Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces,
		       ns_to_usf(taa_data->thread_thread_sum),
		       ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf("  %s latency: %.*s %9.2f us (100%%)\n", irq ? "   IRQ" : "Thread",
	       37, spaces, ns_to_usf(total));
}
703 
timerlat_auto_analysis_collect_trace(struct timerlat_aa_context * taa_ctx)704 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
705 {
706 	struct trace_instance *trace = &taa_ctx->tool->trace;
707 	int retval;
708 
709 	retval = tracefs_iterate_raw_events(trace->tep,
710 					    trace->inst,
711 					    NULL,
712 					    0,
713 					    collect_registered_events,
714 					    trace);
715 		if (retval < 0) {
716 			err_msg("Error iterating on events\n");
717 			return 0;
718 		}
719 
720 	return 1;
721 }
722 
723 /**
724  * timerlat_auto_analysis - Analyze the collected data
725  */
timerlat_auto_analysis(int irq_thresh,int thread_thresh)726 void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
727 {
728 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
729 	unsigned long long max_exit_from_idle = 0;
730 	struct timerlat_aa_data *taa_data;
731 	int max_exit_from_idle_cpu;
732 	struct tep_handle *tep;
733 	int cpu;
734 
735 	timerlat_auto_analysis_collect_trace(taa_ctx);
736 
737 	/* bring stop tracing to the ns scale */
738 	irq_thresh = irq_thresh * 1000;
739 	thread_thresh = thread_thresh * 1000;
740 
741 	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
742 		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
743 
744 		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
745 			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
746 			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
747 		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
748 			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
749 			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
750 		}
751 
752 		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
753 			max_exit_from_idle = taa_data->max_exit_idle_latency;
754 			max_exit_from_idle_cpu = cpu;
755 		}
756 
757 	}
758 
759 	if (max_exit_from_idle) {
760 		printf("\n");
761 		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
762 			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
763 	}
764 	if (!taa_ctx->dump_tasks)
765 		return;
766 
767 	printf("\n");
768 	printf("Printing CPU tasks:\n");
769 	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
770 		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
771 		tep = taa_ctx->tool->trace.tep;
772 
773 		printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
774 
775 		if (taa_data->kworker_func)
776 			printf(" kworker:%s:%s",
777 				tep_find_function(tep, taa_data->kworker) ? : "<...>",
778 				tep_find_function(tep, taa_data->kworker_func));
779 		printf("\n");
780 	}
781 
782 }
783 
784 /*
785  * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
786  */
timerlat_aa_destroy_seqs(struct timerlat_aa_context * taa_ctx)787 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
788 {
789 	struct timerlat_aa_data *taa_data;
790 	int i;
791 
792 	if (!taa_ctx->taa_data)
793 		return;
794 
795 	for (i = 0; i < taa_ctx->nr_cpus; i++) {
796 		taa_data = timerlat_aa_get_data(taa_ctx, i);
797 
798 		if (taa_data->prev_irqs_seq) {
799 			trace_seq_destroy(taa_data->prev_irqs_seq);
800 			free(taa_data->prev_irqs_seq);
801 		}
802 
803 		if (taa_data->nmi_seq) {
804 			trace_seq_destroy(taa_data->nmi_seq);
805 			free(taa_data->nmi_seq);
806 		}
807 
808 		if (taa_data->irqs_seq) {
809 			trace_seq_destroy(taa_data->irqs_seq);
810 			free(taa_data->irqs_seq);
811 		}
812 
813 		if (taa_data->softirqs_seq) {
814 			trace_seq_destroy(taa_data->softirqs_seq);
815 			free(taa_data->softirqs_seq);
816 		}
817 
818 		if (taa_data->threads_seq) {
819 			trace_seq_destroy(taa_data->threads_seq);
820 			free(taa_data->threads_seq);
821 		}
822 
823 		if (taa_data->stack_seq) {
824 			trace_seq_destroy(taa_data->stack_seq);
825 			free(taa_data->stack_seq);
826 		}
827 	}
828 }
829 
830 /*
831  * timerlat_aa_init_seqs - Init seq files used to store parsed information
832  *
833  * Instead of keeping data structures to store raw data, use seq files to
834  * store parsed data.
835  *
836  * Allocates and initialize seq files.
837  *
838  * Returns 0 on success, -1 otherwise.
839  */
timerlat_aa_init_seqs(struct timerlat_aa_context * taa_ctx)840 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
841 {
842 	struct timerlat_aa_data *taa_data;
843 	int i;
844 
845 	for (i = 0; i < taa_ctx->nr_cpus; i++) {
846 
847 		taa_data = timerlat_aa_get_data(taa_ctx, i);
848 
849 		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
850 		if (!taa_data->prev_irqs_seq)
851 			goto out_err;
852 
853 		trace_seq_init(taa_data->prev_irqs_seq);
854 
855 		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
856 		if (!taa_data->nmi_seq)
857 			goto out_err;
858 
859 		trace_seq_init(taa_data->nmi_seq);
860 
861 		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
862 		if (!taa_data->irqs_seq)
863 			goto out_err;
864 
865 		trace_seq_init(taa_data->irqs_seq);
866 
867 		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
868 		if (!taa_data->softirqs_seq)
869 			goto out_err;
870 
871 		trace_seq_init(taa_data->softirqs_seq);
872 
873 		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
874 		if (!taa_data->threads_seq)
875 			goto out_err;
876 
877 		trace_seq_init(taa_data->threads_seq);
878 
879 		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
880 		if (!taa_data->stack_seq)
881 			goto out_err;
882 
883 		trace_seq_init(taa_data->stack_seq);
884 	}
885 
886 	return 0;
887 
888 out_err:
889 	timerlat_aa_destroy_seqs(taa_ctx);
890 	return -1;
891 }
892 
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirror of timerlat_aa_register_events(): unhooks every handler and
 * disables the events that were enabled for the analysis.
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	/* Disables all osnoise events at once (NULL event name). */
	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);

	/* The task-dump events are only registered when dump_tasks is set. */
	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
929 
930 /*
931  * timerlat_aa_register_events - Register events used in the auto-analysis
932  *
933  * Returns 0 on success, -1 otherwise.
934  */
timerlat_aa_register_events(struct osnoise_tool * tool,int dump_tasks)935 static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
936 {
937 	int retval;
938 
939 	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
940 				timerlat_aa_handler, tool);
941 
942 
943 	/*
944 	 * register auto-analysis handlers.
945 	 */
946 	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
947 	if (retval < 0 && !errno) {
948 		err_msg("Could not find osnoise events\n");
949 		goto out_err;
950 	}
951 
952 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
953 				   timerlat_aa_nmi_handler, tool);
954 
955 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
956 				   timerlat_aa_irq_handler, tool);
957 
958 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
959 				   timerlat_aa_softirq_handler, tool);
960 
961 	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
962 				   timerlat_aa_thread_handler, tool);
963 
964 	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
965 				   timerlat_aa_stack_handler, tool);
966 
967 	if (!dump_tasks)
968 		return 0;
969 
970 	/*
971 	 * Dump task events.
972 	 */
973 	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
974 	if (retval < 0 && !errno) {
975 		err_msg("Could not find sched_switch\n");
976 		goto out_err;
977 	}
978 
979 	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
980 				   timerlat_aa_sched_switch_handler, tool);
981 
982 	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
983 	if (retval < 0 && !errno) {
984 		err_msg("Could not find workqueue_execute_start\n");
985 		goto out_err;
986 	}
987 
988 	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
989 				   timerlat_aa_kworker_start_handler, tool);
990 
991 	return 0;
992 
993 out_err:
994 	timerlat_aa_unregister_events(tool, dump_tasks);
995 	return -1;
996 }
997 
998 /**
999  * timerlat_aa_destroy - Destroy timerlat auto-analysis
1000  */
timerlat_aa_destroy(void)1001 void timerlat_aa_destroy(void)
1002 {
1003 	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
1004 
1005 	if (!taa_ctx)
1006 		return;
1007 
1008 	if (!taa_ctx->taa_data)
1009 		goto out_ctx;
1010 
1011 	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
1012 	timerlat_aa_destroy_seqs(taa_ctx);
1013 	free(taa_ctx->taa_data);
1014 out_ctx:
1015 	free(taa_ctx);
1016 }
1017 
1018 /**
1019  * timerlat_aa_init - Initialize timerlat auto-analysis
1020  *
1021  * Returns 0 on success, -1 otherwise.
1022  */
timerlat_aa_init(struct osnoise_tool * tool,int dump_tasks)1023 int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
1024 {
1025 	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
1026 	struct timerlat_aa_context *taa_ctx;
1027 	int retval;
1028 
1029 	taa_ctx = calloc(1, sizeof(*taa_ctx));
1030 	if (!taa_ctx)
1031 		return -1;
1032 
1033 	__timerlat_aa_ctx = taa_ctx;
1034 
1035 	taa_ctx->nr_cpus = nr_cpus;
1036 	taa_ctx->tool = tool;
1037 	taa_ctx->dump_tasks = dump_tasks;
1038 
1039 	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1040 	if (!taa_ctx->taa_data)
1041 		goto out_err;
1042 
1043 	retval = timerlat_aa_init_seqs(taa_ctx);
1044 	if (retval)
1045 		goto out_err;
1046 
1047 	retval = timerlat_aa_register_events(tool, dump_tasks);
1048 	if (retval)
1049 		goto out_err;
1050 
1051 	return 0;
1052 
1053 out_err:
1054 	timerlat_aa_destroy();
1055 	return -1;
1056 }
1057