1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "timerlat.h"
10
/*
 * Per-cpu analysis state machine: each timerlat activation produces an
 * IRQ event followed by a thread event; noise events are classified
 * depending on which of the two the analysis is currently waiting for.
 * (Note: the tag keeps the historical "timelat" spelling.)
 */
enum timelat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};

/* Used to fill spaces in the output, via the "%.*s" printf idiom */
static const char *spaces = " ";

/* Maximum bytes stored for a task comm, including the NUL terminator */
#define MAX_COMM 24
21
/*
 * Per-cpu data statistics and data.
 *
 * One instance per CPU; filled by the event handlers and consumed by
 * timerlat_thread_analysis() when the trace stops.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int curr_state;

	/* timerlat IRQ latency: sequence number, latency and event timestamp */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency: sequence number, latency and event timestamp */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetch using tep_get_field_val();
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer irq: start, delay, duration, idle exit */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sum, in ns, accumulated between the timerlat IRQ
	 * and the timerlat thread events.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information, stored as pre-formatted text in
	 * seq buffers rather than as raw records.
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread, tracked via sched_switch for the task dump.
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker? Holds the raw "work" and
	 * "function" field values from workqueue_execute_start.
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};
100
/*
 * The analysis context and system wide view
 */
struct timerlat_aa_context {
	/* Number of CPUs (size of the taa_data array) */
	int nr_cpus;
	/* Non-zero when the per-CPU task dump was requested */
	int dump_tasks;

	/* per CPU data, array of nr_cpus entries */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
117
/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/*
 * timerlat_aa_get_ctx - Return the global auto-analysis context
 *
 * Returns NULL until timerlat_aa_init() has run.
 */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
133
/*
 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
 *
 * No bounds checking is performed: callers must pass cpu < nr_cpus.
 */
static struct timerlat_aa_data
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
{
	return &taa_ctx->taa_data[cpu];
}
142
/*
 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 *
 * A new activation starts here: reset all per-activation accounting,
 * record the IRQ latency/seqnum, and track the maximum IRQ latency
 * observed when the CPU exits from idle.
 *
 * Always returns 0.
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				   struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values accumulated during the previous activation.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_thread_sum = 0;
	taa_data->thread_blocking_duration = 0;
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks.
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Get exit from idle case.
	 *
	 * If it is not the idle thread (pid 0), this was not an exit from idle:
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * if the latency is shorter than the known exit from idle:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

	/* New per-CPU maximum exit-from-idle latency */
	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}
210
211 /*
212 * timerlat_aa_thread_latency - Handles timerlat thread event
213 */
timerlat_aa_thread_latency(struct timerlat_aa_data * taa_data,struct trace_seq * s,struct tep_record * record,struct tep_event * event)214 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
215 struct trace_seq *s, struct tep_record *record,
216 struct tep_event *event)
217 {
218 /*
219 * For interference, we start now looking for things that can delay
220 * the IRQ of the next cycle.
221 */
222 taa_data->curr_state = TIMERLAT_WAITING_IRQ;
223 taa_data->tlat_thread_timstamp = record->ts;
224
225 /* Thread latency values */
226 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
227 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
228
229 return 0;
230 }
231
232 /*
233 * timerlat_aa_handler - Handle timerlat events
234 *
235 * This function is called to handle timerlat events recording statistics.
236 *
237 * Returns 0 on success, -1 otherwise.
238 */
timerlat_aa_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)239 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
240 struct tep_event *event, void *context)
241 {
242 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
243 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
244 unsigned long long thread;
245
246 if (!taa_data)
247 return -1;
248
249 tep_get_field_val(s, event, "context", record, &thread, 1);
250 if (!thread)
251 return timerlat_aa_irq_latency(taa_data, s, record, event);
252 else
253 return timerlat_aa_thread_latency(taa_data, s, record, event);
254 }
255
256 /*
257 * timerlat_aa_nmi_handler - Handles NMI noise
258 *
259 * It is used to collect information about interferences from NMI. It is
260 * hooked to the osnoise:nmi_noise event.
261 */
timerlat_aa_nmi_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)262 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
263 struct tep_event *event, void *context)
264 {
265 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
266 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
267 unsigned long long duration;
268 unsigned long long start;
269
270 tep_get_field_val(s, event, "duration", record, &duration, 1);
271 tep_get_field_val(s, event, "start", record, &start, 1);
272
273 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
274 taa_data->prev_irq_duration = duration;
275 taa_data->prev_irq_timstamp = start;
276
277 trace_seq_reset(taa_data->prev_irqs_seq);
278 trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n",
279 "nmi",
280 24, spaces,
281 ns_to_usf(duration));
282 return 0;
283 }
284
285 taa_data->thread_nmi_sum += duration;
286 trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n",
287 "nmi",
288 24, spaces, ns_to_usf(duration));
289
290 return 0;
291 }
292
/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other because it measures:
 * - The IRQs that can delay the timer IRQ before it happened.
 * - The Timerlat IRQ handler
 * - The IRQs that happened between the timerlat IRQ and the timerlat thread
 * (IRQ interference).
 *
 * Always returns 0.
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Case 1: Before the timerlat IRQ. Record it as the most recent
	 * previous IRQ that may delay the upcoming timerlat IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
				 desc, vector,
				 15, spaces,
				 ns_to_usf(duration));
		return 0;
	}

	/*
	 * Case 2: The timerlat IRQ itself: taa_data->timer_irq_start_time
	 * is zeroed at the timerlat irq handler, so the first irq_noise
	 * after it is the timerlat IRQ.
	 */
	if (!taa_data->timer_irq_start_time) {
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time reading the clock and the timer in
		 * which the event is placed in the buffer (the trace
		 * event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set IRQ start delay as 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * not exit from idle: a thread (non-zero pid) was running.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* Exit from idle, not disturbed by the previous IRQ */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * Case 3: IRQ interference between the timerlat IRQ and thread.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
			 desc, vector,
			 24, spaces,
			 ns_to_usf(duration));

	return 0;
}
388
389 static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
390 "IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
391
392
393 /*
394 * timerlat_aa_softirq_handler - Handles Softirq noise
395 *
396 * It is used to collect information about interferences from Softirq. It is
397 * hooked to the osnoise:softirq_noise event.
398 *
399 * It is only printed in the non-rt kernel, as softirqs become thread on RT.
400 */
timerlat_aa_softirq_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)401 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
402 struct tep_event *event, void *context)
403 {
404 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
405 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
406 unsigned long long duration;
407 unsigned long long vector;
408 unsigned long long start;
409
410 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
411 return 0;
412
413 tep_get_field_val(s, event, "duration", record, &duration, 1);
414 tep_get_field_val(s, event, "start", record, &start, 1);
415 tep_get_field_val(s, event, "vector", record, &vector, 1);
416
417 taa_data->thread_softirq_sum += duration;
418
419 trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
420 softirq_name[vector], vector,
421 24, spaces,
422 ns_to_usf(duration));
423 return 0;
424 }
425
426 /*
427 * timerlat_aa_softirq_handler - Handles thread noise
428 *
429 * It is used to collect information about interferences from threads. It is
430 * hooked to the osnoise:thread_noise event.
431 *
432 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
433 */
timerlat_aa_thread_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)434 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
435 struct tep_event *event, void *context)
436 {
437 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
438 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
439 unsigned long long duration;
440 unsigned long long start;
441 unsigned long long pid;
442 const char *comm;
443 int val;
444
445 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
446 return 0;
447
448 tep_get_field_val(s, event, "duration", record, &duration, 1);
449 tep_get_field_val(s, event, "start", record, &start, 1);
450
451 tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
452 comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
453
454 if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
455 taa_data->thread_blocking_duration = duration;
456
457 if (comm)
458 strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
459 else
460 sprintf(taa_data->run_thread_comm, "<...>");
461
462 } else {
463 taa_data->thread_thread_sum += duration;
464
465 trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n",
466 comm, pid,
467 15, spaces,
468 ns_to_usf(duration));
469 }
470
471 return 0;
472 }
473
474 /*
475 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
476 *
477 * Saves and parse the stack trace generated by the timerlat IRQ.
478 */
timerlat_aa_stack_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)479 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
480 struct tep_event *event, void *context)
481 {
482 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
483 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
484 unsigned long *caller;
485 const char *function;
486 int val, i;
487
488 trace_seq_reset(taa_data->stack_seq);
489
490 trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n");
491 caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
492 if (caller) {
493 for (i = 0; ; i++) {
494 function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
495 if (!function)
496 break;
497 trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n",
498 14, spaces, function);
499 }
500 }
501 return 0;
502 }
503
504 /*
505 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
506 *
507 * Handles the sched:sched_switch event to trace the current thread running on the
508 * CPU. It is used to display the threads running on the other CPUs when the trace
509 * stops.
510 */
timerlat_aa_sched_switch_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)511 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
512 struct tep_event *event, void *context)
513 {
514 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
515 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
516 const char *comm;
517 int val;
518
519 tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
520 comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
521
522 strncpy(taa_data->current_comm, comm, MAX_COMM);
523
524 /*
525 * If this was a kworker, clean the last kworkers that ran.
526 */
527 taa_data->kworker = 0;
528 taa_data->kworker_func = 0;
529
530 return 0;
531 }
532
533 /*
534 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
535 *
536 * Handles workqueue:workqueue_execute_start event, keeping track of
537 * the job that a kworker could be doing in the CPU.
538 *
539 * We already catch problems of hardware related latencies caused by work queues
540 * running driver code that causes hardware stall. For example, with DRM drivers.
541 */
timerlat_aa_kworker_start_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)542 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
543 struct tep_event *event, void *context)
544 {
545 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
546 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
547
548 tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
549 tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
550 return 0;
551 }
552
/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * This is the core of the analysis: it prints the breakdown of where the
 * latency of the last activation went (IRQ start delay, timerlat IRQ
 * duration, blocking thread, and NMI/IRQ/softirq/thread interference),
 * using the per-cpu data collected by the event handlers.
 *
 * NOTE(review): the cpu, irq_thresh and thread_thresh parameters are
 * currently unused in the body; the caller decides whether to invoke the
 * analysis. They are kept for interface stability.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency? The event with the higher seqnum
	 * is the one that triggered the stop.
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 *
	 * TODO: Add a list of previous IRQ, and then run the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		/* only report if the previous IRQ really ended before the timer IRQ */
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf(" Previous IRQ interference: %.*s up to %9.2f us\n",
			       16, spaces,
			       ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces,
	       (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
	       ns_to_usf(taa_data->timer_irq_start_delay),
	       ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces,
	       ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all folks, the stack trace was printed before the stop,
		 * so it will be displayed, it is the key.
		 */
		printf(" Blocking thread:\n");
		printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm,
		       taa_data->run_thread_pid);
	} else {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n",
		       30, spaces,
		       ns_to_usf(taa_data->timer_irq_duration),
		       ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
		 * is no need to compute values here.
		 */
		printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_blocking_duration),
		       ns_to_per(total, taa_data->thread_blocking_duration));

		printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces,
		       taa_data->run_thread_comm, taa_data->run_thread_pid,
		       12, spaces, ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_nmi_sum),
		       ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_irq_sum),
		       ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces,
		       ns_to_usf(taa_data->thread_softirq_sum),
		       ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat is not the highest prio. OK, migration
	 * thread can happen. But otherwise, you are not measuring the "scheduling
	 * latency" only, and here is the difference from scheduling latency and
	 * timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces,
		       ns_to_usf(taa_data->thread_thread_sum),
		       ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread",
	       37, spaces, ns_to_usf(total));
}
703
timerlat_auto_analysis_collect_trace(struct timerlat_aa_context * taa_ctx)704 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
705 {
706 struct trace_instance *trace = &taa_ctx->tool->trace;
707 int retval;
708
709 retval = tracefs_iterate_raw_events(trace->tep,
710 trace->inst,
711 NULL,
712 0,
713 collect_registered_events,
714 trace);
715 if (retval < 0) {
716 err_msg("Error iterating on events\n");
717 return 0;
718 }
719
720 return 1;
721 }
722
723 /**
724 * timerlat_auto_analysis - Analyze the collected data
725 */
timerlat_auto_analysis(int irq_thresh,int thread_thresh)726 void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
727 {
728 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
729 unsigned long long max_exit_from_idle = 0;
730 struct timerlat_aa_data *taa_data;
731 int max_exit_from_idle_cpu;
732 struct tep_handle *tep;
733 int cpu;
734
735 timerlat_auto_analysis_collect_trace(taa_ctx);
736
737 /* bring stop tracing to the ns scale */
738 irq_thresh = irq_thresh * 1000;
739 thread_thresh = thread_thresh * 1000;
740
741 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
742 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
743
744 if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
745 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
746 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
747 } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
748 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
749 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
750 }
751
752 if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
753 max_exit_from_idle = taa_data->max_exit_idle_latency;
754 max_exit_from_idle_cpu = cpu;
755 }
756
757 }
758
759 if (max_exit_from_idle) {
760 printf("\n");
761 printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
762 ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
763 }
764 if (!taa_ctx->dump_tasks)
765 return;
766
767 printf("\n");
768 printf("Printing CPU tasks:\n");
769 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
770 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
771 tep = taa_ctx->tool->trace.tep;
772
773 printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
774
775 if (taa_data->kworker_func)
776 printf(" kworker:%s:%s",
777 tep_find_function(tep, taa_data->kworker) ? : "<...>",
778 tep_find_function(tep, taa_data->kworker_func));
779 printf("\n");
780 }
781
782 }
783
784 /*
785 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
786 */
timerlat_aa_destroy_seqs(struct timerlat_aa_context * taa_ctx)787 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
788 {
789 struct timerlat_aa_data *taa_data;
790 int i;
791
792 if (!taa_ctx->taa_data)
793 return;
794
795 for (i = 0; i < taa_ctx->nr_cpus; i++) {
796 taa_data = timerlat_aa_get_data(taa_ctx, i);
797
798 if (taa_data->prev_irqs_seq) {
799 trace_seq_destroy(taa_data->prev_irqs_seq);
800 free(taa_data->prev_irqs_seq);
801 }
802
803 if (taa_data->nmi_seq) {
804 trace_seq_destroy(taa_data->nmi_seq);
805 free(taa_data->nmi_seq);
806 }
807
808 if (taa_data->irqs_seq) {
809 trace_seq_destroy(taa_data->irqs_seq);
810 free(taa_data->irqs_seq);
811 }
812
813 if (taa_data->softirqs_seq) {
814 trace_seq_destroy(taa_data->softirqs_seq);
815 free(taa_data->softirqs_seq);
816 }
817
818 if (taa_data->threads_seq) {
819 trace_seq_destroy(taa_data->threads_seq);
820 free(taa_data->threads_seq);
821 }
822
823 if (taa_data->stack_seq) {
824 trace_seq_destroy(taa_data->stack_seq);
825 free(taa_data->stack_seq);
826 }
827 }
828 }
829
830 /*
831 * timerlat_aa_init_seqs - Init seq files used to store parsed information
832 *
833 * Instead of keeping data structures to store raw data, use seq files to
834 * store parsed data.
835 *
836 * Allocates and initialize seq files.
837 *
838 * Returns 0 on success, -1 otherwise.
839 */
timerlat_aa_init_seqs(struct timerlat_aa_context * taa_ctx)840 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
841 {
842 struct timerlat_aa_data *taa_data;
843 int i;
844
845 for (i = 0; i < taa_ctx->nr_cpus; i++) {
846
847 taa_data = timerlat_aa_get_data(taa_ctx, i);
848
849 taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
850 if (!taa_data->prev_irqs_seq)
851 goto out_err;
852
853 trace_seq_init(taa_data->prev_irqs_seq);
854
855 taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
856 if (!taa_data->nmi_seq)
857 goto out_err;
858
859 trace_seq_init(taa_data->nmi_seq);
860
861 taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
862 if (!taa_data->irqs_seq)
863 goto out_err;
864
865 trace_seq_init(taa_data->irqs_seq);
866
867 taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
868 if (!taa_data->softirqs_seq)
869 goto out_err;
870
871 trace_seq_init(taa_data->softirqs_seq);
872
873 taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
874 if (!taa_data->threads_seq)
875 goto out_err;
876
877 trace_seq_init(taa_data->threads_seq);
878
879 taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
880 if (!taa_data->stack_seq)
881 goto out_err;
882
883 trace_seq_init(taa_data->stack_seq);
884 }
885
886 return 0;
887
888 out_err:
889 timerlat_aa_destroy_seqs(taa_ctx);
890 return -1;
891 }
892
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirror of timerlat_aa_register_events(): unhooks every handler and
 * disables the events enabled for the analysis. The task-dump events
 * are only touched when dump_tasks is set, matching registration.
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	/* Disable all osnoise events that were enabled as a group */
	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);
	if (!dump_tasks)
		return;

	/* Task-dump events, enabled only when dump_tasks was requested */
	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
929
/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Enables the tracefs events needed by the analysis and hooks the
 * per-event handlers. On failure, everything registered so far is
 * unregistered.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				   timerlat_aa_handler, tool);


	/*
	 * register auto-analysis handlers.
	 *
	 * A negative return with errno unset means the event does not
	 * exist in this kernel; other errors are tolerated.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}
997
998 /**
999 * timerlat_aa_destroy - Destroy timerlat auto-analysis
1000 */
timerlat_aa_destroy(void)1001 void timerlat_aa_destroy(void)
1002 {
1003 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
1004
1005 if (!taa_ctx)
1006 return;
1007
1008 if (!taa_ctx->taa_data)
1009 goto out_ctx;
1010
1011 timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
1012 timerlat_aa_destroy_seqs(taa_ctx);
1013 free(taa_ctx->taa_data);
1014 out_ctx:
1015 free(taa_ctx);
1016 }
1017
1018 /**
1019 * timerlat_aa_init - Initialize timerlat auto-analysis
1020 *
1021 * Returns 0 on success, -1 otherwise.
1022 */
timerlat_aa_init(struct osnoise_tool * tool,int dump_tasks)1023 int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
1024 {
1025 int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
1026 struct timerlat_aa_context *taa_ctx;
1027 int retval;
1028
1029 taa_ctx = calloc(1, sizeof(*taa_ctx));
1030 if (!taa_ctx)
1031 return -1;
1032
1033 __timerlat_aa_ctx = taa_ctx;
1034
1035 taa_ctx->nr_cpus = nr_cpus;
1036 taa_ctx->tool = tool;
1037 taa_ctx->dump_tasks = dump_tasks;
1038
1039 taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1040 if (!taa_ctx->taa_data)
1041 goto out_err;
1042
1043 retval = timerlat_aa_init_seqs(taa_ctx);
1044 if (retval)
1045 goto out_err;
1046
1047 retval = timerlat_aa_register_events(tool, dump_tasks);
1048 if (retval)
1049 goto out_err;
1050
1051 return 0;
1052
1053 out_err:
1054 timerlat_aa_destroy();
1055 return -1;
1056 }
1057