1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #include <stdlib.h>
7 #include <errno.h>
8 #include "utils.h"
9 #include "osnoise.h"
10 #include "timerlat.h"
11 #include <unistd.h>
12
/*
 * Per-cpu analysis state machine: after the timerlat IRQ event is seen we
 * wait for the thread event (TIMERLAT_WAITING_THREAD); after the thread
 * event we wait for the next IRQ (TIMERLAT_WAITING_IRQ).
 */
enum timelat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};

/* Used to fill spaces in the output */
static const char *spaces = "                                                         ";

/* Maximum length stored for a task comm (including the NUL terminator) */
#define MAX_COMM		24
23
/*
 * Per-cpu data statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int curr_state;

	/* timerlat IRQ latency */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetched using
	 * tep_get_field_val();
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer irq */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sums, accumulated between the timerlat IRQ and the
	 * timerlat thread events.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information, stored as pre-formatted text in
	 * seq buffers instead of raw records.
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread (tracked via sched:sched_switch).
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker? (work struct address and
	 * function address, 0 when no kworker is running)
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};
102
/*
 * The analysis context and system wide view
 */
struct timerlat_aa_context {
	/* Number of CPUs (size of the taa_data array) */
	int nr_cpus;
	/* Non-zero when per-CPU task dumping was requested */
	int dump_tasks;

	/* per CPU data */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
119
/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/*
 * timerlat_aa_get_ctx - Return the global auto-analysis context
 *
 * May return NULL if timerlat_aa_init() has not run (or failed).
 */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
135
/*
 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
 *
 * NOTE(review): no bounds check on cpu; callers presumably only pass
 * record->cpu < nr_cpus — confirm against the trace setup.
 */
static struct timerlat_aa_data
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
{
	return &taa_ctx->taa_data[cpu];
}
144
145 /*
146 * timerlat_aa_irq_latency - Handles timerlat IRQ event
147 */
timerlat_aa_irq_latency(struct timerlat_aa_data * taa_data,struct trace_seq * s,struct tep_record * record,struct tep_event * event)148 static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
149 struct trace_seq *s, struct tep_record *record,
150 struct tep_event *event)
151 {
152 /*
153 * For interference, we start now looking for things that can delay
154 * the thread.
155 */
156 taa_data->curr_state = TIMERLAT_WAITING_THREAD;
157 taa_data->tlat_irq_timstamp = record->ts;
158
159 /*
160 * Zero values.
161 */
162 taa_data->thread_nmi_sum = 0;
163 taa_data->thread_irq_sum = 0;
164 taa_data->thread_softirq_sum = 0;
165 taa_data->thread_thread_sum = 0;
166 taa_data->thread_blocking_duration = 0;
167 taa_data->timer_irq_start_time = 0;
168 taa_data->timer_irq_duration = 0;
169 taa_data->timer_exit_from_idle = 0;
170
171 /*
172 * Zero interference tasks.
173 */
174 trace_seq_reset(taa_data->nmi_seq);
175 trace_seq_reset(taa_data->irqs_seq);
176 trace_seq_reset(taa_data->softirqs_seq);
177 trace_seq_reset(taa_data->threads_seq);
178
179 /* IRQ latency values */
180 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
181 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
182
183 /* The thread that can cause blocking */
184 tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
185
186 /*
187 * Get exit from idle case.
188 *
189 * If it is not idle thread:
190 */
191 if (taa_data->run_thread_pid)
192 return 0;
193
194 /*
195 * if the latency is shorter than the known exit from idle:
196 */
197 if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
198 return 0;
199
200 /*
201 * To be safe, ignore the cases in which an IRQ/NMI could have
202 * interfered with the timerlat IRQ.
203 */
204 if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
205 < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
206 return 0;
207
208 taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
209
210 return 0;
211 }
212
213 /*
214 * timerlat_aa_thread_latency - Handles timerlat thread event
215 */
timerlat_aa_thread_latency(struct timerlat_aa_data * taa_data,struct trace_seq * s,struct tep_record * record,struct tep_event * event)216 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
217 struct trace_seq *s, struct tep_record *record,
218 struct tep_event *event)
219 {
220 /*
221 * For interference, we start now looking for things that can delay
222 * the IRQ of the next cycle.
223 */
224 taa_data->curr_state = TIMERLAT_WAITING_IRQ;
225 taa_data->tlat_thread_timstamp = record->ts;
226
227 /* Thread latency values */
228 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
229 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
230
231 return 0;
232 }
233
234 /*
235 * timerlat_aa_handler - Handle timerlat events
236 *
237 * This function is called to handle timerlat events recording statistics.
238 *
239 * Returns 0 on success, -1 otherwise.
240 */
timerlat_aa_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)241 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
242 struct tep_event *event, void *context)
243 {
244 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
245 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
246 unsigned long long thread;
247
248 if (!taa_data)
249 return -1;
250
251 tep_get_field_val(s, event, "context", record, &thread, 1);
252 if (!thread)
253 return timerlat_aa_irq_latency(taa_data, s, record, event);
254 else
255 return timerlat_aa_thread_latency(taa_data, s, record, event);
256 }
257
258 /*
259 * timerlat_aa_nmi_handler - Handles NMI noise
260 *
261 * It is used to collect information about interferences from NMI. It is
262 * hooked to the osnoise:nmi_noise event.
263 */
timerlat_aa_nmi_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)264 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
265 struct tep_event *event, void *context)
266 {
267 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
268 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
269 unsigned long long duration;
270 unsigned long long start;
271
272 tep_get_field_val(s, event, "duration", record, &duration, 1);
273 tep_get_field_val(s, event, "start", record, &start, 1);
274
275 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
276 taa_data->prev_irq_duration = duration;
277 taa_data->prev_irq_timstamp = start;
278
279 trace_seq_reset(taa_data->prev_irqs_seq);
280 trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n",
281 "nmi",
282 24, spaces,
283 ns_to_usf(duration));
284 return 0;
285 }
286
287 taa_data->thread_nmi_sum += duration;
288 trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n",
289 "nmi",
290 24, spaces, ns_to_usf(duration));
291
292 return 0;
293 }
294
295 /*
296 * timerlat_aa_irq_handler - Handles IRQ noise
297 *
298 * It is used to collect information about interferences from IRQ. It is
299 * hooked to the osnoise:irq_noise event.
300 *
301 * It is a little bit more complex than the other because it measures:
302 * - The IRQs that can delay the timer IRQ before it happened.
303 * - The Timerlat IRQ handler
304 * - The IRQs that happened between the timerlat IRQ and the timerlat thread
305 * (IRQ interference).
306 */
timerlat_aa_irq_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)307 static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
308 struct tep_event *event, void *context)
309 {
310 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
311 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
312 unsigned long long expected_start;
313 unsigned long long duration;
314 unsigned long long vector;
315 unsigned long long start;
316 char *desc;
317 int val;
318
319 tep_get_field_val(s, event, "duration", record, &duration, 1);
320 tep_get_field_val(s, event, "start", record, &start, 1);
321 tep_get_field_val(s, event, "vector", record, &vector, 1);
322 desc = tep_get_field_raw(s, event, "desc", record, &val, 1);
323
324 /*
325 * Before the timerlat IRQ.
326 */
327 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
328 taa_data->prev_irq_duration = duration;
329 taa_data->prev_irq_timstamp = start;
330
331 trace_seq_reset(taa_data->prev_irqs_seq);
332 trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
333 desc, vector,
334 15, spaces,
335 ns_to_usf(duration));
336 return 0;
337 }
338
339 /*
340 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
341 * the timerlat irq handler.
342 */
343 if (!taa_data->timer_irq_start_time) {
344 expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;
345
346 taa_data->timer_irq_start_time = start;
347 taa_data->timer_irq_duration = duration;
348
349 /*
350 * We are dealing with two different clock sources: the
351 * external clock source that timerlat uses as a reference
352 * and the clock used by the tracer. There are also two
353 * moments: the time reading the clock and the timer in
354 * which the event is placed in the buffer (the trace
355 * event timestamp). If the processor is slow or there
356 * is some hardware noise, the difference between the
357 * timestamp and the external clock read can be longer
358 * than the IRQ handler delay, resulting in a negative
359 * time. If so, set IRQ start delay as 0. In the end,
360 * it is less relevant than the noise.
361 */
362 if (expected_start < taa_data->timer_irq_start_time)
363 taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
364 else
365 taa_data->timer_irq_start_delay = 0;
366
367 /*
368 * not exit from idle.
369 */
370 if (taa_data->run_thread_pid)
371 return 0;
372
373 if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
374 taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;
375
376 return 0;
377 }
378
379 /*
380 * IRQ interference.
381 */
382 taa_data->thread_irq_sum += duration;
383 trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
384 desc, vector,
385 24, spaces,
386 ns_to_usf(duration));
387
388 return 0;
389 }
390
/*
 * Human-readable softirq vector names; the order mirrors the kernel's
 * softirq vector numbering (HI = 0 ... RCU = 9).
 */
static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
393
394
395 /*
396 * timerlat_aa_softirq_handler - Handles Softirq noise
397 *
398 * It is used to collect information about interferences from Softirq. It is
399 * hooked to the osnoise:softirq_noise event.
400 *
401 * It is only printed in the non-rt kernel, as softirqs become thread on RT.
402 */
timerlat_aa_softirq_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)403 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
404 struct tep_event *event, void *context)
405 {
406 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
407 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
408 unsigned long long duration;
409 unsigned long long vector;
410 unsigned long long start;
411
412 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
413 return 0;
414
415 tep_get_field_val(s, event, "duration", record, &duration, 1);
416 tep_get_field_val(s, event, "start", record, &start, 1);
417 tep_get_field_val(s, event, "vector", record, &vector, 1);
418
419 taa_data->thread_softirq_sum += duration;
420
421 trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
422 softirq_name[vector], vector,
423 24, spaces,
424 ns_to_usf(duration));
425 return 0;
426 }
427
428 /*
429 * timerlat_aa_softirq_handler - Handles thread noise
430 *
431 * It is used to collect information about interferences from threads. It is
432 * hooked to the osnoise:thread_noise event.
433 *
434 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
435 */
timerlat_aa_thread_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)436 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
437 struct tep_event *event, void *context)
438 {
439 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
440 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
441 unsigned long long duration;
442 unsigned long long start;
443 unsigned long long pid;
444 const char *comm;
445 int val;
446
447 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
448 return 0;
449
450 tep_get_field_val(s, event, "duration", record, &duration, 1);
451 tep_get_field_val(s, event, "start", record, &start, 1);
452
453 tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
454 comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
455
456 if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
457 taa_data->thread_blocking_duration = duration;
458
459 if (comm)
460 strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
461 else
462 sprintf(taa_data->run_thread_comm, "<...>");
463
464 } else {
465 taa_data->thread_thread_sum += duration;
466
467 trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n",
468 comm, pid,
469 15, spaces,
470 ns_to_usf(duration));
471 }
472
473 return 0;
474 }
475
476 /*
477 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
478 *
479 * Saves and parse the stack trace generated by the timerlat IRQ.
480 */
timerlat_aa_stack_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)481 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
482 struct tep_event *event, void *context)
483 {
484 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
485 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
486 unsigned long *caller;
487 const char *function;
488 int val, i;
489
490 trace_seq_reset(taa_data->stack_seq);
491
492 trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n");
493 caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
494 if (caller) {
495 for (i = 0; ; i++) {
496 function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
497 if (!function)
498 break;
499 trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n",
500 14, spaces, function);
501 }
502 }
503 return 0;
504 }
505
506 /*
507 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
508 *
509 * Handles the sched:sched_switch event to trace the current thread running on the
510 * CPU. It is used to display the threads running on the other CPUs when the trace
511 * stops.
512 */
timerlat_aa_sched_switch_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)513 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
514 struct tep_event *event, void *context)
515 {
516 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
517 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
518 const char *comm;
519 int val;
520
521 tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
522 comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
523
524 strncpy(taa_data->current_comm, comm, MAX_COMM);
525
526 /*
527 * If this was a kworker, clean the last kworkers that ran.
528 */
529 taa_data->kworker = 0;
530 taa_data->kworker_func = 0;
531
532 return 0;
533 }
534
535 /*
536 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
537 *
538 * Handles workqueue:workqueue_execute_start event, keeping track of
539 * the job that a kworker could be doing in the CPU.
540 *
541 * We already catch problems of hardware related latencies caused by work queues
542 * running driver code that causes hardware stall. For example, with DRM drivers.
543 */
timerlat_aa_kworker_start_handler(struct trace_seq * s,struct tep_record * record,struct tep_event * event,void * context)544 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
545 struct tep_event *event, void *context)
546 {
547 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
548 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
549
550 tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
551 tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
552 return 0;
553 }
554
/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * This is the core of the analysis: it prints the breakdown of the latency
 * sample that triggered stop tracing, using the per-cpu data collected by
 * the event handlers.
 *
 * taa_data:      per-cpu collected data for the CPU being analyzed
 * cpu:           CPU number (not used in the body, kept for symmetry)
 * irq_thresh:    IRQ stop-tracing threshold, in ns (not used in the body)
 * thread_thresh: thread stop-tracing threshold, in ns (not used in the body)
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	/* NOTE(review): total is an int holding a ns latency — truncates above ~2.1 s; presumably fine for stop-tracing thresholds, confirm. */
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency?
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 *
	 * TODO: Add a list of previous IRQ, and then run the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf(" Previous IRQ interference: %.*s up to %9.2f us\n",
			       16, spaces,
			       ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces,
	       (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
	       ns_to_usf(taa_data->timer_irq_start_delay),
	       ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces,
	       ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all folks, the stack trace was printed before the stop,
		 * so it will be displayed, it is the key.
		 */
		printf(" Blocking thread:\n");
		printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm,
		       taa_data->run_thread_pid);
	} else {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n",
		       30, spaces,
		       ns_to_usf(taa_data->timer_irq_duration),
		       ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
		 * is no need to compute values here.
		 */
		printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_blocking_duration),
		       ns_to_per(total, taa_data->thread_blocking_duration));

		printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces,
		       taa_data->run_thread_comm, taa_data->run_thread_pid,
		       12, spaces, ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_nmi_sum),
		       ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_irq_sum),
		       ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces,
		       ns_to_usf(taa_data->thread_softirq_sum),
		       ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat is not the highest prio. OK, migration
	 * thread can happen. But otherwise, you are not measuring the "scheduling
	 * latency" only, and here is the difference from scheduling latency and
	 * timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces,
		       ns_to_usf(taa_data->thread_thread_sum),
		       ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread",
	       37, spaces, ns_to_usf(total));
}
705
timerlat_auto_analysis_collect_trace(struct timerlat_aa_context * taa_ctx)706 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
707 {
708 struct trace_instance *trace = &taa_ctx->tool->trace;
709 int retval;
710
711 retval = tracefs_iterate_raw_events(trace->tep,
712 trace->inst,
713 NULL,
714 0,
715 collect_registered_events,
716 trace);
717 if (retval < 0) {
718 err_msg("Error iterating on events\n");
719 return 0;
720 }
721
722 return 1;
723 }
724
725 /**
726 * timerlat_auto_analysis - Analyze the collected data
727 */
timerlat_auto_analysis(int irq_thresh,int thread_thresh)728 void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
729 {
730 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
731 unsigned long long max_exit_from_idle = 0;
732 struct timerlat_aa_data *taa_data;
733 int max_exit_from_idle_cpu;
734 struct tep_handle *tep;
735 int cpu;
736
737 timerlat_auto_analysis_collect_trace(taa_ctx);
738
739 /* bring stop tracing to the ns scale */
740 irq_thresh = irq_thresh * 1000;
741 thread_thresh = thread_thresh * 1000;
742
743 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
744 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
745
746 if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
747 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
748 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
749 } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
750 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
751 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
752 }
753
754 if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
755 max_exit_from_idle = taa_data->max_exit_idle_latency;
756 max_exit_from_idle_cpu = cpu;
757 }
758
759 }
760
761 if (max_exit_from_idle) {
762 printf("\n");
763 printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
764 ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
765 }
766 if (!taa_ctx->dump_tasks)
767 return;
768
769 printf("\n");
770 printf("Printing CPU tasks:\n");
771 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
772 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
773 tep = taa_ctx->tool->trace.tep;
774
775 printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
776
777 if (taa_data->kworker_func)
778 printf(" kworker:%s:%s",
779 tep_find_function(tep, taa_data->kworker) ? : "<...>",
780 tep_find_function(tep, taa_data->kworker_func));
781 printf("\n");
782 }
783
784 }
785
786 /*
787 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
788 */
timerlat_aa_destroy_seqs(struct timerlat_aa_context * taa_ctx)789 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
790 {
791 struct timerlat_aa_data *taa_data;
792 int i;
793
794 if (!taa_ctx->taa_data)
795 return;
796
797 for (i = 0; i < taa_ctx->nr_cpus; i++) {
798 taa_data = timerlat_aa_get_data(taa_ctx, i);
799
800 if (taa_data->prev_irqs_seq) {
801 trace_seq_destroy(taa_data->prev_irqs_seq);
802 free(taa_data->prev_irqs_seq);
803 }
804
805 if (taa_data->nmi_seq) {
806 trace_seq_destroy(taa_data->nmi_seq);
807 free(taa_data->nmi_seq);
808 }
809
810 if (taa_data->irqs_seq) {
811 trace_seq_destroy(taa_data->irqs_seq);
812 free(taa_data->irqs_seq);
813 }
814
815 if (taa_data->softirqs_seq) {
816 trace_seq_destroy(taa_data->softirqs_seq);
817 free(taa_data->softirqs_seq);
818 }
819
820 if (taa_data->threads_seq) {
821 trace_seq_destroy(taa_data->threads_seq);
822 free(taa_data->threads_seq);
823 }
824
825 if (taa_data->stack_seq) {
826 trace_seq_destroy(taa_data->stack_seq);
827 free(taa_data->stack_seq);
828 }
829 }
830 }
831
832 /*
833 * timerlat_aa_init_seqs - Init seq files used to store parsed information
834 *
835 * Instead of keeping data structures to store raw data, use seq files to
836 * store parsed data.
837 *
838 * Allocates and initialize seq files.
839 *
840 * Returns 0 on success, -1 otherwise.
841 */
timerlat_aa_init_seqs(struct timerlat_aa_context * taa_ctx)842 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
843 {
844 struct timerlat_aa_data *taa_data;
845 int i;
846
847 for (i = 0; i < taa_ctx->nr_cpus; i++) {
848
849 taa_data = timerlat_aa_get_data(taa_ctx, i);
850
851 taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
852 if (!taa_data->prev_irqs_seq)
853 goto out_err;
854
855 trace_seq_init(taa_data->prev_irqs_seq);
856
857 taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
858 if (!taa_data->nmi_seq)
859 goto out_err;
860
861 trace_seq_init(taa_data->nmi_seq);
862
863 taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
864 if (!taa_data->irqs_seq)
865 goto out_err;
866
867 trace_seq_init(taa_data->irqs_seq);
868
869 taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
870 if (!taa_data->softirqs_seq)
871 goto out_err;
872
873 trace_seq_init(taa_data->softirqs_seq);
874
875 taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
876 if (!taa_data->threads_seq)
877 goto out_err;
878
879 trace_seq_init(taa_data->threads_seq);
880
881 taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
882 if (!taa_data->stack_seq)
883 goto out_err;
884
885 trace_seq_init(taa_data->stack_seq);
886 }
887
888 return 0;
889
890 out_err:
891 timerlat_aa_destroy_seqs(taa_ctx);
892 return -1;
893 }
894
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirror of timerlat_aa_register_events(): disables the enabled trace
 * events and unregisters every tep handler. The sched_switch and
 * workqueue events are only touched when dump_tasks was set.
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);

	/* The task-dump events below were only registered when dump_tasks was set. */
	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
931
/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Enables the trace events needed by the analysis and registers the tep
 * handlers for them. The sched_switch and workqueue events are only
 * enabled when dump_tasks is set.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				   timerlat_aa_handler, tool);


	/*
	 * register auto-analysis handlers.
	 *
	 * NOTE(review): retval < 0 with errno == 0 is treated as "event not
	 * found" (as opposed to a system error) — presumably matching
	 * tracefs_event_enable() semantics; confirm against libtracefs docs.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}
999
1000 /**
1001 * timerlat_aa_destroy - Destroy timerlat auto-analysis
1002 */
timerlat_aa_destroy(void)1003 void timerlat_aa_destroy(void)
1004 {
1005 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
1006
1007 if (!taa_ctx)
1008 return;
1009
1010 if (!taa_ctx->taa_data)
1011 goto out_ctx;
1012
1013 timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
1014 timerlat_aa_destroy_seqs(taa_ctx);
1015 free(taa_ctx->taa_data);
1016 out_ctx:
1017 free(taa_ctx);
1018 }
1019
/**
 * timerlat_aa_init - Initialize timerlat auto-analysis
 *
 * Allocates the global analysis context and the per-cpu data array, sets
 * up the seq buffers, and registers the trace event handlers. On failure
 * everything allocated so far is released via timerlat_aa_destroy().
 *
 * Returns 0 on success, -1 otherwise.
 */
int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
{
	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	struct timerlat_aa_context *taa_ctx;
	int retval;

	taa_ctx = calloc(1, sizeof(*taa_ctx));
	if (!taa_ctx)
		return -1;

	/* Publish early: the error path reaches the context via timerlat_aa_get_ctx(). */
	__timerlat_aa_ctx = taa_ctx;

	taa_ctx->nr_cpus = nr_cpus;
	taa_ctx->tool = tool;
	taa_ctx->dump_tasks = dump_tasks;

	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
	if (!taa_ctx->taa_data)
		goto out_err;

	retval = timerlat_aa_init_seqs(taa_ctx);
	if (retval)
		goto out_err;

	retval = timerlat_aa_register_events(tool, dump_tasks);
	if (retval)
		goto out_err;

	return 0;

out_err:
	/* Frees taa_data/seqs and the context itself; resets nothing else. */
	timerlat_aa_destroy();
	return -1;
}
1059