1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #define _GNU_SOURCE
7 #include <getopt.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <signal.h>
11 #include <unistd.h>
12 #include <stdio.h>
13 #include <time.h>
14 #include <sched.h>
15 #include <pthread.h>
16
17 #include "timerlat.h"
18 #include "timerlat_aa.h"
19 #include "timerlat_u.h"
20 #include "timerlat_bpf.h"
21
22 struct timerlat_hist_cpu {
23 int *irq;
24 int *thread;
25 int *user;
26
27 unsigned long long irq_count;
28 unsigned long long thread_count;
29 unsigned long long user_count;
30
31 unsigned long long min_irq;
32 unsigned long long sum_irq;
33 unsigned long long max_irq;
34
35 unsigned long long min_thread;
36 unsigned long long sum_thread;
37 unsigned long long max_thread;
38
39 unsigned long long min_user;
40 unsigned long long sum_user;
41 unsigned long long max_user;
42 };
43
44 struct timerlat_hist_data {
45 struct timerlat_hist_cpu *hist;
46 int entries;
47 int bucket_size;
48 int nr_cpus;
49 };
50
51 /*
52 * timerlat_free_histogram - free runtime data
53 */
54 static void
55 timerlat_free_histogram(struct timerlat_hist_data *data)
56 {
57 int cpu;
58
59 /* one histogram for IRQ, one for thread and one for user, per CPU */
60 for (cpu = 0; cpu < data->nr_cpus; cpu++) {
61 if (data->hist[cpu].irq)
62 free(data->hist[cpu].irq);
63
64 if (data->hist[cpu].thread)
65 free(data->hist[cpu].thread);
66
67 if (data->hist[cpu].user)
68 free(data->hist[cpu].user);
69
70 }
71
72 /* one set of histograms per CPU */
73 if (data->hist)
74 free(data->hist);
75
76 free(data);
77 }
78
79 /*
80 * timerlat_alloc_histogram - alloc runtime data
81 */
82 static struct timerlat_hist_data
83 *timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size)
84 {
85 struct timerlat_hist_data *data;
86 int cpu;
87
88 data = calloc(1, sizeof(*data));
89 if (!data)
90 return NULL;
91
92 data->entries = entries;
93 data->bucket_size = bucket_size;
94 data->nr_cpus = nr_cpus;
95
96 /* one set of histograms per CPU */
97 data->hist = calloc(1, sizeof(*data->hist) * nr_cpus);
98 if (!data->hist)
99 goto cleanup;
100
101 /* one histogram for IRQ, one for thread and one for user, per CPU */
102 for (cpu = 0; cpu < nr_cpus; cpu++) {
103 data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1));
104 if (!data->hist[cpu].irq)
105 goto cleanup;
106
107 data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1));
108 if (!data->hist[cpu].thread)
109 goto cleanup;
110
111 data->hist[cpu].user = calloc(1, sizeof(*data->hist->user) * (entries + 1));
112 if (!data->hist[cpu].user)
113 goto cleanup;
114 }
115
116 /* start the min values at the maximum so the first sample always updates them */
117 for (cpu = 0; cpu < nr_cpus; cpu++) {
118 data->hist[cpu].min_irq = ~0;
119 data->hist[cpu].min_thread = ~0;
120 data->hist[cpu].min_user = ~0;
121 }
122
123 return data;
124
125 cleanup:
126 timerlat_free_histogram(data);
127 return NULL;
128 }
129
130 /*
131 * timerlat_hist_update - record a new timerlat occurrence on cpu, updating data
132 */
133 static void
134 timerlat_hist_update(struct osnoise_tool *tool, int cpu,
135 unsigned long long context,
136 unsigned long long latency)
137 {
138 struct timerlat_params *params = tool->params;
139 struct timerlat_hist_data *data = tool->data;
140 int entries = data->entries;
141 int bucket;
142 int *hist;
143
144 if (params->output_divisor)
145 latency = latency / params->output_divisor;
146
147 bucket = latency / data->bucket_size;
148
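/* context: 0 = IRQ, 1 = thread, anything else = user-space thread */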
149 if (!context) {
150 hist = data->hist[cpu].irq;
151 data->hist[cpu].irq_count++;
152 update_min(&data->hist[cpu].min_irq, &latency);
153 update_sum(&data->hist[cpu].sum_irq, &latency);
154 update_max(&data->hist[cpu].max_irq, &latency);
155 } else if (context == 1) {
156 hist = data->hist[cpu].thread;
157 data->hist[cpu].thread_count++;
158 update_min(&data->hist[cpu].min_thread, &latency);
159 update_sum(&data->hist[cpu].sum_thread, &latency);
160 update_max(&data->hist[cpu].max_thread, &latency);
161 } else { /* user */
162 hist = data->hist[cpu].user;
163 data->hist[cpu].user_count++;
164 update_min(&data->hist[cpu].min_user, &latency);
165 update_sum(&data->hist[cpu].sum_user, &latency);
166 update_max(&data->hist[cpu].max_user, &latency);
167 }
168
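/* latencies beyond the last bucket are accumulated in the overflow entry */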
169 if (bucket < entries)
170 hist[bucket]++;
171 else
172 hist[entries]++;
173 }
174
175 /*
176 * timerlat_hist_handler - this is the handler for timerlat tracer events
177 */
178 static int
179 timerlat_hist_handler(struct trace_seq *s, struct tep_record *record,
180 struct tep_event *event, void *data)
181 {
182 struct trace_instance *trace = data;
183 unsigned long long context, latency;
184 struct osnoise_tool *tool;
185 int cpu = record->cpu;
186
187 tool = container_of(trace, struct osnoise_tool, trace);
188
189 tep_get_field_val(s, event, "context", record, &context, 1);
190 tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
191
192 timerlat_hist_update(tool, cpu, context, latency);
193
194 return 0;
195 }
196
197 /*
198 * timerlat_hist_bpf_pull_data - copy data from BPF maps into userspace
199 */
200 static int timerlat_hist_bpf_pull_data(struct osnoise_tool *tool)
201 {
202 struct timerlat_hist_data *data = tool->data;
203 int i, j, err;
204 long long value_irq[data->nr_cpus],
205 value_thread[data->nr_cpus],
206 value_user[data->nr_cpus];
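/* scratch buffers holding one value per CPU for each BPF map read */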
207
208 /* Pull histogram */
209 for (i = 0; i < data->entries; i++) {
210 err = timerlat_bpf_get_hist_value(i, value_irq, value_thread,
211 value_user, data->nr_cpus);
212 if (err)
213 return err;
214 for (j = 0; j < data->nr_cpus; j++) {
215 data->hist[j].irq[i] = value_irq[j];
216 data->hist[j].thread[i] = value_thread[j];
217 data->hist[j].user[i] = value_user[j];
218 }
219 }
220
221 /* Pull summary */
222 err = timerlat_bpf_get_summary_value(SUMMARY_COUNT,
223 value_irq, value_thread, value_user,
224 data->nr_cpus);
225 if (err)
226 return err;
227 for (i = 0; i < data->nr_cpus; i++) {
228 data->hist[i].irq_count = value_irq[i];
229 data->hist[i].thread_count = value_thread[i];
230 data->hist[i].user_count = value_user[i];
231 }
232
233 err = timerlat_bpf_get_summary_value(SUMMARY_MIN,
234 value_irq, value_thread, value_user,
235 data->nr_cpus);
236 if (err)
237 return err;
238 for (i = 0; i < data->nr_cpus; i++) {
239 data->hist[i].min_irq = value_irq[i];
240 data->hist[i].min_thread = value_thread[i];
241 data->hist[i].min_user = value_user[i];
242 }
243
244 err = timerlat_bpf_get_summary_value(SUMMARY_MAX,
245 value_irq, value_thread, value_user,
246 data->nr_cpus);
247 if (err)
248 return err;
249 for (i = 0; i < data->nr_cpus; i++) {
250 data->hist[i].max_irq = value_irq[i];
251 data->hist[i].max_thread = value_thread[i];
252 data->hist[i].max_user = value_user[i];
253 }
254
255 err = timerlat_bpf_get_summary_value(SUMMARY_SUM,
256 value_irq, value_thread, value_user,
257 data->nr_cpus);
258 if (err)
259 return err;
260 for (i = 0; i < data->nr_cpus; i++) {
261 data->hist[i].sum_irq = value_irq[i];
262 data->hist[i].sum_thread = value_thread[i];
263 data->hist[i].sum_user = value_user[i];
264 }
265
266 err = timerlat_bpf_get_summary_value(SUMMARY_OVERFLOW,
267 value_irq, value_thread, value_user,
268 data->nr_cpus);
269 if (err)
270 return err;
271 for (i = 0; i < data->nr_cpus; i++) {
272 data->hist[i].irq[data->entries] = value_irq[i];
273 data->hist[i].thread[data->entries] = value_thread[i];
274 data->hist[i].user[data->entries] = value_user[i];
275 }
276
277 return 0;
278 }
279
280 /*
281 * timerlat_hist_header - print the header of the tracer to the output
282 */
283 static void timerlat_hist_header(struct osnoise_tool *tool)
284 {
285 struct timerlat_params *params = tool->params;
286 struct timerlat_hist_data *data = tool->data;
287 struct trace_seq *s = tool->trace.seq;
288 char duration[26];
289 int cpu;
290
291 if (params->no_header)
292 return;
293
294 get_duration(tool->start_time, duration, sizeof(duration));
295 trace_seq_printf(s, "# RTLA timerlat histogram\n");
296 trace_seq_printf(s, "# Time unit is %s (%s)\n",
297 params->output_divisor == 1 ? "nanoseconds" : "microseconds",
298 params->output_divisor == 1 ? "ns" : "us");
299
300 trace_seq_printf(s, "# Duration: %s\n", duration);
301
302 if (!params->no_index)
303 trace_seq_printf(s, "Index");
304
305 for (cpu = 0; cpu < data->nr_cpus; cpu++) {
306 if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
307 continue;
308
309 if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
310 continue;
311
312 if (!params->no_irq)
313 trace_seq_printf(s, " IRQ-%03d", cpu);
314
315 if (!params->no_thread)
316 trace_seq_printf(s, " Thr-%03d", cpu);
317
318 if (params->user_data)
319 trace_seq_printf(s, " Usr-%03d", cpu);
320 }
321 trace_seq_printf(s, "\n");
322
323
324 trace_seq_do_printf(s);
325 trace_seq_reset(s);
326 }
327
328 /*
329 * format_summary_value - format a single summary value (min, max or avg)
330 * of hist data
331 */
332 static void format_summary_value(struct trace_seq *seq,
333 int count,
334 unsigned long long val,
335 bool avg)
336 {
337 if (count)
338 trace_seq_printf(seq, "%9llu ", avg ? val / count : val);
339 else
340 trace_seq_printf(seq, "%9c ", '-');
341 }
342
343 /*
344 * timerlat_print_summary - print the summary of the hist data to the output
345 */
346 static void
347 timerlat_print_summary(struct timerlat_params *params,
348 struct trace_instance *trace,
349 struct timerlat_hist_data *data)
350 {
351 int cpu;
352
353 if (params->no_summary)
354 return;
355
356 if (!params->no_index)
357 trace_seq_printf(trace->seq, "count:");
358
359 for (cpu = 0; cpu < data->nr_cpus; cpu++) {
360 if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
361 continue;
362
363 if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
364 continue;
365
366 if (!params->no_irq)
367 trace_seq_printf(trace->seq, "%9llu ",
368 data->hist[cpu].irq_count);
369
370 if (!params->no_thread)
371 trace_seq_printf(trace->seq, "%9llu ",
372 data->hist[cpu].thread_count);
373
374 if (params->user_data)
375 trace_seq_printf(trace->seq, "%9llu ",
376 data->hist[cpu].user_count);
377 }
378 trace_seq_printf(trace->seq, "\n");
379
380 if (!params->no_index)
381 trace_seq_printf(trace->seq, "min: ");
382
383 for (cpu = 0; cpu < data->nr_cpus; cpu++) {
384 if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
385 continue;
386
387 if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
388 continue;
389
390 if (!params->no_irq)
391 format_summary_value(trace->seq,
392 data->hist[cpu].irq_count,
393 data->hist[cpu].min_irq,
394 false);
395
396 if (!params->no_thread)
397 format_summary_value(trace->seq,
398 data->hist[cpu].thread_count,
399 data->hist[cpu].min_thread,
400 false);
401
402 if (params->user_data)
403 format_summary_value(trace->seq,
404 data->hist[cpu].user_count,
405 data->hist[cpu].min_user,
406 false);
407 }
408 trace_seq_printf(trace->seq, "\n");
409
410 if (!params->no_index)
411 trace_seq_printf(trace->seq, "avg: ");
412
413 for (cpu = 0; cpu < data->nr_cpus; cpu++) {
414 if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
415 continue;
416
417 if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
418 continue;
419
420 if (!params->no_irq)
421 format_summary_value(trace->seq,
422 data->hist[cpu].irq_count,
423 data->hist[cpu].sum_irq,
424 true);
425
426 if (!params->no_thread)
427 format_summary_value(trace->seq,
428 data->hist[cpu].thread_count,
429 data->hist[cpu].sum_thread,
430 true);
431
432 if (params->user_data)
433 format_summary_value(trace->seq,
434 data->hist[cpu].user_count,
435 data->hist[cpu].sum_user,
436 true);
437 }
438 trace_seq_printf(trace->seq, "\n");
439
440 if (!params->no_index)
441 trace_seq_printf(trace->seq, "max: ");
442
443 for (cpu = 0; cpu < data->nr_cpus; cpu++) {
444 if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
445 continue;
446
447 if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
448 continue;
449
450 if (!params->no_irq)
451 format_summary_value(trace->seq,
452 data->hist[cpu].irq_count,
453 data->hist[cpu].max_irq,
454 false);
455
456 if (!params->no_thread)
457 format_summary_value(trace->seq,
458 data->hist[cpu].thread_count,
459 data->hist[cpu].max_thread,
460 false);
461
462 if (params->user_data)
463 format_summary_value(trace->seq,
464 data->hist[cpu].user_count,
465 data->hist[cpu].max_user,
466 false);
467 }
468 trace_seq_printf(trace->seq, "\n");
469 trace_seq_do_printf(trace->seq);
470 trace_seq_reset(trace->seq);
471 }
472
473 static void
474 timerlat_print_stats_all(struct timerlat_params *params,
475 struct trace_instance *trace,
476 struct timerlat_hist_data *data)
477 {
478 struct timerlat_hist_cpu *cpu_data;
479 struct timerlat_hist_cpu sum;
480 int cpu;
481
482 if (params->no_summary)
483 return;
484
485 memset(&sum, 0, sizeof(sum));
486 sum.min_irq = ~0;
487 sum.min_thread = ~0;
488 sum.min_user = ~0;
489
490 for (cpu = 0; cpu < data->nr_cpus; cpu++) {
491 if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
492 continue;
493
494 if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
495 continue;
496
497 cpu_data = &data->hist[cpu];
498
499 sum.irq_count += cpu_data->irq_count;
500 update_min(&sum.min_irq, &cpu_data->min_irq);
501 update_sum(&sum.sum_irq, &cpu_data->sum_irq);
502 update_max(&sum.max_irq, &cpu_data->max_irq);
503
504 sum.thread_count += cpu_data->thread_count;
505 update_min(&sum.min_thread, &cpu_data->min_thread);
506 update_sum(&sum.sum_thread, &cpu_data->sum_thread);
507 update_max(&sum.max_thread, &cpu_data->max_thread);
508
509 sum.user_count += cpu_data->user_count;
510 update_min(&sum.min_user, &cpu_data->min_user);
511 update_sum(&sum.sum_user, &cpu_data->sum_user);
512 update_max(&sum.max_user, &cpu_data->max_user);
513 }
514
515 if (!params->no_index)
516 trace_seq_printf(trace->seq, "ALL: ");
517
518 if (!params->no_irq)
519 trace_seq_printf(trace->seq, " IRQ");
520
521 if (!params->no_thread)
522 trace_seq_printf(trace->seq, " Thr");
523
524 if (params->user_data)
525 trace_seq_printf(trace->seq, " Usr");
526
527 trace_seq_printf(trace->seq, "\n");
528
529 if (!params->no_index)
530 trace_seq_printf(trace->seq, "count:");
531
532 if (!params->no_irq)
533 trace_seq_printf(trace->seq, "%9llu ",
534 sum.irq_count);
535
536 if (!params->no_thread)
537 trace_seq_printf(trace->seq, "%9llu ",
538 sum.thread_count);
539
540 if (params->user_data)
541 trace_seq_printf(trace->seq, "%9llu ",
542 sum.user_count);
543
544 trace_seq_printf(trace->seq, "\n");
545
546 if (!params->no_index)
547 trace_seq_printf(trace->seq, "min: ");
548
549 if (!params->no_irq)
550 format_summary_value(trace->seq,
551 sum.irq_count,
552 sum.min_irq,
553 false);
554
555 if (!params->no_thread)
556 format_summary_value(trace->seq,
557 sum.thread_count,
558 sum.min_thread,
559 false);
560
561 if (params->user_data)
562 format_summary_value(trace->seq,
563 sum.user_count,
564 sum.min_user,
565 false);
566
567 trace_seq_printf(trace->seq, "\n");
568
569 if (!params->no_index)
570 trace_seq_printf(trace->seq, "avg: ");
571
572 if (!params->no_irq)
573 format_summary_value(trace->seq,
574 sum.irq_count,
575 sum.sum_irq,
576 true);
577
578 if (!params->no_thread)
579 format_summary_value(trace->seq,
580 sum.thread_count,
581 sum.sum_thread,
582 true);
583
584 if (params->user_data)
585 format_summary_value(trace->seq,
586 sum.user_count,
587 sum.sum_user,
588 true);
589
590 trace_seq_printf(trace->seq, "\n");
591
592 if (!params->no_index)
593 trace_seq_printf(trace->seq, "max: ");
594
595 if (!params->no_irq)
596 format_summary_value(trace->seq,
597 sum.irq_count,
598 sum.max_irq,
599 false);
600
601 if (!params->no_thread)
602 format_summary_value(trace->seq,
603 sum.thread_count,
604 sum.max_thread,
605 false);
606
607 if (params->user_data)
608 format_summary_value(trace->seq,
609 sum.user_count,
610 sum.max_user,
611 false);
612
613 trace_seq_printf(trace->seq, "\n");
614 trace_seq_do_printf(trace->seq);
615 trace_seq_reset(trace->seq);
616 }
617
618 /*
619 * timerlat_print_stats - print data for each CPU
620 */
621 static void
622 timerlat_print_stats(struct timerlat_params *params, struct osnoise_tool *tool)
623 {
624 struct timerlat_hist_data *data = tool->data;
625 struct trace_instance *trace = &tool->trace;
626 int bucket, cpu;
627 int total;
628
629 timerlat_hist_header(tool);
630
631 for (bucket = 0; bucket < data->entries; bucket++) {
632 total = 0;
633
634 if (!params->no_index)
635 trace_seq_printf(trace->seq, "%-6d",
636 bucket * data->bucket_size);
637
638 for (cpu = 0; cpu < data->nr_cpus; cpu++) {
639 if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
640 continue;
641
642 if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
643 continue;
644
645 if (!params->no_irq) {
646 total += data->hist[cpu].irq[bucket];
647 trace_seq_printf(trace->seq, "%9d ",
648 data->hist[cpu].irq[bucket]);
649 }
650
651 if (!params->no_thread) {
652 total += data->hist[cpu].thread[bucket];
653 trace_seq_printf(trace->seq, "%9d ",
654 data->hist[cpu].thread[bucket]);
655 }
656
657 if (params->user_data) {
658 total += data->hist[cpu].user[bucket];
659 trace_seq_printf(trace->seq, "%9d ",
660 data->hist[cpu].user[bucket]);
661 }
662
663 }
664
665 if (total == 0 && !params->with_zeros) {
666 trace_seq_reset(trace->seq);
667 continue;
668 }
669
670 trace_seq_printf(trace->seq, "\n");
671 trace_seq_do_printf(trace->seq);
672 trace_seq_reset(trace->seq);
673 }
674
675 if (!params->no_index)
676 trace_seq_printf(trace->seq, "over: ");
677
678 for (cpu = 0; cpu < data->nr_cpus; cpu++) {
679 if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
680 continue;
681
682 if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
683 continue;
684
685 if (!params->no_irq)
686 trace_seq_printf(trace->seq, "%9d ",
687 data->hist[cpu].irq[data->entries]);
688
689 if (!params->no_thread)
690 trace_seq_printf(trace->seq, "%9d ",
691 data->hist[cpu].thread[data->entries]);
692
693 if (params->user_data)
694 trace_seq_printf(trace->seq, "%9d ",
695 data->hist[cpu].user[data->entries]);
696 }
697 trace_seq_printf(trace->seq, "\n");
698 trace_seq_do_printf(trace->seq);
699 trace_seq_reset(trace->seq);
700
701 timerlat_print_summary(params, trace, data);
702 timerlat_print_stats_all(params, trace, data);
703 osnoise_report_missed_events(tool);
704 }
705
706 /*
707 * timerlat_hist_usage - prints timerlat hist usage message
708 */
709 static void timerlat_hist_usage(char *usage)
710 {
711 int i;
712
713 char *msg[] = {
714 "",
715 " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\",
716 " [-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\",
717 " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\",
718 " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u|-k]",
719 " [--warm-up s] [--deepest-idle-state n]",
720 "",
721 " -h/--help: print this menu",
722 " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit",
723 " -p/--period us: timerlat period in us",
724 " -i/--irq us: stop trace if the irq latency is higher than the argument in us",
725 " -T/--thread us: stop trace if the thread latency is higher than the argument in us",
726 " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us",
727 " -c/--cpus cpus: run the tracer only on the given cpus",
728 " -H/--house-keeping cpus: run rtla control threads only on the given cpus",
729 " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited",
730 " -d/--duration time[m|h|d]: duration of the session in seconds",
731 " --dump-task: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)",
732 " -D/--debug: print debug info",
733 " -t/--trace[file]: save the stopped trace to [file|timerlat_trace.txt]",
734 " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
735 " --filter <filter>: enable a trace event filter to the previous -e event",
736 " --trigger <trigger>: enable a trace event trigger to the previous -e event",
737 " -n/--nano: display data in nanoseconds",
738 " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage",
739 " -b/--bucket-size N: set the histogram bucket size (default 1)",
740 " -E/--entries N: set the number of entries of the histogram (default 256)",
741 " --no-irq: ignore IRQ latencies",
742 " --no-thread: ignore thread latencies",
743 " --no-header: do not print header",
744 " --no-summary: do not print summary",
745 " --no-index: do not print index",
746 " --with-zeros: print zero only entries",
747 " --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency",
748 " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
749 " o:prio - use SCHED_OTHER with prio",
750 " r:prio - use SCHED_RR with prio",
751 " f:prio - use SCHED_FIFO with prio",
752 " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
753 " in nanoseconds",
754 " -u/--user-threads: use rtla user-space threads instead of kernel-space timerlat threads",
755 " -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads",
756 " -U/--user-load: enable timerlat for user-defined user-space workload",
757 " --warm-up s: let the workload run for s seconds before collecting data",
758 " --trace-buffer-size kB: set the per-cpu trace buffer size in kB",
759 " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency",
760 NULL,
761 };
762
763 if (usage)
764 fprintf(stderr, "%s\n", usage);
765
766 fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n",
767 VERSION);
768
769 for (i = 0; msg[i]; i++)
770 fprintf(stderr, "%s\n", msg[i]);
771
772 if (usage)
773 exit(EXIT_FAILURE);
774
775 exit(EXIT_SUCCESS);
776 }
777
778 /*
779 * timerlat_hist_parse_args - allocate, parse and fill the command line parameters
780 */
781 static struct timerlat_params
782 *timerlat_hist_parse_args(int argc, char *argv[])
783 {
784 struct timerlat_params *params;
785 struct trace_events *tevent;
786 int auto_thresh;
787 int retval;
788 int c;
789
790 params = calloc(1, sizeof(*params));
791 if (!params)
792 exit(1);
793
794 /* disabled by default */
795 params->dma_latency = -1;
796
797 /* disabled by default */
798 params->deepest_idle_state = -2;
799
800 /* display data in microseconds */
801 params->output_divisor = 1000;
802 params->bucket_size = 1;
803 params->entries = 256;
804
805 while (1) {
806 static struct option long_options[] = {
807 {"auto", required_argument, 0, 'a'},
808 {"cpus", required_argument, 0, 'c'},
809 {"cgroup", optional_argument, 0, 'C'},
810 {"bucket-size", required_argument, 0, 'b'},
811 {"debug", no_argument, 0, 'D'},
812 {"entries", required_argument, 0, 'E'},
813 {"duration", required_argument, 0, 'd'},
814 {"house-keeping", required_argument, 0, 'H'},
815 {"help", no_argument, 0, 'h'},
816 {"irq", required_argument, 0, 'i'},
817 {"nano", no_argument, 0, 'n'},
818 {"period", required_argument, 0, 'p'},
819 {"priority", required_argument, 0, 'P'},
820 {"stack", required_argument, 0, 's'},
821 {"thread", required_argument, 0, 'T'},
822 {"trace", optional_argument, 0, 't'},
823 {"user-threads", no_argument, 0, 'u'},
824 {"kernel-threads", no_argument, 0, 'k'},
825 {"user-load", no_argument, 0, 'U'},
826 {"event", required_argument, 0, 'e'},
827 {"no-irq", no_argument, 0, '0'},
828 {"no-thread", no_argument, 0, '1'},
829 {"no-header", no_argument, 0, '2'},
830 {"no-summary", no_argument, 0, '3'},
831 {"no-index", no_argument, 0, '4'},
832 {"with-zeros", no_argument, 0, '5'},
833 {"trigger", required_argument, 0, '6'},
834 {"filter", required_argument, 0, '7'},
835 {"dma-latency", required_argument, 0, '8'},
836 {"no-aa", no_argument, 0, '9'},
837 {"dump-task", no_argument, 0, '\1'},
838 {"warm-up", required_argument, 0, '\2'},
839 {"trace-buffer-size", required_argument, 0, '\3'},
840 {"deepest-idle-state", required_argument, 0, '\4'},
841 {0, 0, 0, 0}
842 };
843
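/* characters '0'-'9' and '\1'-'\4' act as identifiers for long-only options */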
844 /* getopt_long stores the option index here. */
845 int option_index = 0;
846
847 c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3:",
848 long_options, &option_index);
849
850 /* detect the end of the options. */
851 if (c == -1)
852 break;
853
854 switch (c) {
855 case 'a':
856 auto_thresh = get_llong_from_str(optarg);
857
858 /* set thread stop to auto_thresh */
859 params->stop_total_us = auto_thresh;
860 params->stop_us = auto_thresh;
861
862 /* get stack trace */
863 params->print_stack = auto_thresh;
864
865 /* set trace */
866 params->trace_output = "timerlat_trace.txt";
867
868 break;
869 case 'c':
870 retval = parse_cpu_set(optarg, &params->monitored_cpus);
871 if (retval)
872 timerlat_hist_usage("\nInvalid -c cpu list\n");
873 params->cpus = optarg;
874 break;
875 case 'C':
876 params->cgroup = 1;
877 if (!optarg) {
878 /* will inherit this cgroup */
879 params->cgroup_name = NULL;
880 } else if (*optarg == '=') {
881 /* skip the = */
882 params->cgroup_name = ++optarg;
883 }
884 break;
885 case 'b':
886 params->bucket_size = get_llong_from_str(optarg);
887 if ((params->bucket_size == 0) || (params->bucket_size >= 1000000))
888 timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n");
889 break;
890 case 'D':
891 config_debug = 1;
892 break;
893 case 'd':
894 params->duration = parse_seconds_duration(optarg);
895 if (!params->duration)
896 timerlat_hist_usage("Invalid -d duration\n");
897 break;
898 case 'e':
899 tevent = trace_event_alloc(optarg);
900 if (!tevent) {
901 err_msg("Error alloc trace event");
902 exit(EXIT_FAILURE);
903 }
904
905 if (params->events)
906 tevent->next = params->events;
907
908 params->events = tevent;
909 break;
910 case 'E':
911 params->entries = get_llong_from_str(optarg);
912 if ((params->entries < 10) || (params->entries > 9999999))
913 timerlat_hist_usage("Entries must be > 10 and < 9999999\n");
914 break;
915 case 'h':
916 case '?':
917 timerlat_hist_usage(NULL);
918 break;
919 case 'H':
920 params->hk_cpus = 1;
921 retval = parse_cpu_set(optarg, &params->hk_cpu_set);
922 if (retval) {
923 err_msg("Error parsing house keeping CPUs\n");
924 exit(EXIT_FAILURE);
925 }
926 break;
927 case 'i':
928 params->stop_us = get_llong_from_str(optarg);
929 break;
930 case 'k':
931 params->kernel_workload = 1;
932 break;
933 case 'n':
934 params->output_divisor = 1;
935 break;
936 case 'p':
937 params->timerlat_period_us = get_llong_from_str(optarg);
938 if (params->timerlat_period_us > 1000000)
939 timerlat_hist_usage("Period longer than 1 s\n");
940 break;
941 case 'P':
942 retval = parse_prio(optarg, &params->sched_param);
943 if (retval == -1)
944 timerlat_hist_usage("Invalid -P priority");
945 params->set_sched = 1;
946 break;
947 case 's':
948 params->print_stack = get_llong_from_str(optarg);
949 break;
950 case 'T':
951 params->stop_total_us = get_llong_from_str(optarg);
952 break;
953 case 't':
954 if (optarg) {
955 if (optarg[0] == '=')
956 params->trace_output = &optarg[1];
957 else
958 params->trace_output = &optarg[0];
959 } else if (optind < argc && argv[optind][0] != '-')
960 params->trace_output = argv[optind];
961 else
962 params->trace_output = "timerlat_trace.txt";
963 break;
964 case 'u':
965 params->user_workload = 1;
966 /* fall through: -u implies -U */
967 case 'U':
968 params->user_data = 1;
969 break;
970 case '0': /* no irq */
971 params->no_irq = 1;
972 break;
973 case '1': /* no thread */
974 params->no_thread = 1;
975 break;
976 case '2': /* no header */
977 params->no_header = 1;
978 break;
979 case '3': /* no summary */
980 params->no_summary = 1;
981 break;
982 case '4': /* no index */
983 params->no_index = 1;
984 break;
985 case '5': /* with zeros */
986 params->with_zeros = 1;
987 break;
988 case '6': /* trigger */
989 if (params->events) {
990 retval = trace_event_add_trigger(params->events, optarg);
991 if (retval) {
992 err_msg("Error adding trigger %s\n", optarg);
993 exit(EXIT_FAILURE);
994 }
995 } else {
996 timerlat_hist_usage("--trigger requires a previous -e\n");
997 }
998 break;
999 case '7': /* filter */
1000 if (params->events) {
1001 retval = trace_event_add_filter(params->events, optarg);
1002 if (retval) {
1003 err_msg("Error adding filter %s\n", optarg);
1004 exit(EXIT_FAILURE);
1005 }
1006 } else {
1007 timerlat_hist_usage("--filter requires a previous -e\n");
1008 }
1009 break;
1010 case '8':
1011 params->dma_latency = get_llong_from_str(optarg);
1012 if (params->dma_latency < 0 || params->dma_latency > 10000) {
1013 err_msg("--dma-latency needs to be >= 0 and < 10000");
1014 exit(EXIT_FAILURE);
1015 }
1016 break;
1017 case '9':
1018 params->no_aa = 1;
1019 break;
1020 case '\1':
1021 params->dump_tasks = 1;
1022 break;
1023 case '\2':
1024 params->warmup = get_llong_from_str(optarg);
1025 break;
1026 case '\3':
1027 params->buffer_size = get_llong_from_str(optarg);
1028 break;
1029 case '\4':
1030 params->deepest_idle_state = get_llong_from_str(optarg);
1031 break;
1032 default:
1033 timerlat_hist_usage("Invalid option");
1034 }
1035 }
1036
1037 if (geteuid()) {
1038 err_msg("rtla needs root permission\n");
1039 exit(EXIT_FAILURE);
1040 }
1041
1042 if (params->no_irq && params->no_thread)
1043 timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here");
1044
1045 if (params->no_index && !params->with_zeros)
1046 timerlat_hist_usage("no-index set but with-zeros not set - it does not make sense");
1047
1048 /*
1049 * Auto analysis only happens when a stop-tracing condition is set, thus:
1050 */
1051 if (!params->stop_us && !params->stop_total_us)
1052 params->no_aa = 1;
1053
1054 if (params->kernel_workload && params->user_workload)
1055 timerlat_hist_usage("--kernel-threads and --user-threads are mutually exclusive!");
1056
1057 return params;
1058 }
1059
1060 /*
1061 * timerlat_hist_apply_config - apply the hist configs to the initialized tool
1062 */
1063 static int
1064 timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_params *params)
1065 {
1066 int retval;
1067
1068 retval = timerlat_apply_config(tool, params);
1069 if (retval)
1070 goto out_err;
1071
1072 return 0;
1073
1074 out_err:
1075 return -1;
1076 }
1077
1078 /*
1079 * timerlat_init_hist - initialize a timerlat hist tool with parameters
1080 */
1081 static struct osnoise_tool
1082 *timerlat_init_hist(struct timerlat_params *params)
1083 {
1084 struct osnoise_tool *tool;
1085 int nr_cpus;
1086
1087 nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
1088
1089 tool = osnoise_init_tool("timerlat_hist");
1090 if (!tool)
1091 return NULL;
1092
1093 tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size);
1094 if (!tool->data)
1095 goto out_err;
1096
1097 tool->params = params;
1098
1099 tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
1100 timerlat_hist_handler, tool);
1101
1102 return tool;
1103
1104 out_err:
1105 osnoise_destroy_tool(tool);
1106 return NULL;
1107 }
1108
1109 static int stop_tracing;
1110 static struct trace_instance *hist_inst = NULL;
1111 static void stop_hist(int sig)
1112 {
1113 if (stop_tracing) {
1114 /*
1115 * Stop requested twice in a row; abort event processing and
1116 * exit immediately
1117 */
1118 tracefs_iterate_stop(hist_inst->inst);
1119 return;
1120 }
1121 stop_tracing = 1;
1122 if (hist_inst)
1123 trace_instance_stop(hist_inst);
1124 }
1125
1126 /*
1127 * timerlat_hist_set_signals - handles the signal to stop the tool
1128 */
1129 static void
1130 timerlat_hist_set_signals(struct timerlat_params *params)
1131 {
1132 signal(SIGINT, stop_hist);
1133 if (params->duration) {
1134 signal(SIGALRM, stop_hist);
1135 alarm(params->duration);
1136 }
1137 }
1138
1139 int timerlat_hist_main(int argc, char *argv[])
1140 {
1141 struct timerlat_params *params;
1142 struct osnoise_tool *record = NULL;
1143 struct timerlat_u_params params_u;
1144 enum result return_value = ERROR;
1145 struct osnoise_tool *tool = NULL;
1146 struct osnoise_tool *aa = NULL;
1147 struct trace_instance *trace;
1148 int dma_latency_fd = -1;
1149 pthread_t timerlat_u;
1150 int retval;
1151 int nr_cpus, i;
1152 bool no_bpf = false;
1153
1154 params = timerlat_hist_parse_args(argc, argv);
1155 if (!params)
1156 exit(1);
1157
1158 tool = timerlat_init_hist(params);
1159 if (!tool) {
1160 err_msg("Could not init timerlat hist\n");
1161 goto out_exit;
1162 }
1163
1164 retval = timerlat_hist_apply_config(tool, params);
1165 if (retval) {
1166 err_msg("Could not apply config\n");
1167 goto out_free;
1168 }
1169
1170 trace = &tool->trace;
1171 /*
1172 * Save trace instance into global variable so that SIGINT can stop
1173 * the timerlat tracer.
1174 * Otherwise, rtla could loop indefinitely when overloaded.
1175 */
1176 hist_inst = trace;
1177
1178 if (getenv("RTLA_NO_BPF") && strncmp(getenv("RTLA_NO_BPF"), "1", 2) == 0) {
1179 debug_msg("RTLA_NO_BPF set, disabling BPF\n");
1180 no_bpf = true;
1181 }
1182
1183 if (!no_bpf && !tep_find_event_by_name(trace->tep, "osnoise", "timerlat_sample")) {
1184 debug_msg("osnoise:timerlat_sample missing, disabling BPF\n");
1185 no_bpf = true;
1186 }
1187
1188 if (!no_bpf) {
1189 retval = timerlat_bpf_init(params);
1190 if (retval) {
1191 debug_msg("Could not enable BPF\n");
1192 no_bpf = true;
1193 }
1194 }
1195
1196 retval = enable_timerlat(trace);
1197 if (retval) {
1198 err_msg("Failed to enable timerlat tracer\n");
1199 goto out_free;
1200 }
1201
1202 if (params->set_sched) {
1203 retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param);
1204 if (retval) {
1205 err_msg("Failed to set sched parameters\n");
1206 goto out_free;
1207 }
1208 }
1209
1210 if (params->cgroup && !params->user_workload) {
1211 retval = set_comm_cgroup("timerlat/", params->cgroup_name);
1212 if (!retval) {
1213 err_msg("Failed to move threads to cgroup\n");
1214 goto out_free;
1215 }
1216 }
1217
1218 if (params->dma_latency >= 0) {
1219 dma_latency_fd = set_cpu_dma_latency(params->dma_latency);
1220 if (dma_latency_fd < 0) {
1221 err_msg("Could not set /dev/cpu_dma_latency.\n");
1222 goto out_free;
1223 }
1224 }
1225
1226 if (params->deepest_idle_state >= -1) {
1227 if (!have_libcpupower_support()) {
1228 err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n");
1229 goto out_free;
1230 }
1231
1232 nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
1233
1234 for (i = 0; i < nr_cpus; i++) {
1235 if (params->cpus && !CPU_ISSET(i, &params->monitored_cpus))
1236 continue;
1237 if (save_cpu_idle_disable_state(i) < 0) {
1238 err_msg("Could not save cpu idle state.\n");
1239 goto out_free;
1240 }
1241 if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) {
1242 err_msg("Could not set deepest cpu idle state.\n");
1243 goto out_free;
1244 }
1245 }
1246 }
1247
1248 if (params->trace_output) {
1249 record = osnoise_init_trace_tool("timerlat");
1250 if (!record) {
1251 err_msg("Failed to enable the trace instance\n");
1252 goto out_free;
1253 }
1254
1255 if (params->events) {
1256 retval = trace_events_enable(&record->trace, params->events);
1257 if (retval)
1258 goto out_hist;
1259 }
1260
1261 if (params->buffer_size > 0) {
1262 retval = trace_set_buffer_size(&record->trace, params->buffer_size);
1263 if (retval)
1264 goto out_hist;
1265 }
1266 }
1267
1268 if (!params->no_aa) {
1269 aa = osnoise_init_tool("timerlat_aa");
1270 if (!aa)
1271 goto out_hist;
1272
1273 retval = timerlat_aa_init(aa, params->dump_tasks);
1274 if (retval) {
1275 err_msg("Failed to enable the auto analysis instance\n");
1276 goto out_hist;
1277 }
1278
1279 retval = enable_timerlat(&aa->trace);
1280 if (retval) {
1281 err_msg("Failed to enable timerlat tracer\n");
1282 goto out_hist;
1283 }
1284 }
1285
1286 if (params->user_workload) {
1287 /* cleared when rtla asks the user-space workload to stop */
1288 params_u.should_run = 1;
1289 /* set when all user-space threads have stopped */
1290 params_u.stopped_running = 0;
1291
1292 params_u.set = &params->monitored_cpus;
1293 if (params->set_sched)
1294 params_u.sched_param = &params->sched_param;
1295 else
1296 params_u.sched_param = NULL;
1297
1298 params_u.cgroup_name = params->cgroup_name;
1299
1300 retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, &params_u);
1301 if (retval)
1302 err_msg("Error creating timerlat user-space threads\n");
1303 }
1304
1305 if (params->warmup > 0) {
1306 debug_msg("Warming up for %d seconds\n", params->warmup);
1307 sleep(params->warmup);
1308 if (stop_tracing)
1309 goto out_hist;
1310 }
1311
1312 /*
1313 * Start the tracers here, after having set all instances.
1314 *
1315 * Let the trace instance start first for the case of hitting a stop
1316 * tracing while enabling other instances. The trace instance is the
1317 * one with most valuable information.
1318 */
1319 if (params->trace_output)
1320 trace_instance_start(&record->trace);
1321 if (!params->no_aa)
1322 trace_instance_start(&aa->trace);
1323 if (no_bpf) {
1324 trace_instance_start(trace);
1325 } else {
1326 retval = timerlat_bpf_attach();
1327 if (retval) {
1328 err_msg("Error attaching BPF program\n");
1329 goto out_hist;
1330 }
1331 }
1332
1333 tool->start_time = time(NULL);
1334 timerlat_hist_set_signals(params);
1335
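/* without BPF, periodically drain trace events in user space; with BPF, samples are aggregated in kernel space until tracing stops */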
1336 if (no_bpf) {
1337 while (!stop_tracing) {
1338 sleep(params->sleep_time);
1339
1340 retval = tracefs_iterate_raw_events(trace->tep,
1341 trace->inst,
1342 NULL,
1343 0,
1344 collect_registered_events,
1345 trace);
1346 if (retval < 0) {
1347 err_msg("Error iterating on events\n");
1348 goto out_hist;
1349 }
1350
1351 if (osnoise_trace_is_off(tool, record))
1352 break;
1353
1354 /* are there still any user threads? */
1355 if (params->user_workload) {
1356 if (params_u.stopped_running) {
1357 debug_msg("timerlat user-space threads stopped!\n");
1358 break;
1359 }
1360 }
1361 }
1362 } else
1363 timerlat_bpf_wait(-1);
1364
1365 if (!no_bpf) {
1366 timerlat_bpf_detach();
1367 retval = timerlat_hist_bpf_pull_data(tool);
1368 if (retval) {
1369 err_msg("Error pulling BPF data\n");
1370 goto out_hist;
1371 }
1372 }
1373
1374 if (params->user_workload && !params_u.stopped_running) {
1375 params_u.should_run = 0;
1376 sleep(1);
1377 }
1378
1379 timerlat_print_stats(params, tool);
1380
1381 return_value = PASSED;
1382
1383 if (osnoise_trace_is_off(tool, record) && !stop_tracing) {
1384 printf("rtla timerlat hit stop tracing\n");
1385
1386 if (!params->no_aa)
1387 timerlat_auto_analysis(params->stop_us, params->stop_total_us);
1388
1389 save_trace_to_file(record ? record->trace.inst : NULL,
1390 params->trace_output);
1391 return_value = FAILED;
1392 }
1393
1394 out_hist:
1395 timerlat_aa_destroy();
1396 if (dma_latency_fd >= 0)
1397 close(dma_latency_fd);
1398 if (params->deepest_idle_state >= -1) {
1399 for (i = 0; i < nr_cpus; i++) {
1400 if (params->cpus && !CPU_ISSET(i, &params->monitored_cpus))
1401 continue;
1402 restore_cpu_idle_disable_state(i);
1403 }
1404 }
1405 trace_events_destroy(&record->trace, params->events);
1406 params->events = NULL;
1407 out_free:
1408 timerlat_free_histogram(tool->data);
1409 osnoise_destroy_tool(aa);
1410 osnoise_destroy_tool(record);
1411 osnoise_destroy_tool(tool);
1412 free(params);
1413 free_cpu_idle_disable_states();
1414 if (!no_bpf)
1415 timerlat_bpf_destroy();
1416 out_exit:
1417 exit(return_value);
1418 }
1419