1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #define _GNU_SOURCE 7 #include <getopt.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <signal.h> 11 #include <unistd.h> 12 #include <stdio.h> 13 #include <time.h> 14 #include <errno.h> 15 #include <sched.h> 16 #include <pthread.h> 17 18 #include "utils.h" 19 #include "osnoise.h" 20 #include "timerlat.h" 21 #include "timerlat_aa.h" 22 #include "timerlat_u.h" 23 24 struct timerlat_top_params { 25 char *cpus; 26 cpu_set_t monitored_cpus; 27 char *trace_output; 28 char *cgroup_name; 29 unsigned long long runtime; 30 long long stop_us; 31 long long stop_total_us; 32 long long timerlat_period_us; 33 long long print_stack; 34 int sleep_time; 35 int output_divisor; 36 int duration; 37 int quiet; 38 int set_sched; 39 int dma_latency; 40 int no_aa; 41 int aa_only; 42 int dump_tasks; 43 int cgroup; 44 int hk_cpus; 45 int user_top; 46 int user_workload; 47 int kernel_workload; 48 int pretty_output; 49 int warmup; 50 int buffer_size; 51 int deepest_idle_state; 52 cpu_set_t hk_cpu_set; 53 struct sched_attr sched_param; 54 struct trace_events *events; 55 }; 56 57 struct timerlat_top_cpu { 58 unsigned long long irq_count; 59 unsigned long long thread_count; 60 unsigned long long user_count; 61 62 unsigned long long cur_irq; 63 unsigned long long min_irq; 64 unsigned long long sum_irq; 65 unsigned long long max_irq; 66 67 unsigned long long cur_thread; 68 unsigned long long min_thread; 69 unsigned long long sum_thread; 70 unsigned long long max_thread; 71 72 unsigned long long cur_user; 73 unsigned long long min_user; 74 unsigned long long sum_user; 75 unsigned long long max_user; 76 }; 77 78 struct timerlat_top_data { 79 struct timerlat_top_cpu *cpu_data; 80 int nr_cpus; 81 }; 82 83 /* 84 * timerlat_free_top - free runtime data 85 */ 86 static void 87 timerlat_free_top(struct timerlat_top_data *data) 88 { 89 free(data->cpu_data); 90 free(data); 91 } 92 93 /* 94 * timerlat_alloc_histogram - alloc runtime data 95 */ 96 static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus) 97 { 98 struct timerlat_top_data *data; 99 int cpu; 100 101 data = calloc(1, sizeof(*data)); 102 if (!data) 103 return NULL; 104 105 data->nr_cpus = nr_cpus; 106 107 /* one set of histograms per CPU */ 108 data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); 109 if (!data->cpu_data) 110 goto cleanup; 111 112 /* set the min to max */ 113 for (cpu = 0; cpu < nr_cpus; cpu++) { 114 data->cpu_data[cpu].min_irq = ~0; 115 data->cpu_data[cpu].min_thread = ~0; 116 data->cpu_data[cpu].min_user = ~0; 117 } 118 119 return data; 120 121 cleanup: 122 timerlat_free_top(data); 123 return NULL; 124 } 125 126 static void 127 timerlat_top_reset_sum(struct timerlat_top_cpu *summary) 128 { 129 memset(summary, 0, sizeof(*summary)); 130 summary->min_irq = ~0; 131 summary->min_thread = ~0; 132 summary->min_user = ~0; 133 } 134 135 static void 136 timerlat_top_update_sum(struct osnoise_tool *tool, int cpu, struct timerlat_top_cpu *sum) 137 { 138 struct timerlat_top_data *data = tool->data; 139 struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; 140 141 sum->irq_count += cpu_data->irq_count; 142 update_min(&sum->min_irq, &cpu_data->min_irq); 143 update_sum(&sum->sum_irq, &cpu_data->sum_irq); 144 update_max(&sum->max_irq, &cpu_data->max_irq); 145 146 sum->thread_count += cpu_data->thread_count; 147 update_min(&sum->min_thread, &cpu_data->min_thread); 148 update_sum(&sum->sum_thread, &cpu_data->sum_thread); 149 update_max(&sum->max_thread, &cpu_data->max_thread); 150 151 sum->user_count += cpu_data->user_count; 152 update_min(&sum->min_user, &cpu_data->min_user); 153 update_sum(&sum->sum_user, &cpu_data->sum_user); 154 update_max(&sum->max_user, &cpu_data->max_user); 155 } 156 157 /* 158 * timerlat_hist_update - record a new timerlat occurent on cpu, updating data 159 */ 160 static void 161 timerlat_top_update(struct osnoise_tool *tool, int cpu, 162 unsigned long long thread, 163 unsigned long long latency) 164 { 165 struct timerlat_top_data *data = tool->data; 166 struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; 167 168 if (!thread) { 169 cpu_data->irq_count++; 170 cpu_data->cur_irq = latency; 171 update_min(&cpu_data->min_irq, &latency); 172 update_sum(&cpu_data->sum_irq, &latency); 173 update_max(&cpu_data->max_irq, &latency); 174 } else if (thread == 1) { 175 cpu_data->thread_count++; 176 cpu_data->cur_thread = latency; 177 update_min(&cpu_data->min_thread, &latency); 178 update_sum(&cpu_data->sum_thread, &latency); 179 update_max(&cpu_data->max_thread, &latency); 180 } else { 181 cpu_data->user_count++; 182 cpu_data->cur_user = latency; 183 update_min(&cpu_data->min_user, &latency); 184 update_sum(&cpu_data->sum_user, &latency); 185 update_max(&cpu_data->max_user, &latency); 186 } 187 } 188 189 /* 190 * timerlat_top_handler - this is the handler for timerlat tracer events 191 */ 192 static int 193 timerlat_top_handler(struct trace_seq *s, struct tep_record *record, 194 struct tep_event *event, void *context) 195 { 196 struct trace_instance *trace = context; 197 struct timerlat_top_params *params; 198 unsigned long long latency, thread; 199 struct osnoise_tool *top; 200 int cpu = record->cpu; 201 202 top = container_of(trace, struct osnoise_tool, trace); 203 params = top->params; 204 205 if (!params->aa_only) { 206 tep_get_field_val(s, event, "context", record, &thread, 1); 207 tep_get_field_val(s, event, "timer_latency", record, &latency, 1); 208 209 timerlat_top_update(top, cpu, thread, latency); 210 } 211 212 return 0; 213 } 214 215 /* 216 * timerlat_top_header - print the header of the tool output 217 */ 218 static void timerlat_top_header(struct timerlat_top_params *params, struct osnoise_tool *top) 219 { 220 struct trace_seq *s = top->trace.seq; 221 char duration[26]; 222 223 get_duration(top->start_time, duration, sizeof(duration)); 224 225 if (params->pretty_output) 226 trace_seq_printf(s, "\033[2;37;40m"); 227 228 trace_seq_printf(s, " Timer Latency "); 229 if (params->user_top) 230 trace_seq_printf(s, " "); 231 232 if (params->pretty_output) 233 trace_seq_printf(s, "\033[0;0;0m"); 234 trace_seq_printf(s, "\n"); 235 236 trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)", duration, 237 params->output_divisor == 1 ? "ns" : "us", 238 params->output_divisor == 1 ? "ns" : "us"); 239 240 if (params->user_top) { 241 trace_seq_printf(s, " | Ret user Timer Latency (%s)", 242 params->output_divisor == 1 ? "ns" : "us"); 243 } 244 245 trace_seq_printf(s, "\n"); 246 if (params->pretty_output) 247 trace_seq_printf(s, "\033[2;30;47m"); 248 249 trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); 250 if (params->user_top) 251 trace_seq_printf(s, " | cur min avg max"); 252 253 if (params->pretty_output) 254 trace_seq_printf(s, "\033[0;0;0m"); 255 trace_seq_printf(s, "\n"); 256 } 257 258 static const char *no_value = " -"; 259 260 /* 261 * timerlat_top_print - prints the output of a given CPU 262 */ 263 static void timerlat_top_print(struct osnoise_tool *top, int cpu) 264 { 265 266 struct timerlat_top_params *params = top->params; 267 struct timerlat_top_data *data = top->data; 268 struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; 269 int divisor = params->output_divisor; 270 struct trace_seq *s = top->trace.seq; 271 272 if (divisor == 0) 273 return; 274 275 /* 276 * Skip if no data is available: is this cpu offline? 277 */ 278 if (!cpu_data->irq_count && !cpu_data->thread_count) 279 return; 280 281 /* 282 * Unless trace is being lost, IRQ counter is always the max. 283 */ 284 trace_seq_printf(s, "%3d #%-9llu |", cpu, cpu_data->irq_count); 285 286 if (!cpu_data->irq_count) { 287 trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value); 288 } else { 289 trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor); 290 trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor); 291 trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor); 292 trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor); 293 } 294 295 if (!cpu_data->thread_count) { 296 trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value); 297 } else { 298 trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor); 299 trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); 300 trace_seq_printf(s, "%9llu ", 301 (cpu_data->sum_thread / cpu_data->thread_count) / divisor); 302 trace_seq_printf(s, "%9llu", cpu_data->max_thread / divisor); 303 } 304 305 if (!params->user_top) { 306 trace_seq_printf(s, "\n"); 307 return; 308 } 309 310 trace_seq_printf(s, " |"); 311 312 if (!cpu_data->user_count) { 313 trace_seq_printf(s, "%s %s %s %s\n", no_value, no_value, no_value, no_value); 314 } else { 315 trace_seq_printf(s, "%9llu ", cpu_data->cur_user / divisor); 316 trace_seq_printf(s, "%9llu ", cpu_data->min_user / divisor); 317 trace_seq_printf(s, "%9llu ", 318 (cpu_data->sum_user / cpu_data->user_count) / divisor); 319 trace_seq_printf(s, "%9llu\n", cpu_data->max_user / divisor); 320 } 321 } 322 323 /* 324 * timerlat_top_print_sum - prints the summary output 325 */ 326 static void 327 timerlat_top_print_sum(struct osnoise_tool *top, struct timerlat_top_cpu *summary) 328 { 329 const char *split = "----------------------------------------"; 330 struct timerlat_top_params *params = top->params; 331 unsigned long long count = summary->irq_count; 332 int divisor = params->output_divisor; 333 struct trace_seq *s = top->trace.seq; 334 int e = 0; 335 336 if (divisor == 0) 337 return; 338 339 /* 340 * Skip if no data is available: is this cpu offline? 341 */ 342 if (!summary->irq_count && !summary->thread_count) 343 return; 344 345 while (count > 999999) { 346 e++; 347 count /= 10; 348 } 349 350 trace_seq_printf(s, "%.*s|%.*s|%.*s", 15, split, 40, split, 39, split); 351 if (params->user_top) 352 trace_seq_printf(s, "-|%.*s", 39, split); 353 trace_seq_printf(s, "\n"); 354 355 trace_seq_printf(s, "ALL #%-6llu e%d |", count, e); 356 357 if (!summary->irq_count) { 358 trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value); 359 } else { 360 trace_seq_printf(s, " "); 361 trace_seq_printf(s, "%9llu ", summary->min_irq / params->output_divisor); 362 trace_seq_printf(s, "%9llu ", (summary->sum_irq / summary->irq_count) / divisor); 363 trace_seq_printf(s, "%9llu |", summary->max_irq / divisor); 364 } 365 366 if (!summary->thread_count) { 367 trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value); 368 } else { 369 trace_seq_printf(s, " "); 370 trace_seq_printf(s, "%9llu ", summary->min_thread / divisor); 371 trace_seq_printf(s, "%9llu ", 372 (summary->sum_thread / summary->thread_count) / divisor); 373 trace_seq_printf(s, "%9llu", summary->max_thread / divisor); 374 } 375 376 if (!params->user_top) { 377 trace_seq_printf(s, "\n"); 378 return; 379 } 380 381 trace_seq_printf(s, " |"); 382 383 if (!summary->user_count) { 384 trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value); 385 } else { 386 trace_seq_printf(s, " "); 387 trace_seq_printf(s, "%9llu ", summary->min_user / divisor); 388 trace_seq_printf(s, "%9llu ", 389 (summary->sum_user / summary->user_count) / divisor); 390 trace_seq_printf(s, "%9llu\n", summary->max_user / divisor); 391 } 392 } 393 394 /* 395 * clear_terminal - clears the output terminal 396 */ 397 static void clear_terminal(struct trace_seq *seq) 398 { 399 if (!config_debug) 400 trace_seq_printf(seq, "\033c"); 401 } 402 403 /* 404 * timerlat_print_stats - print data for all cpus 405 */ 406 static void 407 timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top) 408 { 409 struct trace_instance *trace = &top->trace; 410 struct timerlat_top_cpu summary; 411 static int nr_cpus = -1; 412 int i; 413 414 if (params->aa_only) 415 return; 416 417 if (nr_cpus == -1) 418 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 419 420 if (!params->quiet) 421 clear_terminal(trace->seq); 422 423 timerlat_top_reset_sum(&summary); 424 425 timerlat_top_header(params, top); 426 427 for (i = 0; i < nr_cpus; i++) { 428 if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) 429 continue; 430 timerlat_top_print(top, i); 431 timerlat_top_update_sum(top, i, &summary); 432 } 433 434 timerlat_top_print_sum(top, &summary); 435 436 trace_seq_do_printf(trace->seq); 437 trace_seq_reset(trace->seq); 438 osnoise_report_missed_events(top); 439 } 440 441 /* 442 * timerlat_top_usage - prints timerlat top usage message 443 */ 444 static void timerlat_top_usage(char *usage) 445 { 446 int i; 447 448 static const char *const msg[] = { 449 "", 450 " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", 451 " [[-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", 452 " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s] [--deepest-idle-state n]", 453 "", 454 " -h/--help: print this menu", 455 " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", 456 " --aa-only us: stop if <us> latency is hit, only printing the auto analysis (reduces CPU usage)", 457 " -p/--period us: timerlat period in us", 458 " -i/--irq us: stop trace if the irq latency is higher than the argument in us", 459 " -T/--thread us: stop trace if the thread latency is higher than the argument in us", 460 " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", 461 " -c/--cpus cpus: run the tracer only on the given cpus", 462 " -H/--house-keeping cpus: run rtla control threads only on the given cpus", 463 " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", 464 " -d/--duration time[s|m|h|d]: duration of the session", 465 " -D/--debug: print debug info", 466 " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", 467 " -t/--trace[file]: save the stopped trace to [file|timerlat_trace.txt]", 468 " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", 469 " --filter <command>: enable a trace event filter to the previous -e event", 470 " --trigger <command>: enable a trace event trigger to the previous -e event", 471 " -n/--nano: display data in nanoseconds", 472 " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage", 473 " -q/--quiet print only a summary at the end", 474 " --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency", 475 " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", 476 " o:prio - use SCHED_OTHER with prio", 477 " r:prio - use SCHED_RR with prio", 478 " f:prio - use SCHED_FIFO with prio", 479 " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", 480 " in nanoseconds", 481 " -u/--user-threads: use rtla user-space threads instead of kernel-space timerlat threads", 482 " -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads", 483 " -U/--user-load: enable timerlat for user-defined user-space workload", 484 " --warm-up s: let the workload run for s seconds before collecting data", 485 " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", 486 " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", 487 NULL, 488 }; 489 490 if (usage) 491 fprintf(stderr, "%s\n", usage); 492 493 fprintf(stderr, "rtla timerlat top: a per-cpu summary of the timer latency (version %s)\n", 494 VERSION); 495 496 for (i = 0; msg[i]; i++) 497 fprintf(stderr, "%s\n", msg[i]); 498 499 if (usage) 500 exit(EXIT_FAILURE); 501 502 exit(EXIT_SUCCESS); 503 } 504 505 /* 506 * timerlat_top_parse_args - allocs, parse and fill the cmd line parameters 507 */ 508 static struct timerlat_top_params 509 *timerlat_top_parse_args(int argc, char **argv) 510 { 511 struct timerlat_top_params *params; 512 struct trace_events *tevent; 513 long long auto_thresh; 514 int retval; 515 int c; 516 517 params = calloc(1, sizeof(*params)); 518 if (!params) 519 exit(1); 520 521 /* disabled by default */ 522 params->dma_latency = -1; 523 524 /* disabled by default */ 525 params->deepest_idle_state = -2; 526 527 /* display data in microseconds */ 528 params->output_divisor = 1000; 529 530 while (1) { 531 static struct option long_options[] = { 532 {"auto", required_argument, 0, 'a'}, 533 {"cpus", required_argument, 0, 'c'}, 534 {"cgroup", optional_argument, 0, 'C'}, 535 {"debug", no_argument, 0, 'D'}, 536 {"duration", required_argument, 0, 'd'}, 537 {"event", required_argument, 0, 'e'}, 538 {"help", no_argument, 0, 'h'}, 539 {"house-keeping", required_argument, 0, 'H'}, 540 {"irq", required_argument, 0, 'i'}, 541 {"nano", no_argument, 0, 'n'}, 542 {"period", required_argument, 0, 'p'}, 543 {"priority", required_argument, 0, 'P'}, 544 {"quiet", no_argument, 0, 'q'}, 545 {"stack", required_argument, 0, 's'}, 546 {"thread", required_argument, 0, 'T'}, 547 {"trace", optional_argument, 0, 't'}, 548 {"user-threads", no_argument, 0, 'u'}, 549 {"kernel-threads", no_argument, 0, 'k'}, 550 {"user-load", no_argument, 0, 'U'}, 551 {"trigger", required_argument, 0, '0'}, 552 {"filter", required_argument, 0, '1'}, 553 {"dma-latency", required_argument, 0, '2'}, 554 {"no-aa", no_argument, 0, '3'}, 555 {"dump-tasks", no_argument, 0, '4'}, 556 {"aa-only", required_argument, 0, '5'}, 557 {"warm-up", required_argument, 0, '6'}, 558 {"trace-buffer-size", required_argument, 0, '7'}, 559 {"deepest-idle-state", required_argument, 0, '8'}, 560 {0, 0, 0, 0} 561 }; 562 563 /* getopt_long stores the option index here. */ 564 int option_index = 0; 565 566 c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:knp:P:qs:t::T:uU0:1:2:345:6:7:", 567 long_options, &option_index); 568 569 /* detect the end of the options. */ 570 if (c == -1) 571 break; 572 573 switch (c) { 574 case 'a': 575 auto_thresh = get_llong_from_str(optarg); 576 577 /* set thread stop to auto_thresh */ 578 params->stop_total_us = auto_thresh; 579 params->stop_us = auto_thresh; 580 581 /* get stack trace */ 582 params->print_stack = auto_thresh; 583 584 /* set trace */ 585 params->trace_output = "timerlat_trace.txt"; 586 break; 587 case '5': 588 /* it is here because it is similar to -a */ 589 auto_thresh = get_llong_from_str(optarg); 590 591 /* set thread stop to auto_thresh */ 592 params->stop_total_us = auto_thresh; 593 params->stop_us = auto_thresh; 594 595 /* get stack trace */ 596 params->print_stack = auto_thresh; 597 598 /* set aa_only to avoid parsing the trace */ 599 params->aa_only = 1; 600 break; 601 case 'c': 602 retval = parse_cpu_set(optarg, ¶ms->monitored_cpus); 603 if (retval) 604 timerlat_top_usage("\nInvalid -c cpu list\n"); 605 params->cpus = optarg; 606 break; 607 case 'C': 608 params->cgroup = 1; 609 if (!optarg) { 610 /* will inherit this cgroup */ 611 params->cgroup_name = NULL; 612 } else if (*optarg == '=') { 613 /* skip the = */ 614 params->cgroup_name = ++optarg; 615 } 616 break; 617 case 'D': 618 config_debug = 1; 619 break; 620 case 'd': 621 params->duration = parse_seconds_duration(optarg); 622 if (!params->duration) 623 timerlat_top_usage("Invalid -d duration\n"); 624 break; 625 case 'e': 626 tevent = trace_event_alloc(optarg); 627 if (!tevent) { 628 err_msg("Error alloc trace event"); 629 exit(EXIT_FAILURE); 630 } 631 632 if (params->events) 633 tevent->next = params->events; 634 params->events = tevent; 635 break; 636 case 'h': 637 case '?': 638 timerlat_top_usage(NULL); 639 break; 640 case 'H': 641 params->hk_cpus = 1; 642 retval = parse_cpu_set(optarg, ¶ms->hk_cpu_set); 643 if (retval) { 644 err_msg("Error parsing house keeping CPUs\n"); 645 exit(EXIT_FAILURE); 646 } 647 break; 648 case 'i': 649 params->stop_us = get_llong_from_str(optarg); 650 break; 651 case 'k': 652 params->kernel_workload = true; 653 break; 654 case 'n': 655 params->output_divisor = 1; 656 break; 657 case 'p': 658 params->timerlat_period_us = get_llong_from_str(optarg); 659 if (params->timerlat_period_us > 1000000) 660 timerlat_top_usage("Period longer than 1 s\n"); 661 break; 662 case 'P': 663 retval = parse_prio(optarg, ¶ms->sched_param); 664 if (retval == -1) 665 timerlat_top_usage("Invalid -P priority"); 666 params->set_sched = 1; 667 break; 668 case 'q': 669 params->quiet = 1; 670 break; 671 case 's': 672 params->print_stack = get_llong_from_str(optarg); 673 break; 674 case 'T': 675 params->stop_total_us = get_llong_from_str(optarg); 676 break; 677 case 't': 678 if (optarg) { 679 if (optarg[0] == '=') 680 params->trace_output = &optarg[1]; 681 else 682 params->trace_output = &optarg[0]; 683 } else if (optind < argc && argv[optind][0] != '-') 684 params->trace_output = argv[optind]; 685 else 686 params->trace_output = "timerlat_trace.txt"; 687 688 break; 689 case 'u': 690 params->user_workload = true; 691 /* fallback: -u implies -U */ 692 case 'U': 693 params->user_top = true; 694 break; 695 case '0': /* trigger */ 696 if (params->events) { 697 retval = trace_event_add_trigger(params->events, optarg); 698 if (retval) { 699 err_msg("Error adding trigger %s\n", optarg); 700 exit(EXIT_FAILURE); 701 } 702 } else { 703 timerlat_top_usage("--trigger requires a previous -e\n"); 704 } 705 break; 706 case '1': /* filter */ 707 if (params->events) { 708 retval = trace_event_add_filter(params->events, optarg); 709 if (retval) { 710 err_msg("Error adding filter %s\n", optarg); 711 exit(EXIT_FAILURE); 712 } 713 } else { 714 timerlat_top_usage("--filter requires a previous -e\n"); 715 } 716 break; 717 case '2': /* dma-latency */ 718 params->dma_latency = get_llong_from_str(optarg); 719 if (params->dma_latency < 0 || params->dma_latency > 10000) { 720 err_msg("--dma-latency needs to be >= 0 and < 10000"); 721 exit(EXIT_FAILURE); 722 } 723 break; 724 case '3': /* no-aa */ 725 params->no_aa = 1; 726 break; 727 case '4': 728 params->dump_tasks = 1; 729 break; 730 case '6': 731 params->warmup = get_llong_from_str(optarg); 732 break; 733 case '7': 734 params->buffer_size = get_llong_from_str(optarg); 735 break; 736 case '8': 737 params->deepest_idle_state = get_llong_from_str(optarg); 738 break; 739 default: 740 timerlat_top_usage("Invalid option"); 741 } 742 } 743 744 if (geteuid()) { 745 err_msg("rtla needs root permission\n"); 746 exit(EXIT_FAILURE); 747 } 748 749 /* 750 * Auto analysis only happens if stop tracing, thus: 751 */ 752 if (!params->stop_us && !params->stop_total_us) 753 params->no_aa = 1; 754 755 if (params->no_aa && params->aa_only) 756 timerlat_top_usage("--no-aa and --aa-only are mutually exclusive!"); 757 758 if (params->kernel_workload && params->user_workload) 759 timerlat_top_usage("--kernel-threads and --user-threads are mutually exclusive!"); 760 761 return params; 762 } 763 764 /* 765 * timerlat_top_apply_config - apply the top configs to the initialized tool 766 */ 767 static int 768 timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params) 769 { 770 int retval; 771 int i; 772 773 if (!params->sleep_time) 774 params->sleep_time = 1; 775 776 if (params->cpus) { 777 retval = osnoise_set_cpus(top->context, params->cpus); 778 if (retval) { 779 err_msg("Failed to apply CPUs config\n"); 780 goto out_err; 781 } 782 } else { 783 for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) 784 CPU_SET(i, ¶ms->monitored_cpus); 785 } 786 787 if (params->stop_us) { 788 retval = osnoise_set_stop_us(top->context, params->stop_us); 789 if (retval) { 790 err_msg("Failed to set stop us\n"); 791 goto out_err; 792 } 793 } 794 795 if (params->stop_total_us) { 796 retval = osnoise_set_stop_total_us(top->context, params->stop_total_us); 797 if (retval) { 798 err_msg("Failed to set stop total us\n"); 799 goto out_err; 800 } 801 } 802 803 804 if (params->timerlat_period_us) { 805 retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us); 806 if (retval) { 807 err_msg("Failed to set timerlat period\n"); 808 goto out_err; 809 } 810 } 811 812 813 if (params->print_stack) { 814 retval = osnoise_set_print_stack(top->context, params->print_stack); 815 if (retval) { 816 err_msg("Failed to set print stack\n"); 817 goto out_err; 818 } 819 } 820 821 if (params->hk_cpus) { 822 retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), 823 ¶ms->hk_cpu_set); 824 if (retval == -1) { 825 err_msg("Failed to set rtla to the house keeping CPUs\n"); 826 goto out_err; 827 } 828 } else if (params->cpus) { 829 /* 830 * Even if the user do not set a house-keeping CPU, try to 831 * move rtla to a CPU set different to the one where the user 832 * set the workload to run. 833 * 834 * No need to check results as this is an automatic attempt. 835 */ 836 auto_house_keeping(¶ms->monitored_cpus); 837 } 838 839 /* 840 * If the user did not specify a type of thread, try user-threads first. 841 * Fall back to kernel threads otherwise. 842 */ 843 if (!params->kernel_workload && !params->user_top) { 844 retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); 845 if (retval) { 846 debug_msg("User-space interface detected, setting user-threads\n"); 847 params->user_workload = 1; 848 params->user_top = 1; 849 } else { 850 debug_msg("User-space interface not detected, setting kernel-threads\n"); 851 params->kernel_workload = 1; 852 } 853 } 854 855 /* 856 * Set workload according to type of thread if the kernel supports it. 857 * On kernels without support, user threads will have already failed 858 * on missing timerlat_fd, and kernel threads do not need it. 859 */ 860 retval = osnoise_set_workload(top->context, params->kernel_workload); 861 if (retval < -1) { 862 err_msg("Failed to set OSNOISE_WORKLOAD option\n"); 863 goto out_err; 864 } 865 866 if (isatty(STDOUT_FILENO) && !params->quiet) 867 params->pretty_output = 1; 868 869 return 0; 870 871 out_err: 872 return -1; 873 } 874 875 /* 876 * timerlat_init_top - initialize a timerlat top tool with parameters 877 */ 878 static struct osnoise_tool 879 *timerlat_init_top(struct timerlat_top_params *params) 880 { 881 struct osnoise_tool *top; 882 int nr_cpus; 883 884 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 885 886 top = osnoise_init_tool("timerlat_top"); 887 if (!top) 888 return NULL; 889 890 top->data = timerlat_alloc_top(nr_cpus); 891 if (!top->data) 892 goto out_err; 893 894 top->params = params; 895 896 tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat", 897 timerlat_top_handler, top); 898 899 return top; 900 901 out_err: 902 osnoise_destroy_tool(top); 903 return NULL; 904 } 905 906 static int stop_tracing; 907 static struct trace_instance *top_inst = NULL; 908 static void stop_top(int sig) 909 { 910 if (stop_tracing) { 911 /* 912 * Stop requested twice in a row; abort event processing and 913 * exit immediately 914 */ 915 tracefs_iterate_stop(top_inst->inst); 916 return; 917 } 918 stop_tracing = 1; 919 if (top_inst) 920 trace_instance_stop(top_inst); 921 } 922 923 /* 924 * timerlat_top_set_signals - handles the signal to stop the tool 925 */ 926 static void 927 timerlat_top_set_signals(struct timerlat_top_params *params) 928 { 929 signal(SIGINT, stop_top); 930 if (params->duration) { 931 signal(SIGALRM, stop_top); 932 alarm(params->duration); 933 } 934 } 935 936 int timerlat_top_main(int argc, char *argv[]) 937 { 938 struct timerlat_top_params *params; 939 struct osnoise_tool *record = NULL; 940 struct timerlat_u_params params_u; 941 struct osnoise_tool *top = NULL; 942 struct osnoise_tool *aa = NULL; 943 struct trace_instance *trace; 944 int dma_latency_fd = -1; 945 pthread_t timerlat_u; 946 int return_value = 1; 947 char *max_lat; 948 int retval; 949 int nr_cpus, i; 950 951 params = timerlat_top_parse_args(argc, argv); 952 if (!params) 953 exit(1); 954 955 top = timerlat_init_top(params); 956 if (!top) { 957 err_msg("Could not init osnoise top\n"); 958 goto out_exit; 959 } 960 961 retval = timerlat_top_apply_config(top, params); 962 if (retval) { 963 err_msg("Could not apply config\n"); 964 goto out_free; 965 } 966 967 trace = &top->trace; 968 /* 969 * Save trace instance into global variable so that SIGINT can stop 970 * the timerlat tracer. 971 * Otherwise, rtla could loop indefinitely when overloaded. 972 */ 973 top_inst = trace; 974 975 976 retval = enable_timerlat(trace); 977 if (retval) { 978 err_msg("Failed to enable timerlat tracer\n"); 979 goto out_free; 980 } 981 982 if (params->set_sched) { 983 retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); 984 if (retval) { 985 err_msg("Failed to set sched parameters\n"); 986 goto out_free; 987 } 988 } 989 990 if (params->cgroup && !params->user_top) { 991 retval = set_comm_cgroup("timerlat/", params->cgroup_name); 992 if (!retval) { 993 err_msg("Failed to move threads to cgroup\n"); 994 goto out_free; 995 } 996 } 997 998 if (params->dma_latency >= 0) { 999 dma_latency_fd = set_cpu_dma_latency(params->dma_latency); 1000 if (dma_latency_fd < 0) { 1001 err_msg("Could not set /dev/cpu_dma_latency.\n"); 1002 goto out_free; 1003 } 1004 } 1005 1006 if (params->deepest_idle_state >= -1) { 1007 if (!have_libcpupower_support()) { 1008 err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); 1009 goto out_free; 1010 } 1011 1012 nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 1013 1014 for (i = 0; i < nr_cpus; i++) { 1015 if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) 1016 continue; 1017 if (save_cpu_idle_disable_state(i) < 0) { 1018 err_msg("Could not save cpu idle state.\n"); 1019 goto out_free; 1020 } 1021 if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { 1022 err_msg("Could not set deepest cpu idle state.\n"); 1023 goto out_free; 1024 } 1025 } 1026 } 1027 1028 if (params->trace_output) { 1029 record = osnoise_init_trace_tool("timerlat"); 1030 if (!record) { 1031 err_msg("Failed to enable the trace instance\n"); 1032 goto out_free; 1033 } 1034 1035 if (params->events) { 1036 retval = trace_events_enable(&record->trace, params->events); 1037 if (retval) 1038 goto out_top; 1039 } 1040 1041 if (params->buffer_size > 0) { 1042 retval = trace_set_buffer_size(&record->trace, params->buffer_size); 1043 if (retval) 1044 goto out_top; 1045 } 1046 } 1047 1048 if (!params->no_aa) { 1049 if (params->aa_only) { 1050 /* as top is not used for display, use it for aa */ 1051 aa = top; 1052 } else { 1053 /* otherwise, a new instance is needed */ 1054 aa = osnoise_init_tool("timerlat_aa"); 1055 if (!aa) 1056 goto out_top; 1057 } 1058 1059 retval = timerlat_aa_init(aa, params->dump_tasks); 1060 if (retval) { 1061 err_msg("Failed to enable the auto analysis instance\n"); 1062 goto out_top; 1063 } 1064 1065 /* if it is re-using the main instance, there is no need to start it */ 1066 if (aa != top) { 1067 retval = enable_timerlat(&aa->trace); 1068 if (retval) { 1069 err_msg("Failed to enable timerlat tracer\n"); 1070 goto out_top; 1071 } 1072 } 1073 } 1074 1075 if (params->user_workload) { 1076 /* rtla asked to stop */ 1077 params_u.should_run = 1; 1078 /* all threads left */ 1079 params_u.stopped_running = 0; 1080 1081 params_u.set = ¶ms->monitored_cpus; 1082 if (params->set_sched) 1083 params_u.sched_param = ¶ms->sched_param; 1084 else 1085 params_u.sched_param = NULL; 1086 1087 params_u.cgroup_name = params->cgroup_name; 1088 1089 retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, ¶ms_u); 1090 if (retval) 1091 err_msg("Error creating timerlat user-space threads\n"); 1092 } 1093 1094 if (params->warmup > 0) { 1095 debug_msg("Warming up for %d seconds\n", params->warmup); 1096 sleep(params->warmup); 1097 } 1098 1099 /* 1100 * Start the tracers here, after having set all instances. 1101 * 1102 * Let the trace instance start first for the case of hitting a stop 1103 * tracing while enabling other instances. The trace instance is the 1104 * one with most valuable information. 1105 */ 1106 if (params->trace_output) 1107 trace_instance_start(&record->trace); 1108 if (!params->no_aa && aa != top) 1109 trace_instance_start(&aa->trace); 1110 trace_instance_start(trace); 1111 1112 top->start_time = time(NULL); 1113 timerlat_top_set_signals(params); 1114 1115 while (!stop_tracing) { 1116 sleep(params->sleep_time); 1117 1118 if (params->aa_only && !osnoise_trace_is_off(top, record)) 1119 continue; 1120 1121 retval = tracefs_iterate_raw_events(trace->tep, 1122 trace->inst, 1123 NULL, 1124 0, 1125 collect_registered_events, 1126 trace); 1127 if (retval < 0) { 1128 err_msg("Error iterating on events\n"); 1129 goto out_top; 1130 } 1131 1132 if (!params->quiet) 1133 timerlat_print_stats(params, top); 1134 1135 if (osnoise_trace_is_off(top, record)) 1136 break; 1137 1138 /* is there still any user-threads ? */ 1139 if (params->user_workload) { 1140 if (params_u.stopped_running) { 1141 debug_msg("timerlat user space threads stopped!\n"); 1142 break; 1143 } 1144 } 1145 } 1146 1147 if (params->user_workload && !params_u.stopped_running) { 1148 params_u.should_run = 0; 1149 sleep(1); 1150 } 1151 1152 timerlat_print_stats(params, top); 1153 1154 return_value = 0; 1155 1156 if (osnoise_trace_is_off(top, record) && !stop_tracing) { 1157 printf("rtla timerlat hit stop tracing\n"); 1158 1159 if (!params->no_aa) 1160 timerlat_auto_analysis(params->stop_us, params->stop_total_us); 1161 1162 if (params->trace_output) { 1163 printf(" Saving trace to %s\n", params->trace_output); 1164 save_trace_to_file(record->trace.inst, params->trace_output); 1165 } 1166 } else if (params->aa_only) { 1167 /* 1168 * If the trace did not stop with --aa-only, at least print the 1169 * max known latency. 1170 */ 1171 max_lat = tracefs_instance_file_read(trace->inst, "tracing_max_latency", NULL); 1172 if (max_lat) { 1173 printf(" Max latency was %s\n", max_lat); 1174 free(max_lat); 1175 } 1176 } 1177 1178 out_top: 1179 timerlat_aa_destroy(); 1180 if (dma_latency_fd >= 0) 1181 close(dma_latency_fd); 1182 if (params->deepest_idle_state >= -1) { 1183 for (i = 0; i < nr_cpus; i++) { 1184 if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) 1185 continue; 1186 restore_cpu_idle_disable_state(i); 1187 } 1188 } 1189 trace_events_destroy(&record->trace, params->events); 1190 params->events = NULL; 1191 out_free: 1192 timerlat_free_top(top->data); 1193 if (aa && aa != top) 1194 osnoise_destroy_tool(aa); 1195 osnoise_destroy_tool(record); 1196 osnoise_destroy_tool(top); 1197 free(params); 1198 free_cpu_idle_disable_states(); 1199 out_exit: 1200 exit(return_value); 1201 } 1202