// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
 */

#include <stdlib.h>
#include <errno.h>
#include "utils.h"
#include "osnoise.h"
#include "timerlat.h"
#include <unistd.h>

/*
 * Per-CPU parser state: which timerlat event the analysis expects next.
 * The timerlat IRQ fires first, then the timerlat thread runs, so the
 * state ping-pongs between WAITING_IRQ and WAITING_THREAD.
 */
enum timelat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};

/* Used to fill spaces in the output */
static const char *spaces = " ";

/* Max length of a task comm stored by the analysis. */
#define MAX_COMM 24

/*
 * Per-cpu data statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int curr_state;

	/* timerlat IRQ latency: values from the last timerlat IRQ event */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency: values from the last timerlat thread event */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetched using tep_get_field_val();
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer irq */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sum: total noise between the timerlat IRQ and
	 * the timerlat thread activation, per noise source.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information: pre-formatted report lines,
	 * one seq per noise source, printed at analysis time.
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread (tracked via sched:sched_switch).
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker? (work struct / function
	 * addresses from workqueue:workqueue_execute_start)
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};

/*
 * The analysis context and system wide view
 */
struct timerlat_aa_context {
	int nr_cpus;
	int dump_tasks;

	/* per CPU data */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};

/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/* Accessor for the single, file-scope analysis context. */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}

/*
 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
 */
static struct timerlat_aa_data
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
{
	return &taa_ctx->taa_data[cpu];
}

/*
 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 *
 * A new timerlat IRQ event starts a new analysis cycle: reset the
 * per-cycle interference accumulators and seq buffers, record the IRQ
 * latency/seqnum, and track the largest IRQ latency observed when the
 * CPU was exiting from idle.
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				   struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_thread_sum = 0;
	taa_data->thread_blocking_duration = 0;
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks.
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Get exit from idle case.
	 *
	 * If it is not idle thread (pid 0):
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * if the latency is shorter than the known exit from idle:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}

/*
 * timerlat_aa_thread_latency - Handles timerlat thread event
 *
 * Records the thread latency/seqnum and flips the state machine back
 * to waiting for the next timerlat IRQ.
 */
static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
				      struct trace_seq *s, struct tep_record *record,
				      struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the IRQ of the next cycle.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
	taa_data->tlat_thread_timstamp = record->ts;

	/* Thread latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);

	return 0;
}

/*
 * timerlat_aa_handler - Handle timerlat events
 *
 * This function is called to handle timerlat events recording statistics.
 *
 * Returns 0 on success, -1 otherwise.
240 */ 241 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record, 242 struct tep_event *event, void *context) 243 { 244 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 245 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 246 unsigned long long thread; 247 248 if (!taa_data) 249 return -1; 250 251 tep_get_field_val(s, event, "context", record, &thread, 1); 252 if (!thread) 253 return timerlat_aa_irq_latency(taa_data, s, record, event); 254 else 255 return timerlat_aa_thread_latency(taa_data, s, record, event); 256 } 257 258 /* 259 * timerlat_aa_nmi_handler - Handles NMI noise 260 * 261 * It is used to collect information about interferences from NMI. It is 262 * hooked to the osnoise:nmi_noise event. 263 */ 264 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record, 265 struct tep_event *event, void *context) 266 { 267 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 268 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 269 unsigned long long duration; 270 unsigned long long start; 271 272 tep_get_field_val(s, event, "duration", record, &duration, 1); 273 tep_get_field_val(s, event, "start", record, &start, 1); 274 275 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) { 276 taa_data->prev_irq_duration = duration; 277 taa_data->prev_irq_timstamp = start; 278 279 trace_seq_reset(taa_data->prev_irqs_seq); 280 trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n", 281 "nmi", 282 24, spaces, 283 ns_to_usf(duration)); 284 return 0; 285 } 286 287 taa_data->thread_nmi_sum += duration; 288 trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n", 289 "nmi", 290 24, spaces, ns_to_usf(duration)); 291 292 return 0; 293 } 294 295 /* 296 * timerlat_aa_irq_handler - Handles IRQ noise 297 * 298 * It is used to collect information about interferences from IRQ. It is 299 * hooked to the osnoise:irq_noise event. 
 *
 * It is a little bit more complex than the other because it measures:
 * - The IRQs that can delay the timer IRQ before it happened.
 * - The Timerlat IRQ handler
 * - The IRQs that happened between the timerlat IRQ and the timerlat thread
 * (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ: remember it as the candidate previous IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
				 desc, vector,
				 15, spaces,
				 ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler, so the first irq_noise seen after it
	 * is the timer IRQ itself.
	 */
	if (!taa_data->timer_irq_start_time) {
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time reading the clock and the time in
		 * which the event is placed in the buffer (the trace
		 * event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set IRQ start delay as 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * not exit from idle (a task was running when the IRQ hit).
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* Only count as exit-from-idle if no previous IRQ overlaps. */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference (between timerlat IRQ and timerlat thread).
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
			 desc, vector,
			 24, spaces,
			 ns_to_usf(duration));

	return 0;
}

/* Softirq vector number -> human-readable name (see include/linux/interrupt.h). */
static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };


/*
 * timerlat_aa_softirq_handler - Handles Softirq noise
 *
 * It is used to collect information about interferences from Softirq. It is
 * hooked to the osnoise:softirq_noise event.
 *
 * It is only printed in the non-rt kernel, as softirqs become thread on RT.
402 */ 403 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record, 404 struct tep_event *event, void *context) 405 { 406 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 407 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 408 unsigned long long duration; 409 unsigned long long vector; 410 unsigned long long start; 411 412 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) 413 return 0; 414 415 tep_get_field_val(s, event, "duration", record, &duration, 1); 416 tep_get_field_val(s, event, "start", record, &start, 1); 417 tep_get_field_val(s, event, "vector", record, &vector, 1); 418 419 taa_data->thread_softirq_sum += duration; 420 421 trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n", 422 softirq_name[vector], vector, 423 24, spaces, 424 ns_to_usf(duration)); 425 return 0; 426 } 427 428 /* 429 * timerlat_aa_softirq_handler - Handles thread noise 430 * 431 * It is used to collect information about interferences from threads. It is 432 * hooked to the osnoise:thread_noise event. 433 * 434 * Note: if you see thread noise, your timerlat thread was not the highest prio one. 
435 */ 436 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record, 437 struct tep_event *event, void *context) 438 { 439 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 440 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 441 unsigned long long duration; 442 unsigned long long start; 443 unsigned long long pid; 444 const char *comm; 445 int val; 446 447 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) 448 return 0; 449 450 tep_get_field_val(s, event, "duration", record, &duration, 1); 451 tep_get_field_val(s, event, "start", record, &start, 1); 452 453 tep_get_common_field_val(s, event, "common_pid", record, &pid, 1); 454 comm = tep_get_field_raw(s, event, "comm", record, &val, 1); 455 456 if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) { 457 taa_data->thread_blocking_duration = duration; 458 459 if (comm) 460 strncpy(taa_data->run_thread_comm, comm, MAX_COMM); 461 else 462 sprintf(taa_data->run_thread_comm, "<...>"); 463 464 } else { 465 taa_data->thread_thread_sum += duration; 466 467 trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n", 468 comm, pid, 469 15, spaces, 470 ns_to_usf(duration)); 471 } 472 473 return 0; 474 } 475 476 /* 477 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace 478 * 479 * Saves and parse the stack trace generated by the timerlat IRQ. 
480 */ 481 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record, 482 struct tep_event *event, void *context) 483 { 484 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 485 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 486 unsigned long *caller; 487 const char *function; 488 int val, i; 489 490 trace_seq_reset(taa_data->stack_seq); 491 492 trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n"); 493 caller = tep_get_field_raw(s, event, "caller", record, &val, 1); 494 if (caller) { 495 for (i = 0; ; i++) { 496 function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]); 497 if (!function) 498 break; 499 trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n", 500 14, spaces, function); 501 } 502 } 503 return 0; 504 } 505 506 /* 507 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU 508 * 509 * Handles the sched:sched_switch event to trace the current thread running on the 510 * CPU. It is used to display the threads running on the other CPUs when the trace 511 * stops. 512 */ 513 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record, 514 struct tep_event *event, void *context) 515 { 516 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 517 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 518 const char *comm; 519 int val; 520 521 tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1); 522 comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1); 523 524 strncpy(taa_data->current_comm, comm, MAX_COMM); 525 526 /* 527 * If this was a kworker, clean the last kworkers that ran. 
528 */ 529 taa_data->kworker = 0; 530 taa_data->kworker_func = 0; 531 532 return 0; 533 } 534 535 /* 536 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU 537 * 538 * Handles workqueue:workqueue_execute_start event, keeping track of 539 * the job that a kworker could be doing in the CPU. 540 * 541 * We already catch problems of hardware related latencies caused by work queues 542 * running driver code that causes hardware stall. For example, with DRM drivers. 543 */ 544 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record, 545 struct tep_event *event, void *context) 546 { 547 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 548 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 549 550 tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1); 551 tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1); 552 return 0; 553 } 554 555 /* 556 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing 557 * 558 * This is the core of the analysis. 559 */ 560 static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, 561 int irq_thresh, int thread_thresh) 562 { 563 long long exp_irq_ts; 564 int total; 565 int irq; 566 567 /* 568 * IRQ latency or Thread latency? 569 */ 570 if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) { 571 irq = 1; 572 total = taa_data->tlat_irq_latency; 573 } else { 574 irq = 0; 575 total = taa_data->tlat_thread_latency; 576 } 577 578 /* 579 * Expected IRQ arrival time using the trace clock as the base. 580 * 581 * TODO: Add a list of previous IRQ, and then run the list backwards. 
582 */ 583 exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay; 584 if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) { 585 if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time) 586 printf(" Previous IRQ interference: %.*s up to %9.2f us\n", 587 16, spaces, 588 ns_to_usf(taa_data->prev_irq_duration)); 589 } 590 591 /* 592 * The delay that the IRQ suffered before starting. 593 */ 594 printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces, 595 (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "", 596 ns_to_usf(taa_data->timer_irq_start_delay), 597 ns_to_per(total, taa_data->timer_irq_start_delay)); 598 599 /* 600 * Timerlat IRQ. 601 */ 602 printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces, 603 ns_to_usf(taa_data->tlat_irq_latency)); 604 605 if (irq) { 606 /* 607 * If the trace stopped due to IRQ, the other events will not happen 608 * because... the trace stopped :-). 609 * 610 * That is all folks, the stack trace was printed before the stop, 611 * so it will be displayed, it is the key. 612 */ 613 printf(" Blocking thread:\n"); 614 printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm, 615 taa_data->run_thread_pid); 616 } else { 617 /* 618 * The duration of the IRQ handler that handled the timerlat IRQ. 619 */ 620 printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n", 621 30, spaces, 622 ns_to_usf(taa_data->timer_irq_duration), 623 ns_to_per(total, taa_data->timer_irq_duration)); 624 625 /* 626 * The amount of time that the current thread postponed the scheduler. 627 * 628 * Recalling that it is net from NMI/IRQ/Softirq interference, so there 629 * is no need to compute values here. 
630 */ 631 printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces, 632 ns_to_usf(taa_data->thread_blocking_duration), 633 ns_to_per(total, taa_data->thread_blocking_duration)); 634 635 printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces, 636 taa_data->run_thread_comm, taa_data->run_thread_pid, 637 12, spaces, ns_to_usf(taa_data->thread_blocking_duration)); 638 } 639 640 /* 641 * Print the stack trace! 642 */ 643 trace_seq_do_printf(taa_data->stack_seq); 644 645 /* 646 * NMIs can happen during the IRQ, so they are always possible. 647 */ 648 if (taa_data->thread_nmi_sum) 649 printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, 650 ns_to_usf(taa_data->thread_nmi_sum), 651 ns_to_per(total, taa_data->thread_nmi_sum)); 652 653 /* 654 * If it is an IRQ latency, the other factors can be skipped. 655 */ 656 if (irq) 657 goto print_total; 658 659 /* 660 * Prints the interference caused by IRQs to the thread latency. 661 */ 662 if (taa_data->thread_irq_sum) { 663 printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, 664 ns_to_usf(taa_data->thread_irq_sum), 665 ns_to_per(total, taa_data->thread_irq_sum)); 666 667 trace_seq_do_printf(taa_data->irqs_seq); 668 } 669 670 /* 671 * Prints the interference caused by Softirqs to the thread latency. 672 */ 673 if (taa_data->thread_softirq_sum) { 674 printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces, 675 ns_to_usf(taa_data->thread_softirq_sum), 676 ns_to_per(total, taa_data->thread_softirq_sum)); 677 678 trace_seq_do_printf(taa_data->softirqs_seq); 679 } 680 681 /* 682 * Prints the interference caused by other threads to the thread latency. 683 * 684 * If this happens, your timerlat is not the highest prio. OK, migration 685 * thread can happen. But otherwise, you are not measuring the "scheduling 686 * latency" only, and here is the difference from scheduling latency and 687 * timer handling latency. 
688 */ 689 if (taa_data->thread_thread_sum) { 690 printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces, 691 ns_to_usf(taa_data->thread_thread_sum), 692 ns_to_per(total, taa_data->thread_thread_sum)); 693 694 trace_seq_do_printf(taa_data->threads_seq); 695 } 696 697 /* 698 * Done. 699 */ 700 print_total: 701 printf("------------------------------------------------------------------------\n"); 702 printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread", 703 37, spaces, ns_to_usf(total)); 704 } 705 706 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx) 707 { 708 struct trace_instance *trace = &taa_ctx->tool->trace; 709 int retval; 710 711 retval = tracefs_iterate_raw_events(trace->tep, 712 trace->inst, 713 NULL, 714 0, 715 collect_registered_events, 716 trace); 717 if (retval < 0) { 718 err_msg("Error iterating on events\n"); 719 return 0; 720 } 721 722 return 1; 723 } 724 725 /** 726 * timerlat_auto_analysis - Analyze the collected data 727 */ 728 void timerlat_auto_analysis(int irq_thresh, int thread_thresh) 729 { 730 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 731 unsigned long long max_exit_from_idle = 0; 732 struct timerlat_aa_data *taa_data; 733 int max_exit_from_idle_cpu; 734 struct tep_handle *tep; 735 int cpu; 736 737 timerlat_auto_analysis_collect_trace(taa_ctx); 738 739 /* bring stop tracing to the ns scale */ 740 irq_thresh = irq_thresh * 1000; 741 thread_thresh = thread_thresh * 1000; 742 743 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) { 744 taa_data = timerlat_aa_get_data(taa_ctx, cpu); 745 746 if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) { 747 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); 748 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); 749 } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) { 750 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); 751 
timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); 752 } 753 754 if (taa_data->max_exit_idle_latency > max_exit_from_idle) { 755 max_exit_from_idle = taa_data->max_exit_idle_latency; 756 max_exit_from_idle_cpu = cpu; 757 } 758 759 } 760 761 if (max_exit_from_idle) { 762 printf("\n"); 763 printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n", 764 ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu); 765 } 766 if (!taa_ctx->dump_tasks) 767 return; 768 769 printf("\n"); 770 printf("Printing CPU tasks:\n"); 771 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) { 772 taa_data = timerlat_aa_get_data(taa_ctx, cpu); 773 tep = taa_ctx->tool->trace.tep; 774 775 printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid); 776 777 if (taa_data->kworker_func) 778 printf(" kworker:%s:%s", 779 tep_find_function(tep, taa_data->kworker) ? : "<...>", 780 tep_find_function(tep, taa_data->kworker_func)); 781 printf("\n"); 782 } 783 784 } 785 786 /* 787 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data 788 */ 789 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx) 790 { 791 struct timerlat_aa_data *taa_data; 792 int i; 793 794 if (!taa_ctx->taa_data) 795 return; 796 797 for (i = 0; i < taa_ctx->nr_cpus; i++) { 798 taa_data = timerlat_aa_get_data(taa_ctx, i); 799 800 if (taa_data->prev_irqs_seq) { 801 trace_seq_destroy(taa_data->prev_irqs_seq); 802 free(taa_data->prev_irqs_seq); 803 } 804 805 if (taa_data->nmi_seq) { 806 trace_seq_destroy(taa_data->nmi_seq); 807 free(taa_data->nmi_seq); 808 } 809 810 if (taa_data->irqs_seq) { 811 trace_seq_destroy(taa_data->irqs_seq); 812 free(taa_data->irqs_seq); 813 } 814 815 if (taa_data->softirqs_seq) { 816 trace_seq_destroy(taa_data->softirqs_seq); 817 free(taa_data->softirqs_seq); 818 } 819 820 if (taa_data->threads_seq) { 821 trace_seq_destroy(taa_data->threads_seq); 822 free(taa_data->threads_seq); 823 } 824 825 if (taa_data->stack_seq) { 826 
trace_seq_destroy(taa_data->stack_seq); 827 free(taa_data->stack_seq); 828 } 829 } 830 } 831 832 /* 833 * timerlat_aa_init_seqs - Init seq files used to store parsed information 834 * 835 * Instead of keeping data structures to store raw data, use seq files to 836 * store parsed data. 837 * 838 * Allocates and initialize seq files. 839 * 840 * Returns 0 on success, -1 otherwise. 841 */ 842 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx) 843 { 844 struct timerlat_aa_data *taa_data; 845 int i; 846 847 for (i = 0; i < taa_ctx->nr_cpus; i++) { 848 849 taa_data = timerlat_aa_get_data(taa_ctx, i); 850 851 taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq)); 852 if (!taa_data->prev_irqs_seq) 853 goto out_err; 854 855 trace_seq_init(taa_data->prev_irqs_seq); 856 857 taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq)); 858 if (!taa_data->nmi_seq) 859 goto out_err; 860 861 trace_seq_init(taa_data->nmi_seq); 862 863 taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq)); 864 if (!taa_data->irqs_seq) 865 goto out_err; 866 867 trace_seq_init(taa_data->irqs_seq); 868 869 taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq)); 870 if (!taa_data->softirqs_seq) 871 goto out_err; 872 873 trace_seq_init(taa_data->softirqs_seq); 874 875 taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq)); 876 if (!taa_data->threads_seq) 877 goto out_err; 878 879 trace_seq_init(taa_data->threads_seq); 880 881 taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq)); 882 if (!taa_data->stack_seq) 883 goto out_err; 884 885 trace_seq_init(taa_data->stack_seq); 886 } 887 888 return 0; 889 890 out_err: 891 timerlat_aa_destroy_seqs(taa_ctx); 892 return -1; 893 } 894 895 /* 896 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis 897 */ 898 static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks) 899 { 900 901 tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", 
"timerlat", 902 timerlat_aa_handler, tool); 903 904 tracefs_event_disable(tool->trace.inst, "osnoise", NULL); 905 906 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", 907 timerlat_aa_nmi_handler, tool); 908 909 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise", 910 timerlat_aa_irq_handler, tool); 911 912 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise", 913 timerlat_aa_softirq_handler, tool); 914 915 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise", 916 timerlat_aa_thread_handler, tool); 917 918 tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack", 919 timerlat_aa_stack_handler, tool); 920 if (!dump_tasks) 921 return; 922 923 tracefs_event_disable(tool->trace.inst, "sched", "sched_switch"); 924 tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch", 925 timerlat_aa_sched_switch_handler, tool); 926 927 tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start"); 928 tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start", 929 timerlat_aa_kworker_start_handler, tool); 930 } 931 932 /* 933 * timerlat_aa_register_events - Register events used in the auto-analysis 934 * 935 * Returns 0 on success, -1 otherwise. 936 */ 937 static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks) 938 { 939 int retval; 940 941 tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", 942 timerlat_aa_handler, tool); 943 944 945 /* 946 * register auto-analysis handlers. 
947 */ 948 retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL); 949 if (retval < 0 && !errno) { 950 err_msg("Could not find osnoise events\n"); 951 goto out_err; 952 } 953 954 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", 955 timerlat_aa_nmi_handler, tool); 956 957 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise", 958 timerlat_aa_irq_handler, tool); 959 960 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise", 961 timerlat_aa_softirq_handler, tool); 962 963 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise", 964 timerlat_aa_thread_handler, tool); 965 966 tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack", 967 timerlat_aa_stack_handler, tool); 968 969 if (!dump_tasks) 970 return 0; 971 972 /* 973 * Dump task events. 974 */ 975 retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch"); 976 if (retval < 0 && !errno) { 977 err_msg("Could not find sched_switch\n"); 978 goto out_err; 979 } 980 981 tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch", 982 timerlat_aa_sched_switch_handler, tool); 983 984 retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start"); 985 if (retval < 0 && !errno) { 986 err_msg("Could not find workqueue_execute_start\n"); 987 goto out_err; 988 } 989 990 tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start", 991 timerlat_aa_kworker_start_handler, tool); 992 993 return 0; 994 995 out_err: 996 timerlat_aa_unregister_events(tool, dump_tasks); 997 return -1; 998 } 999 1000 /** 1001 * timerlat_aa_destroy - Destroy timerlat auto-analysis 1002 */ 1003 void timerlat_aa_destroy(void) 1004 { 1005 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 1006 1007 if (!taa_ctx) 1008 return; 1009 1010 if (!taa_ctx->taa_data) 1011 goto out_ctx; 1012 1013 timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks); 1014 
timerlat_aa_destroy_seqs(taa_ctx); 1015 free(taa_ctx->taa_data); 1016 out_ctx: 1017 free(taa_ctx); 1018 } 1019 1020 /** 1021 * timerlat_aa_init - Initialize timerlat auto-analysis 1022 * 1023 * Returns 0 on success, -1 otherwise. 1024 */ 1025 int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks) 1026 { 1027 int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 1028 struct timerlat_aa_context *taa_ctx; 1029 int retval; 1030 1031 taa_ctx = calloc(1, sizeof(*taa_ctx)); 1032 if (!taa_ctx) 1033 return -1; 1034 1035 __timerlat_aa_ctx = taa_ctx; 1036 1037 taa_ctx->nr_cpus = nr_cpus; 1038 taa_ctx->tool = tool; 1039 taa_ctx->dump_tasks = dump_tasks; 1040 1041 taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data)); 1042 if (!taa_ctx->taa_data) 1043 goto out_err; 1044 1045 retval = timerlat_aa_init_seqs(taa_ctx); 1046 if (retval) 1047 goto out_err; 1048 1049 retval = timerlat_aa_register_events(tool, dump_tasks); 1050 if (retval) 1051 goto out_err; 1052 1053 return 0; 1054 1055 out_err: 1056 timerlat_aa_destroy(); 1057 return -1; 1058 } 1059