1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 */ 5 6 #include <stdlib.h> 7 #include <errno.h> 8 #include "timerlat.h" 9 #include <unistd.h> 10 11 enum timelat_state { 12 TIMERLAT_INIT = 0, 13 TIMERLAT_WAITING_IRQ, 14 TIMERLAT_WAITING_THREAD, 15 }; 16 17 /* Used to fill spaces in the output */ 18 static const char *spaces = " "; 19 20 #define MAX_COMM 24 21 22 /* 23 * Per-cpu data statistics and data. 24 */ 25 struct timerlat_aa_data { 26 /* Current CPU state */ 27 int curr_state; 28 29 /* timerlat IRQ latency */ 30 unsigned long long tlat_irq_seqnum; 31 unsigned long long tlat_irq_latency; 32 unsigned long long tlat_irq_timstamp; 33 34 /* timerlat Thread latency */ 35 unsigned long long tlat_thread_seqnum; 36 unsigned long long tlat_thread_latency; 37 unsigned long long tlat_thread_timstamp; 38 39 /* 40 * Information about the thread running when the IRQ 41 * arrived. 42 * 43 * This can be blocking or interference, depending on the 44 * priority of the thread. Assuming timerlat is the highest 45 * prio, it is blocking. If timerlat has a lower prio, it is 46 * interference. 47 * note: "unsigned long long" because they are fetch using tep_get_field_val(); 48 */ 49 unsigned long long run_thread_pid; 50 char run_thread_comm[MAX_COMM]; 51 unsigned long long thread_blocking_duration; 52 unsigned long long max_exit_idle_latency; 53 54 /* Information about the timerlat timer irq */ 55 unsigned long long timer_irq_start_time; 56 unsigned long long timer_irq_start_delay; 57 unsigned long long timer_irq_duration; 58 unsigned long long timer_exit_from_idle; 59 60 /* 61 * Information about the last IRQ before the timerlat irq 62 * arrived. 63 * 64 * If now - timestamp is <= latency, it might have influenced 65 * in the timerlat irq latency. Otherwise, ignore it. 66 */ 67 unsigned long long prev_irq_duration; 68 unsigned long long prev_irq_timstamp; 69 70 /* 71 * Interference sum. 72 */ 73 unsigned long long thread_nmi_sum; 74 unsigned long long thread_irq_sum; 75 unsigned long long thread_softirq_sum; 76 unsigned long long thread_thread_sum; 77 78 /* 79 * Interference task information. 80 */ 81 struct trace_seq *prev_irqs_seq; 82 struct trace_seq *nmi_seq; 83 struct trace_seq *irqs_seq; 84 struct trace_seq *softirqs_seq; 85 struct trace_seq *threads_seq; 86 struct trace_seq *stack_seq; 87 88 /* 89 * Current thread. 90 */ 91 char current_comm[MAX_COMM]; 92 unsigned long long current_pid; 93 94 /* 95 * Is the system running a kworker? 96 */ 97 unsigned long long kworker; 98 unsigned long long kworker_func; 99 }; 100 101 /* 102 * The analysis context and system wide view 103 */ 104 struct timerlat_aa_context { 105 int dump_tasks; 106 enum stack_format stack_format; 107 108 /* per CPU data */ 109 struct timerlat_aa_data *taa_data; 110 111 /* 112 * required to translate function names and register 113 * events. 114 */ 115 struct osnoise_tool *tool; 116 }; 117 118 /* 119 * The data is stored as a local variable, but accessed via a helper function. 120 * 121 * It could be stored inside the trace context. But every access would 122 * require container_of() + a series of pointers. Do we need it? Not sure. 123 * 124 * For now keep it simple. If needed, store it in the tool, add the *context 125 * as a parameter in timerlat_aa_get_ctx() and do the magic there. 126 */ 127 static struct timerlat_aa_context *__timerlat_aa_ctx; 128 129 static struct timerlat_aa_context *timerlat_aa_get_ctx(void) 130 { 131 return __timerlat_aa_ctx; 132 } 133 134 /* 135 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context 136 */ 137 static struct timerlat_aa_data 138 *timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu) 139 { 140 return &taa_ctx->taa_data[cpu]; 141 } 142 143 /* 144 * timerlat_aa_irq_latency - Handles timerlat IRQ event 145 */ 146 static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data, 147 struct trace_seq *s, struct tep_record *record, 148 struct tep_event *event) 149 { 150 /* 151 * For interference, we start now looking for things that can delay 152 * the thread. 153 */ 154 taa_data->curr_state = TIMERLAT_WAITING_THREAD; 155 taa_data->tlat_irq_timstamp = record->ts; 156 157 /* 158 * Zero values. 159 */ 160 taa_data->thread_nmi_sum = 0; 161 taa_data->thread_irq_sum = 0; 162 taa_data->thread_softirq_sum = 0; 163 taa_data->thread_thread_sum = 0; 164 taa_data->thread_blocking_duration = 0; 165 taa_data->timer_irq_start_time = 0; 166 taa_data->timer_irq_duration = 0; 167 taa_data->timer_exit_from_idle = 0; 168 169 /* 170 * Zero interference tasks. 171 */ 172 trace_seq_reset(taa_data->nmi_seq); 173 trace_seq_reset(taa_data->irqs_seq); 174 trace_seq_reset(taa_data->softirqs_seq); 175 trace_seq_reset(taa_data->threads_seq); 176 177 /* IRQ latency values */ 178 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1); 179 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1); 180 181 /* The thread that can cause blocking */ 182 tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1); 183 184 /* 185 * Get exit from idle case. 186 * 187 * If it is not idle thread: 188 */ 189 if (taa_data->run_thread_pid) 190 return 0; 191 192 /* 193 * if the latency is shorter than the known exit from idle: 194 */ 195 if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency) 196 return 0; 197 198 /* 199 * To be safe, ignore the cases in which an IRQ/NMI could have 200 * interfered with the timerlat IRQ. 201 */ 202 if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency 203 < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) 204 return 0; 205 206 taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency; 207 208 return 0; 209 } 210 211 /* 212 * timerlat_aa_thread_latency - Handles timerlat thread event 213 */ 214 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data, 215 struct trace_seq *s, struct tep_record *record, 216 struct tep_event *event) 217 { 218 /* 219 * For interference, we start now looking for things that can delay 220 * the IRQ of the next cycle. 221 */ 222 taa_data->curr_state = TIMERLAT_WAITING_IRQ; 223 taa_data->tlat_thread_timstamp = record->ts; 224 225 /* Thread latency values */ 226 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1); 227 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1); 228 229 return 0; 230 } 231 232 /* 233 * timerlat_aa_handler - Handle timerlat events 234 * 235 * This function is called to handle timerlat events recording statistics. 236 * 237 * Returns 0 on success, -1 otherwise. 238 */ 239 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record, 240 struct tep_event *event, void *context) 241 { 242 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 243 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 244 unsigned long long thread; 245 246 if (!taa_data) 247 return -1; 248 249 tep_get_field_val(s, event, "context", record, &thread, 1); 250 if (!thread) 251 return timerlat_aa_irq_latency(taa_data, s, record, event); 252 else 253 return timerlat_aa_thread_latency(taa_data, s, record, event); 254 } 255 256 /* 257 * timerlat_aa_nmi_handler - Handles NMI noise 258 * 259 * It is used to collect information about interferences from NMI. It is 260 * hooked to the osnoise:nmi_noise event. 261 */ 262 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record, 263 struct tep_event *event, void *context) 264 { 265 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 266 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 267 unsigned long long duration; 268 unsigned long long start; 269 270 tep_get_field_val(s, event, "duration", record, &duration, 1); 271 tep_get_field_val(s, event, "start", record, &start, 1); 272 273 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) { 274 taa_data->prev_irq_duration = duration; 275 taa_data->prev_irq_timstamp = start; 276 277 trace_seq_reset(taa_data->prev_irqs_seq); 278 trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n", 279 "nmi", 280 24, spaces, 281 ns_to_usf(duration)); 282 return 0; 283 } 284 285 taa_data->thread_nmi_sum += duration; 286 trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n", 287 "nmi", 288 24, spaces, ns_to_usf(duration)); 289 290 return 0; 291 } 292 293 /* 294 * timerlat_aa_irq_handler - Handles IRQ noise 295 * 296 * It is used to collect information about interferences from IRQ. It is 297 * hooked to the osnoise:irq_noise event. 298 * 299 * It is a little bit more complex than the other because it measures: 300 * - The IRQs that can delay the timer IRQ before it happened. 301 * - The Timerlat IRQ handler 302 * - The IRQs that happened between the timerlat IRQ and the timerlat thread 303 * (IRQ interference). 304 */ 305 static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record, 306 struct tep_event *event, void *context) 307 { 308 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 309 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 310 unsigned long long expected_start; 311 unsigned long long duration; 312 unsigned long long vector; 313 unsigned long long start; 314 char *desc; 315 int val; 316 317 tep_get_field_val(s, event, "duration", record, &duration, 1); 318 tep_get_field_val(s, event, "start", record, &start, 1); 319 tep_get_field_val(s, event, "vector", record, &vector, 1); 320 desc = tep_get_field_raw(s, event, "desc", record, &val, 1); 321 322 /* 323 * Before the timerlat IRQ. 324 */ 325 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) { 326 taa_data->prev_irq_duration = duration; 327 taa_data->prev_irq_timstamp = start; 328 329 trace_seq_reset(taa_data->prev_irqs_seq); 330 trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", 331 desc, vector, 332 15, spaces, 333 ns_to_usf(duration)); 334 return 0; 335 } 336 337 /* 338 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at 339 * the timerlat irq handler. 340 */ 341 if (!taa_data->timer_irq_start_time) { 342 expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency; 343 344 taa_data->timer_irq_start_time = start; 345 taa_data->timer_irq_duration = duration; 346 347 /* 348 * We are dealing with two different clock sources: the 349 * external clock source that timerlat uses as a reference 350 * and the clock used by the tracer. There are also two 351 * moments: the time reading the clock and the timer in 352 * which the event is placed in the buffer (the trace 353 * event timestamp). If the processor is slow or there 354 * is some hardware noise, the difference between the 355 * timestamp and the external clock read can be longer 356 * than the IRQ handler delay, resulting in a negative 357 * time. If so, set IRQ start delay as 0. In the end, 358 * it is less relevant than the noise. 359 */ 360 if (expected_start < taa_data->timer_irq_start_time) 361 taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start; 362 else 363 taa_data->timer_irq_start_delay = 0; 364 365 /* 366 * not exit from idle. 367 */ 368 if (taa_data->run_thread_pid) 369 return 0; 370 371 if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) 372 taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay; 373 374 return 0; 375 } 376 377 /* 378 * IRQ interference. 379 */ 380 taa_data->thread_irq_sum += duration; 381 trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", 382 desc, vector, 383 24, spaces, 384 ns_to_usf(duration)); 385 386 return 0; 387 } 388 389 static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", 390 "IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" }; 391 392 393 /* 394 * timerlat_aa_softirq_handler - Handles Softirq noise 395 * 396 * It is used to collect information about interferences from Softirq. It is 397 * hooked to the osnoise:softirq_noise event. 398 * 399 * It is only printed in the non-rt kernel, as softirqs become thread on RT. 400 */ 401 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record, 402 struct tep_event *event, void *context) 403 { 404 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 405 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 406 unsigned long long duration; 407 unsigned long long vector; 408 unsigned long long start; 409 410 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) 411 return 0; 412 413 tep_get_field_val(s, event, "duration", record, &duration, 1); 414 tep_get_field_val(s, event, "start", record, &start, 1); 415 tep_get_field_val(s, event, "vector", record, &vector, 1); 416 417 taa_data->thread_softirq_sum += duration; 418 419 trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n", 420 vector < ARRAY_SIZE(softirq_name) ? softirq_name[vector] : "UNKNOWN", 421 vector, 24, spaces, 422 ns_to_usf(duration)); 423 return 0; 424 } 425 426 /* 427 * timerlat_aa_softirq_handler - Handles thread noise 428 * 429 * It is used to collect information about interferences from threads. It is 430 * hooked to the osnoise:thread_noise event. 431 * 432 * Note: if you see thread noise, your timerlat thread was not the highest prio one. 433 */ 434 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record, 435 struct tep_event *event, void *context) 436 { 437 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 438 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 439 unsigned long long duration; 440 unsigned long long start; 441 unsigned long long pid; 442 const char *comm; 443 int val; 444 445 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) 446 return 0; 447 448 tep_get_field_val(s, event, "duration", record, &duration, 1); 449 tep_get_field_val(s, event, "start", record, &start, 1); 450 451 tep_get_common_field_val(s, event, "common_pid", record, &pid, 1); 452 comm = tep_get_field_raw(s, event, "comm", record, &val, 1); 453 454 if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) { 455 taa_data->thread_blocking_duration = duration; 456 457 if (comm) 458 strncpy(taa_data->run_thread_comm, comm, MAX_COMM); 459 else 460 sprintf(taa_data->run_thread_comm, "<...>"); 461 462 } else { 463 taa_data->thread_thread_sum += duration; 464 465 trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n", 466 comm, pid, 467 15, spaces, 468 ns_to_usf(duration)); 469 } 470 471 return 0; 472 } 473 474 /* 475 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace 476 * 477 * Saves and parse the stack trace generated by the timerlat IRQ. 478 */ 479 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record, 480 struct tep_event *event, void *context) 481 { 482 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 483 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 484 enum stack_format stack_format = taa_ctx->stack_format; 485 unsigned long *caller; 486 const char *function; 487 int val; 488 unsigned long long i; 489 490 trace_seq_reset(taa_data->stack_seq); 491 492 trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n"); 493 caller = tep_get_field_raw(s, event, "caller", record, &val, 1); 494 495 if (caller) { 496 unsigned long long size; 497 unsigned long long max_entries; 498 499 if (tep_get_field_val(s, event, "size", record, &size, 1) == 0) 500 max_entries = size < 64 ? size : 64; 501 else 502 max_entries = 64; 503 504 for (i = 0; i < max_entries; i++) { 505 function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]); 506 if (!function) { 507 if (stack_format == STACK_FORMAT_TRUNCATE) 508 break; 509 else if (stack_format == STACK_FORMAT_SKIP) 510 continue; 511 else if (stack_format == STACK_FORMAT_FULL) 512 trace_seq_printf(taa_data->stack_seq, " %.*s -> 0x%lx\n", 513 14, spaces, caller[i]); 514 } else { 515 trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n", 516 14, spaces, function); 517 } 518 } 519 } 520 521 return 0; 522 } 523 524 /* 525 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU 526 * 527 * Handles the sched:sched_switch event to trace the current thread running on the 528 * CPU. It is used to display the threads running on the other CPUs when the trace 529 * stops. 530 */ 531 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record, 532 struct tep_event *event, void *context) 533 { 534 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 535 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 536 const char *comm; 537 int val; 538 539 tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1); 540 comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1); 541 542 strncpy(taa_data->current_comm, comm, MAX_COMM); 543 544 /* 545 * If this was a kworker, clean the last kworkers that ran. 546 */ 547 taa_data->kworker = 0; 548 taa_data->kworker_func = 0; 549 550 return 0; 551 } 552 553 /* 554 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU 555 * 556 * Handles workqueue:workqueue_execute_start event, keeping track of 557 * the job that a kworker could be doing in the CPU. 558 * 559 * We already catch problems of hardware related latencies caused by work queues 560 * running driver code that causes hardware stall. For example, with DRM drivers. 561 */ 562 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record, 563 struct tep_event *event, void *context) 564 { 565 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 566 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); 567 568 tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1); 569 tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1); 570 return 0; 571 } 572 573 /* 574 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing 575 * 576 * This is the core of the analysis. 577 */ 578 static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, 579 int irq_thresh, int thread_thresh) 580 { 581 long long exp_irq_ts; 582 int total; 583 int irq; 584 585 /* 586 * IRQ latency or Thread latency? 587 */ 588 if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) { 589 irq = 1; 590 total = taa_data->tlat_irq_latency; 591 } else { 592 irq = 0; 593 total = taa_data->tlat_thread_latency; 594 } 595 596 /* 597 * Expected IRQ arrival time using the trace clock as the base. 598 * 599 * TODO: Add a list of previous IRQ, and then run the list backwards. 600 */ 601 exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay; 602 if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) { 603 if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time) 604 printf(" Previous IRQ interference: %.*s up to %9.2f us\n", 605 16, spaces, 606 ns_to_usf(taa_data->prev_irq_duration)); 607 } 608 609 /* 610 * The delay that the IRQ suffered before starting. 611 */ 612 printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces, 613 (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "", 614 ns_to_usf(taa_data->timer_irq_start_delay), 615 ns_to_per(total, taa_data->timer_irq_start_delay)); 616 617 /* 618 * Timerlat IRQ. 619 */ 620 printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces, 621 ns_to_usf(taa_data->tlat_irq_latency)); 622 623 if (irq) { 624 /* 625 * If the trace stopped due to IRQ, the other events will not happen 626 * because... the trace stopped :-). 627 * 628 * That is all folks, the stack trace was printed before the stop, 629 * so it will be displayed, it is the key. 630 */ 631 printf(" Blocking thread:\n"); 632 printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm, 633 taa_data->run_thread_pid); 634 } else { 635 /* 636 * The duration of the IRQ handler that handled the timerlat IRQ. 637 */ 638 printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n", 639 30, spaces, 640 ns_to_usf(taa_data->timer_irq_duration), 641 ns_to_per(total, taa_data->timer_irq_duration)); 642 643 /* 644 * The amount of time that the current thread postponed the scheduler. 645 * 646 * Recalling that it is net from NMI/IRQ/Softirq interference, so there 647 * is no need to compute values here. 648 */ 649 printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces, 650 ns_to_usf(taa_data->thread_blocking_duration), 651 ns_to_per(total, taa_data->thread_blocking_duration)); 652 653 printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces, 654 taa_data->run_thread_comm, taa_data->run_thread_pid, 655 12, spaces, ns_to_usf(taa_data->thread_blocking_duration)); 656 } 657 658 /* 659 * Print the stack trace! 660 */ 661 trace_seq_do_printf(taa_data->stack_seq); 662 663 /* 664 * NMIs can happen during the IRQ, so they are always possible. 665 */ 666 if (taa_data->thread_nmi_sum) 667 printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, 668 ns_to_usf(taa_data->thread_nmi_sum), 669 ns_to_per(total, taa_data->thread_nmi_sum)); 670 671 /* 672 * If it is an IRQ latency, the other factors can be skipped. 673 */ 674 if (irq) 675 goto print_total; 676 677 /* 678 * Prints the interference caused by IRQs to the thread latency. 679 */ 680 if (taa_data->thread_irq_sum) { 681 printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, 682 ns_to_usf(taa_data->thread_irq_sum), 683 ns_to_per(total, taa_data->thread_irq_sum)); 684 685 trace_seq_do_printf(taa_data->irqs_seq); 686 } 687 688 /* 689 * Prints the interference caused by Softirqs to the thread latency. 690 */ 691 if (taa_data->thread_softirq_sum) { 692 printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces, 693 ns_to_usf(taa_data->thread_softirq_sum), 694 ns_to_per(total, taa_data->thread_softirq_sum)); 695 696 trace_seq_do_printf(taa_data->softirqs_seq); 697 } 698 699 /* 700 * Prints the interference caused by other threads to the thread latency. 701 * 702 * If this happens, your timerlat is not the highest prio. OK, migration 703 * thread can happen. But otherwise, you are not measuring the "scheduling 704 * latency" only, and here is the difference from scheduling latency and 705 * timer handling latency. 706 */ 707 if (taa_data->thread_thread_sum) { 708 printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces, 709 ns_to_usf(taa_data->thread_thread_sum), 710 ns_to_per(total, taa_data->thread_thread_sum)); 711 712 trace_seq_do_printf(taa_data->threads_seq); 713 } 714 715 /* 716 * Done. 717 */ 718 print_total: 719 printf("------------------------------------------------------------------------\n"); 720 printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread", 721 37, spaces, ns_to_usf(total)); 722 } 723 724 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx) 725 { 726 struct trace_instance *trace = &taa_ctx->tool->trace; 727 int retval; 728 729 retval = tracefs_iterate_raw_events(trace->tep, 730 trace->inst, 731 NULL, 732 0, 733 collect_registered_events, 734 trace); 735 if (retval < 0) { 736 err_msg("Error iterating on events\n"); 737 return 0; 738 } 739 740 return 1; 741 } 742 743 /** 744 * timerlat_auto_analysis - Analyze the collected data 745 */ 746 void timerlat_auto_analysis(int irq_thresh, int thread_thresh) 747 { 748 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 749 unsigned long long max_exit_from_idle = 0; 750 struct timerlat_aa_data *taa_data; 751 int max_exit_from_idle_cpu; 752 struct tep_handle *tep; 753 int cpu; 754 755 timerlat_auto_analysis_collect_trace(taa_ctx); 756 757 /* bring stop tracing to the ns scale */ 758 irq_thresh = irq_thresh * 1000; 759 thread_thresh = thread_thresh * 1000; 760 761 for (cpu = 0; cpu < nr_cpus; cpu++) { 762 taa_data = timerlat_aa_get_data(taa_ctx, cpu); 763 764 if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) { 765 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); 766 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); 767 } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) { 768 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); 769 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); 770 } 771 772 if (taa_data->max_exit_idle_latency > max_exit_from_idle) { 773 max_exit_from_idle = taa_data->max_exit_idle_latency; 774 max_exit_from_idle_cpu = cpu; 775 } 776 777 } 778 779 if (max_exit_from_idle) { 780 printf("\n"); 781 printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n", 782 ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu); 783 } 784 if (!taa_ctx->dump_tasks) 785 return; 786 787 printf("\n"); 788 printf("Printing CPU tasks:\n"); 789 for (cpu = 0; cpu < nr_cpus; cpu++) { 790 taa_data = timerlat_aa_get_data(taa_ctx, cpu); 791 tep = taa_ctx->tool->trace.tep; 792 793 printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid); 794 795 if (taa_data->kworker_func) 796 printf(" kworker:%s:%s", 797 tep_find_function(tep, taa_data->kworker) ? : "<...>", 798 tep_find_function(tep, taa_data->kworker_func)); 799 printf("\n"); 800 } 801 802 } 803 804 /* 805 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data 806 */ 807 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx) 808 { 809 struct timerlat_aa_data *taa_data; 810 int i; 811 812 if (!taa_ctx->taa_data) 813 return; 814 815 for (i = 0; i < nr_cpus; i++) { 816 taa_data = timerlat_aa_get_data(taa_ctx, i); 817 818 if (taa_data->prev_irqs_seq) { 819 trace_seq_destroy(taa_data->prev_irqs_seq); 820 free(taa_data->prev_irqs_seq); 821 } 822 823 if (taa_data->nmi_seq) { 824 trace_seq_destroy(taa_data->nmi_seq); 825 free(taa_data->nmi_seq); 826 } 827 828 if (taa_data->irqs_seq) { 829 trace_seq_destroy(taa_data->irqs_seq); 830 free(taa_data->irqs_seq); 831 } 832 833 if (taa_data->softirqs_seq) { 834 trace_seq_destroy(taa_data->softirqs_seq); 835 free(taa_data->softirqs_seq); 836 } 837 838 if (taa_data->threads_seq) { 839 trace_seq_destroy(taa_data->threads_seq); 840 free(taa_data->threads_seq); 841 } 842 843 if (taa_data->stack_seq) { 844 trace_seq_destroy(taa_data->stack_seq); 845 free(taa_data->stack_seq); 846 } 847 } 848 } 849 850 /* 851 * timerlat_aa_init_seqs - Init seq files used to store parsed information 852 * 853 * Instead of keeping data structures to store raw data, use seq files to 854 * store parsed data. 855 * 856 * Allocates and initialize seq files. 857 * 858 * Returns 0 on success, -1 otherwise. 859 */ 860 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx) 861 { 862 struct timerlat_aa_data *taa_data; 863 int i; 864 865 for (i = 0; i < nr_cpus; i++) { 866 867 taa_data = timerlat_aa_get_data(taa_ctx, i); 868 869 taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq)); 870 if (!taa_data->prev_irqs_seq) 871 goto out_err; 872 873 trace_seq_init(taa_data->prev_irqs_seq); 874 875 taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq)); 876 if (!taa_data->nmi_seq) 877 goto out_err; 878 879 trace_seq_init(taa_data->nmi_seq); 880 881 taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq)); 882 if (!taa_data->irqs_seq) 883 goto out_err; 884 885 trace_seq_init(taa_data->irqs_seq); 886 887 taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq)); 888 if (!taa_data->softirqs_seq) 889 goto out_err; 890 891 trace_seq_init(taa_data->softirqs_seq); 892 893 taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq)); 894 if (!taa_data->threads_seq) 895 goto out_err; 896 897 trace_seq_init(taa_data->threads_seq); 898 899 taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq)); 900 if (!taa_data->stack_seq) 901 goto out_err; 902 903 trace_seq_init(taa_data->stack_seq); 904 } 905 906 return 0; 907 908 out_err: 909 timerlat_aa_destroy_seqs(taa_ctx); 910 return -1; 911 } 912 913 /* 914 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis 915 */ 916 static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks) 917 { 918 919 tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", 920 timerlat_aa_handler, tool); 921 922 tracefs_event_disable(tool->trace.inst, "osnoise", NULL); 923 924 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", 925 timerlat_aa_nmi_handler, tool); 926 927 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise", 928 timerlat_aa_irq_handler, tool); 929 930 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise", 931 timerlat_aa_softirq_handler, tool); 932 933 tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise", 934 timerlat_aa_thread_handler, tool); 935 936 tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack", 937 timerlat_aa_stack_handler, tool); 938 if (!dump_tasks) 939 return; 940 941 tracefs_event_disable(tool->trace.inst, "sched", "sched_switch"); 942 tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch", 943 timerlat_aa_sched_switch_handler, tool); 944 945 tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start"); 946 tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start", 947 timerlat_aa_kworker_start_handler, tool); 948 } 949 950 /* 951 * timerlat_aa_register_events - Register events used in the auto-analysis 952 * 953 * Returns 0 on success, -1 otherwise. 954 */ 955 static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks) 956 { 957 int retval; 958 959 tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", 960 timerlat_aa_handler, tool); 961 962 963 /* 964 * register auto-analysis handlers. 965 */ 966 retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL); 967 if (retval < 0 && !errno) { 968 err_msg("Could not find osnoise events\n"); 969 goto out_err; 970 } 971 972 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", 973 timerlat_aa_nmi_handler, tool); 974 975 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise", 976 timerlat_aa_irq_handler, tool); 977 978 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise", 979 timerlat_aa_softirq_handler, tool); 980 981 tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise", 982 timerlat_aa_thread_handler, tool); 983 984 tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack", 985 timerlat_aa_stack_handler, tool); 986 987 if (!dump_tasks) 988 return 0; 989 990 /* 991 * Dump task events. 992 */ 993 retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch"); 994 if (retval < 0 && !errno) { 995 err_msg("Could not find sched_switch\n"); 996 goto out_err; 997 } 998 999 tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch", 1000 timerlat_aa_sched_switch_handler, tool); 1001 1002 retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start"); 1003 if (retval < 0 && !errno) { 1004 err_msg("Could not find workqueue_execute_start\n"); 1005 goto out_err; 1006 } 1007 1008 tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start", 1009 timerlat_aa_kworker_start_handler, tool); 1010 1011 return 0; 1012 1013 out_err: 1014 timerlat_aa_unregister_events(tool, dump_tasks); 1015 return -1; 1016 } 1017 1018 /** 1019 * timerlat_aa_destroy - Destroy timerlat auto-analysis 1020 */ 1021 void timerlat_aa_destroy(void) 1022 { 1023 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); 1024 1025 if (!taa_ctx) 1026 return; 1027 1028 if (!taa_ctx->taa_data) 1029 goto out_ctx; 1030 1031 timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks); 1032 timerlat_aa_destroy_seqs(taa_ctx); 1033 free(taa_ctx->taa_data); 1034 out_ctx: 1035 free(taa_ctx); 1036 } 1037 1038 /** 1039 * timerlat_aa_init - Initialize timerlat auto-analysis 1040 * 1041 * Returns 0 on success, -1 otherwise. 1042 */ 1043 int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks, enum stack_format stack_format) 1044 { 1045 struct timerlat_aa_context *taa_ctx; 1046 int retval; 1047 1048 taa_ctx = calloc(1, sizeof(*taa_ctx)); 1049 if (!taa_ctx) 1050 return -1; 1051 1052 __timerlat_aa_ctx = taa_ctx; 1053 1054 taa_ctx->tool = tool; 1055 taa_ctx->dump_tasks = dump_tasks; 1056 taa_ctx->stack_format = stack_format; 1057 1058 taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data)); 1059 if (!taa_ctx->taa_data) 1060 goto out_err; 1061 1062 retval = timerlat_aa_init_seqs(taa_ctx); 1063 if (retval) 1064 goto out_err; 1065 1066 retval = timerlat_aa_register_events(tool, dump_tasks); 1067 if (retval) 1068 goto out_err; 1069 1070 return 0; 1071 1072 out_err: 1073 timerlat_aa_destroy(); 1074 return -1; 1075 } 1076