// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
 */

#include <stdlib.h>
#include <errno.h>
#include "timerlat.h"
#include <unistd.h>

enum timerlat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};
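
/*
 * Each CPU's data runs a small state machine: the timerlat IRQ event moves
 * it to TIMERLAT_WAITING_THREAD, and the timerlat thread event moves it back
 * to TIMERLAT_WAITING_IRQ. The osnoise noise handlers below use the current
 * state to decide whether a noise sample delayed the timer IRQ (still waiting
 * for the IRQ) or the timerlat thread (IRQ already seen).
 */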

/* Used to fill spaces in the output */
static const char *spaces = "                                                                ";

#define MAX_COMM 24

/*
 * Per-CPU statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state */
	int curr_state;

	/* timerlat IRQ latency */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 *
	 * Note: "unsigned long long" because they are fetched using
	 * tep_get_field_val().
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer IRQ */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat IRQ
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * the timerlat IRQ latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sum.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information.
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread.
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker?
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};

/*
 * The analysis context and system-wide view.
 */
struct timerlat_aa_context {
	int nr_cpus;
	int dump_tasks;

	/* per-CPU data */
	struct timerlat_aa_data *taa_data;

	/*
	 * Required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};

/*
 * The data is stored in a file-scope static variable, but accessed via a
 * helper function.
 *
 * It could be stored inside the trace context, but every access would
 * require container_of() plus a series of pointers. Do we need it? Not sure.
 *
 * For now, keep it simple. If needed, store it in the tool, add the *context
 * as a parameter to timerlat_aa_get_ctx(), and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}

/*
 * timerlat_aa_get_data - Get the per-CPU data from the timerlat context
 */
static struct timerlat_aa_data
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
{
	return &taa_ctx->taa_data[cpu];
}
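
/*
 * The two handlers below parse the ftrace:timerlat event. The event is
 * generated twice per activation, once from IRQ context and once from the
 * timerlat thread; timerlat_aa_handler() dispatches on the event's "context"
 * field (zero for the IRQ occurrence, nonzero for the thread occurrence).
 * Both occurrences carry the "timer_latency" (in nanoseconds) and "seqnum"
 * fields used below.
 */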

/*
 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				   struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_thread_sum = 0;
	taa_data->thread_blocking_duration = 0;
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks.
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Get the exit from idle case.
	 *
	 * If it is not the idle thread:
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * If the latency is shorter than the known exit from idle latency:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}

/*
 * timerlat_aa_thread_latency - Handles timerlat thread event
 */
static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
				      struct trace_seq *s, struct tep_record *record,
				      struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the IRQ of the next cycle.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
	taa_data->tlat_thread_timstamp = record->ts;

	/* Thread latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);

	return 0;
}

/*
 * timerlat_aa_handler - Handle timerlat events
 *
 * This function is called to handle timerlat events, recording statistics.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
			       struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long thread;

	if (!taa_data)
		return -1;

	tep_get_field_val(s, event, "context", record, &thread, 1);
	if (!thread)
		return timerlat_aa_irq_latency(taa_data, s, record, event);
	else
		return timerlat_aa_thread_latency(taa_data, s, record, event);
}

/*
 * timerlat_aa_nmi_handler - Handles NMI noise
 *
 * It is used to collect information about interference from NMIs. It is
 * hooked to the osnoise:nmi_noise event.
 */
static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long start;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);

	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n",
				 "nmi",
				 24, spaces,
				 ns_to_usf(duration));
		return 0;
	}

	taa_data->thread_nmi_sum += duration;
	trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n",
			 "nmi",
			 24, spaces, ns_to_usf(duration));

	return 0;
}

/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interference from IRQs. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the others because it measures:
 *  - The IRQs that can delay the timer IRQ before it happened.
 *  - The timerlat IRQ handler itself.
 *  - The IRQs that happened between the timerlat IRQ and the timerlat thread
 *    (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
				 desc, vector,
				 15, spaces,
				 ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat IRQ handler.
	 */
	if (!taa_data->timer_irq_start_time) {
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time at which the clock is read and the
		 * time at which the event is placed in the buffer (the
		 * trace event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set the IRQ start delay to 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * Not an exit from idle.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
			 desc, vector,
			 24, spaces,
			 ns_to_usf(duration));

	return 0;
}

static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };

/*
 * timerlat_aa_softirq_handler - Handles softirq noise
 *
 * It is used to collect information about interference from softirqs. It is
 * hooked to the osnoise:softirq_noise event.
 *
 * It is only printed on non-RT kernels, as softirqs run as threads on
 * PREEMPT_RT.
 */
static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
				       struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;

	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
		return 0;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);

	taa_data->thread_softirq_sum += duration;

	trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
			 softirq_name[vector], vector,
			 24, spaces,
			 ns_to_usf(duration));
	return 0;
}

/*
 * timerlat_aa_thread_handler - Handles thread noise
 *
 * It is used to collect information about interference from threads. It is
 * hooked to the osnoise:thread_noise event.
 *
 * Note: if you see thread noise, your timerlat thread was not the highest
 * prio one.
 */
static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
				      struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long start;
	unsigned long long pid;
	const char *comm;
	int val;

	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
		return 0;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);

	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);

	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
		taa_data->thread_blocking_duration = duration;

		if (comm)
			strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
		else
			sprintf(taa_data->run_thread_comm, "<...>");

	} else {
		taa_data->thread_thread_sum += duration;

		trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n",
				 comm, pid,
				 15, spaces,
				 ns_to_usf(duration));
	}

	return 0;
}

/*
 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
 *
 * Saves and parses the stack trace generated by the timerlat IRQ.
 */
static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
				     struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long *caller;
	const char *function;
	int val, i;

	trace_seq_reset(taa_data->stack_seq);

	trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n");
	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
	if (caller) {
		for (i = 0; ; i++) {
			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
			if (!function)
				break;
			trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n",
					 14, spaces, function);
		}
	}
	return 0;
}

/*
 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
 *
 * Handles the sched:sched_switch event to trace the current thread running on the
 * CPU. It is used to display the threads running on the other CPUs when the trace
 * stops.
 */
static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
					    struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	const char *comm;
	int val;

	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);

	strncpy(taa_data->current_comm, comm, MAX_COMM);

	/*
	 * A new task is running: clear the information about the last
	 * kworker that ran.
	 */
	taa_data->kworker = 0;
	taa_data->kworker_func = 0;

	return 0;
}

/*
 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
 *
 * Handles the workqueue:workqueue_execute_start event, keeping track of
 * the job that a kworker might be doing on the CPU.
 *
 * This has already caught hardware-related latencies caused by workqueues
 * running driver code that stalls the hardware, for example, with DRM
 * drivers.
 */
static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
					     struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);

	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
	return 0;
}
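
/*
 * In the report below, ns_to_usf() converts a nanosecond value into
 * microseconds (as a double), and ns_to_per() expresses a value as a
 * percentage of the total latency. Both are helpers shared by the rtla
 * tools.
 */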

/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * This is the core of the analysis.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency?
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 *
	 * TODO: Add a list of previous IRQs, and then run the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf(" Previous IRQ interference: %.*s up to %9.2f us\n",
			       16, spaces,
			       ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces,
	       (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
	       ns_to_usf(taa_data->timer_irq_start_delay),
	       ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces,
	       ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all, folks: the stack trace was saved before the stop,
		 * so it will be displayed; it is the key.
		 */
		printf(" Blocking thread:\n");
		printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm,
		       taa_data->run_thread_pid);
	} else {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n",
		       30, spaces,
		       ns_to_usf(taa_data->timer_irq_duration),
		       ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recall that it is net of NMI/IRQ/softirq interference, so there
		 * is no need to compute those values here.
		 */
		printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_blocking_duration),
		       ns_to_per(total, taa_data->thread_blocking_duration));

		printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces,
		       taa_data->run_thread_comm, taa_data->run_thread_pid,
		       12, spaces, ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_nmi_sum),
		       ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_irq_sum),
		       ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces,
		       ns_to_usf(taa_data->thread_softirq_sum),
		       ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat thread is not the highest prio. OK, the
	 * migration thread can happen. But otherwise, you are not measuring only
	 * the "scheduling latency", and here is the difference between scheduling
	 * latency and timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces,
		       ns_to_usf(taa_data->thread_thread_sum),
		       ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread",
	       37, spaces, ns_to_usf(total));
}

static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
{
	struct trace_instance *trace = &taa_ctx->tool->trace;
	int retval;

	retval = tracefs_iterate_raw_events(trace->tep,
					    trace->inst,
					    NULL,
					    0,
					    collect_registered_events,
					    trace);
	if (retval < 0) {
		err_msg("Error iterating on events\n");
		return 0;
	}

	return 1;
}

/**
 * timerlat_auto_analysis - Analyze the collected data
 */
void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	unsigned long long max_exit_from_idle = 0;
	struct timerlat_aa_data *taa_data;
	int max_exit_from_idle_cpu;
	struct tep_handle *tep;
	int cpu;

	timerlat_auto_analysis_collect_trace(taa_ctx);

	/* bring the stop tracing thresholds to the ns scale */
	irq_thresh = irq_thresh * 1000;
	thread_thresh = thread_thresh * 1000;

	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
		taa_data = timerlat_aa_get_data(taa_ctx, cpu);

		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
		}

		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
			max_exit_from_idle = taa_data->max_exit_idle_latency;
			max_exit_from_idle_cpu = cpu;
		}

	}

	if (max_exit_from_idle) {
		printf("\n");
		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
		       ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
	}
	if (!taa_ctx->dump_tasks)
		return;

	printf("\n");
	printf("Printing CPU tasks:\n");
	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
		tep = taa_ctx->tool->trace.tep;

		printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);

		if (taa_data->kworker_func)
			printf(" kworker:%s:%s",
			       tep_find_function(tep, taa_data->kworker) ? : "<...>",
			       tep_find_function(tep, taa_data->kworker_func));
		printf("\n");
	}

}

/*
 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
 */
static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
{
	struct timerlat_aa_data *taa_data;
	int i;

	if (!taa_ctx->taa_data)
		return;

	for (i = 0; i < taa_ctx->nr_cpus; i++) {
		taa_data = timerlat_aa_get_data(taa_ctx, i);

		if (taa_data->prev_irqs_seq) {
			trace_seq_destroy(taa_data->prev_irqs_seq);
			free(taa_data->prev_irqs_seq);
		}

		if (taa_data->nmi_seq) {
			trace_seq_destroy(taa_data->nmi_seq);
			free(taa_data->nmi_seq);
		}

		if (taa_data->irqs_seq) {
			trace_seq_destroy(taa_data->irqs_seq);
			free(taa_data->irqs_seq);
		}

		if (taa_data->softirqs_seq) {
			trace_seq_destroy(taa_data->softirqs_seq);
			free(taa_data->softirqs_seq);
		}

		if (taa_data->threads_seq) {
			trace_seq_destroy(taa_data->threads_seq);
			free(taa_data->threads_seq);
		}

		if (taa_data->stack_seq) {
			trace_seq_destroy(taa_data->stack_seq);
			free(taa_data->stack_seq);
		}
	}
}

/*
 * timerlat_aa_init_seqs - Init seq files used to store parsed information
 *
 * Instead of keeping data structures to store raw data, use seq files to
 * store parsed data.
 *
 * Allocates and initializes the seq files.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
{
	struct timerlat_aa_data *taa_data;
	int i;

	for (i = 0; i < taa_ctx->nr_cpus; i++) {

		taa_data = timerlat_aa_get_data(taa_ctx, i);

		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
		if (!taa_data->prev_irqs_seq)
			goto out_err;

		trace_seq_init(taa_data->prev_irqs_seq);

		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
		if (!taa_data->nmi_seq)
			goto out_err;

		trace_seq_init(taa_data->nmi_seq);

		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
		if (!taa_data->irqs_seq)
			goto out_err;

		trace_seq_init(taa_data->irqs_seq);

		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
		if (!taa_data->softirqs_seq)
			goto out_err;

		trace_seq_init(taa_data->softirqs_seq);

		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
		if (!taa_data->threads_seq)
			goto out_err;

		trace_seq_init(taa_data->threads_seq);

		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
		if (!taa_data->stack_seq)
			goto out_err;

		trace_seq_init(taa_data->stack_seq);
	}

	return 0;

out_err:
	timerlat_aa_destroy_seqs(taa_ctx);
	return -1;
}

/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);
	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
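
/*
 * The handlers registered below are invoked from
 * timerlat_auto_analysis_collect_trace(), which iterates over the recorded
 * trace and dispatches each record to the handler registered for its event.
 * Anything registered or enabled here must have a matching entry in
 * timerlat_aa_unregister_events(), which also serves as the unwinding path
 * when registration fails.
 */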

/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				   timerlat_aa_handler, tool);

	/*
	 * Register auto-analysis handlers.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}

/**
 * timerlat_aa_destroy - Destroy timerlat auto-analysis
 */
void timerlat_aa_destroy(void)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();

	if (!taa_ctx)
		return;

	if (!taa_ctx->taa_data)
		goto out_ctx;

	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
	timerlat_aa_destroy_seqs(taa_ctx);
	free(taa_ctx->taa_data);
out_ctx:
	free(taa_ctx);
}
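
/*
 * A minimal usage sketch, assuming an osnoise_tool already set up by one of
 * the rtla timerlat commands:
 *
 *	if (timerlat_aa_init(tool, dump_tasks))
 *		return -1;
 *	// ... run the timerlat session until it hits stop tracing ...
 *	timerlat_auto_analysis(irq_thresh_us, thread_thresh_us);
 *	timerlat_aa_destroy();
 *
 * The thresholds are given in microseconds; timerlat_auto_analysis()
 * converts them to the nanosecond scale internally.
 */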

/**
 * timerlat_aa_init - Initialize timerlat auto-analysis
 *
 * Returns 0 on success, -1 otherwise.
 */
int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
{
	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	struct timerlat_aa_context *taa_ctx;
	int retval;

	taa_ctx = calloc(1, sizeof(*taa_ctx));
	if (!taa_ctx)
		return -1;

	__timerlat_aa_ctx = taa_ctx;

	taa_ctx->nr_cpus = nr_cpus;
	taa_ctx->tool = tool;
	taa_ctx->dump_tasks = dump_tasks;

	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
	if (!taa_ctx->taa_data)
		goto out_err;

	retval = timerlat_aa_init_seqs(taa_ctx);
	if (retval)
		goto out_err;

	retval = timerlat_aa_register_events(tool, dump_tasks);
	if (retval)
		goto out_err;

	return 0;

out_err:
	timerlat_aa_destroy();
	return -1;
}