// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
 */

#include <stdlib.h>
#include <errno.h>
#include "utils.h"
#include "osnoise.h"
#include "timerlat.h"
#include <unistd.h>

enum timelat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};

#define MAX_COMM	24

/*
 * Per-cpu statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state */
	int curr_state;

	/* timerlat IRQ latency */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetched using tep_get_field_val();
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer irq */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sum.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information.
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread.
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker?
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};

/*
 * The analysis context and system wide view
 */
struct timerlat_aa_context {
	int nr_cpus;
	int dump_tasks;

	/* per CPU data */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};

/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
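
/*
 * Purely illustrative sketch of the alternative hinted at in the comment
 * above: storing the context in the tool and passing the tool down to the
 * accessor. The "aa_context" member is hypothetical; struct osnoise_tool
 * does not have it here.
 *
 *	static struct timerlat_aa_context *timerlat_aa_get_ctx(struct osnoise_tool *tool)
 *	{
 *		return tool->aa_context;
 *	}
 */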

/*
 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
 */
static struct timerlat_aa_data
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
{
	return &taa_ctx->taa_data[cpu];
}

/*
 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				   struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_thread_sum = 0;
	taa_data->thread_blocking_duration = 0;
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks.
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Get exit from idle case.
	 *
	 * If it is not the idle thread:
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * If the latency is shorter than the known exit from idle:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}
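
/*
 * Worked example for the check above (illustrative numbers only): with
 * tlat_irq_timstamp = 1,000,000 ns and tlat_irq_latency = 30,000 ns, the
 * timer was expected to fire at 970,000 ns. If the previous IRQ started at
 * prev_irq_timstamp = 950,000 ns and ran for prev_irq_duration = 40,000 ns,
 * it only finished at 990,000 ns, after the expected firing time, so it may
 * have delayed the timerlat IRQ and the sample is not used to update
 * max_exit_idle_latency.
 */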

/*
 * timerlat_aa_thread_latency - Handles timerlat thread event
 */
static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
				      struct trace_seq *s, struct tep_record *record,
				      struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the IRQ of the next cycle.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
	taa_data->tlat_thread_timstamp = record->ts;

	/* Thread latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);

	return 0;
}

/*
 * timerlat_aa_handler - Handle timerlat events
 *
 * This function is called to handle timerlat events recording statistics.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
			       struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long thread;

	if (!taa_data)
		return -1;

	tep_get_field_val(s, event, "context", record, &thread, 1);
	if (!thread)
		return timerlat_aa_irq_latency(taa_data, s, record, event);
	else
		return timerlat_aa_thread_latency(taa_data, s, record, event);
}

/*
 * timerlat_aa_nmi_handler - Handles NMI noise
 *
 * It is used to collect information about interferences from NMI. It is
 * hooked to the osnoise:nmi_noise event.
 */
static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long start;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);

	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n",
				 "nmi", ns_to_usf(duration));
		return 0;
	}

	taa_data->thread_nmi_sum += duration;
	trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n",
			 "nmi", ns_to_usf(duration));

	return 0;
}
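
/*
 * Note: the noise handlers in this file share the same per-CPU state
 * machine. While curr_state == TIMERLAT_WAITING_IRQ, any noise is recorded
 * as the previous IRQ/NMI that may delay the upcoming timerlat IRQ. Once
 * the timerlat IRQ arrives, curr_state becomes TIMERLAT_WAITING_THREAD and
 * noise is accumulated as interference suffered by the timerlat thread.
 */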

/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the others because it measures:
 *  - The IRQs that can delay the timer IRQ before it happened.
 *  - The Timerlat IRQ handler.
 *  - The IRQs that happened between the timerlat IRQ and the timerlat thread
 *    (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
				 desc, vector, ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler.
	 */
	if (!taa_data->timer_irq_start_time) {
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time of reading the clock and the time at
		 * which the event is placed in the buffer (the trace
		 * event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set the IRQ start delay to 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * Not an exit from idle.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu \t %9.2f us\n",
			 desc, vector, ns_to_usf(duration));

	return 0;
}
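
/*
 * Worked example for the start delay computed above (illustrative numbers
 * only): if the timerlat IRQ was expected at expected_start = 970,000 ns on
 * the trace clock and the irq_noise event reports start = 970,800 ns, the
 * timer IRQ handler started 800 ns late, so timer_irq_start_delay = 800 ns.
 * If, because of the two clock sources, start were 969,900 ns instead, the
 * difference would be negative and timer_irq_start_delay is set to 0.
 */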

static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };

/*
 * timerlat_aa_softirq_handler - Handles Softirq noise
 *
 * It is used to collect information about interferences from Softirq. It is
 * hooked to the osnoise:softirq_noise event.
 *
 * It is only printed in the non-rt kernel, as softirqs become threads on RT.
 */
static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
				       struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;

	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
		return 0;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);

	taa_data->thread_softirq_sum += duration;

	trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n",
			 softirq_name[vector], vector, ns_to_usf(duration));
	return 0;
}

/*
 * timerlat_aa_thread_handler - Handles thread noise
 *
 * It is used to collect information about interferences from threads. It is
 * hooked to the osnoise:thread_noise event.
 *
 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
 */
static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
				      struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long start;
	unsigned long long pid;
	const char *comm;
	int val;

	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
		return 0;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);

	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);

	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
		taa_data->thread_blocking_duration = duration;

		if (comm)
			strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
		else
			sprintf(taa_data->run_thread_comm, "<...>");

	} else {
		taa_data->thread_thread_sum += duration;

		trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
				 comm, pid, ns_to_usf(duration));
	}

	return 0;
}
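
/*
 * Note on the classification above: the first thread_noise event whose pid
 * matches run_thread_pid (the task that was running when the timerlat IRQ
 * hit) is recorded as the blocking thread; every other thread_noise sample
 * in the window is accumulated in thread_thread_sum as thread interference
 * and listed in threads_seq.
 */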

/*
 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
 *
 * Saves and parses the stack trace generated by the timerlat IRQ.
 */
static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
				     struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long *caller;
	const char *function;
	int val, i;

	trace_seq_reset(taa_data->stack_seq);

	trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n");
	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
	if (caller) {
		for (i = 0; ; i++) {
			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
			if (!function)
				break;
			trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
		}
	}
	return 0;
}

/*
 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
 *
 * Handles the sched:sched_switch event to trace the current thread running on the
 * CPU. It is used to display the threads running on the other CPUs when the trace
 * stops.
 */
static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
					    struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	const char *comm;
	int val;

	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);

	strncpy(taa_data->current_comm, comm, MAX_COMM);

	/*
	 * If this was a kworker, clean the last kworkers that ran.
	 */
	taa_data->kworker = 0;
	taa_data->kworker_func = 0;

	return 0;
}

/*
 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
 *
 * Handles the workqueue:workqueue_execute_start event, keeping track of
 * the job that a kworker could be doing on the CPU.
 *
 * We have already caught hardware-related latency problems caused by work
 * queues running driver code that stalls the hardware, for example, with
 * DRM drivers.
 */
static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
					     struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);

	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
	return 0;
}
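
/*
 * Note: "work" and "function" are kept as raw addresses here; they are only
 * resolved to symbol names via tep_find_function() when the per-CPU task
 * dump is printed in timerlat_auto_analysis().
 */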

/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit the stop tracing condition
 *
 * This is the core of the analysis.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency?
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 *
	 * TODO: Add a list of previous IRQs, and then run the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf(" Previous IRQ interference: \t\t up to %9.2f us\n",
			       ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf(" IRQ handler delay: %16s %9.2f us (%.2f %%)\n",
	       (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
	       ns_to_usf(taa_data->timer_irq_start_delay),
	       ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf(" IRQ latency: \t\t\t\t %9.2f us\n",
	       ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all, folks: the stack trace was printed before the stop,
		 * so it will be displayed. It is the key.
		 */
		printf(" Blocking thread:\n");
		printf(" %24s:%-9llu\n",
		       taa_data->run_thread_comm, taa_data->run_thread_pid);
	} else {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf(" Timerlat IRQ duration: \t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->timer_irq_duration),
		       ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recall that this is already net of NMI/IRQ/Softirq interference, so
		 * there is no need to compute those values here.
		 */
		printf(" Blocking thread: \t\t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_blocking_duration),
		       ns_to_per(total, taa_data->thread_blocking_duration));

		printf(" %24s:%-9llu %9.2f us\n",
		       taa_data->run_thread_comm, taa_data->run_thread_pid,
		       ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf(" NMI interference \t\t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_nmi_sum),
		       ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf(" IRQ interference \t\t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_irq_sum),
		       ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf(" Softirq interference \t\t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_softirq_sum),
		       ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat thread is not the highest prio. OK, the
	 * migration thread can happen. But otherwise, you are no longer measuring
	 * only the "scheduling latency": this is the difference between scheduling
	 * latency and timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf(" Thread interference \t\t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_thread_sum),
		       ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf(" %s latency: \t\t\t %9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
	       ns_to_usf(total));
}

static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
{
	struct trace_instance *trace = &taa_ctx->tool->trace;
	int retval;

	retval = tracefs_iterate_raw_events(trace->tep,
					    trace->inst,
					    NULL,
					    0,
					    collect_registered_events,
					    trace);
	if (retval < 0) {
		err_msg("Error iterating on events\n");
		return 0;
	}

	return 1;
}

/**
 * timerlat_auto_analysis - Analyze the collected data
 */
void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	unsigned long long max_exit_from_idle = 0;
	struct timerlat_aa_data *taa_data;
	int max_exit_from_idle_cpu;
	struct tep_handle *tep;
	int cpu;

	timerlat_auto_analysis_collect_trace(taa_ctx);

	/* bring stop tracing to the ns scale */
	irq_thresh = irq_thresh * 1000;
	thread_thresh = thread_thresh * 1000;

	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
		taa_data = timerlat_aa_get_data(taa_ctx, cpu);

		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
		}

		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
			max_exit_from_idle = taa_data->max_exit_idle_latency;
			max_exit_from_idle_cpu = cpu;
		}

	}

	if (max_exit_from_idle) {
		printf("\n");
		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
		       ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
	}
	if (!taa_ctx->dump_tasks)
		return;

	printf("\n");
	printf("Printing CPU tasks:\n");
	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
		tep = taa_ctx->tool->trace.tep;

		printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);

		if (taa_data->kworker_func)
			printf(" kworker:%s:%s",
			       tep_find_function(tep, taa_data->kworker) ? : "<...>",
			       tep_find_function(tep, taa_data->kworker_func));
		printf("\n");
	}

}

/*
 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
 */
static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
{
	struct timerlat_aa_data *taa_data;
	int i;

	if (!taa_ctx->taa_data)
		return;

	for (i = 0; i < taa_ctx->nr_cpus; i++) {
		taa_data = timerlat_aa_get_data(taa_ctx, i);

		if (taa_data->prev_irqs_seq) {
			trace_seq_destroy(taa_data->prev_irqs_seq);
			free(taa_data->prev_irqs_seq);
		}

		if (taa_data->nmi_seq) {
			trace_seq_destroy(taa_data->nmi_seq);
			free(taa_data->nmi_seq);
		}

		if (taa_data->irqs_seq) {
			trace_seq_destroy(taa_data->irqs_seq);
			free(taa_data->irqs_seq);
		}

		if (taa_data->softirqs_seq) {
			trace_seq_destroy(taa_data->softirqs_seq);
			free(taa_data->softirqs_seq);
		}

		if (taa_data->threads_seq) {
			trace_seq_destroy(taa_data->threads_seq);
			free(taa_data->threads_seq);
		}

		if (taa_data->stack_seq) {
			trace_seq_destroy(taa_data->stack_seq);
			free(taa_data->stack_seq);
		}
	}
}

/*
 * timerlat_aa_init_seqs - Init seq files used to store parsed information
 *
 * Instead of keeping data structures to store raw data, use seq files to
 * store parsed data.
 *
 * Allocates and initializes seq files.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
{
	struct timerlat_aa_data *taa_data;
	int i;

	for (i = 0; i < taa_ctx->nr_cpus; i++) {

		taa_data = timerlat_aa_get_data(taa_ctx, i);

		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
		if (!taa_data->prev_irqs_seq)
			goto out_err;

		trace_seq_init(taa_data->prev_irqs_seq);

		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
		if (!taa_data->nmi_seq)
			goto out_err;

		trace_seq_init(taa_data->nmi_seq);

		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
		if (!taa_data->irqs_seq)
			goto out_err;

		trace_seq_init(taa_data->irqs_seq);

		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
		if (!taa_data->softirqs_seq)
			goto out_err;

		trace_seq_init(taa_data->softirqs_seq);

		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
		if (!taa_data->threads_seq)
			goto out_err;

		trace_seq_init(taa_data->threads_seq);

		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
		if (!taa_data->stack_seq)
			goto out_err;

		trace_seq_init(taa_data->stack_seq);
	}

	return 0;

out_err:
	timerlat_aa_destroy_seqs(taa_ctx);
	return -1;
}

/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{
	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);
	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}

/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				   timerlat_aa_handler, tool);

	/*
	 * register auto-analysis handlers.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}

/**
 * timerlat_aa_destroy - Destroy timerlat auto-analysis
 */
void timerlat_aa_destroy(void)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();

	if (!taa_ctx)
		return;

	if (!taa_ctx->taa_data)
		goto out_ctx;

	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
	timerlat_aa_destroy_seqs(taa_ctx);
	free(taa_ctx->taa_data);
out_ctx:
	free(taa_ctx);
}
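
/*
 * Typical lifecycle of the entry points exported by this file (an
 * illustrative sketch; the real call sites, the names of the threshold
 * variables, and the surrounding setup live in the timerlat tool and are
 * assumed here):
 *
 *	if (timerlat_aa_init(tool, dump_tasks))		// register handlers, alloc per-CPU data
 *		return -1;
 *
 *	// ... run timerlat and wait for the stop tracing condition ...
 *
 *	timerlat_auto_analysis(irq_stop_us, thread_stop_us);	// print the analysis
 *	timerlat_aa_destroy();
 */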

/**
 * timerlat_aa_init - Initialize timerlat auto-analysis
 *
 * Returns 0 on success, -1 otherwise.
 */
int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
{
	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	struct timerlat_aa_context *taa_ctx;
	int retval;

	taa_ctx = calloc(1, sizeof(*taa_ctx));
	if (!taa_ctx)
		return -1;

	__timerlat_aa_ctx = taa_ctx;

	taa_ctx->nr_cpus = nr_cpus;
	taa_ctx->tool = tool;
	taa_ctx->dump_tasks = dump_tasks;

	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
	if (!taa_ctx->taa_data)
		goto out_err;

	retval = timerlat_aa_init_seqs(taa_ctx);
	if (retval)
		goto out_err;

	retval = timerlat_aa_register_events(tool, dump_tasks);
	if (retval)
		goto out_err;

	return 0;

out_err:
	timerlat_aa_destroy();
	return -1;
}