// SPDX-License-Identifier: GPL-2.0
/*
 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
 *
 * Based on "hwlat_detector" tracer by:
 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *   With feedback from Clark Williams <williams@redhat.com>
 *
 * And also based on the rtsl tracer presented on:
 *   DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
 *   scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
 *   (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
 *
 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
 */

#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched.h>
#include "trace.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/trace/irq_vectors.h>
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#endif /* CONFIG_X86_LOCAL_APIC */

#include <trace/events/irq.h>
#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/osnoise.h>

static struct trace_array	*osnoise_trace;

/*
 * Default values.
 */
#define BANNER			"osnoise: "
#define DEFAULT_SAMPLE_PERIOD	1000000		/* 1s */
#define DEFAULT_SAMPLE_RUNTIME	1000000		/* 1s */

#define DEFAULT_TIMERLAT_PERIOD	1000		/* 1ms */
#define DEFAULT_TIMERLAT_PRIO	95		/* FIFO 95 */

/*
 * NMI runtime info.
 */
struct osn_nmi {
	u64	count;
	u64	delta_start;
};

/*
 * IRQ runtime info.
 */
struct osn_irq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

#define IRQ_CONTEXT	0
#define THREAD_CONTEXT	1

/*
 * softirq runtime info.
 */
struct osn_softirq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * thread runtime info.
 */
struct osn_thread {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
	struct task_struct	*kthread;
	bool			sampling;
	pid_t			pid;
	struct osn_nmi		nmi;
	struct osn_irq		irq;
	struct osn_softirq	softirq;
	struct osn_thread	thread;
	local_t			int_counter;
};

/*
 * Per-cpu runtime information.
 */
DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);

/*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
 */
static inline struct osnoise_variables *this_cpu_osn_var(void)
{
	return this_cpu_ptr(&per_cpu_osnoise_var);
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
	struct task_struct	*kthread;
	struct hrtimer		timer;
	u64			rel_period;
	u64			abs_period;
	bool			tracing_thread;
	u64			count;
};

DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);

/*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
 */
static inline struct timerlat_variables *this_cpu_tmr_var(void)
{
	return this_cpu_ptr(&per_cpu_timerlat_var);
}

/*
 * tlat_var_reset - Reset the values of the per-cpu timerlat_variables
 */
static inline void tlat_var_reset(void)
{
	struct timerlat_variables *tlat_var;
	int cpu;

	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
		memset(tlat_var, 0, sizeof(*tlat_var));
	}
}
#else /* CONFIG_TIMERLAT_TRACER */
#define tlat_var_reset()	do {} while (0)
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * osn_var_reset - Reset the values of the per-cpu osnoise_variables
 */
static inline void osn_var_reset(void)
{
	struct osnoise_variables *osn_var;
	int cpu;

	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
		memset(osn_var, 0, sizeof(*osn_var));
	}
}

/*
 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
 */
static inline void osn_var_reset_all(void)
{
	osn_var_reset();
	tlat_var_reset();
}

/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 */
bool trace_osnoise_callback_enabled;

/*
 * osnoise sample structure definition. Used to store the statistics of a
 * sample run.
 */
struct osnoise_sample {
	u64		runtime;	/* runtime */
	u64		noise;		/* noise */
	u64		max_sample;	/* max single noise sample */
	int		hw_count;	/* # HW (incl. hypervisor) interference */
	int		nmi_count;	/* # NMIs during this sample */
	int		irq_count;	/* # IRQs during this sample */
	int		softirq_count;	/* # softirqs during this sample */
	int		thread_count;	/* # threads during this sample */
};

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat sample structure definition. Used to store the statistics of
 * a sample run.
 */
struct timerlat_sample {
	u64		timer_latency;	/* timer_latency */
	unsigned int	seqnum;		/* unique sequence */
	int		context;	/* timer context */
};
#endif
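
/*
 * Note: the sample structures above summarize a whole sampling window,
 * while the tracepoints created at the top of this file (e.g., irq_noise,
 * softirq_noise, thread_noise, nmi_noise) report each individual noise
 * occurrence while the window is still being measured.
 */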

/*
 * Protect the interface.
 */
struct mutex interface_lock;

/*
 * Tracer data.
 */
static struct osnoise_data {
	u64	sample_period;		/* total sampling period */
	u64	sample_runtime;		/* active sampling portion of period */
	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
	u64	timerlat_period;	/* timerlat period */
	u64	print_stack;		/* print IRQ stack if total > */
	int	timerlat_tracer;	/* timerlat tracer */
#endif
	bool	tainted;		/* inform users and developers about a problem */
} osnoise_data = {
	.sample_period			= DEFAULT_SAMPLE_PERIOD,
	.sample_runtime			= DEFAULT_SAMPLE_RUNTIME,
	.stop_tracing			= 0,
	.stop_tracing_total		= 0,
#ifdef CONFIG_TIMERLAT_TRACER
	.print_stack			= 0,
	.timerlat_period		= DEFAULT_TIMERLAT_PERIOD,
	.timerlat_tracer		= 0,
#endif
};

/*
 * Boolean variable used to inform that the tracer is currently sampling.
 */
static bool osnoise_busy;

#ifdef CONFIG_PREEMPT_RT
/*
 * Print the osnoise header info.
 */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "#                                _-------=> irqs-off\n");
	seq_puts(s, "#                               / _------=> need-resched\n");
	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");

	seq_puts(s, "#                              |||||| /          ");
	seq_puts(s, "                                     MAX\n");

	seq_puts(s, "#                              ||||| /                         ");
	seq_puts(s, "                    SINGLE      Interference counters:\n");

	seq_puts(s, "#                              |||||||               RUNTIME   ");
	seq_puts(s, "   NOISE  %% OF CPU  NOISE     +-----------------------------+\n");

	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

	seq_puts(s, "#              | |         |   |||||||      |           |      ");
	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "#                                _-----=> irqs-off\n");
	seq_puts(s, "#                               / _----=> need-resched\n");
	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
	seq_puts(s, "#                              || / _--=> preempt-depth     ");
	seq_puts(s, "                       MAX\n");

	seq_puts(s, "#                              || /                         ");
	seq_puts(s, "                    SINGLE      Interference counters:\n");

	seq_puts(s, "#                              ||||               RUNTIME   ");
	seq_puts(s, "   NOISE  %% OF CPU  NOISE     +-----------------------------+\n");

	seq_puts(s, "#           TASK-PID      CPU# ||||   TIMESTAMP    IN US    ");
	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

	seq_puts(s, "#              | |         |   ||||      |           |      ");
	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * osnoise_taint - report an osnoise error.
 */
#define osnoise_taint(msg) ({							\
	struct trace_array *tr = osnoise_trace;					\
										\
	trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg);	\
	osnoise_data.tainted = true;						\
})

/*
 * Record an osnoise_sample into the tracer buffer.
 */
static void trace_osnoise_sample(struct osnoise_sample *sample)
{
	struct trace_array *tr = osnoise_trace;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct trace_event_call *call = &event_osnoise;
	struct ring_buffer_event *event;
	struct osnoise_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->runtime		= sample->runtime;
	entry->noise		= sample->noise;
	entry->max_sample	= sample->max_sample;
	entry->hw_count		= sample->hw_count;
	entry->nmi_count	= sample->nmi_count;
	entry->irq_count	= sample->irq_count;
	entry->softirq_count	= sample->softirq_count;
	entry->thread_count	= sample->thread_count;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Print the timerlat header info.
 */
#ifdef CONFIG_PREEMPT_RT
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "#                                _-------=> irqs-off\n");
	seq_puts(s, "#                               / _------=> need-resched\n");
	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
	seq_puts(s, "#                              |||||| /\n");
	seq_puts(s, "#                              |||||||             ACTIVATION\n");
	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
	seq_puts(s, "       CONTEXT                LATENCY\n");
	seq_puts(s, "#              | |         |   |||||||      |         |      ");
	seq_puts(s, "            |                       |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "#                                _-----=> irqs-off\n");
	seq_puts(s, "#                               / _----=> need-resched\n");
	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
	seq_puts(s, "#                              || / _--=> preempt-depth\n");
	seq_puts(s, "#                              || /\n");
	seq_puts(s, "#                              ||||             ACTIVATION\n");
	seq_puts(s, "#           TASK-PID      CPU# ||||   TIMESTAMP    ID      ");
	seq_puts(s, "       CONTEXT                LATENCY\n");
	seq_puts(s, "#              | |         |   ||||      |         |      ");
	seq_puts(s, "            |                       |\n");
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * Record a timerlat_sample into the tracer buffer.
 */
static void trace_timerlat_sample(struct timerlat_sample *sample)
{
	struct trace_array *tr = osnoise_trace;
	struct trace_event_call *call = &event_osnoise;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct timerlat_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->seqnum		= sample->seqnum;
	entry->context		= sample->context;
	entry->timer_latency	= sample->timer_latency;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}
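
/*
 * Each timerlat activation generates two samples that share a seqnum: one
 * from the timer IRQ handler (IRQ_CONTEXT) and one from the awakened
 * thread (THREAD_CONTEXT). This allows comparing the thread wakeup latency
 * against the IRQ latency of the very same activation.
 */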

#ifdef CONFIG_STACKTRACE

#define MAX_CALLS	256

/*
 * Stack trace will take place only at IRQ level, so there is no need
 * to control nesting here.
 */
struct trace_stack {
	int		stack_size;
	int		nr_entries;
	unsigned long	calls[MAX_CALLS];
};

static DEFINE_PER_CPU(struct trace_stack, trace_stack);

/*
 * timerlat_save_stack - save a stack trace without printing
 *
 * Save the current stack trace without printing. The
 * stack will be printed later, after the end of the measurement.
 */
static void timerlat_save_stack(int skip)
{
	unsigned int size, nr_entries;
	struct trace_stack *fstack;

	fstack = this_cpu_ptr(&trace_stack);

	size = ARRAY_SIZE(fstack->calls);

	nr_entries = stack_trace_save(fstack->calls, size, skip);

	fstack->stack_size = nr_entries * sizeof(unsigned long);
	fstack->nr_entries = nr_entries;
}

/*
 * timerlat_dump_stack - dump a stack trace previously saved
 *
 * Dump a saved stack trace into the trace buffer.
 */
static void timerlat_dump_stack(void)
{
	struct trace_event_call *call = &event_osnoise;
	struct trace_array *tr = osnoise_trace;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct trace_stack *fstack;
	struct stack_entry *entry;
	unsigned int size;

	preempt_disable_notrace();
	fstack = this_cpu_ptr(&trace_stack);
	size = fstack->stack_size;

	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
					  tracing_gen_ctx());
	if (!event)
		goto out;

	entry = ring_buffer_event_data(event);

	memcpy(&entry->caller, fstack->calls, size);
	entry->size = fstack->nr_entries;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);

out:
	preempt_enable_notrace();
}
#else /* CONFIG_STACKTRACE */
#define timerlat_dump_stack()	do {} while (0)
#define timerlat_save_stack(a)	do {} while (0)
#endif /* CONFIG_STACKTRACE */
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * Macros to encapsulate the time capturing infrastructure.
 */
#define time_get()		trace_clock_local()
#define time_to_us(x)		div_u64(x, 1000)
#define time_sub(a, b)		((a) - (b))

/*
 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
 *
 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
 * to discount the NMI interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->irq.delta_start)
		osn_var->irq.delta_start += duration;
}

#ifndef CONFIG_PREEMPT_RT
/*
 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
 *
 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
 * forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->softirq.delta_start)
		osn_var->softirq.delta_start += duration;
}
#else /* CONFIG_PREEMPT_RT */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
#endif
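
/*
 * A numeric sketch of the discount performed by the cond_move_*() helpers
 * above and below (values are illustrative only): a thread window starts
 * at t=100us and an IRQ runs from t=110us to t=130us. At IRQ exit, the
 * thread's delta_start is moved forward by the 20us IRQ duration, from
 * 100us to 120us. A reading at t=140us then yields 140 - 120 = 20us of
 * thread noise, with the IRQ interference already discounted.
 */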

/*
 * cond_move_thread_delta_start - Forward the delta_start of a running thread
 *
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
 * is pushed forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->thread.delta_start)
		osn_var->thread.delta_start += duration;
}

/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have their duration without
 * the interference from higher priority interrupts. Instead of keeping a
 * variable to discount the interrupt interference from these variables, the
 * starting time of these variables are pushed forward with the interrupt's
 * duration. In this way, a single variable is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.,:
 *
 *	now = time_get()
 *		--->	interrupt!
 *			delta_start -= int duration;
 *		<---
 *	duration = now - delta_start;
 *
 *	result: negative duration if the variable duration before the
 *	interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
	u64 int_counter, now;
	s64 duration;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		now = time_get();
		duration = (now - *delta_start);

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	/*
	 * This is evidence of race conditions that cause
	 * a value to be "discounted" too much.
	 */
	if (duration < 0)
		osnoise_taint("Negative duration!\n");

	*delta_start = 0;

	return duration;
}

/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts.
 *
 * See get_int_safe_duration() for an explanation.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*time = time_get();

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * copy_int_safe_time - Copy *src into *dst, aware of interference
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*dst = *src;

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
#endif /* CONFIG_TIMERLAT_TRACER */
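
/*
 * An illustrative sketch of how the helpers above are combined (this is
 * the pattern used by the handlers below, not an extra API):
 *
 *	set_int_safe_time(osn_var, &var->delta_start);	<-- window opens
 *	...						<-- interrupts may hit
 *	duration = get_int_safe_duration(osn_var, &var->delta_start);
 *
 * Any higher priority interference that hit in between has already been
 * pushed out of the window by the cond_move_*_delta_start() helpers, and
 * the int_counter loop rules out reading a half-updated delta_start.
 */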

/*
 * trace_osnoise_callback - NMI entry/exit callback
 *
 * This function is called at the entry and exit of NMI code. The bool enter
 * distinguishes between either case. This function is used to note an NMI
 * occurrence, compute the noise caused by the NMI, and to remove the noise
 * it is potentially causing on other interference variables.
 */
void trace_osnoise_callback(bool enter)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 duration;

	if (!osn_var->sampling)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter) {
			osn_var->nmi.delta_start = time_get();
			local_inc(&osn_var->int_counter);
		} else {
			duration = time_get() - osn_var->nmi.delta_start;

			trace_nmi_noise(osn_var->nmi.delta_start, duration);

			cond_move_irq_delta_start(osn_var, duration);
			cond_move_softirq_delta_start(osn_var, duration);
			cond_move_thread_delta_start(osn_var, duration);
		}
	}

	if (enter)
		osn_var->nmi.count++;
}

/*
 * osnoise_trace_irq_entry - Note the starting of an IRQ
 *
 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
 * The arrival_time is used to report... the arrival time. The delta_start
 * is used to compute the duration at the IRQ exit handler. See
 * cond_move_irq_delta_start().
 */
void osnoise_trace_irq_entry(int id)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->irq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
	osn_var->irq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
 *
 * Computes the duration of the IRQ noise, and traces it. Also discounts the
 * interference from other sources of noise that could currently be accounted.
 */
void osnoise_trace_irq_exit(int id, const char *desc)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	int duration;

	if (!osn_var->sampling)
		return;

	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
	osn_var->irq.arrival_time = 0;
	cond_move_softirq_delta_start(osn_var, duration);
	cond_move_thread_delta_start(osn_var, duration);
}

/*
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
 *
 * Used to note the starting of an IRQ occurrence.
 */
static void trace_irqentry_callback(void *data, int irq,
				    struct irqaction *action)
{
	osnoise_trace_irq_entry(irq);
}

/*
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
 *
 * Used to note the end of an IRQ occurrence.
 */
static void trace_irqexit_callback(void *data, int irq,
				   struct irqaction *action, int ret)
{
	osnoise_trace_irq_exit(irq, action->name);
}

/*
 * arch specific register function.
 */
int __weak osnoise_arch_register(void)
{
	return 0;
}
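
/*
 * On x86, an arch-specific osnoise_arch_register() can additionally hook
 * the local APIC vector tracepoints included at the top of this file
 * (under CONFIG_X86_LOCAL_APIC); other architectures fall back to these
 * weak stubs, relying only on the generic irq:* tracepoints.
 */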

/*
 * arch specific unregister function.
 */
void __weak osnoise_arch_unregister(void)
{
}

/*
 * hook_irq_events - Hook IRQ handling events
 *
 * This function hooks the IRQ related callbacks to the respective trace
 * events.
 */
static int hook_irq_events(void)
{
	int ret;

	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	if (ret)
		goto out_unregister_entry;

	ret = osnoise_arch_register();
	if (ret)
		goto out_irq_exit;

	return 0;

out_irq_exit:
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
out_unregister_entry:
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_irq_events - Unhook IRQ handling events
 *
 * This function unhooks the IRQ related callbacks from the respective trace
 * events.
 */
static void unhook_irq_events(void)
{
	osnoise_arch_unregister();
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
}

#ifndef CONFIG_PREEMPT_RT
/*
 * trace_softirq_entry_callback - Note the starting of a softirq
 *
 * Save the starting time of a softirq. As softirqs are non-preemptive to
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
 * to save the statistics. The arrival_time is used to report... the
 * arrival time. The delta_start is used to compute the duration at the
 * softirq exit handler. See cond_move_softirq_delta_start().
 */
static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->softirq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
	osn_var->softirq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Computes the duration of the softirq noise, and traces it. Also discounts
 * the interference from other sources of noise that could currently be
 * accounted.
 */
static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	int duration;

	if (!osn_var->sampling)
		return;

#ifdef CONFIG_TIMERLAT_TRACER
	/*
	 * If timerlat is enabled, but the irq handler did not run yet
	 * to enable the thread tracing, do not trace.
	 */
	if (unlikely(osnoise_data.timerlat_tracer)) {
		struct timerlat_variables *tlat_var;

		tlat_var = this_cpu_tmr_var();
		if (!tlat_var->tracing_thread) {
			osn_var->softirq.arrival_time = 0;
			osn_var->softirq.delta_start = 0;
			return;
		}
	}
#endif

	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
	cond_move_thread_delta_start(osn_var, duration);
	osn_var->softirq.arrival_time = 0;
}

/*
 * hook_softirq_events - Hook softirq handling events
 *
 * This function hooks the softirq related callbacks to the respective trace
 * events.
 */
static int hook_softirq_events(void)
{
	int ret;

	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
	if (ret)
		goto out_unreg_entry;

	return 0;

out_unreg_entry:
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_softirq_events - Unhook softirq handling events
 *
 * This function unhooks the softirq related callbacks from the respective
 * trace events.
 */
static void unhook_softirq_events(void)
{
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
}
#else /* CONFIG_PREEMPT_RT */
/*
 * softirqs run as threads in PREEMPT_RT mode.
 */
static int hook_softirq_events(void)
{
	return 0;
}
static void unhook_softirq_events(void)
{
}
#endif

/*
 * thread_entry - Record the starting of a thread noise window
 *
 * It saves the context switch time for a noisy thread, and increments
 * the interference counters.
 */
static void
thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
{
	if (!osn_var->sampling)
		return;
	/*
	 * The arrival time will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->thread.arrival_time = time_get();

	set_int_safe_time(osn_var, &osn_var->thread.delta_start);

	osn_var->thread.count++;
	local_inc(&osn_var->int_counter);
}

/*
 * thread_exit - Report the end of a thread noise window
 *
 * It computes the total noise from a thread, tracing if needed.
 */
static void
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
	int duration;

	if (!osn_var->sampling)
		return;

#ifdef CONFIG_TIMERLAT_TRACER
	if (osnoise_data.timerlat_tracer) {
		struct timerlat_variables *tlat_var;

		tlat_var = this_cpu_tmr_var();
		if (!tlat_var->tracing_thread) {
			osn_var->thread.delta_start = 0;
			osn_var->thread.arrival_time = 0;
			return;
		}
	}
#endif

	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);

	trace_thread_noise(t, osn_var->thread.arrival_time, duration);

	osn_var->thread.arrival_time = 0;
}
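
/*
 * The workload thread itself is not noise: the sched_switch handler below
 * compares the prev/next task pids against osn_var->pid (set by the
 * sampling thread), so only *other* threads open and close a thread noise
 * window.
 */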

/*
 * trace_sched_switch_callback - sched:sched_switch trace event handler
 *
 * This function is hooked to the sched:sched_switch trace event, and it is
 * used to record the beginning and to report the end of a thread noise window.
 */
static void
trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p,
			    struct task_struct *n)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (p->pid != osn_var->pid)
		thread_exit(osn_var, p);

	if (n->pid != osn_var->pid)
		thread_entry(osn_var, n);
}

/*
 * hook_thread_events - Hook the instrumentation for thread noise
 *
 * Hook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static int hook_thread_events(void)
{
	int ret;

	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
	if (ret)
		return -EINVAL;

	return 0;
}

/*
 * unhook_thread_events - Unhook the instrumentation for thread noise
 *
 * Unhook the osnoise tracer callbacks that handle the noise from other
 * threads on the necessary kernel events.
 */
static void unhook_thread_events(void)
{
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
}

/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count;
	s->irq_count = osn_var->irq.count;
	s->softirq_count = osn_var->softirq.count;
	s->thread_count = osn_var->thread.count;
}

/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count - s->nmi_count;
	s->irq_count = osn_var->irq.count - s->irq_count;
	s->softirq_count = osn_var->softirq.count - s->softirq_count;
	s->thread_count = osn_var->thread.count - s->thread_count;
}

/*
 * osnoise_stop_tracing - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_tracing(void)
{
	struct trace_array *tr = osnoise_trace;

	trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
			       "stop tracing hit on cpu %d\n", smp_processor_id());

	tracer_tracing_off(tr);
}
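
/*
 * A usage sketch (paths assume the default tracefs mount point): using the
 * interface created by init_tracefs() below, stop the trace on the first
 * single noise occurrence longer than 100 us:
 *
 *	# echo 100 > /sys/kernel/tracing/osnoise/stop_tracing_us
 *	# echo osnoise > /sys/kernel/tracing/current_tracer
 */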

/*
 * run_osnoise - Sample the time and look for osnoise
 *
 * Used to capture the time, looking for potential osnoise latency repeatedly.
 * Different from hwlat_detector, it is called with preemption and interrupts
 * enabled. This allows irqs, softirqs and threads to run, interfering with
 * the osnoise sampling thread, as they would do with a regular thread.
 */
static int run_osnoise(void)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct trace_array *tr = osnoise_trace;
	u64 start, sample, last_sample;
	u64 last_int_count, int_count;
	s64 noise = 0, max_noise = 0;
	s64 total, last_total = 0;
	struct osnoise_sample s;
	unsigned int threshold;
	u64 runtime, stop_in;
	u64 sum_noise = 0;
	int hw_count = 0;
	int ret = -1;

	/*
	 * Considers the current thread as the workload.
	 */
	osn_var->pid = current->pid;

	/*
	 * Save the current stats for the diff.
	 */
	save_osn_sample_stats(osn_var, &s);

	/*
	 * If threshold is 0, use the default value of 5 us.
	 */
	threshold = tracing_thresh ? : 5000;

	/*
	 * Make sure NMIs see sampling first.
	 */
	osn_var->sampling = true;
	barrier();

	/*
	 * Transform the *_us config to nanoseconds to avoid the
	 * division on the main loop.
	 */
	runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
	stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;

	/*
	 * Start timestamp.
	 */
	start = time_get();

	/*
	 * "previous" loop.
	 */
	last_int_count = set_int_safe_time(osn_var, &last_sample);

	do {
		/*
		 * Get sample!
		 */
		int_count = set_int_safe_time(osn_var, &sample);

		noise = time_sub(sample, last_sample);

		/*
		 * This shouldn't happen.
		 */
		if (noise < 0) {
			osnoise_taint("negative noise!");
			goto out;
		}

		/*
		 * Sample runtime.
		 */
		total = time_sub(sample, start);

		/*
		 * Check for possible overflows.
		 */
		if (total < last_total) {
			osnoise_taint("total overflow!");
			break;
		}

		last_total = total;

		if (noise >= threshold) {
			int interference = int_count - last_int_count;

			if (noise > max_noise)
				max_noise = noise;

			if (!interference)
				hw_count++;

			sum_noise += noise;

			trace_sample_threshold(last_sample, noise, interference);

			if (osnoise_data.stop_tracing)
				if (noise > stop_in)
					osnoise_stop_tracing();
		}

		/*
		 * For the non-preemptive kernel config: let threads run, if
		 * they so wish.
		 */
		cond_resched();

		last_sample = sample;
		last_int_count = int_count;

	} while (total < runtime && !kthread_should_stop());

	/*
	 * Finish the above in the view of interrupts.
	 */
	barrier();

	osn_var->sampling = false;

	/*
	 * Make sure sampling data is no longer updated.
	 */
	barrier();

	/*
	 * Save noise info.
	 */
	s.noise = time_to_us(sum_noise);
	s.runtime = time_to_us(total);
	s.max_sample = time_to_us(max_noise);
	s.hw_count = hw_count;

	/* Save interference stats info. */
	diff_osn_sample_stats(osn_var, &s);

	trace_osnoise_sample(&s);

	/* Keep a running maximum ever recorded osnoise "latency". */
	if (max_noise > tr->max_latency) {
		tr->max_latency = max_noise;
		latency_fsnotify(tr);
	}

	if (osnoise_data.stop_tracing_total)
		if (s.noise > osnoise_data.stop_tracing_total)
			osnoise_stop_tracing();

	return 0;
out:
	return ret;
}

static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;

/*
 * osnoise_main - The osnoise detection kernel thread
 *
 * Calls run_osnoise() function to measure the osnoise for the configured
 * runtime, every period.
 */
static int osnoise_main(void *data)
{
	u64 interval;

	while (!kthread_should_stop()) {

		run_osnoise();

		mutex_lock(&interface_lock);
		interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
		mutex_unlock(&interface_lock);

		do_div(interval, USEC_PER_MSEC);

		/*
		 * Unlike hwlat_detector, the osnoise tracer can run
		 * without a pause because preemption is on.
		 */
		if (interval < 1) {
			/* Let synchronize_rcu_tasks() make progress. */
			cond_resched_tasks_rcu_qs();
			continue;
		}

		if (msleep_interruptible(interval))
			break;
	}

	return 0;
}
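
/*
 * The timerlat mode below measures a different metric: instead of running
 * a busy loop, each per-cpu thread arms an absolute hrtimer and sleeps.
 * The latency is then reported twice per activation, at the timer IRQ
 * (IRQ_CONTEXT) and after the wakeup of the thread (THREAD_CONTEXT),
 * similarly to what cyclictest measures from user space.
 */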

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat_irq - hrtimer handler for timerlat.
 */
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct trace_array *tr = osnoise_trace;
	struct timerlat_variables *tlat;
	struct timerlat_sample s;
	u64 now;
	u64 diff;

	/*
	 * I am not sure if the timer was armed for this CPU. So, get
	 * the timerlat struct from the timer itself, not from this
	 * CPU.
	 */
	tlat = container_of(timer, struct timerlat_variables, timer);

	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));

	/*
	 * Enable the osnoise: events for thread and softirq.
	 */
	tlat->tracing_thread = true;

	osn_var->thread.arrival_time = time_get();

	/*
	 * A hardirq is running: the timer IRQ. It is for sure preempting
	 * a thread, and potentially preempting a softirq.
	 *
	 * At this point, it is not interesting to know the duration of the
	 * preempted thread (and maybe softirq), but how much time they will
	 * delay the beginning of the execution of the timer thread.
	 *
	 * To get the correct (net) delay added by the softirq, its delta_start
	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
	 * start of the softirq will be zeroed, accounting then only the time
	 * after that.
	 *
	 * The thread follows the same principle. However, if a softirq is
	 * running, the thread needs to receive the softirq delta_start. The
	 * reason is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
	 */
#ifndef CONFIG_PREEMPT_RT
	if (osn_var->softirq.delta_start) {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->softirq.delta_start);

		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
				   &osn_var->irq.delta_start);
	} else {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->irq.delta_start);
	}
#else /* CONFIG_PREEMPT_RT */
	/*
	 * The softirqs run as threads on RT, so there is no need
	 * to keep track of them.
	 */
	copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->irq.delta_start);
#endif /* CONFIG_PREEMPT_RT */

	/*
	 * Compare the current time with the expected time.
	 */
	diff = now - tlat->abs_period;

	tlat->count++;
	s.seqnum = tlat->count;
	s.timer_latency = diff;
	s.context = IRQ_CONTEXT;

	trace_timerlat_sample(&s);

	/* Keep a running maximum ever recorded os noise "latency". */
	if (diff > tr->max_latency) {
		tr->max_latency = diff;
		latency_fsnotify(tr);
	}

	if (osnoise_data.stop_tracing)
		if (time_to_us(diff) >= osnoise_data.stop_tracing)
			osnoise_stop_tracing();

	wake_up_process(tlat->kthread);

	if (osnoise_data.print_stack)
		timerlat_save_stack(0);

	return HRTIMER_NORESTART;
}

/*
 * wait_next_period - Wait for the next period for timerlat
 */
static int wait_next_period(struct timerlat_variables *tlat)
{
	ktime_t next_abs_period, now;
	u64 rel_period = osnoise_data.timerlat_period * 1000;

	now = hrtimer_cb_get_time(&tlat->timer);
	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);

	/*
	 * Save the next abs_period.
	 */
	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);

	/*
	 * If the new abs_period is in the past, skip the activation.
	 */
	while (ktime_compare(now, next_abs_period) > 0) {
		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
	}

	set_current_state(TASK_INTERRUPTIBLE);

	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
	schedule();
	return 1;
}
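
/*
 * A sketch of the catch-up logic above, with illustrative numbers: given a
 * 1000us period, if the thread is so delayed that "now" is already 2500us
 * past the previous abs_period, the loop advances next_abs_period one
 * period at a time until it lands in the future (at +3000us), skipping the
 * two missed activations instead of firing them late.
 */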

/*
 * timerlat_main - Timerlat main
 */
static int timerlat_main(void *data)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat = this_cpu_tmr_var();
	struct timerlat_sample s;
	struct sched_param sp;
	u64 now, diff;

	/*
	 * Make the thread RT, that is how cyclictest is usually used.
	 */
	sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);

	tlat->count = 0;
	tlat->tracing_thread = false;

	hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
	tlat->timer.function = timerlat_irq;
	tlat->kthread = current;
	osn_var->pid = current->pid;
	/*
	 * Annotate the arrival time.
	 */
	tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);

	wait_next_period(tlat);

	osn_var->sampling = 1;

	while (!kthread_should_stop()) {
		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
		diff = now - tlat->abs_period;

		s.seqnum = tlat->count;
		s.timer_latency = diff;
		s.context = THREAD_CONTEXT;

		trace_timerlat_sample(&s);

#ifdef CONFIG_STACKTRACE
		if (osnoise_data.print_stack)
			if (osnoise_data.print_stack <= time_to_us(diff))
				timerlat_dump_stack();
#endif /* CONFIG_STACKTRACE */

		tlat->tracing_thread = false;
		if (osnoise_data.stop_tracing_total)
			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
				osnoise_stop_tracing();

		wait_next_period(tlat);
	}

	hrtimer_cancel(&tlat->timer);
	return 0;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * stop_kthread - stop a workload thread
 */
static void stop_kthread(unsigned int cpu)
{
	struct task_struct *kthread;

	kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
	if (kthread)
		kthread_stop(kthread);
	per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
}

/*
 * stop_per_cpu_kthreads - Stop per-cpu threads
 *
 * Stop the osnoise sampling threads. Use this on unload and at system
 * shutdown.
 */
static void stop_per_cpu_kthreads(void)
{
	int cpu;

	cpus_read_lock();

	for_each_online_cpu(cpu)
		stop_kthread(cpu);

	cpus_read_unlock();
}

/*
 * start_kthread - Start a workload thread
 */
static int start_kthread(unsigned int cpu)
{
	struct task_struct *kthread;
	void *main = osnoise_main;
	char comm[24];

#ifdef CONFIG_TIMERLAT_TRACER
	if (osnoise_data.timerlat_tracer) {
		snprintf(comm, 24, "timerlat/%d", cpu);
		main = timerlat_main;
	} else {
		snprintf(comm, 24, "osnoise/%d", cpu);
	}
#else
	snprintf(comm, 24, "osnoise/%d", cpu);
#endif
	kthread = kthread_create_on_cpu(main, NULL, cpu, comm);

	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
		stop_per_cpu_kthreads();
		return -ENOMEM;
	}

	per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
	wake_up_process(kthread);

	return 0;
}
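
/*
 * The workload threads created above are named osnoise/N or timerlat/N,
 * with N being the CPU id, so they can be easily spotted in the task list
 * and accounted for when reading the trace.
 */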

/*
 * start_per_cpu_kthreads - Kick off the per-cpu osnoise sampling kthreads
 *
 * This starts the kernel threads that will look for osnoise on many
 * cpus.
 */
static int start_per_cpu_kthreads(struct trace_array *tr)
{
	struct cpumask *current_mask = &save_cpumask;
	int retval = 0;
	int cpu;

	cpus_read_lock();
	/*
	 * Run only on CPUs in which trace and osnoise are allowed to run.
	 */
	cpumask_and(current_mask, tr->tracing_cpumask, &osnoise_cpumask);
	/*
	 * And the CPU is online.
	 */
	cpumask_and(current_mask, cpu_online_mask, current_mask);

	for_each_possible_cpu(cpu)
		per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;

	for_each_cpu(cpu, current_mask) {
		retval = start_kthread(cpu);
		if (retval) {
			stop_per_cpu_kthreads();
			break;
		}
	}

	cpus_read_unlock();

	return retval;
}

#ifdef CONFIG_HOTPLUG_CPU
static void osnoise_hotplug_workfn(struct work_struct *dummy)
{
	struct trace_array *tr = osnoise_trace;
	unsigned int cpu = smp_processor_id();

	mutex_lock(&trace_types_lock);

	if (!osnoise_busy)
		goto out_unlock_trace;

	mutex_lock(&interface_lock);
	cpus_read_lock();

	if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
		goto out_unlock;

	if (!cpumask_test_cpu(cpu, tr->tracing_cpumask))
		goto out_unlock;

	start_kthread(cpu);

out_unlock:
	cpus_read_unlock();
	mutex_unlock(&interface_lock);
out_unlock_trace:
	mutex_unlock(&trace_types_lock);
}

static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);

/*
 * osnoise_cpu_init - CPU hotplug online callback function
 */
static int osnoise_cpu_init(unsigned int cpu)
{
	schedule_work_on(cpu, &osnoise_hotplug_work);
	return 0;
}

/*
 * osnoise_cpu_die - CPU hotplug offline callback function
 */
static int osnoise_cpu_die(unsigned int cpu)
{
	stop_kthread(cpu);
	return 0;
}

static void osnoise_init_hotplug_support(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
				osnoise_cpu_init, osnoise_cpu_die);
	if (ret < 0)
		pr_warn(BANNER "Error initializing CPU hotplug support\n");
}
#else /* CONFIG_HOTPLUG_CPU */
static void osnoise_init_hotplug_support(void)
{
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * osnoise_cpus_read - Read function for reading the "cpus" file
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read value into
 * @count: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * Prints the "cpus" output into the user-provided buffer.
 */
static ssize_t
osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
		  loff_t *ppos)
{
	char *mask_str;
	int len;

	mutex_lock(&interface_lock);

	len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
	if (!mask_str) {
		count = -ENOMEM;
		goto out_unlock;
	}

	len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
	if (len >= count) {
		count = -EINVAL;
		goto out_free;
	}

	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);

out_free:
	kfree(mask_str);
out_unlock:
	mutex_unlock(&interface_lock);

	return count;
}

static void osnoise_tracer_start(struct trace_array *tr);
static void osnoise_tracer_stop(struct trace_array *tr);

/*
 * osnoise_cpus_write - Write function for "cpus" entry
 * @filp: The active open file structure
 * @ubuf: The user buffer that contains the value to write
 * @count: The maximum number of bytes to write to "file"
 * @ppos: The current position in the file
 *
 * This function provides a write implementation for the "cpus"
 * interface to the osnoise trace. By default, it lists all CPUs,
 * in this way, allowing osnoise threads to run on any online CPU
 * of the system. It serves to restrict the execution of osnoise to the
 * set of CPUs written via this interface. Note that osnoise also
 * respects the "tracing_cpumask." Hence, osnoise threads will run only
 * on the set of CPUs allowed here AND on "tracing_cpumask." Why not
 * have just "tracing_cpumask?" Because the user might be interested
 * in tracing what is running on other CPUs. For instance, one might
 * run osnoise in one HT CPU while observing what is running on the
 * sibling HT CPU.
 */
static ssize_t
osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
		   loff_t *ppos)
{
	struct trace_array *tr = osnoise_trace;
	cpumask_var_t osnoise_cpumask_new;
	int running, err;
	char buf[256];

	if (count >= 256)
		return -EINVAL;

	if (copy_from_user(buf, ubuf, count))
		return -EFAULT;

	/* NUL-terminate the user input for cpulist_parse(). */
	buf[count] = '\0';

	if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
		return -ENOMEM;

	err = cpulist_parse(buf, osnoise_cpumask_new);
	if (err)
		goto err_free;

	/*
	 * trace_types_lock is taken to avoid concurrency on start/stop
	 * and osnoise_busy.
	 */
	mutex_lock(&trace_types_lock);
	running = osnoise_busy;
	if (running)
		osnoise_tracer_stop(tr);

	mutex_lock(&interface_lock);
	/*
	 * osnoise_cpumask is read by CPU hotplug operations.
	 */
	cpus_read_lock();

	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);

	cpus_read_unlock();
	mutex_unlock(&interface_lock);

	if (running)
		osnoise_tracer_start(tr);
	mutex_unlock(&trace_types_lock);

	free_cpumask_var(osnoise_cpumask_new);
	return count;

err_free:
	free_cpumask_var(osnoise_cpumask_new);

	return err;
}

/*
 * osnoise/runtime_us: cannot be greater than the period.
 */
static struct trace_min_max_param osnoise_runtime = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.sample_runtime,
	.max	= &osnoise_data.sample_period,
	.min	= NULL,
};

/*
 * osnoise/period_us: cannot be smaller than the runtime.
 */
static struct trace_min_max_param osnoise_period = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.sample_period,
	.max	= NULL,
	.min	= &osnoise_data.sample_runtime,
};

/*
 * osnoise/stop_tracing_us: no limit.
 */
static struct trace_min_max_param osnoise_stop_tracing_in = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.stop_tracing,
	.max	= NULL,
	.min	= NULL,
};

/*
 * osnoise/stop_tracing_total_us: no limit.
 */
static struct trace_min_max_param osnoise_stop_tracing_total = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.stop_tracing_total,
	.max	= NULL,
	.min	= NULL,
};

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
 * latency is higher than val.
 */
static struct trace_min_max_param osnoise_print_stack = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.print_stack,
	.max	= NULL,
	.min	= NULL,
};

/*
 * osnoise/timerlat_period_us: min 100 us, max 1 s.
 */
u64 timerlat_min_period = 100;
u64 timerlat_max_period = 1000000;
static struct trace_min_max_param timerlat_period = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.timerlat_period,
	.max	= &timerlat_max_period,
	.min	= &timerlat_min_period,
};
#endif

static const struct file_operations cpus_fops = {
	.open	= tracing_open_generic,
	.read	= osnoise_cpus_read,
	.write	= osnoise_cpus_write,
	.llseek	= generic_file_llseek,
};
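
/*
 * A configuration sketch for the files above (paths assume the default
 * tracefs mount point): sample 900ms out of every 1s, restricting the
 * workload to CPUs 0-3:
 *
 *	# cd /sys/kernel/tracing
 *	# echo 1000000 > osnoise/period_us
 *	# echo 900000 > osnoise/runtime_us
 *	# echo 0-3 > osnoise/cpus
 *	# echo osnoise > current_tracer
 */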

/*
 * init_tracefs - A function to initialize the tracefs interface files
 *
 * This function creates entries in tracefs for "osnoise" and "timerlat".
 * It creates these directories in the tracing directory, and within that
 * directory the user can change and view the configs.
 */
static int init_tracefs(void)
{
	struct dentry *top_dir;
	struct dentry *tmp;
	int ret;

	ret = tracing_init_dentry();
	if (ret)
		return -ENOMEM;

	top_dir = tracefs_create_dir("osnoise", NULL);
	if (!top_dir)
		return 0;

	tmp = tracefs_create_file("period_us", 0640, top_dir,
				  &osnoise_period, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = tracefs_create_file("runtime_us", 0644, top_dir,
				  &osnoise_runtime, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = tracefs_create_file("stop_tracing_us", 0640, top_dir,
				  &osnoise_stop_tracing_in, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = tracefs_create_file("stop_tracing_total_us", 0640, top_dir,
				  &osnoise_stop_tracing_total, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = trace_create_file("cpus", 0644, top_dir, NULL, &cpus_fops);
	if (!tmp)
		goto err;
#ifdef CONFIG_TIMERLAT_TRACER
#ifdef CONFIG_STACKTRACE
	tmp = tracefs_create_file("print_stack", 0640, top_dir,
				  &osnoise_print_stack, &trace_min_max_fops);
	if (!tmp)
		goto err;
#endif

	tmp = tracefs_create_file("timerlat_period_us", 0640, top_dir,
				  &timerlat_period, &trace_min_max_fops);
	if (!tmp)
		goto err;
#endif

	return 0;

err:
	tracefs_remove(top_dir);
	return -ENOMEM;
}

static int osnoise_hook_events(void)
{
	int retval;

	/*
	 * Trace is already hooked, we are re-enabling from
	 * a stop_tracing_*.
	 */
	if (trace_osnoise_callback_enabled)
		return 0;

	retval = hook_irq_events();
	if (retval)
		return -EINVAL;

	retval = hook_softirq_events();
	if (retval)
		goto out_unhook_irq;

	retval = hook_thread_events();
	/*
	 * All fine!
	 */
	if (!retval)
		return 0;

	unhook_softirq_events();
out_unhook_irq:
	unhook_irq_events();
	return -EINVAL;
}

static int __osnoise_tracer_start(struct trace_array *tr)
{
	int retval;

	osn_var_reset_all();

	retval = osnoise_hook_events();
	if (retval)
		return retval;
	/*
	 * Make sure NMIs see reset values.
	 */
	barrier();
	trace_osnoise_callback_enabled = true;

	retval = start_per_cpu_kthreads(tr);
	if (retval) {
		unhook_irq_events();
		return retval;
	}

	osnoise_busy = true;

	return 0;
}

static void osnoise_tracer_start(struct trace_array *tr)
{
	int retval;

	if (osnoise_busy)
		return;

	retval = __osnoise_tracer_start(tr);
	if (retval)
		pr_err(BANNER "Error starting osnoise tracer\n");
}

static void osnoise_tracer_stop(struct trace_array *tr)
{
	if (!osnoise_busy)
		return;

	trace_osnoise_callback_enabled = false;
	barrier();

	stop_per_cpu_kthreads();

	unhook_irq_events();
	unhook_softirq_events();
	unhook_thread_events();

	osnoise_busy = false;
}

static int osnoise_tracer_init(struct trace_array *tr)
{
	/* Only allow one instance to enable this. */
	if (osnoise_busy)
		return -EBUSY;

	osnoise_trace = tr;
	tr->max_latency = 0;

	osnoise_tracer_start(tr);

	return 0;
}

static void osnoise_tracer_reset(struct trace_array *tr)
{
	osnoise_tracer_stop(tr);
}

static struct tracer osnoise_tracer __read_mostly = {
	.name		= "osnoise",
	.init		= osnoise_tracer_init,
	.reset		= osnoise_tracer_reset,
	.start		= osnoise_tracer_start,
	.stop		= osnoise_tracer_stop,
	.print_header	= print_osnoise_headers,
	.allow_instances = true,
};

#ifdef CONFIG_TIMERLAT_TRACER
static void timerlat_tracer_start(struct trace_array *tr)
{
	int retval;

	if (osnoise_busy)
		return;

	osnoise_data.timerlat_tracer = 1;

	retval = __osnoise_tracer_start(tr);
	if (retval)
		goto out_err;

	return;
out_err:
	pr_err(BANNER "Error starting timerlat tracer\n");
}

static void timerlat_tracer_stop(struct trace_array *tr)
{
	int cpu;

	if (!osnoise_busy)
		return;

	for_each_online_cpu(cpu)
		per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;

	osnoise_tracer_stop(tr);

	osnoise_data.timerlat_tracer = 0;
}

static int timerlat_tracer_init(struct trace_array *tr)
{
	/* Only allow one instance to enable this. */
	if (osnoise_busy)
		return -EBUSY;

	osnoise_trace = tr;

	tr->max_latency = 0;

	timerlat_tracer_start(tr);

	return 0;
}

static void timerlat_tracer_reset(struct trace_array *tr)
{
	timerlat_tracer_stop(tr);
}

static struct tracer timerlat_tracer __read_mostly = {
	.name		= "timerlat",
	.init		= timerlat_tracer_init,
	.reset		= timerlat_tracer_reset,
	.start		= timerlat_tracer_start,
	.stop		= timerlat_tracer_stop,
	.print_header	= print_timerlat_headers,
	.allow_instances = true,
};
#endif /* CONFIG_TIMERLAT_TRACER */
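
/*
 * A timerlat usage sketch (illustrative values): run with a 500us period
 * and, with CONFIG_STACKTRACE, dump the saved IRQ stack when the latency
 * crosses 250us:
 *
 *	# cd /sys/kernel/tracing
 *	# echo 500 > osnoise/timerlat_period_us
 *	# echo 250 > osnoise/print_stack
 *	# echo timerlat > current_tracer
 */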

__init static int init_osnoise_tracer(void)
{
	int ret;

	mutex_init(&interface_lock);

	cpumask_copy(&osnoise_cpumask, cpu_all_mask);

	ret = register_tracer(&osnoise_tracer);
	if (ret) {
		pr_err(BANNER "Error registering osnoise!\n");
		return ret;
	}

#ifdef CONFIG_TIMERLAT_TRACER
	ret = register_tracer(&timerlat_tracer);
	if (ret) {
		pr_err(BANNER "Error registering timerlat\n");
		return ret;
	}
#endif
	osnoise_init_hotplug_support();

	init_tracefs();

	return 0;
}
late_initcall(init_osnoise_tracer);