1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include <linux/string.h> 28 #include "trace.h" 29 30 #ifdef CONFIG_X86_LOCAL_APIC 31 #include <asm/trace/irq_vectors.h> 32 #undef TRACE_INCLUDE_PATH 33 #undef TRACE_INCLUDE_FILE 34 #endif /* CONFIG_X86_LOCAL_APIC */ 35 36 #include <trace/events/irq.h> 37 #include <trace/events/sched.h> 38 39 #define CREATE_TRACE_POINTS 40 #include <trace/events/osnoise.h> 41 42 /* 43 * Default values. 44 */ 45 #define BANNER "osnoise: " 46 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 47 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 48 49 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 50 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 51 52 /* 53 * osnoise/options entries. 54 */ 55 enum osnoise_options_index { 56 OSN_DEFAULTS = 0, 57 OSN_WORKLOAD, 58 OSN_PANIC_ON_STOP, 59 OSN_PREEMPT_DISABLE, 60 OSN_IRQ_DISABLE, 61 OSN_TIMERLAT_ALIGN, 62 OSN_MAX 63 }; 64 65 static const char * const osnoise_options_str[OSN_MAX] = { 66 "DEFAULTS", 67 "OSNOISE_WORKLOAD", 68 "PANIC_ON_STOP", 69 "OSNOISE_PREEMPT_DISABLE", 70 "OSNOISE_IRQ_DISABLE", 71 "TIMERLAT_ALIGN" }; 72 73 #define OSN_DEFAULT_OPTIONS 0x2 74 static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; 75 76 /* 77 * trace_array of the enabled osnoise/timerlat instances. 78 */ 79 struct osnoise_instance { 80 struct list_head list; 81 struct trace_array *tr; 82 }; 83 84 static struct list_head osnoise_instances; 85 86 static bool osnoise_has_registered_instances(void) 87 { 88 return !!list_first_or_null_rcu(&osnoise_instances, 89 struct osnoise_instance, 90 list); 91 } 92 93 /* 94 * osnoise_instance_registered - check if a tr is already registered 95 */ 96 static int osnoise_instance_registered(struct trace_array *tr) 97 { 98 struct osnoise_instance *inst; 99 int found = 0; 100 101 rcu_read_lock(); 102 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 103 if (inst->tr == tr) 104 found = 1; 105 } 106 rcu_read_unlock(); 107 108 return found; 109 } 110 111 /* 112 * osnoise_register_instance - register a new trace instance 113 * 114 * Register a trace_array *tr in the list of instances running 115 * osnoise/timerlat tracers. 116 */ 117 static int osnoise_register_instance(struct trace_array *tr) 118 { 119 struct osnoise_instance *inst; 120 121 /* 122 * register/unregister serialization is provided by trace's 123 * trace_types_lock. 124 */ 125 lockdep_assert_held(&trace_types_lock); 126 127 inst = kmalloc_obj(*inst); 128 if (!inst) 129 return -ENOMEM; 130 131 INIT_LIST_HEAD_RCU(&inst->list); 132 inst->tr = tr; 133 list_add_tail_rcu(&inst->list, &osnoise_instances); 134 135 return 0; 136 } 137 138 /* 139 * osnoise_unregister_instance - unregister a registered trace instance 140 * 141 * Remove the trace_array *tr from the list of instances running 142 * osnoise/timerlat tracers. 143 */ 144 static void osnoise_unregister_instance(struct trace_array *tr) 145 { 146 struct osnoise_instance *inst; 147 int found = 0; 148 149 /* 150 * register/unregister serialization is provided by trace's 151 * trace_types_lock. 152 */ 153 list_for_each_entry_rcu(inst, &osnoise_instances, list, 154 lockdep_is_held(&trace_types_lock)) { 155 if (inst->tr == tr) { 156 list_del_rcu(&inst->list); 157 found = 1; 158 break; 159 } 160 } 161 162 if (!found) 163 return; 164 165 kvfree_rcu_mightsleep(inst); 166 } 167 168 /* 169 * NMI runtime info. 170 */ 171 struct osn_nmi { 172 u64 count; 173 u64 delta_start; 174 }; 175 176 /* 177 * IRQ runtime info. 178 */ 179 struct osn_irq { 180 u64 count; 181 u64 arrival_time; 182 u64 delta_start; 183 }; 184 185 #define IRQ_CONTEXT 0 186 #define THREAD_CONTEXT 1 187 #define THREAD_URET 2 188 /* 189 * sofirq runtime info. 190 */ 191 struct osn_softirq { 192 u64 count; 193 u64 arrival_time; 194 u64 delta_start; 195 }; 196 197 /* 198 * thread runtime info. 199 */ 200 struct osn_thread { 201 u64 count; 202 u64 arrival_time; 203 u64 delta_start; 204 }; 205 206 /* 207 * Runtime information: this structure saves the runtime information used by 208 * one sampling thread. 209 */ 210 struct osnoise_variables { 211 struct task_struct *kthread; 212 bool sampling; 213 pid_t pid; 214 struct osn_nmi nmi; 215 struct osn_irq irq; 216 struct osn_softirq softirq; 217 struct osn_thread thread; 218 local_t int_counter; 219 }; 220 221 /* 222 * Per-cpu runtime information. 223 */ 224 static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 225 226 /* 227 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 228 */ 229 static inline struct osnoise_variables *this_cpu_osn_var(void) 230 { 231 return this_cpu_ptr(&per_cpu_osnoise_var); 232 } 233 234 /* 235 * Protect the interface. 236 */ 237 static struct mutex interface_lock; 238 239 #ifdef CONFIG_TIMERLAT_TRACER 240 /* 241 * Runtime information for the timer mode. 242 */ 243 struct timerlat_variables { 244 struct task_struct *kthread; 245 struct hrtimer timer; 246 u64 rel_period; 247 u64 abs_period; 248 bool tracing_thread; 249 u64 count; 250 bool uthread_migrate; 251 }; 252 253 static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 254 255 /* 256 * timerlat wake-up offset for next thread with TIMERLAT_ALIGN set. 257 */ 258 static atomic64_t align_next; 259 260 /* 261 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 262 */ 263 static inline struct timerlat_variables *this_cpu_tmr_var(void) 264 { 265 return this_cpu_ptr(&per_cpu_timerlat_var); 266 } 267 268 /* 269 * tlat_var_reset - Reset the values of the given timerlat_variables 270 */ 271 static inline void tlat_var_reset(void) 272 { 273 struct timerlat_variables *tlat_var; 274 int cpu; 275 276 /* Synchronize with the timerlat interfaces */ 277 mutex_lock(&interface_lock); 278 279 /* 280 * So far, all the values are initialized as 0, so 281 * zeroing the structure is perfect. 282 */ 283 for_each_online_cpu(cpu) { 284 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 285 if (tlat_var->kthread) 286 hrtimer_cancel(&tlat_var->timer); 287 memset(tlat_var, 0, sizeof(*tlat_var)); 288 } 289 /* 290 * Reset also align_next, to be filled by a new offset by the first timerlat 291 * thread that wakes up, if TIMERLAT_ALIGN is set. 292 */ 293 atomic64_set(&align_next, 0); 294 295 mutex_unlock(&interface_lock); 296 } 297 #else /* CONFIG_TIMERLAT_TRACER */ 298 #define tlat_var_reset() do {} while (0) 299 #endif /* CONFIG_TIMERLAT_TRACER */ 300 301 /* 302 * osn_var_reset - Reset the values of the given osnoise_variables 303 */ 304 static inline void osn_var_reset(void) 305 { 306 struct osnoise_variables *osn_var; 307 int cpu; 308 309 /* 310 * So far, all the values are initialized as 0, so 311 * zeroing the structure is perfect. 312 */ 313 for_each_online_cpu(cpu) { 314 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 315 memset(osn_var, 0, sizeof(*osn_var)); 316 } 317 } 318 319 /* 320 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 321 */ 322 static inline void osn_var_reset_all(void) 323 { 324 osn_var_reset(); 325 tlat_var_reset(); 326 } 327 328 /* 329 * Tells NMIs to call back to the osnoise tracer to record timestamps. 330 */ 331 bool trace_osnoise_callback_enabled; 332 333 /* 334 * Tracer data. 335 */ 336 static struct osnoise_data { 337 u64 sample_period; /* total sampling period */ 338 u64 sample_runtime; /* active sampling portion of period */ 339 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 340 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 341 #ifdef CONFIG_TIMERLAT_TRACER 342 u64 timerlat_period; /* timerlat period */ 343 u64 timerlat_align_us; /* timerlat alignment */ 344 u64 print_stack; /* print IRQ stack if total > */ 345 int timerlat_tracer; /* timerlat tracer */ 346 #endif 347 bool tainted; /* info users and developers about a problem */ 348 } osnoise_data = { 349 .sample_period = DEFAULT_SAMPLE_PERIOD, 350 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 351 .stop_tracing = 0, 352 .stop_tracing_total = 0, 353 #ifdef CONFIG_TIMERLAT_TRACER 354 .print_stack = 0, 355 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 356 .timerlat_align_us = 0, 357 .timerlat_tracer = 0, 358 #endif 359 }; 360 361 #ifdef CONFIG_TIMERLAT_TRACER 362 static inline bool timerlat_enabled(void) 363 { 364 return osnoise_data.timerlat_tracer; 365 } 366 367 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 368 { 369 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 370 /* 371 * If the timerlat is enabled, but the irq handler did 372 * not run yet enabling timerlat_tracer, do not trace. 373 */ 374 if (!tlat_var->tracing_thread) { 375 osn_var->softirq.arrival_time = 0; 376 osn_var->softirq.delta_start = 0; 377 return 0; 378 } 379 return 1; 380 } 381 382 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 383 { 384 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 385 /* 386 * If the timerlat is enabled, but the irq handler did 387 * not run yet enabling timerlat_tracer, do not trace. 388 */ 389 if (!tlat_var->tracing_thread) { 390 osn_var->thread.delta_start = 0; 391 osn_var->thread.arrival_time = 0; 392 return 0; 393 } 394 return 1; 395 } 396 #else /* CONFIG_TIMERLAT_TRACER */ 397 static inline bool timerlat_enabled(void) 398 { 399 return false; 400 } 401 402 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 403 { 404 return 1; 405 } 406 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 407 { 408 return 1; 409 } 410 #endif 411 412 #ifdef CONFIG_PREEMPT_RT 413 /* 414 * Print the osnoise header info. 415 */ 416 static void print_osnoise_headers(struct seq_file *s) 417 { 418 if (osnoise_data.tainted) 419 seq_puts(s, "# osnoise is tainted!\n"); 420 421 seq_puts(s, "# _-------=> irqs-off\n"); 422 seq_puts(s, "# / _------=> need-resched\n"); 423 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 424 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 425 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 426 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 427 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 428 429 seq_puts(s, "# |||||| / "); 430 seq_puts(s, " MAX\n"); 431 432 seq_puts(s, "# ||||| / "); 433 seq_puts(s, " SINGLE Interference counters:\n"); 434 435 seq_puts(s, "# ||||||| RUNTIME "); 436 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 437 438 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); 439 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 440 441 seq_puts(s, "# | | | ||||||| | | "); 442 seq_puts(s, " | | | | | | | |\n"); 443 } 444 #else /* CONFIG_PREEMPT_RT */ 445 static void print_osnoise_headers(struct seq_file *s) 446 { 447 if (osnoise_data.tainted) 448 seq_puts(s, "# osnoise is tainted!\n"); 449 450 seq_puts(s, "# _-----=> irqs-off\n"); 451 seq_puts(s, "# / _----=> need-resched\n"); 452 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 453 seq_puts(s, "# || / _--=> preempt-depth\n"); 454 seq_puts(s, "# ||| / _-=> migrate-disable "); 455 seq_puts(s, " MAX\n"); 456 seq_puts(s, "# |||| / delay "); 457 seq_puts(s, " SINGLE Interference counters:\n"); 458 459 seq_puts(s, "# ||||| RUNTIME "); 460 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 461 462 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); 463 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 464 465 seq_puts(s, "# | | | ||||| | | "); 466 seq_puts(s, " | | | | | | | |\n"); 467 } 468 #endif /* CONFIG_PREEMPT_RT */ 469 470 /* 471 * osnoise_taint - report an osnoise error. 472 */ 473 #define osnoise_taint(msg) ({ \ 474 struct osnoise_instance *inst; \ 475 struct trace_buffer *buffer; \ 476 \ 477 rcu_read_lock(); \ 478 list_for_each_entry_rcu(inst, &osnoise_instances, list) { \ 479 buffer = inst->tr->array_buffer.buffer; \ 480 trace_array_printk_buf(buffer, _THIS_IP_, msg); \ 481 } \ 482 rcu_read_unlock(); \ 483 osnoise_data.tainted = true; \ 484 }) 485 486 /* 487 * Record an osnoise_sample into the tracer buffer. 488 */ 489 static void 490 __record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) 491 { 492 struct ring_buffer_event *event; 493 struct osnoise_entry *entry; 494 495 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 496 tracing_gen_ctx()); 497 if (!event) 498 return; 499 entry = ring_buffer_event_data(event); 500 entry->runtime = sample->runtime; 501 entry->noise = sample->noise; 502 entry->max_sample = sample->max_sample; 503 entry->hw_count = sample->hw_count; 504 entry->nmi_count = sample->nmi_count; 505 entry->irq_count = sample->irq_count; 506 entry->softirq_count = sample->softirq_count; 507 entry->thread_count = sample->thread_count; 508 509 trace_buffer_unlock_commit_nostack(buffer, event); 510 } 511 512 /* 513 * Record an osnoise_sample on all osnoise instances and fire trace event. 514 */ 515 static void record_osnoise_sample(struct osnoise_sample *sample) 516 { 517 struct osnoise_instance *inst; 518 struct trace_buffer *buffer; 519 520 trace_osnoise_sample(sample); 521 522 rcu_read_lock(); 523 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 524 buffer = inst->tr->array_buffer.buffer; 525 __record_osnoise_sample(sample, buffer); 526 } 527 rcu_read_unlock(); 528 } 529 530 #ifdef CONFIG_TIMERLAT_TRACER 531 /* 532 * Print the timerlat header info. 533 */ 534 #ifdef CONFIG_PREEMPT_RT 535 static void print_timerlat_headers(struct seq_file *s) 536 { 537 seq_puts(s, "# _-------=> irqs-off\n"); 538 seq_puts(s, "# / _------=> need-resched\n"); 539 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 540 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 541 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 542 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 543 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 544 seq_puts(s, "# |||||| /\n"); 545 seq_puts(s, "# ||||||| ACTIVATION\n"); 546 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); 547 seq_puts(s, " CONTEXT LATENCY\n"); 548 seq_puts(s, "# | | | ||||||| | | "); 549 seq_puts(s, " | |\n"); 550 } 551 #else /* CONFIG_PREEMPT_RT */ 552 static void print_timerlat_headers(struct seq_file *s) 553 { 554 seq_puts(s, "# _-----=> irqs-off\n"); 555 seq_puts(s, "# / _----=> need-resched\n"); 556 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 557 seq_puts(s, "# || / _--=> preempt-depth\n"); 558 seq_puts(s, "# ||| / _-=> migrate-disable\n"); 559 seq_puts(s, "# |||| / delay\n"); 560 seq_puts(s, "# ||||| ACTIVATION\n"); 561 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); 562 seq_puts(s, " CONTEXT LATENCY\n"); 563 seq_puts(s, "# | | | ||||| | | "); 564 seq_puts(s, " | |\n"); 565 } 566 #endif /* CONFIG_PREEMPT_RT */ 567 568 static void 569 __record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) 570 { 571 struct ring_buffer_event *event; 572 struct timerlat_entry *entry; 573 574 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 575 tracing_gen_ctx()); 576 if (!event) 577 return; 578 entry = ring_buffer_event_data(event); 579 entry->seqnum = sample->seqnum; 580 entry->context = sample->context; 581 entry->timer_latency = sample->timer_latency; 582 583 trace_buffer_unlock_commit_nostack(buffer, event); 584 } 585 586 /* 587 * Record an timerlat_sample into the tracer buffer. 588 */ 589 static void record_timerlat_sample(struct timerlat_sample *sample) 590 { 591 struct osnoise_instance *inst; 592 struct trace_buffer *buffer; 593 594 trace_timerlat_sample(sample); 595 596 rcu_read_lock(); 597 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 598 buffer = inst->tr->array_buffer.buffer; 599 __record_timerlat_sample(sample, buffer); 600 } 601 rcu_read_unlock(); 602 } 603 604 #ifdef CONFIG_STACKTRACE 605 606 #define MAX_CALLS 256 607 608 /* 609 * Stack trace will take place only at IRQ level, so, no need 610 * to control nesting here. 611 */ 612 struct trace_stack { 613 int stack_size; 614 int nr_entries; 615 unsigned long calls[MAX_CALLS]; 616 }; 617 618 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 619 620 /* 621 * timerlat_save_stack - save a stack trace without printing 622 * 623 * Save the current stack trace without printing. The 624 * stack will be printed later, after the end of the measurement. 625 */ 626 static void timerlat_save_stack(int skip) 627 { 628 unsigned int size, nr_entries; 629 struct trace_stack *fstack; 630 631 fstack = this_cpu_ptr(&trace_stack); 632 633 size = ARRAY_SIZE(fstack->calls); 634 635 nr_entries = stack_trace_save(fstack->calls, size, skip); 636 637 fstack->stack_size = nr_entries * sizeof(unsigned long); 638 fstack->nr_entries = nr_entries; 639 640 return; 641 642 } 643 644 static void 645 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) 646 { 647 struct ring_buffer_event *event; 648 struct stack_entry *entry; 649 650 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 651 tracing_gen_ctx()); 652 if (!event) 653 return; 654 655 entry = ring_buffer_event_data(event); 656 657 entry->size = fstack->nr_entries; 658 memcpy(&entry->caller, fstack->calls, size); 659 660 trace_buffer_unlock_commit_nostack(buffer, event); 661 } 662 663 /* 664 * timerlat_dump_stack - dump a stack trace previously saved 665 */ 666 static void timerlat_dump_stack(u64 latency) 667 { 668 struct osnoise_instance *inst; 669 struct trace_buffer *buffer; 670 struct trace_stack *fstack; 671 unsigned int size; 672 673 /* 674 * trace only if latency > print_stack config, if enabled. 675 */ 676 if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) 677 return; 678 679 preempt_disable_notrace(); 680 fstack = this_cpu_ptr(&trace_stack); 681 size = fstack->stack_size; 682 683 rcu_read_lock(); 684 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 685 buffer = inst->tr->array_buffer.buffer; 686 __timerlat_dump_stack(buffer, fstack, size); 687 688 } 689 rcu_read_unlock(); 690 preempt_enable_notrace(); 691 } 692 #else /* CONFIG_STACKTRACE */ 693 #define timerlat_dump_stack(u64 latency) do {} while (0) 694 #define timerlat_save_stack(a) do {} while (0) 695 #endif /* CONFIG_STACKTRACE */ 696 #endif /* CONFIG_TIMERLAT_TRACER */ 697 698 /* 699 * Macros to encapsulate the time capturing infrastructure. 700 */ 701 #define time_get() trace_clock_local() 702 #define time_to_us(x) div_u64(x, 1000) 703 #define time_sub(a, b) ((a) - (b)) 704 705 /* 706 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 707 * 708 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 709 * to discount the NMI interference. 710 * 711 * See get_int_safe_duration(). 712 */ 713 static inline void 714 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 715 { 716 if (osn_var->irq.delta_start) 717 osn_var->irq.delta_start += duration; 718 } 719 720 #ifndef CONFIG_PREEMPT_RT 721 /* 722 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 723 * 724 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 725 * forward to discount the interference. 726 * 727 * See get_int_safe_duration(). 728 */ 729 static inline void 730 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 731 { 732 if (osn_var->softirq.delta_start) 733 osn_var->softirq.delta_start += duration; 734 } 735 #else /* CONFIG_PREEMPT_RT */ 736 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 737 #endif 738 739 /* 740 * cond_move_thread_delta_start - Forward the delta_start of a running thread 741 * 742 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 743 * is pushed forward to discount the interference. 744 * 745 * See get_int_safe_duration(). 746 */ 747 static inline void 748 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 749 { 750 if (osn_var->thread.delta_start) 751 osn_var->thread.delta_start += duration; 752 } 753 754 /* 755 * get_int_safe_duration - Get the duration of a window 756 * 757 * The irq, softirq and thread variables need to have its duration without 758 * the interference from higher priority interrupts. Instead of keeping a 759 * variable to discount the interrupt interference from these variables, the 760 * starting time of these variables are pushed forward with the interrupt's 761 * duration. In this way, a single variable is used to: 762 * 763 * - Know if a given window is being measured. 764 * - Account its duration. 765 * - Discount the interference. 766 * 767 * To avoid getting inconsistent values, e.g.,: 768 * 769 * now = time_get() 770 * ---> interrupt! 771 * delta_start -= int duration; 772 * <--- 773 * duration = now - delta_start; 774 * 775 * result: negative duration if the variable duration before the 776 * interrupt was smaller than the interrupt execution. 777 * 778 * A counter of interrupts is used. If the counter increased, try 779 * to capture an interference safe duration. 780 */ 781 static inline s64 782 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 783 { 784 u64 int_counter, now; 785 s64 duration; 786 787 do { 788 int_counter = local_read(&osn_var->int_counter); 789 /* synchronize with interrupts */ 790 barrier(); 791 792 now = time_get(); 793 duration = (now - *delta_start); 794 795 /* synchronize with interrupts */ 796 barrier(); 797 } while (int_counter != local_read(&osn_var->int_counter)); 798 799 /* 800 * This is an evidence of race conditions that cause 801 * a value to be "discounted" too much. 802 */ 803 if (duration < 0) 804 osnoise_taint("Negative duration!\n"); 805 806 *delta_start = 0; 807 808 return duration; 809 } 810 811 /* 812 * 813 * set_int_safe_time - Save the current time on *time, aware of interference 814 * 815 * Get the time, taking into consideration a possible interference from 816 * higher priority interrupts. 817 * 818 * See get_int_safe_duration() for an explanation. 819 */ 820 static u64 821 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 822 { 823 u64 int_counter; 824 825 do { 826 int_counter = local_read(&osn_var->int_counter); 827 /* synchronize with interrupts */ 828 barrier(); 829 830 *time = time_get(); 831 832 /* synchronize with interrupts */ 833 barrier(); 834 } while (int_counter != local_read(&osn_var->int_counter)); 835 836 return int_counter; 837 } 838 839 #ifdef CONFIG_TIMERLAT_TRACER 840 /* 841 * copy_int_safe_time - Copy *src into *desc aware of interference 842 */ 843 static u64 844 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 845 { 846 u64 int_counter; 847 848 do { 849 int_counter = local_read(&osn_var->int_counter); 850 /* synchronize with interrupts */ 851 barrier(); 852 853 *dst = *src; 854 855 /* synchronize with interrupts */ 856 barrier(); 857 } while (int_counter != local_read(&osn_var->int_counter)); 858 859 return int_counter; 860 } 861 #endif /* CONFIG_TIMERLAT_TRACER */ 862 863 /* 864 * trace_osnoise_callback - NMI entry/exit callback 865 * 866 * This function is called at the entry and exit NMI code. The bool enter 867 * distinguishes between either case. This function is used to note a NMI 868 * occurrence, compute the noise caused by the NMI, and to remove the noise 869 * it is potentially causing on other interference variables. 870 */ 871 void trace_osnoise_callback(bool enter) 872 { 873 struct osnoise_variables *osn_var = this_cpu_osn_var(); 874 u64 duration; 875 876 if (!osn_var->sampling) 877 return; 878 879 /* 880 * Currently trace_clock_local() calls sched_clock() and the 881 * generic version is not NMI safe. 882 */ 883 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 884 if (enter) { 885 osn_var->nmi.delta_start = time_get(); 886 local_inc(&osn_var->int_counter); 887 } else { 888 duration = time_get() - osn_var->nmi.delta_start; 889 890 trace_nmi_noise(osn_var->nmi.delta_start, duration); 891 892 cond_move_irq_delta_start(osn_var, duration); 893 cond_move_softirq_delta_start(osn_var, duration); 894 cond_move_thread_delta_start(osn_var, duration); 895 } 896 } 897 898 if (enter) 899 osn_var->nmi.count++; 900 } 901 902 /* 903 * osnoise_trace_irq_entry - Note the starting of an IRQ 904 * 905 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 906 * it is safe to use a single variable (ons_var->irq) to save the statistics. 907 * The arrival_time is used to report... the arrival time. The delta_start 908 * is used to compute the duration at the IRQ exit handler. See 909 * cond_move_irq_delta_start(). 910 */ 911 void osnoise_trace_irq_entry(int id) 912 { 913 struct osnoise_variables *osn_var = this_cpu_osn_var(); 914 915 if (!osn_var->sampling) 916 return; 917 /* 918 * This value will be used in the report, but not to compute 919 * the execution time, so it is safe to get it unsafe. 920 */ 921 osn_var->irq.arrival_time = time_get(); 922 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 923 osn_var->irq.count++; 924 925 local_inc(&osn_var->int_counter); 926 } 927 928 /* 929 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 930 * 931 * Computes the duration of the IRQ noise, and trace it. Also discounts the 932 * interference from other sources of noise could be currently being accounted. 933 */ 934 void osnoise_trace_irq_exit(int id, const char *desc) 935 { 936 struct osnoise_variables *osn_var = this_cpu_osn_var(); 937 s64 duration; 938 939 if (!osn_var->sampling) 940 return; 941 942 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 943 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 944 osn_var->irq.arrival_time = 0; 945 cond_move_softirq_delta_start(osn_var, duration); 946 cond_move_thread_delta_start(osn_var, duration); 947 } 948 949 /* 950 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 951 * 952 * Used to note the starting of an IRQ occurece. 953 */ 954 static void trace_irqentry_callback(void *data, int irq, 955 struct irqaction *action) 956 { 957 osnoise_trace_irq_entry(irq); 958 } 959 960 /* 961 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 962 * 963 * Used to note the end of an IRQ occurece. 964 */ 965 static void trace_irqexit_callback(void *data, int irq, 966 struct irqaction *action, int ret) 967 { 968 osnoise_trace_irq_exit(irq, action->name); 969 } 970 971 /* 972 * arch specific register function. 973 */ 974 int __weak osnoise_arch_register(void) 975 { 976 return 0; 977 } 978 979 /* 980 * arch specific unregister function. 981 */ 982 void __weak osnoise_arch_unregister(void) 983 { 984 return; 985 } 986 987 /* 988 * hook_irq_events - Hook IRQ handling events 989 * 990 * This function hooks the IRQ related callbacks to the respective trace 991 * events. 992 */ 993 static int hook_irq_events(void) 994 { 995 int ret; 996 997 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 998 if (ret) 999 goto out_err; 1000 1001 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1002 if (ret) 1003 goto out_unregister_entry; 1004 1005 ret = osnoise_arch_register(); 1006 if (ret) 1007 goto out_irq_exit; 1008 1009 return 0; 1010 1011 out_irq_exit: 1012 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1013 out_unregister_entry: 1014 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1015 out_err: 1016 return -EINVAL; 1017 } 1018 1019 /* 1020 * unhook_irq_events - Unhook IRQ handling events 1021 * 1022 * This function unhooks the IRQ related callbacks to the respective trace 1023 * events. 1024 */ 1025 static void unhook_irq_events(void) 1026 { 1027 osnoise_arch_unregister(); 1028 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1029 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1030 } 1031 1032 #ifndef CONFIG_PREEMPT_RT 1033 /* 1034 * trace_softirq_entry_callback - Note the starting of a softirq 1035 * 1036 * Save the starting time of a softirq. As softirqs are non-preemptive to 1037 * other softirqs, it is safe to use a single variable (ons_var->softirq) 1038 * to save the statistics. The arrival_time is used to report... the 1039 * arrival time. The delta_start is used to compute the duration at the 1040 * softirq exit handler. See cond_move_softirq_delta_start(). 1041 */ 1042 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 1043 { 1044 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1045 1046 if (!osn_var->sampling) 1047 return; 1048 /* 1049 * This value will be used in the report, but not to compute 1050 * the execution time, so it is safe to get it unsafe. 1051 */ 1052 osn_var->softirq.arrival_time = time_get(); 1053 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 1054 osn_var->softirq.count++; 1055 1056 local_inc(&osn_var->int_counter); 1057 } 1058 1059 /* 1060 * trace_softirq_exit_callback - Note the end of an softirq 1061 * 1062 * Computes the duration of the softirq noise, and trace it. Also discounts the 1063 * interference from other sources of noise could be currently being accounted. 1064 */ 1065 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 1066 { 1067 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1068 s64 duration; 1069 1070 if (!osn_var->sampling) 1071 return; 1072 1073 if (unlikely(timerlat_enabled())) 1074 if (!timerlat_softirq_exit(osn_var)) 1075 return; 1076 1077 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 1078 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 1079 cond_move_thread_delta_start(osn_var, duration); 1080 osn_var->softirq.arrival_time = 0; 1081 } 1082 1083 /* 1084 * hook_softirq_events - Hook softirq handling events 1085 * 1086 * This function hooks the softirq related callbacks to the respective trace 1087 * events. 1088 */ 1089 static int hook_softirq_events(void) 1090 { 1091 int ret; 1092 1093 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1094 if (ret) 1095 goto out_err; 1096 1097 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1098 if (ret) 1099 goto out_unreg_entry; 1100 1101 return 0; 1102 1103 out_unreg_entry: 1104 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1105 out_err: 1106 return -EINVAL; 1107 } 1108 1109 /* 1110 * unhook_softirq_events - Unhook softirq handling events 1111 * 1112 * This function hooks the softirq related callbacks to the respective trace 1113 * events. 1114 */ 1115 static void unhook_softirq_events(void) 1116 { 1117 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1118 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1119 } 1120 #else /* CONFIG_PREEMPT_RT */ 1121 /* 1122 * softirq are threads on the PREEMPT_RT mode. 1123 */ 1124 static int hook_softirq_events(void) 1125 { 1126 return 0; 1127 } 1128 static void unhook_softirq_events(void) 1129 { 1130 } 1131 #endif 1132 1133 /* 1134 * thread_entry - Record the starting of a thread noise window 1135 * 1136 * It saves the context switch time for a noisy thread, and increments 1137 * the interference counters. 1138 */ 1139 static void 1140 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 1141 { 1142 if (!osn_var->sampling) 1143 return; 1144 /* 1145 * The arrival time will be used in the report, but not to compute 1146 * the execution time, so it is safe to get it unsafe. 1147 */ 1148 osn_var->thread.arrival_time = time_get(); 1149 1150 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 1151 1152 osn_var->thread.count++; 1153 local_inc(&osn_var->int_counter); 1154 } 1155 1156 /* 1157 * thread_exit - Report the end of a thread noise window 1158 * 1159 * It computes the total noise from a thread, tracing if needed. 1160 */ 1161 static void 1162 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 1163 { 1164 s64 duration; 1165 1166 if (!osn_var->sampling) 1167 return; 1168 1169 if (unlikely(timerlat_enabled())) 1170 if (!timerlat_thread_exit(osn_var)) 1171 return; 1172 1173 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 1174 1175 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 1176 1177 osn_var->thread.arrival_time = 0; 1178 } 1179 1180 #ifdef CONFIG_TIMERLAT_TRACER 1181 /* 1182 * osnoise_stop_exception - Stop tracing and the tracer. 1183 */ 1184 static __always_inline void osnoise_stop_exception(char *msg, int cpu) 1185 { 1186 struct osnoise_instance *inst; 1187 struct trace_array *tr; 1188 1189 rcu_read_lock(); 1190 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1191 tr = inst->tr; 1192 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1193 "stop tracing hit on cpu %d due to exception: %s\n", 1194 smp_processor_id(), 1195 msg); 1196 1197 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) 1198 panic("tracer hit on cpu %d due to exception: %s\n", 1199 smp_processor_id(), 1200 msg); 1201 1202 tracer_tracing_off(tr); 1203 } 1204 rcu_read_unlock(); 1205 } 1206 1207 /* 1208 * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler 1209 * 1210 * his function is hooked to the sched:sched_migrate_task trace event, and monitors 1211 * timerlat user-space thread migration. 1212 */ 1213 static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu) 1214 { 1215 struct osnoise_variables *osn_var; 1216 long cpu = task_cpu(p); 1217 1218 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 1219 if (osn_var->pid == p->pid && dest_cpu != cpu) { 1220 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1; 1221 osnoise_taint("timerlat user-thread migrated\n"); 1222 osnoise_stop_exception("timerlat user-thread migrated", cpu); 1223 } 1224 } 1225 1226 static bool monitor_enabled; 1227 1228 static int register_migration_monitor(void) 1229 { 1230 int ret = 0; 1231 1232 /* 1233 * Timerlat thread migration check is only required when running timerlat in user-space. 1234 * Thus, enable callback only if timerlat is set with no workload. 1235 */ 1236 if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) { 1237 if (WARN_ON_ONCE(monitor_enabled)) 1238 return 0; 1239 1240 ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL); 1241 if (!ret) 1242 monitor_enabled = true; 1243 } 1244 1245 return ret; 1246 } 1247 1248 static void unregister_migration_monitor(void) 1249 { 1250 if (!monitor_enabled) 1251 return; 1252 1253 unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL); 1254 monitor_enabled = false; 1255 } 1256 #else 1257 static int register_migration_monitor(void) 1258 { 1259 return 0; 1260 } 1261 static void unregister_migration_monitor(void) {} 1262 #endif 1263 /* 1264 * trace_sched_switch - sched:sched_switch trace event handler 1265 * 1266 * This function is hooked to the sched:sched_switch trace event, and it is 1267 * used to record the beginning and to report the end of a thread noise window. 1268 */ 1269 static void 1270 trace_sched_switch_callback(void *data, bool preempt, 1271 struct task_struct *p, 1272 struct task_struct *n, 1273 unsigned int prev_state) 1274 { 1275 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1276 int workload = test_bit(OSN_WORKLOAD, &osnoise_options); 1277 1278 if ((p->pid != osn_var->pid) || !workload) 1279 thread_exit(osn_var, p); 1280 1281 if ((n->pid != osn_var->pid) || !workload) 1282 thread_entry(osn_var, n); 1283 } 1284 1285 /* 1286 * hook_thread_events - Hook the instrumentation for thread noise 1287 * 1288 * Hook the osnoise tracer callbacks to handle the noise from other 1289 * threads on the necessary kernel events. 1290 */ 1291 static int hook_thread_events(void) 1292 { 1293 int ret; 1294 1295 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 1296 if (ret) 1297 return -EINVAL; 1298 1299 ret = register_migration_monitor(); 1300 if (ret) 1301 goto out_unreg; 1302 1303 return 0; 1304 1305 out_unreg: 1306 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1307 return -EINVAL; 1308 } 1309 1310 /* 1311 * unhook_thread_events - unhook the instrumentation for thread noise 1312 * 1313 * Unook the osnoise tracer callbacks to handle the noise from other 1314 * threads on the necessary kernel events. 1315 */ 1316 static void unhook_thread_events(void) 1317 { 1318 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1319 unregister_migration_monitor(); 1320 } 1321 1322 /* 1323 * save_osn_sample_stats - Save the osnoise_sample statistics 1324 * 1325 * Save the osnoise_sample statistics before the sampling phase. These 1326 * values will be used later to compute the diff betwneen the statistics 1327 * before and after the osnoise sampling. 1328 */ 1329 static void 1330 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1331 { 1332 s->nmi_count = osn_var->nmi.count; 1333 s->irq_count = osn_var->irq.count; 1334 s->softirq_count = osn_var->softirq.count; 1335 s->thread_count = osn_var->thread.count; 1336 } 1337 1338 /* 1339 * diff_osn_sample_stats - Compute the osnoise_sample statistics 1340 * 1341 * After a sample period, compute the difference on the osnoise_sample 1342 * statistics. The struct osnoise_sample *s contains the statistics saved via 1343 * save_osn_sample_stats() before the osnoise sampling. 1344 */ 1345 static void 1346 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1347 { 1348 s->nmi_count = osn_var->nmi.count - s->nmi_count; 1349 s->irq_count = osn_var->irq.count - s->irq_count; 1350 s->softirq_count = osn_var->softirq.count - s->softirq_count; 1351 s->thread_count = osn_var->thread.count - s->thread_count; 1352 } 1353 1354 /* 1355 * osnoise_stop_tracing - Stop tracing and the tracer. 1356 */ 1357 static __always_inline void osnoise_stop_tracing(void) 1358 { 1359 struct osnoise_instance *inst; 1360 struct trace_array *tr; 1361 1362 rcu_read_lock(); 1363 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1364 tr = inst->tr; 1365 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1366 "stop tracing hit on cpu %d\n", smp_processor_id()); 1367 1368 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) 1369 panic("tracer hit stop condition on CPU %d\n", smp_processor_id()); 1370 1371 tracer_tracing_off(tr); 1372 } 1373 rcu_read_unlock(); 1374 } 1375 1376 /* 1377 * osnoise_has_tracing_on - Check if there is at least one instance on 1378 */ 1379 static __always_inline int osnoise_has_tracing_on(void) 1380 { 1381 struct osnoise_instance *inst; 1382 int trace_is_on = 0; 1383 1384 rcu_read_lock(); 1385 list_for_each_entry_rcu(inst, &osnoise_instances, list) 1386 trace_is_on += tracer_tracing_is_on(inst->tr); 1387 rcu_read_unlock(); 1388 1389 return trace_is_on; 1390 } 1391 1392 /* 1393 * notify_new_max_latency - Notify a new max latency via fsnotify interface. 1394 */ 1395 static void notify_new_max_latency(u64 latency) 1396 { 1397 struct osnoise_instance *inst; 1398 struct trace_array *tr; 1399 1400 rcu_read_lock(); 1401 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1402 tr = inst->tr; 1403 if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { 1404 tr->max_latency = latency; 1405 latency_fsnotify(tr); 1406 } 1407 } 1408 rcu_read_unlock(); 1409 } 1410 1411 /* 1412 * run_osnoise - Sample the time and look for osnoise 1413 * 1414 * Used to capture the time, looking for potential osnoise latency repeatedly. 1415 * Different from hwlat_detector, it is called with preemption and interrupts 1416 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1417 * osnoise sampling thread, as they would do with a regular thread. 1418 */ 1419 static int run_osnoise(void) 1420 { 1421 bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); 1422 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1423 u64 start, sample, last_sample; 1424 u64 last_int_count, int_count; 1425 s64 noise = 0, max_noise = 0; 1426 s64 total, last_total = 0; 1427 struct osnoise_sample s; 1428 bool disable_preemption; 1429 unsigned int threshold; 1430 u64 runtime, stop_in; 1431 u64 sum_noise = 0; 1432 int hw_count = 0; 1433 int ret = -1; 1434 1435 /* 1436 * Disabling preemption is only required if IRQs are enabled, 1437 * and the options is set on. 1438 */ 1439 disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options); 1440 1441 /* 1442 * Considers the current thread as the workload. 1443 */ 1444 osn_var->pid = current->pid; 1445 1446 /* 1447 * Save the current stats for the diff 1448 */ 1449 save_osn_sample_stats(osn_var, &s); 1450 1451 /* 1452 * if threshold is 0, use the default value of 1 us. 1453 */ 1454 threshold = tracing_thresh ? : 1000; 1455 1456 /* 1457 * Apply PREEMPT and IRQ disabled options. 1458 */ 1459 if (disable_irq) 1460 local_irq_disable(); 1461 1462 if (disable_preemption) 1463 preempt_disable(); 1464 1465 /* 1466 * Make sure NMIs see sampling first 1467 */ 1468 osn_var->sampling = true; 1469 barrier(); 1470 1471 /* 1472 * Transform the *_us config to nanoseconds to avoid the 1473 * division on the main loop. 1474 */ 1475 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1476 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1477 1478 /* 1479 * Start timestamp 1480 */ 1481 start = time_get(); 1482 1483 /* 1484 * "previous" loop. 1485 */ 1486 last_int_count = set_int_safe_time(osn_var, &last_sample); 1487 1488 do { 1489 /* 1490 * Get sample! 1491 */ 1492 int_count = set_int_safe_time(osn_var, &sample); 1493 1494 noise = time_sub(sample, last_sample); 1495 1496 /* 1497 * This shouldn't happen. 1498 */ 1499 if (noise < 0) { 1500 osnoise_taint("negative noise!"); 1501 goto out; 1502 } 1503 1504 /* 1505 * Sample runtime. 1506 */ 1507 total = time_sub(sample, start); 1508 1509 /* 1510 * Check for possible overflows. 1511 */ 1512 if (total < last_total) { 1513 osnoise_taint("total overflow!"); 1514 break; 1515 } 1516 1517 last_total = total; 1518 1519 if (noise >= threshold) { 1520 int interference = int_count - last_int_count; 1521 1522 if (noise > max_noise) 1523 max_noise = noise; 1524 1525 if (!interference) 1526 hw_count++; 1527 1528 sum_noise += noise; 1529 1530 trace_sample_threshold(last_sample, noise, interference); 1531 1532 if (osnoise_data.stop_tracing) 1533 if (noise > stop_in) 1534 osnoise_stop_tracing(); 1535 } 1536 1537 /* 1538 * In some cases, notably when running on a nohz_full CPU with 1539 * a stopped tick PREEMPT_RCU or PREEMPT_LAZY have no way to 1540 * account for QSs. This will eventually cause unwarranted 1541 * noise as RCU forces preemption as the means of ending the 1542 * current grace period. We avoid this by calling 1543 * rcu_momentary_eqs(), which performs a zero duration EQS 1544 * allowing RCU to end the current grace period. This call 1545 * shouldn't be wrapped inside an RCU critical section. 1546 * 1547 * Normally QSs for other cases are handled through cond_resched(). 1548 * For simplicity, however, we call rcu_momentary_eqs() for all 1549 * configurations here. 1550 */ 1551 if (!disable_irq) 1552 local_irq_disable(); 1553 1554 rcu_momentary_eqs(); 1555 1556 if (!disable_irq) 1557 local_irq_enable(); 1558 1559 /* 1560 * For the non-preemptive kernel config: let threads runs, if 1561 * they so wish, unless set not do to so. 1562 */ 1563 if (!disable_irq && !disable_preemption) 1564 cond_resched(); 1565 1566 last_sample = sample; 1567 last_int_count = int_count; 1568 1569 } while (total < runtime && !kthread_should_stop()); 1570 1571 /* 1572 * Finish the above in the view for interrupts. 1573 */ 1574 barrier(); 1575 1576 osn_var->sampling = false; 1577 1578 /* 1579 * Make sure sampling data is no longer updated. 1580 */ 1581 barrier(); 1582 1583 /* 1584 * Return to the preemptive state. 1585 */ 1586 if (disable_preemption) 1587 preempt_enable(); 1588 1589 if (disable_irq) 1590 local_irq_enable(); 1591 1592 /* 1593 * Save noise info. 1594 */ 1595 s.noise = time_to_us(sum_noise); 1596 s.runtime = time_to_us(total); 1597 s.max_sample = time_to_us(max_noise); 1598 s.hw_count = hw_count; 1599 1600 /* Save interference stats info */ 1601 diff_osn_sample_stats(osn_var, &s); 1602 1603 record_osnoise_sample(&s); 1604 1605 notify_new_max_latency(max_noise); 1606 1607 if (osnoise_data.stop_tracing_total) 1608 if (s.noise > osnoise_data.stop_tracing_total) 1609 osnoise_stop_tracing(); 1610 1611 return 0; 1612 out: 1613 return ret; 1614 } 1615 1616 static struct cpumask osnoise_cpumask; 1617 static struct cpumask save_cpumask; 1618 static struct cpumask kthread_cpumask; 1619 1620 /* 1621 * osnoise_sleep - sleep until the next period 1622 */ 1623 static void osnoise_sleep(bool skip_period) 1624 { 1625 u64 interval; 1626 ktime_t wake_time; 1627 1628 mutex_lock(&interface_lock); 1629 if (skip_period) 1630 interval = osnoise_data.sample_period; 1631 else 1632 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1633 mutex_unlock(&interface_lock); 1634 1635 /* 1636 * differently from hwlat_detector, the osnoise tracer can run 1637 * without a pause because preemption is on. 1638 */ 1639 if (!interval) { 1640 /* Let synchronize_rcu_tasks() make progress */ 1641 cond_resched_tasks_rcu_qs(); 1642 return; 1643 } 1644 1645 wake_time = ktime_add_us(ktime_get(), interval); 1646 __set_current_state(TASK_INTERRUPTIBLE); 1647 1648 while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) { 1649 if (kthread_should_stop()) 1650 break; 1651 } 1652 } 1653 1654 /* 1655 * osnoise_migration_pending - checks if the task needs to migrate 1656 * 1657 * osnoise/timerlat threads are per-cpu. If there is a pending request to 1658 * migrate the thread away from the current CPU, something bad has happened. 1659 * Play the good citizen and leave. 1660 * 1661 * Returns 0 if it is safe to continue, 1 otherwise. 1662 */ 1663 static inline int osnoise_migration_pending(void) 1664 { 1665 if (!current->migration_pending) 1666 return 0; 1667 1668 /* 1669 * If migration is pending, there is a task waiting for the 1670 * tracer to enable migration. The tracer does not allow migration, 1671 * thus: taint and leave to unblock the blocked thread. 1672 */ 1673 osnoise_taint("migration requested to osnoise threads, leaving."); 1674 1675 /* 1676 * Unset this thread from the threads managed by the interface. 1677 * The tracers are responsible for cleaning their env before 1678 * exiting. 1679 */ 1680 mutex_lock(&interface_lock); 1681 this_cpu_osn_var()->kthread = NULL; 1682 cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask); 1683 mutex_unlock(&interface_lock); 1684 1685 return 1; 1686 } 1687 1688 /* 1689 * osnoise_main - The osnoise detection kernel thread 1690 * 1691 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1692 * every period. 1693 */ 1694 static int osnoise_main(void *data) 1695 { 1696 unsigned long flags; 1697 1698 /* 1699 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. 1700 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread. 1701 * 1702 * To work around this limitation, disable migration and remove the 1703 * flag. 1704 */ 1705 migrate_disable(); 1706 raw_spin_lock_irqsave(¤t->pi_lock, flags); 1707 current->flags &= ~(PF_NO_SETAFFINITY); 1708 raw_spin_unlock_irqrestore(¤t->pi_lock, flags); 1709 1710 while (!kthread_should_stop()) { 1711 if (osnoise_migration_pending()) 1712 break; 1713 1714 /* skip a period if tracing is off on all instances */ 1715 if (!osnoise_has_tracing_on()) { 1716 osnoise_sleep(true); 1717 continue; 1718 } 1719 1720 run_osnoise(); 1721 osnoise_sleep(false); 1722 } 1723 1724 migrate_enable(); 1725 return 0; 1726 } 1727 1728 #ifdef CONFIG_TIMERLAT_TRACER 1729 /* 1730 * timerlat_irq - hrtimer handler for timerlat. 1731 */ 1732 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) 1733 { 1734 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1735 struct timerlat_variables *tlat; 1736 struct timerlat_sample s; 1737 u64 now; 1738 u64 diff; 1739 1740 /* 1741 * I am not sure if the timer was armed for this CPU. So, get 1742 * the timerlat struct from the timer itself, not from this 1743 * CPU. 1744 */ 1745 tlat = container_of(timer, struct timerlat_variables, timer); 1746 1747 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1748 1749 /* 1750 * Enable the osnoise: events for thread an softirq. 1751 */ 1752 tlat->tracing_thread = true; 1753 1754 osn_var->thread.arrival_time = time_get(); 1755 1756 /* 1757 * A hardirq is running: the timer IRQ. It is for sure preempting 1758 * a thread, and potentially preempting a softirq. 1759 * 1760 * At this point, it is not interesting to know the duration of the 1761 * preempted thread (and maybe softirq), but how much time they will 1762 * delay the beginning of the execution of the timer thread. 1763 * 1764 * To get the correct (net) delay added by the softirq, its delta_start 1765 * is set as the IRQ one. In this way, at the return of the IRQ, the delta 1766 * start of the sofitrq will be zeroed, accounting then only the time 1767 * after that. 1768 * 1769 * The thread follows the same principle. However, if a softirq is 1770 * running, the thread needs to receive the softirq delta_start. The 1771 * reason being is that the softirq will be the last to be unfolded, 1772 * resseting the thread delay to zero. 1773 * 1774 * The PREEMPT_RT is a special case, though. As softirqs run as threads 1775 * on RT, moving the thread is enough. 1776 */ 1777 if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) { 1778 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1779 &osn_var->softirq.delta_start); 1780 1781 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start, 1782 &osn_var->irq.delta_start); 1783 } else { 1784 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1785 &osn_var->irq.delta_start); 1786 } 1787 1788 /* 1789 * Compute the current time with the expected time. 1790 */ 1791 diff = now - tlat->abs_period; 1792 1793 tlat->count++; 1794 s.seqnum = tlat->count; 1795 s.timer_latency = diff; 1796 s.context = IRQ_CONTEXT; 1797 1798 record_timerlat_sample(&s); 1799 1800 if (osnoise_data.stop_tracing) { 1801 if (time_to_us(diff) >= osnoise_data.stop_tracing) { 1802 1803 /* 1804 * At this point, if stop_tracing is set and <= print_stack, 1805 * print_stack is set and would be printed in the thread handler. 1806 * 1807 * Thus, print the stack trace as it is helpful to define the 1808 * root cause of an IRQ latency. 1809 */ 1810 if (osnoise_data.stop_tracing <= osnoise_data.print_stack) { 1811 timerlat_save_stack(0); 1812 timerlat_dump_stack(time_to_us(diff)); 1813 } 1814 1815 osnoise_stop_tracing(); 1816 notify_new_max_latency(diff); 1817 1818 wake_up_process(tlat->kthread); 1819 1820 return HRTIMER_NORESTART; 1821 } 1822 } 1823 1824 wake_up_process(tlat->kthread); 1825 1826 if (osnoise_data.print_stack) 1827 timerlat_save_stack(0); 1828 1829 return HRTIMER_NORESTART; 1830 } 1831 1832 /* 1833 * wait_next_period - Wait for the next period for timerlat 1834 */ 1835 static int wait_next_period(struct timerlat_variables *tlat) 1836 { 1837 ktime_t next_abs_period, now; 1838 u64 rel_period = osnoise_data.timerlat_period * 1000; 1839 1840 now = hrtimer_cb_get_time(&tlat->timer); 1841 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1842 1843 /* 1844 * Save the next abs_period. 1845 */ 1846 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1847 1848 /* 1849 * Align thread in the first cycle on each CPU to the set alignment 1850 * if TIMERLAT_ALIGN is set. 1851 * 1852 * This is done by using an atomic64_t to store the next absolute period. 1853 * The first thread that wakes up will set the atomic64_t to its 1854 * absolute period, and the other threads will increment it by 1855 * the alignment value. 1856 */ 1857 if (test_bit(OSN_TIMERLAT_ALIGN, &osnoise_options) && !tlat->count 1858 && atomic64_cmpxchg_relaxed(&align_next, 0, tlat->abs_period)) { 1859 /* 1860 * A thread has already set align_next, use it and increment it 1861 * to be used by the next thread that wakes up after this one. 1862 */ 1863 tlat->abs_period = atomic64_add_return_relaxed( 1864 osnoise_data.timerlat_align_us * 1000, &align_next); 1865 next_abs_period = ns_to_ktime(tlat->abs_period); 1866 } 1867 1868 /* 1869 * If the new abs_period is in the past, skip the activation. 1870 */ 1871 while (ktime_compare(now, next_abs_period) > 0) { 1872 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1873 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1874 } 1875 1876 set_current_state(TASK_INTERRUPTIBLE); 1877 1878 hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD); 1879 schedule(); 1880 return 1; 1881 } 1882 1883 /* 1884 * timerlat_main- Timerlat main 1885 */ 1886 static int timerlat_main(void *data) 1887 { 1888 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1889 struct timerlat_variables *tlat = this_cpu_tmr_var(); 1890 struct timerlat_sample s; 1891 struct sched_param sp; 1892 unsigned long flags; 1893 u64 now, diff; 1894 1895 /* 1896 * Make the thread RT, that is how cyclictest is usually used. 1897 */ 1898 sp.sched_priority = DEFAULT_TIMERLAT_PRIO; 1899 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1900 1901 /* 1902 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. 1903 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread. 1904 * 1905 * To work around this limitation, disable migration and remove the 1906 * flag. 1907 */ 1908 migrate_disable(); 1909 raw_spin_lock_irqsave(¤t->pi_lock, flags); 1910 current->flags &= ~(PF_NO_SETAFFINITY); 1911 raw_spin_unlock_irqrestore(¤t->pi_lock, flags); 1912 1913 tlat->count = 0; 1914 tlat->tracing_thread = false; 1915 1916 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 1917 tlat->kthread = current; 1918 osn_var->pid = current->pid; 1919 /* 1920 * Annotate the arrival time. 1921 */ 1922 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 1923 1924 wait_next_period(tlat); 1925 1926 osn_var->sampling = 1; 1927 1928 while (!kthread_should_stop()) { 1929 1930 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1931 diff = now - tlat->abs_period; 1932 1933 s.seqnum = tlat->count; 1934 s.timer_latency = diff; 1935 s.context = THREAD_CONTEXT; 1936 1937 record_timerlat_sample(&s); 1938 1939 notify_new_max_latency(diff); 1940 1941 timerlat_dump_stack(time_to_us(diff)); 1942 1943 tlat->tracing_thread = false; 1944 if (osnoise_data.stop_tracing_total) 1945 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 1946 osnoise_stop_tracing(); 1947 1948 if (osnoise_migration_pending()) 1949 break; 1950 1951 wait_next_period(tlat); 1952 } 1953 1954 hrtimer_cancel(&tlat->timer); 1955 migrate_enable(); 1956 return 0; 1957 } 1958 #else /* CONFIG_TIMERLAT_TRACER */ 1959 static int timerlat_main(void *data) 1960 { 1961 return 0; 1962 } 1963 #endif /* CONFIG_TIMERLAT_TRACER */ 1964 1965 /* 1966 * stop_kthread - stop a workload thread 1967 */ 1968 static void stop_kthread(unsigned int cpu) 1969 { 1970 struct task_struct *kthread; 1971 1972 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); 1973 if (kthread) { 1974 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) && 1975 !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) { 1976 kthread_stop(kthread); 1977 } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) { 1978 /* 1979 * This is a user thread waiting on the timerlat_fd. We need 1980 * to close all users, and the best way to guarantee this is 1981 * by killing the thread. NOTE: this is a purpose specific file. 1982 */ 1983 kill_pid(kthread->thread_pid, SIGKILL, 1); 1984 put_task_struct(kthread); 1985 } 1986 } else { 1987 /* if no workload, just return */ 1988 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 1989 /* 1990 * This is set in the osnoise tracer case. 1991 */ 1992 per_cpu(per_cpu_osnoise_var, cpu).sampling = false; 1993 barrier(); 1994 } 1995 } 1996 } 1997 1998 /* 1999 * stop_per_cpu_kthread - Stop per-cpu threads 2000 * 2001 * Stop the osnoise sampling htread. Use this on unload and at system 2002 * shutdown. 2003 */ 2004 static void stop_per_cpu_kthreads(void) 2005 { 2006 int cpu; 2007 2008 cpus_read_lock(); 2009 2010 for_each_online_cpu(cpu) 2011 stop_kthread(cpu); 2012 2013 cpus_read_unlock(); 2014 } 2015 2016 /* 2017 * start_kthread - Start a workload thread 2018 */ 2019 static int start_kthread(unsigned int cpu) 2020 { 2021 struct task_struct *kthread; 2022 void *main = osnoise_main; 2023 char comm[24]; 2024 2025 /* Do not start a new thread if it is already running */ 2026 if (per_cpu(per_cpu_osnoise_var, cpu).kthread) 2027 return 0; 2028 2029 if (timerlat_enabled()) { 2030 snprintf(comm, 24, "timerlat/%d", cpu); 2031 main = timerlat_main; 2032 } else { 2033 /* if no workload, just return */ 2034 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 2035 per_cpu(per_cpu_osnoise_var, cpu).sampling = true; 2036 barrier(); 2037 return 0; 2038 } 2039 snprintf(comm, 24, "osnoise/%d", cpu); 2040 } 2041 2042 kthread = kthread_run_on_cpu(main, NULL, cpu, comm); 2043 2044 if (IS_ERR(kthread)) { 2045 pr_err(BANNER "could not start sampling thread\n"); 2046 return -ENOMEM; 2047 } 2048 2049 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 2050 cpumask_set_cpu(cpu, &kthread_cpumask); 2051 2052 return 0; 2053 } 2054 2055 /* 2056 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 2057 * 2058 * This starts the kernel thread that will look for osnoise on many 2059 * cpus. 2060 */ 2061 static int start_per_cpu_kthreads(void) 2062 { 2063 struct cpumask *current_mask = &save_cpumask; 2064 int retval = 0; 2065 int cpu; 2066 2067 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 2068 if (timerlat_enabled()) 2069 return 0; 2070 } 2071 2072 cpus_read_lock(); 2073 /* 2074 * Run only on online CPUs in which osnoise is allowed to run. 2075 */ 2076 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); 2077 2078 for_each_possible_cpu(cpu) { 2079 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) { 2080 struct task_struct *kthread; 2081 2082 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); 2083 if (!WARN_ON(!kthread)) 2084 kthread_stop(kthread); 2085 } 2086 } 2087 2088 for_each_cpu(cpu, current_mask) { 2089 retval = start_kthread(cpu); 2090 if (retval) { 2091 cpus_read_unlock(); 2092 stop_per_cpu_kthreads(); 2093 return retval; 2094 } 2095 } 2096 2097 cpus_read_unlock(); 2098 2099 return retval; 2100 } 2101 2102 #ifdef CONFIG_HOTPLUG_CPU 2103 static void osnoise_hotplug_workfn(struct work_struct *dummy) 2104 { 2105 unsigned int cpu = smp_processor_id(); 2106 2107 guard(mutex)(&trace_types_lock); 2108 2109 if (!osnoise_has_registered_instances()) 2110 return; 2111 2112 guard(cpus_read_lock)(); 2113 guard(mutex)(&interface_lock); 2114 2115 if (!cpu_online(cpu)) 2116 return; 2117 2118 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 2119 return; 2120 2121 start_kthread(cpu); 2122 } 2123 2124 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 2125 2126 /* 2127 * osnoise_cpu_init - CPU hotplug online callback function 2128 */ 2129 static int osnoise_cpu_init(unsigned int cpu) 2130 { 2131 schedule_work_on(cpu, &osnoise_hotplug_work); 2132 return 0; 2133 } 2134 2135 /* 2136 * osnoise_cpu_die - CPU hotplug offline callback function 2137 */ 2138 static int osnoise_cpu_die(unsigned int cpu) 2139 { 2140 stop_kthread(cpu); 2141 return 0; 2142 } 2143 2144 static void osnoise_init_hotplug_support(void) 2145 { 2146 int ret; 2147 2148 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 2149 osnoise_cpu_init, osnoise_cpu_die); 2150 if (ret < 0) 2151 pr_warn(BANNER "Error to init cpu hotplug support\n"); 2152 2153 return; 2154 } 2155 #else /* CONFIG_HOTPLUG_CPU */ 2156 static void osnoise_init_hotplug_support(void) 2157 { 2158 return; 2159 } 2160 #endif /* CONFIG_HOTPLUG_CPU */ 2161 2162 /* 2163 * seq file functions for the osnoise/options file. 2164 */ 2165 static void *s_options_start(struct seq_file *s, loff_t *pos) 2166 { 2167 int option = *pos; 2168 2169 mutex_lock(&interface_lock); 2170 2171 if (option >= OSN_MAX) 2172 return NULL; 2173 2174 return pos; 2175 } 2176 2177 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos) 2178 { 2179 int option = ++(*pos); 2180 2181 if (option >= OSN_MAX) 2182 return NULL; 2183 2184 return pos; 2185 } 2186 2187 static int s_options_show(struct seq_file *s, void *v) 2188 { 2189 loff_t *pos = v; 2190 int option = *pos; 2191 2192 if (option == OSN_DEFAULTS) { 2193 if (osnoise_options == OSN_DEFAULT_OPTIONS) 2194 seq_printf(s, "%s", osnoise_options_str[option]); 2195 else 2196 seq_printf(s, "NO_%s", osnoise_options_str[option]); 2197 goto out; 2198 } 2199 2200 if (test_bit(option, &osnoise_options)) 2201 seq_printf(s, "%s", osnoise_options_str[option]); 2202 else 2203 seq_printf(s, "NO_%s", osnoise_options_str[option]); 2204 2205 out: 2206 if (option != OSN_MAX) 2207 seq_puts(s, " "); 2208 2209 return 0; 2210 } 2211 2212 static void s_options_stop(struct seq_file *s, void *v) 2213 { 2214 seq_puts(s, "\n"); 2215 mutex_unlock(&interface_lock); 2216 } 2217 2218 static const struct seq_operations osnoise_options_seq_ops = { 2219 .start = s_options_start, 2220 .next = s_options_next, 2221 .show = s_options_show, 2222 .stop = s_options_stop 2223 }; 2224 2225 static int osnoise_options_open(struct inode *inode, struct file *file) 2226 { 2227 return seq_open(file, &osnoise_options_seq_ops); 2228 }; 2229 2230 /** 2231 * osnoise_options_write - Write function for "options" entry 2232 * @filp: The active open file structure 2233 * @ubuf: The user buffer that contains the value to write 2234 * @cnt: The maximum number of bytes to write to "file" 2235 * @ppos: The current position in @file 2236 * 2237 * Writing the option name sets the option, writing the "NO_" 2238 * prefix in front of the option name disables it. 2239 * 2240 * Writing "DEFAULTS" resets the option values to the default ones. 2241 */ 2242 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, 2243 size_t cnt, loff_t *ppos) 2244 { 2245 int running, option, enable, retval; 2246 char buf[256], *option_str; 2247 2248 if (cnt >= 256) 2249 return -EINVAL; 2250 2251 if (copy_from_user(buf, ubuf, cnt)) 2252 return -EFAULT; 2253 2254 buf[cnt] = 0; 2255 2256 if (strncmp(buf, "NO_", 3)) { 2257 option_str = strstrip(buf); 2258 enable = true; 2259 } else { 2260 option_str = strstrip(&buf[3]); 2261 enable = false; 2262 } 2263 2264 option = match_string(osnoise_options_str, OSN_MAX, option_str); 2265 if (option < 0) 2266 return -EINVAL; 2267 2268 /* 2269 * trace_types_lock is taken to avoid concurrency on start/stop. 2270 */ 2271 mutex_lock(&trace_types_lock); 2272 running = osnoise_has_registered_instances(); 2273 if (running) 2274 stop_per_cpu_kthreads(); 2275 2276 /* 2277 * avoid CPU hotplug operations that might read options. 2278 */ 2279 cpus_read_lock(); 2280 mutex_lock(&interface_lock); 2281 2282 retval = cnt; 2283 2284 if (enable) { 2285 if (option == OSN_DEFAULTS) 2286 osnoise_options = OSN_DEFAULT_OPTIONS; 2287 else 2288 set_bit(option, &osnoise_options); 2289 } else { 2290 if (option == OSN_DEFAULTS) 2291 retval = -EINVAL; 2292 else 2293 clear_bit(option, &osnoise_options); 2294 } 2295 2296 mutex_unlock(&interface_lock); 2297 cpus_read_unlock(); 2298 2299 if (running) 2300 start_per_cpu_kthreads(); 2301 mutex_unlock(&trace_types_lock); 2302 2303 return retval; 2304 } 2305 2306 /* 2307 * osnoise_cpus_read - Read function for reading the "cpus" file 2308 * @filp: The active open file structure 2309 * @ubuf: The userspace provided buffer to read value into 2310 * @cnt: The maximum number of bytes to read 2311 * @ppos: The current "file" position 2312 * 2313 * Prints the "cpus" output into the user-provided buffer. 2314 */ 2315 static ssize_t 2316 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 2317 loff_t *ppos) 2318 { 2319 char *mask_str __free(kfree) = NULL; 2320 int len; 2321 2322 guard(mutex)(&interface_lock); 2323 2324 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 2325 mask_str = kmalloc(len, GFP_KERNEL); 2326 if (!mask_str) 2327 return -ENOMEM; 2328 2329 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 2330 if (len >= count) 2331 return -EINVAL; 2332 2333 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 2334 2335 return count; 2336 } 2337 2338 /* 2339 * osnoise_cpus_write - Write function for "cpus" entry 2340 * @filp: The active open file structure 2341 * @ubuf: The user buffer that contains the value to write 2342 * @count: The maximum number of bytes to write to "file" 2343 * @ppos: The current position in @file 2344 * 2345 * This function provides a write implementation for the "cpus" 2346 * interface to the osnoise trace. By default, it lists all CPUs, 2347 * in this way, allowing osnoise threads to run on any online CPU 2348 * of the system. It serves to restrict the execution of osnoise to the 2349 * set of CPUs writing via this interface. Why not use "tracing_cpumask"? 2350 * Because the user might be interested in tracing what is running on 2351 * other CPUs. For instance, one might run osnoise in one HT CPU 2352 * while observing what is running on the sibling HT CPU. 2353 */ 2354 static ssize_t 2355 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 2356 loff_t *ppos) 2357 { 2358 cpumask_var_t osnoise_cpumask_new; 2359 int running, err; 2360 char *buf __free(kfree) = NULL; 2361 2362 if (count < 1) 2363 return 0; 2364 2365 buf = memdup_user_nul(ubuf, count); 2366 if (IS_ERR(buf)) 2367 return PTR_ERR(buf); 2368 2369 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 2370 return -ENOMEM; 2371 2372 err = cpulist_parse(buf, osnoise_cpumask_new); 2373 if (err) 2374 goto err_free; 2375 2376 /* 2377 * trace_types_lock is taken to avoid concurrency on start/stop. 2378 */ 2379 mutex_lock(&trace_types_lock); 2380 running = osnoise_has_registered_instances(); 2381 if (running) 2382 stop_per_cpu_kthreads(); 2383 2384 /* 2385 * osnoise_cpumask is read by CPU hotplug operations. 2386 */ 2387 cpus_read_lock(); 2388 mutex_lock(&interface_lock); 2389 2390 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 2391 2392 mutex_unlock(&interface_lock); 2393 cpus_read_unlock(); 2394 2395 if (running) 2396 start_per_cpu_kthreads(); 2397 mutex_unlock(&trace_types_lock); 2398 2399 free_cpumask_var(osnoise_cpumask_new); 2400 return count; 2401 2402 err_free: 2403 free_cpumask_var(osnoise_cpumask_new); 2404 2405 return err; 2406 } 2407 2408 #ifdef CONFIG_TIMERLAT_TRACER 2409 static int timerlat_fd_open(struct inode *inode, struct file *file) 2410 { 2411 struct osnoise_variables *osn_var; 2412 struct timerlat_variables *tlat; 2413 long cpu = (long) inode->i_cdev; 2414 2415 mutex_lock(&interface_lock); 2416 2417 /* 2418 * This file is accessible only if timerlat is enabled, and 2419 * NO_OSNOISE_WORKLOAD is set. 2420 */ 2421 if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) { 2422 mutex_unlock(&interface_lock); 2423 return -EINVAL; 2424 } 2425 2426 migrate_disable(); 2427 2428 osn_var = this_cpu_osn_var(); 2429 2430 /* 2431 * The osn_var->pid holds the single access to this file. 2432 */ 2433 if (osn_var->pid) { 2434 mutex_unlock(&interface_lock); 2435 migrate_enable(); 2436 return -EBUSY; 2437 } 2438 2439 /* 2440 * timerlat tracer is a per-cpu tracer. Check if the user-space too 2441 * is pinned to a single CPU. The tracer laters monitor if the task 2442 * migrates and then disables tracer if it does. However, it is 2443 * worth doing this basic acceptance test to avoid obviusly wrong 2444 * setup. 2445 */ 2446 if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) { 2447 mutex_unlock(&interface_lock); 2448 migrate_enable(); 2449 return -EPERM; 2450 } 2451 2452 /* 2453 * From now on, it is good to go. 2454 */ 2455 file->private_data = inode->i_cdev; 2456 2457 get_task_struct(current); 2458 2459 osn_var->kthread = current; 2460 osn_var->pid = current->pid; 2461 2462 /* 2463 * Setup is done. 2464 */ 2465 mutex_unlock(&interface_lock); 2466 2467 tlat = this_cpu_tmr_var(); 2468 tlat->count = 0; 2469 2470 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 2471 2472 migrate_enable(); 2473 return 0; 2474 }; 2475 2476 /* 2477 * timerlat_fd_read - Read function for "timerlat_fd" file 2478 * @file: The active open file structure 2479 * @ubuf: The userspace provided buffer to read value into 2480 * @cnt: The maximum number of bytes to read 2481 * @ppos: The current "file" position 2482 * 2483 * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error. 2484 */ 2485 static ssize_t 2486 timerlat_fd_read(struct file *file, char __user *ubuf, size_t count, 2487 loff_t *ppos) 2488 { 2489 long cpu = (long) file->private_data; 2490 struct osnoise_variables *osn_var; 2491 struct timerlat_variables *tlat; 2492 struct timerlat_sample s; 2493 s64 diff; 2494 u64 now; 2495 2496 migrate_disable(); 2497 2498 tlat = this_cpu_tmr_var(); 2499 2500 /* 2501 * While in user-space, the thread is migratable. There is nothing 2502 * we can do about it. 2503 * So, if the thread is running on another CPU, stop the machinery. 2504 */ 2505 if (cpu == smp_processor_id()) { 2506 if (tlat->uthread_migrate) { 2507 migrate_enable(); 2508 return -EINVAL; 2509 } 2510 } else { 2511 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1; 2512 osnoise_taint("timerlat user thread migrate\n"); 2513 osnoise_stop_tracing(); 2514 migrate_enable(); 2515 return -EINVAL; 2516 } 2517 2518 osn_var = this_cpu_osn_var(); 2519 2520 /* 2521 * The timerlat in user-space runs in a different order: 2522 * the read() starts from the execution of the previous occurrence, 2523 * sleeping for the next occurrence. 2524 * 2525 * So, skip if we are entering on read() before the first wakeup 2526 * from timerlat IRQ: 2527 */ 2528 if (likely(osn_var->sampling)) { 2529 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 2530 diff = now - tlat->abs_period; 2531 2532 /* 2533 * it was not a timer firing, but some other signal? 2534 */ 2535 if (diff < 0) 2536 goto out; 2537 2538 s.seqnum = tlat->count; 2539 s.timer_latency = diff; 2540 s.context = THREAD_URET; 2541 2542 record_timerlat_sample(&s); 2543 2544 notify_new_max_latency(diff); 2545 2546 tlat->tracing_thread = false; 2547 if (osnoise_data.stop_tracing_total) 2548 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 2549 osnoise_stop_tracing(); 2550 } else { 2551 tlat->tracing_thread = false; 2552 tlat->kthread = current; 2553 2554 /* Annotate now to drift new period */ 2555 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 2556 2557 osn_var->sampling = 1; 2558 } 2559 2560 /* wait for the next period */ 2561 wait_next_period(tlat); 2562 2563 /* This is the wakeup from this cycle */ 2564 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 2565 diff = now - tlat->abs_period; 2566 2567 /* 2568 * it was not a timer firing, but some other signal? 2569 */ 2570 if (diff < 0) 2571 goto out; 2572 2573 s.seqnum = tlat->count; 2574 s.timer_latency = diff; 2575 s.context = THREAD_CONTEXT; 2576 2577 record_timerlat_sample(&s); 2578 2579 if (osnoise_data.stop_tracing_total) { 2580 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) { 2581 timerlat_dump_stack(time_to_us(diff)); 2582 notify_new_max_latency(diff); 2583 osnoise_stop_tracing(); 2584 } 2585 } 2586 2587 out: 2588 migrate_enable(); 2589 return 0; 2590 } 2591 2592 static int timerlat_fd_release(struct inode *inode, struct file *file) 2593 { 2594 struct osnoise_variables *osn_var; 2595 struct timerlat_variables *tlat_var; 2596 long cpu = (long) file->private_data; 2597 2598 migrate_disable(); 2599 mutex_lock(&interface_lock); 2600 2601 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 2602 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 2603 2604 if (tlat_var->kthread) 2605 hrtimer_cancel(&tlat_var->timer); 2606 memset(tlat_var, 0, sizeof(*tlat_var)); 2607 2608 osn_var->sampling = 0; 2609 osn_var->pid = 0; 2610 2611 /* 2612 * We are leaving, not being stopped... see stop_kthread(); 2613 */ 2614 if (osn_var->kthread) { 2615 put_task_struct(osn_var->kthread); 2616 osn_var->kthread = NULL; 2617 } 2618 2619 mutex_unlock(&interface_lock); 2620 migrate_enable(); 2621 return 0; 2622 } 2623 #endif 2624 2625 /* 2626 * osnoise/runtime_us: cannot be greater than the period. 2627 */ 2628 static struct trace_min_max_param osnoise_runtime = { 2629 .lock = &interface_lock, 2630 .val = &osnoise_data.sample_runtime, 2631 .max = &osnoise_data.sample_period, 2632 .min = NULL, 2633 }; 2634 2635 /* 2636 * osnoise/period_us: cannot be smaller than the runtime. 2637 */ 2638 static struct trace_min_max_param osnoise_period = { 2639 .lock = &interface_lock, 2640 .val = &osnoise_data.sample_period, 2641 .max = NULL, 2642 .min = &osnoise_data.sample_runtime, 2643 }; 2644 2645 /* 2646 * osnoise/stop_tracing_us: no limit. 2647 */ 2648 static struct trace_min_max_param osnoise_stop_tracing_in = { 2649 .lock = &interface_lock, 2650 .val = &osnoise_data.stop_tracing, 2651 .max = NULL, 2652 .min = NULL, 2653 }; 2654 2655 /* 2656 * osnoise/stop_tracing_total_us: no limit. 2657 */ 2658 static struct trace_min_max_param osnoise_stop_tracing_total = { 2659 .lock = &interface_lock, 2660 .val = &osnoise_data.stop_tracing_total, 2661 .max = NULL, 2662 .min = NULL, 2663 }; 2664 2665 #ifdef CONFIG_TIMERLAT_TRACER 2666 /* 2667 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 2668 * latency is higher than val. 2669 */ 2670 static struct trace_min_max_param osnoise_print_stack = { 2671 .lock = &interface_lock, 2672 .val = &osnoise_data.print_stack, 2673 .max = NULL, 2674 .min = NULL, 2675 }; 2676 2677 /* 2678 * osnoise/timerlat_period: min 100 us, max 1 s 2679 */ 2680 static u64 timerlat_min_period = 100; 2681 static u64 timerlat_max_period = 1000000; 2682 static struct trace_min_max_param timerlat_period = { 2683 .lock = &interface_lock, 2684 .val = &osnoise_data.timerlat_period, 2685 .max = &timerlat_max_period, 2686 .min = &timerlat_min_period, 2687 }; 2688 2689 /* 2690 * osnoise/timerlat_align_us: align the first wakeup of all timerlat 2691 * threads to a common boundary (in us). 0 means disabled. 2692 */ 2693 static struct trace_min_max_param timerlat_align_us = { 2694 .lock = &interface_lock, 2695 .val = &osnoise_data.timerlat_align_us, 2696 .max = NULL, 2697 .min = NULL, 2698 }; 2699 2700 static const struct file_operations timerlat_fd_fops = { 2701 .open = timerlat_fd_open, 2702 .read = timerlat_fd_read, 2703 .release = timerlat_fd_release, 2704 .llseek = generic_file_llseek, 2705 }; 2706 #endif 2707 2708 static const struct file_operations cpus_fops = { 2709 .open = tracing_open_generic, 2710 .read = osnoise_cpus_read, 2711 .write = osnoise_cpus_write, 2712 .llseek = generic_file_llseek, 2713 }; 2714 2715 static const struct file_operations osnoise_options_fops = { 2716 .open = osnoise_options_open, 2717 .read = seq_read, 2718 .llseek = seq_lseek, 2719 .release = seq_release, 2720 .write = osnoise_options_write 2721 }; 2722 2723 #ifdef CONFIG_TIMERLAT_TRACER 2724 #ifdef CONFIG_STACKTRACE 2725 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2726 { 2727 struct dentry *tmp; 2728 2729 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 2730 &osnoise_print_stack, &trace_min_max_fops); 2731 if (!tmp) 2732 return -ENOMEM; 2733 2734 return 0; 2735 } 2736 #else /* CONFIG_STACKTRACE */ 2737 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2738 { 2739 return 0; 2740 } 2741 #endif /* CONFIG_STACKTRACE */ 2742 2743 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir) 2744 { 2745 struct dentry *timerlat_fd; 2746 struct dentry *per_cpu; 2747 struct dentry *cpu_dir; 2748 char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */ 2749 long cpu; 2750 2751 /* 2752 * Why not using tracing instance per_cpu/ dir? 2753 * 2754 * Because osnoise/timerlat have a single workload, having 2755 * multiple files like these are waste of memory. 2756 */ 2757 per_cpu = tracefs_create_dir("per_cpu", top_dir); 2758 if (!per_cpu) 2759 return -ENOMEM; 2760 2761 for_each_possible_cpu(cpu) { 2762 snprintf(cpu_str, 30, "cpu%ld", cpu); 2763 cpu_dir = tracefs_create_dir(cpu_str, per_cpu); 2764 if (!cpu_dir) 2765 goto out_clean; 2766 2767 timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ, 2768 cpu_dir, NULL, &timerlat_fd_fops); 2769 if (!timerlat_fd) 2770 goto out_clean; 2771 2772 /* Record the CPU */ 2773 d_inode(timerlat_fd)->i_cdev = (void *)(cpu); 2774 } 2775 2776 return 0; 2777 2778 out_clean: 2779 tracefs_remove(per_cpu); 2780 return -ENOMEM; 2781 } 2782 2783 /* 2784 * init_timerlat_tracefs - A function to initialize the timerlat interface files 2785 */ 2786 static int init_timerlat_tracefs(struct dentry *top_dir) 2787 { 2788 struct dentry *tmp; 2789 int retval; 2790 2791 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 2792 &timerlat_period, &trace_min_max_fops); 2793 if (!tmp) 2794 return -ENOMEM; 2795 2796 tmp = tracefs_create_file("timerlat_align_us", TRACE_MODE_WRITE, top_dir, 2797 &timerlat_align_us, &trace_min_max_fops); 2798 if (!tmp) 2799 return -ENOMEM; 2800 2801 retval = osnoise_create_cpu_timerlat_fd(top_dir); 2802 if (retval) 2803 return retval; 2804 2805 return init_timerlat_stack_tracefs(top_dir); 2806 } 2807 #else /* CONFIG_TIMERLAT_TRACER */ 2808 static int init_timerlat_tracefs(struct dentry *top_dir) 2809 { 2810 return 0; 2811 } 2812 #endif /* CONFIG_TIMERLAT_TRACER */ 2813 2814 /* 2815 * init_tracefs - A function to initialize the tracefs interface files 2816 * 2817 * This function creates entries in tracefs for "osnoise" and "timerlat". 2818 * It creates these directories in the tracing directory, and within that 2819 * directory the use can change and view the configs. 2820 */ 2821 static int init_tracefs(void) 2822 { 2823 struct dentry *top_dir; 2824 struct dentry *tmp; 2825 int ret; 2826 2827 ret = tracing_init_dentry(); 2828 if (ret) 2829 return -ENOMEM; 2830 2831 top_dir = tracefs_create_dir("osnoise", NULL); 2832 if (!top_dir) 2833 return 0; 2834 2835 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, 2836 &osnoise_period, &trace_min_max_fops); 2837 if (!tmp) 2838 goto err; 2839 2840 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, 2841 &osnoise_runtime, &trace_min_max_fops); 2842 if (!tmp) 2843 goto err; 2844 2845 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, 2846 &osnoise_stop_tracing_in, &trace_min_max_fops); 2847 if (!tmp) 2848 goto err; 2849 2850 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, 2851 &osnoise_stop_tracing_total, &trace_min_max_fops); 2852 if (!tmp) 2853 goto err; 2854 2855 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); 2856 if (!tmp) 2857 goto err; 2858 2859 tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL, 2860 &osnoise_options_fops); 2861 if (!tmp) 2862 goto err; 2863 2864 ret = init_timerlat_tracefs(top_dir); 2865 if (ret) 2866 goto err; 2867 2868 return 0; 2869 2870 err: 2871 tracefs_remove(top_dir); 2872 return -ENOMEM; 2873 } 2874 2875 static int osnoise_hook_events(void) 2876 { 2877 int retval; 2878 2879 /* 2880 * Trace is already hooked, we are re-enabling from 2881 * a stop_tracing_*. 2882 */ 2883 if (trace_osnoise_callback_enabled) 2884 return 0; 2885 2886 retval = hook_irq_events(); 2887 if (retval) 2888 return -EINVAL; 2889 2890 retval = hook_softirq_events(); 2891 if (retval) 2892 goto out_unhook_irq; 2893 2894 retval = hook_thread_events(); 2895 /* 2896 * All fine! 2897 */ 2898 if (!retval) 2899 return 0; 2900 2901 unhook_softirq_events(); 2902 out_unhook_irq: 2903 unhook_irq_events(); 2904 return -EINVAL; 2905 } 2906 2907 static void osnoise_unhook_events(void) 2908 { 2909 unhook_thread_events(); 2910 unhook_softirq_events(); 2911 unhook_irq_events(); 2912 } 2913 2914 /* 2915 * osnoise_workload_start - start the workload and hook to events 2916 */ 2917 static int osnoise_workload_start(void) 2918 { 2919 int retval; 2920 2921 /* 2922 * Instances need to be registered after calling workload 2923 * start. Hence, if there is already an instance, the 2924 * workload was already registered. Otherwise, this 2925 * code is on the way to register the first instance, 2926 * and the workload will start. 2927 */ 2928 if (osnoise_has_registered_instances()) 2929 return 0; 2930 2931 osn_var_reset_all(); 2932 2933 retval = osnoise_hook_events(); 2934 if (retval) 2935 return retval; 2936 2937 /* 2938 * Make sure that ftrace_nmi_enter/exit() see reset values 2939 * before enabling trace_osnoise_callback_enabled. 2940 */ 2941 barrier(); 2942 trace_osnoise_callback_enabled = true; 2943 2944 retval = start_per_cpu_kthreads(); 2945 if (retval) { 2946 trace_osnoise_callback_enabled = false; 2947 /* 2948 * Make sure that ftrace_nmi_enter/exit() see 2949 * trace_osnoise_callback_enabled as false before continuing. 2950 */ 2951 barrier(); 2952 2953 osnoise_unhook_events(); 2954 return retval; 2955 } 2956 2957 return 0; 2958 } 2959 2960 /* 2961 * osnoise_workload_stop - stop the workload and unhook the events 2962 */ 2963 static void osnoise_workload_stop(void) 2964 { 2965 /* 2966 * Instances need to be unregistered before calling 2967 * stop. Hence, if there is a registered instance, more 2968 * than one instance is running, and the workload will not 2969 * yet stop. Otherwise, this code is on the way to disable 2970 * the last instance, and the workload can stop. 2971 */ 2972 if (osnoise_has_registered_instances()) 2973 return; 2974 2975 /* 2976 * If callbacks were already disabled in a previous stop 2977 * call, there is no need to disable then again. 2978 * 2979 * For instance, this happens when tracing is stopped via: 2980 * echo 0 > tracing_on 2981 * echo nop > current_tracer. 2982 */ 2983 if (!trace_osnoise_callback_enabled) 2984 return; 2985 2986 trace_osnoise_callback_enabled = false; 2987 /* 2988 * Make sure that ftrace_nmi_enter/exit() see 2989 * trace_osnoise_callback_enabled as false before continuing. 2990 */ 2991 barrier(); 2992 2993 stop_per_cpu_kthreads(); 2994 2995 osnoise_unhook_events(); 2996 } 2997 2998 static void osnoise_tracer_start(struct trace_array *tr) 2999 { 3000 int retval; 3001 3002 /* 3003 * If the instance is already registered, there is no need to 3004 * register it again. 3005 */ 3006 if (osnoise_instance_registered(tr)) 3007 return; 3008 3009 retval = osnoise_workload_start(); 3010 if (retval) 3011 pr_err(BANNER "Error starting osnoise tracer\n"); 3012 3013 osnoise_register_instance(tr); 3014 } 3015 3016 static void osnoise_tracer_stop(struct trace_array *tr) 3017 { 3018 osnoise_unregister_instance(tr); 3019 osnoise_workload_stop(); 3020 } 3021 3022 static int osnoise_tracer_init(struct trace_array *tr) 3023 { 3024 /* 3025 * Only allow osnoise tracer if timerlat tracer is not running 3026 * already. 3027 */ 3028 if (timerlat_enabled()) 3029 return -EBUSY; 3030 3031 tr->max_latency = 0; 3032 3033 osnoise_tracer_start(tr); 3034 return 0; 3035 } 3036 3037 static void osnoise_tracer_reset(struct trace_array *tr) 3038 { 3039 osnoise_tracer_stop(tr); 3040 } 3041 3042 static struct tracer osnoise_tracer __read_mostly = { 3043 .name = "osnoise", 3044 .init = osnoise_tracer_init, 3045 .reset = osnoise_tracer_reset, 3046 .start = osnoise_tracer_start, 3047 .stop = osnoise_tracer_stop, 3048 .print_header = print_osnoise_headers, 3049 .allow_instances = true, 3050 }; 3051 3052 #ifdef CONFIG_TIMERLAT_TRACER 3053 static void timerlat_tracer_start(struct trace_array *tr) 3054 { 3055 int retval; 3056 3057 /* 3058 * If the instance is already registered, there is no need to 3059 * register it again. 3060 */ 3061 if (osnoise_instance_registered(tr)) 3062 return; 3063 3064 retval = osnoise_workload_start(); 3065 if (retval) 3066 pr_err(BANNER "Error starting timerlat tracer\n"); 3067 3068 osnoise_register_instance(tr); 3069 3070 return; 3071 } 3072 3073 static void timerlat_tracer_stop(struct trace_array *tr) 3074 { 3075 int cpu; 3076 3077 osnoise_unregister_instance(tr); 3078 3079 /* 3080 * Instruct the threads to stop only if this is the last instance. 3081 */ 3082 if (!osnoise_has_registered_instances()) { 3083 for_each_online_cpu(cpu) 3084 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 3085 } 3086 3087 osnoise_workload_stop(); 3088 } 3089 3090 static int timerlat_tracer_init(struct trace_array *tr) 3091 { 3092 /* 3093 * Only allow timerlat tracer if osnoise tracer is not running already. 3094 */ 3095 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) 3096 return -EBUSY; 3097 3098 /* 3099 * If this is the first instance, set timerlat_tracer to block 3100 * osnoise tracer start. 3101 */ 3102 if (!osnoise_has_registered_instances()) 3103 osnoise_data.timerlat_tracer = 1; 3104 3105 tr->max_latency = 0; 3106 timerlat_tracer_start(tr); 3107 3108 return 0; 3109 } 3110 3111 static void timerlat_tracer_reset(struct trace_array *tr) 3112 { 3113 timerlat_tracer_stop(tr); 3114 3115 /* 3116 * If this is the last instance, reset timerlat_tracer allowing 3117 * osnoise to be started. 3118 */ 3119 if (!osnoise_has_registered_instances()) 3120 osnoise_data.timerlat_tracer = 0; 3121 } 3122 3123 static struct tracer timerlat_tracer __read_mostly = { 3124 .name = "timerlat", 3125 .init = timerlat_tracer_init, 3126 .reset = timerlat_tracer_reset, 3127 .start = timerlat_tracer_start, 3128 .stop = timerlat_tracer_stop, 3129 .print_header = print_timerlat_headers, 3130 .allow_instances = true, 3131 }; 3132 3133 __init static int init_timerlat_tracer(void) 3134 { 3135 return register_tracer(&timerlat_tracer); 3136 } 3137 #else /* CONFIG_TIMERLAT_TRACER */ 3138 __init static int init_timerlat_tracer(void) 3139 { 3140 return 0; 3141 } 3142 #endif /* CONFIG_TIMERLAT_TRACER */ 3143 3144 __init static int init_osnoise_tracer(void) 3145 { 3146 int ret; 3147 3148 mutex_init(&interface_lock); 3149 3150 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 3151 3152 ret = register_tracer(&osnoise_tracer); 3153 if (ret) { 3154 pr_err(BANNER "Error registering osnoise!\n"); 3155 return ret; 3156 } 3157 3158 ret = init_timerlat_tracer(); 3159 if (ret) { 3160 pr_err(BANNER "Error registering timerlat!\n"); 3161 return ret; 3162 } 3163 3164 osnoise_init_hotplug_support(); 3165 3166 INIT_LIST_HEAD_RCU(&osnoise_instances); 3167 3168 init_tracefs(); 3169 3170 return 0; 3171 } 3172 late_initcall(init_osnoise_tracer); 3173