1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include <linux/string.h> 28 #include "trace.h" 29 30 #ifdef CONFIG_X86_LOCAL_APIC 31 #include <asm/trace/irq_vectors.h> 32 #undef TRACE_INCLUDE_PATH 33 #undef TRACE_INCLUDE_FILE 34 #endif /* CONFIG_X86_LOCAL_APIC */ 35 36 #include <trace/events/irq.h> 37 #include <trace/events/sched.h> 38 39 #define CREATE_TRACE_POINTS 40 #include <trace/events/osnoise.h> 41 42 /* 43 * Default values. 44 */ 45 #define BANNER "osnoise: " 46 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 47 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 48 49 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 50 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 51 52 /* 53 * osnoise/options entries. 54 */ 55 enum osnoise_options_index { 56 OSN_DEFAULTS = 0, 57 OSN_WORKLOAD, 58 OSN_PANIC_ON_STOP, 59 OSN_PREEMPT_DISABLE, 60 OSN_IRQ_DISABLE, 61 OSN_TIMERLAT_ALIGN, 62 OSN_MAX 63 }; 64 65 static const char * const osnoise_options_str[OSN_MAX] = { 66 "DEFAULTS", 67 "OSNOISE_WORKLOAD", 68 "PANIC_ON_STOP", 69 "OSNOISE_PREEMPT_DISABLE", 70 "OSNOISE_IRQ_DISABLE", 71 "TIMERLAT_ALIGN" }; 72 73 #define OSN_DEFAULT_OPTIONS 0x2 74 static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; 75 76 /* 77 * trace_array of the enabled osnoise/timerlat instances. 78 */ 79 struct osnoise_instance { 80 struct list_head list; 81 struct trace_array *tr; 82 }; 83 84 static struct list_head osnoise_instances; 85 86 static void osnoise_print(const char *fmt, ...) 87 { 88 struct osnoise_instance *inst; 89 struct trace_array *tr; 90 va_list ap; 91 92 rcu_read_lock(); 93 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 94 tr = inst->tr; 95 va_start(ap, fmt); 96 trace_array_vprintk(tr, _RET_IP_, fmt, ap); 97 va_end(ap); 98 } 99 rcu_read_unlock(); 100 } 101 102 static bool osnoise_has_registered_instances(void) 103 { 104 return !!list_first_or_null_rcu(&osnoise_instances, 105 struct osnoise_instance, 106 list); 107 } 108 109 /* 110 * osnoise_instance_registered - check if a tr is already registered 111 */ 112 static int osnoise_instance_registered(struct trace_array *tr) 113 { 114 struct osnoise_instance *inst; 115 int found = 0; 116 117 rcu_read_lock(); 118 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 119 if (inst->tr == tr) 120 found = 1; 121 } 122 rcu_read_unlock(); 123 124 return found; 125 } 126 127 /* 128 * osnoise_register_instance - register a new trace instance 129 * 130 * Register a trace_array *tr in the list of instances running 131 * osnoise/timerlat tracers. 132 */ 133 static int osnoise_register_instance(struct trace_array *tr) 134 { 135 struct osnoise_instance *inst; 136 137 /* 138 * register/unregister serialization is provided by trace's 139 * trace_types_lock. 140 */ 141 lockdep_assert_held(&trace_types_lock); 142 trace_array_init_printk(tr); 143 144 inst = kmalloc_obj(*inst); 145 if (!inst) 146 return -ENOMEM; 147 148 INIT_LIST_HEAD_RCU(&inst->list); 149 inst->tr = tr; 150 list_add_tail_rcu(&inst->list, &osnoise_instances); 151 152 return 0; 153 } 154 155 /* 156 * osnoise_unregister_instance - unregister a registered trace instance 157 * 158 * Remove the trace_array *tr from the list of instances running 159 * osnoise/timerlat tracers. 160 */ 161 static void osnoise_unregister_instance(struct trace_array *tr) 162 { 163 struct osnoise_instance *inst; 164 int found = 0; 165 166 /* 167 * register/unregister serialization is provided by trace's 168 * trace_types_lock. 169 */ 170 list_for_each_entry_rcu(inst, &osnoise_instances, list, 171 lockdep_is_held(&trace_types_lock)) { 172 if (inst->tr == tr) { 173 list_del_rcu(&inst->list); 174 found = 1; 175 break; 176 } 177 } 178 179 if (!found) 180 return; 181 182 kvfree_rcu_mightsleep(inst); 183 } 184 185 /* 186 * NMI runtime info. 187 */ 188 struct osn_nmi { 189 u64 count; 190 u64 delta_start; 191 }; 192 193 /* 194 * IRQ runtime info. 195 */ 196 struct osn_irq { 197 u64 count; 198 u64 arrival_time; 199 u64 delta_start; 200 }; 201 202 #define IRQ_CONTEXT 0 203 #define THREAD_CONTEXT 1 204 #define THREAD_URET 2 205 /* 206 * sofirq runtime info. 207 */ 208 struct osn_softirq { 209 u64 count; 210 u64 arrival_time; 211 u64 delta_start; 212 }; 213 214 /* 215 * thread runtime info. 216 */ 217 struct osn_thread { 218 u64 count; 219 u64 arrival_time; 220 u64 delta_start; 221 }; 222 223 /* 224 * Runtime information: this structure saves the runtime information used by 225 * one sampling thread. 226 */ 227 struct osnoise_variables { 228 struct task_struct *kthread; 229 bool sampling; 230 pid_t pid; 231 struct osn_nmi nmi; 232 struct osn_irq irq; 233 struct osn_softirq softirq; 234 struct osn_thread thread; 235 local_t int_counter; 236 }; 237 238 /* 239 * Per-cpu runtime information. 240 */ 241 static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 242 243 /* 244 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 245 */ 246 static inline struct osnoise_variables *this_cpu_osn_var(void) 247 { 248 return this_cpu_ptr(&per_cpu_osnoise_var); 249 } 250 251 /* 252 * Protect the interface. 253 */ 254 static struct mutex interface_lock; 255 256 #ifdef CONFIG_TIMERLAT_TRACER 257 /* 258 * Runtime information for the timer mode. 259 */ 260 struct timerlat_variables { 261 struct task_struct *kthread; 262 struct hrtimer timer; 263 u64 rel_period; 264 u64 abs_period; 265 bool tracing_thread; 266 u64 count; 267 bool uthread_migrate; 268 }; 269 270 static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 271 272 /* 273 * timerlat wake-up offset for next thread with TIMERLAT_ALIGN set. 274 */ 275 static atomic64_t align_next; 276 277 /* 278 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 279 */ 280 static inline struct timerlat_variables *this_cpu_tmr_var(void) 281 { 282 return this_cpu_ptr(&per_cpu_timerlat_var); 283 } 284 285 /* 286 * tlat_var_reset - Reset the values of the given timerlat_variables 287 */ 288 static inline void tlat_var_reset(void) 289 { 290 struct timerlat_variables *tlat_var; 291 int cpu; 292 293 /* Synchronize with the timerlat interfaces */ 294 mutex_lock(&interface_lock); 295 296 /* 297 * So far, all the values are initialized as 0, so 298 * zeroing the structure is perfect. 299 */ 300 for_each_online_cpu(cpu) { 301 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 302 if (tlat_var->kthread) 303 hrtimer_cancel(&tlat_var->timer); 304 memset(tlat_var, 0, sizeof(*tlat_var)); 305 } 306 /* 307 * Reset also align_next, to be filled by a new offset by the first timerlat 308 * thread that wakes up, if TIMERLAT_ALIGN is set. 309 */ 310 atomic64_set(&align_next, 0); 311 312 mutex_unlock(&interface_lock); 313 } 314 #else /* CONFIG_TIMERLAT_TRACER */ 315 #define tlat_var_reset() do {} while (0) 316 #endif /* CONFIG_TIMERLAT_TRACER */ 317 318 /* 319 * osn_var_reset - Reset the values of the given osnoise_variables 320 */ 321 static inline void osn_var_reset(void) 322 { 323 struct osnoise_variables *osn_var; 324 int cpu; 325 326 /* 327 * So far, all the values are initialized as 0, so 328 * zeroing the structure is perfect. 329 */ 330 for_each_online_cpu(cpu) { 331 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 332 memset(osn_var, 0, sizeof(*osn_var)); 333 } 334 } 335 336 /* 337 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 338 */ 339 static inline void osn_var_reset_all(void) 340 { 341 osn_var_reset(); 342 tlat_var_reset(); 343 } 344 345 /* 346 * Tells NMIs to call back to the osnoise tracer to record timestamps. 347 */ 348 bool trace_osnoise_callback_enabled; 349 350 /* 351 * Tracer data. 352 */ 353 static struct osnoise_data { 354 u64 sample_period; /* total sampling period */ 355 u64 sample_runtime; /* active sampling portion of period */ 356 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 357 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 358 #ifdef CONFIG_TIMERLAT_TRACER 359 u64 timerlat_period; /* timerlat period */ 360 u64 timerlat_align_us; /* timerlat alignment */ 361 u64 print_stack; /* print IRQ stack if total > */ 362 int timerlat_tracer; /* timerlat tracer */ 363 #endif 364 bool tainted; /* info users and developers about a problem */ 365 } osnoise_data = { 366 .sample_period = DEFAULT_SAMPLE_PERIOD, 367 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 368 .stop_tracing = 0, 369 .stop_tracing_total = 0, 370 #ifdef CONFIG_TIMERLAT_TRACER 371 .print_stack = 0, 372 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 373 .timerlat_align_us = 0, 374 .timerlat_tracer = 0, 375 #endif 376 }; 377 378 #ifdef CONFIG_TIMERLAT_TRACER 379 static inline bool timerlat_enabled(void) 380 { 381 return osnoise_data.timerlat_tracer; 382 } 383 384 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 385 { 386 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 387 /* 388 * If the timerlat is enabled, but the irq handler did 389 * not run yet enabling timerlat_tracer, do not trace. 390 */ 391 if (!tlat_var->tracing_thread) { 392 osn_var->softirq.arrival_time = 0; 393 osn_var->softirq.delta_start = 0; 394 return 0; 395 } 396 return 1; 397 } 398 399 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 400 { 401 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 402 /* 403 * If the timerlat is enabled, but the irq handler did 404 * not run yet enabling timerlat_tracer, do not trace. 405 */ 406 if (!tlat_var->tracing_thread) { 407 osn_var->thread.delta_start = 0; 408 osn_var->thread.arrival_time = 0; 409 return 0; 410 } 411 return 1; 412 } 413 #else /* CONFIG_TIMERLAT_TRACER */ 414 static inline bool timerlat_enabled(void) 415 { 416 return false; 417 } 418 419 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 420 { 421 return 1; 422 } 423 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 424 { 425 return 1; 426 } 427 #endif 428 429 #ifdef CONFIG_PREEMPT_RT 430 /* 431 * Print the osnoise header info. 432 */ 433 static void print_osnoise_headers(struct seq_file *s) 434 { 435 if (osnoise_data.tainted) 436 seq_puts(s, "# osnoise is tainted!\n"); 437 438 seq_puts(s, "# _-------=> irqs-off\n"); 439 seq_puts(s, "# / _------=> need-resched\n"); 440 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 441 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 442 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 443 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 444 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 445 446 seq_puts(s, "# |||||| / "); 447 seq_puts(s, " MAX\n"); 448 449 seq_puts(s, "# ||||| / "); 450 seq_puts(s, " SINGLE Interference counters:\n"); 451 452 seq_puts(s, "# ||||||| RUNTIME "); 453 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 454 455 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); 456 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 457 458 seq_puts(s, "# | | | ||||||| | | "); 459 seq_puts(s, " | | | | | | | |\n"); 460 } 461 #else /* CONFIG_PREEMPT_RT */ 462 static void print_osnoise_headers(struct seq_file *s) 463 { 464 if (osnoise_data.tainted) 465 seq_puts(s, "# osnoise is tainted!\n"); 466 467 seq_puts(s, "# _-----=> irqs-off\n"); 468 seq_puts(s, "# / _----=> need-resched\n"); 469 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 470 seq_puts(s, "# || / _--=> preempt-depth\n"); 471 seq_puts(s, "# ||| / _-=> migrate-disable "); 472 seq_puts(s, " MAX\n"); 473 seq_puts(s, "# |||| / delay "); 474 seq_puts(s, " SINGLE Interference counters:\n"); 475 476 seq_puts(s, "# ||||| RUNTIME "); 477 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 478 479 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); 480 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 481 482 seq_puts(s, "# | | | ||||| | | "); 483 seq_puts(s, " | | | | | | | |\n"); 484 } 485 #endif /* CONFIG_PREEMPT_RT */ 486 487 /* 488 * osnoise_taint - report an osnoise error. 489 */ 490 #define osnoise_taint(msg) ({ \ 491 osnoise_print(msg); \ 492 osnoise_data.tainted = true; \ 493 }) 494 495 /* 496 * Record an osnoise_sample into the tracer buffer. 497 */ 498 static void 499 __record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) 500 { 501 struct ring_buffer_event *event; 502 struct osnoise_entry *entry; 503 504 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 505 tracing_gen_ctx()); 506 if (!event) 507 return; 508 entry = ring_buffer_event_data(event); 509 entry->runtime = sample->runtime; 510 entry->noise = sample->noise; 511 entry->max_sample = sample->max_sample; 512 entry->hw_count = sample->hw_count; 513 entry->nmi_count = sample->nmi_count; 514 entry->irq_count = sample->irq_count; 515 entry->softirq_count = sample->softirq_count; 516 entry->thread_count = sample->thread_count; 517 518 trace_buffer_unlock_commit_nostack(buffer, event); 519 } 520 521 /* 522 * Record an osnoise_sample on all osnoise instances and fire trace event. 523 */ 524 static void record_osnoise_sample(struct osnoise_sample *sample) 525 { 526 struct osnoise_instance *inst; 527 struct trace_buffer *buffer; 528 529 trace_osnoise_sample(sample); 530 531 rcu_read_lock(); 532 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 533 buffer = inst->tr->array_buffer.buffer; 534 __record_osnoise_sample(sample, buffer); 535 } 536 rcu_read_unlock(); 537 } 538 539 #ifdef CONFIG_TIMERLAT_TRACER 540 /* 541 * Print the timerlat header info. 542 */ 543 #ifdef CONFIG_PREEMPT_RT 544 static void print_timerlat_headers(struct seq_file *s) 545 { 546 seq_puts(s, "# _-------=> irqs-off\n"); 547 seq_puts(s, "# / _------=> need-resched\n"); 548 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 549 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 550 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 551 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 552 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 553 seq_puts(s, "# |||||| /\n"); 554 seq_puts(s, "# ||||||| ACTIVATION\n"); 555 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); 556 seq_puts(s, " CONTEXT LATENCY\n"); 557 seq_puts(s, "# | | | ||||||| | | "); 558 seq_puts(s, " | |\n"); 559 } 560 #else /* CONFIG_PREEMPT_RT */ 561 static void print_timerlat_headers(struct seq_file *s) 562 { 563 seq_puts(s, "# _-----=> irqs-off\n"); 564 seq_puts(s, "# / _----=> need-resched\n"); 565 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 566 seq_puts(s, "# || / _--=> preempt-depth\n"); 567 seq_puts(s, "# ||| / _-=> migrate-disable\n"); 568 seq_puts(s, "# |||| / delay\n"); 569 seq_puts(s, "# ||||| ACTIVATION\n"); 570 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); 571 seq_puts(s, " CONTEXT LATENCY\n"); 572 seq_puts(s, "# | | | ||||| | | "); 573 seq_puts(s, " | |\n"); 574 } 575 #endif /* CONFIG_PREEMPT_RT */ 576 577 static void 578 __record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) 579 { 580 struct ring_buffer_event *event; 581 struct timerlat_entry *entry; 582 583 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 584 tracing_gen_ctx()); 585 if (!event) 586 return; 587 entry = ring_buffer_event_data(event); 588 entry->seqnum = sample->seqnum; 589 entry->context = sample->context; 590 entry->timer_latency = sample->timer_latency; 591 592 trace_buffer_unlock_commit_nostack(buffer, event); 593 } 594 595 /* 596 * Record an timerlat_sample into the tracer buffer. 597 */ 598 static void record_timerlat_sample(struct timerlat_sample *sample) 599 { 600 struct osnoise_instance *inst; 601 struct trace_buffer *buffer; 602 603 trace_timerlat_sample(sample); 604 605 rcu_read_lock(); 606 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 607 buffer = inst->tr->array_buffer.buffer; 608 __record_timerlat_sample(sample, buffer); 609 } 610 rcu_read_unlock(); 611 } 612 613 #ifdef CONFIG_STACKTRACE 614 615 #define MAX_CALLS 256 616 617 /* 618 * Stack trace will take place only at IRQ level, so, no need 619 * to control nesting here. 620 */ 621 struct trace_stack { 622 int stack_size; 623 int nr_entries; 624 unsigned long calls[MAX_CALLS]; 625 }; 626 627 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 628 629 /* 630 * timerlat_save_stack - save a stack trace without printing 631 * 632 * Save the current stack trace without printing. The 633 * stack will be printed later, after the end of the measurement. 634 */ 635 static void timerlat_save_stack(int skip) 636 { 637 unsigned int size, nr_entries; 638 struct trace_stack *fstack; 639 640 fstack = this_cpu_ptr(&trace_stack); 641 642 size = ARRAY_SIZE(fstack->calls); 643 644 nr_entries = stack_trace_save(fstack->calls, size, skip); 645 646 fstack->stack_size = nr_entries * sizeof(unsigned long); 647 fstack->nr_entries = nr_entries; 648 649 return; 650 651 } 652 653 static void 654 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) 655 { 656 struct ring_buffer_event *event; 657 struct stack_entry *entry; 658 659 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 660 tracing_gen_ctx()); 661 if (!event) 662 return; 663 664 entry = ring_buffer_event_data(event); 665 666 entry->size = fstack->nr_entries; 667 memcpy(&entry->caller, fstack->calls, size); 668 669 trace_buffer_unlock_commit_nostack(buffer, event); 670 } 671 672 /* 673 * timerlat_dump_stack - dump a stack trace previously saved 674 */ 675 static void timerlat_dump_stack(u64 latency) 676 { 677 struct osnoise_instance *inst; 678 struct trace_buffer *buffer; 679 struct trace_stack *fstack; 680 unsigned int size; 681 682 /* 683 * trace only if latency > print_stack config, if enabled. 684 */ 685 if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) 686 return; 687 688 preempt_disable_notrace(); 689 fstack = this_cpu_ptr(&trace_stack); 690 size = fstack->stack_size; 691 692 rcu_read_lock(); 693 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 694 buffer = inst->tr->array_buffer.buffer; 695 __timerlat_dump_stack(buffer, fstack, size); 696 697 } 698 rcu_read_unlock(); 699 preempt_enable_notrace(); 700 } 701 #else /* CONFIG_STACKTRACE */ 702 #define timerlat_dump_stack(u64 latency) do {} while (0) 703 #define timerlat_save_stack(a) do {} while (0) 704 #endif /* CONFIG_STACKTRACE */ 705 #endif /* CONFIG_TIMERLAT_TRACER */ 706 707 /* 708 * Macros to encapsulate the time capturing infrastructure. 709 */ 710 #define time_get() trace_clock_local() 711 #define time_to_us(x) div_u64(x, 1000) 712 #define time_sub(a, b) ((a) - (b)) 713 714 /* 715 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 716 * 717 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 718 * to discount the NMI interference. 719 * 720 * See get_int_safe_duration(). 721 */ 722 static inline void 723 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 724 { 725 if (osn_var->irq.delta_start) 726 osn_var->irq.delta_start += duration; 727 } 728 729 #ifndef CONFIG_PREEMPT_RT 730 /* 731 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 732 * 733 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 734 * forward to discount the interference. 735 * 736 * See get_int_safe_duration(). 737 */ 738 static inline void 739 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 740 { 741 if (osn_var->softirq.delta_start) 742 osn_var->softirq.delta_start += duration; 743 } 744 #else /* CONFIG_PREEMPT_RT */ 745 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 746 #endif 747 748 /* 749 * cond_move_thread_delta_start - Forward the delta_start of a running thread 750 * 751 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 752 * is pushed forward to discount the interference. 753 * 754 * See get_int_safe_duration(). 755 */ 756 static inline void 757 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 758 { 759 if (osn_var->thread.delta_start) 760 osn_var->thread.delta_start += duration; 761 } 762 763 /* 764 * get_int_safe_duration - Get the duration of a window 765 * 766 * The irq, softirq and thread variables need to have its duration without 767 * the interference from higher priority interrupts. Instead of keeping a 768 * variable to discount the interrupt interference from these variables, the 769 * starting time of these variables are pushed forward with the interrupt's 770 * duration. In this way, a single variable is used to: 771 * 772 * - Know if a given window is being measured. 773 * - Account its duration. 774 * - Discount the interference. 775 * 776 * To avoid getting inconsistent values, e.g.,: 777 * 778 * now = time_get() 779 * ---> interrupt! 780 * delta_start -= int duration; 781 * <--- 782 * duration = now - delta_start; 783 * 784 * result: negative duration if the variable duration before the 785 * interrupt was smaller than the interrupt execution. 786 * 787 * A counter of interrupts is used. If the counter increased, try 788 * to capture an interference safe duration. 789 */ 790 static inline s64 791 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 792 { 793 u64 int_counter, now; 794 s64 duration; 795 796 do { 797 int_counter = local_read(&osn_var->int_counter); 798 /* synchronize with interrupts */ 799 barrier(); 800 801 now = time_get(); 802 duration = (now - *delta_start); 803 804 /* synchronize with interrupts */ 805 barrier(); 806 } while (int_counter != local_read(&osn_var->int_counter)); 807 808 /* 809 * This is an evidence of race conditions that cause 810 * a value to be "discounted" too much. 811 */ 812 if (duration < 0) 813 osnoise_taint("Negative duration!\n"); 814 815 *delta_start = 0; 816 817 return duration; 818 } 819 820 /* 821 * 822 * set_int_safe_time - Save the current time on *time, aware of interference 823 * 824 * Get the time, taking into consideration a possible interference from 825 * higher priority interrupts. 826 * 827 * See get_int_safe_duration() for an explanation. 828 */ 829 static u64 830 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 831 { 832 u64 int_counter; 833 834 do { 835 int_counter = local_read(&osn_var->int_counter); 836 /* synchronize with interrupts */ 837 barrier(); 838 839 *time = time_get(); 840 841 /* synchronize with interrupts */ 842 barrier(); 843 } while (int_counter != local_read(&osn_var->int_counter)); 844 845 return int_counter; 846 } 847 848 #ifdef CONFIG_TIMERLAT_TRACER 849 /* 850 * copy_int_safe_time - Copy *src into *desc aware of interference 851 */ 852 static u64 853 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 854 { 855 u64 int_counter; 856 857 do { 858 int_counter = local_read(&osn_var->int_counter); 859 /* synchronize with interrupts */ 860 barrier(); 861 862 *dst = *src; 863 864 /* synchronize with interrupts */ 865 barrier(); 866 } while (int_counter != local_read(&osn_var->int_counter)); 867 868 return int_counter; 869 } 870 #endif /* CONFIG_TIMERLAT_TRACER */ 871 872 /* 873 * trace_osnoise_callback - NMI entry/exit callback 874 * 875 * This function is called at the entry and exit NMI code. The bool enter 876 * distinguishes between either case. This function is used to note a NMI 877 * occurrence, compute the noise caused by the NMI, and to remove the noise 878 * it is potentially causing on other interference variables. 879 */ 880 void trace_osnoise_callback(bool enter) 881 { 882 struct osnoise_variables *osn_var = this_cpu_osn_var(); 883 u64 duration; 884 885 if (!osn_var->sampling) 886 return; 887 888 /* 889 * Currently trace_clock_local() calls sched_clock() and the 890 * generic version is not NMI safe. 891 */ 892 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 893 if (enter) { 894 osn_var->nmi.delta_start = time_get(); 895 local_inc(&osn_var->int_counter); 896 } else { 897 duration = time_get() - osn_var->nmi.delta_start; 898 899 trace_nmi_noise(osn_var->nmi.delta_start, duration); 900 901 cond_move_irq_delta_start(osn_var, duration); 902 cond_move_softirq_delta_start(osn_var, duration); 903 cond_move_thread_delta_start(osn_var, duration); 904 } 905 } 906 907 if (enter) 908 osn_var->nmi.count++; 909 } 910 911 /* 912 * osnoise_trace_irq_entry - Note the starting of an IRQ 913 * 914 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 915 * it is safe to use a single variable (ons_var->irq) to save the statistics. 916 * The arrival_time is used to report... the arrival time. The delta_start 917 * is used to compute the duration at the IRQ exit handler. See 918 * cond_move_irq_delta_start(). 919 */ 920 void osnoise_trace_irq_entry(int id) 921 { 922 struct osnoise_variables *osn_var = this_cpu_osn_var(); 923 924 if (!osn_var->sampling) 925 return; 926 /* 927 * This value will be used in the report, but not to compute 928 * the execution time, so it is safe to get it unsafe. 929 */ 930 osn_var->irq.arrival_time = time_get(); 931 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 932 osn_var->irq.count++; 933 934 local_inc(&osn_var->int_counter); 935 } 936 937 /* 938 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 939 * 940 * Computes the duration of the IRQ noise, and trace it. Also discounts the 941 * interference from other sources of noise could be currently being accounted. 942 */ 943 void osnoise_trace_irq_exit(int id, const char *desc) 944 { 945 struct osnoise_variables *osn_var = this_cpu_osn_var(); 946 s64 duration; 947 948 if (!osn_var->sampling) 949 return; 950 951 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 952 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 953 osn_var->irq.arrival_time = 0; 954 cond_move_softirq_delta_start(osn_var, duration); 955 cond_move_thread_delta_start(osn_var, duration); 956 } 957 958 /* 959 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 960 * 961 * Used to note the starting of an IRQ occurece. 962 */ 963 static void trace_irqentry_callback(void *data, int irq, 964 struct irqaction *action) 965 { 966 osnoise_trace_irq_entry(irq); 967 } 968 969 /* 970 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 971 * 972 * Used to note the end of an IRQ occurece. 973 */ 974 static void trace_irqexit_callback(void *data, int irq, 975 struct irqaction *action, int ret) 976 { 977 osnoise_trace_irq_exit(irq, action->name); 978 } 979 980 /* 981 * arch specific register function. 982 */ 983 int __weak osnoise_arch_register(void) 984 { 985 return 0; 986 } 987 988 /* 989 * arch specific unregister function. 990 */ 991 void __weak osnoise_arch_unregister(void) 992 { 993 return; 994 } 995 996 /* 997 * hook_irq_events - Hook IRQ handling events 998 * 999 * This function hooks the IRQ related callbacks to the respective trace 1000 * events. 1001 */ 1002 static int hook_irq_events(void) 1003 { 1004 int ret; 1005 1006 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1007 if (ret) 1008 goto out_err; 1009 1010 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1011 if (ret) 1012 goto out_unregister_entry; 1013 1014 ret = osnoise_arch_register(); 1015 if (ret) 1016 goto out_irq_exit; 1017 1018 return 0; 1019 1020 out_irq_exit: 1021 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1022 out_unregister_entry: 1023 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1024 out_err: 1025 return -EINVAL; 1026 } 1027 1028 /* 1029 * unhook_irq_events - Unhook IRQ handling events 1030 * 1031 * This function unhooks the IRQ related callbacks to the respective trace 1032 * events. 1033 */ 1034 static void unhook_irq_events(void) 1035 { 1036 osnoise_arch_unregister(); 1037 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1038 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1039 } 1040 1041 #ifndef CONFIG_PREEMPT_RT 1042 /* 1043 * trace_softirq_entry_callback - Note the starting of a softirq 1044 * 1045 * Save the starting time of a softirq. As softirqs are non-preemptive to 1046 * other softirqs, it is safe to use a single variable (ons_var->softirq) 1047 * to save the statistics. The arrival_time is used to report... the 1048 * arrival time. The delta_start is used to compute the duration at the 1049 * softirq exit handler. See cond_move_softirq_delta_start(). 1050 */ 1051 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 1052 { 1053 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1054 1055 if (!osn_var->sampling) 1056 return; 1057 /* 1058 * This value will be used in the report, but not to compute 1059 * the execution time, so it is safe to get it unsafe. 1060 */ 1061 osn_var->softirq.arrival_time = time_get(); 1062 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 1063 osn_var->softirq.count++; 1064 1065 local_inc(&osn_var->int_counter); 1066 } 1067 1068 /* 1069 * trace_softirq_exit_callback - Note the end of an softirq 1070 * 1071 * Computes the duration of the softirq noise, and trace it. Also discounts the 1072 * interference from other sources of noise could be currently being accounted. 1073 */ 1074 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 1075 { 1076 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1077 s64 duration; 1078 1079 if (!osn_var->sampling) 1080 return; 1081 1082 if (unlikely(timerlat_enabled())) 1083 if (!timerlat_softirq_exit(osn_var)) 1084 return; 1085 1086 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 1087 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 1088 cond_move_thread_delta_start(osn_var, duration); 1089 osn_var->softirq.arrival_time = 0; 1090 } 1091 1092 /* 1093 * hook_softirq_events - Hook softirq handling events 1094 * 1095 * This function hooks the softirq related callbacks to the respective trace 1096 * events. 1097 */ 1098 static int hook_softirq_events(void) 1099 { 1100 int ret; 1101 1102 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1103 if (ret) 1104 goto out_err; 1105 1106 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1107 if (ret) 1108 goto out_unreg_entry; 1109 1110 return 0; 1111 1112 out_unreg_entry: 1113 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1114 out_err: 1115 return -EINVAL; 1116 } 1117 1118 /* 1119 * unhook_softirq_events - Unhook softirq handling events 1120 * 1121 * This function hooks the softirq related callbacks to the respective trace 1122 * events. 1123 */ 1124 static void unhook_softirq_events(void) 1125 { 1126 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1127 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1128 } 1129 #else /* CONFIG_PREEMPT_RT */ 1130 /* 1131 * softirq are threads on the PREEMPT_RT mode. 1132 */ 1133 static int hook_softirq_events(void) 1134 { 1135 return 0; 1136 } 1137 static void unhook_softirq_events(void) 1138 { 1139 } 1140 #endif 1141 1142 /* 1143 * thread_entry - Record the starting of a thread noise window 1144 * 1145 * It saves the context switch time for a noisy thread, and increments 1146 * the interference counters. 1147 */ 1148 static void 1149 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 1150 { 1151 if (!osn_var->sampling) 1152 return; 1153 /* 1154 * The arrival time will be used in the report, but not to compute 1155 * the execution time, so it is safe to get it unsafe. 1156 */ 1157 osn_var->thread.arrival_time = time_get(); 1158 1159 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 1160 1161 osn_var->thread.count++; 1162 local_inc(&osn_var->int_counter); 1163 } 1164 1165 /* 1166 * thread_exit - Report the end of a thread noise window 1167 * 1168 * It computes the total noise from a thread, tracing if needed. 1169 */ 1170 static void 1171 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 1172 { 1173 s64 duration; 1174 1175 if (!osn_var->sampling) 1176 return; 1177 1178 if (unlikely(timerlat_enabled())) 1179 if (!timerlat_thread_exit(osn_var)) 1180 return; 1181 1182 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 1183 1184 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 1185 1186 osn_var->thread.arrival_time = 0; 1187 } 1188 1189 #ifdef CONFIG_TIMERLAT_TRACER 1190 /* 1191 * osnoise_stop_exception - Stop tracing and the tracer. 1192 */ 1193 static __always_inline void osnoise_stop_exception(char *msg, int cpu) 1194 { 1195 struct osnoise_instance *inst; 1196 struct trace_array *tr; 1197 1198 rcu_read_lock(); 1199 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1200 tr = inst->tr; 1201 trace_array_printk(tr, _THIS_IP_, 1202 "stop tracing hit on cpu %d due to exception: %s\n", 1203 smp_processor_id(), 1204 msg); 1205 1206 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) 1207 panic("tracer hit on cpu %d due to exception: %s\n", 1208 smp_processor_id(), 1209 msg); 1210 1211 tracer_tracing_off(tr); 1212 } 1213 rcu_read_unlock(); 1214 } 1215 1216 /* 1217 * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler 1218 * 1219 * his function is hooked to the sched:sched_migrate_task trace event, and monitors 1220 * timerlat user-space thread migration. 1221 */ 1222 static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu) 1223 { 1224 struct osnoise_variables *osn_var; 1225 long cpu = task_cpu(p); 1226 1227 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 1228 if (osn_var->pid == p->pid && dest_cpu != cpu) { 1229 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1; 1230 osnoise_taint("timerlat user-thread migrated\n"); 1231 osnoise_stop_exception("timerlat user-thread migrated", cpu); 1232 } 1233 } 1234 1235 static bool monitor_enabled; 1236 1237 static int register_migration_monitor(void) 1238 { 1239 int ret = 0; 1240 1241 /* 1242 * Timerlat thread migration check is only required when running timerlat in user-space. 1243 * Thus, enable callback only if timerlat is set with no workload. 1244 */ 1245 if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) { 1246 if (WARN_ON_ONCE(monitor_enabled)) 1247 return 0; 1248 1249 ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL); 1250 if (!ret) 1251 monitor_enabled = true; 1252 } 1253 1254 return ret; 1255 } 1256 1257 static void unregister_migration_monitor(void) 1258 { 1259 if (!monitor_enabled) 1260 return; 1261 1262 unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL); 1263 monitor_enabled = false; 1264 } 1265 #else 1266 static int register_migration_monitor(void) 1267 { 1268 return 0; 1269 } 1270 static void unregister_migration_monitor(void) {} 1271 #endif 1272 /* 1273 * trace_sched_switch - sched:sched_switch trace event handler 1274 * 1275 * This function is hooked to the sched:sched_switch trace event, and it is 1276 * used to record the beginning and to report the end of a thread noise window. 1277 */ 1278 static void 1279 trace_sched_switch_callback(void *data, bool preempt, 1280 struct task_struct *p, 1281 struct task_struct *n, 1282 unsigned int prev_state) 1283 { 1284 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1285 int workload = test_bit(OSN_WORKLOAD, &osnoise_options); 1286 1287 if ((p->pid != osn_var->pid) || !workload) 1288 thread_exit(osn_var, p); 1289 1290 if ((n->pid != osn_var->pid) || !workload) 1291 thread_entry(osn_var, n); 1292 } 1293 1294 /* 1295 * hook_thread_events - Hook the instrumentation for thread noise 1296 * 1297 * Hook the osnoise tracer callbacks to handle the noise from other 1298 * threads on the necessary kernel events. 1299 */ 1300 static int hook_thread_events(void) 1301 { 1302 int ret; 1303 1304 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 1305 if (ret) 1306 return -EINVAL; 1307 1308 ret = register_migration_monitor(); 1309 if (ret) 1310 goto out_unreg; 1311 1312 return 0; 1313 1314 out_unreg: 1315 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1316 return -EINVAL; 1317 } 1318 1319 /* 1320 * unhook_thread_events - unhook the instrumentation for thread noise 1321 * 1322 * Unook the osnoise tracer callbacks to handle the noise from other 1323 * threads on the necessary kernel events. 1324 */ 1325 static void unhook_thread_events(void) 1326 { 1327 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1328 unregister_migration_monitor(); 1329 } 1330 1331 /* 1332 * save_osn_sample_stats - Save the osnoise_sample statistics 1333 * 1334 * Save the osnoise_sample statistics before the sampling phase. These 1335 * values will be used later to compute the diff betwneen the statistics 1336 * before and after the osnoise sampling. 1337 */ 1338 static void 1339 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1340 { 1341 s->nmi_count = osn_var->nmi.count; 1342 s->irq_count = osn_var->irq.count; 1343 s->softirq_count = osn_var->softirq.count; 1344 s->thread_count = osn_var->thread.count; 1345 } 1346 1347 /* 1348 * diff_osn_sample_stats - Compute the osnoise_sample statistics 1349 * 1350 * After a sample period, compute the difference on the osnoise_sample 1351 * statistics. The struct osnoise_sample *s contains the statistics saved via 1352 * save_osn_sample_stats() before the osnoise sampling. 1353 */ 1354 static void 1355 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1356 { 1357 s->nmi_count = osn_var->nmi.count - s->nmi_count; 1358 s->irq_count = osn_var->irq.count - s->irq_count; 1359 s->softirq_count = osn_var->softirq.count - s->softirq_count; 1360 s->thread_count = osn_var->thread.count - s->thread_count; 1361 } 1362 1363 /* 1364 * osnoise_stop_tracing - Stop tracing and the tracer. 1365 */ 1366 static __always_inline void osnoise_stop_tracing(void) 1367 { 1368 struct osnoise_instance *inst; 1369 struct trace_array *tr; 1370 1371 rcu_read_lock(); 1372 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1373 tr = inst->tr; 1374 trace_array_printk(tr, _THIS_IP_, 1375 "stop tracing hit on cpu %d\n", smp_processor_id()); 1376 1377 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) 1378 panic("tracer hit stop condition on CPU %d\n", smp_processor_id()); 1379 1380 tracer_tracing_off(tr); 1381 } 1382 rcu_read_unlock(); 1383 } 1384 1385 /* 1386 * osnoise_has_tracing_on - Check if there is at least one instance on 1387 */ 1388 static __always_inline int osnoise_has_tracing_on(void) 1389 { 1390 struct osnoise_instance *inst; 1391 int trace_is_on = 0; 1392 1393 rcu_read_lock(); 1394 list_for_each_entry_rcu(inst, &osnoise_instances, list) 1395 trace_is_on += tracer_tracing_is_on(inst->tr); 1396 rcu_read_unlock(); 1397 1398 return trace_is_on; 1399 } 1400 1401 /* 1402 * notify_new_max_latency - Notify a new max latency via fsnotify interface. 1403 */ 1404 static void notify_new_max_latency(u64 latency) 1405 { 1406 struct osnoise_instance *inst; 1407 struct trace_array *tr; 1408 1409 rcu_read_lock(); 1410 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1411 tr = inst->tr; 1412 if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { 1413 tr->max_latency = latency; 1414 latency_fsnotify(tr); 1415 } 1416 } 1417 rcu_read_unlock(); 1418 } 1419 1420 /* 1421 * run_osnoise - Sample the time and look for osnoise 1422 * 1423 * Used to capture the time, looking for potential osnoise latency repeatedly. 1424 * Different from hwlat_detector, it is called with preemption and interrupts 1425 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1426 * osnoise sampling thread, as they would do with a regular thread. 1427 */ 1428 static int run_osnoise(void) 1429 { 1430 bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); 1431 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1432 u64 start, sample, last_sample; 1433 u64 last_int_count, int_count; 1434 s64 noise = 0, max_noise = 0; 1435 s64 total, last_total = 0; 1436 struct osnoise_sample s; 1437 bool disable_preemption; 1438 unsigned int threshold; 1439 u64 runtime, stop_in; 1440 u64 sum_noise = 0; 1441 int hw_count = 0; 1442 int ret = -1; 1443 1444 /* 1445 * Disabling preemption is only required if IRQs are enabled, 1446 * and the options is set on. 1447 */ 1448 disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options); 1449 1450 /* 1451 * Considers the current thread as the workload. 1452 */ 1453 osn_var->pid = current->pid; 1454 1455 /* 1456 * Save the current stats for the diff 1457 */ 1458 save_osn_sample_stats(osn_var, &s); 1459 1460 /* 1461 * if threshold is 0, use the default value of 1 us. 1462 */ 1463 threshold = tracing_thresh ? : 1000; 1464 1465 /* 1466 * Apply PREEMPT and IRQ disabled options. 1467 */ 1468 if (disable_irq) 1469 local_irq_disable(); 1470 1471 if (disable_preemption) 1472 preempt_disable(); 1473 1474 /* 1475 * Make sure NMIs see sampling first 1476 */ 1477 osn_var->sampling = true; 1478 barrier(); 1479 1480 /* 1481 * Transform the *_us config to nanoseconds to avoid the 1482 * division on the main loop. 1483 */ 1484 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1485 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1486 1487 /* 1488 * Start timestamp 1489 */ 1490 start = time_get(); 1491 1492 /* 1493 * "previous" loop. 1494 */ 1495 last_int_count = set_int_safe_time(osn_var, &last_sample); 1496 1497 do { 1498 /* 1499 * Get sample! 1500 */ 1501 int_count = set_int_safe_time(osn_var, &sample); 1502 1503 noise = time_sub(sample, last_sample); 1504 1505 /* 1506 * This shouldn't happen. 1507 */ 1508 if (noise < 0) { 1509 osnoise_taint("negative noise!"); 1510 goto out; 1511 } 1512 1513 /* 1514 * Sample runtime. 1515 */ 1516 total = time_sub(sample, start); 1517 1518 /* 1519 * Check for possible overflows. 1520 */ 1521 if (total < last_total) { 1522 osnoise_taint("total overflow!"); 1523 break; 1524 } 1525 1526 last_total = total; 1527 1528 if (noise >= threshold) { 1529 int interference = int_count - last_int_count; 1530 1531 if (noise > max_noise) 1532 max_noise = noise; 1533 1534 if (!interference) 1535 hw_count++; 1536 1537 sum_noise += noise; 1538 1539 trace_sample_threshold(last_sample, noise, interference); 1540 1541 if (osnoise_data.stop_tracing) 1542 if (noise > stop_in) 1543 osnoise_stop_tracing(); 1544 } 1545 1546 /* 1547 * In some cases, notably when running on a nohz_full CPU with 1548 * a stopped tick PREEMPT_RCU or PREEMPT_LAZY have no way to 1549 * account for QSs. This will eventually cause unwarranted 1550 * noise as RCU forces preemption as the means of ending the 1551 * current grace period. We avoid this by calling 1552 * rcu_momentary_eqs(), which performs a zero duration EQS 1553 * allowing RCU to end the current grace period. This call 1554 * shouldn't be wrapped inside an RCU critical section. 1555 * 1556 * Normally QSs for other cases are handled through cond_resched(). 1557 * For simplicity, however, we call rcu_momentary_eqs() for all 1558 * configurations here. 1559 */ 1560 if (!disable_irq) 1561 local_irq_disable(); 1562 1563 rcu_momentary_eqs(); 1564 1565 if (!disable_irq) 1566 local_irq_enable(); 1567 1568 /* 1569 * For the non-preemptive kernel config: let threads runs, if 1570 * they so wish, unless set not do to so. 1571 */ 1572 if (!disable_irq && !disable_preemption) 1573 cond_resched(); 1574 1575 last_sample = sample; 1576 last_int_count = int_count; 1577 1578 } while (total < runtime && !kthread_should_stop()); 1579 1580 /* 1581 * Finish the above in the view for interrupts. 1582 */ 1583 barrier(); 1584 1585 osn_var->sampling = false; 1586 1587 /* 1588 * Make sure sampling data is no longer updated. 1589 */ 1590 barrier(); 1591 1592 /* 1593 * Return to the preemptive state. 1594 */ 1595 if (disable_preemption) 1596 preempt_enable(); 1597 1598 if (disable_irq) 1599 local_irq_enable(); 1600 1601 /* 1602 * Save noise info. 1603 */ 1604 s.noise = time_to_us(sum_noise); 1605 s.runtime = time_to_us(total); 1606 s.max_sample = time_to_us(max_noise); 1607 s.hw_count = hw_count; 1608 1609 /* Save interference stats info */ 1610 diff_osn_sample_stats(osn_var, &s); 1611 1612 record_osnoise_sample(&s); 1613 1614 notify_new_max_latency(max_noise); 1615 1616 if (osnoise_data.stop_tracing_total) 1617 if (s.noise > osnoise_data.stop_tracing_total) 1618 osnoise_stop_tracing(); 1619 1620 return 0; 1621 out: 1622 return ret; 1623 } 1624 1625 static struct cpumask osnoise_cpumask; 1626 static struct cpumask save_cpumask; 1627 static struct cpumask kthread_cpumask; 1628 1629 /* 1630 * osnoise_sleep - sleep until the next period 1631 */ 1632 static void osnoise_sleep(bool skip_period) 1633 { 1634 u64 interval; 1635 ktime_t wake_time; 1636 1637 mutex_lock(&interface_lock); 1638 if (skip_period) 1639 interval = osnoise_data.sample_period; 1640 else 1641 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1642 mutex_unlock(&interface_lock); 1643 1644 /* 1645 * differently from hwlat_detector, the osnoise tracer can run 1646 * without a pause because preemption is on. 1647 */ 1648 if (!interval) { 1649 /* Let synchronize_rcu_tasks() make progress */ 1650 cond_resched_tasks_rcu_qs(); 1651 return; 1652 } 1653 1654 wake_time = ktime_add_us(ktime_get(), interval); 1655 __set_current_state(TASK_INTERRUPTIBLE); 1656 1657 while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) { 1658 if (kthread_should_stop()) 1659 break; 1660 } 1661 } 1662 1663 /* 1664 * osnoise_migration_pending - checks if the task needs to migrate 1665 * 1666 * osnoise/timerlat threads are per-cpu. If there is a pending request to 1667 * migrate the thread away from the current CPU, something bad has happened. 1668 * Play the good citizen and leave. 1669 * 1670 * Returns 0 if it is safe to continue, 1 otherwise. 1671 */ 1672 static inline int osnoise_migration_pending(void) 1673 { 1674 if (!current->migration_pending) 1675 return 0; 1676 1677 /* 1678 * If migration is pending, there is a task waiting for the 1679 * tracer to enable migration. The tracer does not allow migration, 1680 * thus: taint and leave to unblock the blocked thread. 1681 */ 1682 osnoise_taint("migration requested to osnoise threads, leaving."); 1683 1684 /* 1685 * Unset this thread from the threads managed by the interface. 1686 * The tracers are responsible for cleaning their env before 1687 * exiting. 1688 */ 1689 mutex_lock(&interface_lock); 1690 this_cpu_osn_var()->kthread = NULL; 1691 cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask); 1692 mutex_unlock(&interface_lock); 1693 1694 return 1; 1695 } 1696 1697 /* 1698 * osnoise_main - The osnoise detection kernel thread 1699 * 1700 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1701 * every period. 1702 */ 1703 static int osnoise_main(void *data) 1704 { 1705 unsigned long flags; 1706 1707 /* 1708 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. 1709 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread. 1710 * 1711 * To work around this limitation, disable migration and remove the 1712 * flag. 1713 */ 1714 migrate_disable(); 1715 raw_spin_lock_irqsave(¤t->pi_lock, flags); 1716 current->flags &= ~(PF_NO_SETAFFINITY); 1717 raw_spin_unlock_irqrestore(¤t->pi_lock, flags); 1718 1719 while (!kthread_should_stop()) { 1720 if (osnoise_migration_pending()) 1721 break; 1722 1723 /* skip a period if tracing is off on all instances */ 1724 if (!osnoise_has_tracing_on()) { 1725 osnoise_sleep(true); 1726 continue; 1727 } 1728 1729 run_osnoise(); 1730 osnoise_sleep(false); 1731 } 1732 1733 migrate_enable(); 1734 return 0; 1735 } 1736 1737 #ifdef CONFIG_TIMERLAT_TRACER 1738 /* 1739 * timerlat_irq - hrtimer handler for timerlat. 1740 */ 1741 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) 1742 { 1743 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1744 struct timerlat_variables *tlat; 1745 struct timerlat_sample s; 1746 u64 now; 1747 u64 diff; 1748 1749 /* 1750 * I am not sure if the timer was armed for this CPU. So, get 1751 * the timerlat struct from the timer itself, not from this 1752 * CPU. 1753 */ 1754 tlat = container_of(timer, struct timerlat_variables, timer); 1755 1756 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1757 1758 /* 1759 * Enable the osnoise: events for thread an softirq. 1760 */ 1761 tlat->tracing_thread = true; 1762 1763 osn_var->thread.arrival_time = time_get(); 1764 1765 /* 1766 * A hardirq is running: the timer IRQ. It is for sure preempting 1767 * a thread, and potentially preempting a softirq. 1768 * 1769 * At this point, it is not interesting to know the duration of the 1770 * preempted thread (and maybe softirq), but how much time they will 1771 * delay the beginning of the execution of the timer thread. 1772 * 1773 * To get the correct (net) delay added by the softirq, its delta_start 1774 * is set as the IRQ one. In this way, at the return of the IRQ, the delta 1775 * start of the sofitrq will be zeroed, accounting then only the time 1776 * after that. 1777 * 1778 * The thread follows the same principle. However, if a softirq is 1779 * running, the thread needs to receive the softirq delta_start. The 1780 * reason being is that the softirq will be the last to be unfolded, 1781 * resseting the thread delay to zero. 1782 * 1783 * The PREEMPT_RT is a special case, though. As softirqs run as threads 1784 * on RT, moving the thread is enough. 1785 */ 1786 if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) { 1787 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1788 &osn_var->softirq.delta_start); 1789 1790 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start, 1791 &osn_var->irq.delta_start); 1792 } else { 1793 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1794 &osn_var->irq.delta_start); 1795 } 1796 1797 /* 1798 * Compute the current time with the expected time. 1799 */ 1800 diff = now - tlat->abs_period; 1801 1802 tlat->count++; 1803 s.seqnum = tlat->count; 1804 s.timer_latency = diff; 1805 s.context = IRQ_CONTEXT; 1806 1807 record_timerlat_sample(&s); 1808 1809 if (osnoise_data.stop_tracing) { 1810 if (time_to_us(diff) >= osnoise_data.stop_tracing) { 1811 1812 /* 1813 * At this point, if stop_tracing is set and <= print_stack, 1814 * print_stack is set and would be printed in the thread handler. 1815 * 1816 * Thus, print the stack trace as it is helpful to define the 1817 * root cause of an IRQ latency. 1818 */ 1819 if (osnoise_data.stop_tracing <= osnoise_data.print_stack) { 1820 timerlat_save_stack(0); 1821 timerlat_dump_stack(time_to_us(diff)); 1822 } 1823 1824 osnoise_stop_tracing(); 1825 notify_new_max_latency(diff); 1826 1827 wake_up_process(tlat->kthread); 1828 1829 return HRTIMER_NORESTART; 1830 } 1831 } 1832 1833 wake_up_process(tlat->kthread); 1834 1835 if (osnoise_data.print_stack) 1836 timerlat_save_stack(0); 1837 1838 return HRTIMER_NORESTART; 1839 } 1840 1841 /* 1842 * wait_next_period - Wait for the next period for timerlat 1843 */ 1844 static int wait_next_period(struct timerlat_variables *tlat) 1845 { 1846 ktime_t next_abs_period, now; 1847 u64 rel_period = osnoise_data.timerlat_period * 1000; 1848 1849 now = hrtimer_cb_get_time(&tlat->timer); 1850 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1851 1852 /* 1853 * Save the next abs_period. 1854 */ 1855 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1856 1857 /* 1858 * Align thread in the first cycle on each CPU to the set alignment 1859 * if TIMERLAT_ALIGN is set. 1860 * 1861 * This is done by using an atomic64_t to store the next absolute period. 1862 * The first thread that wakes up will set the atomic64_t to its 1863 * absolute period, and the other threads will increment it by 1864 * the alignment value. 1865 */ 1866 if (test_bit(OSN_TIMERLAT_ALIGN, &osnoise_options) && !tlat->count 1867 && atomic64_cmpxchg_relaxed(&align_next, 0, tlat->abs_period)) { 1868 /* 1869 * A thread has already set align_next, use it and increment it 1870 * to be used by the next thread that wakes up after this one. 1871 */ 1872 tlat->abs_period = atomic64_add_return_relaxed( 1873 osnoise_data.timerlat_align_us * 1000, &align_next); 1874 next_abs_period = ns_to_ktime(tlat->abs_period); 1875 } 1876 1877 /* 1878 * If the new abs_period is in the past, skip the activation. 1879 */ 1880 while (ktime_compare(now, next_abs_period) > 0) { 1881 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1882 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1883 } 1884 1885 set_current_state(TASK_INTERRUPTIBLE); 1886 1887 hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD); 1888 schedule(); 1889 return 1; 1890 } 1891 1892 /* 1893 * timerlat_main- Timerlat main 1894 */ 1895 static int timerlat_main(void *data) 1896 { 1897 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1898 struct timerlat_variables *tlat = this_cpu_tmr_var(); 1899 struct timerlat_sample s; 1900 struct sched_param sp; 1901 unsigned long flags; 1902 u64 now, diff; 1903 1904 /* 1905 * Make the thread RT, that is how cyclictest is usually used. 1906 */ 1907 sp.sched_priority = DEFAULT_TIMERLAT_PRIO; 1908 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1909 1910 /* 1911 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. 1912 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread. 1913 * 1914 * To work around this limitation, disable migration and remove the 1915 * flag. 1916 */ 1917 migrate_disable(); 1918 raw_spin_lock_irqsave(¤t->pi_lock, flags); 1919 current->flags &= ~(PF_NO_SETAFFINITY); 1920 raw_spin_unlock_irqrestore(¤t->pi_lock, flags); 1921 1922 tlat->count = 0; 1923 tlat->tracing_thread = false; 1924 1925 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 1926 tlat->kthread = current; 1927 osn_var->pid = current->pid; 1928 /* 1929 * Annotate the arrival time. 1930 */ 1931 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 1932 1933 wait_next_period(tlat); 1934 1935 osn_var->sampling = 1; 1936 1937 while (!kthread_should_stop()) { 1938 1939 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1940 diff = now - tlat->abs_period; 1941 1942 s.seqnum = tlat->count; 1943 s.timer_latency = diff; 1944 s.context = THREAD_CONTEXT; 1945 1946 record_timerlat_sample(&s); 1947 1948 notify_new_max_latency(diff); 1949 1950 timerlat_dump_stack(time_to_us(diff)); 1951 1952 tlat->tracing_thread = false; 1953 if (osnoise_data.stop_tracing_total) 1954 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 1955 osnoise_stop_tracing(); 1956 1957 if (osnoise_migration_pending()) 1958 break; 1959 1960 wait_next_period(tlat); 1961 } 1962 1963 hrtimer_cancel(&tlat->timer); 1964 migrate_enable(); 1965 return 0; 1966 } 1967 #else /* CONFIG_TIMERLAT_TRACER */ 1968 static int timerlat_main(void *data) 1969 { 1970 return 0; 1971 } 1972 #endif /* CONFIG_TIMERLAT_TRACER */ 1973 1974 /* 1975 * stop_kthread - stop a workload thread 1976 */ 1977 static void stop_kthread(unsigned int cpu) 1978 { 1979 struct task_struct *kthread; 1980 1981 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); 1982 if (kthread) { 1983 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) && 1984 !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) { 1985 kthread_stop(kthread); 1986 } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) { 1987 /* 1988 * This is a user thread waiting on the timerlat_fd. We need 1989 * to close all users, and the best way to guarantee this is 1990 * by killing the thread. NOTE: this is a purpose specific file. 1991 */ 1992 kill_pid(kthread->thread_pid, SIGKILL, 1); 1993 put_task_struct(kthread); 1994 } 1995 } else { 1996 /* if no workload, just return */ 1997 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 1998 /* 1999 * This is set in the osnoise tracer case. 2000 */ 2001 per_cpu(per_cpu_osnoise_var, cpu).sampling = false; 2002 barrier(); 2003 } 2004 } 2005 } 2006 2007 /* 2008 * stop_per_cpu_kthread - Stop per-cpu threads 2009 * 2010 * Stop the osnoise sampling htread. Use this on unload and at system 2011 * shutdown. 2012 */ 2013 static void stop_per_cpu_kthreads(void) 2014 { 2015 int cpu; 2016 2017 cpus_read_lock(); 2018 2019 for_each_online_cpu(cpu) 2020 stop_kthread(cpu); 2021 2022 cpus_read_unlock(); 2023 } 2024 2025 /* 2026 * start_kthread - Start a workload thread 2027 */ 2028 static int start_kthread(unsigned int cpu) 2029 { 2030 struct task_struct *kthread; 2031 void *main = osnoise_main; 2032 char comm[24]; 2033 2034 /* Do not start a new thread if it is already running */ 2035 if (per_cpu(per_cpu_osnoise_var, cpu).kthread) 2036 return 0; 2037 2038 if (timerlat_enabled()) { 2039 snprintf(comm, 24, "timerlat/%d", cpu); 2040 main = timerlat_main; 2041 } else { 2042 /* if no workload, just return */ 2043 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 2044 per_cpu(per_cpu_osnoise_var, cpu).sampling = true; 2045 barrier(); 2046 return 0; 2047 } 2048 snprintf(comm, 24, "osnoise/%d", cpu); 2049 } 2050 2051 kthread = kthread_run_on_cpu(main, NULL, cpu, comm); 2052 2053 if (IS_ERR(kthread)) { 2054 pr_err(BANNER "could not start sampling thread\n"); 2055 return -ENOMEM; 2056 } 2057 2058 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 2059 cpumask_set_cpu(cpu, &kthread_cpumask); 2060 2061 return 0; 2062 } 2063 2064 /* 2065 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 2066 * 2067 * This starts the kernel thread that will look for osnoise on many 2068 * cpus. 2069 */ 2070 static int start_per_cpu_kthreads(void) 2071 { 2072 struct cpumask *current_mask = &save_cpumask; 2073 int retval = 0; 2074 int cpu; 2075 2076 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 2077 if (timerlat_enabled()) 2078 return 0; 2079 } 2080 2081 cpus_read_lock(); 2082 /* 2083 * Run only on online CPUs in which osnoise is allowed to run. 2084 */ 2085 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); 2086 2087 for_each_possible_cpu(cpu) { 2088 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) { 2089 struct task_struct *kthread; 2090 2091 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); 2092 if (!WARN_ON(!kthread)) 2093 kthread_stop(kthread); 2094 } 2095 } 2096 2097 for_each_cpu(cpu, current_mask) { 2098 retval = start_kthread(cpu); 2099 if (retval) { 2100 cpus_read_unlock(); 2101 stop_per_cpu_kthreads(); 2102 return retval; 2103 } 2104 } 2105 2106 cpus_read_unlock(); 2107 2108 return retval; 2109 } 2110 2111 #ifdef CONFIG_HOTPLUG_CPU 2112 static void osnoise_hotplug_workfn(struct work_struct *dummy) 2113 { 2114 unsigned int cpu = smp_processor_id(); 2115 2116 guard(mutex)(&trace_types_lock); 2117 2118 if (!osnoise_has_registered_instances()) 2119 return; 2120 2121 guard(cpus_read_lock)(); 2122 guard(mutex)(&interface_lock); 2123 2124 if (!cpu_online(cpu)) 2125 return; 2126 2127 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 2128 return; 2129 2130 start_kthread(cpu); 2131 } 2132 2133 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 2134 2135 /* 2136 * osnoise_cpu_init - CPU hotplug online callback function 2137 */ 2138 static int osnoise_cpu_init(unsigned int cpu) 2139 { 2140 schedule_work_on(cpu, &osnoise_hotplug_work); 2141 return 0; 2142 } 2143 2144 /* 2145 * osnoise_cpu_die - CPU hotplug offline callback function 2146 */ 2147 static int osnoise_cpu_die(unsigned int cpu) 2148 { 2149 stop_kthread(cpu); 2150 return 0; 2151 } 2152 2153 static void osnoise_init_hotplug_support(void) 2154 { 2155 int ret; 2156 2157 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 2158 osnoise_cpu_init, osnoise_cpu_die); 2159 if (ret < 0) 2160 pr_warn(BANNER "Error to init cpu hotplug support\n"); 2161 2162 return; 2163 } 2164 #else /* CONFIG_HOTPLUG_CPU */ 2165 static void osnoise_init_hotplug_support(void) 2166 { 2167 return; 2168 } 2169 #endif /* CONFIG_HOTPLUG_CPU */ 2170 2171 /* 2172 * seq file functions for the osnoise/options file. 2173 */ 2174 static void *s_options_start(struct seq_file *s, loff_t *pos) 2175 { 2176 int option = *pos; 2177 2178 mutex_lock(&interface_lock); 2179 2180 if (option >= OSN_MAX) 2181 return NULL; 2182 2183 return pos; 2184 } 2185 2186 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos) 2187 { 2188 int option = ++(*pos); 2189 2190 if (option >= OSN_MAX) 2191 return NULL; 2192 2193 return pos; 2194 } 2195 2196 static int s_options_show(struct seq_file *s, void *v) 2197 { 2198 loff_t *pos = v; 2199 int option = *pos; 2200 2201 if (option == OSN_DEFAULTS) { 2202 if (osnoise_options == OSN_DEFAULT_OPTIONS) 2203 seq_printf(s, "%s", osnoise_options_str[option]); 2204 else 2205 seq_printf(s, "NO_%s", osnoise_options_str[option]); 2206 goto out; 2207 } 2208 2209 if (test_bit(option, &osnoise_options)) 2210 seq_printf(s, "%s", osnoise_options_str[option]); 2211 else 2212 seq_printf(s, "NO_%s", osnoise_options_str[option]); 2213 2214 out: 2215 if (option != OSN_MAX) 2216 seq_puts(s, " "); 2217 2218 return 0; 2219 } 2220 2221 static void s_options_stop(struct seq_file *s, void *v) 2222 { 2223 seq_puts(s, "\n"); 2224 mutex_unlock(&interface_lock); 2225 } 2226 2227 static const struct seq_operations osnoise_options_seq_ops = { 2228 .start = s_options_start, 2229 .next = s_options_next, 2230 .show = s_options_show, 2231 .stop = s_options_stop 2232 }; 2233 2234 static int osnoise_options_open(struct inode *inode, struct file *file) 2235 { 2236 return seq_open(file, &osnoise_options_seq_ops); 2237 }; 2238 2239 /** 2240 * osnoise_options_write - Write function for "options" entry 2241 * @filp: The active open file structure 2242 * @ubuf: The user buffer that contains the value to write 2243 * @cnt: The maximum number of bytes to write to "file" 2244 * @ppos: The current position in @file 2245 * 2246 * Writing the option name sets the option, writing the "NO_" 2247 * prefix in front of the option name disables it. 2248 * 2249 * Writing "DEFAULTS" resets the option values to the default ones. 2250 */ 2251 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, 2252 size_t cnt, loff_t *ppos) 2253 { 2254 int running, option, enable, retval; 2255 char buf[256], *option_str; 2256 2257 if (cnt >= 256) 2258 return -EINVAL; 2259 2260 if (copy_from_user(buf, ubuf, cnt)) 2261 return -EFAULT; 2262 2263 buf[cnt] = 0; 2264 2265 if (strncmp(buf, "NO_", 3)) { 2266 option_str = strstrip(buf); 2267 enable = true; 2268 } else { 2269 option_str = strstrip(&buf[3]); 2270 enable = false; 2271 } 2272 2273 option = match_string(osnoise_options_str, OSN_MAX, option_str); 2274 if (option < 0) 2275 return -EINVAL; 2276 2277 /* 2278 * trace_types_lock is taken to avoid concurrency on start/stop. 2279 */ 2280 mutex_lock(&trace_types_lock); 2281 running = osnoise_has_registered_instances(); 2282 if (running) 2283 stop_per_cpu_kthreads(); 2284 2285 /* 2286 * avoid CPU hotplug operations that might read options. 2287 */ 2288 cpus_read_lock(); 2289 mutex_lock(&interface_lock); 2290 2291 retval = cnt; 2292 2293 if (enable) { 2294 if (option == OSN_DEFAULTS) 2295 osnoise_options = OSN_DEFAULT_OPTIONS; 2296 else 2297 set_bit(option, &osnoise_options); 2298 } else { 2299 if (option == OSN_DEFAULTS) 2300 retval = -EINVAL; 2301 else 2302 clear_bit(option, &osnoise_options); 2303 } 2304 2305 mutex_unlock(&interface_lock); 2306 cpus_read_unlock(); 2307 2308 if (running) 2309 start_per_cpu_kthreads(); 2310 mutex_unlock(&trace_types_lock); 2311 2312 return retval; 2313 } 2314 2315 /* 2316 * osnoise_cpus_read - Read function for reading the "cpus" file 2317 * @filp: The active open file structure 2318 * @ubuf: The userspace provided buffer to read value into 2319 * @cnt: The maximum number of bytes to read 2320 * @ppos: The current "file" position 2321 * 2322 * Prints the "cpus" output into the user-provided buffer. 2323 */ 2324 static ssize_t 2325 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 2326 loff_t *ppos) 2327 { 2328 char *mask_str __free(kfree) = NULL; 2329 int len; 2330 2331 guard(mutex)(&interface_lock); 2332 2333 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 2334 mask_str = kmalloc(len, GFP_KERNEL); 2335 if (!mask_str) 2336 return -ENOMEM; 2337 2338 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 2339 if (len >= count) 2340 return -EINVAL; 2341 2342 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 2343 2344 return count; 2345 } 2346 2347 /* 2348 * osnoise_cpus_write - Write function for "cpus" entry 2349 * @filp: The active open file structure 2350 * @ubuf: The user buffer that contains the value to write 2351 * @count: The maximum number of bytes to write to "file" 2352 * @ppos: The current position in @file 2353 * 2354 * This function provides a write implementation for the "cpus" 2355 * interface to the osnoise trace. By default, it lists all CPUs, 2356 * in this way, allowing osnoise threads to run on any online CPU 2357 * of the system. It serves to restrict the execution of osnoise to the 2358 * set of CPUs writing via this interface. Why not use "tracing_cpumask"? 2359 * Because the user might be interested in tracing what is running on 2360 * other CPUs. For instance, one might run osnoise in one HT CPU 2361 * while observing what is running on the sibling HT CPU. 2362 */ 2363 static ssize_t 2364 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 2365 loff_t *ppos) 2366 { 2367 cpumask_var_t osnoise_cpumask_new; 2368 int running, err; 2369 char *buf __free(kfree) = NULL; 2370 2371 if (count < 1) 2372 return 0; 2373 2374 buf = memdup_user_nul(ubuf, count); 2375 if (IS_ERR(buf)) 2376 return PTR_ERR(buf); 2377 2378 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 2379 return -ENOMEM; 2380 2381 err = cpulist_parse(buf, osnoise_cpumask_new); 2382 if (err) 2383 goto err_free; 2384 2385 /* 2386 * trace_types_lock is taken to avoid concurrency on start/stop. 2387 */ 2388 mutex_lock(&trace_types_lock); 2389 running = osnoise_has_registered_instances(); 2390 if (running) 2391 stop_per_cpu_kthreads(); 2392 2393 /* 2394 * osnoise_cpumask is read by CPU hotplug operations. 2395 */ 2396 cpus_read_lock(); 2397 mutex_lock(&interface_lock); 2398 2399 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 2400 2401 mutex_unlock(&interface_lock); 2402 cpus_read_unlock(); 2403 2404 if (running) 2405 start_per_cpu_kthreads(); 2406 mutex_unlock(&trace_types_lock); 2407 2408 free_cpumask_var(osnoise_cpumask_new); 2409 return count; 2410 2411 err_free: 2412 free_cpumask_var(osnoise_cpumask_new); 2413 2414 return err; 2415 } 2416 2417 #ifdef CONFIG_TIMERLAT_TRACER 2418 static int timerlat_fd_open(struct inode *inode, struct file *file) 2419 { 2420 struct osnoise_variables *osn_var; 2421 struct timerlat_variables *tlat; 2422 long cpu = (long) inode->i_cdev; 2423 2424 mutex_lock(&interface_lock); 2425 2426 /* 2427 * This file is accessible only if timerlat is enabled, and 2428 * NO_OSNOISE_WORKLOAD is set. 2429 */ 2430 if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) { 2431 mutex_unlock(&interface_lock); 2432 return -EINVAL; 2433 } 2434 2435 migrate_disable(); 2436 2437 osn_var = this_cpu_osn_var(); 2438 2439 /* 2440 * The osn_var->pid holds the single access to this file. 2441 */ 2442 if (osn_var->pid) { 2443 mutex_unlock(&interface_lock); 2444 migrate_enable(); 2445 return -EBUSY; 2446 } 2447 2448 /* 2449 * timerlat tracer is a per-cpu tracer. Check if the user-space too 2450 * is pinned to a single CPU. The tracer laters monitor if the task 2451 * migrates and then disables tracer if it does. However, it is 2452 * worth doing this basic acceptance test to avoid obviusly wrong 2453 * setup. 2454 */ 2455 if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) { 2456 mutex_unlock(&interface_lock); 2457 migrate_enable(); 2458 return -EPERM; 2459 } 2460 2461 /* 2462 * From now on, it is good to go. 2463 */ 2464 file->private_data = inode->i_cdev; 2465 2466 get_task_struct(current); 2467 2468 osn_var->kthread = current; 2469 osn_var->pid = current->pid; 2470 2471 /* 2472 * Setup is done. 2473 */ 2474 mutex_unlock(&interface_lock); 2475 2476 tlat = this_cpu_tmr_var(); 2477 tlat->count = 0; 2478 2479 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 2480 2481 migrate_enable(); 2482 return 0; 2483 }; 2484 2485 /* 2486 * timerlat_fd_read - Read function for "timerlat_fd" file 2487 * @file: The active open file structure 2488 * @ubuf: The userspace provided buffer to read value into 2489 * @cnt: The maximum number of bytes to read 2490 * @ppos: The current "file" position 2491 * 2492 * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error. 2493 */ 2494 static ssize_t 2495 timerlat_fd_read(struct file *file, char __user *ubuf, size_t count, 2496 loff_t *ppos) 2497 { 2498 long cpu = (long) file->private_data; 2499 struct osnoise_variables *osn_var; 2500 struct timerlat_variables *tlat; 2501 struct timerlat_sample s; 2502 s64 diff; 2503 u64 now; 2504 2505 migrate_disable(); 2506 2507 tlat = this_cpu_tmr_var(); 2508 2509 /* 2510 * While in user-space, the thread is migratable. There is nothing 2511 * we can do about it. 2512 * So, if the thread is running on another CPU, stop the machinery. 2513 */ 2514 if (cpu == smp_processor_id()) { 2515 if (tlat->uthread_migrate) { 2516 migrate_enable(); 2517 return -EINVAL; 2518 } 2519 } else { 2520 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1; 2521 osnoise_taint("timerlat user thread migrate\n"); 2522 osnoise_stop_tracing(); 2523 migrate_enable(); 2524 return -EINVAL; 2525 } 2526 2527 osn_var = this_cpu_osn_var(); 2528 2529 /* 2530 * The timerlat in user-space runs in a different order: 2531 * the read() starts from the execution of the previous occurrence, 2532 * sleeping for the next occurrence. 2533 * 2534 * So, skip if we are entering on read() before the first wakeup 2535 * from timerlat IRQ: 2536 */ 2537 if (likely(osn_var->sampling)) { 2538 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 2539 diff = now - tlat->abs_period; 2540 2541 /* 2542 * it was not a timer firing, but some other signal? 2543 */ 2544 if (diff < 0) 2545 goto out; 2546 2547 s.seqnum = tlat->count; 2548 s.timer_latency = diff; 2549 s.context = THREAD_URET; 2550 2551 record_timerlat_sample(&s); 2552 2553 notify_new_max_latency(diff); 2554 2555 tlat->tracing_thread = false; 2556 if (osnoise_data.stop_tracing_total) { 2557 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) { 2558 timerlat_dump_stack(time_to_us(diff)); 2559 osnoise_stop_tracing(); 2560 } 2561 } 2562 } else { 2563 tlat->tracing_thread = false; 2564 tlat->kthread = current; 2565 2566 /* Annotate now to drift new period */ 2567 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 2568 2569 osn_var->sampling = 1; 2570 } 2571 2572 /* wait for the next period */ 2573 wait_next_period(tlat); 2574 2575 /* This is the wakeup from this cycle */ 2576 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 2577 diff = now - tlat->abs_period; 2578 2579 /* 2580 * it was not a timer firing, but some other signal? 2581 */ 2582 if (diff < 0) 2583 goto out; 2584 2585 s.seqnum = tlat->count; 2586 s.timer_latency = diff; 2587 s.context = THREAD_CONTEXT; 2588 2589 record_timerlat_sample(&s); 2590 2591 if (osnoise_data.stop_tracing_total) { 2592 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) { 2593 timerlat_dump_stack(time_to_us(diff)); 2594 notify_new_max_latency(diff); 2595 osnoise_stop_tracing(); 2596 } 2597 } 2598 2599 out: 2600 migrate_enable(); 2601 return 0; 2602 } 2603 2604 static int timerlat_fd_release(struct inode *inode, struct file *file) 2605 { 2606 struct osnoise_variables *osn_var; 2607 struct timerlat_variables *tlat_var; 2608 long cpu = (long) file->private_data; 2609 2610 migrate_disable(); 2611 mutex_lock(&interface_lock); 2612 2613 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 2614 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 2615 2616 if (tlat_var->kthread) 2617 hrtimer_cancel(&tlat_var->timer); 2618 memset(tlat_var, 0, sizeof(*tlat_var)); 2619 2620 osn_var->sampling = 0; 2621 osn_var->pid = 0; 2622 2623 /* 2624 * We are leaving, not being stopped... see stop_kthread(); 2625 */ 2626 if (osn_var->kthread) { 2627 put_task_struct(osn_var->kthread); 2628 osn_var->kthread = NULL; 2629 } 2630 2631 mutex_unlock(&interface_lock); 2632 migrate_enable(); 2633 return 0; 2634 } 2635 #endif 2636 2637 /* 2638 * osnoise/runtime_us: cannot be greater than the period. 2639 */ 2640 static struct trace_min_max_param osnoise_runtime = { 2641 .lock = &interface_lock, 2642 .val = &osnoise_data.sample_runtime, 2643 .max = &osnoise_data.sample_period, 2644 .min = NULL, 2645 }; 2646 2647 /* 2648 * osnoise/period_us: cannot be smaller than the runtime. 2649 */ 2650 static struct trace_min_max_param osnoise_period = { 2651 .lock = &interface_lock, 2652 .val = &osnoise_data.sample_period, 2653 .max = NULL, 2654 .min = &osnoise_data.sample_runtime, 2655 }; 2656 2657 /* 2658 * osnoise/stop_tracing_us: no limit. 2659 */ 2660 static struct trace_min_max_param osnoise_stop_tracing_in = { 2661 .lock = &interface_lock, 2662 .val = &osnoise_data.stop_tracing, 2663 .max = NULL, 2664 .min = NULL, 2665 }; 2666 2667 /* 2668 * osnoise/stop_tracing_total_us: no limit. 2669 */ 2670 static struct trace_min_max_param osnoise_stop_tracing_total = { 2671 .lock = &interface_lock, 2672 .val = &osnoise_data.stop_tracing_total, 2673 .max = NULL, 2674 .min = NULL, 2675 }; 2676 2677 #ifdef CONFIG_TIMERLAT_TRACER 2678 /* 2679 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 2680 * latency is higher than val. 2681 */ 2682 static struct trace_min_max_param osnoise_print_stack = { 2683 .lock = &interface_lock, 2684 .val = &osnoise_data.print_stack, 2685 .max = NULL, 2686 .min = NULL, 2687 }; 2688 2689 /* 2690 * osnoise/timerlat_period: min 100 us, max 1 s 2691 */ 2692 static u64 timerlat_min_period = 100; 2693 static u64 timerlat_max_period = 1000000; 2694 static struct trace_min_max_param timerlat_period = { 2695 .lock = &interface_lock, 2696 .val = &osnoise_data.timerlat_period, 2697 .max = &timerlat_max_period, 2698 .min = &timerlat_min_period, 2699 }; 2700 2701 /* 2702 * osnoise/timerlat_align_us: align the first wakeup of all timerlat 2703 * threads to a common boundary (in us). 0 means disabled. 2704 */ 2705 static struct trace_min_max_param timerlat_align_us = { 2706 .lock = &interface_lock, 2707 .val = &osnoise_data.timerlat_align_us, 2708 .max = NULL, 2709 .min = NULL, 2710 }; 2711 2712 static const struct file_operations timerlat_fd_fops = { 2713 .open = timerlat_fd_open, 2714 .read = timerlat_fd_read, 2715 .release = timerlat_fd_release, 2716 .llseek = generic_file_llseek, 2717 }; 2718 #endif 2719 2720 static const struct file_operations cpus_fops = { 2721 .open = tracing_open_generic, 2722 .read = osnoise_cpus_read, 2723 .write = osnoise_cpus_write, 2724 .llseek = generic_file_llseek, 2725 }; 2726 2727 static const struct file_operations osnoise_options_fops = { 2728 .open = osnoise_options_open, 2729 .read = seq_read, 2730 .llseek = seq_lseek, 2731 .release = seq_release, 2732 .write = osnoise_options_write 2733 }; 2734 2735 #ifdef CONFIG_TIMERLAT_TRACER 2736 #ifdef CONFIG_STACKTRACE 2737 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2738 { 2739 struct dentry *tmp; 2740 2741 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 2742 &osnoise_print_stack, &trace_min_max_fops); 2743 if (!tmp) 2744 return -ENOMEM; 2745 2746 return 0; 2747 } 2748 #else /* CONFIG_STACKTRACE */ 2749 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2750 { 2751 return 0; 2752 } 2753 #endif /* CONFIG_STACKTRACE */ 2754 2755 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir) 2756 { 2757 struct dentry *timerlat_fd; 2758 struct dentry *per_cpu; 2759 struct dentry *cpu_dir; 2760 char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */ 2761 long cpu; 2762 2763 /* 2764 * Why not using tracing instance per_cpu/ dir? 2765 * 2766 * Because osnoise/timerlat have a single workload, having 2767 * multiple files like these are waste of memory. 2768 */ 2769 per_cpu = tracefs_create_dir("per_cpu", top_dir); 2770 if (!per_cpu) 2771 return -ENOMEM; 2772 2773 for_each_possible_cpu(cpu) { 2774 snprintf(cpu_str, 30, "cpu%ld", cpu); 2775 cpu_dir = tracefs_create_dir(cpu_str, per_cpu); 2776 if (!cpu_dir) 2777 goto out_clean; 2778 2779 timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ, 2780 cpu_dir, NULL, &timerlat_fd_fops); 2781 if (!timerlat_fd) 2782 goto out_clean; 2783 2784 /* Record the CPU */ 2785 d_inode(timerlat_fd)->i_cdev = (void *)(cpu); 2786 } 2787 2788 return 0; 2789 2790 out_clean: 2791 tracefs_remove(per_cpu); 2792 return -ENOMEM; 2793 } 2794 2795 /* 2796 * init_timerlat_tracefs - A function to initialize the timerlat interface files 2797 */ 2798 static int init_timerlat_tracefs(struct dentry *top_dir) 2799 { 2800 struct dentry *tmp; 2801 int retval; 2802 2803 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 2804 &timerlat_period, &trace_min_max_fops); 2805 if (!tmp) 2806 return -ENOMEM; 2807 2808 tmp = tracefs_create_file("timerlat_align_us", TRACE_MODE_WRITE, top_dir, 2809 &timerlat_align_us, &trace_min_max_fops); 2810 if (!tmp) 2811 return -ENOMEM; 2812 2813 retval = osnoise_create_cpu_timerlat_fd(top_dir); 2814 if (retval) 2815 return retval; 2816 2817 return init_timerlat_stack_tracefs(top_dir); 2818 } 2819 #else /* CONFIG_TIMERLAT_TRACER */ 2820 static int init_timerlat_tracefs(struct dentry *top_dir) 2821 { 2822 return 0; 2823 } 2824 #endif /* CONFIG_TIMERLAT_TRACER */ 2825 2826 /* 2827 * init_tracefs - A function to initialize the tracefs interface files 2828 * 2829 * This function creates entries in tracefs for "osnoise" and "timerlat". 2830 * It creates these directories in the tracing directory, and within that 2831 * directory the use can change and view the configs. 2832 */ 2833 static int init_tracefs(void) 2834 { 2835 struct dentry *top_dir; 2836 struct dentry *tmp; 2837 int ret; 2838 2839 ret = tracing_init_dentry(); 2840 if (ret) 2841 return -ENOMEM; 2842 2843 top_dir = tracefs_create_dir("osnoise", NULL); 2844 if (!top_dir) 2845 return 0; 2846 2847 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, 2848 &osnoise_period, &trace_min_max_fops); 2849 if (!tmp) 2850 goto err; 2851 2852 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, 2853 &osnoise_runtime, &trace_min_max_fops); 2854 if (!tmp) 2855 goto err; 2856 2857 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, 2858 &osnoise_stop_tracing_in, &trace_min_max_fops); 2859 if (!tmp) 2860 goto err; 2861 2862 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, 2863 &osnoise_stop_tracing_total, &trace_min_max_fops); 2864 if (!tmp) 2865 goto err; 2866 2867 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); 2868 if (!tmp) 2869 goto err; 2870 2871 tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL, 2872 &osnoise_options_fops); 2873 if (!tmp) 2874 goto err; 2875 2876 ret = init_timerlat_tracefs(top_dir); 2877 if (ret) 2878 goto err; 2879 2880 return 0; 2881 2882 err: 2883 tracefs_remove(top_dir); 2884 return -ENOMEM; 2885 } 2886 2887 static int osnoise_hook_events(void) 2888 { 2889 int retval; 2890 2891 /* 2892 * Trace is already hooked, we are re-enabling from 2893 * a stop_tracing_*. 2894 */ 2895 if (trace_osnoise_callback_enabled) 2896 return 0; 2897 2898 retval = hook_irq_events(); 2899 if (retval) 2900 return -EINVAL; 2901 2902 retval = hook_softirq_events(); 2903 if (retval) 2904 goto out_unhook_irq; 2905 2906 retval = hook_thread_events(); 2907 /* 2908 * All fine! 2909 */ 2910 if (!retval) 2911 return 0; 2912 2913 unhook_softirq_events(); 2914 out_unhook_irq: 2915 unhook_irq_events(); 2916 return -EINVAL; 2917 } 2918 2919 static void osnoise_unhook_events(void) 2920 { 2921 unhook_thread_events(); 2922 unhook_softirq_events(); 2923 unhook_irq_events(); 2924 } 2925 2926 /* 2927 * osnoise_workload_start - start the workload and hook to events 2928 */ 2929 static int osnoise_workload_start(void) 2930 { 2931 int retval; 2932 2933 /* 2934 * Instances need to be registered after calling workload 2935 * start. Hence, if there is already an instance, the 2936 * workload was already registered. Otherwise, this 2937 * code is on the way to register the first instance, 2938 * and the workload will start. 2939 */ 2940 if (osnoise_has_registered_instances()) 2941 return 0; 2942 2943 osn_var_reset_all(); 2944 2945 retval = osnoise_hook_events(); 2946 if (retval) 2947 return retval; 2948 2949 /* 2950 * Make sure that ftrace_nmi_enter/exit() see reset values 2951 * before enabling trace_osnoise_callback_enabled. 2952 */ 2953 barrier(); 2954 trace_osnoise_callback_enabled = true; 2955 2956 retval = start_per_cpu_kthreads(); 2957 if (retval) { 2958 trace_osnoise_callback_enabled = false; 2959 /* 2960 * Make sure that ftrace_nmi_enter/exit() see 2961 * trace_osnoise_callback_enabled as false before continuing. 2962 */ 2963 barrier(); 2964 2965 osnoise_unhook_events(); 2966 return retval; 2967 } 2968 2969 return 0; 2970 } 2971 2972 /* 2973 * osnoise_workload_stop - stop the workload and unhook the events 2974 */ 2975 static void osnoise_workload_stop(void) 2976 { 2977 /* 2978 * Instances need to be unregistered before calling 2979 * stop. Hence, if there is a registered instance, more 2980 * than one instance is running, and the workload will not 2981 * yet stop. Otherwise, this code is on the way to disable 2982 * the last instance, and the workload can stop. 2983 */ 2984 if (osnoise_has_registered_instances()) 2985 return; 2986 2987 /* 2988 * If callbacks were already disabled in a previous stop 2989 * call, there is no need to disable then again. 2990 * 2991 * For instance, this happens when tracing is stopped via: 2992 * echo 0 > tracing_on 2993 * echo nop > current_tracer. 2994 */ 2995 if (!trace_osnoise_callback_enabled) 2996 return; 2997 2998 trace_osnoise_callback_enabled = false; 2999 /* 3000 * Make sure that ftrace_nmi_enter/exit() see 3001 * trace_osnoise_callback_enabled as false before continuing. 3002 */ 3003 barrier(); 3004 3005 stop_per_cpu_kthreads(); 3006 3007 osnoise_unhook_events(); 3008 } 3009 3010 static void osnoise_tracer_start(struct trace_array *tr) 3011 { 3012 int retval; 3013 3014 /* 3015 * If the instance is already registered, there is no need to 3016 * register it again. 3017 */ 3018 if (osnoise_instance_registered(tr)) 3019 return; 3020 3021 retval = osnoise_workload_start(); 3022 if (retval) 3023 pr_err(BANNER "Error starting osnoise tracer\n"); 3024 3025 osnoise_register_instance(tr); 3026 } 3027 3028 static void osnoise_tracer_stop(struct trace_array *tr) 3029 { 3030 osnoise_unregister_instance(tr); 3031 osnoise_workload_stop(); 3032 } 3033 3034 static int osnoise_tracer_init(struct trace_array *tr) 3035 { 3036 /* 3037 * Only allow osnoise tracer if timerlat tracer is not running 3038 * already. 3039 */ 3040 if (timerlat_enabled()) 3041 return -EBUSY; 3042 3043 tr->max_latency = 0; 3044 3045 osnoise_tracer_start(tr); 3046 return 0; 3047 } 3048 3049 static void osnoise_tracer_reset(struct trace_array *tr) 3050 { 3051 osnoise_tracer_stop(tr); 3052 } 3053 3054 static struct tracer osnoise_tracer __read_mostly = { 3055 .name = "osnoise", 3056 .init = osnoise_tracer_init, 3057 .reset = osnoise_tracer_reset, 3058 .start = osnoise_tracer_start, 3059 .stop = osnoise_tracer_stop, 3060 .print_header = print_osnoise_headers, 3061 .allow_instances = true, 3062 }; 3063 3064 #ifdef CONFIG_TIMERLAT_TRACER 3065 static void timerlat_tracer_start(struct trace_array *tr) 3066 { 3067 int retval; 3068 3069 /* 3070 * If the instance is already registered, there is no need to 3071 * register it again. 3072 */ 3073 if (osnoise_instance_registered(tr)) 3074 return; 3075 3076 retval = osnoise_workload_start(); 3077 if (retval) 3078 pr_err(BANNER "Error starting timerlat tracer\n"); 3079 3080 osnoise_register_instance(tr); 3081 3082 return; 3083 } 3084 3085 static void timerlat_tracer_stop(struct trace_array *tr) 3086 { 3087 int cpu; 3088 3089 osnoise_unregister_instance(tr); 3090 3091 /* 3092 * Instruct the threads to stop only if this is the last instance. 3093 */ 3094 if (!osnoise_has_registered_instances()) { 3095 for_each_online_cpu(cpu) 3096 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 3097 } 3098 3099 osnoise_workload_stop(); 3100 } 3101 3102 static int timerlat_tracer_init(struct trace_array *tr) 3103 { 3104 /* 3105 * Only allow timerlat tracer if osnoise tracer is not running already. 3106 */ 3107 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) 3108 return -EBUSY; 3109 3110 /* 3111 * If this is the first instance, set timerlat_tracer to block 3112 * osnoise tracer start. 3113 */ 3114 if (!osnoise_has_registered_instances()) 3115 osnoise_data.timerlat_tracer = 1; 3116 3117 tr->max_latency = 0; 3118 timerlat_tracer_start(tr); 3119 3120 return 0; 3121 } 3122 3123 static void timerlat_tracer_reset(struct trace_array *tr) 3124 { 3125 timerlat_tracer_stop(tr); 3126 3127 /* 3128 * If this is the last instance, reset timerlat_tracer allowing 3129 * osnoise to be started. 3130 */ 3131 if (!osnoise_has_registered_instances()) 3132 osnoise_data.timerlat_tracer = 0; 3133 } 3134 3135 static struct tracer timerlat_tracer __read_mostly = { 3136 .name = "timerlat", 3137 .init = timerlat_tracer_init, 3138 .reset = timerlat_tracer_reset, 3139 .start = timerlat_tracer_start, 3140 .stop = timerlat_tracer_stop, 3141 .print_header = print_timerlat_headers, 3142 .allow_instances = true, 3143 }; 3144 3145 __init static int init_timerlat_tracer(void) 3146 { 3147 return register_tracer(&timerlat_tracer); 3148 } 3149 #else /* CONFIG_TIMERLAT_TRACER */ 3150 __init static int init_timerlat_tracer(void) 3151 { 3152 return 0; 3153 } 3154 #endif /* CONFIG_TIMERLAT_TRACER */ 3155 3156 __init static int init_osnoise_tracer(void) 3157 { 3158 int ret; 3159 3160 mutex_init(&interface_lock); 3161 3162 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 3163 3164 ret = register_tracer(&osnoise_tracer); 3165 if (ret) { 3166 pr_err(BANNER "Error registering osnoise!\n"); 3167 return ret; 3168 } 3169 3170 ret = init_timerlat_tracer(); 3171 if (ret) { 3172 pr_err(BANNER "Error registering timerlat!\n"); 3173 return ret; 3174 } 3175 3176 osnoise_init_hotplug_support(); 3177 3178 INIT_LIST_HEAD_RCU(&osnoise_instances); 3179 3180 init_tracefs(); 3181 3182 return 0; 3183 } 3184 late_initcall(init_osnoise_tracer); 3185