1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OS Noise Tracer: computes the OS Noise suffered by a running thread. 4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. 5 * 6 * Based on "hwlat_detector" tracer by: 7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> 8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> 9 * With feedback from Clark Williams <williams@redhat.com> 10 * 11 * And also based on the rtsl tracer presented on: 12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux 13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems 14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. 15 * 16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> 17 */ 18 19 #include <linux/kthread.h> 20 #include <linux/tracefs.h> 21 #include <linux/uaccess.h> 22 #include <linux/cpumask.h> 23 #include <linux/delay.h> 24 #include <linux/sched/clock.h> 25 #include <uapi/linux/sched/types.h> 26 #include <linux/sched.h> 27 #include <linux/string.h> 28 #include "trace.h" 29 30 #ifdef CONFIG_X86_LOCAL_APIC 31 #include <asm/trace/irq_vectors.h> 32 #undef TRACE_INCLUDE_PATH 33 #undef TRACE_INCLUDE_FILE 34 #endif /* CONFIG_X86_LOCAL_APIC */ 35 36 #include <trace/events/irq.h> 37 #include <trace/events/sched.h> 38 39 #define CREATE_TRACE_POINTS 40 #include <trace/events/osnoise.h> 41 42 /* 43 * Default values. 44 */ 45 #define BANNER "osnoise: " 46 #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ 47 #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ 48 49 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 50 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 51 52 /* 53 * osnoise/options entries. 54 */ 55 enum osnoise_options_index { 56 OSN_DEFAULTS = 0, 57 OSN_WORKLOAD, 58 OSN_PANIC_ON_STOP, 59 OSN_PREEMPT_DISABLE, 60 OSN_IRQ_DISABLE, 61 OSN_MAX 62 }; 63 64 static const char * const osnoise_options_str[OSN_MAX] = { 65 "DEFAULTS", 66 "OSNOISE_WORKLOAD", 67 "PANIC_ON_STOP", 68 "OSNOISE_PREEMPT_DISABLE", 69 "OSNOISE_IRQ_DISABLE" }; 70 71 #define OSN_DEFAULT_OPTIONS 0x2 72 static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; 73 74 /* 75 * trace_array of the enabled osnoise/timerlat instances. 76 */ 77 struct osnoise_instance { 78 struct list_head list; 79 struct trace_array *tr; 80 }; 81 82 static struct list_head osnoise_instances; 83 84 static bool osnoise_has_registered_instances(void) 85 { 86 return !!list_first_or_null_rcu(&osnoise_instances, 87 struct osnoise_instance, 88 list); 89 } 90 91 /* 92 * osnoise_instance_registered - check if a tr is already registered 93 */ 94 static int osnoise_instance_registered(struct trace_array *tr) 95 { 96 struct osnoise_instance *inst; 97 int found = 0; 98 99 rcu_read_lock(); 100 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 101 if (inst->tr == tr) 102 found = 1; 103 } 104 rcu_read_unlock(); 105 106 return found; 107 } 108 109 /* 110 * osnoise_register_instance - register a new trace instance 111 * 112 * Register a trace_array *tr in the list of instances running 113 * osnoise/timerlat tracers. 114 */ 115 static int osnoise_register_instance(struct trace_array *tr) 116 { 117 struct osnoise_instance *inst; 118 119 /* 120 * register/unregister serialization is provided by trace's 121 * trace_types_lock. 122 */ 123 lockdep_assert_held(&trace_types_lock); 124 125 inst = kmalloc(sizeof(*inst), GFP_KERNEL); 126 if (!inst) 127 return -ENOMEM; 128 129 INIT_LIST_HEAD_RCU(&inst->list); 130 inst->tr = tr; 131 list_add_tail_rcu(&inst->list, &osnoise_instances); 132 133 return 0; 134 } 135 136 /* 137 * osnoise_unregister_instance - unregister a registered trace instance 138 * 139 * Remove the trace_array *tr from the list of instances running 140 * osnoise/timerlat tracers. 141 */ 142 static void osnoise_unregister_instance(struct trace_array *tr) 143 { 144 struct osnoise_instance *inst; 145 int found = 0; 146 147 /* 148 * register/unregister serialization is provided by trace's 149 * trace_types_lock. 150 */ 151 list_for_each_entry_rcu(inst, &osnoise_instances, list, 152 lockdep_is_held(&trace_types_lock)) { 153 if (inst->tr == tr) { 154 list_del_rcu(&inst->list); 155 found = 1; 156 break; 157 } 158 } 159 160 if (!found) 161 return; 162 163 kvfree_rcu_mightsleep(inst); 164 } 165 166 /* 167 * NMI runtime info. 168 */ 169 struct osn_nmi { 170 u64 count; 171 u64 delta_start; 172 }; 173 174 /* 175 * IRQ runtime info. 176 */ 177 struct osn_irq { 178 u64 count; 179 u64 arrival_time; 180 u64 delta_start; 181 }; 182 183 #define IRQ_CONTEXT 0 184 #define THREAD_CONTEXT 1 185 #define THREAD_URET 2 186 /* 187 * sofirq runtime info. 188 */ 189 struct osn_softirq { 190 u64 count; 191 u64 arrival_time; 192 u64 delta_start; 193 }; 194 195 /* 196 * thread runtime info. 197 */ 198 struct osn_thread { 199 u64 count; 200 u64 arrival_time; 201 u64 delta_start; 202 }; 203 204 /* 205 * Runtime information: this structure saves the runtime information used by 206 * one sampling thread. 207 */ 208 struct osnoise_variables { 209 struct task_struct *kthread; 210 bool sampling; 211 pid_t pid; 212 struct osn_nmi nmi; 213 struct osn_irq irq; 214 struct osn_softirq softirq; 215 struct osn_thread thread; 216 local_t int_counter; 217 }; 218 219 /* 220 * Per-cpu runtime information. 221 */ 222 static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); 223 224 /* 225 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU 226 */ 227 static inline struct osnoise_variables *this_cpu_osn_var(void) 228 { 229 return this_cpu_ptr(&per_cpu_osnoise_var); 230 } 231 232 /* 233 * Protect the interface. 234 */ 235 static struct mutex interface_lock; 236 237 #ifdef CONFIG_TIMERLAT_TRACER 238 /* 239 * Runtime information for the timer mode. 240 */ 241 struct timerlat_variables { 242 struct task_struct *kthread; 243 struct hrtimer timer; 244 u64 rel_period; 245 u64 abs_period; 246 bool tracing_thread; 247 u64 count; 248 bool uthread_migrate; 249 }; 250 251 static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); 252 253 /* 254 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU 255 */ 256 static inline struct timerlat_variables *this_cpu_tmr_var(void) 257 { 258 return this_cpu_ptr(&per_cpu_timerlat_var); 259 } 260 261 /* 262 * tlat_var_reset - Reset the values of the given timerlat_variables 263 */ 264 static inline void tlat_var_reset(void) 265 { 266 struct timerlat_variables *tlat_var; 267 int cpu; 268 269 /* Synchronize with the timerlat interfaces */ 270 mutex_lock(&interface_lock); 271 /* 272 * So far, all the values are initialized as 0, so 273 * zeroing the structure is perfect. 274 */ 275 for_each_online_cpu(cpu) { 276 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 277 if (tlat_var->kthread) 278 hrtimer_cancel(&tlat_var->timer); 279 memset(tlat_var, 0, sizeof(*tlat_var)); 280 } 281 mutex_unlock(&interface_lock); 282 } 283 #else /* CONFIG_TIMERLAT_TRACER */ 284 #define tlat_var_reset() do {} while (0) 285 #endif /* CONFIG_TIMERLAT_TRACER */ 286 287 /* 288 * osn_var_reset - Reset the values of the given osnoise_variables 289 */ 290 static inline void osn_var_reset(void) 291 { 292 struct osnoise_variables *osn_var; 293 int cpu; 294 295 /* 296 * So far, all the values are initialized as 0, so 297 * zeroing the structure is perfect. 298 */ 299 for_each_online_cpu(cpu) { 300 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 301 memset(osn_var, 0, sizeof(*osn_var)); 302 } 303 } 304 305 /* 306 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables 307 */ 308 static inline void osn_var_reset_all(void) 309 { 310 osn_var_reset(); 311 tlat_var_reset(); 312 } 313 314 /* 315 * Tells NMIs to call back to the osnoise tracer to record timestamps. 316 */ 317 bool trace_osnoise_callback_enabled; 318 319 /* 320 * Tracer data. 321 */ 322 static struct osnoise_data { 323 u64 sample_period; /* total sampling period */ 324 u64 sample_runtime; /* active sampling portion of period */ 325 u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ 326 u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ 327 #ifdef CONFIG_TIMERLAT_TRACER 328 u64 timerlat_period; /* timerlat period */ 329 u64 print_stack; /* print IRQ stack if total > */ 330 int timerlat_tracer; /* timerlat tracer */ 331 #endif 332 bool tainted; /* infor users and developers about a problem */ 333 } osnoise_data = { 334 .sample_period = DEFAULT_SAMPLE_PERIOD, 335 .sample_runtime = DEFAULT_SAMPLE_RUNTIME, 336 .stop_tracing = 0, 337 .stop_tracing_total = 0, 338 #ifdef CONFIG_TIMERLAT_TRACER 339 .print_stack = 0, 340 .timerlat_period = DEFAULT_TIMERLAT_PERIOD, 341 .timerlat_tracer = 0, 342 #endif 343 }; 344 345 #ifdef CONFIG_TIMERLAT_TRACER 346 static inline bool timerlat_enabled(void) 347 { 348 return osnoise_data.timerlat_tracer; 349 } 350 351 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 352 { 353 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 354 /* 355 * If the timerlat is enabled, but the irq handler did 356 * not run yet enabling timerlat_tracer, do not trace. 357 */ 358 if (!tlat_var->tracing_thread) { 359 osn_var->softirq.arrival_time = 0; 360 osn_var->softirq.delta_start = 0; 361 return 0; 362 } 363 return 1; 364 } 365 366 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 367 { 368 struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 369 /* 370 * If the timerlat is enabled, but the irq handler did 371 * not run yet enabling timerlat_tracer, do not trace. 372 */ 373 if (!tlat_var->tracing_thread) { 374 osn_var->thread.delta_start = 0; 375 osn_var->thread.arrival_time = 0; 376 return 0; 377 } 378 return 1; 379 } 380 #else /* CONFIG_TIMERLAT_TRACER */ 381 static inline bool timerlat_enabled(void) 382 { 383 return false; 384 } 385 386 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 387 { 388 return 1; 389 } 390 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 391 { 392 return 1; 393 } 394 #endif 395 396 #ifdef CONFIG_PREEMPT_RT 397 /* 398 * Print the osnoise header info. 399 */ 400 static void print_osnoise_headers(struct seq_file *s) 401 { 402 if (osnoise_data.tainted) 403 seq_puts(s, "# osnoise is tainted!\n"); 404 405 seq_puts(s, "# _-------=> irqs-off\n"); 406 seq_puts(s, "# / _------=> need-resched\n"); 407 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 408 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 409 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 410 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 411 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 412 413 seq_puts(s, "# |||||| / "); 414 seq_puts(s, " MAX\n"); 415 416 seq_puts(s, "# ||||| / "); 417 seq_puts(s, " SINGLE Interference counters:\n"); 418 419 seq_puts(s, "# ||||||| RUNTIME "); 420 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 421 422 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); 423 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 424 425 seq_puts(s, "# | | | ||||||| | | "); 426 seq_puts(s, " | | | | | | | |\n"); 427 } 428 #else /* CONFIG_PREEMPT_RT */ 429 static void print_osnoise_headers(struct seq_file *s) 430 { 431 if (osnoise_data.tainted) 432 seq_puts(s, "# osnoise is tainted!\n"); 433 434 seq_puts(s, "# _-----=> irqs-off\n"); 435 seq_puts(s, "# / _----=> need-resched\n"); 436 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 437 seq_puts(s, "# || / _--=> preempt-depth\n"); 438 seq_puts(s, "# ||| / _-=> migrate-disable "); 439 seq_puts(s, " MAX\n"); 440 seq_puts(s, "# |||| / delay "); 441 seq_puts(s, " SINGLE Interference counters:\n"); 442 443 seq_puts(s, "# ||||| RUNTIME "); 444 seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); 445 446 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); 447 seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); 448 449 seq_puts(s, "# | | | ||||| | | "); 450 seq_puts(s, " | | | | | | | |\n"); 451 } 452 #endif /* CONFIG_PREEMPT_RT */ 453 454 /* 455 * osnoise_taint - report an osnoise error. 456 */ 457 #define osnoise_taint(msg) ({ \ 458 struct osnoise_instance *inst; \ 459 struct trace_buffer *buffer; \ 460 \ 461 rcu_read_lock(); \ 462 list_for_each_entry_rcu(inst, &osnoise_instances, list) { \ 463 buffer = inst->tr->array_buffer.buffer; \ 464 trace_array_printk_buf(buffer, _THIS_IP_, msg); \ 465 } \ 466 rcu_read_unlock(); \ 467 osnoise_data.tainted = true; \ 468 }) 469 470 /* 471 * Record an osnoise_sample into the tracer buffer. 472 */ 473 static void 474 __record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) 475 { 476 struct ring_buffer_event *event; 477 struct osnoise_entry *entry; 478 479 event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), 480 tracing_gen_ctx()); 481 if (!event) 482 return; 483 entry = ring_buffer_event_data(event); 484 entry->runtime = sample->runtime; 485 entry->noise = sample->noise; 486 entry->max_sample = sample->max_sample; 487 entry->hw_count = sample->hw_count; 488 entry->nmi_count = sample->nmi_count; 489 entry->irq_count = sample->irq_count; 490 entry->softirq_count = sample->softirq_count; 491 entry->thread_count = sample->thread_count; 492 493 trace_buffer_unlock_commit_nostack(buffer, event); 494 } 495 496 /* 497 * Record an osnoise_sample on all osnoise instances and fire trace event. 498 */ 499 static void record_osnoise_sample(struct osnoise_sample *sample) 500 { 501 struct osnoise_instance *inst; 502 struct trace_buffer *buffer; 503 504 trace_osnoise_sample(sample); 505 506 rcu_read_lock(); 507 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 508 buffer = inst->tr->array_buffer.buffer; 509 __record_osnoise_sample(sample, buffer); 510 } 511 rcu_read_unlock(); 512 } 513 514 #ifdef CONFIG_TIMERLAT_TRACER 515 /* 516 * Print the timerlat header info. 517 */ 518 #ifdef CONFIG_PREEMPT_RT 519 static void print_timerlat_headers(struct seq_file *s) 520 { 521 seq_puts(s, "# _-------=> irqs-off\n"); 522 seq_puts(s, "# / _------=> need-resched\n"); 523 seq_puts(s, "# | / _-----=> need-resched-lazy\n"); 524 seq_puts(s, "# || / _----=> hardirq/softirq\n"); 525 seq_puts(s, "# ||| / _---=> preempt-depth\n"); 526 seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); 527 seq_puts(s, "# ||||| / _-=> migrate-disable\n"); 528 seq_puts(s, "# |||||| /\n"); 529 seq_puts(s, "# ||||||| ACTIVATION\n"); 530 seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); 531 seq_puts(s, " CONTEXT LATENCY\n"); 532 seq_puts(s, "# | | | ||||||| | | "); 533 seq_puts(s, " | |\n"); 534 } 535 #else /* CONFIG_PREEMPT_RT */ 536 static void print_timerlat_headers(struct seq_file *s) 537 { 538 seq_puts(s, "# _-----=> irqs-off\n"); 539 seq_puts(s, "# / _----=> need-resched\n"); 540 seq_puts(s, "# | / _---=> hardirq/softirq\n"); 541 seq_puts(s, "# || / _--=> preempt-depth\n"); 542 seq_puts(s, "# ||| / _-=> migrate-disable\n"); 543 seq_puts(s, "# |||| / delay\n"); 544 seq_puts(s, "# ||||| ACTIVATION\n"); 545 seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); 546 seq_puts(s, " CONTEXT LATENCY\n"); 547 seq_puts(s, "# | | | ||||| | | "); 548 seq_puts(s, " | |\n"); 549 } 550 #endif /* CONFIG_PREEMPT_RT */ 551 552 static void 553 __record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) 554 { 555 struct ring_buffer_event *event; 556 struct timerlat_entry *entry; 557 558 event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), 559 tracing_gen_ctx()); 560 if (!event) 561 return; 562 entry = ring_buffer_event_data(event); 563 entry->seqnum = sample->seqnum; 564 entry->context = sample->context; 565 entry->timer_latency = sample->timer_latency; 566 567 trace_buffer_unlock_commit_nostack(buffer, event); 568 } 569 570 /* 571 * Record an timerlat_sample into the tracer buffer. 572 */ 573 static void record_timerlat_sample(struct timerlat_sample *sample) 574 { 575 struct osnoise_instance *inst; 576 struct trace_buffer *buffer; 577 578 trace_timerlat_sample(sample); 579 580 rcu_read_lock(); 581 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 582 buffer = inst->tr->array_buffer.buffer; 583 __record_timerlat_sample(sample, buffer); 584 } 585 rcu_read_unlock(); 586 } 587 588 #ifdef CONFIG_STACKTRACE 589 590 #define MAX_CALLS 256 591 592 /* 593 * Stack trace will take place only at IRQ level, so, no need 594 * to control nesting here. 595 */ 596 struct trace_stack { 597 int stack_size; 598 int nr_entries; 599 unsigned long calls[MAX_CALLS]; 600 }; 601 602 static DEFINE_PER_CPU(struct trace_stack, trace_stack); 603 604 /* 605 * timerlat_save_stack - save a stack trace without printing 606 * 607 * Save the current stack trace without printing. The 608 * stack will be printed later, after the end of the measurement. 609 */ 610 static void timerlat_save_stack(int skip) 611 { 612 unsigned int size, nr_entries; 613 struct trace_stack *fstack; 614 615 fstack = this_cpu_ptr(&trace_stack); 616 617 size = ARRAY_SIZE(fstack->calls); 618 619 nr_entries = stack_trace_save(fstack->calls, size, skip); 620 621 fstack->stack_size = nr_entries * sizeof(unsigned long); 622 fstack->nr_entries = nr_entries; 623 624 return; 625 626 } 627 628 static void 629 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) 630 { 631 struct ring_buffer_event *event; 632 struct stack_entry *entry; 633 634 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 635 tracing_gen_ctx()); 636 if (!event) 637 return; 638 639 entry = ring_buffer_event_data(event); 640 641 entry->size = fstack->nr_entries; 642 memcpy(&entry->caller, fstack->calls, size); 643 644 trace_buffer_unlock_commit_nostack(buffer, event); 645 } 646 647 /* 648 * timerlat_dump_stack - dump a stack trace previously saved 649 */ 650 static void timerlat_dump_stack(u64 latency) 651 { 652 struct osnoise_instance *inst; 653 struct trace_buffer *buffer; 654 struct trace_stack *fstack; 655 unsigned int size; 656 657 /* 658 * trace only if latency > print_stack config, if enabled. 659 */ 660 if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) 661 return; 662 663 preempt_disable_notrace(); 664 fstack = this_cpu_ptr(&trace_stack); 665 size = fstack->stack_size; 666 667 rcu_read_lock(); 668 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 669 buffer = inst->tr->array_buffer.buffer; 670 __timerlat_dump_stack(buffer, fstack, size); 671 672 } 673 rcu_read_unlock(); 674 preempt_enable_notrace(); 675 } 676 #else /* CONFIG_STACKTRACE */ 677 #define timerlat_dump_stack(u64 latency) do {} while (0) 678 #define timerlat_save_stack(a) do {} while (0) 679 #endif /* CONFIG_STACKTRACE */ 680 #endif /* CONFIG_TIMERLAT_TRACER */ 681 682 /* 683 * Macros to encapsulate the time capturing infrastructure. 684 */ 685 #define time_get() trace_clock_local() 686 #define time_to_us(x) div_u64(x, 1000) 687 #define time_sub(a, b) ((a) - (b)) 688 689 /* 690 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ 691 * 692 * If an IRQ is preempted by an NMI, its delta_start is pushed forward 693 * to discount the NMI interference. 694 * 695 * See get_int_safe_duration(). 696 */ 697 static inline void 698 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) 699 { 700 if (osn_var->irq.delta_start) 701 osn_var->irq.delta_start += duration; 702 } 703 704 #ifndef CONFIG_PREEMPT_RT 705 /* 706 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. 707 * 708 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed 709 * forward to discount the interference. 710 * 711 * See get_int_safe_duration(). 712 */ 713 static inline void 714 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) 715 { 716 if (osn_var->softirq.delta_start) 717 osn_var->softirq.delta_start += duration; 718 } 719 #else /* CONFIG_PREEMPT_RT */ 720 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) 721 #endif 722 723 /* 724 * cond_move_thread_delta_start - Forward the delta_start of a running thread 725 * 726 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start 727 * is pushed forward to discount the interference. 728 * 729 * See get_int_safe_duration(). 730 */ 731 static inline void 732 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) 733 { 734 if (osn_var->thread.delta_start) 735 osn_var->thread.delta_start += duration; 736 } 737 738 /* 739 * get_int_safe_duration - Get the duration of a window 740 * 741 * The irq, softirq and thread varaibles need to have its duration without 742 * the interference from higher priority interrupts. Instead of keeping a 743 * variable to discount the interrupt interference from these variables, the 744 * starting time of these variables are pushed forward with the interrupt's 745 * duration. In this way, a single variable is used to: 746 * 747 * - Know if a given window is being measured. 748 * - Account its duration. 749 * - Discount the interference. 750 * 751 * To avoid getting inconsistent values, e.g.,: 752 * 753 * now = time_get() 754 * ---> interrupt! 755 * delta_start -= int duration; 756 * <--- 757 * duration = now - delta_start; 758 * 759 * result: negative duration if the variable duration before the 760 * interrupt was smaller than the interrupt execution. 761 * 762 * A counter of interrupts is used. If the counter increased, try 763 * to capture an interference safe duration. 764 */ 765 static inline s64 766 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) 767 { 768 u64 int_counter, now; 769 s64 duration; 770 771 do { 772 int_counter = local_read(&osn_var->int_counter); 773 /* synchronize with interrupts */ 774 barrier(); 775 776 now = time_get(); 777 duration = (now - *delta_start); 778 779 /* synchronize with interrupts */ 780 barrier(); 781 } while (int_counter != local_read(&osn_var->int_counter)); 782 783 /* 784 * This is an evidence of race conditions that cause 785 * a value to be "discounted" too much. 786 */ 787 if (duration < 0) 788 osnoise_taint("Negative duration!\n"); 789 790 *delta_start = 0; 791 792 return duration; 793 } 794 795 /* 796 * 797 * set_int_safe_time - Save the current time on *time, aware of interference 798 * 799 * Get the time, taking into consideration a possible interference from 800 * higher priority interrupts. 801 * 802 * See get_int_safe_duration() for an explanation. 803 */ 804 static u64 805 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) 806 { 807 u64 int_counter; 808 809 do { 810 int_counter = local_read(&osn_var->int_counter); 811 /* synchronize with interrupts */ 812 barrier(); 813 814 *time = time_get(); 815 816 /* synchronize with interrupts */ 817 barrier(); 818 } while (int_counter != local_read(&osn_var->int_counter)); 819 820 return int_counter; 821 } 822 823 #ifdef CONFIG_TIMERLAT_TRACER 824 /* 825 * copy_int_safe_time - Copy *src into *desc aware of interference 826 */ 827 static u64 828 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) 829 { 830 u64 int_counter; 831 832 do { 833 int_counter = local_read(&osn_var->int_counter); 834 /* synchronize with interrupts */ 835 barrier(); 836 837 *dst = *src; 838 839 /* synchronize with interrupts */ 840 barrier(); 841 } while (int_counter != local_read(&osn_var->int_counter)); 842 843 return int_counter; 844 } 845 #endif /* CONFIG_TIMERLAT_TRACER */ 846 847 /* 848 * trace_osnoise_callback - NMI entry/exit callback 849 * 850 * This function is called at the entry and exit NMI code. The bool enter 851 * distinguishes between either case. This function is used to note a NMI 852 * occurrence, compute the noise caused by the NMI, and to remove the noise 853 * it is potentially causing on other interference variables. 854 */ 855 void trace_osnoise_callback(bool enter) 856 { 857 struct osnoise_variables *osn_var = this_cpu_osn_var(); 858 u64 duration; 859 860 if (!osn_var->sampling) 861 return; 862 863 /* 864 * Currently trace_clock_local() calls sched_clock() and the 865 * generic version is not NMI safe. 866 */ 867 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { 868 if (enter) { 869 osn_var->nmi.delta_start = time_get(); 870 local_inc(&osn_var->int_counter); 871 } else { 872 duration = time_get() - osn_var->nmi.delta_start; 873 874 trace_nmi_noise(osn_var->nmi.delta_start, duration); 875 876 cond_move_irq_delta_start(osn_var, duration); 877 cond_move_softirq_delta_start(osn_var, duration); 878 cond_move_thread_delta_start(osn_var, duration); 879 } 880 } 881 882 if (enter) 883 osn_var->nmi.count++; 884 } 885 886 /* 887 * osnoise_trace_irq_entry - Note the starting of an IRQ 888 * 889 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, 890 * it is safe to use a single variable (ons_var->irq) to save the statistics. 891 * The arrival_time is used to report... the arrival time. The delta_start 892 * is used to compute the duration at the IRQ exit handler. See 893 * cond_move_irq_delta_start(). 894 */ 895 void osnoise_trace_irq_entry(int id) 896 { 897 struct osnoise_variables *osn_var = this_cpu_osn_var(); 898 899 if (!osn_var->sampling) 900 return; 901 /* 902 * This value will be used in the report, but not to compute 903 * the execution time, so it is safe to get it unsafe. 904 */ 905 osn_var->irq.arrival_time = time_get(); 906 set_int_safe_time(osn_var, &osn_var->irq.delta_start); 907 osn_var->irq.count++; 908 909 local_inc(&osn_var->int_counter); 910 } 911 912 /* 913 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace 914 * 915 * Computes the duration of the IRQ noise, and trace it. Also discounts the 916 * interference from other sources of noise could be currently being accounted. 917 */ 918 void osnoise_trace_irq_exit(int id, const char *desc) 919 { 920 struct osnoise_variables *osn_var = this_cpu_osn_var(); 921 s64 duration; 922 923 if (!osn_var->sampling) 924 return; 925 926 duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); 927 trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); 928 osn_var->irq.arrival_time = 0; 929 cond_move_softirq_delta_start(osn_var, duration); 930 cond_move_thread_delta_start(osn_var, duration); 931 } 932 933 /* 934 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent 935 * 936 * Used to note the starting of an IRQ occurece. 937 */ 938 static void trace_irqentry_callback(void *data, int irq, 939 struct irqaction *action) 940 { 941 osnoise_trace_irq_entry(irq); 942 } 943 944 /* 945 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent 946 * 947 * Used to note the end of an IRQ occurece. 948 */ 949 static void trace_irqexit_callback(void *data, int irq, 950 struct irqaction *action, int ret) 951 { 952 osnoise_trace_irq_exit(irq, action->name); 953 } 954 955 /* 956 * arch specific register function. 957 */ 958 int __weak osnoise_arch_register(void) 959 { 960 return 0; 961 } 962 963 /* 964 * arch specific unregister function. 965 */ 966 void __weak osnoise_arch_unregister(void) 967 { 968 return; 969 } 970 971 /* 972 * hook_irq_events - Hook IRQ handling events 973 * 974 * This function hooks the IRQ related callbacks to the respective trace 975 * events. 976 */ 977 static int hook_irq_events(void) 978 { 979 int ret; 980 981 ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); 982 if (ret) 983 goto out_err; 984 985 ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); 986 if (ret) 987 goto out_unregister_entry; 988 989 ret = osnoise_arch_register(); 990 if (ret) 991 goto out_irq_exit; 992 993 return 0; 994 995 out_irq_exit: 996 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 997 out_unregister_entry: 998 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 999 out_err: 1000 return -EINVAL; 1001 } 1002 1003 /* 1004 * unhook_irq_events - Unhook IRQ handling events 1005 * 1006 * This function unhooks the IRQ related callbacks to the respective trace 1007 * events. 1008 */ 1009 static void unhook_irq_events(void) 1010 { 1011 osnoise_arch_unregister(); 1012 unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); 1013 unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); 1014 } 1015 1016 #ifndef CONFIG_PREEMPT_RT 1017 /* 1018 * trace_softirq_entry_callback - Note the starting of a softirq 1019 * 1020 * Save the starting time of a softirq. As softirqs are non-preemptive to 1021 * other softirqs, it is safe to use a single variable (ons_var->softirq) 1022 * to save the statistics. The arrival_time is used to report... the 1023 * arrival time. The delta_start is used to compute the duration at the 1024 * softirq exit handler. See cond_move_softirq_delta_start(). 1025 */ 1026 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) 1027 { 1028 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1029 1030 if (!osn_var->sampling) 1031 return; 1032 /* 1033 * This value will be used in the report, but not to compute 1034 * the execution time, so it is safe to get it unsafe. 1035 */ 1036 osn_var->softirq.arrival_time = time_get(); 1037 set_int_safe_time(osn_var, &osn_var->softirq.delta_start); 1038 osn_var->softirq.count++; 1039 1040 local_inc(&osn_var->int_counter); 1041 } 1042 1043 /* 1044 * trace_softirq_exit_callback - Note the end of an softirq 1045 * 1046 * Computes the duration of the softirq noise, and trace it. Also discounts the 1047 * interference from other sources of noise could be currently being accounted. 1048 */ 1049 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) 1050 { 1051 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1052 s64 duration; 1053 1054 if (!osn_var->sampling) 1055 return; 1056 1057 if (unlikely(timerlat_enabled())) 1058 if (!timerlat_softirq_exit(osn_var)) 1059 return; 1060 1061 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 1062 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); 1063 cond_move_thread_delta_start(osn_var, duration); 1064 osn_var->softirq.arrival_time = 0; 1065 } 1066 1067 /* 1068 * hook_softirq_events - Hook softirq handling events 1069 * 1070 * This function hooks the softirq related callbacks to the respective trace 1071 * events. 1072 */ 1073 static int hook_softirq_events(void) 1074 { 1075 int ret; 1076 1077 ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1078 if (ret) 1079 goto out_err; 1080 1081 ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1082 if (ret) 1083 goto out_unreg_entry; 1084 1085 return 0; 1086 1087 out_unreg_entry: 1088 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1089 out_err: 1090 return -EINVAL; 1091 } 1092 1093 /* 1094 * unhook_softirq_events - Unhook softirq handling events 1095 * 1096 * This function hooks the softirq related callbacks to the respective trace 1097 * events. 1098 */ 1099 static void unhook_softirq_events(void) 1100 { 1101 unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); 1102 unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); 1103 } 1104 #else /* CONFIG_PREEMPT_RT */ 1105 /* 1106 * softirq are threads on the PREEMPT_RT mode. 1107 */ 1108 static int hook_softirq_events(void) 1109 { 1110 return 0; 1111 } 1112 static void unhook_softirq_events(void) 1113 { 1114 } 1115 #endif 1116 1117 /* 1118 * thread_entry - Record the starting of a thread noise window 1119 * 1120 * It saves the context switch time for a noisy thread, and increments 1121 * the interference counters. 1122 */ 1123 static void 1124 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) 1125 { 1126 if (!osn_var->sampling) 1127 return; 1128 /* 1129 * The arrival time will be used in the report, but not to compute 1130 * the execution time, so it is safe to get it unsafe. 1131 */ 1132 osn_var->thread.arrival_time = time_get(); 1133 1134 set_int_safe_time(osn_var, &osn_var->thread.delta_start); 1135 1136 osn_var->thread.count++; 1137 local_inc(&osn_var->int_counter); 1138 } 1139 1140 /* 1141 * thread_exit - Report the end of a thread noise window 1142 * 1143 * It computes the total noise from a thread, tracing if needed. 1144 */ 1145 static void 1146 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) 1147 { 1148 s64 duration; 1149 1150 if (!osn_var->sampling) 1151 return; 1152 1153 if (unlikely(timerlat_enabled())) 1154 if (!timerlat_thread_exit(osn_var)) 1155 return; 1156 1157 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 1158 1159 trace_thread_noise(t, osn_var->thread.arrival_time, duration); 1160 1161 osn_var->thread.arrival_time = 0; 1162 } 1163 1164 #ifdef CONFIG_TIMERLAT_TRACER 1165 /* 1166 * osnoise_stop_exception - Stop tracing and the tracer. 1167 */ 1168 static __always_inline void osnoise_stop_exception(char *msg, int cpu) 1169 { 1170 struct osnoise_instance *inst; 1171 struct trace_array *tr; 1172 1173 rcu_read_lock(); 1174 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1175 tr = inst->tr; 1176 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1177 "stop tracing hit on cpu %d due to exception: %s\n", 1178 smp_processor_id(), 1179 msg); 1180 1181 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) 1182 panic("tracer hit on cpu %d due to exception: %s\n", 1183 smp_processor_id(), 1184 msg); 1185 1186 tracer_tracing_off(tr); 1187 } 1188 rcu_read_unlock(); 1189 } 1190 1191 /* 1192 * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler 1193 * 1194 * his function is hooked to the sched:sched_migrate_task trace event, and monitors 1195 * timerlat user-space thread migration. 1196 */ 1197 static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu) 1198 { 1199 struct osnoise_variables *osn_var; 1200 long cpu = task_cpu(p); 1201 1202 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 1203 if (osn_var->pid == p->pid && dest_cpu != cpu) { 1204 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1; 1205 osnoise_taint("timerlat user-thread migrated\n"); 1206 osnoise_stop_exception("timerlat user-thread migrated", cpu); 1207 } 1208 } 1209 1210 static bool monitor_enabled; 1211 1212 static int register_migration_monitor(void) 1213 { 1214 int ret = 0; 1215 1216 /* 1217 * Timerlat thread migration check is only required when running timerlat in user-space. 1218 * Thus, enable callback only if timerlat is set with no workload. 1219 */ 1220 if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) { 1221 if (WARN_ON_ONCE(monitor_enabled)) 1222 return 0; 1223 1224 ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL); 1225 if (!ret) 1226 monitor_enabled = true; 1227 } 1228 1229 return ret; 1230 } 1231 1232 static void unregister_migration_monitor(void) 1233 { 1234 if (!monitor_enabled) 1235 return; 1236 1237 unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL); 1238 monitor_enabled = false; 1239 } 1240 #else 1241 static int register_migration_monitor(void) 1242 { 1243 return 0; 1244 } 1245 static void unregister_migration_monitor(void) {} 1246 #endif 1247 /* 1248 * trace_sched_switch - sched:sched_switch trace event handler 1249 * 1250 * This function is hooked to the sched:sched_switch trace event, and it is 1251 * used to record the beginning and to report the end of a thread noise window. 1252 */ 1253 static void 1254 trace_sched_switch_callback(void *data, bool preempt, 1255 struct task_struct *p, 1256 struct task_struct *n, 1257 unsigned int prev_state) 1258 { 1259 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1260 int workload = test_bit(OSN_WORKLOAD, &osnoise_options); 1261 1262 if ((p->pid != osn_var->pid) || !workload) 1263 thread_exit(osn_var, p); 1264 1265 if ((n->pid != osn_var->pid) || !workload) 1266 thread_entry(osn_var, n); 1267 } 1268 1269 /* 1270 * hook_thread_events - Hook the instrumentation for thread noise 1271 * 1272 * Hook the osnoise tracer callbacks to handle the noise from other 1273 * threads on the necessary kernel events. 1274 */ 1275 static int hook_thread_events(void) 1276 { 1277 int ret; 1278 1279 ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); 1280 if (ret) 1281 return -EINVAL; 1282 1283 ret = register_migration_monitor(); 1284 if (ret) 1285 goto out_unreg; 1286 1287 return 0; 1288 1289 out_unreg: 1290 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1291 return -EINVAL; 1292 } 1293 1294 /* 1295 * unhook_thread_events - unhook the instrumentation for thread noise 1296 * 1297 * Unook the osnoise tracer callbacks to handle the noise from other 1298 * threads on the necessary kernel events. 1299 */ 1300 static void unhook_thread_events(void) 1301 { 1302 unregister_trace_sched_switch(trace_sched_switch_callback, NULL); 1303 unregister_migration_monitor(); 1304 } 1305 1306 /* 1307 * save_osn_sample_stats - Save the osnoise_sample statistics 1308 * 1309 * Save the osnoise_sample statistics before the sampling phase. These 1310 * values will be used later to compute the diff betwneen the statistics 1311 * before and after the osnoise sampling. 1312 */ 1313 static void 1314 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1315 { 1316 s->nmi_count = osn_var->nmi.count; 1317 s->irq_count = osn_var->irq.count; 1318 s->softirq_count = osn_var->softirq.count; 1319 s->thread_count = osn_var->thread.count; 1320 } 1321 1322 /* 1323 * diff_osn_sample_stats - Compute the osnoise_sample statistics 1324 * 1325 * After a sample period, compute the difference on the osnoise_sample 1326 * statistics. The struct osnoise_sample *s contains the statistics saved via 1327 * save_osn_sample_stats() before the osnoise sampling. 1328 */ 1329 static void 1330 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) 1331 { 1332 s->nmi_count = osn_var->nmi.count - s->nmi_count; 1333 s->irq_count = osn_var->irq.count - s->irq_count; 1334 s->softirq_count = osn_var->softirq.count - s->softirq_count; 1335 s->thread_count = osn_var->thread.count - s->thread_count; 1336 } 1337 1338 /* 1339 * osnoise_stop_tracing - Stop tracing and the tracer. 1340 */ 1341 static __always_inline void osnoise_stop_tracing(void) 1342 { 1343 struct osnoise_instance *inst; 1344 struct trace_array *tr; 1345 1346 rcu_read_lock(); 1347 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1348 tr = inst->tr; 1349 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1350 "stop tracing hit on cpu %d\n", smp_processor_id()); 1351 1352 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) 1353 panic("tracer hit stop condition on CPU %d\n", smp_processor_id()); 1354 1355 tracer_tracing_off(tr); 1356 } 1357 rcu_read_unlock(); 1358 } 1359 1360 /* 1361 * osnoise_has_tracing_on - Check if there is at least one instance on 1362 */ 1363 static __always_inline int osnoise_has_tracing_on(void) 1364 { 1365 struct osnoise_instance *inst; 1366 int trace_is_on = 0; 1367 1368 rcu_read_lock(); 1369 list_for_each_entry_rcu(inst, &osnoise_instances, list) 1370 trace_is_on += tracer_tracing_is_on(inst->tr); 1371 rcu_read_unlock(); 1372 1373 return trace_is_on; 1374 } 1375 1376 /* 1377 * notify_new_max_latency - Notify a new max latency via fsnotify interface. 1378 */ 1379 static void notify_new_max_latency(u64 latency) 1380 { 1381 struct osnoise_instance *inst; 1382 struct trace_array *tr; 1383 1384 rcu_read_lock(); 1385 list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1386 tr = inst->tr; 1387 if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { 1388 tr->max_latency = latency; 1389 latency_fsnotify(tr); 1390 } 1391 } 1392 rcu_read_unlock(); 1393 } 1394 1395 /* 1396 * run_osnoise - Sample the time and look for osnoise 1397 * 1398 * Used to capture the time, looking for potential osnoise latency repeatedly. 1399 * Different from hwlat_detector, it is called with preemption and interrupts 1400 * enabled. This allows irqs, softirqs and threads to run, interfering on the 1401 * osnoise sampling thread, as they would do with a regular thread. 1402 */ 1403 static int run_osnoise(void) 1404 { 1405 bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); 1406 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1407 u64 start, sample, last_sample; 1408 u64 last_int_count, int_count; 1409 s64 noise = 0, max_noise = 0; 1410 s64 total, last_total = 0; 1411 struct osnoise_sample s; 1412 bool disable_preemption; 1413 unsigned int threshold; 1414 u64 runtime, stop_in; 1415 u64 sum_noise = 0; 1416 int hw_count = 0; 1417 int ret = -1; 1418 1419 /* 1420 * Disabling preemption is only required if IRQs are enabled, 1421 * and the options is set on. 1422 */ 1423 disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options); 1424 1425 /* 1426 * Considers the current thread as the workload. 1427 */ 1428 osn_var->pid = current->pid; 1429 1430 /* 1431 * Save the current stats for the diff 1432 */ 1433 save_osn_sample_stats(osn_var, &s); 1434 1435 /* 1436 * if threshold is 0, use the default value of 1 us. 1437 */ 1438 threshold = tracing_thresh ? : 1000; 1439 1440 /* 1441 * Apply PREEMPT and IRQ disabled options. 1442 */ 1443 if (disable_irq) 1444 local_irq_disable(); 1445 1446 if (disable_preemption) 1447 preempt_disable(); 1448 1449 /* 1450 * Make sure NMIs see sampling first 1451 */ 1452 osn_var->sampling = true; 1453 barrier(); 1454 1455 /* 1456 * Transform the *_us config to nanoseconds to avoid the 1457 * division on the main loop. 1458 */ 1459 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; 1460 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; 1461 1462 /* 1463 * Start timestemp 1464 */ 1465 start = time_get(); 1466 1467 /* 1468 * "previous" loop. 1469 */ 1470 last_int_count = set_int_safe_time(osn_var, &last_sample); 1471 1472 do { 1473 /* 1474 * Get sample! 1475 */ 1476 int_count = set_int_safe_time(osn_var, &sample); 1477 1478 noise = time_sub(sample, last_sample); 1479 1480 /* 1481 * This shouldn't happen. 1482 */ 1483 if (noise < 0) { 1484 osnoise_taint("negative noise!"); 1485 goto out; 1486 } 1487 1488 /* 1489 * Sample runtime. 1490 */ 1491 total = time_sub(sample, start); 1492 1493 /* 1494 * Check for possible overflows. 1495 */ 1496 if (total < last_total) { 1497 osnoise_taint("total overflow!"); 1498 break; 1499 } 1500 1501 last_total = total; 1502 1503 if (noise >= threshold) { 1504 int interference = int_count - last_int_count; 1505 1506 if (noise > max_noise) 1507 max_noise = noise; 1508 1509 if (!interference) 1510 hw_count++; 1511 1512 sum_noise += noise; 1513 1514 trace_sample_threshold(last_sample, noise, interference); 1515 1516 if (osnoise_data.stop_tracing) 1517 if (noise > stop_in) 1518 osnoise_stop_tracing(); 1519 } 1520 1521 /* 1522 * In some cases, notably when running on a nohz_full CPU with 1523 * a stopped tick PREEMPT_RCU or PREEMPT_LAZY have no way to 1524 * account for QSs. This will eventually cause unwarranted 1525 * noise as RCU forces preemption as the means of ending the 1526 * current grace period. We avoid this by calling 1527 * rcu_momentary_eqs(), which performs a zero duration EQS 1528 * allowing RCU to end the current grace period. This call 1529 * shouldn't be wrapped inside an RCU critical section. 1530 * 1531 * Normally QSs for other cases are handled through cond_resched(). 1532 * For simplicity, however, we call rcu_momentary_eqs() for all 1533 * configurations here. 1534 */ 1535 if (!disable_irq) 1536 local_irq_disable(); 1537 1538 rcu_momentary_eqs(); 1539 1540 if (!disable_irq) 1541 local_irq_enable(); 1542 1543 /* 1544 * For the non-preemptive kernel config: let threads runs, if 1545 * they so wish, unless set not do to so. 1546 */ 1547 if (!disable_irq && !disable_preemption) 1548 cond_resched(); 1549 1550 last_sample = sample; 1551 last_int_count = int_count; 1552 1553 } while (total < runtime && !kthread_should_stop()); 1554 1555 /* 1556 * Finish the above in the view for interrupts. 1557 */ 1558 barrier(); 1559 1560 osn_var->sampling = false; 1561 1562 /* 1563 * Make sure sampling data is no longer updated. 1564 */ 1565 barrier(); 1566 1567 /* 1568 * Return to the preemptive state. 1569 */ 1570 if (disable_preemption) 1571 preempt_enable(); 1572 1573 if (disable_irq) 1574 local_irq_enable(); 1575 1576 /* 1577 * Save noise info. 1578 */ 1579 s.noise = time_to_us(sum_noise); 1580 s.runtime = time_to_us(total); 1581 s.max_sample = time_to_us(max_noise); 1582 s.hw_count = hw_count; 1583 1584 /* Save interference stats info */ 1585 diff_osn_sample_stats(osn_var, &s); 1586 1587 record_osnoise_sample(&s); 1588 1589 notify_new_max_latency(max_noise); 1590 1591 if (osnoise_data.stop_tracing_total) 1592 if (s.noise > osnoise_data.stop_tracing_total) 1593 osnoise_stop_tracing(); 1594 1595 return 0; 1596 out: 1597 return ret; 1598 } 1599 1600 static struct cpumask osnoise_cpumask; 1601 static struct cpumask save_cpumask; 1602 static struct cpumask kthread_cpumask; 1603 1604 /* 1605 * osnoise_sleep - sleep until the next period 1606 */ 1607 static void osnoise_sleep(bool skip_period) 1608 { 1609 u64 interval; 1610 ktime_t wake_time; 1611 1612 mutex_lock(&interface_lock); 1613 if (skip_period) 1614 interval = osnoise_data.sample_period; 1615 else 1616 interval = osnoise_data.sample_period - osnoise_data.sample_runtime; 1617 mutex_unlock(&interface_lock); 1618 1619 /* 1620 * differently from hwlat_detector, the osnoise tracer can run 1621 * without a pause because preemption is on. 1622 */ 1623 if (!interval) { 1624 /* Let synchronize_rcu_tasks() make progress */ 1625 cond_resched_tasks_rcu_qs(); 1626 return; 1627 } 1628 1629 wake_time = ktime_add_us(ktime_get(), interval); 1630 __set_current_state(TASK_INTERRUPTIBLE); 1631 1632 while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) { 1633 if (kthread_should_stop()) 1634 break; 1635 } 1636 } 1637 1638 /* 1639 * osnoise_migration_pending - checks if the task needs to migrate 1640 * 1641 * osnoise/timerlat threads are per-cpu. If there is a pending request to 1642 * migrate the thread away from the current CPU, something bad has happened. 1643 * Play the good citizen and leave. 1644 * 1645 * Returns 0 if it is safe to continue, 1 otherwise. 1646 */ 1647 static inline int osnoise_migration_pending(void) 1648 { 1649 if (!current->migration_pending) 1650 return 0; 1651 1652 /* 1653 * If migration is pending, there is a task waiting for the 1654 * tracer to enable migration. The tracer does not allow migration, 1655 * thus: taint and leave to unblock the blocked thread. 1656 */ 1657 osnoise_taint("migration requested to osnoise threads, leaving."); 1658 1659 /* 1660 * Unset this thread from the threads managed by the interface. 1661 * The tracers are responsible for cleaning their env before 1662 * exiting. 1663 */ 1664 mutex_lock(&interface_lock); 1665 this_cpu_osn_var()->kthread = NULL; 1666 cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask); 1667 mutex_unlock(&interface_lock); 1668 1669 return 1; 1670 } 1671 1672 /* 1673 * osnoise_main - The osnoise detection kernel thread 1674 * 1675 * Calls run_osnoise() function to measure the osnoise for the configured runtime, 1676 * every period. 1677 */ 1678 static int osnoise_main(void *data) 1679 { 1680 unsigned long flags; 1681 1682 /* 1683 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. 1684 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread. 1685 * 1686 * To work around this limitation, disable migration and remove the 1687 * flag. 1688 */ 1689 migrate_disable(); 1690 raw_spin_lock_irqsave(¤t->pi_lock, flags); 1691 current->flags &= ~(PF_NO_SETAFFINITY); 1692 raw_spin_unlock_irqrestore(¤t->pi_lock, flags); 1693 1694 while (!kthread_should_stop()) { 1695 if (osnoise_migration_pending()) 1696 break; 1697 1698 /* skip a period if tracing is off on all instances */ 1699 if (!osnoise_has_tracing_on()) { 1700 osnoise_sleep(true); 1701 continue; 1702 } 1703 1704 run_osnoise(); 1705 osnoise_sleep(false); 1706 } 1707 1708 migrate_enable(); 1709 return 0; 1710 } 1711 1712 #ifdef CONFIG_TIMERLAT_TRACER 1713 /* 1714 * timerlat_irq - hrtimer handler for timerlat. 1715 */ 1716 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) 1717 { 1718 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1719 struct timerlat_variables *tlat; 1720 struct timerlat_sample s; 1721 u64 now; 1722 u64 diff; 1723 1724 /* 1725 * I am not sure if the timer was armed for this CPU. So, get 1726 * the timerlat struct from the timer itself, not from this 1727 * CPU. 1728 */ 1729 tlat = container_of(timer, struct timerlat_variables, timer); 1730 1731 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1732 1733 /* 1734 * Enable the osnoise: events for thread an softirq. 1735 */ 1736 tlat->tracing_thread = true; 1737 1738 osn_var->thread.arrival_time = time_get(); 1739 1740 /* 1741 * A hardirq is running: the timer IRQ. It is for sure preempting 1742 * a thread, and potentially preempting a softirq. 1743 * 1744 * At this point, it is not interesting to know the duration of the 1745 * preempted thread (and maybe softirq), but how much time they will 1746 * delay the beginning of the execution of the timer thread. 1747 * 1748 * To get the correct (net) delay added by the softirq, its delta_start 1749 * is set as the IRQ one. In this way, at the return of the IRQ, the delta 1750 * start of the sofitrq will be zeroed, accounting then only the time 1751 * after that. 1752 * 1753 * The thread follows the same principle. However, if a softirq is 1754 * running, the thread needs to receive the softirq delta_start. The 1755 * reason being is that the softirq will be the last to be unfolded, 1756 * resseting the thread delay to zero. 1757 * 1758 * The PREEMPT_RT is a special case, though. As softirqs run as threads 1759 * on RT, moving the thread is enough. 1760 */ 1761 if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) { 1762 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1763 &osn_var->softirq.delta_start); 1764 1765 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start, 1766 &osn_var->irq.delta_start); 1767 } else { 1768 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1769 &osn_var->irq.delta_start); 1770 } 1771 1772 /* 1773 * Compute the current time with the expected time. 1774 */ 1775 diff = now - tlat->abs_period; 1776 1777 tlat->count++; 1778 s.seqnum = tlat->count; 1779 s.timer_latency = diff; 1780 s.context = IRQ_CONTEXT; 1781 1782 record_timerlat_sample(&s); 1783 1784 if (osnoise_data.stop_tracing) { 1785 if (time_to_us(diff) >= osnoise_data.stop_tracing) { 1786 1787 /* 1788 * At this point, if stop_tracing is set and <= print_stack, 1789 * print_stack is set and would be printed in the thread handler. 1790 * 1791 * Thus, print the stack trace as it is helpful to define the 1792 * root cause of an IRQ latency. 1793 */ 1794 if (osnoise_data.stop_tracing <= osnoise_data.print_stack) { 1795 timerlat_save_stack(0); 1796 timerlat_dump_stack(time_to_us(diff)); 1797 } 1798 1799 osnoise_stop_tracing(); 1800 notify_new_max_latency(diff); 1801 1802 wake_up_process(tlat->kthread); 1803 1804 return HRTIMER_NORESTART; 1805 } 1806 } 1807 1808 wake_up_process(tlat->kthread); 1809 1810 if (osnoise_data.print_stack) 1811 timerlat_save_stack(0); 1812 1813 return HRTIMER_NORESTART; 1814 } 1815 1816 /* 1817 * wait_next_period - Wait for the next period for timerlat 1818 */ 1819 static int wait_next_period(struct timerlat_variables *tlat) 1820 { 1821 ktime_t next_abs_period, now; 1822 u64 rel_period = osnoise_data.timerlat_period * 1000; 1823 1824 now = hrtimer_cb_get_time(&tlat->timer); 1825 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1826 1827 /* 1828 * Save the next abs_period. 1829 */ 1830 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1831 1832 /* 1833 * If the new abs_period is in the past, skip the activation. 1834 */ 1835 while (ktime_compare(now, next_abs_period) > 0) { 1836 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); 1837 tlat->abs_period = (u64) ktime_to_ns(next_abs_period); 1838 } 1839 1840 set_current_state(TASK_INTERRUPTIBLE); 1841 1842 hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD); 1843 schedule(); 1844 return 1; 1845 } 1846 1847 /* 1848 * timerlat_main- Timerlat main 1849 */ 1850 static int timerlat_main(void *data) 1851 { 1852 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1853 struct timerlat_variables *tlat = this_cpu_tmr_var(); 1854 struct timerlat_sample s; 1855 struct sched_param sp; 1856 unsigned long flags; 1857 u64 now, diff; 1858 1859 /* 1860 * Make the thread RT, that is how cyclictest is usually used. 1861 */ 1862 sp.sched_priority = DEFAULT_TIMERLAT_PRIO; 1863 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1864 1865 /* 1866 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. 1867 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread. 1868 * 1869 * To work around this limitation, disable migration and remove the 1870 * flag. 1871 */ 1872 migrate_disable(); 1873 raw_spin_lock_irqsave(¤t->pi_lock, flags); 1874 current->flags &= ~(PF_NO_SETAFFINITY); 1875 raw_spin_unlock_irqrestore(¤t->pi_lock, flags); 1876 1877 tlat->count = 0; 1878 tlat->tracing_thread = false; 1879 1880 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 1881 tlat->kthread = current; 1882 osn_var->pid = current->pid; 1883 /* 1884 * Anotate the arrival time. 1885 */ 1886 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 1887 1888 wait_next_period(tlat); 1889 1890 osn_var->sampling = 1; 1891 1892 while (!kthread_should_stop()) { 1893 1894 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 1895 diff = now - tlat->abs_period; 1896 1897 s.seqnum = tlat->count; 1898 s.timer_latency = diff; 1899 s.context = THREAD_CONTEXT; 1900 1901 record_timerlat_sample(&s); 1902 1903 notify_new_max_latency(diff); 1904 1905 timerlat_dump_stack(time_to_us(diff)); 1906 1907 tlat->tracing_thread = false; 1908 if (osnoise_data.stop_tracing_total) 1909 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 1910 osnoise_stop_tracing(); 1911 1912 if (osnoise_migration_pending()) 1913 break; 1914 1915 wait_next_period(tlat); 1916 } 1917 1918 hrtimer_cancel(&tlat->timer); 1919 migrate_enable(); 1920 return 0; 1921 } 1922 #else /* CONFIG_TIMERLAT_TRACER */ 1923 static int timerlat_main(void *data) 1924 { 1925 return 0; 1926 } 1927 #endif /* CONFIG_TIMERLAT_TRACER */ 1928 1929 /* 1930 * stop_kthread - stop a workload thread 1931 */ 1932 static void stop_kthread(unsigned int cpu) 1933 { 1934 struct task_struct *kthread; 1935 1936 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); 1937 if (kthread) { 1938 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) && 1939 !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) { 1940 kthread_stop(kthread); 1941 } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) { 1942 /* 1943 * This is a user thread waiting on the timerlat_fd. We need 1944 * to close all users, and the best way to guarantee this is 1945 * by killing the thread. NOTE: this is a purpose specific file. 1946 */ 1947 kill_pid(kthread->thread_pid, SIGKILL, 1); 1948 put_task_struct(kthread); 1949 } 1950 } else { 1951 /* if no workload, just return */ 1952 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 1953 /* 1954 * This is set in the osnoise tracer case. 1955 */ 1956 per_cpu(per_cpu_osnoise_var, cpu).sampling = false; 1957 barrier(); 1958 } 1959 } 1960 } 1961 1962 /* 1963 * stop_per_cpu_kthread - Stop per-cpu threads 1964 * 1965 * Stop the osnoise sampling htread. Use this on unload and at system 1966 * shutdown. 1967 */ 1968 static void stop_per_cpu_kthreads(void) 1969 { 1970 int cpu; 1971 1972 cpus_read_lock(); 1973 1974 for_each_online_cpu(cpu) 1975 stop_kthread(cpu); 1976 1977 cpus_read_unlock(); 1978 } 1979 1980 /* 1981 * start_kthread - Start a workload tread 1982 */ 1983 static int start_kthread(unsigned int cpu) 1984 { 1985 struct task_struct *kthread; 1986 void *main = osnoise_main; 1987 char comm[24]; 1988 1989 /* Do not start a new thread if it is already running */ 1990 if (per_cpu(per_cpu_osnoise_var, cpu).kthread) 1991 return 0; 1992 1993 if (timerlat_enabled()) { 1994 snprintf(comm, 24, "timerlat/%d", cpu); 1995 main = timerlat_main; 1996 } else { 1997 /* if no workload, just return */ 1998 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 1999 per_cpu(per_cpu_osnoise_var, cpu).sampling = true; 2000 barrier(); 2001 return 0; 2002 } 2003 snprintf(comm, 24, "osnoise/%d", cpu); 2004 } 2005 2006 kthread = kthread_run_on_cpu(main, NULL, cpu, comm); 2007 2008 if (IS_ERR(kthread)) { 2009 pr_err(BANNER "could not start sampling thread\n"); 2010 return -ENOMEM; 2011 } 2012 2013 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; 2014 cpumask_set_cpu(cpu, &kthread_cpumask); 2015 2016 return 0; 2017 } 2018 2019 /* 2020 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads 2021 * 2022 * This starts the kernel thread that will look for osnoise on many 2023 * cpus. 2024 */ 2025 static int start_per_cpu_kthreads(void) 2026 { 2027 struct cpumask *current_mask = &save_cpumask; 2028 int retval = 0; 2029 int cpu; 2030 2031 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { 2032 if (timerlat_enabled()) 2033 return 0; 2034 } 2035 2036 cpus_read_lock(); 2037 /* 2038 * Run only on online CPUs in which osnoise is allowed to run. 2039 */ 2040 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); 2041 2042 for_each_possible_cpu(cpu) { 2043 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) { 2044 struct task_struct *kthread; 2045 2046 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); 2047 if (!WARN_ON(!kthread)) 2048 kthread_stop(kthread); 2049 } 2050 } 2051 2052 for_each_cpu(cpu, current_mask) { 2053 retval = start_kthread(cpu); 2054 if (retval) { 2055 cpus_read_unlock(); 2056 stop_per_cpu_kthreads(); 2057 return retval; 2058 } 2059 } 2060 2061 cpus_read_unlock(); 2062 2063 return retval; 2064 } 2065 2066 #ifdef CONFIG_HOTPLUG_CPU 2067 static void osnoise_hotplug_workfn(struct work_struct *dummy) 2068 { 2069 unsigned int cpu = smp_processor_id(); 2070 2071 guard(mutex)(&trace_types_lock); 2072 2073 if (!osnoise_has_registered_instances()) 2074 return; 2075 2076 guard(mutex)(&interface_lock); 2077 guard(cpus_read_lock)(); 2078 2079 if (!cpu_online(cpu)) 2080 return; 2081 2082 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 2083 return; 2084 2085 start_kthread(cpu); 2086 } 2087 2088 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); 2089 2090 /* 2091 * osnoise_cpu_init - CPU hotplug online callback function 2092 */ 2093 static int osnoise_cpu_init(unsigned int cpu) 2094 { 2095 schedule_work_on(cpu, &osnoise_hotplug_work); 2096 return 0; 2097 } 2098 2099 /* 2100 * osnoise_cpu_die - CPU hotplug offline callback function 2101 */ 2102 static int osnoise_cpu_die(unsigned int cpu) 2103 { 2104 stop_kthread(cpu); 2105 return 0; 2106 } 2107 2108 static void osnoise_init_hotplug_support(void) 2109 { 2110 int ret; 2111 2112 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", 2113 osnoise_cpu_init, osnoise_cpu_die); 2114 if (ret < 0) 2115 pr_warn(BANNER "Error to init cpu hotplug support\n"); 2116 2117 return; 2118 } 2119 #else /* CONFIG_HOTPLUG_CPU */ 2120 static void osnoise_init_hotplug_support(void) 2121 { 2122 return; 2123 } 2124 #endif /* CONFIG_HOTPLUG_CPU */ 2125 2126 /* 2127 * seq file functions for the osnoise/options file. 2128 */ 2129 static void *s_options_start(struct seq_file *s, loff_t *pos) 2130 { 2131 int option = *pos; 2132 2133 mutex_lock(&interface_lock); 2134 2135 if (option >= OSN_MAX) 2136 return NULL; 2137 2138 return pos; 2139 } 2140 2141 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos) 2142 { 2143 int option = ++(*pos); 2144 2145 if (option >= OSN_MAX) 2146 return NULL; 2147 2148 return pos; 2149 } 2150 2151 static int s_options_show(struct seq_file *s, void *v) 2152 { 2153 loff_t *pos = v; 2154 int option = *pos; 2155 2156 if (option == OSN_DEFAULTS) { 2157 if (osnoise_options == OSN_DEFAULT_OPTIONS) 2158 seq_printf(s, "%s", osnoise_options_str[option]); 2159 else 2160 seq_printf(s, "NO_%s", osnoise_options_str[option]); 2161 goto out; 2162 } 2163 2164 if (test_bit(option, &osnoise_options)) 2165 seq_printf(s, "%s", osnoise_options_str[option]); 2166 else 2167 seq_printf(s, "NO_%s", osnoise_options_str[option]); 2168 2169 out: 2170 if (option != OSN_MAX) 2171 seq_puts(s, " "); 2172 2173 return 0; 2174 } 2175 2176 static void s_options_stop(struct seq_file *s, void *v) 2177 { 2178 seq_puts(s, "\n"); 2179 mutex_unlock(&interface_lock); 2180 } 2181 2182 static const struct seq_operations osnoise_options_seq_ops = { 2183 .start = s_options_start, 2184 .next = s_options_next, 2185 .show = s_options_show, 2186 .stop = s_options_stop 2187 }; 2188 2189 static int osnoise_options_open(struct inode *inode, struct file *file) 2190 { 2191 return seq_open(file, &osnoise_options_seq_ops); 2192 }; 2193 2194 /** 2195 * osnoise_options_write - Write function for "options" entry 2196 * @filp: The active open file structure 2197 * @ubuf: The user buffer that contains the value to write 2198 * @cnt: The maximum number of bytes to write to "file" 2199 * @ppos: The current position in @file 2200 * 2201 * Writing the option name sets the option, writing the "NO_" 2202 * prefix in front of the option name disables it. 2203 * 2204 * Writing "DEFAULTS" resets the option values to the default ones. 2205 */ 2206 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, 2207 size_t cnt, loff_t *ppos) 2208 { 2209 int running, option, enable, retval; 2210 char buf[256], *option_str; 2211 2212 if (cnt >= 256) 2213 return -EINVAL; 2214 2215 if (copy_from_user(buf, ubuf, cnt)) 2216 return -EFAULT; 2217 2218 buf[cnt] = 0; 2219 2220 if (strncmp(buf, "NO_", 3)) { 2221 option_str = strstrip(buf); 2222 enable = true; 2223 } else { 2224 option_str = strstrip(&buf[3]); 2225 enable = false; 2226 } 2227 2228 option = match_string(osnoise_options_str, OSN_MAX, option_str); 2229 if (option < 0) 2230 return -EINVAL; 2231 2232 /* 2233 * trace_types_lock is taken to avoid concurrency on start/stop. 2234 */ 2235 mutex_lock(&trace_types_lock); 2236 running = osnoise_has_registered_instances(); 2237 if (running) 2238 stop_per_cpu_kthreads(); 2239 2240 mutex_lock(&interface_lock); 2241 /* 2242 * avoid CPU hotplug operations that might read options. 2243 */ 2244 cpus_read_lock(); 2245 2246 retval = cnt; 2247 2248 if (enable) { 2249 if (option == OSN_DEFAULTS) 2250 osnoise_options = OSN_DEFAULT_OPTIONS; 2251 else 2252 set_bit(option, &osnoise_options); 2253 } else { 2254 if (option == OSN_DEFAULTS) 2255 retval = -EINVAL; 2256 else 2257 clear_bit(option, &osnoise_options); 2258 } 2259 2260 cpus_read_unlock(); 2261 mutex_unlock(&interface_lock); 2262 2263 if (running) 2264 start_per_cpu_kthreads(); 2265 mutex_unlock(&trace_types_lock); 2266 2267 return retval; 2268 } 2269 2270 /* 2271 * osnoise_cpus_read - Read function for reading the "cpus" file 2272 * @filp: The active open file structure 2273 * @ubuf: The userspace provided buffer to read value into 2274 * @cnt: The maximum number of bytes to read 2275 * @ppos: The current "file" position 2276 * 2277 * Prints the "cpus" output into the user-provided buffer. 2278 */ 2279 static ssize_t 2280 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, 2281 loff_t *ppos) 2282 { 2283 char *mask_str __free(kfree) = NULL; 2284 int len; 2285 2286 guard(mutex)(&interface_lock); 2287 2288 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; 2289 mask_str = kmalloc(len, GFP_KERNEL); 2290 if (!mask_str) 2291 return -ENOMEM; 2292 2293 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); 2294 if (len >= count) 2295 return -EINVAL; 2296 2297 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 2298 2299 return count; 2300 } 2301 2302 /* 2303 * osnoise_cpus_write - Write function for "cpus" entry 2304 * @filp: The active open file structure 2305 * @ubuf: The user buffer that contains the value to write 2306 * @count: The maximum number of bytes to write to "file" 2307 * @ppos: The current position in @file 2308 * 2309 * This function provides a write implementation for the "cpus" 2310 * interface to the osnoise trace. By default, it lists all CPUs, 2311 * in this way, allowing osnoise threads to run on any online CPU 2312 * of the system. It serves to restrict the execution of osnoise to the 2313 * set of CPUs writing via this interface. Why not use "tracing_cpumask"? 2314 * Because the user might be interested in tracing what is running on 2315 * other CPUs. For instance, one might run osnoise in one HT CPU 2316 * while observing what is running on the sibling HT CPU. 2317 */ 2318 static ssize_t 2319 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 2320 loff_t *ppos) 2321 { 2322 cpumask_var_t osnoise_cpumask_new; 2323 int running, err; 2324 char *buf __free(kfree) = NULL; 2325 2326 if (count < 1) 2327 return 0; 2328 2329 buf = memdup_user_nul(ubuf, count); 2330 if (IS_ERR(buf)) 2331 return PTR_ERR(buf); 2332 2333 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) 2334 return -ENOMEM; 2335 2336 err = cpulist_parse(buf, osnoise_cpumask_new); 2337 if (err) 2338 goto err_free; 2339 2340 /* 2341 * trace_types_lock is taken to avoid concurrency on start/stop. 2342 */ 2343 mutex_lock(&trace_types_lock); 2344 running = osnoise_has_registered_instances(); 2345 if (running) 2346 stop_per_cpu_kthreads(); 2347 2348 mutex_lock(&interface_lock); 2349 /* 2350 * osnoise_cpumask is read by CPU hotplug operations. 2351 */ 2352 cpus_read_lock(); 2353 2354 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); 2355 2356 cpus_read_unlock(); 2357 mutex_unlock(&interface_lock); 2358 2359 if (running) 2360 start_per_cpu_kthreads(); 2361 mutex_unlock(&trace_types_lock); 2362 2363 free_cpumask_var(osnoise_cpumask_new); 2364 return count; 2365 2366 err_free: 2367 free_cpumask_var(osnoise_cpumask_new); 2368 2369 return err; 2370 } 2371 2372 #ifdef CONFIG_TIMERLAT_TRACER 2373 static int timerlat_fd_open(struct inode *inode, struct file *file) 2374 { 2375 struct osnoise_variables *osn_var; 2376 struct timerlat_variables *tlat; 2377 long cpu = (long) inode->i_cdev; 2378 2379 mutex_lock(&interface_lock); 2380 2381 /* 2382 * This file is accessible only if timerlat is enabled, and 2383 * NO_OSNOISE_WORKLOAD is set. 2384 */ 2385 if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) { 2386 mutex_unlock(&interface_lock); 2387 return -EINVAL; 2388 } 2389 2390 migrate_disable(); 2391 2392 osn_var = this_cpu_osn_var(); 2393 2394 /* 2395 * The osn_var->pid holds the single access to this file. 2396 */ 2397 if (osn_var->pid) { 2398 mutex_unlock(&interface_lock); 2399 migrate_enable(); 2400 return -EBUSY; 2401 } 2402 2403 /* 2404 * timerlat tracer is a per-cpu tracer. Check if the user-space too 2405 * is pinned to a single CPU. The tracer laters monitor if the task 2406 * migrates and then disables tracer if it does. However, it is 2407 * worth doing this basic acceptance test to avoid obviusly wrong 2408 * setup. 2409 */ 2410 if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) { 2411 mutex_unlock(&interface_lock); 2412 migrate_enable(); 2413 return -EPERM; 2414 } 2415 2416 /* 2417 * From now on, it is good to go. 2418 */ 2419 file->private_data = inode->i_cdev; 2420 2421 get_task_struct(current); 2422 2423 osn_var->kthread = current; 2424 osn_var->pid = current->pid; 2425 2426 /* 2427 * Setup is done. 2428 */ 2429 mutex_unlock(&interface_lock); 2430 2431 tlat = this_cpu_tmr_var(); 2432 tlat->count = 0; 2433 2434 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 2435 2436 migrate_enable(); 2437 return 0; 2438 }; 2439 2440 /* 2441 * timerlat_fd_read - Read function for "timerlat_fd" file 2442 * @file: The active open file structure 2443 * @ubuf: The userspace provided buffer to read value into 2444 * @cnt: The maximum number of bytes to read 2445 * @ppos: The current "file" position 2446 * 2447 * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error. 2448 */ 2449 static ssize_t 2450 timerlat_fd_read(struct file *file, char __user *ubuf, size_t count, 2451 loff_t *ppos) 2452 { 2453 long cpu = (long) file->private_data; 2454 struct osnoise_variables *osn_var; 2455 struct timerlat_variables *tlat; 2456 struct timerlat_sample s; 2457 s64 diff; 2458 u64 now; 2459 2460 migrate_disable(); 2461 2462 tlat = this_cpu_tmr_var(); 2463 2464 /* 2465 * While in user-space, the thread is migratable. There is nothing 2466 * we can do about it. 2467 * So, if the thread is running on another CPU, stop the machinery. 2468 */ 2469 if (cpu == smp_processor_id()) { 2470 if (tlat->uthread_migrate) { 2471 migrate_enable(); 2472 return -EINVAL; 2473 } 2474 } else { 2475 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1; 2476 osnoise_taint("timerlat user thread migrate\n"); 2477 osnoise_stop_tracing(); 2478 migrate_enable(); 2479 return -EINVAL; 2480 } 2481 2482 osn_var = this_cpu_osn_var(); 2483 2484 /* 2485 * The timerlat in user-space runs in a different order: 2486 * the read() starts from the execution of the previous occurrence, 2487 * sleeping for the next occurrence. 2488 * 2489 * So, skip if we are entering on read() before the first wakeup 2490 * from timerlat IRQ: 2491 */ 2492 if (likely(osn_var->sampling)) { 2493 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 2494 diff = now - tlat->abs_period; 2495 2496 /* 2497 * it was not a timer firing, but some other signal? 2498 */ 2499 if (diff < 0) 2500 goto out; 2501 2502 s.seqnum = tlat->count; 2503 s.timer_latency = diff; 2504 s.context = THREAD_URET; 2505 2506 record_timerlat_sample(&s); 2507 2508 notify_new_max_latency(diff); 2509 2510 tlat->tracing_thread = false; 2511 if (osnoise_data.stop_tracing_total) 2512 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) 2513 osnoise_stop_tracing(); 2514 } else { 2515 tlat->tracing_thread = false; 2516 tlat->kthread = current; 2517 2518 /* Annotate now to drift new period */ 2519 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); 2520 2521 osn_var->sampling = 1; 2522 } 2523 2524 /* wait for the next period */ 2525 wait_next_period(tlat); 2526 2527 /* This is the wakeup from this cycle */ 2528 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); 2529 diff = now - tlat->abs_period; 2530 2531 /* 2532 * it was not a timer firing, but some other signal? 2533 */ 2534 if (diff < 0) 2535 goto out; 2536 2537 s.seqnum = tlat->count; 2538 s.timer_latency = diff; 2539 s.context = THREAD_CONTEXT; 2540 2541 record_timerlat_sample(&s); 2542 2543 if (osnoise_data.stop_tracing_total) { 2544 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) { 2545 timerlat_dump_stack(time_to_us(diff)); 2546 notify_new_max_latency(diff); 2547 osnoise_stop_tracing(); 2548 } 2549 } 2550 2551 out: 2552 migrate_enable(); 2553 return 0; 2554 } 2555 2556 static int timerlat_fd_release(struct inode *inode, struct file *file) 2557 { 2558 struct osnoise_variables *osn_var; 2559 struct timerlat_variables *tlat_var; 2560 long cpu = (long) file->private_data; 2561 2562 migrate_disable(); 2563 mutex_lock(&interface_lock); 2564 2565 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); 2566 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); 2567 2568 if (tlat_var->kthread) 2569 hrtimer_cancel(&tlat_var->timer); 2570 memset(tlat_var, 0, sizeof(*tlat_var)); 2571 2572 osn_var->sampling = 0; 2573 osn_var->pid = 0; 2574 2575 /* 2576 * We are leaving, not being stopped... see stop_kthread(); 2577 */ 2578 if (osn_var->kthread) { 2579 put_task_struct(osn_var->kthread); 2580 osn_var->kthread = NULL; 2581 } 2582 2583 mutex_unlock(&interface_lock); 2584 migrate_enable(); 2585 return 0; 2586 } 2587 #endif 2588 2589 /* 2590 * osnoise/runtime_us: cannot be greater than the period. 2591 */ 2592 static struct trace_min_max_param osnoise_runtime = { 2593 .lock = &interface_lock, 2594 .val = &osnoise_data.sample_runtime, 2595 .max = &osnoise_data.sample_period, 2596 .min = NULL, 2597 }; 2598 2599 /* 2600 * osnoise/period_us: cannot be smaller than the runtime. 2601 */ 2602 static struct trace_min_max_param osnoise_period = { 2603 .lock = &interface_lock, 2604 .val = &osnoise_data.sample_period, 2605 .max = NULL, 2606 .min = &osnoise_data.sample_runtime, 2607 }; 2608 2609 /* 2610 * osnoise/stop_tracing_us: no limit. 2611 */ 2612 static struct trace_min_max_param osnoise_stop_tracing_in = { 2613 .lock = &interface_lock, 2614 .val = &osnoise_data.stop_tracing, 2615 .max = NULL, 2616 .min = NULL, 2617 }; 2618 2619 /* 2620 * osnoise/stop_tracing_total_us: no limit. 2621 */ 2622 static struct trace_min_max_param osnoise_stop_tracing_total = { 2623 .lock = &interface_lock, 2624 .val = &osnoise_data.stop_tracing_total, 2625 .max = NULL, 2626 .min = NULL, 2627 }; 2628 2629 #ifdef CONFIG_TIMERLAT_TRACER 2630 /* 2631 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total 2632 * latency is higher than val. 2633 */ 2634 static struct trace_min_max_param osnoise_print_stack = { 2635 .lock = &interface_lock, 2636 .val = &osnoise_data.print_stack, 2637 .max = NULL, 2638 .min = NULL, 2639 }; 2640 2641 /* 2642 * osnoise/timerlat_period: min 100 us, max 1 s 2643 */ 2644 static u64 timerlat_min_period = 100; 2645 static u64 timerlat_max_period = 1000000; 2646 static struct trace_min_max_param timerlat_period = { 2647 .lock = &interface_lock, 2648 .val = &osnoise_data.timerlat_period, 2649 .max = &timerlat_max_period, 2650 .min = &timerlat_min_period, 2651 }; 2652 2653 static const struct file_operations timerlat_fd_fops = { 2654 .open = timerlat_fd_open, 2655 .read = timerlat_fd_read, 2656 .release = timerlat_fd_release, 2657 .llseek = generic_file_llseek, 2658 }; 2659 #endif 2660 2661 static const struct file_operations cpus_fops = { 2662 .open = tracing_open_generic, 2663 .read = osnoise_cpus_read, 2664 .write = osnoise_cpus_write, 2665 .llseek = generic_file_llseek, 2666 }; 2667 2668 static const struct file_operations osnoise_options_fops = { 2669 .open = osnoise_options_open, 2670 .read = seq_read, 2671 .llseek = seq_lseek, 2672 .release = seq_release, 2673 .write = osnoise_options_write 2674 }; 2675 2676 #ifdef CONFIG_TIMERLAT_TRACER 2677 #ifdef CONFIG_STACKTRACE 2678 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2679 { 2680 struct dentry *tmp; 2681 2682 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 2683 &osnoise_print_stack, &trace_min_max_fops); 2684 if (!tmp) 2685 return -ENOMEM; 2686 2687 return 0; 2688 } 2689 #else /* CONFIG_STACKTRACE */ 2690 static int init_timerlat_stack_tracefs(struct dentry *top_dir) 2691 { 2692 return 0; 2693 } 2694 #endif /* CONFIG_STACKTRACE */ 2695 2696 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir) 2697 { 2698 struct dentry *timerlat_fd; 2699 struct dentry *per_cpu; 2700 struct dentry *cpu_dir; 2701 char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */ 2702 long cpu; 2703 2704 /* 2705 * Why not using tracing instance per_cpu/ dir? 2706 * 2707 * Because osnoise/timerlat have a single workload, having 2708 * multiple files like these are wast of memory. 2709 */ 2710 per_cpu = tracefs_create_dir("per_cpu", top_dir); 2711 if (!per_cpu) 2712 return -ENOMEM; 2713 2714 for_each_possible_cpu(cpu) { 2715 snprintf(cpu_str, 30, "cpu%ld", cpu); 2716 cpu_dir = tracefs_create_dir(cpu_str, per_cpu); 2717 if (!cpu_dir) 2718 goto out_clean; 2719 2720 timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ, 2721 cpu_dir, NULL, &timerlat_fd_fops); 2722 if (!timerlat_fd) 2723 goto out_clean; 2724 2725 /* Record the CPU */ 2726 d_inode(timerlat_fd)->i_cdev = (void *)(cpu); 2727 } 2728 2729 return 0; 2730 2731 out_clean: 2732 tracefs_remove(per_cpu); 2733 return -ENOMEM; 2734 } 2735 2736 /* 2737 * init_timerlat_tracefs - A function to initialize the timerlat interface files 2738 */ 2739 static int init_timerlat_tracefs(struct dentry *top_dir) 2740 { 2741 struct dentry *tmp; 2742 int retval; 2743 2744 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 2745 &timerlat_period, &trace_min_max_fops); 2746 if (!tmp) 2747 return -ENOMEM; 2748 2749 retval = osnoise_create_cpu_timerlat_fd(top_dir); 2750 if (retval) 2751 return retval; 2752 2753 return init_timerlat_stack_tracefs(top_dir); 2754 } 2755 #else /* CONFIG_TIMERLAT_TRACER */ 2756 static int init_timerlat_tracefs(struct dentry *top_dir) 2757 { 2758 return 0; 2759 } 2760 #endif /* CONFIG_TIMERLAT_TRACER */ 2761 2762 /* 2763 * init_tracefs - A function to initialize the tracefs interface files 2764 * 2765 * This function creates entries in tracefs for "osnoise" and "timerlat". 2766 * It creates these directories in the tracing directory, and within that 2767 * directory the use can change and view the configs. 2768 */ 2769 static int init_tracefs(void) 2770 { 2771 struct dentry *top_dir; 2772 struct dentry *tmp; 2773 int ret; 2774 2775 ret = tracing_init_dentry(); 2776 if (ret) 2777 return -ENOMEM; 2778 2779 top_dir = tracefs_create_dir("osnoise", NULL); 2780 if (!top_dir) 2781 return 0; 2782 2783 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, 2784 &osnoise_period, &trace_min_max_fops); 2785 if (!tmp) 2786 goto err; 2787 2788 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, 2789 &osnoise_runtime, &trace_min_max_fops); 2790 if (!tmp) 2791 goto err; 2792 2793 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, 2794 &osnoise_stop_tracing_in, &trace_min_max_fops); 2795 if (!tmp) 2796 goto err; 2797 2798 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, 2799 &osnoise_stop_tracing_total, &trace_min_max_fops); 2800 if (!tmp) 2801 goto err; 2802 2803 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); 2804 if (!tmp) 2805 goto err; 2806 2807 tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL, 2808 &osnoise_options_fops); 2809 if (!tmp) 2810 goto err; 2811 2812 ret = init_timerlat_tracefs(top_dir); 2813 if (ret) 2814 goto err; 2815 2816 return 0; 2817 2818 err: 2819 tracefs_remove(top_dir); 2820 return -ENOMEM; 2821 } 2822 2823 static int osnoise_hook_events(void) 2824 { 2825 int retval; 2826 2827 /* 2828 * Trace is already hooked, we are re-enabling from 2829 * a stop_tracing_*. 2830 */ 2831 if (trace_osnoise_callback_enabled) 2832 return 0; 2833 2834 retval = hook_irq_events(); 2835 if (retval) 2836 return -EINVAL; 2837 2838 retval = hook_softirq_events(); 2839 if (retval) 2840 goto out_unhook_irq; 2841 2842 retval = hook_thread_events(); 2843 /* 2844 * All fine! 2845 */ 2846 if (!retval) 2847 return 0; 2848 2849 unhook_softirq_events(); 2850 out_unhook_irq: 2851 unhook_irq_events(); 2852 return -EINVAL; 2853 } 2854 2855 static void osnoise_unhook_events(void) 2856 { 2857 unhook_thread_events(); 2858 unhook_softirq_events(); 2859 unhook_irq_events(); 2860 } 2861 2862 /* 2863 * osnoise_workload_start - start the workload and hook to events 2864 */ 2865 static int osnoise_workload_start(void) 2866 { 2867 int retval; 2868 2869 /* 2870 * Instances need to be registered after calling workload 2871 * start. Hence, if there is already an instance, the 2872 * workload was already registered. Otherwise, this 2873 * code is on the way to register the first instance, 2874 * and the workload will start. 2875 */ 2876 if (osnoise_has_registered_instances()) 2877 return 0; 2878 2879 osn_var_reset_all(); 2880 2881 retval = osnoise_hook_events(); 2882 if (retval) 2883 return retval; 2884 2885 /* 2886 * Make sure that ftrace_nmi_enter/exit() see reset values 2887 * before enabling trace_osnoise_callback_enabled. 2888 */ 2889 barrier(); 2890 trace_osnoise_callback_enabled = true; 2891 2892 retval = start_per_cpu_kthreads(); 2893 if (retval) { 2894 trace_osnoise_callback_enabled = false; 2895 /* 2896 * Make sure that ftrace_nmi_enter/exit() see 2897 * trace_osnoise_callback_enabled as false before continuing. 2898 */ 2899 barrier(); 2900 2901 osnoise_unhook_events(); 2902 return retval; 2903 } 2904 2905 return 0; 2906 } 2907 2908 /* 2909 * osnoise_workload_stop - stop the workload and unhook the events 2910 */ 2911 static void osnoise_workload_stop(void) 2912 { 2913 /* 2914 * Instances need to be unregistered before calling 2915 * stop. Hence, if there is a registered instance, more 2916 * than one instance is running, and the workload will not 2917 * yet stop. Otherwise, this code is on the way to disable 2918 * the last instance, and the workload can stop. 2919 */ 2920 if (osnoise_has_registered_instances()) 2921 return; 2922 2923 /* 2924 * If callbacks were already disabled in a previous stop 2925 * call, there is no need to disable then again. 2926 * 2927 * For instance, this happens when tracing is stopped via: 2928 * echo 0 > tracing_on 2929 * echo nop > current_tracer. 2930 */ 2931 if (!trace_osnoise_callback_enabled) 2932 return; 2933 2934 trace_osnoise_callback_enabled = false; 2935 /* 2936 * Make sure that ftrace_nmi_enter/exit() see 2937 * trace_osnoise_callback_enabled as false before continuing. 2938 */ 2939 barrier(); 2940 2941 stop_per_cpu_kthreads(); 2942 2943 osnoise_unhook_events(); 2944 } 2945 2946 static void osnoise_tracer_start(struct trace_array *tr) 2947 { 2948 int retval; 2949 2950 /* 2951 * If the instance is already registered, there is no need to 2952 * register it again. 2953 */ 2954 if (osnoise_instance_registered(tr)) 2955 return; 2956 2957 retval = osnoise_workload_start(); 2958 if (retval) 2959 pr_err(BANNER "Error starting osnoise tracer\n"); 2960 2961 osnoise_register_instance(tr); 2962 } 2963 2964 static void osnoise_tracer_stop(struct trace_array *tr) 2965 { 2966 osnoise_unregister_instance(tr); 2967 osnoise_workload_stop(); 2968 } 2969 2970 static int osnoise_tracer_init(struct trace_array *tr) 2971 { 2972 /* 2973 * Only allow osnoise tracer if timerlat tracer is not running 2974 * already. 2975 */ 2976 if (timerlat_enabled()) 2977 return -EBUSY; 2978 2979 tr->max_latency = 0; 2980 2981 osnoise_tracer_start(tr); 2982 return 0; 2983 } 2984 2985 static void osnoise_tracer_reset(struct trace_array *tr) 2986 { 2987 osnoise_tracer_stop(tr); 2988 } 2989 2990 static struct tracer osnoise_tracer __read_mostly = { 2991 .name = "osnoise", 2992 .init = osnoise_tracer_init, 2993 .reset = osnoise_tracer_reset, 2994 .start = osnoise_tracer_start, 2995 .stop = osnoise_tracer_stop, 2996 .print_header = print_osnoise_headers, 2997 .allow_instances = true, 2998 }; 2999 3000 #ifdef CONFIG_TIMERLAT_TRACER 3001 static void timerlat_tracer_start(struct trace_array *tr) 3002 { 3003 int retval; 3004 3005 /* 3006 * If the instance is already registered, there is no need to 3007 * register it again. 3008 */ 3009 if (osnoise_instance_registered(tr)) 3010 return; 3011 3012 retval = osnoise_workload_start(); 3013 if (retval) 3014 pr_err(BANNER "Error starting timerlat tracer\n"); 3015 3016 osnoise_register_instance(tr); 3017 3018 return; 3019 } 3020 3021 static void timerlat_tracer_stop(struct trace_array *tr) 3022 { 3023 int cpu; 3024 3025 osnoise_unregister_instance(tr); 3026 3027 /* 3028 * Instruct the threads to stop only if this is the last instance. 3029 */ 3030 if (!osnoise_has_registered_instances()) { 3031 for_each_online_cpu(cpu) 3032 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 3033 } 3034 3035 osnoise_workload_stop(); 3036 } 3037 3038 static int timerlat_tracer_init(struct trace_array *tr) 3039 { 3040 /* 3041 * Only allow timerlat tracer if osnoise tracer is not running already. 3042 */ 3043 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) 3044 return -EBUSY; 3045 3046 /* 3047 * If this is the first instance, set timerlat_tracer to block 3048 * osnoise tracer start. 3049 */ 3050 if (!osnoise_has_registered_instances()) 3051 osnoise_data.timerlat_tracer = 1; 3052 3053 tr->max_latency = 0; 3054 timerlat_tracer_start(tr); 3055 3056 return 0; 3057 } 3058 3059 static void timerlat_tracer_reset(struct trace_array *tr) 3060 { 3061 timerlat_tracer_stop(tr); 3062 3063 /* 3064 * If this is the last instance, reset timerlat_tracer allowing 3065 * osnoise to be started. 3066 */ 3067 if (!osnoise_has_registered_instances()) 3068 osnoise_data.timerlat_tracer = 0; 3069 } 3070 3071 static struct tracer timerlat_tracer __read_mostly = { 3072 .name = "timerlat", 3073 .init = timerlat_tracer_init, 3074 .reset = timerlat_tracer_reset, 3075 .start = timerlat_tracer_start, 3076 .stop = timerlat_tracer_stop, 3077 .print_header = print_timerlat_headers, 3078 .allow_instances = true, 3079 }; 3080 3081 __init static int init_timerlat_tracer(void) 3082 { 3083 return register_tracer(&timerlat_tracer); 3084 } 3085 #else /* CONFIG_TIMERLAT_TRACER */ 3086 __init static int init_timerlat_tracer(void) 3087 { 3088 return 0; 3089 } 3090 #endif /* CONFIG_TIMERLAT_TRACER */ 3091 3092 __init static int init_osnoise_tracer(void) 3093 { 3094 int ret; 3095 3096 mutex_init(&interface_lock); 3097 3098 cpumask_copy(&osnoise_cpumask, cpu_all_mask); 3099 3100 ret = register_tracer(&osnoise_tracer); 3101 if (ret) { 3102 pr_err(BANNER "Error registering osnoise!\n"); 3103 return ret; 3104 } 3105 3106 ret = init_timerlat_tracer(); 3107 if (ret) { 3108 pr_err(BANNER "Error registering timerlat!\n"); 3109 return ret; 3110 } 3111 3112 osnoise_init_hotplug_support(); 3113 3114 INIT_LIST_HEAD_RCU(&osnoise_instances); 3115 3116 init_tracefs(); 3117 3118 return 0; 3119 } 3120 late_initcall(init_osnoise_tracer); 3121