// SPDX-License-Identifier: GPL-2.0
/*
 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
 *
 * Based on "hwlat_detector" tracer by:
 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *   With feedback from Clark Williams <williams@redhat.com>
 *
 * And also based on the rtsl tracer presented on:
 *   DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
 *   scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
 *   (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
 *
 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
 */

#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched.h>
#include "trace.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/trace/irq_vectors.h>
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#endif /* CONFIG_X86_LOCAL_APIC */

#include <trace/events/irq.h>
#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/osnoise.h>

/*
 * Default values.
 */
#define BANNER			"osnoise: "
#define DEFAULT_SAMPLE_PERIOD	1000000		/* 1s */
#define DEFAULT_SAMPLE_RUNTIME	1000000		/* 1s */

#define DEFAULT_TIMERLAT_PERIOD	1000		/* 1ms */
#define DEFAULT_TIMERLAT_PRIO	95		/* FIFO 95 */

/*
 * osnoise/options entries.
 */
enum osnoise_options_index {
	OSN_DEFAULTS = 0,
	OSN_WORKLOAD,
	OSN_PANIC_ON_STOP,
	OSN_PREEMPT_DISABLE,
	OSN_IRQ_DISABLE,
	OSN_MAX
};

static const char * const osnoise_options_str[OSN_MAX] = {
	"DEFAULTS",
	"OSNOISE_WORKLOAD",
	"PANIC_ON_STOP",
	"OSNOISE_PREEMPT_DISABLE",
	"OSNOISE_IRQ_DISABLE" };

#define OSN_DEFAULT_OPTIONS	0x2
static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS;
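/*
 * A note on the options bitmask (illustrative, not part of the original
 * comments): OSN_DEFAULT_OPTIONS is 0x2, i.e. BIT(OSN_WORKLOAD), so only
 * the kernel workload option is set by default. The osnoise/options
 * tracefs file exposes these bits by name; assuming the usual tracefs
 * mount point, a session could look like:
 *
 *	# cat /sys/kernel/tracing/osnoise/options
 *	DEFAULTS OSNOISE_WORKLOAD NO_PANIC_ON_STOP ...
 *	# echo NO_OSNOISE_WORKLOAD > /sys/kernel/tracing/osnoise/options
 */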
/*
 * trace_array of the enabled osnoise/timerlat instances.
 */
struct osnoise_instance {
	struct list_head	list;
	struct trace_array	*tr;
};

static struct list_head osnoise_instances;

static bool osnoise_has_registered_instances(void)
{
	return !!list_first_or_null_rcu(&osnoise_instances,
					struct osnoise_instance,
					list);
}

/*
 * osnoise_instance_registered - check if a tr is already registered
 */
static int osnoise_instance_registered(struct trace_array *tr)
{
	struct osnoise_instance *inst;
	int found = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		if (inst->tr == tr)
			found = 1;
	}
	rcu_read_unlock();

	return found;
}

/*
 * osnoise_register_instance - register a new trace instance
 *
 * Register a trace_array *tr in the list of instances running the
 * osnoise/timerlat tracers.
 */
static int osnoise_register_instance(struct trace_array *tr)
{
	struct osnoise_instance *inst;

	/*
	 * register/unregister serialization is provided by trace's
	 * trace_types_lock.
	 */
	lockdep_assert_held(&trace_types_lock);

	inst = kmalloc(sizeof(*inst), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	INIT_LIST_HEAD_RCU(&inst->list);
	inst->tr = tr;
	list_add_tail_rcu(&inst->list, &osnoise_instances);

	return 0;
}

/*
 * osnoise_unregister_instance - unregister a registered trace instance
 *
 * Remove the trace_array *tr from the list of instances running the
 * osnoise/timerlat tracers.
 */
static void osnoise_unregister_instance(struct trace_array *tr)
{
	struct osnoise_instance *inst;
	int found = 0;

	/*
	 * register/unregister serialization is provided by trace's
	 * trace_types_lock.
	 */
	list_for_each_entry_rcu(inst, &osnoise_instances, list,
				lockdep_is_held(&trace_types_lock)) {
		if (inst->tr == tr) {
			list_del_rcu(&inst->list);
			found = 1;
			break;
		}
	}

	if (!found)
		return;

	kvfree_rcu_mightsleep(inst);
}

/*
 * NMI runtime info.
 */
struct osn_nmi {
	u64	count;
	u64	delta_start;
};

/*
 * IRQ runtime info.
 */
struct osn_irq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

#define IRQ_CONTEXT	0
#define THREAD_CONTEXT	1
#define THREAD_URET	2

/*
 * softirq runtime info.
 */
struct osn_softirq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * thread runtime info.
 */
struct osn_thread {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
	struct task_struct	*kthread;
	bool			sampling;
	pid_t			pid;
	struct osn_nmi		nmi;
	struct osn_irq		irq;
	struct osn_softirq	softirq;
	struct osn_thread	thread;
	local_t			int_counter;
};

/*
 * Per-cpu runtime information.
 */
static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);

/*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
 */
static inline struct osnoise_variables *this_cpu_osn_var(void)
{
	return this_cpu_ptr(&per_cpu_osnoise_var);
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
	struct task_struct	*kthread;
	struct hrtimer		timer;
	u64			rel_period;
	u64			abs_period;
	bool			tracing_thread;
	u64			count;
	bool			uthread_migrate;
};

static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);

/*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
 */
static inline struct timerlat_variables *this_cpu_tmr_var(void)
{
	return this_cpu_ptr(&per_cpu_timerlat_var);
}
/*
 * tlat_var_reset - Reset the values of the per-cpu timerlat_variables
 */
static inline void tlat_var_reset(void)
{
	struct timerlat_variables *tlat_var;
	int cpu;

	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
		memset(tlat_var, 0, sizeof(*tlat_var));
	}
}
#else /* CONFIG_TIMERLAT_TRACER */
#define tlat_var_reset()	do {} while (0)
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * osn_var_reset - Reset the values of the per-cpu osnoise_variables
 */
static inline void osn_var_reset(void)
{
	struct osnoise_variables *osn_var;
	int cpu;

	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
		memset(osn_var, 0, sizeof(*osn_var));
	}
}

/*
 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
 */
static inline void osn_var_reset_all(void)
{
	osn_var_reset();
	tlat_var_reset();
}

/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 */
bool trace_osnoise_callback_enabled;

/*
 * osnoise sample structure definition. Used to store the statistics of a
 * sample run.
 */
struct osnoise_sample {
	u64	runtime;	/* runtime */
	u64	noise;		/* noise */
	u64	max_sample;	/* max single noise sample */
	int	hw_count;	/* # HW (incl. hypervisor) interference */
	int	nmi_count;	/* # NMIs during this sample */
	int	irq_count;	/* # IRQs during this sample */
	int	softirq_count;	/* # softirqs during this sample */
	int	thread_count;	/* # threads during this sample */
};

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat sample structure definition. Used to store the statistics of
 * a sample run.
 */
struct timerlat_sample {
	u64		timer_latency;	/* timer_latency */
	unsigned int	seqnum;		/* unique sequence */
	int		context;	/* timer context */
};
#endif

/*
 * Protect the interface.
 */
static DEFINE_MUTEX(interface_lock);

/*
 * Tracer data.
 */
static struct osnoise_data {
	u64	sample_period;		/* total sampling period */
	u64	sample_runtime;		/* active sampling portion of period */
	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
	u64	timerlat_period;	/* timerlat period */
	u64	print_stack;		/* print IRQ stack if total > */
	int	timerlat_tracer;	/* timerlat tracer */
#endif
	bool	tainted;		/* inform users and developers about a problem */
} osnoise_data = {
	.sample_period		= DEFAULT_SAMPLE_PERIOD,
	.sample_runtime		= DEFAULT_SAMPLE_RUNTIME,
	.stop_tracing		= 0,
	.stop_tracing_total	= 0,
#ifdef CONFIG_TIMERLAT_TRACER
	.print_stack		= 0,
	.timerlat_period	= DEFAULT_TIMERLAT_PERIOD,
	.timerlat_tracer	= 0,
#endif
};
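/*
 * For orientation (illustrative, not part of the original comments): these
 * fields are the backing store for the tracer's tracefs knobs. Assuming the
 * usual interface, osnoise/period_us and osnoise/runtime_us map onto
 * sample_period and sample_runtime, while stop_tracing_us and
 * stop_tracing_total_us map onto stop_tracing and stop_tracing_total. The
 * file names are in microseconds, matching the defaults above (1s period,
 * 1s runtime).
 */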
#ifdef CONFIG_TIMERLAT_TRACER
static inline bool timerlat_enabled(void)
{
	return osnoise_data.timerlat_tracer;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	struct timerlat_variables *tlat_var = this_cpu_tmr_var();

	/*
	 * If timerlat is enabled, but the irq handler did not yet run
	 * to set tracing_thread, do not trace.
	 */
	if (!tlat_var->tracing_thread) {
		osn_var->softirq.arrival_time = 0;
		osn_var->softirq.delta_start = 0;
		return 0;
	}
	return 1;
}

static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
	struct timerlat_variables *tlat_var = this_cpu_tmr_var();

	/*
	 * If timerlat is enabled, but the irq handler did not yet run
	 * to set tracing_thread, do not trace.
	 */
	if (!tlat_var->tracing_thread) {
		osn_var->thread.delta_start = 0;
		osn_var->thread.arrival_time = 0;
		return 0;
	}
	return 1;
}
#else /* CONFIG_TIMERLAT_TRACER */
static inline bool timerlat_enabled(void)
{
	return false;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
#endif

#ifdef CONFIG_PREEMPT_RT
/*
 * Print the osnoise header info.
 */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "#                                _-------=> irqs-off\n");
	seq_puts(s, "#                               / _------=> need-resched\n");
	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");

	seq_puts(s, "#                              |||||| /          ");
	seq_puts(s, "                                     MAX\n");

	seq_puts(s, "#                              ||||| /                         ");
	seq_puts(s, "                    SINGLE      Interference counters:\n");

	seq_puts(s, "#                              |||||||               RUNTIME   ");
	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");

	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

	seq_puts(s, "#              | |         |   |||||||      |           |      ");
	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "#                                _-----=> irqs-off\n");
	seq_puts(s, "#                               / _----=> need-resched\n");
	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
	seq_puts(s, "#                              || / _--=> preempt-depth\n");
	seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
	seq_puts(s, "                    MAX\n");
	seq_puts(s, "#                              |||| /     delay               ");
	seq_puts(s, "                    SINGLE      Interference counters:\n");

	seq_puts(s, "#                              |||||               RUNTIME    ");
	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");

	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US     ");
	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

	seq_puts(s, "#              | |         |   |||||      |           |       ");
	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * osnoise_taint - report an osnoise error.
 */
#define osnoise_taint(msg) ({							\
	struct osnoise_instance *inst;						\
	struct trace_buffer *buffer;						\
										\
	rcu_read_lock();							\
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {		\
		buffer = inst->tr->array_buffer.buffer;				\
		trace_array_printk_buf(buffer, _THIS_IP_, msg);			\
	}									\
	rcu_read_unlock();							\
	osnoise_data.tainted = true;						\
})
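/*
 * Illustrative only (not taken from this file): with the headers above, a
 * single osnoise sample line is expected to look roughly like:
 *
 *	osnoise/8-961   [008] ....  5789.857532: 1000000      190  99.98100
 *		9     18      0   1007     18      1
 *
 * i.e. RUNTIME IN US, NOISE IN US, % OF CPU AVAILABLE, MAX SINGLE NOISE
 * IN US, followed by the HW/NMI/IRQ/SIRQ/THREAD interference counters.
 */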
/*
 * Record an osnoise_sample into the tracer buffer.
 */
static void
__trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
{
	struct trace_event_call *call = &event_osnoise;
	struct ring_buffer_event *event;
	struct osnoise_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->runtime		= sample->runtime;
	entry->noise		= sample->noise;
	entry->max_sample	= sample->max_sample;
	entry->hw_count		= sample->hw_count;
	entry->nmi_count	= sample->nmi_count;
	entry->irq_count	= sample->irq_count;
	entry->softirq_count	= sample->softirq_count;
	entry->thread_count	= sample->thread_count;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record an osnoise_sample on all osnoise instances.
 */
static void trace_osnoise_sample(struct osnoise_sample *sample)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__trace_osnoise_sample(sample, buffer);
	}
	rcu_read_unlock();
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Print the timerlat header info.
 */
#ifdef CONFIG_PREEMPT_RT
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "#                                _-------=> irqs-off\n");
	seq_puts(s, "#                               / _------=> need-resched\n");
	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
	seq_puts(s, "#                              |||||| /\n");
	seq_puts(s, "#                              |||||||             ACTIVATION\n");
	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
	seq_puts(s, "       CONTEXT                LATENCY\n");
	seq_puts(s, "#              | |         |   |||||||      |         |      ");
	seq_puts(s, "            |                       |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "#                                _-----=> irqs-off\n");
	seq_puts(s, "#                               / _----=> need-resched\n");
	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
	seq_puts(s, "#                              || / _--=> preempt-depth\n");
	seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
	seq_puts(s, "#                              |||| /     delay\n");
	seq_puts(s, "#                              |||||            ACTIVATION\n");
	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    ID      ");
	seq_puts(s, "      CONTEXT                 LATENCY\n");
	seq_puts(s, "#              | |         |   |||||      |         |       ");
	seq_puts(s, "           |                       |\n");
}
#endif /* CONFIG_PREEMPT_RT */

static void
__trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
{
	struct trace_event_call *call = &event_osnoise;
	struct ring_buffer_event *event;
	struct timerlat_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->seqnum		= sample->seqnum;
	entry->context		= sample->context;
	entry->timer_latency	= sample->timer_latency;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}
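/*
 * Illustrative only (not taken from this file): a pair of timerlat samples,
 * one recorded from the timer IRQ context and one from the thread context,
 * is expected to look roughly like:
 *
 *	    <idle>-0    [000] d.h.  54.029328: #1  context    irq timer_latency    932 ns
 *	timerlat/0-867  [000] ....  54.029339: #1  context thread timer_latency  11700 ns
 */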
/*
 * Record a timerlat_sample into the tracer buffer.
 */
static void trace_timerlat_sample(struct timerlat_sample *sample)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__trace_timerlat_sample(sample, buffer);
	}
	rcu_read_unlock();
}

#ifdef CONFIG_STACKTRACE

#define	MAX_CALLS	256

/*
 * Stack trace will take place only at IRQ level, so, no need
 * to control nesting here.
 */
struct trace_stack {
	int		stack_size;
	int		nr_entries;
	unsigned long	calls[MAX_CALLS];
};

static DEFINE_PER_CPU(struct trace_stack, trace_stack);

/*
 * timerlat_save_stack - save a stack trace without printing
 *
 * Save the current stack trace without printing. The
 * stack will be printed later, after the end of the measurement.
 */
static void timerlat_save_stack(int skip)
{
	unsigned int size, nr_entries;
	struct trace_stack *fstack;

	fstack = this_cpu_ptr(&trace_stack);

	size = ARRAY_SIZE(fstack->calls);

	nr_entries = stack_trace_save(fstack->calls, size, skip);

	fstack->stack_size = nr_entries * sizeof(unsigned long);
	fstack->nr_entries = nr_entries;
}

static void
__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
{
	struct trace_event_call *call = &event_osnoise;
	struct ring_buffer_event *event;
	struct stack_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
					  tracing_gen_ctx());
	if (!event)
		return;

	entry = ring_buffer_event_data(event);

	memcpy(&entry->caller, fstack->calls, size);
	entry->size = fstack->nr_entries;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * timerlat_dump_stack - dump a stack trace previously saved
 */
static void timerlat_dump_stack(u64 latency)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;
	struct trace_stack *fstack;
	unsigned int size;

	/*
	 * trace only if latency > print_stack config, if enabled.
	 */
	if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
		return;

	preempt_disable_notrace();
	fstack = this_cpu_ptr(&trace_stack);
	size = fstack->stack_size;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__timerlat_dump_stack(buffer, fstack, size);
	}
	rcu_read_unlock();
	preempt_enable_notrace();
}
#else /* CONFIG_STACKTRACE */
#define timerlat_dump_stack(latency)	do {} while (0)
#define timerlat_save_stack(a)		do {} while (0)
#endif /* CONFIG_STACKTRACE */
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * Macros to encapsulate the time capturing infrastructure.
 */
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, 1000)
#define time_sub(a, b)	((a) - (b))
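/*
 * A worked example of the delta_start bookkeeping implemented by the
 * cond_move_*_delta_start() helpers below (illustrative numbers, not from
 * the original comments): suppose a noisy thread window starts at
 * delta_start = 100us and an IRQ then runs for 10us. The IRQ exit path
 * pushes the thread's delta_start forward to 110us, so a later
 * duration = now - delta_start no longer includes the 10us of IRQ noise,
 * which is accounted to the IRQ instead.
 */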
/*
 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
 *
 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
 * to discount the NMI interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->irq.delta_start)
		osn_var->irq.delta_start += duration;
}

#ifndef CONFIG_PREEMPT_RT
/*
 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
 *
 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
 * forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->softirq.delta_start)
		osn_var->softirq.delta_start += duration;
}
#else /* CONFIG_PREEMPT_RT */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
#endif

/*
 * cond_move_thread_delta_start - Forward the delta_start of a running thread
 *
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
 * is pushed forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->thread.delta_start)
		osn_var->thread.delta_start += duration;
}

/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have their duration without
 * the interference from higher priority interrupts. Instead of keeping a
 * variable to discount the interrupt interference from these variables, the
 * starting time of these variables are pushed forward with the interrupt's
 * duration. In this way, a single variable is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.,:
 *
 *	now = time_get()
 *		--->	interrupt!
 *			delta_start -= int duration;
 *		<---
 *	duration = now - delta_start;
 *
 *	result: negative duration if the duration accumulated before the
 *	interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
	u64 int_counter, now;
	s64 duration;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		now = time_get();
		duration = (now - *delta_start);

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	/*
	 * This is evidence of a race condition that caused a value to be
	 * "discounted" too much.
	 */
	if (duration < 0)
		osnoise_taint("Negative duration!\n");

	*delta_start = 0;

	return duration;
}
/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts.
 *
 * See get_int_safe_duration() for an explanation.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*time = time_get();

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * copy_int_safe_time - Copy *src into *dst, aware of interference
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*dst = *src;

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * trace_osnoise_callback - NMI entry/exit callback
 *
 * This function is called at NMI entry and exit code. The bool enter
 * distinguishes between either case. This function is used to note an NMI
 * occurrence, compute the noise caused by the NMI, and to remove the noise
 * it is potentially causing on other interference variables.
 */
void trace_osnoise_callback(bool enter)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 duration;

	if (!osn_var->sampling)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter) {
			osn_var->nmi.delta_start = time_get();
			local_inc(&osn_var->int_counter);
		} else {
			duration = time_get() - osn_var->nmi.delta_start;

			trace_nmi_noise(osn_var->nmi.delta_start, duration);

			cond_move_irq_delta_start(osn_var, duration);
			cond_move_softirq_delta_start(osn_var, duration);
			cond_move_thread_delta_start(osn_var, duration);
		}
	}

	if (enter)
		osn_var->nmi.count++;
}

/*
 * osnoise_trace_irq_entry - Note the starting of an IRQ
 *
 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
 * The arrival_time is used to report... the arrival time. The delta_start
 * is used to compute the duration at the IRQ exit handler. See
 * cond_move_irq_delta_start().
 */
void osnoise_trace_irq_entry(int id)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->irq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
	osn_var->irq.count++;

	local_inc(&osn_var->int_counter);
}
/*
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
 *
 * Computes the duration of the IRQ noise, and traces it. Also discounts the
 * interference from other sources of noise that could currently be accounted.
 */
void osnoise_trace_irq_exit(int id, const char *desc)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	s64 duration;

	if (!osn_var->sampling)
		return;

	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
	osn_var->irq.arrival_time = 0;
	cond_move_softirq_delta_start(osn_var, duration);
	cond_move_thread_delta_start(osn_var, duration);
}

/*
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
 *
 * Used to note the starting of an IRQ occurrence.
 */
static void trace_irqentry_callback(void *data, int irq,
				    struct irqaction *action)
{
	osnoise_trace_irq_entry(irq);
}

/*
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
 *
 * Used to note the end of an IRQ occurrence.
 */
static void trace_irqexit_callback(void *data, int irq,
				   struct irqaction *action, int ret)
{
	osnoise_trace_irq_exit(irq, action->name);
}

/*
 * arch specific register function.
 */
int __weak osnoise_arch_register(void)
{
	return 0;
}

/*
 * arch specific unregister function.
 */
void __weak osnoise_arch_unregister(void)
{
	return;
}
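/*
 * A minimal sketch of what an arch override of the weak hooks above might
 * look like, assuming an architecture that exposes extra IRQ-vector
 * tracepoints (e.g. x86's local timer vector; compare the real
 * implementation in arch/x86/kernel/trace.c). Illustrative only:
 *
 *	static void trace_vector_entry(void *data, int vector)
 *	{
 *		osnoise_trace_irq_entry(vector);
 *	}
 *
 *	static void trace_vector_exit(void *data, int vector)
 *	{
 *		osnoise_trace_irq_exit(vector, data);
 *	}
 *
 *	int osnoise_arch_register(void)
 *	{
 *		int ret;
 *
 *		ret = register_trace_local_timer_entry(trace_vector_entry, NULL);
 *		if (ret)
 *			return ret;
 *
 *		ret = register_trace_local_timer_exit(trace_vector_exit, "local_timer");
 *		if (ret)
 *			unregister_trace_local_timer_entry(trace_vector_entry, NULL);
 *
 *		return ret;
 *	}
 */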
/*
 * hook_irq_events - Hook IRQ handling events
 *
 * This function hooks the IRQ related callbacks to the respective trace
 * events.
 */
static int hook_irq_events(void)
{
	int ret;

	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	if (ret)
		goto out_unregister_entry;

	ret = osnoise_arch_register();
	if (ret)
		goto out_irq_exit;

	return 0;

out_irq_exit:
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
out_unregister_entry:
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_irq_events - Unhook IRQ handling events
 *
 * This function unhooks the IRQ related callbacks from the respective trace
 * events.
 */
static void unhook_irq_events(void)
{
	osnoise_arch_unregister();
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
}

#ifndef CONFIG_PREEMPT_RT
/*
 * trace_softirq_entry_callback - Note the starting of a softirq
 *
 * Save the starting time of a softirq. As softirqs are non-preemptive to
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
 * to save the statistics. The arrival_time is used to report... the
 * arrival time. The delta_start is used to compute the duration at the
 * softirq exit handler. See cond_move_softirq_delta_start().
 */
static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->softirq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
	osn_var->softirq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Computes the duration of the softirq noise, and traces it. Also discounts
 * the interference from other sources of noise that could currently be
 * accounted.
 */
static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	s64 duration;

	if (!osn_var->sampling)
		return;

	if (unlikely(timerlat_enabled()))
		if (!timerlat_softirq_exit(osn_var))
			return;

	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
	cond_move_thread_delta_start(osn_var, duration);
	osn_var->softirq.arrival_time = 0;
}

/*
 * hook_softirq_events - Hook softirq handling events
 *
 * This function hooks the softirq related callbacks to the respective trace
 * events.
 */
static int hook_softirq_events(void)
{
	int ret;

	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
	if (ret)
		goto out_unreg_entry;

	return 0;

out_unreg_entry:
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_softirq_events - Unhook softirq handling events
 *
 * This function unhooks the softirq related callbacks from the respective
 * trace events.
 */
static void unhook_softirq_events(void)
{
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
}
#else /* CONFIG_PREEMPT_RT */
/*
 * softirqs are threads on PREEMPT_RT.
 */
static int hook_softirq_events(void)
{
	return 0;
}
static void unhook_softirq_events(void)
{
}
#endif

/*
 * thread_entry - Record the starting of a thread noise window
 *
 * It saves the context switch time for a noisy thread, and increments
 * the interference counters.
 */
static void
thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
{
	if (!osn_var->sampling)
		return;
	/*
	 * The arrival time will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->thread.arrival_time = time_get();

	set_int_safe_time(osn_var, &osn_var->thread.delta_start);

	osn_var->thread.count++;
	local_inc(&osn_var->int_counter);
}
/*
 * thread_exit - Report the end of a thread noise window
 *
 * It computes the total noise from a thread, tracing if needed.
 */
static void
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
	s64 duration;

	if (!osn_var->sampling)
		return;

	if (unlikely(timerlat_enabled()))
		if (!timerlat_thread_exit(osn_var))
			return;

	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);

	trace_thread_noise(t, osn_var->thread.arrival_time, duration);

	osn_var->thread.arrival_time = 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise_stop_exception - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_exception(char *msg, int cpu)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d due to exception: %s\n",
				       smp_processor_id(),
				       msg);

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit on cpu %d due to exception: %s\n",
			      smp_processor_id(),
			      msg);

		tracer_tracing_off(tr);
	}
	rcu_read_unlock();
}

/*
 * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler
 *
 * This function is hooked to the sched:sched_migrate_task trace event, and
 * monitors timerlat user-space thread migration.
 */
static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
{
	struct osnoise_variables *osn_var;
	long cpu = task_cpu(p);

	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
	if (osn_var->pid == p->pid && dest_cpu != cpu) {
		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
		osnoise_taint("timerlat user-thread migrated\n");
		osnoise_stop_exception("timerlat user-thread migrated", cpu);
	}
}

static int register_migration_monitor(void)
{
	int ret = 0;

	/*
	 * Timerlat thread migration check is only required when running timerlat
	 * in user-space. Thus, enable the callback only if timerlat is set with
	 * no workload.
	 */
	if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options))
		ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);

	return ret;
}

static void unregister_migration_monitor(void)
{
	if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options))
		unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
}
#else
static int register_migration_monitor(void)
{
	return 0;
}
static void unregister_migration_monitor(void) {}
#endif
/*
 * trace_sched_switch_callback - sched:sched_switch trace event handler
 *
 * This function is hooked to the sched:sched_switch trace event, and it is
 * used to record the beginning and to report the end of a thread noise window.
 */
static void
trace_sched_switch_callback(void *data, bool preempt,
			    struct task_struct *p,
			    struct task_struct *n,
			    unsigned int prev_state)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	int workload = test_bit(OSN_WORKLOAD, &osnoise_options);

	if ((p->pid != osn_var->pid) || !workload)
		thread_exit(osn_var, p);

	if ((n->pid != osn_var->pid) || !workload)
		thread_entry(osn_var, n);
}

/*
 * hook_thread_events - Hook the instrumentation for thread noise
 *
 * Hook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static int hook_thread_events(void)
{
	int ret;

	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
	if (ret)
		return -EINVAL;

	ret = register_migration_monitor();
	if (ret)
		goto out_unreg;

	return 0;

out_unreg:
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
	return -EINVAL;
}

/*
 * unhook_thread_events - Unhook the instrumentation for thread noise
 *
 * Unhook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static void unhook_thread_events(void)
{
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
	unregister_migration_monitor();
}

/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count;
	s->irq_count = osn_var->irq.count;
	s->softirq_count = osn_var->softirq.count;
	s->thread_count = osn_var->thread.count;
}

/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count - s->nmi_count;
	s->irq_count = osn_var->irq.count - s->irq_count;
	s->softirq_count = osn_var->softirq.count - s->softirq_count;
	s->thread_count = osn_var->thread.count - s->thread_count;
}
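/*
 * A short worked example of the save/diff pair above (illustrative
 * numbers): if osn_var->irq.count is 100 when save_osn_sample_stats()
 * runs and 130 when diff_osn_sample_stats() runs, the reported
 * s->irq_count is 30: the number of IRQs that hit this CPU during the
 * sample window, matching the IRQ column of the osnoise output.
 */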
/*
 * osnoise_stop_tracing - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_tracing(void)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d\n", smp_processor_id());

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit stop condition on CPU %d\n", smp_processor_id());

		tracer_tracing_off(tr);
	}
	rcu_read_unlock();
}

/*
 * osnoise_has_tracing_on - Check if there is at least one instance on
 */
static __always_inline int osnoise_has_tracing_on(void)
{
	struct osnoise_instance *inst;
	int trace_is_on = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list)
		trace_is_on += tracer_tracing_is_on(inst->tr);
	rcu_read_unlock();

	return trace_is_on;
}

/*
 * notify_new_max_latency - Notify a new max latency via fsnotify interface.
 */
static void notify_new_max_latency(u64 latency)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
			tr->max_latency = latency;
			latency_fsnotify(tr);
		}
	}
	rcu_read_unlock();
}

/*
 * run_osnoise - Sample the time and look for osnoise
 *
 * Used to capture the time, looking for potential osnoise latency repeatedly.
 * Different from hwlat_detector, it is called with preemption and interrupts
 * enabled. This allows irqs, softirqs and threads to run, interfering with
 * the osnoise sampling thread, as they would do with a regular thread.
 */
static int run_osnoise(void)
{
	bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 start, sample, last_sample;
	u64 last_int_count, int_count;
	s64 noise = 0, max_noise = 0;
	s64 total, last_total = 0;
	struct osnoise_sample s;
	bool disable_preemption;
	unsigned int threshold;
	u64 runtime, stop_in;
	u64 sum_noise = 0;
	int hw_count = 0;
	int ret = -1;

	/*
	 * Disabling preemption is only required if IRQs are enabled,
	 * and the option is set on.
	 */
	disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);

	/*
	 * Considers the current thread as the workload.
	 */
	osn_var->pid = current->pid;

	/*
	 * Save the current stats for the diff.
	 */
	save_osn_sample_stats(osn_var, &s);

	/*
	 * If threshold is 0, use the default value of 5 us.
	 */
	threshold = tracing_thresh ? : 5000;

	/*
	 * Apply PREEMPT and IRQ disabled options.
	 */
	if (disable_irq)
		local_irq_disable();

	if (disable_preemption)
		preempt_disable();

	/*
	 * Make sure NMIs see sampling first.
	 */
	osn_var->sampling = true;
	barrier();

	/*
	 * Transform the *_us config to nanoseconds to avoid the
	 * division on the main loop.
	 */
	runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
	stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;

	/*
	 * Start timestamp.
	 */
	start = time_get();

	/*
	 * "previous" loop.
	 */
	last_int_count = set_int_safe_time(osn_var, &last_sample);

	do {
		/*
		 * Get sample!
		 */
		int_count = set_int_safe_time(osn_var, &sample);

		noise = time_sub(sample, last_sample);

		/*
		 * This shouldn't happen.
		 */
		if (noise < 0) {
			osnoise_taint("negative noise!");
			goto out;
		}

		/*
		 * Sample runtime.
		 */
		total = time_sub(sample, start);

		/*
		 * Check for possible overflows.
		 */
		if (total < last_total) {
			osnoise_taint("total overflow!");
			break;
		}

		last_total = total;

		if (noise >= threshold) {
			int interference = int_count - last_int_count;

			if (noise > max_noise)
				max_noise = noise;

			if (!interference)
				hw_count++;

			sum_noise += noise;

			trace_sample_threshold(last_sample, noise, interference);

			if (osnoise_data.stop_tracing)
				if (noise > stop_in)
					osnoise_stop_tracing();
		}

		/*
		 * In some cases, notably when running on a nohz_full CPU with
		 * a stopped tick, PREEMPT_RCU has no way to account for QSs.
		 * This will eventually cause unwarranted noise as PREEMPT_RCU
		 * will force preemption as the means of ending the current
		 * grace period. We avoid this problem by calling
		 * rcu_momentary_dyntick_idle(), which performs a zero duration
		 * EQS allowing PREEMPT_RCU to end the current grace period.
		 * This call shouldn't be wrapped inside an RCU critical
		 * section.
		 *
		 * Note that in non PREEMPT_RCU kernels QSs are handled through
		 * cond_resched().
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
			if (!disable_irq)
				local_irq_disable();

			rcu_momentary_dyntick_idle();

			if (!disable_irq)
				local_irq_enable();
		}

		/*
		 * For the non-preemptive kernel config: let threads run, if
		 * they so wish, unless set not to do so.
		 */
		if (!disable_irq && !disable_preemption)
			cond_resched();

		last_sample = sample;
		last_int_count = int_count;

	} while (total < runtime && !kthread_should_stop());

	/*
	 * Finish the above in the view for interrupts.
	 */
	barrier();

	osn_var->sampling = false;

	/*
	 * Make sure sampling data is no longer updated.
	 */
	barrier();

	/*
	 * Return to the preemptive state.
	 */
	if (disable_preemption)
		preempt_enable();

	if (disable_irq)
		local_irq_enable();

	/*
	 * Save noise info.
	 */
	s.noise = time_to_us(sum_noise);
	s.runtime = time_to_us(total);
	s.max_sample = time_to_us(max_noise);
	s.hw_count = hw_count;

	/* Save interference stats info */
	diff_osn_sample_stats(osn_var, &s);

	trace_osnoise_sample(&s);

	notify_new_max_latency(max_noise);

	if (osnoise_data.stop_tracing_total)
		if (s.noise > osnoise_data.stop_tracing_total)
			osnoise_stop_tracing();

	return 0;
out:
	return ret;
}

static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;

/*
 * osnoise_sleep - sleep until the next period
 */
static void osnoise_sleep(bool skip_period)
{
	u64 interval;
	ktime_t wake_time;

	mutex_lock(&interface_lock);
	if (skip_period)
		interval = osnoise_data.sample_period;
	else
		interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
	mutex_unlock(&interface_lock);

	/*
	 * Differently from hwlat_detector, the osnoise tracer can run
	 * without a pause because preemption is on.
	 */
	if (!interval) {
		/* Let synchronize_rcu_tasks() make progress */
		cond_resched_tasks_rcu_qs();
		return;
	}

	wake_time = ktime_add_us(ktime_get(), interval);
	__set_current_state(TASK_INTERRUPTIBLE);

	while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
		if (kthread_should_stop())
			break;
	}
}

/*
 * osnoise_migration_pending - checks if the task needs to migrate
 *
 * osnoise/timerlat threads are per-cpu. If there is a pending request to
 * migrate the thread away from the current CPU, something bad has happened.
 * Play the good citizen and leave.
 *
 * Returns 0 if it is safe to continue, 1 otherwise.
 */
static inline int osnoise_migration_pending(void)
{
	if (!current->migration_pending)
		return 0;

	/*
	 * If migration is pending, there is a task waiting for the
	 * tracer to enable migration. The tracer does not allow migration,
	 * thus: taint and leave to unblock the blocked thread.
	 */
	osnoise_taint("migration requested to osnoise threads, leaving.");

	/*
	 * Unset this thread from the threads managed by the interface.
	 * The tracers are responsible for cleaning their env before
	 * exiting.
	 */
	mutex_lock(&interface_lock);
	this_cpu_osn_var()->kthread = NULL;
	mutex_unlock(&interface_lock);

	return 1;
}

/*
 * osnoise_main - The osnoise detection kernel thread
 *
 * Calls the run_osnoise() function to measure the osnoise for the
 * configured runtime, every period.
 */
static int osnoise_main(void *data)
{
	unsigned long flags;

	/*
	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY threads.
	 *
	 * To work around this limitation, disable migration and remove the
	 * flag.
	 */
	migrate_disable();
	raw_spin_lock_irqsave(&current->pi_lock, flags);
	current->flags &= ~(PF_NO_SETAFFINITY);
	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	while (!kthread_should_stop()) {
		if (osnoise_migration_pending())
			break;

		/* skip a period if tracing is off on all instances */
		if (!osnoise_has_tracing_on()) {
			osnoise_sleep(true);
			continue;
		}

		run_osnoise();
		osnoise_sleep(false);
	}

	migrate_enable();
	return 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat_irq - hrtimer handler for timerlat.
 */
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat;
	struct timerlat_sample s;
	u64 now;
	u64 diff;

	/*
	 * I am not sure if the timer was armed for this CPU. So, get
	 * the timerlat struct from the timer itself, not from this
	 * CPU.
	 */
	tlat = container_of(timer, struct timerlat_variables, timer);

	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));

	/*
	 * Enable the osnoise: events for thread and softirq.
	 */
	tlat->tracing_thread = true;

	osn_var->thread.arrival_time = time_get();

	/*
	 * A hardirq is running: the timer IRQ. It is for sure preempting
	 * a thread, and potentially preempting a softirq.
	 *
	 * At this point, it is not interesting to know the duration of the
	 * preempted thread (and maybe softirq), but how much time they will
	 * delay the beginning of the execution of the timer thread.
	 *
	 * To get the correct (net) delay added by the softirq, its delta_start
	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
	 * start of the softirq will be zeroed, accounting then only the time
	 * after that.
	 *
	 * The thread follows the same principle. However, if a softirq is
	 * running, the thread needs to receive the softirq delta_start. The
	 * reason is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
	 *
	 * PREEMPT_RT is a special case, though. As softirqs run as threads
	 * on RT, moving the thread is enough.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->softirq.delta_start);

		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
				   &osn_var->irq.delta_start);
	} else {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->irq.delta_start);
	}

	/*
	 * Compare the current time with the expected time.
	 */
	diff = now - tlat->abs_period;

	tlat->count++;
	s.seqnum = tlat->count;
	s.timer_latency = diff;
	s.context = IRQ_CONTEXT;

	trace_timerlat_sample(&s);

	if (osnoise_data.stop_tracing) {
		if (time_to_us(diff) >= osnoise_data.stop_tracing) {

			/*
			 * At this point, if stop_tracing is set and <= print_stack,
			 * print_stack is set and would be printed in the thread handler.
			 *
			 * Thus, print the stack trace as it is helpful to define the
			 * root cause of an IRQ latency.
			 */
			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
				timerlat_save_stack(0);
				timerlat_dump_stack(time_to_us(diff));
			}

			osnoise_stop_tracing();
			notify_new_max_latency(diff);

			wake_up_process(tlat->kthread);

			return HRTIMER_NORESTART;
		}
	}

	wake_up_process(tlat->kthread);

	if (osnoise_data.print_stack)
		timerlat_save_stack(0);

	return HRTIMER_NORESTART;
}

/*
 * wait_next_period - Wait for the next period for timerlat
 */
static int wait_next_period(struct timerlat_variables *tlat)
{
	ktime_t next_abs_period, now;
	u64 rel_period = osnoise_data.timerlat_period * 1000;

	now = hrtimer_cb_get_time(&tlat->timer);
	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);

	/*
	 * Save the next abs_period.
	 */
	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);

	/*
	 * If the new abs_period is in the past, skip the activation.
	 */
	while (ktime_compare(now, next_abs_period) > 0) {
		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
	}

	set_current_state(TASK_INTERRUPTIBLE);

	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
	schedule();
	return 1;
}

/*
 * timerlat_main - Timerlat main
 */
static int timerlat_main(void *data)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat = this_cpu_tmr_var();
	struct timerlat_sample s;
	struct sched_param sp;
	unsigned long flags;
	u64 now, diff;

	/*
	 * Make the thread RT, that is how cyclictest is usually used.
	 */
	sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);

	/*
	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY threads.
	 *
	 * To work around this limitation, disable migration and remove the
	 * flag.
	 */
	migrate_disable();
	raw_spin_lock_irqsave(&current->pi_lock, flags);
	current->flags &= ~(PF_NO_SETAFFINITY);
	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	tlat->count = 0;
	tlat->tracing_thread = false;

	hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
	tlat->timer.function = timerlat_irq;
	tlat->kthread = current;
	osn_var->pid = current->pid;
	/*
	 * Annotate the arrival time.
	 */
	tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);

	wait_next_period(tlat);

	osn_var->sampling = 1;

	while (!kthread_should_stop()) {

		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
		diff = now - tlat->abs_period;

		s.seqnum = tlat->count;
		s.timer_latency = diff;
		s.context = THREAD_CONTEXT;

		trace_timerlat_sample(&s);

		notify_new_max_latency(diff);

		timerlat_dump_stack(time_to_us(diff));

		tlat->tracing_thread = false;
		if (osnoise_data.stop_tracing_total)
			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
				osnoise_stop_tracing();

		if (osnoise_migration_pending())
			break;

		wait_next_period(tlat);
	}

	hrtimer_cancel(&tlat->timer);
	migrate_enable();
	return 0;
}
#else /* CONFIG_TIMERLAT_TRACER */
static int timerlat_main(void *data)
{
	return 0;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * stop_kthread - stop a workload thread
 */
static void stop_kthread(unsigned int cpu)
{
	struct task_struct *kthread;

	kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
	if (kthread) {
		if (test_bit(OSN_WORKLOAD, &osnoise_options)) {
			kthread_stop(kthread);
		} else {
			/*
			 * This is a user thread waiting on the timerlat_fd. We need
			 * to close all users, and the best way to guarantee this is
			 * by killing the thread. NOTE: this is a purpose specific file.
			 */
			kill_pid(kthread->thread_pid, SIGKILL, 1);
			put_task_struct(kthread);
		}
		per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
	} else {
		/* if no workload, just return */
		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
			/*
			 * This is set in the osnoise tracer case.
			 */
			per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
			barrier();
			return;
		}
	}
}

/*
 * stop_per_cpu_kthreads - Stop per-cpu threads
 *
 * Stop the osnoise sampling threads. Use this on unload and at system
 * shutdown.
 */
static void stop_per_cpu_kthreads(void)
{
	int cpu;

	cpus_read_lock();

	for_each_online_cpu(cpu)
		stop_kthread(cpu);

	cpus_read_unlock();
}

/*
 * start_kthread - Start a workload thread
 */
static int start_kthread(unsigned int cpu)
{
	struct task_struct *kthread;
	void *main = osnoise_main;
	char comm[24];

	if (timerlat_enabled()) {
		snprintf(comm, 24, "timerlat/%d", cpu);
		main = timerlat_main;
	} else {
		/* if no workload, just return */
		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
			per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
			barrier();
			return 0;
		}
		snprintf(comm, 24, "osnoise/%d", cpu);
	}

	kthread = kthread_run_on_cpu(main, NULL, cpu, comm);

	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
		stop_per_cpu_kthreads();
		return -ENOMEM;
	}

	per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;

	return 0;
}
/*
 * start_per_cpu_kthreads - Kick off per-cpu osnoise sampling kthreads
 *
 * This starts the kernel threads that will look for osnoise on many
 * cpus.
 */
static int start_per_cpu_kthreads(void)
{
	struct cpumask *current_mask = &save_cpumask;
	int retval = 0;
	int cpu;

	if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
		if (timerlat_enabled())
			return 0;
	}

	cpus_read_lock();
	/*
	 * Run only on online CPUs in which osnoise is allowed to run.
	 */
	cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);

	for_each_possible_cpu(cpu)
		per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;

	for_each_cpu(cpu, current_mask) {
		retval = start_kthread(cpu);
		if (retval) {
			cpus_read_unlock();
			stop_per_cpu_kthreads();
			return retval;
		}
	}

	cpus_read_unlock();

	return retval;
}

#ifdef CONFIG_HOTPLUG_CPU
static void osnoise_hotplug_workfn(struct work_struct *dummy)
{
	unsigned int cpu = smp_processor_id();

	mutex_lock(&trace_types_lock);

	if (!osnoise_has_registered_instances())
		goto out_unlock_trace;

	mutex_lock(&interface_lock);
	cpus_read_lock();

	if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
		goto out_unlock;

	start_kthread(cpu);

out_unlock:
	cpus_read_unlock();
	mutex_unlock(&interface_lock);
out_unlock_trace:
	mutex_unlock(&trace_types_lock);
}

static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);

/*
 * osnoise_cpu_init - CPU hotplug online callback function
 */
static int osnoise_cpu_init(unsigned int cpu)
{
	schedule_work_on(cpu, &osnoise_hotplug_work);
	return 0;
}

/*
 * osnoise_cpu_die - CPU hotplug offline callback function
 */
static int osnoise_cpu_die(unsigned int cpu)
{
	stop_kthread(cpu);
	return 0;
}

static void osnoise_init_hotplug_support(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
				osnoise_cpu_init, osnoise_cpu_die);
	if (ret < 0)
		pr_warn(BANNER "Failed to initialize cpu hotplug support\n");

	return;
}
#else /* CONFIG_HOTPLUG_CPU */
static void osnoise_init_hotplug_support(void)
{
	return;
}
#endif /* CONFIG_HOTPLUG_CPU */
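/*
 * Note on the hotplug flow: schedule_work_on() queues
 * osnoise_hotplug_workfn() on the CPU that just came online, so the
 * smp_processor_id() read in the workfn refers to that CPU, and
 * start_kthread() runs in place once the instance and cpumask checks
 * pass.
 */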
/*
 * seq file functions for the osnoise/options file.
 */
static void *s_options_start(struct seq_file *s, loff_t *pos)
{
	int option = *pos;

	mutex_lock(&interface_lock);

	if (option >= OSN_MAX)
		return NULL;

	return pos;
}

static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
{
	int option = ++(*pos);

	if (option >= OSN_MAX)
		return NULL;

	return pos;
}

static int s_options_show(struct seq_file *s, void *v)
{
	loff_t *pos = v;
	int option = *pos;

	if (option == OSN_DEFAULTS) {
		if (osnoise_options == OSN_DEFAULT_OPTIONS)
			seq_printf(s, "%s", osnoise_options_str[option]);
		else
			seq_printf(s, "NO_%s", osnoise_options_str[option]);
		goto out;
	}

	if (test_bit(option, &osnoise_options))
		seq_printf(s, "%s", osnoise_options_str[option]);
	else
		seq_printf(s, "NO_%s", osnoise_options_str[option]);

out:
	if (option != OSN_MAX)
		seq_puts(s, " ");

	return 0;
}

static void s_options_stop(struct seq_file *s, void *v)
{
	seq_puts(s, "\n");
	mutex_unlock(&interface_lock);
}

static const struct seq_operations osnoise_options_seq_ops = {
	.start		= s_options_start,
	.next		= s_options_next,
	.show		= s_options_show,
	.stop		= s_options_stop
};

static int osnoise_options_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &osnoise_options_seq_ops);
}

/**
 * osnoise_options_write - Write function for "options" entry
 * @filp: The active open file structure
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in @filp
 *
 * Writing the option name sets the option, writing the "NO_"
 * prefix in front of the option name disables it.
 *
 * Writing "DEFAULTS" resets the option values to the default ones.
 */
static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
				     size_t cnt, loff_t *ppos)
{
	int running, option, enable, retval;
	char buf[256], *option_str;

	if (cnt >= 256)
		return -EINVAL;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	if (strncmp(buf, "NO_", 3)) {
		option_str = strstrip(buf);
		enable = true;
	} else {
		option_str = strstrip(&buf[3]);
		enable = false;
	}

	option = match_string(osnoise_options_str, OSN_MAX, option_str);
	if (option < 0)
		return -EINVAL;

	/*
	 * trace_types_lock is taken to avoid concurrency on start/stop.
	 */
	mutex_lock(&trace_types_lock);
	running = osnoise_has_registered_instances();
	if (running)
		stop_per_cpu_kthreads();

	mutex_lock(&interface_lock);
	/*
	 * Avoid CPU hotplug operations that might read options.
	 */
	cpus_read_lock();

	retval = cnt;

	if (enable) {
		if (option == OSN_DEFAULTS)
			osnoise_options = OSN_DEFAULT_OPTIONS;
		else
			set_bit(option, &osnoise_options);
	} else {
		if (option == OSN_DEFAULTS)
			retval = -EINVAL;
		else
			clear_bit(option, &osnoise_options);
	}

	cpus_read_unlock();
	mutex_unlock(&interface_lock);

	if (running)
		start_per_cpu_kthreads();
	mutex_unlock(&trace_types_lock);

	return retval;
}
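/*
 * Usage sketch (assuming tracefs mounted at /sys/kernel/tracing):
 *
 *   # cat /sys/kernel/tracing/osnoise/options
 *   # echo NO_OSNOISE_WORKLOAD > /sys/kernel/tracing/osnoise/options
 *   # echo OSNOISE_WORKLOAD > /sys/kernel/tracing/osnoise/options
 *   # echo DEFAULTS > /sys/kernel/tracing/osnoise/options
 *
 * Note that writing "NO_DEFAULTS" is rejected with -EINVAL by the
 * handler above.
 */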
/*
 * osnoise_cpus_read - Read function for reading the "cpus" file
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read value into
 * @count: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * Prints the "cpus" output into the user-provided buffer.
 */
static ssize_t
osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
		  loff_t *ppos)
{
	char *mask_str;
	int len;

	mutex_lock(&interface_lock);

	len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
	if (!mask_str) {
		count = -ENOMEM;
		goto out_unlock;
	}

	len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
	if (len >= count) {
		count = -EINVAL;
		goto out_free;
	}

	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);

out_free:
	kfree(mask_str);
out_unlock:
	mutex_unlock(&interface_lock);

	return count;
}
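/*
 * Sizing note: the first snprintf(NULL, 0, ...) call above only computes
 * how many characters the cpumask would print, so the buffer can be
 * allocated exactly (+1 for the terminating NUL) before the second call
 * actually formats into it.
 */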
/*
 * osnoise_cpus_write - Write function for "cpus" entry
 * @filp: The active open file structure
 * @ubuf: The user buffer that contains the value to write
 * @count: The maximum number of bytes to write to "file"
 * @ppos: The current position in @filp
 *
 * This function provides a write implementation for the "cpus"
 * interface to the osnoise trace. By default, it lists all CPUs,
 * in this way, allowing osnoise threads to run on any online CPU
 * of the system. It serves to restrict the execution of osnoise to the
 * set of CPUs written via this interface. Why not use "tracing_cpumask"?
 * Because the user might be interested in tracing what is running on
 * other CPUs. For instance, one might run osnoise in one HT CPU
 * while observing what is running on the sibling HT CPU.
 */
static ssize_t
osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
		   loff_t *ppos)
{
	cpumask_var_t osnoise_cpumask_new;
	int running, err;
	char buf[256];

	if (count >= 256)
		return -EINVAL;

	if (copy_from_user(buf, ubuf, count))
		return -EFAULT;

	/* NUL-terminate the user input before handing it to the parser. */
	buf[count] = '\0';

	if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
		return -ENOMEM;

	err = cpulist_parse(buf, osnoise_cpumask_new);
	if (err)
		goto err_free;

	/*
	 * trace_types_lock is taken to avoid concurrency on start/stop.
	 */
	mutex_lock(&trace_types_lock);
	running = osnoise_has_registered_instances();
	if (running)
		stop_per_cpu_kthreads();

	mutex_lock(&interface_lock);
	/*
	 * osnoise_cpumask is read by CPU hotplug operations.
	 */
	cpus_read_lock();

	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);

	cpus_read_unlock();
	mutex_unlock(&interface_lock);

	if (running)
		start_per_cpu_kthreads();
	mutex_unlock(&trace_types_lock);

	free_cpumask_var(osnoise_cpumask_new);
	return count;

err_free:
	free_cpumask_var(osnoise_cpumask_new);

	return err;
}
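/*
 * Usage sketch (assuming tracefs mounted at /sys/kernel/tracing):
 *
 *   # echo 0-3,8 > /sys/kernel/tracing/osnoise/cpus
 *   # cat /sys/kernel/tracing/osnoise/cpus
 *   0-3,8
 *
 * The list format is the one accepted by cpulist_parse(), i.e., the same
 * "0-3,8" style used by other cpumask interfaces.
 */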
#ifdef CONFIG_TIMERLAT_TRACER
static int timerlat_fd_open(struct inode *inode, struct file *file)
{
	struct osnoise_variables *osn_var;
	struct timerlat_variables *tlat;
	long cpu = (long) inode->i_cdev;

	mutex_lock(&interface_lock);

	/*
	 * This file is accessible only if timerlat is enabled, and
	 * NO_OSNOISE_WORKLOAD is set.
	 */
	if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
		mutex_unlock(&interface_lock);
		return -EINVAL;
	}

	migrate_disable();

	osn_var = this_cpu_osn_var();

	/*
	 * The osn_var->pid holds the single access to this file.
	 */
	if (osn_var->pid) {
		mutex_unlock(&interface_lock);
		migrate_enable();
		return -EBUSY;
	}

	/*
	 * timerlat is a per-cpu tracer. Check that the user-space thread is
	 * pinned to a single CPU too. The tracer later monitors whether the
	 * task migrates, and disables tracing if it does. Still, it is
	 * worth doing this basic acceptance test to catch an obviously
	 * wrong setup early.
	 */
	if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) {
		mutex_unlock(&interface_lock);
		migrate_enable();
		return -EPERM;
	}

	/*
	 * From now on, it is good to go.
	 */
	file->private_data = inode->i_cdev;

	get_task_struct(current);

	osn_var->kthread = current;
	osn_var->pid = current->pid;

	/*
	 * Setup is done.
	 */
	mutex_unlock(&interface_lock);

	tlat = this_cpu_tmr_var();
	tlat->count = 0;

	hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
	tlat->timer.function = timerlat_irq;

	migrate_enable();
	return 0;
}

/*
 * timerlat_fd_read - Read function for "timerlat_fd" file
 * @file: The active open file structure
 * @ubuf: The userspace provided buffer to read value into
 * @count: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * Blocks the caller until the next timerlat activation, returning 0, or
 * -EINVAL if the thread migrated away from its CPU.
 */
static ssize_t
timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
		 loff_t *ppos)
{
	long cpu = (long) file->private_data;
	struct osnoise_variables *osn_var;
	struct timerlat_variables *tlat;
	struct timerlat_sample s;
	s64 diff;
	u64 now;

	migrate_disable();

	tlat = this_cpu_tmr_var();

	/*
	 * While in user-space, the thread is migratable. There is nothing
	 * we can do about it.
	 * So, if the thread is running on another CPU, stop the machinery.
	 */
	if (cpu == smp_processor_id()) {
		if (tlat->uthread_migrate) {
			migrate_enable();
			return -EINVAL;
		}
	} else {
		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
		osnoise_taint("timerlat user thread migrate\n");
		osnoise_stop_tracing();
		migrate_enable();
		return -EINVAL;
	}

	osn_var = this_cpu_osn_var();

	/*
	 * The timerlat in user-space runs in a different order:
	 * the read() starts from the execution of the previous occurrence,
	 * sleeping for the next occurrence.
	 *
	 * So, skip if we are entering on read() before the first wakeup
	 * from timerlat IRQ:
	 */
	if (likely(osn_var->sampling)) {
		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
		diff = now - tlat->abs_period;

		/*
		 * it was not a timer firing, but some other signal?
		 */
		if (diff < 0)
			goto out;

		s.seqnum = tlat->count;
		s.timer_latency = diff;
		s.context = THREAD_URET;

		trace_timerlat_sample(&s);

		notify_new_max_latency(diff);

		tlat->tracing_thread = false;
		if (osnoise_data.stop_tracing_total)
			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
				osnoise_stop_tracing();
	} else {
		tlat->tracing_thread = false;
		tlat->kthread = current;

		/* Annotate now to drift new period */
		tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);

		osn_var->sampling = 1;
	}

	/* wait for the next period */
	wait_next_period(tlat);

	/* This is the wakeup from this cycle */
	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
	diff = now - tlat->abs_period;

	/*
	 * it was not a timer firing, but some other signal?
	 */
	if (diff < 0)
		goto out;

	s.seqnum = tlat->count;
	s.timer_latency = diff;
	s.context = THREAD_CONTEXT;

	trace_timerlat_sample(&s);

	if (osnoise_data.stop_tracing_total) {
		if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
			timerlat_dump_stack(time_to_us(diff));
			notify_new_max_latency(diff);
			osnoise_stop_tracing();
		}
	}

out:
	migrate_enable();
	return 0;
}

static int timerlat_fd_release(struct inode *inode, struct file *file)
{
	struct osnoise_variables *osn_var;
	struct timerlat_variables *tlat_var;
	long cpu = (long) file->private_data;

	migrate_disable();
	mutex_lock(&interface_lock);

	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
	tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);

	hrtimer_cancel(&tlat_var->timer);
	memset(tlat_var, 0, sizeof(*tlat_var));

	osn_var->sampling = 0;
	osn_var->pid = 0;

	/*
	 * We are leaving, not being stopped... see stop_kthread();
	 */
	if (osn_var->kthread) {
		put_task_struct(osn_var->kthread);
		osn_var->kthread = NULL;
	}

	mutex_unlock(&interface_lock);
	migrate_enable();
	return 0;
}
#endif
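/*
 * A minimal user-space sketch of the timerlat_fd protocol implemented
 * above (hedged: error handling omitted, the tracefs path assumes the
 * usual mount point; the timerlat tracer must be the current tracer and
 * NO_OSNOISE_WORKLOAD must be set for the open() to succeed):
 *
 *	#include <fcntl.h>
 *	#include <sched.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[1];
 *		cpu_set_t set;
 *		int fd;
 *
 *		// Pin to CPU 0 first: open() rejects tasks that can migrate.
 *		CPU_ZERO(&set);
 *		CPU_SET(0, &set);
 *		sched_setaffinity(0, sizeof(set), &set);
 *
 *		fd = open("/sys/kernel/tracing/osnoise/per_cpu/cpu0/timerlat_fd",
 *			  O_RDONLY);
 *
 *		// Each read() blocks until the next timerlat activation.
 *		for (;;)
 *			read(fd, buf, 1);
 *	}
 */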
/*
 * osnoise/runtime_us: cannot be greater than the period.
 */
static struct trace_min_max_param osnoise_runtime = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.sample_runtime,
	.max	= &osnoise_data.sample_period,
	.min	= NULL,
};

/*
 * osnoise/period_us: cannot be smaller than the runtime.
 */
static struct trace_min_max_param osnoise_period = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.sample_period,
	.max	= NULL,
	.min	= &osnoise_data.sample_runtime,
};

/*
 * osnoise/stop_tracing_us: no limit.
 */
static struct trace_min_max_param osnoise_stop_tracing_in = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.stop_tracing,
	.max	= NULL,
	.min	= NULL,
};

/*
 * osnoise/stop_tracing_total_us: no limit.
 */
static struct trace_min_max_param osnoise_stop_tracing_total = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.stop_tracing_total,
	.max	= NULL,
	.min	= NULL,
};

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
 * latency is higher than val.
 */
static struct trace_min_max_param osnoise_print_stack = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.print_stack,
	.max	= NULL,
	.min	= NULL,
};

/*
 * osnoise/timerlat_period: min 100 us, max 1 s
 */
static u64 timerlat_min_period = 100;
static u64 timerlat_max_period = 1000000;
static struct trace_min_max_param timerlat_period = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.timerlat_period,
	.max	= &timerlat_max_period,
	.min	= &timerlat_min_period,
};

static const struct file_operations timerlat_fd_fops = {
	.open		= timerlat_fd_open,
	.read		= timerlat_fd_read,
	.release	= timerlat_fd_release,
	.llseek		= generic_file_llseek,
};
#endif
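/*
 * Usage sketch for the tunables above (units are microseconds, tracefs
 * assumed at /sys/kernel/tracing):
 *
 *   # echo 500 > /sys/kernel/tracing/osnoise/timerlat_period_us
 *   # echo 1000000 > /sys/kernel/tracing/osnoise/period_us
 *   # echo 500000 > /sys/kernel/tracing/osnoise/runtime_us
 *
 * Writes outside a tunable's min/max bounds (e.g., a timerlat period
 * below 100 us) are rejected by the trace_min_max_fops handlers.
 */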
static const struct file_operations cpus_fops = {
	.open		= tracing_open_generic,
	.read		= osnoise_cpus_read,
	.write		= osnoise_cpus_write,
	.llseek		= generic_file_llseek,
};

static const struct file_operations osnoise_options_fops = {
	.open		= osnoise_options_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
	.write		= osnoise_options_write
};

#ifdef CONFIG_TIMERLAT_TRACER
#ifdef CONFIG_STACKTRACE
static int init_timerlat_stack_tracefs(struct dentry *top_dir)
{
	struct dentry *tmp;

	tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
				  &osnoise_print_stack, &trace_min_max_fops);
	if (!tmp)
		return -ENOMEM;

	return 0;
}
#else /* CONFIG_STACKTRACE */
static int init_timerlat_stack_tracefs(struct dentry *top_dir)
{
	return 0;
}
#endif /* CONFIG_STACKTRACE */

static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
{
	struct dentry *timerlat_fd;
	struct dentry *per_cpu;
	struct dentry *cpu_dir;
	char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
	long cpu;

	/*
	 * Why not use the tracing instance per_cpu/ dir?
	 *
	 * Because osnoise/timerlat have a single workload, and having
	 * multiple copies of these files would be a waste of memory.
	 */
	per_cpu = tracefs_create_dir("per_cpu", top_dir);
	if (!per_cpu)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		snprintf(cpu_str, 30, "cpu%ld", cpu);
		cpu_dir = tracefs_create_dir(cpu_str, per_cpu);
		if (!cpu_dir)
			goto out_clean;

		timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
						cpu_dir, NULL, &timerlat_fd_fops);
		if (!timerlat_fd)
			goto out_clean;

		/* Record the CPU */
		d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
	}

	return 0;

out_clean:
	tracefs_remove(per_cpu);
	return -ENOMEM;
}

/*
 * init_timerlat_tracefs - A function to initialize the timerlat interface files
 */
static int init_timerlat_tracefs(struct dentry *top_dir)
{
	struct dentry *tmp;
	int retval;

	tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
				  &timerlat_period, &trace_min_max_fops);
	if (!tmp)
		return -ENOMEM;

	retval = osnoise_create_cpu_timerlat_fd(top_dir);
	if (retval)
		return retval;

	return init_timerlat_stack_tracefs(top_dir);
}
#else /* CONFIG_TIMERLAT_TRACER */
static int init_timerlat_tracefs(struct dentry *top_dir)
{
	return 0;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * init_tracefs - A function to initialize the tracefs interface files
 *
 * This function creates entries in tracefs for "osnoise" and "timerlat".
 * It creates these directories in the tracing directory, and within that
 * directory the user can change and view the configs.
 */
static int init_tracefs(void)
{
	struct dentry *top_dir;
	struct dentry *tmp;
	int ret;

	ret = tracing_init_dentry();
	if (ret)
		return -ENOMEM;

	top_dir = tracefs_create_dir("osnoise", NULL);
	if (!top_dir)
		return 0;

	tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
				  &osnoise_period, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
				  &osnoise_runtime, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
				  &osnoise_stop_tracing_in, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
				  &osnoise_stop_tracing_total, &trace_min_max_fops);
	if (!tmp)
		goto err;

	tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
	if (!tmp)
		goto err;

	tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
				&osnoise_options_fops);
	if (!tmp)
		goto err;

	ret = init_timerlat_tracefs(top_dir);
	if (ret)
		goto err;

	return 0;

err:
	tracefs_remove(top_dir);
	return -ENOMEM;
}

static int osnoise_hook_events(void)
{
	int retval;

	/*
	 * Trace is already hooked, we are re-enabling from
	 * a stop_tracing_*.
	 */
	if (trace_osnoise_callback_enabled)
		return 0;

	retval = hook_irq_events();
	if (retval)
		return -EINVAL;

	retval = hook_softirq_events();
	if (retval)
		goto out_unhook_irq;

	retval = hook_thread_events();
	/*
	 * All fine!
	 */
	if (!retval)
		return 0;

	unhook_softirq_events();
out_unhook_irq:
	unhook_irq_events();
	return -EINVAL;
}
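/*
 * For reference, the layout created by init_tracefs() above (tracefs is
 * typically mounted at /sys/kernel/tracing):
 *
 *   osnoise/
 *     cpus
 *     options
 *     period_us
 *     runtime_us
 *     stop_tracing_us
 *     stop_tracing_total_us
 *     timerlat_period_us          (CONFIG_TIMERLAT_TRACER)
 *     print_stack                 (CONFIG_TIMERLAT_TRACER + CONFIG_STACKTRACE)
 *     per_cpu/cpuX/timerlat_fd    (CONFIG_TIMERLAT_TRACER)
 */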
static void osnoise_unhook_events(void)
{
	unhook_thread_events();
	unhook_softirq_events();
	unhook_irq_events();
}

/*
 * osnoise_workload_start - start the workload and hook to events
 */
static int osnoise_workload_start(void)
{
	int retval;

	/*
	 * Instances need to be registered after calling workload
	 * start. Hence, if there is already an instance, the
	 * workload was already registered. Otherwise, this
	 * code is on the way to register the first instance,
	 * and the workload will start.
	 */
	if (osnoise_has_registered_instances())
		return 0;

	osn_var_reset_all();

	retval = osnoise_hook_events();
	if (retval)
		return retval;

	/*
	 * Make sure that ftrace_nmi_enter/exit() see reset values
	 * before enabling trace_osnoise_callback_enabled.
	 */
	barrier();
	trace_osnoise_callback_enabled = true;

	retval = start_per_cpu_kthreads();
	if (retval) {
		trace_osnoise_callback_enabled = false;
		/*
		 * Make sure that ftrace_nmi_enter/exit() see
		 * trace_osnoise_callback_enabled as false before continuing.
		 */
		barrier();

		osnoise_unhook_events();
		return retval;
	}

	return 0;
}

/*
 * osnoise_workload_stop - stop the workload and unhook the events
 */
static void osnoise_workload_stop(void)
{
	/*
	 * Instances need to be unregistered before calling
	 * stop. Hence, if there is a registered instance, more
	 * than one instance is running, and the workload will not
	 * yet stop. Otherwise, this code is on the way to disable
	 * the last instance, and the workload can stop.
	 */
	if (osnoise_has_registered_instances())
		return;

	/*
	 * If callbacks were already disabled in a previous stop
	 * call, there is no need to disable them again.
	 *
	 * For instance, this happens when tracing is stopped via:
	 * echo 0 > tracing_on
	 * echo nop > current_tracer.
	 */
	if (!trace_osnoise_callback_enabled)
		return;

	trace_osnoise_callback_enabled = false;
	/*
	 * Make sure that ftrace_nmi_enter/exit() see
	 * trace_osnoise_callback_enabled as false before continuing.
	 */
	barrier();

	stop_per_cpu_kthreads();

	osnoise_unhook_events();
}

static void osnoise_tracer_start(struct trace_array *tr)
{
	int retval;

	/*
	 * If the instance is already registered, there is no need to
	 * register it again.
	 */
	if (osnoise_instance_registered(tr))
		return;

	retval = osnoise_workload_start();
	if (retval)
		pr_err(BANNER "Error starting osnoise tracer\n");

	osnoise_register_instance(tr);
}

static void osnoise_tracer_stop(struct trace_array *tr)
{
	osnoise_unregister_instance(tr);
	osnoise_workload_stop();
}
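/*
 * Usage sketch: the tracers defined below are selected like any other,
 * e.g. (tracefs assumed at /sys/kernel/tracing):
 *
 *   # echo osnoise > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/trace
 *
 * or "echo timerlat > current_tracer" when CONFIG_TIMERLAT_TRACER is
 * enabled. The two are mutually exclusive, as checked in the init
 * callbacks below.
 */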
static int osnoise_tracer_init(struct trace_array *tr)
{
	/*
	 * Only allow osnoise tracer if timerlat tracer is not running
	 * already.
	 */
	if (timerlat_enabled())
		return -EBUSY;

	tr->max_latency = 0;

	osnoise_tracer_start(tr);
	return 0;
}

static void osnoise_tracer_reset(struct trace_array *tr)
{
	osnoise_tracer_stop(tr);
}

static struct tracer osnoise_tracer __read_mostly = {
	.name		= "osnoise",
	.init		= osnoise_tracer_init,
	.reset		= osnoise_tracer_reset,
	.start		= osnoise_tracer_start,
	.stop		= osnoise_tracer_stop,
	.print_header	= print_osnoise_headers,
	.allow_instances = true,
};

#ifdef CONFIG_TIMERLAT_TRACER
static void timerlat_tracer_start(struct trace_array *tr)
{
	int retval;

	/*
	 * If the instance is already registered, there is no need to
	 * register it again.
	 */
	if (osnoise_instance_registered(tr))
		return;

	retval = osnoise_workload_start();
	if (retval)
		pr_err(BANNER "Error starting timerlat tracer\n");

	osnoise_register_instance(tr);

	return;
}

static void timerlat_tracer_stop(struct trace_array *tr)
{
	int cpu;

	osnoise_unregister_instance(tr);

	/*
	 * Instruct the threads to stop only if this is the last instance.
	 */
	if (!osnoise_has_registered_instances()) {
		for_each_online_cpu(cpu)
			per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
	}

	osnoise_workload_stop();
}

static int timerlat_tracer_init(struct trace_array *tr)
{
	/*
	 * Only allow timerlat tracer if osnoise tracer is not running already.
	 */
	if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
		return -EBUSY;

	/*
	 * If this is the first instance, set timerlat_tracer to block
	 * osnoise tracer start.
	 */
	if (!osnoise_has_registered_instances())
		osnoise_data.timerlat_tracer = 1;

	tr->max_latency = 0;
	timerlat_tracer_start(tr);

	return 0;
}

static void timerlat_tracer_reset(struct trace_array *tr)
{
	timerlat_tracer_stop(tr);

	/*
	 * If this is the last instance, reset timerlat_tracer allowing
	 * osnoise to be started.
	 */
	if (!osnoise_has_registered_instances())
		osnoise_data.timerlat_tracer = 0;
}

static struct tracer timerlat_tracer __read_mostly = {
	.name		= "timerlat",
	.init		= timerlat_tracer_init,
	.reset		= timerlat_tracer_reset,
	.start		= timerlat_tracer_start,
	.stop		= timerlat_tracer_stop,
	.print_header	= print_timerlat_headers,
	.allow_instances = true,
};

__init static int init_timerlat_tracer(void)
{
	return register_tracer(&timerlat_tracer);
}
#else /* CONFIG_TIMERLAT_TRACER */
__init static int init_timerlat_tracer(void)
{
	return 0;
}
#endif /* CONFIG_TIMERLAT_TRACER */

__init static int init_osnoise_tracer(void)
{
	int ret;

	mutex_init(&interface_lock);

	cpumask_copy(&osnoise_cpumask, cpu_all_mask);

	ret = register_tracer(&osnoise_tracer);
	if (ret) {
		pr_err(BANNER "Error registering osnoise!\n");
		return ret;
	}

	ret = init_timerlat_tracer();
	if (ret) {
		pr_err(BANNER "Error registering timerlat!\n");
		return ret;
	}

	osnoise_init_hotplug_support();

	INIT_LIST_HEAD_RCU(&osnoise_instances);

	init_tracefs();

	return 0;
}
late_initcall(init_osnoise_tracer);