// SPDX-License-Identifier: GPL-2.0
/*
 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
 *
 * Based on "hwlat_detector" tracer by:
 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *   With feedback from Clark Williams <williams@redhat.com>
 *
 * And also based on the rtsl tracer presented on:
 *   DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
 *   scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
 *   (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum für Informatik, 2020.
 *
 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
 */

#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched.h>
#include "trace.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/trace/irq_vectors.h>
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#endif /* CONFIG_X86_LOCAL_APIC */

#include <trace/events/irq.h>
#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/osnoise.h>

/*
 * Default values.
 */
#define BANNER			"osnoise: "
#define DEFAULT_SAMPLE_PERIOD	1000000		/* 1s */
#define DEFAULT_SAMPLE_RUNTIME	1000000		/* 1s */

#define DEFAULT_TIMERLAT_PERIOD	1000		/* 1ms */
#define DEFAULT_TIMERLAT_PRIO	95		/* FIFO 95 */

/*
 * osnoise/options entries.
 */
enum osnoise_options_index {
	OSN_DEFAULTS = 0,
	OSN_WORKLOAD,
	OSN_PANIC_ON_STOP,
	OSN_PREEMPT_DISABLE,
	OSN_IRQ_DISABLE,
	OSN_MAX
};

static const char * const osnoise_options_str[OSN_MAX] = {
	"DEFAULTS",
	"OSNOISE_WORKLOAD",
	"PANIC_ON_STOP",
	"OSNOISE_PREEMPT_DISABLE",
	"OSNOISE_IRQ_DISABLE" };

#define OSN_DEFAULT_OPTIONS	0x2
static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS;
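
/*
 * With OSN_DEFAULT_OPTIONS == 0x2, only bit OSN_WORKLOAD (1 << 1) is set:
 * by default the tracer dispatches its own workload threads and all other
 * options are off. For instance:
 *
 *	test_bit(OSN_WORKLOAD, &osnoise_options)	-> true by default
 *	test_bit(OSN_IRQ_DISABLE, &osnoise_options)	-> false by default
 */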

/*
 * trace_array of the enabled osnoise/timerlat instances.
 */
struct osnoise_instance {
	struct list_head	list;
	struct trace_array	*tr;
};

static struct list_head osnoise_instances;

static bool osnoise_has_registered_instances(void)
{
	return !!list_first_or_null_rcu(&osnoise_instances,
					struct osnoise_instance,
					list);
}

/*
 * osnoise_instance_registered - check if a tr is already registered
 */
static int osnoise_instance_registered(struct trace_array *tr)
{
	struct osnoise_instance *inst;
	int found = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		if (inst->tr == tr)
			found = 1;
	}
	rcu_read_unlock();

	return found;
}

/*
 * osnoise_register_instance - register a new trace instance
 *
 * Register a trace_array *tr in the list of instances running
 * osnoise/timerlat tracers.
 */
static int osnoise_register_instance(struct trace_array *tr)
{
	struct osnoise_instance *inst;

	/*
	 * register/unregister serialization is provided by trace's
	 * trace_types_lock.
	 */
	lockdep_assert_held(&trace_types_lock);

	inst = kmalloc(sizeof(*inst), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	INIT_LIST_HEAD_RCU(&inst->list);
	inst->tr = tr;
	list_add_tail_rcu(&inst->list, &osnoise_instances);

	return 0;
}

/*
 * osnoise_unregister_instance - unregister a registered trace instance
 *
 * Remove the trace_array *tr from the list of instances running
 * osnoise/timerlat tracers.
 */
static void osnoise_unregister_instance(struct trace_array *tr)
{
	struct osnoise_instance *inst;
	int found = 0;

	/*
	 * register/unregister serialization is provided by trace's
	 * trace_types_lock.
	 */
	list_for_each_entry_rcu(inst, &osnoise_instances, list,
				lockdep_is_held(&trace_types_lock)) {
		if (inst->tr == tr) {
			list_del_rcu(&inst->list);
			found = 1;
			break;
		}
	}

	if (!found)
		return;

	kvfree_rcu_mightsleep(inst);
}

/*
 * NMI runtime info.
 */
struct osn_nmi {
	u64	count;
	u64	delta_start;
};

/*
 * IRQ runtime info.
 */
struct osn_irq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

#define IRQ_CONTEXT	0
#define THREAD_CONTEXT	1
#define THREAD_URET	2
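
/*
 * Context in which a timerlat latency sample was taken: at the timer IRQ
 * handler (IRQ_CONTEXT), at the timerlat kernel thread (THREAD_CONTEXT),
 * or at the return to a user-space timerlat workload (THREAD_URET).
 */
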
/*
 * softirq runtime info.
 */
struct osn_softirq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * thread runtime info.
 */
struct osn_thread {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
	struct task_struct	*kthread;
	bool			sampling;
	pid_t			pid;
	struct osn_nmi		nmi;
	struct osn_irq		irq;
	struct osn_softirq	softirq;
	struct osn_thread	thread;
	local_t			int_counter;
};

/*
 * Per-cpu runtime information.
 */
static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);

/*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
 */
static inline struct osnoise_variables *this_cpu_osn_var(void)
{
	return this_cpu_ptr(&per_cpu_osnoise_var);
}

/*
 * Protect the interface.
 */
static struct mutex interface_lock;

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
	struct task_struct	*kthread;
	struct hrtimer		timer;
	u64			rel_period;
	u64			abs_period;
	bool			tracing_thread;
	u64			count;
	bool			uthread_migrate;
};

static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);

/*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
 */
static inline struct timerlat_variables *this_cpu_tmr_var(void)
{
	return this_cpu_ptr(&per_cpu_timerlat_var);
}

/*
 * tlat_var_reset - Reset the values of the given timerlat_variables
 */
static inline void tlat_var_reset(void)
{
	struct timerlat_variables *tlat_var;
	int cpu;

	/* Synchronize with the timerlat interfaces */
	mutex_lock(&interface_lock);
	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
		if (tlat_var->kthread)
			hrtimer_cancel(&tlat_var->timer);
		memset(tlat_var, 0, sizeof(*tlat_var));
	}
	mutex_unlock(&interface_lock);
}
#else /* CONFIG_TIMERLAT_TRACER */
#define tlat_var_reset()	do {} while (0)
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * osn_var_reset - Reset the values of the given osnoise_variables
 */
static inline void osn_var_reset(void)
{
	struct osnoise_variables *osn_var;
	int cpu;

	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
		memset(osn_var, 0, sizeof(*osn_var));
	}
}

/*
 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
 */
static inline void osn_var_reset_all(void)
{
	osn_var_reset();
	tlat_var_reset();
}

/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 */
bool trace_osnoise_callback_enabled;

/*
 * Tracer data.
 */
static struct osnoise_data {
	u64	sample_period;		/* total sampling period */
	u64	sample_runtime;		/* active sampling portion of period */
	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
	u64	timerlat_period;	/* timerlat period */
	u64	print_stack;		/* print IRQ stack if total > */
	int	timerlat_tracer;	/* timerlat tracer */
#endif
	bool	tainted;		/* inform users and developers about a problem */
} osnoise_data = {
	.sample_period		= DEFAULT_SAMPLE_PERIOD,
	.sample_runtime		= DEFAULT_SAMPLE_RUNTIME,
	.stop_tracing		= 0,
	.stop_tracing_total	= 0,
#ifdef CONFIG_TIMERLAT_TRACER
	.print_stack		= 0,
	.timerlat_period	= DEFAULT_TIMERLAT_PERIOD,
	.timerlat_tracer	= 0,
#endif
};

#ifdef CONFIG_TIMERLAT_TRACER
static inline bool timerlat_enabled(void)
{
	return osnoise_data.timerlat_tracer;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
	/*
	 * If the timerlat is enabled, but the irq handler did
	 * not run yet enabling timerlat_tracer, do not trace.
	 */
	if (!tlat_var->tracing_thread) {
		osn_var->softirq.arrival_time = 0;
		osn_var->softirq.delta_start = 0;
		return 0;
	}
	return 1;
}

static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
	/*
	 * If the timerlat is enabled, but the irq handler did
	 * not run yet enabling timerlat_tracer, do not trace.
	 */
	if (!tlat_var->tracing_thread) {
		osn_var->thread.delta_start = 0;
		osn_var->thread.arrival_time = 0;
		return 0;
	}
	return 1;
}
#else /* CONFIG_TIMERLAT_TRACER */
static inline bool timerlat_enabled(void)
{
	return false;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
#endif

#ifdef CONFIG_PREEMPT_RT
/*
 * Print the osnoise header info.
 */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "# _-------=> irqs-off\n");
	seq_puts(s, "# / _------=> need-resched\n");
	seq_puts(s, "# | / _-----=> need-resched-lazy\n");
	seq_puts(s, "# || / _----=> hardirq/softirq\n");
	seq_puts(s, "# ||| / _---=> preempt-depth\n");
	seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "# ||||| / _-=> migrate-disable\n");

	seq_puts(s, "# |||||| / ");
	seq_puts(s, " MAX\n");

	seq_puts(s, "# ||||| / ");
	seq_puts(s, " SINGLE Interference counters:\n");

	seq_puts(s, "# ||||||| RUNTIME ");
	seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n");

	seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US ");
	seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n");

	seq_puts(s, "# | | | ||||||| | | ");
	seq_puts(s, " | | | | | | | |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "# _-----=> irqs-off\n");
	seq_puts(s, "# / _----=> need-resched\n");
	seq_puts(s, "# | / _---=> hardirq/softirq\n");
	seq_puts(s, "# || / _--=> preempt-depth\n");
	seq_puts(s, "# ||| / _-=> migrate-disable ");
	seq_puts(s, " MAX\n");
	seq_puts(s, "# |||| / delay ");
	seq_puts(s, " SINGLE Interference counters:\n");

	seq_puts(s, "# ||||| RUNTIME ");
	seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n");

	seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US ");
	seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n");

	seq_puts(s, "# | | | ||||| | | ");
	seq_puts(s, " | | | | | | | |\n");
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * osnoise_taint - report an osnoise error.
 */
#define osnoise_taint(msg) ({							\
	struct osnoise_instance *inst;						\
	struct trace_buffer *buffer;						\
										\
	rcu_read_lock();							\
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {		\
		buffer = inst->tr->array_buffer.buffer;				\
		trace_array_printk_buf(buffer, _THIS_IP_, msg);			\
	}									\
	rcu_read_unlock();							\
	osnoise_data.tainted = true;						\
})
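
/*
 * osnoise_taint() writes msg to the trace buffer of all registered
 * instances and sets osnoise_data.tainted, which makes the output headers
 * print "# osnoise is tainted!". A typical use, as in
 * get_int_safe_duration() below:
 *
 *	if (duration < 0)
 *		osnoise_taint("Negative duration!\n");
 */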

/*
 * Record an osnoise_sample into the tracer buffer.
 */
static void
__record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
{
	struct ring_buffer_event *event;
	struct osnoise_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->runtime = sample->runtime;
	entry->noise = sample->noise;
	entry->max_sample = sample->max_sample;
	entry->hw_count = sample->hw_count;
	entry->nmi_count = sample->nmi_count;
	entry->irq_count = sample->irq_count;
	entry->softirq_count = sample->softirq_count;
	entry->thread_count = sample->thread_count;

	trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record an osnoise_sample on all osnoise instances and fire trace event.
 */
static void record_osnoise_sample(struct osnoise_sample *sample)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;

	trace_osnoise_sample(sample);

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__record_osnoise_sample(sample, buffer);
	}
	rcu_read_unlock();
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Print the timerlat header info.
 */
#ifdef CONFIG_PREEMPT_RT
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "# _-------=> irqs-off\n");
	seq_puts(s, "# / _------=> need-resched\n");
	seq_puts(s, "# | / _-----=> need-resched-lazy\n");
	seq_puts(s, "# || / _----=> hardirq/softirq\n");
	seq_puts(s, "# ||| / _---=> preempt-depth\n");
	seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "# ||||| / _-=> migrate-disable\n");
	seq_puts(s, "# |||||| /\n");
	seq_puts(s, "# ||||||| ACTIVATION\n");
	seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID ");
	seq_puts(s, " CONTEXT LATENCY\n");
	seq_puts(s, "# | | | ||||||| | | ");
	seq_puts(s, " | |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "# _-----=> irqs-off\n");
	seq_puts(s, "# / _----=> need-resched\n");
	seq_puts(s, "# | / _---=> hardirq/softirq\n");
	seq_puts(s, "# || / _--=> preempt-depth\n");
	seq_puts(s, "# ||| / _-=> migrate-disable\n");
	seq_puts(s, "# |||| / delay\n");
	seq_puts(s, "# ||||| ACTIVATION\n");
	seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID ");
	seq_puts(s, " CONTEXT LATENCY\n");
	seq_puts(s, "# | | | ||||| | | ");
	seq_puts(s, " | |\n");
}
#endif /* CONFIG_PREEMPT_RT */

static void
__record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
{
	struct ring_buffer_event *event;
	struct timerlat_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->seqnum = sample->seqnum;
	entry->context = sample->context;
	entry->timer_latency = sample->timer_latency;

	trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record a timerlat_sample into the tracer buffer.
 */
static void record_timerlat_sample(struct timerlat_sample *sample)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;

	trace_timerlat_sample(sample);

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__record_timerlat_sample(sample, buffer);
	}
	rcu_read_unlock();
}

#ifdef CONFIG_STACKTRACE

#define	MAX_CALLS	256

/*
 * Stack trace will take place only at IRQ level, so there is no need
 * to control nesting here.
 */
struct trace_stack {
	int		stack_size;
	int		nr_entries;
	unsigned long	calls[MAX_CALLS];
};

static DEFINE_PER_CPU(struct trace_stack, trace_stack);

/*
 * timerlat_save_stack - save a stack trace without printing
 *
 * Save the current stack trace without printing. The
 * stack will be printed later, after the end of the measurement.
 */
static void timerlat_save_stack(int skip)
{
	unsigned int size, nr_entries;
	struct trace_stack *fstack;

	fstack = this_cpu_ptr(&trace_stack);

	size = ARRAY_SIZE(fstack->calls);

	nr_entries = stack_trace_save(fstack->calls, size, skip);

	fstack->stack_size = nr_entries * sizeof(unsigned long);
	fstack->nr_entries = nr_entries;
}

static void
__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
{
	struct ring_buffer_event *event;
	struct stack_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
					  tracing_gen_ctx());
	if (!event)
		return;

	entry = ring_buffer_event_data(event);

	memcpy(&entry->caller, fstack->calls, size);
	entry->size = fstack->nr_entries;

	trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * timerlat_dump_stack - dump a stack trace previously saved
 */
static void timerlat_dump_stack(u64 latency)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;
	struct trace_stack *fstack;
	unsigned int size;

	/*
	 * trace only if latency > print_stack config, if enabled.
	 */
	if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
		return;

	preempt_disable_notrace();
	fstack = this_cpu_ptr(&trace_stack);
	size = fstack->stack_size;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__timerlat_dump_stack(buffer, fstack, size);
	}
	rcu_read_unlock();
	preempt_enable_notrace();
}
#else /* CONFIG_STACKTRACE */
#define timerlat_dump_stack(latency)	do {} while (0)
#define timerlat_save_stack(a)		do {} while (0)
#endif /* CONFIG_STACKTRACE */
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * Macros to encapsulate the time capturing infrastructure.
 */
#define time_get()		trace_clock_local()
#define time_to_us(x)		div_u64(x, 1000)
#define time_sub(a, b)		((a) - (b))
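
/*
 * All timestamps are taken in nanoseconds (trace_clock_local()) and only
 * converted to microseconds at reporting time. For instance, a window
 * measured as:
 *
 *	start = time_get();
 *	...
 *	duration = time_sub(time_get(), start);
 *
 * holds nanoseconds, and is reported as time_to_us(duration).
 */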

/*
 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
 *
 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
 * to discount the NMI interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->irq.delta_start)
		osn_var->irq.delta_start += duration;
}

#ifndef CONFIG_PREEMPT_RT
/*
 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
 *
 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
 * forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->softirq.delta_start)
		osn_var->softirq.delta_start += duration;
}
#else /* CONFIG_PREEMPT_RT */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
#endif

/*
 * cond_move_thread_delta_start - Forward the delta_start of a running thread
 *
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
 * is pushed forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->thread.delta_start)
		osn_var->thread.delta_start += duration;
}

/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have their duration without
 * the interference from higher priority interrupts. Instead of keeping a
 * variable to discount the interrupt interference from these variables, the
 * starting time of these variables are pushed forward with the interrupt's
 * duration. In this way, a single variable is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.:
 *
 *	now = time_get()
 *		--->	interrupt!
 *			delta_start -= int duration;
 *		<---
 *	duration = now - delta_start;
 *
 *	result: negative duration if the variable duration before the
 *	interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
	u64 int_counter, now;
	s64 duration;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		now = time_get();
		duration = (now - *delta_start);

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	/*
	 * This is evidence of a race condition that caused
	 * a value to be "discounted" too much.
	 */
	if (duration < 0)
		osnoise_taint("Negative duration!\n");

	*delta_start = 0;

	return duration;
}
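
/*
 * Worked example of the scheme above: a thread window starts and its
 * delta_start is set at t = 100us. An IRQ then runs for 30us, and
 * cond_move_thread_delta_start() pushes delta_start forward to 130us.
 * Reading the duration at t = 200us yields 70us of thread noise: the IRQ
 * interference is already discounted, with no extra variable needed.
 */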

/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts.
 *
 * See get_int_safe_duration() for an explanation.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*time = time_get();

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
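
/*
 * The returned int_counter lets callers detect whether any interrupt hit
 * the window between two timestamps. run_osnoise() relies on exactly this:
 * if the counter did not move across a gap above the threshold
 * (interference == 0), the noise is attributed to hardware (hw_count++),
 * since no OS-level interference was accounted for it.
 */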

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * copy_int_safe_time - Copy *src into *dst, aware of interference
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*dst = *src;

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * trace_osnoise_callback - NMI entry/exit callback
 *
 * This function is called at the entry and exit of the NMI code. The bool
 * enter distinguishes between the two cases. This function is used to note
 * an NMI occurrence, compute the noise caused by the NMI, and to remove the
 * noise it is potentially causing on other interference variables.
 */
void trace_osnoise_callback(bool enter)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 duration;

	if (!osn_var->sampling)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter) {
			osn_var->nmi.delta_start = time_get();
			local_inc(&osn_var->int_counter);
		} else {
			duration = time_get() - osn_var->nmi.delta_start;

			trace_nmi_noise(osn_var->nmi.delta_start, duration);

			cond_move_irq_delta_start(osn_var, duration);
			cond_move_softirq_delta_start(osn_var, duration);
			cond_move_thread_delta_start(osn_var, duration);
		}
	}

	if (enter)
		osn_var->nmi.count++;
}

/*
 * osnoise_trace_irq_entry - Note the starting of an IRQ
 *
 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
 * The arrival_time is used to report... the arrival time. The delta_start
 * is used to compute the duration at the IRQ exit handler. See
 * cond_move_irq_delta_start().
 */
void osnoise_trace_irq_entry(int id)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->irq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
	osn_var->irq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
 *
 * Computes the duration of the IRQ noise, and traces it. Also discounts the
 * interference from other sources of noise that could currently be accounted.
 */
void osnoise_trace_irq_exit(int id, const char *desc)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	s64 duration;

	if (!osn_var->sampling)
		return;

	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
	osn_var->irq.arrival_time = 0;
	cond_move_softirq_delta_start(osn_var, duration);
	cond_move_thread_delta_start(osn_var, duration);
}

/*
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry traceevent
 *
 * Used to note the starting of an IRQ occurrence.
 */
static void trace_irqentry_callback(void *data, int irq,
				    struct irqaction *action)
{
	osnoise_trace_irq_entry(irq);
}

/*
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit traceevent
 *
 * Used to note the end of an IRQ occurrence.
 */
static void trace_irqexit_callback(void *data, int irq,
				   struct irqaction *action, int ret)
{
	osnoise_trace_irq_exit(irq, action->name);
}

/*
 * arch specific register function.
 */
int __weak osnoise_arch_register(void)
{
	return 0;
}

/*
 * arch specific unregister function.
 */
void __weak osnoise_arch_unregister(void)
{
}

/*
 * hook_irq_events - Hook IRQ handling events
 *
 * This function hooks the IRQ related callbacks to the respective trace
 * events.
 */
static int hook_irq_events(void)
{
	int ret;

	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	if (ret)
		goto out_unregister_entry;

	ret = osnoise_arch_register();
	if (ret)
		goto out_irq_exit;

	return 0;

out_irq_exit:
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
out_unregister_entry:
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_irq_events - Unhook IRQ handling events
 *
 * This function unhooks the IRQ related callbacks from the respective trace
 * events.
 */
static void unhook_irq_events(void)
{
	osnoise_arch_unregister();
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
}
#ifndef CONFIG_PREEMPT_RT
/*
 * trace_softirq_entry_callback - Note the starting of a softirq
 *
 * Save the starting time of a softirq. As softirqs are non-preemptive to
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
 * to save the statistics. The arrival_time is used to report... the
 * arrival time. The delta_start is used to compute the duration at the
 * softirq exit handler. See cond_move_softirq_delta_start().
 */
static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->softirq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
	osn_var->softirq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Computes the duration of the softirq noise, and traces it. Also discounts
 * the interference from other sources of noise that could currently be
 * accounted.
 */
static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	s64 duration;

	if (!osn_var->sampling)
		return;

	if (unlikely(timerlat_enabled()))
		if (!timerlat_softirq_exit(osn_var))
			return;

	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
	cond_move_thread_delta_start(osn_var, duration);
	osn_var->softirq.arrival_time = 0;
}

/*
 * hook_softirq_events - Hook softirq handling events
 *
 * This function hooks the softirq related callbacks to the respective trace
 * events.
 */
static int hook_softirq_events(void)
{
	int ret;

	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
	if (ret)
		goto out_unreg_entry;

	return 0;

out_unreg_entry:
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_softirq_events - Unhook softirq handling events
 *
 * This function unhooks the softirq related callbacks from the respective
 * trace events.
 */
static void unhook_softirq_events(void)
{
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
}
#else /* CONFIG_PREEMPT_RT */
/*
 * softirqs are threads on PREEMPT_RT mode.
 */
static int hook_softirq_events(void)
{
	return 0;
}
static void unhook_softirq_events(void)
{
}
#endif

/*
 * thread_entry - Record the starting of a thread noise window
 *
 * It saves the context switch time for a noisy thread, and increments
 * the interference counters.
 */
static void
thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
{
	if (!osn_var->sampling)
		return;
	/*
	 * The arrival time will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->thread.arrival_time = time_get();

	set_int_safe_time(osn_var, &osn_var->thread.delta_start);

	osn_var->thread.count++;
	local_inc(&osn_var->int_counter);
}

/*
 * thread_exit - Report the end of a thread noise window
 *
 * It computes the total noise from a thread, tracing if needed.
 */
static void
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
	s64 duration;

	if (!osn_var->sampling)
		return;

	if (unlikely(timerlat_enabled()))
		if (!timerlat_thread_exit(osn_var))
			return;

	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);

	trace_thread_noise(t, osn_var->thread.arrival_time, duration);

	osn_var->thread.arrival_time = 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise_stop_exception - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_exception(char *msg, int cpu)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d due to exception: %s\n",
				       smp_processor_id(),
				       msg);

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit on cpu %d due to exception: %s\n",
			      smp_processor_id(),
			      msg);

		tracer_tracing_off(tr);
	}
	rcu_read_unlock();
}

/*
 * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler
 *
 * This function is hooked to the sched:sched_migrate_task trace event, and
 * monitors timerlat user-space thread migration.
 */
static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
{
	struct osnoise_variables *osn_var;
	long cpu = task_cpu(p);

	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
	if (osn_var->pid == p->pid && dest_cpu != cpu) {
		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
		osnoise_taint("timerlat user-thread migrated\n");
		osnoise_stop_exception("timerlat user-thread migrated", cpu);
	}
}

static bool monitor_enabled;

static int register_migration_monitor(void)
{
	int ret = 0;

	/*
	 * Timerlat thread migration check is only required when running timerlat in user-space.
	 * Thus, enable callback only if timerlat is set with no workload.
	 */
	if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) {
		if (WARN_ON_ONCE(monitor_enabled))
			return 0;

		ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
		if (!ret)
			monitor_enabled = true;
	}

	return ret;
}

static void unregister_migration_monitor(void)
{
	if (!monitor_enabled)
		return;

	unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
	monitor_enabled = false;
}
#else
static int register_migration_monitor(void)
{
	return 0;
}
static void unregister_migration_monitor(void) {}
#endif
/*
 * trace_sched_switch_callback - sched:sched_switch trace event handler
 *
 * This function is hooked to the sched:sched_switch trace event, and it is
 * used to record the beginning and to report the end of a thread noise window.
 */
static void
trace_sched_switch_callback(void *data, bool preempt,
			    struct task_struct *p,
			    struct task_struct *n,
			    unsigned int prev_state)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	int workload = test_bit(OSN_WORKLOAD, &osnoise_options);

	if ((p->pid != osn_var->pid) || !workload)
		thread_exit(osn_var, p);

	if ((n->pid != osn_var->pid) || !workload)
		thread_entry(osn_var, n);
}

/*
 * hook_thread_events - Hook the instrumentation for thread noise
 *
 * Hook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static int hook_thread_events(void)
{
	int ret;

	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
	if (ret)
		return -EINVAL;

	ret = register_migration_monitor();
	if (ret)
		goto out_unreg;

	return 0;

out_unreg:
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
	return -EINVAL;
}

/*
 * unhook_thread_events - unhook the instrumentation for thread noise
 *
 * Unhook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static void unhook_thread_events(void)
{
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
	unregister_migration_monitor();
}

/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count;
	s->irq_count = osn_var->irq.count;
	s->softirq_count = osn_var->softirq.count;
	s->thread_count = osn_var->thread.count;
}

/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count - s->nmi_count;
	s->irq_count = osn_var->irq.count - s->irq_count;
	s->softirq_count = osn_var->softirq.count - s->softirq_count;
	s->thread_count = osn_var->thread.count - s->thread_count;
}

/*
 * osnoise_stop_tracing - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_tracing(void)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d\n", smp_processor_id());

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit stop condition on CPU %d\n", smp_processor_id());

		tracer_tracing_off(tr);
	}
	rcu_read_unlock();
}

/*
 * osnoise_has_tracing_on - Check if there is at least one instance on
 */
static __always_inline int osnoise_has_tracing_on(void)
{
	struct osnoise_instance *inst;
	int trace_is_on = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list)
		trace_is_on += tracer_tracing_is_on(inst->tr);
	rcu_read_unlock();

	return trace_is_on;
}

/*
 * notify_new_max_latency - Notify a new max latency via fsnotify interface.
 */
static void notify_new_max_latency(u64 latency)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
			tr->max_latency = latency;
			latency_fsnotify(tr);
		}
	}
	rcu_read_unlock();
}

/*
 * run_osnoise - Sample the time and look for osnoise
 *
 * Used to capture the time, looking for potential osnoise latency repeatedly.
 * Different from hwlat_detector, it is called with preemption and interrupts
 * enabled. This allows irqs, softirqs and threads to run, interfering on the
 * osnoise sampling thread, as they would do with a regular thread.
 */
static int run_osnoise(void)
{
	bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 start, sample, last_sample;
	u64 last_int_count, int_count;
	s64 noise = 0, max_noise = 0;
	s64 total, last_total = 0;
	struct osnoise_sample s;
	bool disable_preemption;
	unsigned int threshold;
	u64 runtime, stop_in;
	u64 sum_noise = 0;
	int hw_count = 0;
	int ret = -1;

	/*
	 * Disabling preemption is only required if IRQs are enabled,
	 * and the option is set on.
	 */
	disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);

	/*
	 * Considers the current thread as the workload.
	 */
	osn_var->pid = current->pid;

	/*
	 * Save the current stats for the diff
	 */
	save_osn_sample_stats(osn_var, &s);

	/*
	 * if threshold is 0, use the default value of 1 us.
	 */
	threshold = tracing_thresh ? : 1000;

	/*
	 * Apply PREEMPT and IRQ disabled options.
	 */
	if (disable_irq)
		local_irq_disable();

	if (disable_preemption)
		preempt_disable();

	/*
	 * Make sure NMIs see sampling first
	 */
	osn_var->sampling = true;
	barrier();

	/*
	 * Transform the *_us config to nanoseconds to avoid the
	 * division on the main loop.
	 */
	runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
	stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;

	/*
	 * Start timestamp
	 */
	start = time_get();

	/*
	 * "previous" loop.
	 */
	last_int_count = set_int_safe_time(osn_var, &last_sample);

	do {
		/*
		 * Get sample!
		 */
		int_count = set_int_safe_time(osn_var, &sample);

		noise = time_sub(sample, last_sample);

		/*
		 * This shouldn't happen.
		 */
		if (noise < 0) {
			osnoise_taint("negative noise!");
			goto out;
		}

		/*
		 * Sample runtime.
		 */
		total = time_sub(sample, start);

		/*
		 * Check for possible overflows.
		 */
		if (total < last_total) {
			osnoise_taint("total overflow!");
			break;
		}

		last_total = total;

		if (noise >= threshold) {
			int interference = int_count - last_int_count;

			if (noise > max_noise)
				max_noise = noise;

			if (!interference)
				hw_count++;

			sum_noise += noise;

			trace_sample_threshold(last_sample, noise, interference);

			if (osnoise_data.stop_tracing)
				if (noise > stop_in)
					osnoise_stop_tracing();
		}

		/*
		 * In some cases, notably when running on a nohz_full CPU with
		 * a stopped tick, PREEMPT_RCU or PREEMPT_LAZY have no way to
		 * account for QSs. This will eventually cause unwarranted
		 * noise as RCU forces preemption as the means of ending the
		 * current grace period. We avoid this by calling
		 * rcu_momentary_eqs(), which performs a zero duration EQS
		 * allowing RCU to end the current grace period. This call
		 * shouldn't be wrapped inside an RCU critical section.
		 *
		 * Normally QSs for other cases are handled through cond_resched().
		 * For simplicity, however, we call rcu_momentary_eqs() for all
		 * configurations here.
		 */
		if (!disable_irq)
			local_irq_disable();

		rcu_momentary_eqs();

		if (!disable_irq)
			local_irq_enable();

		/*
		 * For the non-preemptive kernel config: let threads run, if
		 * they so wish, unless set not to do so.
		 */
		if (!disable_irq && !disable_preemption)
			cond_resched();

		last_sample = sample;
		last_int_count = int_count;

	} while (total < runtime && !kthread_should_stop());

	/*
	 * Finish the above in the view for interrupts.
	 */
	barrier();

	osn_var->sampling = false;

	/*
	 * Make sure sampling data is no longer updated.
	 */
	barrier();

	/*
	 * Return to the preemptive state.
	 */
	if (disable_preemption)
		preempt_enable();

	if (disable_irq)
		local_irq_enable();

	/*
	 * Save noise info.
	 */
	s.noise = time_to_us(sum_noise);
	s.runtime = time_to_us(total);
	s.max_sample = time_to_us(max_noise);
	s.hw_count = hw_count;

	/* Save interference stats info */
	diff_osn_sample_stats(osn_var, &s);

	record_osnoise_sample(&s);

	notify_new_max_latency(max_noise);

	if (osnoise_data.stop_tracing_total)
		if (s.noise > osnoise_data.stop_tracing_total)
			osnoise_stop_tracing();

	return 0;
out:
	return ret;
}

static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;
static struct cpumask kthread_cpumask;

/*
 * osnoise_sleep - sleep until the next period
 */
static void osnoise_sleep(bool skip_period)
{
	u64 interval;
	ktime_t wake_time;

	mutex_lock(&interface_lock);
	if (skip_period)
		interval = osnoise_data.sample_period;
	else
		interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
	mutex_unlock(&interface_lock);

	/*
	 * Differently from hwlat_detector, the osnoise tracer can run
	 * without a pause because preemption is on.
	 */
	if (!interval) {
		/* Let synchronize_rcu_tasks() make progress */
		cond_resched_tasks_rcu_qs();
		return;
	}

	wake_time = ktime_add_us(ktime_get(), interval);
	__set_current_state(TASK_INTERRUPTIBLE);

	while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
		if (kthread_should_stop())
			break;
	}
}
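
/*
 * A worked example of the arithmetic above, assuming illustrative values:
 * with the defaults (sample_period == sample_runtime == 1000000 us), the
 * non-skip interval is 0 and the thread only yields via
 * cond_resched_tasks_rcu_qs(). With, say, sample_period = 1000000 and
 * sample_runtime = 600000, the thread samples for ~600 ms and sleeps for
 * the remaining 400 ms; when tracing is off on all instances, it skips
 * the whole 1 s period instead.
 */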

/*
 * osnoise_migration_pending - checks if the task needs to migrate
 *
 * osnoise/timerlat threads are per-cpu. If there is a pending request to
 * migrate the thread away from the current CPU, something bad has happened.
 * Play the good citizen and leave.
 *
 * Returns 0 if it is safe to continue, 1 otherwise.
 */
static inline int osnoise_migration_pending(void)
{
	if (!current->migration_pending)
		return 0;

	/*
	 * If migration is pending, there is a task waiting for the
	 * tracer to enable migration. The tracer does not allow migration,
	 * thus: taint and leave to unblock the blocked thread.
	 */
	osnoise_taint("migration requested to osnoise threads, leaving.");

	/*
	 * Unset this thread from the threads managed by the interface.
	 * The tracers are responsible for cleaning their env before
	 * exiting.
	 */
	mutex_lock(&interface_lock);
	this_cpu_osn_var()->kthread = NULL;
	cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask);
	mutex_unlock(&interface_lock);

	return 1;
}

/*
 * osnoise_main - The osnoise detection kernel thread
 *
 * Calls run_osnoise() function to measure the osnoise for the configured runtime,
 * every period.
 */
static int osnoise_main(void *data)
{
	unsigned long flags;

	/*
	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
	 *
	 * To work around this limitation, disable migration and remove the
	 * flag.
	 */
	migrate_disable();
	raw_spin_lock_irqsave(&current->pi_lock, flags);
	current->flags &= ~(PF_NO_SETAFFINITY);
	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	while (!kthread_should_stop()) {
		if (osnoise_migration_pending())
			break;

		/* skip a period if tracing is off on all instances */
		if (!osnoise_has_tracing_on()) {
			osnoise_sleep(true);
			continue;
		}

		run_osnoise();
		osnoise_sleep(false);
	}

	migrate_enable();
	return 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat_irq - hrtimer handler for timerlat.
 */
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat;
	struct timerlat_sample s;
	u64 now;
	u64 diff;

	/*
	 * I am not sure if the timer was armed for this CPU. So, get
	 * the timerlat struct from the timer itself, not from this
	 * CPU.
	 */
	tlat = container_of(timer, struct timerlat_variables, timer);

	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));

	/*
	 * Enable the osnoise: events for thread and softirq.
	 */
	tlat->tracing_thread = true;

	osn_var->thread.arrival_time = time_get();

	/*
	 * A hardirq is running: the timer IRQ. It is for sure preempting
	 * a thread, and potentially preempting a softirq.
	 *
	 * At this point, it is not interesting to know the duration of the
	 * preempted thread (and maybe softirq), but how much time they will
	 * delay the beginning of the execution of the timer thread.
	 *
	 * To get the correct (net) delay added by the softirq, its delta_start
	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
	 * start of the softirq will be zeroed, accounting then only the time
	 * after that.
	 *
	 * The thread follows the same principle. However, if a softirq is
	 * running, the thread needs to receive the softirq delta_start. The
	 * reason being is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
	 *
	 * The PREEMPT_RT is a special case, though. As softirqs run as threads
	 * on RT, moving the thread is enough.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->softirq.delta_start);

		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
				   &osn_var->irq.delta_start);
	} else {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->irq.delta_start);
	}

	/*
	 * Compare the current time with the expected time.
	 */
	diff = now - tlat->abs_period;

	tlat->count++;
	s.seqnum = tlat->count;
	s.timer_latency = diff;
	s.context = IRQ_CONTEXT;

	record_timerlat_sample(&s);

	if (osnoise_data.stop_tracing) {
		if (time_to_us(diff) >= osnoise_data.stop_tracing) {

			/*
			 * At this point, if stop_tracing is set and <= print_stack,
			 * print_stack is set and would be printed in the thread handler.
			 *
			 * Thus, print the stack trace as it is helpful to define the
			 * root cause of an IRQ latency.
			 */
			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
				timerlat_save_stack(0);
				timerlat_dump_stack(time_to_us(diff));
			}

			osnoise_stop_tracing();
			notify_new_max_latency(diff);

			wake_up_process(tlat->kthread);

			return HRTIMER_NORESTART;
		}
	}

	wake_up_process(tlat->kthread);

	if (osnoise_data.print_stack)
		timerlat_save_stack(0);

	return HRTIMER_NORESTART;
}
1814
1815 /*
1816 * wait_next_period - Wait for the next period for timerlat
1817 */
wait_next_period(struct timerlat_variables * tlat)1818 static int wait_next_period(struct timerlat_variables *tlat)
1819 {
1820 ktime_t next_abs_period, now;
1821 u64 rel_period = osnoise_data.timerlat_period * 1000;
1822
1823 now = hrtimer_cb_get_time(&tlat->timer);
1824 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1825
1826 /*
1827 * Save the next abs_period.
1828 */
1829 tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1830
1831 /*
1832 * If the new abs_period is in the past, skip the activation.
1833 */
1834 while (ktime_compare(now, next_abs_period) > 0) {
1835 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1836 tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1837 }
1838
1839 set_current_state(TASK_INTERRUPTIBLE);
1840
1841 hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
1842 schedule();
1843 return 1;
1844 }
1845
1846 /*
1847 * timerlat_main- Timerlat main
1848 */
timerlat_main(void * data)1849 static int timerlat_main(void *data)
1850 {
1851 struct osnoise_variables *osn_var = this_cpu_osn_var();
1852 struct timerlat_variables *tlat = this_cpu_tmr_var();
1853 struct timerlat_sample s;
1854 struct sched_param sp;
1855 unsigned long flags;
1856 u64 now, diff;
1857
1858 /*
1859 * Make the thread RT, that is how cyclictest is usually used.
1860 */
1861 sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1862 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1863
1864 /*
1865 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
1866 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
1867 *
1868 * To work around this limitation, disable migration and remove the
1869 * flag.
1870 */
1871 migrate_disable();
1872 raw_spin_lock_irqsave(¤t->pi_lock, flags);
1873 current->flags &= ~(PF_NO_SETAFFINITY);
1874 raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
1875
1876 tlat->count = 0;
1877 tlat->tracing_thread = false;
1878
1879 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1880 tlat->kthread = current;
1881 osn_var->pid = current->pid;
1882 /*
1883 * Anotate the arrival time.
1884 */
1885 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1886
1887 wait_next_period(tlat);
1888
1889 osn_var->sampling = 1;
1890
1891 while (!kthread_should_stop()) {
1892
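/*
 * Each iteration measures the thread-context wakeup latency
 * relative to the expected timer expiration, records it, and
 * then sleeps until the next period.
 */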
1893 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1894 diff = now - tlat->abs_period;
1895
1896 s.seqnum = tlat->count;
1897 s.timer_latency = diff;
1898 s.context = THREAD_CONTEXT;
1899
1900 record_timerlat_sample(&s);
1901
1902 notify_new_max_latency(diff);
1903
1904 timerlat_dump_stack(time_to_us(diff));
1905
1906 tlat->tracing_thread = false;
1907 if (osnoise_data.stop_tracing_total)
1908 if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1909 osnoise_stop_tracing();
1910
1911 if (osnoise_migration_pending())
1912 break;
1913
1914 wait_next_period(tlat);
1915 }
1916
1917 hrtimer_cancel(&tlat->timer);
1918 migrate_enable();
1919 return 0;
1920 }
1921 #else /* CONFIG_TIMERLAT_TRACER */
1922 static int timerlat_main(void *data)
1923 {
1924 return 0;
1925 }
1926 #endif /* CONFIG_TIMERLAT_TRACER */
1927
1928 /*
1929 * stop_kthread - stop the workload thread (kernel or user) of a CPU
1930 */
1931 static void stop_kthread(unsigned int cpu)
1932 {
1933 struct task_struct *kthread;
1934
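/*
 * Atomically take ownership of the kthread pointer, so that
 * concurrent callers do not stop (or kill) the same thread twice.
 */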
1935 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
1936 if (kthread) {
1937 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
1938 !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
1939 kthread_stop(kthread);
1940 } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) {
1941 /*
1942 * This is a user thread waiting on the timerlat_fd. We need
1943 * to close all users, and the best way to guarantee this is
1944 * by killing the thread. NOTE: this is a purpose-specific file.
1945 */
1946 kill_pid(kthread->thread_pid, SIGKILL, 1);
1947 put_task_struct(kthread);
1948 }
1949 } else {
1950 /* if no workload, just return */
1951 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1952 /*
1953 * The sampling flag was set in start_kthread() in this (no workload) case.
1954 */
1955 per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
1956 barrier();
1957 }
1958 }
1959 }
1960
1961 /*
1962 * stop_per_cpu_kthreads - Stop per-cpu threads
1963 *
1964 * Stop the osnoise sampling threads. Use this on unload and at system
1965 * shutdown.
1966 */
1967 static void stop_per_cpu_kthreads(void)
1968 {
1969 int cpu;
1970
1971 cpus_read_lock();
1972
1973 for_each_online_cpu(cpu)
1974 stop_kthread(cpu);
1975
1976 cpus_read_unlock();
1977 }
1978
1979 /*
1980 * start_kthread - Start a workload thread on a CPU
1981 */
1982 static int start_kthread(unsigned int cpu)
1983 {
1984 struct task_struct *kthread;
1985 void *main = osnoise_main;
1986 char comm[24];
1987
1988 /* Do not start a new thread if it is already running */
1989 if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
1990 return 0;
1991
1992 if (timerlat_enabled()) {
1993 snprintf(comm, 24, "timerlat/%d", cpu);
1994 main = timerlat_main;
1995 } else {
1996 /* if no workload, just return */
1997 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1998 per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
1999 barrier();
2000 return 0;
2001 }
2002 snprintf(comm, 24, "osnoise/%d", cpu);
2003 }
2004
2005 kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
2006
2007 if (IS_ERR(kthread)) {
2008 pr_err(BANNER "could not start sampling thread\n");
2009 return -ENOMEM;
2010 }
2011
2012 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
2013 cpumask_set_cpu(cpu, &kthread_cpumask);
2014
2015 return 0;
2016 }
2017
2018 /*
2019 * start_per_cpu_kthreads - Kick off per-cpu osnoise sampling kthreads
2020 *
2021 * This starts the kernel threads that will look for osnoise on the
2022 * allowed CPUs.
2023 */
2024 static int start_per_cpu_kthreads(void)
2025 {
2026 struct cpumask *current_mask = &save_cpumask;
2027 int retval = 0;
2028 int cpu;
2029
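/*
 * With NO_OSNOISE_WORKLOAD set, timerlat expects the workload to be
 * provided by user-space threads via per_cpu/cpu$ID/timerlat_fd, so
 * there is nothing to start here.
 */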
2030 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
2031 if (timerlat_enabled())
2032 return 0;
2033 }
2034
2035 cpus_read_lock();
2036 /*
2037 * Run only on the online CPUs on which osnoise is allowed to run.
2038 */
2039 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
2040
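/*
 * First, stop any kthread left behind on a CPU that is no longer in
 * the allowed mask (e.g., after the cpus interface changed).
 */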
2041 for_each_possible_cpu(cpu) {
2042 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
2043 struct task_struct *kthread;
2044
2045 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
2046 if (!WARN_ON(!kthread))
2047 kthread_stop(kthread);
2048 }
2049 }
2050
2051 for_each_cpu(cpu, current_mask) {
2052 retval = start_kthread(cpu);
2053 if (retval) {
2054 cpus_read_unlock();
2055 stop_per_cpu_kthreads();
2056 return retval;
2057 }
2058 }
2059
2060 cpus_read_unlock();
2061
2062 return retval;
2063 }
2064
2065 #ifdef CONFIG_HOTPLUG_CPU
2066 static void osnoise_hotplug_workfn(struct work_struct *dummy)
2067 {
2068 unsigned int cpu = smp_processor_id();
2069
2070 guard(mutex)(&trace_types_lock);
2071
2072 if (!osnoise_has_registered_instances())
2073 return;
2074
2075 guard(mutex)(&interface_lock);
2076 guard(cpus_read_lock)();
2077
2078 if (!cpu_online(cpu))
2079 return;
2080
2081 if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
2082 return;
2083
2084 start_kthread(cpu);
2085 }
2086
2087 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
2088
2089 /*
2090 * osnoise_cpu_init - CPU hotplug online callback function
2091 */
2092 static int osnoise_cpu_init(unsigned int cpu)
2093 {
2094 schedule_work_on(cpu, &osnoise_hotplug_work);
2095 return 0;
2096 }
2097
2098 /*
2099 * osnoise_cpu_die - CPU hotplug offline callback function
2100 */
2101 static int osnoise_cpu_die(unsigned int cpu)
2102 {
2103 stop_kthread(cpu);
2104 return 0;
2105 }
2106
2107 static void osnoise_init_hotplug_support(void)
2108 {
2109 int ret;
2110
2111 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
2112 osnoise_cpu_init, osnoise_cpu_die);
2113 if (ret < 0)
2114 pr_warn(BANNER "Error initializing CPU hotplug support\n");
2115
2116 return;
2117 }
2118 #else /* CONFIG_HOTPLUG_CPU */
2119 static void osnoise_init_hotplug_support(void)
2120 {
2121 return;
2122 }
2123 #endif /* CONFIG_HOTPLUG_CPU */
2124
2125 /*
2126 * seq file functions for the osnoise/options file.
2127 */
2128 static void *s_options_start(struct seq_file *s, loff_t *pos)
2129 {
2130 int option = *pos;
2131
2132 mutex_lock(&interface_lock);
2133
2134 if (option >= OSN_MAX)
2135 return NULL;
2136
2137 return pos;
2138 }
2139
2140 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
2141 {
2142 int option = ++(*pos);
2143
2144 if (option >= OSN_MAX)
2145 return NULL;
2146
2147 return pos;
2148 }
2149
2150 static int s_options_show(struct seq_file *s, void *v)
2151 {
2152 loff_t *pos = v;
2153 int option = *pos;
2154
2155 if (option == OSN_DEFAULTS) {
2156 if (osnoise_options == OSN_DEFAULT_OPTIONS)
2157 seq_printf(s, "%s", osnoise_options_str[option]);
2158 else
2159 seq_printf(s, "NO_%s", osnoise_options_str[option]);
2160 goto out;
2161 }
2162
2163 if (test_bit(option, &osnoise_options))
2164 seq_printf(s, "%s", osnoise_options_str[option]);
2165 else
2166 seq_printf(s, "NO_%s", osnoise_options_str[option]);
2167
2168 out:
2169 if (option != OSN_MAX)
2170 seq_puts(s, " ");
2171
2172 return 0;
2173 }
2174
2175 static void s_options_stop(struct seq_file *s, void *v)
2176 {
2177 seq_puts(s, "\n");
2178 mutex_unlock(&interface_lock);
2179 }
2180
2181 static const struct seq_operations osnoise_options_seq_ops = {
2182 .start = s_options_start,
2183 .next = s_options_next,
2184 .show = s_options_show,
2185 .stop = s_options_stop
2186 };
2187
2188 static int osnoise_options_open(struct inode *inode, struct file *file)
2189 {
2190 return seq_open(file, &osnoise_options_seq_ops);
2191 };
2192
2193 /**
2194 * osnoise_options_write - Write function for "options" entry
2195 * @filp: The active open file structure
2196 * @ubuf: The user buffer that contains the value to write
2197 * @cnt: The maximum number of bytes to write to "file"
2198 * @ppos: The current position in @filp
2199 *
2200 * Writing the option name sets the option, writing the "NO_"
2201 * prefix in front of the option name disables it.
2202 *
2203 * Writing "DEFAULTS" resets the option values to the default ones.
2204 */
2205 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
2206 size_t cnt, loff_t *ppos)
2207 {
2208 int running, option, enable, retval;
2209 char buf[256], *option_str;
2210
2211 if (cnt >= 256)
2212 return -EINVAL;
2213
2214 if (copy_from_user(buf, ubuf, cnt))
2215 return -EFAULT;
2216
2217 buf[cnt] = 0;
2218
2219 if (strncmp(buf, "NO_", 3)) {
2220 option_str = strstrip(buf);
2221 enable = true;
2222 } else {
2223 option_str = strstrip(&buf[3]);
2224 enable = false;
2225 }
2226
2227 option = match_string(osnoise_options_str, OSN_MAX, option_str);
2228 if (option < 0)
2229 return -EINVAL;
2230
2231 /*
2232 * trace_types_lock is taken to avoid concurrency on start/stop.
2233 */
2234 mutex_lock(&trace_types_lock);
2235 running = osnoise_has_registered_instances();
2236 if (running)
2237 stop_per_cpu_kthreads();
2238
2239 mutex_lock(&interface_lock);
2240 /*
2241 * Avoid CPU hotplug operations that might read the options.
2242 */
2243 cpus_read_lock();
2244
2245 retval = cnt;
2246
2247 if (enable) {
2248 if (option == OSN_DEFAULTS)
2249 osnoise_options = OSN_DEFAULT_OPTIONS;
2250 else
2251 set_bit(option, &osnoise_options);
2252 } else {
2253 if (option == OSN_DEFAULTS)
2254 retval = -EINVAL;
2255 else
2256 clear_bit(option, &osnoise_options);
2257 }
2258
2259 cpus_read_unlock();
2260 mutex_unlock(&interface_lock);
2261
2262 if (running)
2263 start_per_cpu_kthreads();
2264 mutex_unlock(&trace_types_lock);
2265
2266 return retval;
2267 }
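/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   # echo NO_OSNOISE_WORKLOAD > /sys/kernel/tracing/osnoise/options
 *   # echo DEFAULTS > /sys/kernel/tracing/osnoise/options
 *
 * The first command disables the dispatch of the kernel workload; the
 * second restores the default option values.
 */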
2268
2269 /*
2270 * osnoise_cpus_read - Read function for the "cpus" file
2271 * @filp: The active open file structure
2272 * @ubuf: The userspace provided buffer to read value into
2273 * @count: The maximum number of bytes to read
2274 * @ppos: The current "file" position
2275 *
2276 * Prints the "cpus" output into the user-provided buffer.
2277 */
2278 static ssize_t
2279 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
2280 loff_t *ppos)
2281 {
2282 char *mask_str __free(kfree) = NULL;
2283 int len;
2284
2285 guard(mutex)(&interface_lock);
2286
2287 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
2288 mask_str = kmalloc(len, GFP_KERNEL);
2289 if (!mask_str)
2290 return -ENOMEM;
2291
2292 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2293 if (len >= count)
2294 return -EINVAL;
2295
2296 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2297
2298 return count;
2299 }
2300
2301 /*
2302 * osnoise_cpus_write - Write function for "cpus" entry
2303 * @filp: The active open file structure
2304 * @ubuf: The user buffer that contains the value to write
2305 * @count: The maximum number of bytes to write to "file"
2306 * @ppos: The current position in @filp
2307 *
2308 * This function provides a write implementation for the "cpus"
2309 * interface to the osnoise trace. By default, it lists all CPUs,
2310 * allowing osnoise threads to run on any online CPU of the system.
2311 * Writing a cpulist to this interface restricts the execution of
2312 * osnoise to that set of CPUs. Why not use "tracing_cpumask"?
2313 * Because the user might be interested in tracing what is running on
2314 * other CPUs. For instance, one might run osnoise on one HT CPU
2315 * while observing what is running on the sibling HT CPU.
2316 */
2317 static ssize_t
2318 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
2319 loff_t *ppos)
2320 {
2321 cpumask_var_t osnoise_cpumask_new;
2322 int running, err;
2323 char buf[256];
2324
2325 if (count >= 256)
2326 return -EINVAL;
2327
2328 if (copy_from_user(buf, ubuf, count))
2329 return -EFAULT;
2330
2331 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
2332 return -ENOMEM;
2333
2334 err = cpulist_parse(buf, osnoise_cpumask_new);
2335 if (err)
2336 goto err_free;
2337
2338 /*
2339 * trace_types_lock is taken to avoid concurrency on start/stop.
2340 */
2341 mutex_lock(&trace_types_lock);
2342 running = osnoise_has_registered_instances();
2343 if (running)
2344 stop_per_cpu_kthreads();
2345
2346 mutex_lock(&interface_lock);
2347 /*
2348 * osnoise_cpumask is read by CPU hotplug operations.
2349 */
2350 cpus_read_lock();
2351
2352 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
2353
2354 cpus_read_unlock();
2355 mutex_unlock(&interface_lock);
2356
2357 if (running)
2358 start_per_cpu_kthreads();
2359 mutex_unlock(&trace_types_lock);
2360
2361 free_cpumask_var(osnoise_cpumask_new);
2362 return count;
2363
2364 err_free:
2365 free_cpumask_var(osnoise_cpumask_new);
2366
2367 return err;
2368 }
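/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   # echo 0-3,8 > /sys/kernel/tracing/osnoise/cpus
 *
 * restricts the osnoise/timerlat workload to CPUs 0,1,2,3 and 8.
 */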
2369
2370 #ifdef CONFIG_TIMERLAT_TRACER
2371 static int timerlat_fd_open(struct inode *inode, struct file *file)
2372 {
2373 struct osnoise_variables *osn_var;
2374 struct timerlat_variables *tlat;
2375 long cpu = (long) inode->i_cdev;
2376
2377 mutex_lock(&interface_lock);
2378
2379 /*
2380 * This file is accessible only if timerlat is enabled, and
2381 * NO_OSNOISE_WORKLOAD is set.
2382 */
2383 if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
2384 mutex_unlock(&interface_lock);
2385 return -EINVAL;
2386 }
2387
2388 migrate_disable();
2389
2390 osn_var = this_cpu_osn_var();
2391
2392 /*
2393 * A non-zero osn_var->pid means that this file is already in use.
2394 */
2395 if (osn_var->pid) {
2396 mutex_unlock(&interface_lock);
2397 migrate_enable();
2398 return -EBUSY;
2399 }
2400
2401 /*
2402 * timerlat is a per-cpu tracer. Check if the user-space task, too,
2403 * is pinned to a single CPU. The tracer later monitors if the task
2404 * migrates and then disables the tracer if it does. However, it is
2405 * worth doing this basic acceptance test to avoid an obviously wrong
2406 * setup.
2407 */
2408 if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) {
2409 mutex_unlock(&interface_lock);
2410 migrate_enable();
2411 return -EPERM;
2412 }
2413
2414 /*
2415 * From now on, it is good to go.
2416 */
2417 file->private_data = inode->i_cdev;
2418
2419 get_task_struct(current);
2420
2421 osn_var->kthread = current;
2422 osn_var->pid = current->pid;
2423
2424 /*
2425 * Setup is done.
2426 */
2427 mutex_unlock(&interface_lock);
2428
2429 tlat = this_cpu_tmr_var();
2430 tlat->count = 0;
2431
2432 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
2433
2434 migrate_enable();
2435 return 0;
2436 };
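/*
 * Expected user-space flow, as enforced above (a sketch, not ABI
 * documentation): the task pins itself to a single CPU (e.g., with
 * sched_setaffinity()), opens per_cpu/cpu$ID/timerlat_fd for that same
 * CPU, and then calls read() in a loop; each read() returns after the
 * next timerlat activation.
 */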
2437
2438 /*
2439 * timerlat_fd_read - Read function for "timerlat_fd" file
2440 * @file: The active open file structure
2441 * @ubuf: The userspace provided buffer to read value into
2442 * @count: The maximum number of bytes to read
2443 * @ppos: The current "file" position
2444 *
2445 * Returns 0 on success, or -EINVAL if the task migrated off its CPU.
2446 */
2447 static ssize_t
2448 timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
2449 loff_t *ppos)
2450 {
2451 long cpu = (long) file->private_data;
2452 struct osnoise_variables *osn_var;
2453 struct timerlat_variables *tlat;
2454 struct timerlat_sample s;
2455 s64 diff;
2456 u64 now;
2457
2458 migrate_disable();
2459
2460 tlat = this_cpu_tmr_var();
2461
2462 /*
2463 * While in user-space, the thread is migratable. There is nothing
2464 * we can do about it.
2465 * So, if the thread is running on another CPU, stop the machinery.
2466 */
2467 if (cpu == smp_processor_id()) {
2468 if (tlat->uthread_migrate) {
2469 migrate_enable();
2470 return -EINVAL;
2471 }
2472 } else {
2473 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
2474 osnoise_taint("timerlat user thread migrate\n");
2475 osnoise_stop_tracing();
2476 migrate_enable();
2477 return -EINVAL;
2478 }
2479
2480 osn_var = this_cpu_osn_var();
2481
2482 /*
2483 * The timerlat in user-space runs in a different order:
2484 * the read() reports the latency of the previous occurrence and
2485 * then sleeps waiting for the next occurrence.
2486 *
2487 * So, skip the report if we enter read() before the first wakeup
2488 * from timerlat IRQ:
2489 */
2490 if (likely(osn_var->sampling)) {
2491 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2492 diff = now - tlat->abs_period;
2493
2494 /*
2495 * it was not a timer firing, but some other signal?
2496 */
2497 if (diff < 0)
2498 goto out;
2499
2500 s.seqnum = tlat->count;
2501 s.timer_latency = diff;
2502 s.context = THREAD_URET;
2503
2504 record_timerlat_sample(&s);
2505
2506 notify_new_max_latency(diff);
2507
2508 tlat->tracing_thread = false;
2509 if (osnoise_data.stop_tracing_total)
2510 if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
2511 osnoise_stop_tracing();
2512 } else {
2513 tlat->tracing_thread = false;
2514 tlat->kthread = current;
2515
2516 /* Annotate now: the next period is computed relative to this point */
2517 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
2518
2519 osn_var->sampling = 1;
2520 }
2521
2522 /* wait for the next period */
2523 wait_next_period(tlat);
2524
2525 /* This is the wakeup from this cycle */
2526 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2527 diff = now - tlat->abs_period;
2528
2529 /*
2530 * it was not a timer firing, but some other signal?
2531 */
2532 if (diff < 0)
2533 goto out;
2534
2535 s.seqnum = tlat->count;
2536 s.timer_latency = diff;
2537 s.context = THREAD_CONTEXT;
2538
2539 record_timerlat_sample(&s);
2540
2541 if (osnoise_data.stop_tracing_total) {
2542 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
2543 timerlat_dump_stack(time_to_us(diff));
2544 notify_new_max_latency(diff);
2545 osnoise_stop_tracing();
2546 }
2547 }
2548
2549 out:
2550 migrate_enable();
2551 return 0;
2552 }
2553
2554 static int timerlat_fd_release(struct inode *inode, struct file *file)
2555 {
2556 struct osnoise_variables *osn_var;
2557 struct timerlat_variables *tlat_var;
2558 long cpu = (long) file->private_data;
2559
2560 migrate_disable();
2561 mutex_lock(&interface_lock);
2562
2563 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
2564 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
2565
2566 if (tlat_var->kthread)
2567 hrtimer_cancel(&tlat_var->timer);
2568 memset(tlat_var, 0, sizeof(*tlat_var));
2569
2570 osn_var->sampling = 0;
2571 osn_var->pid = 0;
2572
2573 /*
2574 * We are leaving, not being stopped... see stop_kthread();
2575 */
2576 if (osn_var->kthread) {
2577 put_task_struct(osn_var->kthread);
2578 osn_var->kthread = NULL;
2579 }
2580
2581 mutex_unlock(&interface_lock);
2582 migrate_enable();
2583 return 0;
2584 }
2585 #endif
2586
2587 /*
2588 * osnoise/runtime_us: cannot be greater than the period.
2589 */
2590 static struct trace_min_max_param osnoise_runtime = {
2591 .lock = &interface_lock,
2592 .val = &osnoise_data.sample_runtime,
2593 .max = &osnoise_data.sample_period,
2594 .min = NULL,
2595 };
2596
2597 /*
2598 * osnoise/period_us: cannot be smaller than the runtime.
2599 */
2600 static struct trace_min_max_param osnoise_period = {
2601 .lock = &interface_lock,
2602 .val = &osnoise_data.sample_period,
2603 .max = NULL,
2604 .min = &osnoise_data.sample_runtime,
2605 };
2606
2607 /*
2608 * osnoise/stop_tracing_us: no limit.
2609 */
2610 static struct trace_min_max_param osnoise_stop_tracing_in = {
2611 .lock = &interface_lock,
2612 .val = &osnoise_data.stop_tracing,
2613 .max = NULL,
2614 .min = NULL,
2615 };
2616
2617 /*
2618 * osnoise/stop_tracing_total_us: no limit.
2619 */
2620 static struct trace_min_max_param osnoise_stop_tracing_total = {
2621 .lock = &interface_lock,
2622 .val = &osnoise_data.stop_tracing_total,
2623 .max = NULL,
2624 .min = NULL,
2625 };
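/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   # echo 500 > /sys/kernel/tracing/osnoise/stop_tracing_total_us
 *
 * stops the trace when the total noise (osnoise) or the thread latency
 * (timerlat) reaches 500 us.
 */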
2626
2627 #ifdef CONFIG_TIMERLAT_TRACER
2628 /*
2629 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
2630 * latency is higher than this value.
2631 */
2632 static struct trace_min_max_param osnoise_print_stack = {
2633 .lock = &interface_lock,
2634 .val = &osnoise_data.print_stack,
2635 .max = NULL,
2636 .min = NULL,
2637 };
2638
2639 /*
2640 * osnoise/timerlat_period: min 100 us, max 1 s
2641 */
2642 static u64 timerlat_min_period = 100;
2643 static u64 timerlat_max_period = 1000000;
2644 static struct trace_min_max_param timerlat_period = {
2645 .lock = &interface_lock,
2646 .val = &osnoise_data.timerlat_period,
2647 .max = &timerlat_max_period,
2648 .min = &timerlat_min_period,
2649 };
2650
2651 static const struct file_operations timerlat_fd_fops = {
2652 .open = timerlat_fd_open,
2653 .read = timerlat_fd_read,
2654 .release = timerlat_fd_release,
2655 .llseek = generic_file_llseek,
2656 };
2657 #endif
2658
2659 static const struct file_operations cpus_fops = {
2660 .open = tracing_open_generic,
2661 .read = osnoise_cpus_read,
2662 .write = osnoise_cpus_write,
2663 .llseek = generic_file_llseek,
2664 };
2665
2666 static const struct file_operations osnoise_options_fops = {
2667 .open = osnoise_options_open,
2668 .read = seq_read,
2669 .llseek = seq_lseek,
2670 .release = seq_release,
2671 .write = osnoise_options_write
2672 };
2673
2674 #ifdef CONFIG_TIMERLAT_TRACER
2675 #ifdef CONFIG_STACKTRACE
2676 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2677 {
2678 struct dentry *tmp;
2679
2680 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
2681 &osnoise_print_stack, &trace_min_max_fops);
2682 if (!tmp)
2683 return -ENOMEM;
2684
2685 return 0;
2686 }
2687 #else /* CONFIG_STACKTRACE */
2688 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2689 {
2690 return 0;
2691 }
2692 #endif /* CONFIG_STACKTRACE */
2693
2694 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
2695 {
2696 struct dentry *timerlat_fd;
2697 struct dentry *per_cpu;
2698 struct dentry *cpu_dir;
2699 char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
2700 long cpu;
2701
2702 /*
2703 * Why not use the tracing instance per_cpu/ dir?
2704 *
2705 * Because osnoise/timerlat have a single workload, and having
2706 * multiple files like these would be a waste of memory.
2707 */
2708 per_cpu = tracefs_create_dir("per_cpu", top_dir);
2709 if (!per_cpu)
2710 return -ENOMEM;
2711
2712 for_each_possible_cpu(cpu) {
2713 snprintf(cpu_str, 30, "cpu%ld", cpu);
2714 cpu_dir = tracefs_create_dir(cpu_str, per_cpu);
2715 if (!cpu_dir)
2716 goto out_clean;
2717
2718 timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
2719 cpu_dir, NULL, &timerlat_fd_fops);
2720 if (!timerlat_fd)
2721 goto out_clean;
2722
2723 /* Record the CPU */
2724 d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
2725 }
2726
2727 return 0;
2728
2729 out_clean:
2730 tracefs_remove(per_cpu);
2731 return -ENOMEM;
2732 }
2733
2734 /*
2735 * init_timerlat_tracefs - A function to initialize the timerlat interface files
2736 */
2737 static int init_timerlat_tracefs(struct dentry *top_dir)
2738 {
2739 struct dentry *tmp;
2740 int retval;
2741
2742 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
2743 &timerlat_period, &trace_min_max_fops);
2744 if (!tmp)
2745 return -ENOMEM;
2746
2747 retval = osnoise_create_cpu_timerlat_fd(top_dir);
2748 if (retval)
2749 return retval;
2750
2751 return init_timerlat_stack_tracefs(top_dir);
2752 }
2753 #else /* CONFIG_TIMERLAT_TRACER */
2754 static int init_timerlat_tracefs(struct dentry *top_dir)
2755 {
2756 return 0;
2757 }
2758 #endif /* CONFIG_TIMERLAT_TRACER */
2759
2760 /*
2761 * init_tracefs - A function to initialize the tracefs interface files
2762 *
2763 * This function creates entries in tracefs for "osnoise" and "timerlat".
2764 * It creates these directories in the tracing directory, and within that
2765 * directory the user can change and view the configs.
2766 */
2767 static int init_tracefs(void)
2768 {
2769 struct dentry *top_dir;
2770 struct dentry *tmp;
2771 int ret;
2772
2773 ret = tracing_init_dentry();
2774 if (ret)
2775 return -ENOMEM;
2776
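/*
 * If the directory cannot be created, tracefs is likely unavailable;
 * this is apparently not treated as a fatal error.
 */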
2777 top_dir = tracefs_create_dir("osnoise", NULL);
2778 if (!top_dir)
2779 return 0;
2780
2781 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
2782 &osnoise_period, &trace_min_max_fops);
2783 if (!tmp)
2784 goto err;
2785
2786 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
2787 &osnoise_runtime, &trace_min_max_fops);
2788 if (!tmp)
2789 goto err;
2790
2791 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
2792 &osnoise_stop_tracing_in, &trace_min_max_fops);
2793 if (!tmp)
2794 goto err;
2795
2796 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
2797 &osnoise_stop_tracing_total, &trace_min_max_fops);
2798 if (!tmp)
2799 goto err;
2800
2801 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
2802 if (!tmp)
2803 goto err;
2804
2805 tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
2806 &osnoise_options_fops);
2807 if (!tmp)
2808 goto err;
2809
2810 ret = init_timerlat_tracefs(top_dir);
2811 if (ret)
2812 goto err;
2813
2814 return 0;
2815
2816 err:
2817 tracefs_remove(top_dir);
2818 return -ENOMEM;
2819 }
2820
2821 static int osnoise_hook_events(void)
2822 {
2823 int retval;
2824
2825 /*
2826 * The trace is already hooked; we are re-enabling after
2827 * a stop_tracing_*.
2828 */
2829 if (trace_osnoise_callback_enabled)
2830 return 0;
2831
2832 retval = hook_irq_events();
2833 if (retval)
2834 return -EINVAL;
2835
2836 retval = hook_softirq_events();
2837 if (retval)
2838 goto out_unhook_irq;
2839
2840 retval = hook_thread_events();
2841 /*
2842 * All fine!
2843 */
2844 if (!retval)
2845 return 0;
2846
2847 unhook_softirq_events();
2848 out_unhook_irq:
2849 unhook_irq_events();
2850 return -EINVAL;
2851 }
2852
2853 static void osnoise_unhook_events(void)
2854 {
2855 unhook_thread_events();
2856 unhook_softirq_events();
2857 unhook_irq_events();
2858 }
2859
2860 /*
2861 * osnoise_workload_start - start the workload and hook to events
2862 */
2863 static int osnoise_workload_start(void)
2864 {
2865 int retval;
2866
2867 /*
2868 * Instances need to be registered after calling workload
2869 * start. Hence, if there is already an instance, the
2870 * workload was already registered. Otherwise, this
2871 * code is on the way to register the first instance,
2872 * and the workload will start.
2873 */
2874 if (osnoise_has_registered_instances())
2875 return 0;
2876
2877 osn_var_reset_all();
2878
2879 retval = osnoise_hook_events();
2880 if (retval)
2881 return retval;
2882
2883 /*
2884 * Make sure that ftrace_nmi_enter/exit() see reset values
2885 * before enabling trace_osnoise_callback_enabled.
2886 */
2887 barrier();
2888 trace_osnoise_callback_enabled = true;
2889
2890 retval = start_per_cpu_kthreads();
2891 if (retval) {
2892 trace_osnoise_callback_enabled = false;
2893 /*
2894 * Make sure that ftrace_nmi_enter/exit() see
2895 * trace_osnoise_callback_enabled as false before continuing.
2896 */
2897 barrier();
2898
2899 osnoise_unhook_events();
2900 return retval;
2901 }
2902
2903 return 0;
2904 }
2905
2906 /*
2907 * osnoise_workload_stop - stop the workload and unhook the events
2908 */
2909 static void osnoise_workload_stop(void)
2910 {
2911 /*
2912 * Instances need to be unregistered before calling
2913 * stop. Hence, if there is a registered instance, more
2914 * than one instance is running, and the workload will not
2915 * yet stop. Otherwise, this code is on the way to disable
2916 * the last instance, and the workload can stop.
2917 */
2918 if (osnoise_has_registered_instances())
2919 return;
2920
2921 /*
2922 * If callbacks were already disabled in a previous stop
2923 * call, there is no need to disable them again.
2924 *
2925 * For instance, this happens when tracing is stopped via:
2926 * echo 0 > tracing_on
2927 * echo nop > current_tracer.
2928 */
2929 if (!trace_osnoise_callback_enabled)
2930 return;
2931
2932 trace_osnoise_callback_enabled = false;
2933 /*
2934 * Make sure that ftrace_nmi_enter/exit() see
2935 * trace_osnoise_callback_enabled as false before continuing.
2936 */
2937 barrier();
2938
2939 stop_per_cpu_kthreads();
2940
2941 osnoise_unhook_events();
2942 }
2943
2944 static void osnoise_tracer_start(struct trace_array *tr)
2945 {
2946 int retval;
2947
2948 /*
2949 * If the instance is already registered, there is no need to
2950 * register it again.
2951 */
2952 if (osnoise_instance_registered(tr))
2953 return;
2954
2955 retval = osnoise_workload_start();
2956 if (retval)
2957 pr_err(BANNER "Error starting osnoise tracer\n");
2958
2959 osnoise_register_instance(tr);
2960 }
2961
2962 static void osnoise_tracer_stop(struct trace_array *tr)
2963 {
2964 osnoise_unregister_instance(tr);
2965 osnoise_workload_stop();
2966 }
2967
2968 static int osnoise_tracer_init(struct trace_array *tr)
2969 {
2970 /*
2971 * Only allow osnoise tracer if timerlat tracer is not running
2972 * already.
2973 */
2974 if (timerlat_enabled())
2975 return -EBUSY;
2976
2977 tr->max_latency = 0;
2978
2979 osnoise_tracer_start(tr);
2980 return 0;
2981 }
2982
2983 static void osnoise_tracer_reset(struct trace_array *tr)
2984 {
2985 osnoise_tracer_stop(tr);
2986 }
2987
2988 static struct tracer osnoise_tracer __read_mostly = {
2989 .name = "osnoise",
2990 .init = osnoise_tracer_init,
2991 .reset = osnoise_tracer_reset,
2992 .start = osnoise_tracer_start,
2993 .stop = osnoise_tracer_stop,
2994 .print_header = print_osnoise_headers,
2995 .allow_instances = true,
2996 };
2997
2998 #ifdef CONFIG_TIMERLAT_TRACER
2999 static void timerlat_tracer_start(struct trace_array *tr)
3000 {
3001 int retval;
3002
3003 /*
3004 * If the instance is already registered, there is no need to
3005 * register it again.
3006 */
3007 if (osnoise_instance_registered(tr))
3008 return;
3009
3010 retval = osnoise_workload_start();
3011 if (retval)
3012 pr_err(BANNER "Error starting timerlat tracer\n");
3013
3014 osnoise_register_instance(tr);
3015
3016 return;
3017 }
3018
3019 static void timerlat_tracer_stop(struct trace_array *tr)
3020 {
3021 int cpu;
3022
3023 osnoise_unregister_instance(tr);
3024
3025 /*
3026 * Instruct the threads to stop only if this is the last instance.
3027 */
3028 if (!osnoise_has_registered_instances()) {
3029 for_each_online_cpu(cpu)
3030 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
3031 }
3032
3033 osnoise_workload_stop();
3034 }
3035
3036 static int timerlat_tracer_init(struct trace_array *tr)
3037 {
3038 /*
3039 * Only allow timerlat tracer if osnoise tracer is not running already.
3040 */
3041 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
3042 return -EBUSY;
3043
3044 /*
3045 * If this is the first instance, set timerlat_tracer to block
3046 * osnoise tracer start.
3047 */
3048 if (!osnoise_has_registered_instances())
3049 osnoise_data.timerlat_tracer = 1;
3050
3051 tr->max_latency = 0;
3052 timerlat_tracer_start(tr);
3053
3054 return 0;
3055 }
3056
3057 static void timerlat_tracer_reset(struct trace_array *tr)
3058 {
3059 timerlat_tracer_stop(tr);
3060
3061 /*
3062 * If this is the last instance, reset timerlat_tracer allowing
3063 * osnoise to be started.
3064 */
3065 if (!osnoise_has_registered_instances())
3066 osnoise_data.timerlat_tracer = 0;
3067 }
3068
3069 static struct tracer timerlat_tracer __read_mostly = {
3070 .name = "timerlat",
3071 .init = timerlat_tracer_init,
3072 .reset = timerlat_tracer_reset,
3073 .start = timerlat_tracer_start,
3074 .stop = timerlat_tracer_stop,
3075 .print_header = print_timerlat_headers,
3076 .allow_instances = true,
3077 };
3078
3079 __init static int init_timerlat_tracer(void)
3080 {
3081 return register_tracer(&timerlat_tracer);
3082 }
3083 #else /* CONFIG_TIMERLAT_TRACER */
3084 __init static int init_timerlat_tracer(void)
3085 {
3086 return 0;
3087 }
3088 #endif /* CONFIG_TIMERLAT_TRACER */
3089
3090 __init static int init_osnoise_tracer(void)
3091 {
3092 int ret;
3093
3094 mutex_init(&interface_lock);
3095
3096 cpumask_copy(&osnoise_cpumask, cpu_all_mask);
3097
3098 ret = register_tracer(&osnoise_tracer);
3099 if (ret) {
3100 pr_err(BANNER "Error registering osnoise!\n");
3101 return ret;
3102 }
3103
3104 ret = init_timerlat_tracer();
3105 if (ret) {
3106 pr_err(BANNER "Error registering timerlat!\n");
3107 return ret;
3108 }
3109
3110 osnoise_init_hotplug_support();
3111
3112 INIT_LIST_HEAD_RCU(&osnoise_instances);
3113
3114 init_tracefs();
3115
3116 return 0;
3117 }
3118 late_initcall(init_osnoise_tracer);
3119