// SPDX-License-Identifier: GPL-2.0
/*
 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
 *
 * Based on "hwlat_detector" tracer by:
 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *   With feedback from Clark Williams <williams@redhat.com>
 *
 * And also based on the rtsl tracer presented on:
 *   DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
 *   scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
 *   (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
 *
 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
 */

#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched.h>
#include "trace.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/trace/irq_vectors.h>
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#endif /* CONFIG_X86_LOCAL_APIC */

#include <trace/events/irq.h>
#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/osnoise.h>

/*
 * Default values.
 */
#define BANNER			"osnoise: "
#define DEFAULT_SAMPLE_PERIOD	1000000		/* 1s */
#define DEFAULT_SAMPLE_RUNTIME	1000000		/* 1s */

#define DEFAULT_TIMERLAT_PERIOD	1000		/* 1ms */
#define DEFAULT_TIMERLAT_PRIO	95		/* FIFO 95 */

/*
 * osnoise/options entries.
 */
enum osnoise_options_index {
	OSN_DEFAULTS = 0,
	OSN_WORKLOAD,
	OSN_PANIC_ON_STOP,
	OSN_PREEMPT_DISABLE,
	OSN_IRQ_DISABLE,
	OSN_MAX
};

static const char * const osnoise_options_str[OSN_MAX] = {
	"DEFAULTS",
	"OSNOISE_WORKLOAD",
	"PANIC_ON_STOP",
	"OSNOISE_PREEMPT_DISABLE",
	"OSNOISE_IRQ_DISABLE" };

#define OSN_DEFAULT_OPTIONS	0x2
static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS;
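
/*
 * OSN_DEFAULT_OPTIONS == 0x2 sets only bit 1 (OSN_WORKLOAD): by default, the
 * tracer dispatches its own sampling workload and no other option is set.
 * The bits are consumed with test_bit() throughout this file, e.g.:
 *
 *	if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
 *		panic(...);
 */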

/*
 * trace_array of the enabled osnoise/timerlat instances.
 */
struct osnoise_instance {
	struct list_head	list;
	struct trace_array	*tr;
};

static struct list_head osnoise_instances;

static bool osnoise_has_registered_instances(void)
{
	return !!list_first_or_null_rcu(&osnoise_instances,
					struct osnoise_instance,
					list);
}

/*
 * osnoise_instance_registered - check if a tr is already registered
 */
static int osnoise_instance_registered(struct trace_array *tr)
{
	struct osnoise_instance *inst;
	int found = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		if (inst->tr == tr)
			found = 1;
	}
	rcu_read_unlock();

	return found;
}

/*
 * osnoise_register_instance - register a new trace instance
 *
 * Register a trace_array *tr in the list of instances running
 * osnoise/timerlat tracers.
 */
static int osnoise_register_instance(struct trace_array *tr)
{
	struct osnoise_instance *inst;

	/*
	 * register/unregister serialization is provided by trace's
	 * trace_types_lock.
	 */
	lockdep_assert_held(&trace_types_lock);

	inst = kmalloc(sizeof(*inst), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	INIT_LIST_HEAD_RCU(&inst->list);
	inst->tr = tr;
	list_add_tail_rcu(&inst->list, &osnoise_instances);

	return 0;
}
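
/*
 * Illustrative call pattern (a sketch: the actual call sites live in the
 * tracer init/deinit paths, outside this excerpt). Both helpers rely on
 * the caller holding trace_types_lock:
 *
 *	mutex_lock(&trace_types_lock);
 *	if (!osnoise_instance_registered(tr))
 *		osnoise_register_instance(tr);
 *	...
 *	osnoise_unregister_instance(tr);
 *	mutex_unlock(&trace_types_lock);
 */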

/*
 * osnoise_unregister_instance - unregister a registered trace instance
 *
 * Remove the trace_array *tr from the list of instances running
 * osnoise/timerlat tracers.
 */
static void osnoise_unregister_instance(struct trace_array *tr)
{
	struct osnoise_instance *inst;
	int found = 0;

	/*
	 * register/unregister serialization is provided by trace's
	 * trace_types_lock.
	 */
	list_for_each_entry_rcu(inst, &osnoise_instances, list,
				lockdep_is_held(&trace_types_lock)) {
		if (inst->tr == tr) {
			list_del_rcu(&inst->list);
			found = 1;
			break;
		}
	}

	if (!found)
		return;

	kvfree_rcu_mightsleep(inst);
}

/*
 * NMI runtime info.
 */
struct osn_nmi {
	u64	count;
	u64	delta_start;
};

/*
 * IRQ runtime info.
 */
struct osn_irq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

#define IRQ_CONTEXT	0
#define THREAD_CONTEXT	1
#define THREAD_URET	2
/*
 * softirq runtime info.
 */
struct osn_softirq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * thread runtime info.
 */
struct osn_thread {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
	struct task_struct	*kthread;
	bool			sampling;
	pid_t			pid;
	struct osn_nmi		nmi;
	struct osn_irq		irq;
	struct osn_softirq	softirq;
	struct osn_thread	thread;
	local_t			int_counter;
};

/*
 * Per-cpu runtime information.
 */
static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);

/*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables of the current CPU
 */
static inline struct osnoise_variables *this_cpu_osn_var(void)
{
	return this_cpu_ptr(&per_cpu_osnoise_var);
}

/*
 * Protect the interface.
 */
static struct mutex interface_lock;

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
	struct task_struct	*kthread;
	struct hrtimer		timer;
	u64			rel_period;
	u64			abs_period;
	bool			tracing_thread;
	u64			count;
	bool			uthread_migrate;
};

static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);

/*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables of the current CPU
 */
static inline struct timerlat_variables *this_cpu_tmr_var(void)
{
	return this_cpu_ptr(&per_cpu_timerlat_var);
}

/*
 * tlat_var_reset - Reset the values of the given timerlat_variables
 */
static inline void tlat_var_reset(void)
{
	struct timerlat_variables *tlat_var;
	int cpu;

	/* Synchronize with the timerlat interfaces */
	mutex_lock(&interface_lock);
	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
		if (tlat_var->kthread)
			hrtimer_cancel(&tlat_var->timer);
		memset(tlat_var, 0, sizeof(*tlat_var));
	}
	mutex_unlock(&interface_lock);
}
#else /* CONFIG_TIMERLAT_TRACER */
#define tlat_var_reset()	do {} while (0)
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * osn_var_reset - Reset the values of the given osnoise_variables
 */
static inline void osn_var_reset(void)
{
	struct osnoise_variables *osn_var;
	int cpu;

	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
		memset(osn_var, 0, sizeof(*osn_var));
	}
}

/*
 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
 */
static inline void osn_var_reset_all(void)
{
	osn_var_reset();
	tlat_var_reset();
}

/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 */
bool trace_osnoise_callback_enabled;

/*
 * Tracer data.
 */
static struct osnoise_data {
	u64	sample_period;		/* total sampling period */
	u64	sample_runtime;		/* active sampling portion of period */
	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
	u64	timerlat_period;	/* timerlat period */
	u64	print_stack;		/* print IRQ stack if total > */
	int	timerlat_tracer;	/* timerlat tracer */
#endif
	bool	tainted;		/* inform users and developers about a problem */
} osnoise_data = {
	.sample_period		= DEFAULT_SAMPLE_PERIOD,
	.sample_runtime		= DEFAULT_SAMPLE_RUNTIME,
	.stop_tracing		= 0,
	.stop_tracing_total	= 0,
#ifdef CONFIG_TIMERLAT_TRACER
	.print_stack		= 0,
	.timerlat_period	= DEFAULT_TIMERLAT_PERIOD,
	.timerlat_tracer	= 0,
#endif
};
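
/*
 * With the defaults above, osnoise samples for the entire 1s period
 * (sample_runtime == sample_period), leaving no idle gap between two
 * consecutive samples, and timerlat arms its timer every 1ms. As an
 * example, setting sample_runtime to 500000 (0.5s) would split each 1s
 * period into half sampling, half sleeping (see osnoise_sleep()).
 */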

#ifdef CONFIG_TIMERLAT_TRACER
static inline bool timerlat_enabled(void)
{
	return osnoise_data.timerlat_tracer;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
	/*
	 * If the timerlat is enabled, but the irq handler did
	 * not run yet enabling timerlat_tracer, do not trace.
	 */
	if (!tlat_var->tracing_thread) {
		osn_var->softirq.arrival_time = 0;
		osn_var->softirq.delta_start = 0;
		return 0;
	}
	return 1;
}

static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
	/*
	 * If the timerlat is enabled, but the irq handler did
	 * not run yet enabling timerlat_tracer, do not trace.
	 */
	if (!tlat_var->tracing_thread) {
		osn_var->thread.delta_start = 0;
		osn_var->thread.arrival_time = 0;
		return 0;
	}
	return 1;
}
#else /* CONFIG_TIMERLAT_TRACER */
static inline bool timerlat_enabled(void)
{
	return false;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
#endif

#ifdef CONFIG_PREEMPT_RT
/*
 * Print the osnoise header info.
 */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "#                                _-------=> irqs-off\n");
	seq_puts(s, "#                               / _------=> need-resched\n");
	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");

	seq_puts(s, "#                              |||||| /          ");
	seq_puts(s, "                                     MAX\n");

	seq_puts(s, "#                              ||||| /                         ");
	seq_puts(s, "                    SINGLE      Interference counters:\n");

	seq_puts(s, "#                              |||||||               RUNTIME   ");
	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");

	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

	seq_puts(s, "#              | |         |   |||||||      |           |      ");
	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "#                                _-----=> irqs-off\n");
	seq_puts(s, "#                               / _----=> need-resched\n");
	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
	seq_puts(s, "#                              || / _--=> preempt-depth\n");
	seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
	seq_puts(s, "                    MAX\n");
	seq_puts(s, "#                              |||| /     delay               ");
	seq_puts(s, "                    SINGLE      Interference counters:\n");

	seq_puts(s, "#                              |||||               RUNTIME      ");
	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");

	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US       ");
	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

	seq_puts(s, "#              | |         |   |||||      |           |         ");
	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * osnoise_taint - report an osnoise error.
 */
#define osnoise_taint(msg) ({						\
	struct osnoise_instance *inst;					\
	struct trace_buffer *buffer;					\
									\
	rcu_read_lock();						\
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {	\
		buffer = inst->tr->array_buffer.buffer;			\
		trace_array_printk_buf(buffer, _THIS_IP_, msg);		\
	}								\
	rcu_read_unlock();						\
	osnoise_data.tainted = true;					\
})
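
/*
 * Example usage, as done on measurement errors elsewhere in this file:
 * record the problem in every registered instance's buffer and flag the
 * tracer as tainted, which print_osnoise_headers() then reports:
 *
 *	if (duration < 0)
 *		osnoise_taint("Negative duration!\n");
 */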

/*
 * Record an osnoise_sample into the tracer buffer.
 */
static void
__record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
{
	struct ring_buffer_event *event;
	struct osnoise_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->runtime = sample->runtime;
	entry->noise = sample->noise;
	entry->max_sample = sample->max_sample;
	entry->hw_count = sample->hw_count;
	entry->nmi_count = sample->nmi_count;
	entry->irq_count = sample->irq_count;
	entry->softirq_count = sample->softirq_count;
	entry->thread_count = sample->thread_count;

	trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record an osnoise_sample on all osnoise instances and fire trace event.
 */
static void record_osnoise_sample(struct osnoise_sample *sample)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;

	trace_osnoise_sample(sample);

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__record_osnoise_sample(sample, buffer);
	}
	rcu_read_unlock();
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Print the timerlat header info.
 */
#ifdef CONFIG_PREEMPT_RT
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "#                                _-------=> irqs-off\n");
	seq_puts(s, "#                               / _------=> need-resched\n");
	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
	seq_puts(s, "#                              |||||| /\n");
	seq_puts(s, "#                              |||||||             ACTIVATION\n");
	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
	seq_puts(s, "       CONTEXT                LATENCY\n");
	seq_puts(s, "#              | |         |   |||||||      |         |      ");
	seq_puts(s, "            |                       |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "#                                _-----=> irqs-off\n");
	seq_puts(s, "#                               / _----=> need-resched\n");
	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
	seq_puts(s, "#                              || / _--=> preempt-depth\n");
	seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
	seq_puts(s, "#                              |||| /     delay\n");
	seq_puts(s, "#                              |||||            ACTIVATION\n");
	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    ID     ");
	seq_puts(s, "      CONTEXT                 LATENCY\n");
	seq_puts(s, "#              | |         |   |||||      |         |      ");
	seq_puts(s, "            |                       |\n");
}
#endif /* CONFIG_PREEMPT_RT */

static void
__record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
{
	struct ring_buffer_event *event;
	struct timerlat_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->seqnum = sample->seqnum;
	entry->context = sample->context;
	entry->timer_latency = sample->timer_latency;

	trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record a timerlat_sample into the tracer buffer.
 */
static void record_timerlat_sample(struct timerlat_sample *sample)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;

	trace_timerlat_sample(sample);

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__record_timerlat_sample(sample, buffer);
	}
	rcu_read_unlock();
}

#ifdef CONFIG_STACKTRACE

#define	MAX_CALLS	256

/*
 * Stack trace will take place only at IRQ level, so there is no need
 * to control nesting here.
 */
struct trace_stack {
	int		stack_size;
	int		nr_entries;
	unsigned long	calls[MAX_CALLS];
};

static DEFINE_PER_CPU(struct trace_stack, trace_stack);

/*
 * timerlat_save_stack - save a stack trace without printing
 *
 * Save the current stack trace without printing. The
 * stack will be printed later, after the end of the measurement.
 */
static void timerlat_save_stack(int skip)
{
	unsigned int size, nr_entries;
	struct trace_stack *fstack;

	fstack = this_cpu_ptr(&trace_stack);

	size = ARRAY_SIZE(fstack->calls);

	nr_entries = stack_trace_save(fstack->calls, size, skip);

	fstack->stack_size = nr_entries * sizeof(unsigned long);
	fstack->nr_entries = nr_entries;
}

static void
__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
{
	struct ring_buffer_event *event;
	struct stack_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
					  tracing_gen_ctx());
	if (!event)
		return;

	entry = ring_buffer_event_data(event);

	memcpy(&entry->caller, fstack->calls, size);
	entry->size = fstack->nr_entries;

	trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * timerlat_dump_stack - dump a stack trace previously saved
 */
static void timerlat_dump_stack(u64 latency)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;
	struct trace_stack *fstack;
	unsigned int size;

	/*
	 * trace only if latency > print_stack config, if enabled.
	 */
	if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
		return;

	preempt_disable_notrace();
	fstack = this_cpu_ptr(&trace_stack);
	size = fstack->stack_size;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__timerlat_dump_stack(buffer, fstack, size);
	}
	rcu_read_unlock();
	preempt_enable_notrace();
}
#else /* CONFIG_STACKTRACE */
#define timerlat_dump_stack(latency)	do {} while (0)
#define timerlat_save_stack(a)		do {} while (0)
#endif /* CONFIG_STACKTRACE */
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * Macros to encapsulate the time capturing infrastructure.
 */
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, 1000)
#define time_sub(a, b)	((a) - (b))
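
/*
 * All timestamps are kept in nanoseconds and converted to microseconds only
 * at reporting time. Note that time_to_us() truncates: for example,
 * time_to_us(1500) == 1, so sub-microsecond remainders are dropped from the
 * reported values.
 */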

/*
 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
 *
 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
 * to discount the NMI interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->irq.delta_start)
		osn_var->irq.delta_start += duration;
}

#ifndef CONFIG_PREEMPT_RT
/*
 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
 *
 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
 * forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->softirq.delta_start)
		osn_var->softirq.delta_start += duration;
}
#else /* CONFIG_PREEMPT_RT */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
#endif

/*
 * cond_move_thread_delta_start - Forward the delta_start of a running thread
 *
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
 * is pushed forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->thread.delta_start)
		osn_var->thread.delta_start += duration;
}

/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have their duration without
 * the interference from higher priority interrupts. Instead of keeping a
 * variable to discount the interrupt interference from these variables, the
 * starting time of these variables are pushed forward with the interrupt's
 * duration. In this way, a single variable is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.:
 *
 *	now = time_get()
 *		--->	interrupt!
 *			delta_start -= int duration;
 *		<---
 *	duration = now - delta_start;
 *
 *	result: negative duration if the variable duration before the
 *	interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
	u64 int_counter, now;
	s64 duration;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		now = time_get();
		duration = (now - *delta_start);

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	/*
	 * This is an evidence of race conditions that cause
	 * a value to be "discounted" too much.
	 */
	if (duration < 0)
		osnoise_taint("Negative duration!\n");

	*delta_start = 0;

	return duration;
}
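
/*
 * Worked example of the delta_start accounting above: a thread window
 * starts with delta_start = 100us. An IRQ then runs for 10us, so
 * cond_move_thread_delta_start() pushes delta_start forward to 110us.
 * If the window closes at 200us, get_int_safe_duration() reports 90us
 * of thread noise: the 10us of IRQ interference were discounted here
 * and accounted in the IRQ's own window instead.
 */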

/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts.
 *
 * See get_int_safe_duration() for an explanation.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*time = time_get();

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * copy_int_safe_time - Copy *src into *dst, aware of interference
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*dst = *src;

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * trace_osnoise_callback - NMI entry/exit callback
 *
 * This function is called on NMI entry and exit; the bool enter
 * distinguishes between the two cases. It is used to note an NMI
 * occurrence, compute the noise caused by the NMI, and to remove the
 * noise it is potentially causing on other interference variables.
 */
void trace_osnoise_callback(bool enter)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 duration;

	if (!osn_var->sampling)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter) {
			osn_var->nmi.delta_start = time_get();
			local_inc(&osn_var->int_counter);
		} else {
			duration = time_get() - osn_var->nmi.delta_start;

			trace_nmi_noise(osn_var->nmi.delta_start, duration);

			cond_move_irq_delta_start(osn_var, duration);
			cond_move_softirq_delta_start(osn_var, duration);
			cond_move_thread_delta_start(osn_var, duration);
		}
	}

	if (enter)
		osn_var->nmi.count++;
}

/*
 * osnoise_trace_irq_entry - Note the starting of an IRQ
 *
 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
 * The arrival_time is used to report... the arrival time. The delta_start
 * is used to compute the duration at the IRQ exit handler. See
 * cond_move_irq_delta_start().
 */
void osnoise_trace_irq_entry(int id)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->irq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
	osn_var->irq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
 *
 * Computes the duration of the IRQ noise, and traces it. Also discounts the
 * interference from other sources of noise that could currently be accounted.
 */
void osnoise_trace_irq_exit(int id, const char *desc)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	s64 duration;

	if (!osn_var->sampling)
		return;

	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
	osn_var->irq.arrival_time = 0;
	cond_move_softirq_delta_start(osn_var, duration);
	cond_move_thread_delta_start(osn_var, duration);
}

/*
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
 *
 * Used to note the starting of an IRQ occurrence.
 */
static void trace_irqentry_callback(void *data, int irq,
				    struct irqaction *action)
{
	osnoise_trace_irq_entry(irq);
}

/*
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
 *
 * Used to note the end of an IRQ occurrence.
 */
static void trace_irqexit_callback(void *data, int irq,
				   struct irqaction *action, int ret)
{
	osnoise_trace_irq_exit(irq, action->name);
}

/*
 * arch specific register function.
 */
int __weak osnoise_arch_register(void)
{
	return 0;
}

/*
 * arch specific unregister function.
 */
void __weak osnoise_arch_unregister(void)
{
	return;
}

/*
 * hook_irq_events - Hook IRQ handling events
 *
 * This function hooks the IRQ related callbacks to the respective trace
 * events.
 */
static int hook_irq_events(void)
{
	int ret;

	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	if (ret)
		goto out_unregister_entry;

	ret = osnoise_arch_register();
	if (ret)
		goto out_irq_exit;

	return 0;

out_irq_exit:
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
out_unregister_entry:
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_irq_events - Unhook IRQ handling events
 *
 * This function unhooks the IRQ related callbacks from the respective trace
 * events.
 */
static void unhook_irq_events(void)
{
	osnoise_arch_unregister();
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
}

#ifndef CONFIG_PREEMPT_RT
/*
 * trace_softirq_entry_callback - Note the starting of a softirq
 *
 * Save the starting time of a softirq. As softirqs are non-preemptive to
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
 * to save the statistics. The arrival_time is used to report... the
 * arrival time. The delta_start is used to compute the duration at the
 * softirq exit handler. See cond_move_softirq_delta_start().
 */
static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->softirq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
	osn_var->softirq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Computes the duration of the softirq noise, and traces it. Also discounts
 * the interference from other sources of noise that could currently be
 * accounted.
 */
static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	s64 duration;

	if (!osn_var->sampling)
		return;

	if (unlikely(timerlat_enabled()))
		if (!timerlat_softirq_exit(osn_var))
			return;

	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
	cond_move_thread_delta_start(osn_var, duration);
	osn_var->softirq.arrival_time = 0;
}

/*
 * hook_softirq_events - Hook softirq handling events
 *
 * This function hooks the softirq related callbacks to the respective trace
 * events.
 */
static int hook_softirq_events(void)
{
	int ret;

	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
	if (ret)
		goto out_unreg_entry;

	return 0;

out_unreg_entry:
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_softirq_events - Unhook softirq handling events
 *
 * This function unhooks the softirq related callbacks from the respective
 * trace events.
 */
static void unhook_softirq_events(void)
{
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
}
#else /* CONFIG_PREEMPT_RT */
/*
 * softirqs run as threads on PREEMPT_RT, so these hooks are not needed.
 */
static int hook_softirq_events(void)
{
	return 0;
}
static void unhook_softirq_events(void)
{
}
#endif

/*
 * thread_entry - Record the starting of a thread noise window
 *
 * It saves the context switch time for a noisy thread, and increments
 * the interference counters.
 */
static void
thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
{
	if (!osn_var->sampling)
		return;
	/*
	 * The arrival time will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->thread.arrival_time = time_get();

	set_int_safe_time(osn_var, &osn_var->thread.delta_start);

	osn_var->thread.count++;
	local_inc(&osn_var->int_counter);
}

/*
 * thread_exit - Report the end of a thread noise window
 *
 * It computes the total noise from a thread, tracing if needed.
 */
static void
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
	s64 duration;

	if (!osn_var->sampling)
		return;

	if (unlikely(timerlat_enabled()))
		if (!timerlat_thread_exit(osn_var))
			return;

	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);

	trace_thread_noise(t, osn_var->thread.arrival_time, duration);

	osn_var->thread.arrival_time = 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise_stop_exception - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_exception(char *msg, int cpu)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d due to exception: %s\n",
				       smp_processor_id(),
				       msg);

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit on cpu %d due to exception: %s\n",
			      smp_processor_id(),
			      msg);

		tracer_tracing_off(tr);
	}
	rcu_read_unlock();
}

/*
 * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler
 *
 * This function is hooked to the sched:sched_migrate_task trace event, and
 * monitors timerlat user-space thread migration.
 */
static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
{
	struct osnoise_variables *osn_var;
	long cpu = task_cpu(p);

	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
	if (osn_var->pid == p->pid && dest_cpu != cpu) {
		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
		osnoise_taint("timerlat user-thread migrated\n");
		osnoise_stop_exception("timerlat user-thread migrated", cpu);
	}
}

static bool monitor_enabled;

static int register_migration_monitor(void)
{
	int ret = 0;

	/*
	 * Timerlat thread migration check is only required when running timerlat in user-space.
	 * Thus, enable callback only if timerlat is set with no workload.
	 */
	if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) {
		if (WARN_ON_ONCE(monitor_enabled))
			return 0;

		ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
		if (!ret)
			monitor_enabled = true;
	}

	return ret;
}

static void unregister_migration_monitor(void)
{
	if (!monitor_enabled)
		return;

	unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
	monitor_enabled = false;
}
#else
static int register_migration_monitor(void)
{
	return 0;
}
static void unregister_migration_monitor(void) {}
#endif
/*
 * trace_sched_switch_callback - sched:sched_switch trace event handler
 *
 * This function is hooked to the sched:sched_switch trace event, and it is
 * used to record the beginning and to report the end of a thread noise window.
 */
static void
trace_sched_switch_callback(void *data, bool preempt,
			    struct task_struct *p,
			    struct task_struct *n,
			    unsigned int prev_state)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	int workload = test_bit(OSN_WORKLOAD, &osnoise_options);

	if ((p->pid != osn_var->pid) || !workload)
		thread_exit(osn_var, p);

	if ((n->pid != osn_var->pid) || !workload)
		thread_entry(osn_var, n);
}

/*
 * hook_thread_events - Hook the instrumentation for thread noise
 *
 * Hook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static int hook_thread_events(void)
{
	int ret;

	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
	if (ret)
		return -EINVAL;

	ret = register_migration_monitor();
	if (ret)
		goto out_unreg;

	return 0;

out_unreg:
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
	return -EINVAL;
}

/*
 * unhook_thread_events - Unhook the instrumentation for thread noise
 *
 * Unhook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static void unhook_thread_events(void)
{
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
	unregister_migration_monitor();
}

/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count;
	s->irq_count = osn_var->irq.count;
	s->softirq_count = osn_var->softirq.count;
	s->thread_count = osn_var->thread.count;
}

/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count - s->nmi_count;
	s->irq_count = osn_var->irq.count - s->irq_count;
	s->softirq_count = osn_var->softirq.count - s->softirq_count;
	s->thread_count = osn_var->thread.count - s->thread_count;
}
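
/*
 * Worked example of the save/diff pair: if osn_var->irq.count is 1000 when
 * save_osn_sample_stats() runs and 1042 after the sampling window,
 * diff_osn_sample_stats() leaves s->irq_count == 42: the number of IRQs
 * that hit this CPU during the sample. The same holds for the NMI, softirq
 * and thread counters.
 */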

/*
 * osnoise_stop_tracing - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_tracing(void)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d\n", smp_processor_id());

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit stop condition on CPU %d\n", smp_processor_id());

		tracer_tracing_off(tr);
	}
	rcu_read_unlock();
}

/*
 * osnoise_has_tracing_on - Check if there is at least one instance on
 */
static __always_inline int osnoise_has_tracing_on(void)
{
	struct osnoise_instance *inst;
	int trace_is_on = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list)
		trace_is_on += tracer_tracing_is_on(inst->tr);
	rcu_read_unlock();

	return trace_is_on;
}

/*
 * notify_new_max_latency - Notify a new max latency via fsnotify interface.
 */
static void notify_new_max_latency(u64 latency)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
			tr->max_latency = latency;
			latency_fsnotify(tr);
		}
	}
	rcu_read_unlock();
}

/*
 * run_osnoise - Sample the time and look for osnoise
 *
 * Used to capture the time, looking for potential osnoise latency repeatedly.
 * Different from hwlat_detector, it is called with preemption and interrupts
 * enabled. This allows irqs, softirqs and threads to run, interfering with
 * the osnoise sampling thread, as they would do with a regular thread.
 */
static int run_osnoise(void)
{
	bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 start, sample, last_sample;
	u64 last_int_count, int_count;
	s64 noise = 0, max_noise = 0;
	s64 total, last_total = 0;
	struct osnoise_sample s;
	bool disable_preemption;
	unsigned int threshold;
	u64 runtime, stop_in;
	u64 sum_noise = 0;
	int hw_count = 0;
	int ret = -1;

	/*
	 * Disabling preemption is only required if IRQs are enabled,
	 * and the option is set on.
	 */
	disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);

	/*
	 * Considers the current thread as the workload.
	 */
	osn_var->pid = current->pid;

	/*
	 * Save the current stats for the diff.
	 */
	save_osn_sample_stats(osn_var, &s);

	/*
	 * If threshold is 0, use the default value of 1 us.
	 */
	threshold = tracing_thresh ? : 1000;

	/*
	 * Apply PREEMPT and IRQ disabled options.
	 */
	if (disable_irq)
		local_irq_disable();

	if (disable_preemption)
		preempt_disable();

	/*
	 * Make sure NMIs see sampling first.
	 */
	osn_var->sampling = true;
	barrier();

	/*
	 * Transform the *_us config to nanoseconds to avoid the
	 * division on the main loop.
	 */
	runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
	stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;

	/*
	 * Start timestamp.
	 */
	start = time_get();

	/*
	 * "previous" loop.
	 */
	last_int_count = set_int_safe_time(osn_var, &last_sample);

	do {
		/*
		 * Get sample!
		 */
		int_count = set_int_safe_time(osn_var, &sample);

		noise = time_sub(sample, last_sample);

		/*
		 * This shouldn't happen.
		 */
		if (noise < 0) {
			osnoise_taint("negative noise!");
			goto out;
		}

		/*
		 * Sample runtime.
		 */
		total = time_sub(sample, start);

		/*
		 * Check for possible overflows.
		 */
		if (total < last_total) {
			osnoise_taint("total overflow!");
			break;
		}

		last_total = total;

		if (noise >= threshold) {
			int interference = int_count - last_int_count;

			if (noise > max_noise)
				max_noise = noise;

			if (!interference)
				hw_count++;

			sum_noise += noise;

			trace_sample_threshold(last_sample, noise, interference);

			if (osnoise_data.stop_tracing)
				if (noise > stop_in)
					osnoise_stop_tracing();
		}

		/*
		 * In some cases, notably when running on a nohz_full CPU with
		 * a stopped tick, PREEMPT_RCU or PREEMPT_LAZY have no way to
		 * account for QSs. This will eventually cause unwarranted
		 * noise as RCU forces preemption as the means of ending the
		 * current grace period. We avoid this by calling
		 * rcu_momentary_eqs(), which performs a zero duration EQS
		 * allowing RCU to end the current grace period. This call
		 * shouldn't be wrapped inside an RCU critical section.
		 *
		 * Normally QSs for other cases are handled through cond_resched().
		 * For simplicity, however, we call rcu_momentary_eqs() for all
		 * configurations here.
		 */
		if (!disable_irq)
			local_irq_disable();

		rcu_momentary_eqs();

		if (!disable_irq)
			local_irq_enable();

		/*
		 * For the non-preemptive kernel config: let threads run, if
		 * they so wish, unless set not to do so.
		 */
		if (!disable_irq && !disable_preemption)
			cond_resched();

		last_sample = sample;
		last_int_count = int_count;

	} while (total < runtime && !kthread_should_stop());

	/*
	 * Finish the above in the view for interrupts.
	 */
	barrier();

	osn_var->sampling = false;

	/*
	 * Make sure sampling data is no longer updated.
	 */
	barrier();

	/*
	 * Return to the preemptive state.
	 */
	if (disable_preemption)
		preempt_enable();

	if (disable_irq)
		local_irq_enable();

	/*
	 * Save noise info.
	 */
	s.noise = time_to_us(sum_noise);
	s.runtime = time_to_us(total);
	s.max_sample = time_to_us(max_noise);
	s.hw_count = hw_count;

	/* Save interference stats info */
	diff_osn_sample_stats(osn_var, &s);

	record_osnoise_sample(&s);

	notify_new_max_latency(max_noise);

	if (osnoise_data.stop_tracing_total)
		if (s.noise > osnoise_data.stop_tracing_total)
			osnoise_stop_tracing();

	return 0;
out:
	return ret;
}

static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;
static struct cpumask kthread_cpumask;

/*
 * osnoise_sleep - sleep until the next period
 */
static void osnoise_sleep(bool skip_period)
{
	u64 interval;
	ktime_t wake_time;

	mutex_lock(&interface_lock);
	if (skip_period)
		interval = osnoise_data.sample_period;
	else
		interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
	mutex_unlock(&interface_lock);

	/*
	 * Differently from hwlat_detector, the osnoise tracer can run
	 * without a pause because preemption is on.
	 */
	if (!interval) {
		/* Let synchronize_rcu_tasks() make progress */
		cond_resched_tasks_rcu_qs();
		return;
	}

	wake_time = ktime_add_us(ktime_get(), interval);
	__set_current_state(TASK_INTERRUPTIBLE);

	while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
		if (kthread_should_stop())
			break;
	}
}
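
/*
 * For example, with the default sample_period == sample_runtime == 1s,
 * osnoise_sleep(false) computes a zero interval and only yields via
 * cond_resched_tasks_rcu_qs(); with sample_runtime = 500000us, it would
 * sleep for the remaining 500ms of the period. osnoise_sleep(true) sleeps
 * for a full period, and is used when tracing is off on all instances.
 */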

/*
 * osnoise_migration_pending - checks if the task needs to migrate
 *
 * osnoise/timerlat threads are per-cpu. If there is a pending request to
 * migrate the thread away from the current CPU, something bad has happened.
 * Play the good citizen and leave.
 *
 * Returns 0 if it is safe to continue, 1 otherwise.
 */
static inline int osnoise_migration_pending(void)
{
	if (!current->migration_pending)
		return 0;

	/*
	 * If migration is pending, there is a task waiting for the
	 * tracer to enable migration. The tracer does not allow migration,
	 * thus: taint and leave to unblock the blocked thread.
	 */
	osnoise_taint("migration requested to osnoise threads, leaving.");

	/*
	 * Unset this thread from the threads managed by the interface.
	 * The tracers are responsible for cleaning their env before
	 * exiting.
	 */
	mutex_lock(&interface_lock);
	this_cpu_osn_var()->kthread = NULL;
	cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask);
	mutex_unlock(&interface_lock);

	return 1;
}

/*
 * osnoise_main - The osnoise detection kernel thread
 *
 * Calls run_osnoise() function to measure the osnoise for the configured runtime,
 * every period.
 */
static int osnoise_main(void *data)
{
	unsigned long flags;

	/*
	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
	 *
	 * To work around this limitation, disable migration and remove the
	 * flag.
	 */
	migrate_disable();
	raw_spin_lock_irqsave(&current->pi_lock, flags);
	current->flags &= ~(PF_NO_SETAFFINITY);
	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	while (!kthread_should_stop()) {
		if (osnoise_migration_pending())
			break;

		/* skip a period if tracing is off on all instances */
		if (!osnoise_has_tracing_on()) {
			osnoise_sleep(true);
			continue;
		}

		run_osnoise();
		osnoise_sleep(false);
	}

	migrate_enable();
	return 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat_irq - hrtimer handler for timerlat.
 */
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat;
	struct timerlat_sample s;
	u64 now;
	u64 diff;

	/*
	 * I am not sure if the timer was armed for this CPU. So, get
	 * the timerlat struct from the timer itself, not from this
	 * CPU.
	 */
	tlat = container_of(timer, struct timerlat_variables, timer);

	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));

	/*
	 * Enable the osnoise events for thread and softirq.
	 */
	tlat->tracing_thread = true;

	osn_var->thread.arrival_time = time_get();

	/*
	 * A hardirq is running: the timer IRQ. It is for sure preempting
	 * a thread, and potentially preempting a softirq.
	 *
	 * At this point, it is not interesting to know the duration of the
	 * preempted thread (and maybe softirq), but how much time they will
	 * delay the beginning of the execution of the timer thread.
	 *
	 * To get the correct (net) delay added by the softirq, its delta_start
	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
	 * start of the softirq will be zeroed, accounting then only the time
	 * after that.
	 *
	 * The thread follows the same principle. However, if a softirq is
	 * running, the thread needs to receive the softirq delta_start. The
	 * reason being is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
	 *
	 * The PREEMPT_RT is a special case, though. As softirqs run as threads
	 * on RT, moving the thread is enough.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->softirq.delta_start);

		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
				   &osn_var->irq.delta_start);
	} else {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->irq.delta_start);
	}

	/*
	 * Compare the current time with the expected time.
	 */
	diff = now - tlat->abs_period;

	tlat->count++;
	s.seqnum = tlat->count;
	s.timer_latency = diff;
	s.context = IRQ_CONTEXT;

	record_timerlat_sample(&s);

	if (osnoise_data.stop_tracing) {
		if (time_to_us(diff) >= osnoise_data.stop_tracing) {

			/*
			 * At this point, if stop_tracing is set and <= print_stack,
			 * print_stack is set and would be printed in the thread handler.
			 *
			 * Thus, print the stack trace as it is helpful to define the
			 * root cause of an IRQ latency.
			 */
			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
				timerlat_save_stack(0);
				timerlat_dump_stack(time_to_us(diff));
			}

			osnoise_stop_tracing();
			notify_new_max_latency(diff);

			wake_up_process(tlat->kthread);

			return HRTIMER_NORESTART;
		}
	}

	wake_up_process(tlat->kthread);

	if (osnoise_data.print_stack)
		timerlat_save_stack(0);

	return HRTIMER_NORESTART;
}
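
/*
 * Worked example of the IRQ-context sample above: if the timer was armed
 * for abs_period = 1,000,000,000ns and the handler reads now =
 * 1,000,018,000ns, a sample with an 18us timer_latency and
 * context == IRQ_CONTEXT is recorded. The wakened thread then records its
 * own, necessarily larger, latency for the same activation with
 * context == THREAD_CONTEXT.
 */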

/*
 * wait_next_period - Wait for the next period for timerlat
 */
static int wait_next_period(struct timerlat_variables *tlat)
{
	ktime_t next_abs_period, now;
	u64 rel_period = osnoise_data.timerlat_period * 1000;

	now = hrtimer_cb_get_time(&tlat->timer);
	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);

	/*
	 * Save the next abs_period.
	 */
	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);

	/*
	 * If the new abs_period is in the past, skip the activation.
	 */
	while (ktime_compare(now, next_abs_period) > 0) {
		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
	}

	set_current_state(TASK_INTERRUPTIBLE);

	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
	schedule();
	return 1;
}
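
/*
 * Worked example of the catch-up loop above: with a 1ms period, if the
 * thread was delayed and "now" already sits 2.5 periods past the previous
 * abs_period, the loop advances abs_period by two extra periods so that
 * the timer is armed in the future, skipping the missed activations
 * instead of firing them back to back.
 */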
1845
1846 /*
1847 * timerlat_main- Timerlat main
1848 */
timerlat_main(void * data)1849 static int timerlat_main(void *data)
1850 {
1851 struct osnoise_variables *osn_var = this_cpu_osn_var();
1852 struct timerlat_variables *tlat = this_cpu_tmr_var();
1853 struct timerlat_sample s;
1854 struct sched_param sp;
1855 unsigned long flags;
1856 u64 now, diff;
1857
1858 /*
1859 * Make the thread RT, that is how cyclictest is usually used.
1860 */
1861 sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1862 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1863
1864 /*
1865 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
1866 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
1867 *
1868 * To work around this limitation, disable migration and remove the
1869 * flag.
1870 */
1871 migrate_disable();
1872 raw_spin_lock_irqsave(¤t->pi_lock, flags);
1873 current->flags &= ~(PF_NO_SETAFFINITY);
1874 raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
1875
1876 tlat->count = 0;
1877 tlat->tracing_thread = false;
1878
1879 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1880 tlat->kthread = current;
1881 osn_var->pid = current->pid;
1882 /*
1883 * Anotate the arrival time.
1884 */
1885 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1886
1887 wait_next_period(tlat);
1888
1889 osn_var->sampling = 1;
1890
1891 while (!kthread_should_stop()) {
1892
1893 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1894 diff = now - tlat->abs_period;
1895
1896 s.seqnum = tlat->count;
1897 s.timer_latency = diff;
1898 s.context = THREAD_CONTEXT;
1899
1900 record_timerlat_sample(&s);
1901
1902 notify_new_max_latency(diff);
1903
1904 timerlat_dump_stack(time_to_us(diff));
1905
1906 tlat->tracing_thread = false;
1907 if (osnoise_data.stop_tracing_total)
1908 if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1909 osnoise_stop_tracing();
1910
1911 if (osnoise_migration_pending())
1912 break;
1913
1914 wait_next_period(tlat);
1915 }
1916
1917 hrtimer_cancel(&tlat->timer);
1918 migrate_enable();
1919 return 0;
1920 }
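/*
 * For reference, each period produces one IRQ-context and one
 * thread-context sample; the trace output then looks roughly like
 * this (illustrative values; see Documentation/trace/timerlat-tracer.rst
 * for the exact format):
 *
 *   <idle>-0     [000] d.h1  54.029328: #1  context    irq timer_latency    932 ns
 *    <...>-867   [000] ....  54.029339: #1  context thread timer_latency  11700 ns
 */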
1921 #else /* CONFIG_TIMERLAT_TRACER */
1922 static int timerlat_main(void *data)
1923 {
1924 return 0;
1925 }
1926 #endif /* CONFIG_TIMERLAT_TRACER */
1927
1928 /*
1929 * stop_kthread - stop a workload thread
1930 */
1931 static void stop_kthread(unsigned int cpu)
1932 {
1933 struct task_struct *kthread;
1934
1935 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
1936 if (kthread) {
1937 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
1938 !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
1939 kthread_stop(kthread);
1940 } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) {
1941 /*
1942 * This is a user thread waiting on the timerlat_fd. We need
1943 * to close all users, and the best way to guarantee this is
1944 * by killing the thread. NOTE: this is a purpose-specific file.
1945 */
1946 kill_pid(kthread->thread_pid, SIGKILL, 1);
1947 put_task_struct(kthread);
1948 }
1949 } else {
1950 /* if no workload, just return */
1951 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1952 /*
1953 * This is set in the osnoise tracer case.
1954 */
1955 per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
1956 barrier();
1957 }
1958 }
1959 }
1960
1961 /*
1962 * stop_per_cpu_kthreads - Stop per-cpu threads
1963 *
1964 * Stop the osnoise sampling threads. Use this on unload and at system
1965 * shutdown.
1966 */
1967 static void stop_per_cpu_kthreads(void)
1968 {
1969 int cpu;
1970
1971 cpus_read_lock();
1972
1973 for_each_online_cpu(cpu)
1974 stop_kthread(cpu);
1975
1976 cpus_read_unlock();
1977 }
1978
1979 /*
1980 * start_kthread - Start a workload thread
1981 */
1982 static int start_kthread(unsigned int cpu)
1983 {
1984 struct task_struct *kthread;
1985 void *main = osnoise_main;
1986 char comm[24];
1987
1988 /* Do not start a new thread if it is already running */
1989 if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
1990 return 0;
1991
1992 if (timerlat_enabled()) {
1993 snprintf(comm, 24, "timerlat/%d", cpu);
1994 main = timerlat_main;
1995 } else {
1996 /* if no workload, just return */
1997 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1998 per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
1999 barrier();
2000 return 0;
2001 }
2002 snprintf(comm, 24, "osnoise/%d", cpu);
2003 }
2004
2005 kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
2006
2007 if (IS_ERR(kthread)) {
2008 pr_err(BANNER "could not start sampling thread\n");
2009 return -ENOMEM;
2010 }
2011
2012 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
2013 cpumask_set_cpu(cpu, &kthread_cpumask);
2014
2015 return 0;
2016 }
2017
2018 /*
2019 * start_per_cpu_kthreads - Kick off per-cpu osnoise sampling kthreads
2020 *
2021 * This starts the kernel threads that will look for osnoise on the
2022 * allowed cpus.
2023 */
2024 static int start_per_cpu_kthreads(void)
2025 {
2026 struct cpumask *current_mask = &save_cpumask;
2027 int retval = 0;
2028 int cpu;
2029
2030 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
2031 if (timerlat_enabled())
2032 return 0;
2033 }
2034
2035 cpus_read_lock();
2036 /*
2037 * Run only on online CPUs in which osnoise is allowed to run.
2038 */
2039 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
2040
2041 for_each_possible_cpu(cpu) {
2042 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
2043 struct task_struct *kthread;
2044
2045 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
2046 if (!WARN_ON(!kthread))
2047 kthread_stop(kthread);
2048 }
2049 }
2050
2051 for_each_cpu(cpu, current_mask) {
2052 retval = start_kthread(cpu);
2053 if (retval) {
2054 cpus_read_unlock();
2055 stop_per_cpu_kthreads();
2056 return retval;
2057 }
2058 }
2059
2060 cpus_read_unlock();
2061
2062 return retval;
2063 }
2064
2065 #ifdef CONFIG_HOTPLUG_CPU
2066 static void osnoise_hotplug_workfn(struct work_struct *dummy)
2067 {
2068 unsigned int cpu = smp_processor_id();
2069
2070 guard(mutex)(&trace_types_lock);
2071
2072 if (!osnoise_has_registered_instances())
2073 return;
2074
2075 guard(mutex)(&interface_lock);
2076 guard(cpus_read_lock)();
2077
2078 if (!cpu_online(cpu))
2079 return;
2080
2081 if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
2082 return;
2083
2084 start_kthread(cpu);
2085 }
2086
2087 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
2088
2089 /*
2090 * osnoise_cpu_init - CPU hotplug online callback function
2091 */
2092 static int osnoise_cpu_init(unsigned int cpu)
2093 {
2094 schedule_work_on(cpu, &osnoise_hotplug_work);
2095 return 0;
2096 }
2097
2098 /*
2099 * osnoise_cpu_die - CPU hotplug offline callback function
2100 */
2101 static int osnoise_cpu_die(unsigned int cpu)
2102 {
2103 stop_kthread(cpu);
2104 return 0;
2105 }
2106
2107 static void osnoise_init_hotplug_support(void)
2108 {
2109 int ret;
2110
2111 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
2112 osnoise_cpu_init, osnoise_cpu_die);
2113 if (ret < 0)
2114 pr_warn(BANNER "Failed to initialize CPU hotplug support\n");
2115
2116 return;
2117 }
2118 #else /* CONFIG_HOTPLUG_CPU */
2119 static void osnoise_init_hotplug_support(void)
2120 {
2121 return;
2122 }
2123 #endif /* CONFIG_HOTPLUG_CPU */
2124
2125 /*
2126 * seq file functions for the osnoise/options file.
2127 */
2128 static void *s_options_start(struct seq_file *s, loff_t *pos)
2129 {
2130 int option = *pos;
2131
2132 mutex_lock(&interface_lock);
2133
2134 if (option >= OSN_MAX)
2135 return NULL;
2136
2137 return pos;
2138 }
2139
2140 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
2141 {
2142 int option = ++(*pos);
2143
2144 if (option >= OSN_MAX)
2145 return NULL;
2146
2147 return pos;
2148 }
2149
2150 static int s_options_show(struct seq_file *s, void *v)
2151 {
2152 loff_t *pos = v;
2153 int option = *pos;
2154
2155 if (option == OSN_DEFAULTS) {
2156 if (osnoise_options == OSN_DEFAULT_OPTIONS)
2157 seq_printf(s, "%s", osnoise_options_str[option]);
2158 else
2159 seq_printf(s, "NO_%s", osnoise_options_str[option]);
2160 goto out;
2161 }
2162
2163 if (test_bit(option, &osnoise_options))
2164 seq_printf(s, "%s", osnoise_options_str[option]);
2165 else
2166 seq_printf(s, "NO_%s", osnoise_options_str[option]);
2167
2168 out:
2169 if (option != OSN_MAX)
2170 seq_puts(s, " ");
2171
2172 return 0;
2173 }
2174
2175 static void s_options_stop(struct seq_file *s, void *v)
2176 {
2177 seq_puts(s, "\n");
2178 mutex_unlock(&interface_lock);
2179 }
2180
2181 static const struct seq_operations osnoise_options_seq_ops = {
2182 .start = s_options_start,
2183 .next = s_options_next,
2184 .show = s_options_show,
2185 .stop = s_options_stop
2186 };
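/*
 * With the default options (OSN_DEFAULT_OPTIONS == 0x2, i.e., only
 * OSN_WORKLOAD set), reading the file via the seq_file operations
 * above yields a single line such as (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *   # cat /sys/kernel/tracing/osnoise/options
 *   DEFAULTS OSNOISE_WORKLOAD NO_PANIC_ON_STOP NO_OSNOISE_PREEMPT_DISABLE NO_OSNOISE_IRQ_DISABLE
 */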
2187
2188 static int osnoise_options_open(struct inode *inode, struct file *file)
2189 {
2190 return seq_open(file, &osnoise_options_seq_ops);
2191 }
2192
2193 /**
2194 * osnoise_options_write - Write function for "options" entry
2195 * @filp: The active open file structure
2196 * @ubuf: The user buffer that contains the value to write
2197 * @cnt: The maximum number of bytes to write to "file"
2198 * @ppos: The current position in @file
2199 *
2200 * Writing the option name sets the option; writing the "NO_"
2201 * prefix in front of the option name disables it.
2202 *
2203 * Writing "DEFAULTS" resets the option values to the default ones.
2204 */
2205 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
2206 size_t cnt, loff_t *ppos)
2207 {
2208 int running, option, enable, retval;
2209 char buf[256], *option_str;
2210
2211 if (cnt >= 256)
2212 return -EINVAL;
2213
2214 if (copy_from_user(buf, ubuf, cnt))
2215 return -EFAULT;
2216
2217 buf[cnt] = 0;
2218
2219 if (strncmp(buf, "NO_", 3)) {
2220 option_str = strstrip(buf);
2221 enable = true;
2222 } else {
2223 option_str = strstrip(&buf[3]);
2224 enable = false;
2225 }
2226
2227 option = match_string(osnoise_options_str, OSN_MAX, option_str);
2228 if (option < 0)
2229 return -EINVAL;
2230
2231 /*
2232 * trace_types_lock is taken to avoid concurrency on start/stop.
2233 */
2234 mutex_lock(&trace_types_lock);
2235 running = osnoise_has_registered_instances();
2236 if (running)
2237 stop_per_cpu_kthreads();
2238
2239 mutex_lock(&interface_lock);
2240 /*
2241 * avoid CPU hotplug operations that might read options.
2242 */
2243 cpus_read_lock();
2244
2245 retval = cnt;
2246
2247 if (enable) {
2248 if (option == OSN_DEFAULTS)
2249 osnoise_options = OSN_DEFAULT_OPTIONS;
2250 else
2251 set_bit(option, &osnoise_options);
2252 } else {
2253 if (option == OSN_DEFAULTS)
2254 retval = -EINVAL;
2255 else
2256 clear_bit(option, &osnoise_options);
2257 }
2258
2259 cpus_read_unlock();
2260 mutex_unlock(&interface_lock);
2261
2262 if (running)
2263 start_per_cpu_kthreads();
2264 mutex_unlock(&trace_types_lock);
2265
2266 return retval;
2267 }
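/*
 * Example usage of the write handler above (assuming tracefs is
 * mounted at /sys/kernel/tracing):
 *
 *   # echo NO_OSNOISE_WORKLOAD > /sys/kernel/tracing/osnoise/options
 *   # echo PANIC_ON_STOP > /sys/kernel/tracing/osnoise/options
 *   # echo DEFAULTS > /sys/kernel/tracing/osnoise/options
 *
 * The first disables the in-kernel workload, the second arms a panic
 * when tracing is stopped, and the last restores the defaults.
 */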
2268
2269 /*
2270 * osnoise_cpus_read - Read function for reading the "cpus" file
2271 * @filp: The active open file structure
2272 * @ubuf: The userspace provided buffer to read value into
2273 * @count: The maximum number of bytes to read
2274 * @ppos: The current "file" position
2275 *
2276 * Prints the "cpus" output into the user-provided buffer.
2277 */
2278 static ssize_t
2279 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
2280 loff_t *ppos)
2281 {
2282 char *mask_str __free(kfree) = NULL;
2283 int len;
2284
2285 guard(mutex)(&interface_lock);
2286
2287 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
2288 mask_str = kmalloc(len, GFP_KERNEL);
2289 if (!mask_str)
2290 return -ENOMEM;
2291
2292 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2293 if (len >= count)
2294 return -EINVAL;
2295
2296 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2297
2298 return count;
2299 }
2300
2301 /*
2302 * osnoise_cpus_write - Write function for "cpus" entry
2303 * @filp: The active open file structure
2304 * @ubuf: The user buffer that contains the value to write
2305 * @count: The maximum number of bytes to write to "file"
2306 * @ppos: The current position in @file
2307 *
2308 * This function provides a write implementation for the "cpus"
2309 * interface to the osnoise tracer. By default, it lists all CPUs,
2310 * allowing osnoise threads to run on any online CPU of the system.
2311 * It serves to restrict the execution of osnoise to the set of CPUs
2312 * written via this interface. Why not use "tracing_cpumask"?
2313 * Because the user might be interested in tracing what is running on
2314 * other CPUs. For instance, one might run osnoise in one HT CPU
2315 * while observing what is running on the sibling HT CPU.
2316 */
2317 static ssize_t
2318 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
2319 loff_t *ppos)
2320 {
2321 cpumask_var_t osnoise_cpumask_new;
2322 int running, err;
2323 char *buf __free(kfree) = NULL;
2324
2325 buf = kmalloc(count, GFP_KERNEL);
2326 if (!buf)
2327 return -ENOMEM;
2328
2329 if (copy_from_user(buf, ubuf, count))
2330 return -EFAULT;
2331
2332 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
2333 return -ENOMEM;
2334
2335 err = cpulist_parse(buf, osnoise_cpumask_new);
2336 if (err)
2337 goto err_free;
2338
2339 /*
2340 * trace_types_lock is taken to avoid concurrency on start/stop.
2341 */
2342 mutex_lock(&trace_types_lock);
2343 running = osnoise_has_registered_instances();
2344 if (running)
2345 stop_per_cpu_kthreads();
2346
2347 mutex_lock(&interface_lock);
2348 /*
2349 * osnoise_cpumask is read by CPU hotplug operations.
2350 */
2351 cpus_read_lock();
2352
2353 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
2354
2355 cpus_read_unlock();
2356 mutex_unlock(&interface_lock);
2357
2358 if (running)
2359 start_per_cpu_kthreads();
2360 mutex_unlock(&trace_types_lock);
2361
2362 free_cpumask_var(osnoise_cpumask_new);
2363 return count;
2364
2365 err_free:
2366 free_cpumask_var(osnoise_cpumask_new);
2367
2368 return err;
2369 }
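/*
 * Example usage of the write handler above (assuming tracefs is
 * mounted at /sys/kernel/tracing):
 *
 *   # echo 0-3,8 > /sys/kernel/tracing/osnoise/cpus
 *
 * This restricts the osnoise/timerlat workload to CPUs 0-3 and 8; the
 * list format is the one accepted by cpulist_parse().
 */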
2370
2371 #ifdef CONFIG_TIMERLAT_TRACER
2372 static int timerlat_fd_open(struct inode *inode, struct file *file)
2373 {
2374 struct osnoise_variables *osn_var;
2375 struct timerlat_variables *tlat;
2376 long cpu = (long) inode->i_cdev;
2377
2378 mutex_lock(&interface_lock);
2379
2380 /*
2381 * This file is accessible only if timerlat is enabled, and
2382 * NO_OSNOISE_WORKLOAD is set.
2383 */
2384 if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
2385 mutex_unlock(&interface_lock);
2386 return -EINVAL;
2387 }
2388
2389 migrate_disable();
2390
2391 osn_var = this_cpu_osn_var();
2392
2393 /*
2394 * The osn_var->pid holds the single user of this file.
2395 */
2396 if (osn_var->pid) {
2397 mutex_unlock(&interface_lock);
2398 migrate_enable();
2399 return -EBUSY;
2400 }
2401
2402 /*
2403 * timerlat tracer is a per-cpu tracer. Check if the user-space thread
2404 * is pinned to a single CPU too. The tracer later monitors if the task
2405 * migrates, and disables the tracer if it does. However, it is
2406 * worth doing this basic acceptance test to avoid an obviously wrong
2407 * setup.
2408 */
2409 if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) {
2410 mutex_unlock(&interface_lock);
2411 migrate_enable();
2412 return -EPERM;
2413 }
2414
2415 /*
2416 * From now on, it is good to go.
2417 */
2418 file->private_data = inode->i_cdev;
2419
2420 get_task_struct(current);
2421
2422 osn_var->kthread = current;
2423 osn_var->pid = current->pid;
2424
2425 /*
2426 * Setup is done.
2427 */
2428 mutex_unlock(&interface_lock);
2429
2430 tlat = this_cpu_tmr_var();
2431 tlat->count = 0;
2432
2433 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
2434
2435 migrate_enable();
2436 return 0;
2437 }
2438
2439 /*
2440 * timerlat_fd_read - Read function for "timerlat_fd" file
2441 * @file: The active open file structure
2442 * @ubuf: The userspace provided buffer to read value into
2443 * @cnt: The maximum number of bytes to read
2444 * @ppos: The current "file" position
2445 *
2446 * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error.
2447 */
2448 static ssize_t
2449 timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
2450 loff_t *ppos)
2451 {
2452 long cpu = (long) file->private_data;
2453 struct osnoise_variables *osn_var;
2454 struct timerlat_variables *tlat;
2455 struct timerlat_sample s;
2456 s64 diff;
2457 u64 now;
2458
2459 migrate_disable();
2460
2461 tlat = this_cpu_tmr_var();
2462
2463 /*
2464 * While in user-space, the thread is migratable. There is nothing
2465 * we can do about it.
2466 * So, if the thread is running on another CPU, stop the machinery.
2467 */
2468 if (cpu == smp_processor_id()) {
2469 if (tlat->uthread_migrate) {
2470 migrate_enable();
2471 return -EINVAL;
2472 }
2473 } else {
2474 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
2475 osnoise_taint("timerlat user thread migrate\n");
2476 osnoise_stop_tracing();
2477 migrate_enable();
2478 return -EINVAL;
2479 }
2480
2481 osn_var = this_cpu_osn_var();
2482
2483 /*
2484 * The timerlat in user-space runs in a different order:
2485 * the read() starts from the execution of the previous occurrence,
2486 * sleeping for the next occurrence.
2487 *
2488 * So, skip this step if we are entering read() before the first
2489 * wakeup from the timerlat IRQ:
2490 */
2491 if (likely(osn_var->sampling)) {
2492 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2493 diff = now - tlat->abs_period;
2494
2495 /*
2496 * it was not a timer firing, but some other signal?
2497 */
2498 if (diff < 0)
2499 goto out;
2500
2501 s.seqnum = tlat->count;
2502 s.timer_latency = diff;
2503 s.context = THREAD_URET;
2504
2505 record_timerlat_sample(&s);
2506
2507 notify_new_max_latency(diff);
2508
2509 tlat->tracing_thread = false;
2510 if (osnoise_data.stop_tracing_total)
2511 if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
2512 osnoise_stop_tracing();
2513 } else {
2514 tlat->tracing_thread = false;
2515 tlat->kthread = current;
2516
2517 /* Record the current time as the base for the new period */
2518 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
2519
2520 osn_var->sampling = 1;
2521 }
2522
2523 /* wait for the next period */
2524 wait_next_period(tlat);
2525
2526 /* This is the wakeup from this cycle */
2527 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2528 diff = now - tlat->abs_period;
2529
2530 /*
2531 * it was not a timer firing, but some other signal?
2532 */
2533 if (diff < 0)
2534 goto out;
2535
2536 s.seqnum = tlat->count;
2537 s.timer_latency = diff;
2538 s.context = THREAD_CONTEXT;
2539
2540 record_timerlat_sample(&s);
2541
2542 if (osnoise_data.stop_tracing_total) {
2543 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
2544 timerlat_dump_stack(time_to_us(diff));
2545 notify_new_max_latency(diff);
2546 osnoise_stop_tracing();
2547 }
2548 }
2549
2550 out:
2551 migrate_enable();
2552 return 0;
2553 }
2554
2555 static int timerlat_fd_release(struct inode *inode, struct file *file)
2556 {
2557 struct osnoise_variables *osn_var;
2558 struct timerlat_variables *tlat_var;
2559 long cpu = (long) file->private_data;
2560
2561 migrate_disable();
2562 mutex_lock(&interface_lock);
2563
2564 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
2565 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
2566
2567 if (tlat_var->kthread)
2568 hrtimer_cancel(&tlat_var->timer);
2569 memset(tlat_var, 0, sizeof(*tlat_var));
2570
2571 osn_var->sampling = 0;
2572 osn_var->pid = 0;
2573
2574 /*
2575 * We are leaving, not being stopped... see stop_kthread();
2576 */
2577 if (osn_var->kthread) {
2578 put_task_struct(osn_var->kthread);
2579 osn_var->kthread = NULL;
2580 }
2581
2582 mutex_unlock(&interface_lock);
2583 migrate_enable();
2584 return 0;
2585 }
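/*
 * A minimal user-space sketch of the timerlat_fd protocol implemented
 * above (not part of this file; in the spirit of the example in
 * Documentation/trace/timerlat-tracer.rst). It assumes tracefs is
 * mounted at /sys/kernel/tracing and timerlat was enabled with the
 * NO_OSNOISE_WORKLOAD option. The thread must be pinned to the file's
 * CPU before open(), as checked in timerlat_fd_open(); each read()
 * then blocks until the next timer period.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <sched.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[64];
 *		cpu_set_t set;
 *		int fd;
 *
 *		// Pin to CPU 0: nr_cpus_allowed must be 1 for open() to succeed.
 *		CPU_ZERO(&set);
 *		CPU_SET(0, &set);
 *		if (sched_setaffinity(0, sizeof(set), &set))
 *			return 1;
 *
 *		fd = open("/sys/kernel/tracing/osnoise/per_cpu/cpu0/timerlat_fd", O_RDONLY);
 *		if (fd < 0)
 *			return 1;
 *
 *		// Runs until killed: each read() returns after the next
 *		// timerlat period; do the real workload between reads.
 *		for (;;) {
 *			if (read(fd, buf, sizeof(buf)) < 0)
 *				break;
 *		}
 *
 *		close(fd);
 *		return 0;
 *	}
 */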
2586 #endif
2587
2588 /*
2589 * osnoise/runtime_us: cannot be greater than the period.
2590 */
2591 static struct trace_min_max_param osnoise_runtime = {
2592 .lock = &interface_lock,
2593 .val = &osnoise_data.sample_runtime,
2594 .max = &osnoise_data.sample_period,
2595 .min = NULL,
2596 };
2597
2598 /*
2599 * osnoise/period_us: cannot be smaller than the runtime.
2600 */
2601 static struct trace_min_max_param osnoise_period = {
2602 .lock = &interface_lock,
2603 .val = &osnoise_data.sample_period,
2604 .max = NULL,
2605 .min = &osnoise_data.sample_runtime,
2606 };
2607
2608 /*
2609 * osnoise/stop_tracing_us: no limit.
2610 */
2611 static struct trace_min_max_param osnoise_stop_tracing_in = {
2612 .lock = &interface_lock,
2613 .val = &osnoise_data.stop_tracing,
2614 .max = NULL,
2615 .min = NULL,
2616 };
2617
2618 /*
2619 * osnoise/stop_tracing_total_us: no limit.
2620 */
2621 static struct trace_min_max_param osnoise_stop_tracing_total = {
2622 .lock = &interface_lock,
2623 .val = &osnoise_data.stop_tracing_total,
2624 .max = NULL,
2625 .min = NULL,
2626 };
2627
2628 #ifdef CONFIG_TIMERLAT_TRACER
2629 /*
2630 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
2631 * latency is higher than val.
2632 */
2633 static struct trace_min_max_param osnoise_print_stack = {
2634 .lock = &interface_lock,
2635 .val = &osnoise_data.print_stack,
2636 .max = NULL,
2637 .min = NULL,
2638 };
2639
2640 /*
2641 * osnoise/timerlat_period: min 100 us, max 1 s
2642 */
2643 static u64 timerlat_min_period = 100;
2644 static u64 timerlat_max_period = 1000000;
2645 static struct trace_min_max_param timerlat_period = {
2646 .lock = &interface_lock,
2647 .val = &osnoise_data.timerlat_period,
2648 .max = &timerlat_max_period,
2649 .min = &timerlat_min_period,
2650 };
2651
2652 static const struct file_operations timerlat_fd_fops = {
2653 .open = timerlat_fd_open,
2654 .read = timerlat_fd_read,
2655 .release = timerlat_fd_release,
2656 .llseek = generic_file_llseek,
2657 };
2658 #endif
2659
2660 static const struct file_operations cpus_fops = {
2661 .open = tracing_open_generic,
2662 .read = osnoise_cpus_read,
2663 .write = osnoise_cpus_write,
2664 .llseek = generic_file_llseek,
2665 };
2666
2667 static const struct file_operations osnoise_options_fops = {
2668 .open = osnoise_options_open,
2669 .read = seq_read,
2670 .llseek = seq_lseek,
2671 .release = seq_release,
2672 .write = osnoise_options_write
2673 };
2674
2675 #ifdef CONFIG_TIMERLAT_TRACER
2676 #ifdef CONFIG_STACKTRACE
2677 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2678 {
2679 struct dentry *tmp;
2680
2681 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
2682 &osnoise_print_stack, &trace_min_max_fops);
2683 if (!tmp)
2684 return -ENOMEM;
2685
2686 return 0;
2687 }
2688 #else /* CONFIG_STACKTRACE */
2689 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2690 {
2691 return 0;
2692 }
2693 #endif /* CONFIG_STACKTRACE */
2694
2695 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
2696 {
2697 struct dentry *timerlat_fd;
2698 struct dentry *per_cpu;
2699 struct dentry *cpu_dir;
2700 char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
2701 long cpu;
2702
2703 /*
2704 * Why not use the tracing instance per_cpu/ dir?
2705 *
2706 * Because osnoise/timerlat have a single workload, having
2707 * multiple files like these would be a waste of memory.
2708 */
2709 per_cpu = tracefs_create_dir("per_cpu", top_dir);
2710 if (!per_cpu)
2711 return -ENOMEM;
2712
2713 for_each_possible_cpu(cpu) {
2714 snprintf(cpu_str, 30, "cpu%ld", cpu);
2715 cpu_dir = tracefs_create_dir(cpu_str, per_cpu);
2716 if (!cpu_dir)
2717 goto out_clean;
2718
2719 timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
2720 cpu_dir, NULL, &timerlat_fd_fops);
2721 if (!timerlat_fd)
2722 goto out_clean;
2723
2724 /* Record the CPU */
2725 d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
2726 }
2727
2728 return 0;
2729
2730 out_clean:
2731 tracefs_remove(per_cpu);
2732 return -ENOMEM;
2733 }
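/*
 * The result is one purpose-specific file per possible CPU, e.g.
 * (with tracefs mounted at /sys/kernel/tracing):
 *
 *   /sys/kernel/tracing/osnoise/per_cpu/cpu0/timerlat_fd
 *   /sys/kernel/tracing/osnoise/per_cpu/cpu1/timerlat_fd
 */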
2734
2735 /*
2736 * init_timerlat_tracefs - A function to initialize the timerlat interface files
2737 */
2738 static int init_timerlat_tracefs(struct dentry *top_dir)
2739 {
2740 struct dentry *tmp;
2741 int retval;
2742
2743 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
2744 &timerlat_period, &trace_min_max_fops);
2745 if (!tmp)
2746 return -ENOMEM;
2747
2748 retval = osnoise_create_cpu_timerlat_fd(top_dir);
2749 if (retval)
2750 return retval;
2751
2752 return init_timerlat_stack_tracefs(top_dir);
2753 }
2754 #else /* CONFIG_TIMERLAT_TRACER */
2755 static int init_timerlat_tracefs(struct dentry *top_dir)
2756 {
2757 return 0;
2758 }
2759 #endif /* CONFIG_TIMERLAT_TRACER */
2760
2761 /*
2762 * init_tracefs - A function to initialize the tracefs interface files
2763 *
2764 * This function creates entries in tracefs for "osnoise" and "timerlat".
2765 * It creates these directories in the tracing directory, and within that
2766 * directory the user can change and view the configs.
2767 */
2768 static int init_tracefs(void)
2769 {
2770 struct dentry *top_dir;
2771 struct dentry *tmp;
2772 int ret;
2773
2774 ret = tracing_init_dentry();
2775 if (ret)
2776 return -ENOMEM;
2777
2778 top_dir = tracefs_create_dir("osnoise", NULL);
2779 if (!top_dir)
2780 return 0;
2781
2782 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
2783 &osnoise_period, &trace_min_max_fops);
2784 if (!tmp)
2785 goto err;
2786
2787 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
2788 &osnoise_runtime, &trace_min_max_fops);
2789 if (!tmp)
2790 goto err;
2791
2792 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
2793 &osnoise_stop_tracing_in, &trace_min_max_fops);
2794 if (!tmp)
2795 goto err;
2796
2797 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
2798 &osnoise_stop_tracing_total, &trace_min_max_fops);
2799 if (!tmp)
2800 goto err;
2801
2802 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
2803 if (!tmp)
2804 goto err;
2805
2806 tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
2807 &osnoise_options_fops);
2808 if (!tmp)
2809 goto err;
2810
2811 ret = init_timerlat_tracefs(top_dir);
2812 if (ret)
2813 goto err;
2814
2815 return 0;
2816
2817 err:
2818 tracefs_remove(top_dir);
2819 return -ENOMEM;
2820 }
2821
2822 static int osnoise_hook_events(void)
2823 {
2824 int retval;
2825
2826 /*
2827 * Trace is already hooked; we are re-enabling from
2828 * a stop_tracing_*.
2829 */
2830 if (trace_osnoise_callback_enabled)
2831 return 0;
2832
2833 retval = hook_irq_events();
2834 if (retval)
2835 return -EINVAL;
2836
2837 retval = hook_softirq_events();
2838 if (retval)
2839 goto out_unhook_irq;
2840
2841 retval = hook_thread_events();
2842 /*
2843 * All fine!
2844 */
2845 if (!retval)
2846 return 0;
2847
2848 unhook_softirq_events();
2849 out_unhook_irq:
2850 unhook_irq_events();
2851 return -EINVAL;
2852 }
2853
2854 static void osnoise_unhook_events(void)
2855 {
2856 unhook_thread_events();
2857 unhook_softirq_events();
2858 unhook_irq_events();
2859 }
2860
2861 /*
2862 * osnoise_workload_start - start the workload and hook to events
2863 */
2864 static int osnoise_workload_start(void)
2865 {
2866 int retval;
2867
2868 /*
2869 * Instances need to be registered after calling workload
2870 * start. Hence, if there is already an instance, the
2871 * workload was already started. Otherwise, this
2872 * code is on the way to register the first instance,
2873 * and the workload will start.
2874 */
2875 if (osnoise_has_registered_instances())
2876 return 0;
2877
2878 osn_var_reset_all();
2879
2880 retval = osnoise_hook_events();
2881 if (retval)
2882 return retval;
2883
2884 /*
2885 * Make sure that ftrace_nmi_enter/exit() see reset values
2886 * before enabling trace_osnoise_callback_enabled.
2887 */
2888 barrier();
2889 trace_osnoise_callback_enabled = true;
2890
2891 retval = start_per_cpu_kthreads();
2892 if (retval) {
2893 trace_osnoise_callback_enabled = false;
2894 /*
2895 * Make sure that ftrace_nmi_enter/exit() see
2896 * trace_osnoise_callback_enabled as false before continuing.
2897 */
2898 barrier();
2899
2900 osnoise_unhook_events();
2901 return retval;
2902 }
2903
2904 return 0;
2905 }
2906
2907 /*
2908 * osnoise_workload_stop - stop the workload and unhook the events
2909 */
2910 static void osnoise_workload_stop(void)
2911 {
2912 /*
2913 * Instances need to be unregistered before calling
2914 * stop. Hence, if there is a registered instance, more
2915 * than one instance is running, and the workload will not
2916 * yet stop. Otherwise, this code is on the way to disable
2917 * the last instance, and the workload can stop.
2918 */
2919 if (osnoise_has_registered_instances())
2920 return;
2921
2922 /*
2923 * If callbacks were already disabled in a previous stop
2924 * call, there is no need to disable them again.
2925 *
2926 * For instance, this happens when tracing is stopped via:
2927 * echo 0 > tracing_on
2928 * echo nop > current_tracer.
2929 */
2930 if (!trace_osnoise_callback_enabled)
2931 return;
2932
2933 trace_osnoise_callback_enabled = false;
2934 /*
2935 * Make sure that ftrace_nmi_enter/exit() see
2936 * trace_osnoise_callback_enabled as false before continuing.
2937 */
2938 barrier();
2939
2940 stop_per_cpu_kthreads();
2941
2942 osnoise_unhook_events();
2943 }
2944
2945 static void osnoise_tracer_start(struct trace_array *tr)
2946 {
2947 int retval;
2948
2949 /*
2950 * If the instance is already registered, there is no need to
2951 * register it again.
2952 */
2953 if (osnoise_instance_registered(tr))
2954 return;
2955
2956 retval = osnoise_workload_start();
2957 if (retval)
2958 pr_err(BANNER "Error starting osnoise tracer\n");
2959
2960 osnoise_register_instance(tr);
2961 }
2962
2963 static void osnoise_tracer_stop(struct trace_array *tr)
2964 {
2965 osnoise_unregister_instance(tr);
2966 osnoise_workload_stop();
2967 }
2968
2969 static int osnoise_tracer_init(struct trace_array *tr)
2970 {
2971 /*
2972 * Only allow osnoise tracer if timerlat tracer is not running
2973 * already.
2974 */
2975 if (timerlat_enabled())
2976 return -EBUSY;
2977
2978 tr->max_latency = 0;
2979
2980 osnoise_tracer_start(tr);
2981 return 0;
2982 }
2983
2984 static void osnoise_tracer_reset(struct trace_array *tr)
2985 {
2986 osnoise_tracer_stop(tr);
2987 }
2988
2989 static struct tracer osnoise_tracer __read_mostly = {
2990 .name = "osnoise",
2991 .init = osnoise_tracer_init,
2992 .reset = osnoise_tracer_reset,
2993 .start = osnoise_tracer_start,
2994 .stop = osnoise_tracer_stop,
2995 .print_header = print_osnoise_headers,
2996 .allow_instances = true,
2997 };
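/*
 * As with any tracer, osnoise is enabled by writing its name to the
 * current_tracer file (assuming tracefs is mounted at
 * /sys/kernel/tracing); timerlat, registered below, works the same way:
 *
 *   # echo osnoise > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/trace
 */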
2998
2999 #ifdef CONFIG_TIMERLAT_TRACER
3000 static void timerlat_tracer_start(struct trace_array *tr)
3001 {
3002 int retval;
3003
3004 /*
3005 * If the instance is already registered, there is no need to
3006 * register it again.
3007 */
3008 if (osnoise_instance_registered(tr))
3009 return;
3010
3011 retval = osnoise_workload_start();
3012 if (retval)
3013 pr_err(BANNER "Error starting timerlat tracer\n");
3014
3015 osnoise_register_instance(tr);
3016
3017 return;
3018 }
3019
3020 static void timerlat_tracer_stop(struct trace_array *tr)
3021 {
3022 int cpu;
3023
3024 osnoise_unregister_instance(tr);
3025
3026 /*
3027 * Instruct the threads to stop only if this is the last instance.
3028 */
3029 if (!osnoise_has_registered_instances()) {
3030 for_each_online_cpu(cpu)
3031 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
3032 }
3033
3034 osnoise_workload_stop();
3035 }
3036
3037 static int timerlat_tracer_init(struct trace_array *tr)
3038 {
3039 /*
3040 * Only allow timerlat tracer if osnoise tracer is not running already.
3041 */
3042 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
3043 return -EBUSY;
3044
3045 /*
3046 * If this is the first instance, set timerlat_tracer to block
3047 * osnoise tracer start.
3048 */
3049 if (!osnoise_has_registered_instances())
3050 osnoise_data.timerlat_tracer = 1;
3051
3052 tr->max_latency = 0;
3053 timerlat_tracer_start(tr);
3054
3055 return 0;
3056 }
3057
3058 static void timerlat_tracer_reset(struct trace_array *tr)
3059 {
3060 timerlat_tracer_stop(tr);
3061
3062 /*
3063 * If this is the last instance, reset timerlat_tracer allowing
3064 * osnoise to be started.
3065 */
3066 if (!osnoise_has_registered_instances())
3067 osnoise_data.timerlat_tracer = 0;
3068 }
3069
3070 static struct tracer timerlat_tracer __read_mostly = {
3071 .name = "timerlat",
3072 .init = timerlat_tracer_init,
3073 .reset = timerlat_tracer_reset,
3074 .start = timerlat_tracer_start,
3075 .stop = timerlat_tracer_stop,
3076 .print_header = print_timerlat_headers,
3077 .allow_instances = true,
3078 };
3079
3080 __init static int init_timerlat_tracer(void)
3081 {
3082 return register_tracer(&timerlat_tracer);
3083 }
3084 #else /* CONFIG_TIMERLAT_TRACER */
3085 __init static int init_timerlat_tracer(void)
3086 {
3087 return 0;
3088 }
3089 #endif /* CONFIG_TIMERLAT_TRACER */
3090
3091 __init static int init_osnoise_tracer(void)
3092 {
3093 int ret;
3094
3095 mutex_init(&interface_lock);
3096
3097 cpumask_copy(&osnoise_cpumask, cpu_all_mask);
3098
3099 ret = register_tracer(&osnoise_tracer);
3100 if (ret) {
3101 pr_err(BANNER "Error registering osnoise!\n");
3102 return ret;
3103 }
3104
3105 ret = init_timerlat_tracer();
3106 if (ret) {
3107 pr_err(BANNER "Error registering timerlat!\n");
3108 return ret;
3109 }
3110
3111 osnoise_init_hotplug_support();
3112
3113 INIT_LIST_HEAD_RCU(&osnoise_instances);
3114
3115 init_tracefs();
3116
3117 return 0;
3118 }
3119 late_initcall(init_osnoise_tracer);
3120