// SPDX-License-Identifier: GPL-2.0
/*
 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
 *
 * Based on "hwlat_detector" tracer by:
 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *   With feedback from Clark Williams <williams@redhat.com>
 *
 * And also based on the rtsl tracer presented on:
 *   DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
 *   scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
 *   (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
 *
 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
 */

#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched.h>
#include <linux/string.h>
#include "trace.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/trace/irq_vectors.h>
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#endif /* CONFIG_X86_LOCAL_APIC */

#include <trace/events/irq.h>
#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/osnoise.h>

/*
 * Default values.
 */
#define BANNER			"osnoise: "
#define DEFAULT_SAMPLE_PERIOD	1000000	/* 1s */
#define DEFAULT_SAMPLE_RUNTIME	1000000	/* 1s */

#define DEFAULT_TIMERLAT_PERIOD	1000	/* 1ms */
#define DEFAULT_TIMERLAT_PRIO	95	/* FIFO 95 */

/*
 * osnoise/options entries.
 */
enum osnoise_options_index {
	OSN_DEFAULTS = 0,
	OSN_WORKLOAD,
	OSN_PANIC_ON_STOP,
	OSN_PREEMPT_DISABLE,
	OSN_IRQ_DISABLE,
	OSN_MAX
};

static const char * const osnoise_options_str[OSN_MAX] = {
	"DEFAULTS",
	"OSNOISE_WORKLOAD",
	"PANIC_ON_STOP",
	"OSNOISE_PREEMPT_DISABLE",
	"OSNOISE_IRQ_DISABLE" };

#define OSN_DEFAULT_OPTIONS	0x2
static unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
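
/*
 * Example (illustrative only): OSN_DEFAULT_OPTIONS is 0x2, which is
 * BIT(OSN_WORKLOAD), so only the workload option is set by default.
 * Options are queried with the regular bitops, e.g.:
 *
 *	if (test_bit(OSN_WORKLOAD, &osnoise_options))
 *		start_workload();	(start_workload() is a made-up name)
 *
 * This mirrors how the options are tested throughout this file.
 */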

/*
 * trace_array of the enabled osnoise/timerlat instances.
 */
struct osnoise_instance {
	struct list_head	list;
	struct trace_array	*tr;
};

static struct list_head osnoise_instances;

static bool osnoise_has_registered_instances(void)
{
	return !!list_first_or_null_rcu(&osnoise_instances,
					struct osnoise_instance,
					list);
}

/*
 * osnoise_instance_registered - check if a tr is already registered
 */
static int osnoise_instance_registered(struct trace_array *tr)
{
	struct osnoise_instance *inst;
	int found = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		if (inst->tr == tr)
			found = 1;
	}
	rcu_read_unlock();

	return found;
}

/*
 * osnoise_register_instance - register a new trace instance
 *
 * Register a trace_array *tr in the list of instances running
 * osnoise/timerlat tracers.
 */
static int osnoise_register_instance(struct trace_array *tr)
{
	struct osnoise_instance *inst;

	/*
	 * register/unregister serialization is provided by trace's
	 * trace_types_lock.
	 */
	lockdep_assert_held(&trace_types_lock);

	inst = kmalloc(sizeof(*inst), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	INIT_LIST_HEAD_RCU(&inst->list);
	inst->tr = tr;
	list_add_tail_rcu(&inst->list, &osnoise_instances);

	return 0;
}

/*
 * osnoise_unregister_instance - unregister a registered trace instance
 *
 * Remove the trace_array *tr from the list of instances running
 * osnoise/timerlat tracers.
 */
static void osnoise_unregister_instance(struct trace_array *tr)
{
	struct osnoise_instance *inst;
	int found = 0;

	/*
	 * register/unregister serialization is provided by trace's
	 * trace_types_lock.
	 */
	list_for_each_entry_rcu(inst, &osnoise_instances, list,
				lockdep_is_held(&trace_types_lock)) {
		if (inst->tr == tr) {
			list_del_rcu(&inst->list);
			found = 1;
			break;
		}
	}

	if (!found)
		return;

	kvfree_rcu_mightsleep(inst);
}

/*
 * NMI runtime info.
 */
struct osn_nmi {
	u64	count;
	u64	delta_start;
};

/*
 * IRQ runtime info.
 */
struct osn_irq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

#define IRQ_CONTEXT	0
#define THREAD_CONTEXT	1
#define THREAD_URET	2
/*
 * softirq runtime info.
 */
struct osn_softirq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * thread runtime info.
 */
struct osn_thread {
	u64	count;
	u64	arrival_time;
	u64	delta_start;
};

/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
	struct task_struct	*kthread;
	bool			sampling;
	pid_t			pid;
	struct osn_nmi		nmi;
	struct osn_irq		irq;
	struct osn_softirq	softirq;
	struct osn_thread	thread;
	local_t			int_counter;
};

/*
 * Per-cpu runtime information.
 */
static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);

/*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables of the current CPU
 */
static inline struct osnoise_variables *this_cpu_osn_var(void)
{
	return this_cpu_ptr(&per_cpu_osnoise_var);
}

/*
 * Protect the interface.
 */
static struct mutex interface_lock;

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
	struct task_struct	*kthread;
	struct hrtimer		timer;
	u64			rel_period;
	u64			abs_period;
	bool			tracing_thread;
	u64			count;
	bool			uthread_migrate;
};

static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);

/*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables of the current CPU
 */
static inline struct timerlat_variables *this_cpu_tmr_var(void)
{
	return this_cpu_ptr(&per_cpu_timerlat_var);
}

/*
 * tlat_var_reset - Reset the values of the per-cpu timerlat_variables
 */
static inline void tlat_var_reset(void)
{
	struct timerlat_variables *tlat_var;
	int cpu;

	/* Synchronize with the timerlat interfaces */
	mutex_lock(&interface_lock);
	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_online_cpu(cpu) {
		tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
		if (tlat_var->kthread)
			hrtimer_cancel(&tlat_var->timer);
		memset(tlat_var, 0, sizeof(*tlat_var));
	}
	mutex_unlock(&interface_lock);
}
#else /* CONFIG_TIMERLAT_TRACER */
#define tlat_var_reset()	do {} while (0)
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * osn_var_reset - Reset the values of the per-cpu osnoise_variables
 */
static inline void osn_var_reset(void)
{
	struct osnoise_variables *osn_var;
	int cpu;

	/*
	 * So far, all the values are initialized as 0, so
	 * zeroing the structure is perfect.
	 */
	for_each_online_cpu(cpu) {
		osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
		memset(osn_var, 0, sizeof(*osn_var));
	}
}

/*
 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
 */
static inline void osn_var_reset_all(void)
{
	osn_var_reset();
	tlat_var_reset();
}

/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 */
bool trace_osnoise_callback_enabled;

/*
 * Tracer data.
 */
static struct osnoise_data {
	u64	sample_period;		/* total sampling period */
	u64	sample_runtime;		/* active sampling portion of period */
	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
	u64	timerlat_period;	/* timerlat period */
	u64	print_stack;		/* print IRQ stack if total > */
	int	timerlat_tracer;	/* timerlat tracer */
#endif
	bool	tainted;		/* inform users and developers about a problem */
} osnoise_data = {
	.sample_period			= DEFAULT_SAMPLE_PERIOD,
	.sample_runtime			= DEFAULT_SAMPLE_RUNTIME,
	.stop_tracing			= 0,
	.stop_tracing_total		= 0,
#ifdef CONFIG_TIMERLAT_TRACER
	.print_stack			= 0,
	.timerlat_period		= DEFAULT_TIMERLAT_PERIOD,
	.timerlat_tracer		= 0,
#endif
};

#ifdef CONFIG_TIMERLAT_TRACER
static inline bool timerlat_enabled(void)
{
	return osnoise_data.timerlat_tracer;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
	/*
	 * If the timerlat is enabled, but the irq handler did
	 * not run yet enabling timerlat_tracer, do not trace.
	 */
	if (!tlat_var->tracing_thread) {
		osn_var->softirq.arrival_time = 0;
		osn_var->softirq.delta_start = 0;
		return 0;
	}
	return 1;
}

static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
	/*
	 * If the timerlat is enabled, but the irq handler did
	 * not run yet enabling timerlat_tracer, do not trace.
	 */
	if (!tlat_var->tracing_thread) {
		osn_var->thread.delta_start = 0;
		osn_var->thread.arrival_time = 0;
		return 0;
	}
	return 1;
}
#else /* CONFIG_TIMERLAT_TRACER */
static inline bool timerlat_enabled(void)
{
	return false;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
#endif

#ifdef CONFIG_PREEMPT_RT
/*
 * Print the osnoise header info.
 */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "# _-------=> irqs-off\n");
	seq_puts(s, "# / _------=> need-resched\n");
	seq_puts(s, "# | / _-----=> need-resched-lazy\n");
	seq_puts(s, "# || / _----=> hardirq/softirq\n");
	seq_puts(s, "# ||| / _---=> preempt-depth\n");
	seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "# ||||| / _-=> migrate-disable\n");

	seq_puts(s, "# |||||| / ");
	seq_puts(s, " MAX\n");

	seq_puts(s, "# ||||| / ");
	seq_puts(s, " SINGLE Interference counters:\n");

	seq_puts(s, "# ||||||| RUNTIME ");
	seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n");

	seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US ");
	seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n");

	seq_puts(s, "# | | | ||||||| | | ");
	seq_puts(s, " | | | | | | | |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "# _-----=> irqs-off\n");
	seq_puts(s, "# / _----=> need-resched\n");
	seq_puts(s, "# | / _---=> hardirq/softirq\n");
	seq_puts(s, "# || / _--=> preempt-depth\n");
	seq_puts(s, "# ||| / _-=> migrate-disable ");
	seq_puts(s, " MAX\n");
	seq_puts(s, "# |||| / delay ");
	seq_puts(s, " SINGLE Interference counters:\n");

	seq_puts(s, "# ||||| RUNTIME ");
	seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n");

	seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US ");
	seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n");

	seq_puts(s, "# | | | ||||| | | ");
	seq_puts(s, " | | | | | | | |\n");
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * osnoise_taint - report an osnoise error.
 */
#define osnoise_taint(msg) ({							\
	struct osnoise_instance *inst;						\
	struct trace_buffer *buffer;						\
										\
	rcu_read_lock();							\
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {		\
		buffer = inst->tr->array_buffer.buffer;				\
		trace_array_printk_buf(buffer, _THIS_IP_, msg);			\
	}									\
	rcu_read_unlock();							\
	osnoise_data.tainted = true;						\
})
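
/*
 * Usage sketch (illustrative): osnoise_taint() records the message in
 * every registered instance's trace buffer and marks the tracer as
 * tainted, e.g.:
 *
 *	if (duration < 0)
 *		osnoise_taint("Negative duration!\n");
 *
 * The "# osnoise is tainted!" line printed by the headers above is
 * driven by the osnoise_data.tainted flag set here.
 */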

/*
 * Record an osnoise_sample into the tracer buffer.
 */
static void
__record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
{
	struct ring_buffer_event *event;
	struct osnoise_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->runtime = sample->runtime;
	entry->noise = sample->noise;
	entry->max_sample = sample->max_sample;
	entry->hw_count = sample->hw_count;
	entry->nmi_count = sample->nmi_count;
	entry->irq_count = sample->irq_count;
	entry->softirq_count = sample->softirq_count;
	entry->thread_count = sample->thread_count;

	trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record an osnoise_sample on all osnoise instances and fire trace event.
 */
static void record_osnoise_sample(struct osnoise_sample *sample)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;

	trace_osnoise_sample(sample);

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__record_osnoise_sample(sample, buffer);
	}
	rcu_read_unlock();
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Print the timerlat header info.
 */
#ifdef CONFIG_PREEMPT_RT
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "# _-------=> irqs-off\n");
	seq_puts(s, "# / _------=> need-resched\n");
	seq_puts(s, "# | / _-----=> need-resched-lazy\n");
	seq_puts(s, "# || / _----=> hardirq/softirq\n");
	seq_puts(s, "# ||| / _---=> preempt-depth\n");
	seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "# ||||| / _-=> migrate-disable\n");
	seq_puts(s, "# |||||| /\n");
	seq_puts(s, "# ||||||| ACTIVATION\n");
	seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID ");
	seq_puts(s, " CONTEXT LATENCY\n");
	seq_puts(s, "# | | | ||||||| | | ");
	seq_puts(s, " | |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "# _-----=> irqs-off\n");
	seq_puts(s, "# / _----=> need-resched\n");
	seq_puts(s, "# | / _---=> hardirq/softirq\n");
	seq_puts(s, "# || / _--=> preempt-depth\n");
	seq_puts(s, "# ||| / _-=> migrate-disable\n");
	seq_puts(s, "# |||| / delay\n");
	seq_puts(s, "# ||||| ACTIVATION\n");
	seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID ");
	seq_puts(s, " CONTEXT LATENCY\n");
	seq_puts(s, "# | | | ||||| | | ");
	seq_puts(s, " | |\n");
}
#endif /* CONFIG_PREEMPT_RT */

static void
__record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
{
	struct ring_buffer_event *event;
	struct timerlat_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
					  tracing_gen_ctx());
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->seqnum = sample->seqnum;
	entry->context = sample->context;
	entry->timer_latency = sample->timer_latency;

	trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record a timerlat_sample into the tracer buffer.
 */
static void record_timerlat_sample(struct timerlat_sample *sample)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;

	trace_timerlat_sample(sample);

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__record_timerlat_sample(sample, buffer);
	}
	rcu_read_unlock();
}

#ifdef CONFIG_STACKTRACE

#define MAX_CALLS	256

/*
 * Stack trace will take place only at IRQ level, so, no need
 * to control nesting here.
 */
struct trace_stack {
	int		stack_size;
	int		nr_entries;
	unsigned long	calls[MAX_CALLS];
};

static DEFINE_PER_CPU(struct trace_stack, trace_stack);

/*
 * timerlat_save_stack - save a stack trace without printing
 *
 * Save the current stack trace without printing. The
 * stack will be printed later, after the end of the measurement.
 */
static void timerlat_save_stack(int skip)
{
	unsigned int size, nr_entries;
	struct trace_stack *fstack;

	fstack = this_cpu_ptr(&trace_stack);

	size = ARRAY_SIZE(fstack->calls);

	nr_entries = stack_trace_save(fstack->calls, size, skip);

	fstack->stack_size = nr_entries * sizeof(unsigned long);
	fstack->nr_entries = nr_entries;
}

static void
__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
{
	struct ring_buffer_event *event;
	struct stack_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
					  tracing_gen_ctx());
	if (!event)
		return;

	entry = ring_buffer_event_data(event);

	entry->size = fstack->nr_entries;
	memcpy(&entry->caller, fstack->calls, size);

	trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * timerlat_dump_stack - dump a stack trace previously saved
 */
static void timerlat_dump_stack(u64 latency)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;
	struct trace_stack *fstack;
	unsigned int size;

	/*
	 * trace only if latency > print_stack config, if enabled.
	 */
	if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
		return;

	preempt_disable_notrace();
	fstack = this_cpu_ptr(&trace_stack);
	size = fstack->stack_size;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__timerlat_dump_stack(buffer, fstack, size);
	}
	rcu_read_unlock();
	preempt_enable_notrace();
}
#else /* CONFIG_STACKTRACE */
#define timerlat_dump_stack(latency)	do {} while (0)
#define timerlat_save_stack(a)		do {} while (0)
#endif /* CONFIG_STACKTRACE */
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * Macros to encapsulate the time capturing infrastructure.
 */
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, 1000)
#define time_sub(a, b)	((a) - (b))
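
/*
 * A minimal usage sketch of the macros above (illustrative):
 *
 *	u64 start, now;
 *
 *	start = time_get();
 *	(noisy section runs)
 *	now = time_get();
 *	noise_in_us = time_to_us(time_sub(now, start));
 *
 * trace_clock_local() returns nanoseconds, hence the div_u64(x, 1000)
 * in time_to_us().
 */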

/*
 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
 *
 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
 * to discount the NMI interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->irq.delta_start)
		osn_var->irq.delta_start += duration;
}

#ifndef CONFIG_PREEMPT_RT
/*
 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
 *
 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
 * forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->softirq.delta_start)
		osn_var->softirq.delta_start += duration;
}
#else /* CONFIG_PREEMPT_RT */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
#endif

/*
 * cond_move_thread_delta_start - Forward the delta_start of a running thread
 *
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
 * is pushed forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (osn_var->thread.delta_start)
		osn_var->thread.delta_start += duration;
}

/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have their duration without
 * the interference from higher priority interrupts. Instead of keeping a
 * variable to discount the interrupt interference from these variables, the
 * starting time of these variables is pushed forward with the interrupt's
 * duration. In this way, a single variable is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.,:
 *
 *	now = time_get()
 *		---> interrupt!
 *		     delta_start -= int duration;
 *		<---
 *	duration = now - delta_start;
 *
 *	result: negative duration if the variable duration before the
 *	interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
	u64 int_counter, now;
	s64 duration;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		now = time_get();
		duration = (now - *delta_start);

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	/*
	 * This is evidence of a race condition that causes
	 * a value to be "discounted" too much.
	 */
	if (duration < 0)
		osnoise_taint("Negative duration!\n");

	*delta_start = 0;

	return duration;
}
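
/*
 * Usage sketch (illustrative): a noise window is opened by arming
 * delta_start and closed by reading the interference-safe duration:
 *
 *	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
 *	(the IRQ handler runs, possibly preempted by an NMI)
 *	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
 *
 * Because the NMI callback pushes irq.delta_start forward by the NMI's
 * own duration, the result accounts only for the IRQ itself.
 */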

/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts.
 *
 * See get_int_safe_duration() for an explanation.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*time = time_get();

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * copy_int_safe_time - Copy *src into *dst aware of interference
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*dst = *src;

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * trace_osnoise_callback - NMI entry/exit callback
 *
 * This function is called at the entry and exit NMI code. The bool enter
 * distinguishes between either case. This function is used to note an NMI
 * occurrence, compute the noise caused by the NMI, and to remove the noise
 * it is potentially causing on other interference variables.
 */
void trace_osnoise_callback(bool enter)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 duration;

	if (!osn_var->sampling)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter) {
			osn_var->nmi.delta_start = time_get();
			local_inc(&osn_var->int_counter);
		} else {
			duration = time_get() - osn_var->nmi.delta_start;

			trace_nmi_noise(osn_var->nmi.delta_start, duration);

			cond_move_irq_delta_start(osn_var, duration);
			cond_move_softirq_delta_start(osn_var, duration);
			cond_move_thread_delta_start(osn_var, duration);
		}
	}

	if (enter)
		osn_var->nmi.count++;
}
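
/*
 * Call-site sketch (illustrative; the exact guard lives in the arch
 * NMI entry code, e.g. on x86):
 *
 *	if (IS_ENABLED(CONFIG_OSNOISE_TRACER) && trace_osnoise_callback_enabled)
 *		trace_osnoise_callback(true);
 *	(the NMI handler runs)
 *	if (IS_ENABLED(CONFIG_OSNOISE_TRACER) && trace_osnoise_callback_enabled)
 *		trace_osnoise_callback(false);
 *
 * The trace_osnoise_callback_enabled flag avoids the call cost when
 * the tracer is off.
 */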

/*
 * osnoise_trace_irq_entry - Note the starting of an IRQ
 *
 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
 * The arrival_time is used to report... the arrival time. The delta_start
 * is used to compute the duration at the IRQ exit handler. See
 * cond_move_irq_delta_start().
 */
void osnoise_trace_irq_entry(int id)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->irq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
	osn_var->irq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
 *
 * Computes the duration of the IRQ noise, and traces it. Also discounts the
 * interference from other sources of noise that could currently be accounted.
 */
void osnoise_trace_irq_exit(int id, const char *desc)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	s64 duration;

	if (!osn_var->sampling)
		return;

	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
	osn_var->irq.arrival_time = 0;
	cond_move_softirq_delta_start(osn_var, duration);
	cond_move_thread_delta_start(osn_var, duration);
}

/*
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
 *
 * Used to note the starting of an IRQ occurrence.
 */
static void trace_irqentry_callback(void *data, int irq,
				    struct irqaction *action)
{
	osnoise_trace_irq_entry(irq);
}

/*
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
 *
 * Used to note the end of an IRQ occurrence.
 */
static void trace_irqexit_callback(void *data, int irq,
				   struct irqaction *action, int ret)
{
	osnoise_trace_irq_exit(irq, action->name);
}

/*
 * arch specific register function.
 */
int __weak osnoise_arch_register(void)
{
	return 0;
}
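
/*
 * A sketch of an arch override (illustrative): an architecture can
 * provide a strong definition hooking extra IRQ sources. On x86, for
 * instance, the override registers callbacks on the local APIC vector
 * trace events (cf. the CONFIG_X86_LOCAL_APIC include above), roughly:
 *
 *	int osnoise_arch_register(void)
 *	{
 *		return register_trace_local_timer_entry(osn_irq_entry, NULL);
 *	}
 *
 * osn_irq_entry is a made-up callback name; the real hooks cover the
 * entry and exit of several APIC vectors.
 */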

/*
 * arch specific unregister function.
 */
void __weak osnoise_arch_unregister(void)
{
	return;
}

/*
 * hook_irq_events - Hook IRQ handling events
 *
 * This function hooks the IRQ related callbacks to the respective trace
 * events.
 */
static int hook_irq_events(void)
{
	int ret;

	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	if (ret)
		goto out_unregister_entry;

	ret = osnoise_arch_register();
	if (ret)
		goto out_irq_exit;

	return 0;

out_irq_exit:
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
out_unregister_entry:
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_irq_events - Unhook IRQ handling events
 *
 * This function unhooks the IRQ related callbacks from the respective trace
 * events.
 */
static void unhook_irq_events(void)
{
	osnoise_arch_unregister();
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
}

#ifndef CONFIG_PREEMPT_RT
/*
 * trace_softirq_entry_callback - Note the starting of a softirq
 *
 * Save the starting time of a softirq. As softirqs are non-preemptive to
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
 * to save the statistics. The arrival_time is used to report... the
 * arrival time. The delta_start is used to compute the duration at the
 * softirq exit handler. See cond_move_softirq_delta_start().
 */
static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (!osn_var->sampling)
		return;
	/*
	 * This value will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->softirq.arrival_time = time_get();
	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
	osn_var->softirq.count++;

	local_inc(&osn_var->int_counter);
}

/*
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Computes the duration of the softirq noise, and traces it. Also discounts
 * the interference from other sources of noise that could currently be
 * accounted.
 */
static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	s64 duration;

	if (!osn_var->sampling)
		return;

	if (unlikely(timerlat_enabled()))
		if (!timerlat_softirq_exit(osn_var))
			return;

	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
	cond_move_thread_delta_start(osn_var, duration);
	osn_var->softirq.arrival_time = 0;
}

/*
 * hook_softirq_events - Hook softirq handling events
 *
 * This function hooks the softirq related callbacks to the respective trace
 * events.
 */
static int hook_softirq_events(void)
{
	int ret;

	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	if (ret)
		goto out_err;

	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
	if (ret)
		goto out_unreg_entry;

	return 0;

out_unreg_entry:
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
out_err:
	return -EINVAL;
}

/*
 * unhook_softirq_events - Unhook softirq handling events
 *
 * This function unhooks the softirq related callbacks from the respective
 * trace events.
 */
static void unhook_softirq_events(void)
{
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
}
#else /* CONFIG_PREEMPT_RT */
/*
 * softirqs run as threads on PREEMPT_RT.
 */
static int hook_softirq_events(void)
{
	return 0;
}
static void unhook_softirq_events(void)
{
}
#endif

/*
 * thread_entry - Record the starting of a thread noise window
 *
 * It saves the context switch time for a noisy thread, and increments
 * the interference counters.
 */
static void
thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
{
	if (!osn_var->sampling)
		return;
	/*
	 * The arrival time will be used in the report, but not to compute
	 * the execution time, so it is safe to get it unsafe.
	 */
	osn_var->thread.arrival_time = time_get();

	set_int_safe_time(osn_var, &osn_var->thread.delta_start);

	osn_var->thread.count++;
	local_inc(&osn_var->int_counter);
}

/*
 * thread_exit - Report the end of a thread noise window
 *
 * It computes the total noise from a thread, tracing if needed.
 */
static void
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
	s64 duration;

	if (!osn_var->sampling)
		return;

	if (unlikely(timerlat_enabled()))
		if (!timerlat_thread_exit(osn_var))
			return;

	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);

	trace_thread_noise(t, osn_var->thread.arrival_time, duration);

	osn_var->thread.arrival_time = 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise_stop_exception - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_exception(char *msg, int cpu)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d due to exception: %s\n",
				       smp_processor_id(),
				       msg);

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit on cpu %d due to exception: %s\n",
			      smp_processor_id(),
			      msg);

		tracer_tracing_off(tr);
	}
	rcu_read_unlock();
}

/*
 * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler
 *
 * This function is hooked to the sched:sched_migrate_task trace event, and
 * monitors timerlat user-space thread migration.
 */
static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
{
	struct osnoise_variables *osn_var;
	long cpu = task_cpu(p);

	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
	if (osn_var->pid == p->pid && dest_cpu != cpu) {
		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
		osnoise_taint("timerlat user-thread migrated\n");
		osnoise_stop_exception("timerlat user-thread migrated", cpu);
	}
}

static bool monitor_enabled;

static int register_migration_monitor(void)
{
	int ret = 0;

	/*
	 * Timerlat thread migration check is only required when running timerlat in user-space.
	 * Thus, enable callback only if timerlat is set with no workload.
	 */
	if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) {
		if (WARN_ON_ONCE(monitor_enabled))
			return 0;

		ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
		if (!ret)
			monitor_enabled = true;
	}

	return ret;
}

static void unregister_migration_monitor(void)
{
	if (!monitor_enabled)
		return;

	unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
	monitor_enabled = false;
}
#else
static int register_migration_monitor(void)
{
	return 0;
}
static void unregister_migration_monitor(void) {}
#endif

/*
 * trace_sched_switch_callback - sched:sched_switch trace event handler
 *
 * This function is hooked to the sched:sched_switch trace event, and it is
 * used to record the beginning and to report the end of a thread noise window.
 */
static void
trace_sched_switch_callback(void *data, bool preempt,
			    struct task_struct *p,
			    struct task_struct *n,
			    unsigned int prev_state)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	int workload = test_bit(OSN_WORKLOAD, &osnoise_options);

	if ((p->pid != osn_var->pid) || !workload)
		thread_exit(osn_var, p);

	if ((n->pid != osn_var->pid) || !workload)
		thread_entry(osn_var, n);
}

/*
 * hook_thread_events - Hook the instrumentation for thread noise
 *
 * Hook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static int hook_thread_events(void)
{
	int ret;

	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
	if (ret)
		return -EINVAL;

	ret = register_migration_monitor();
	if (ret)
		goto out_unreg;

	return 0;

out_unreg:
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
	return -EINVAL;
}

/*
 * unhook_thread_events - Unhook the instrumentation for thread noise
 *
 * Unhook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static void unhook_thread_events(void)
{
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
	unregister_migration_monitor();
}

/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count;
	s->irq_count = osn_var->irq.count;
	s->softirq_count = osn_var->softirq.count;
	s->thread_count = osn_var->thread.count;
}

/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
	s->nmi_count = osn_var->nmi.count - s->nmi_count;
	s->irq_count = osn_var->irq.count - s->irq_count;
	s->softirq_count = osn_var->softirq.count - s->softirq_count;
	s->thread_count = osn_var->thread.count - s->thread_count;
}
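
/*
 * Usage sketch (illustrative): the two helpers above bracket a sampling
 * window, as run_osnoise() does below:
 *
 *	struct osnoise_sample s;
 *
 *	save_osn_sample_stats(osn_var, &s);
 *	(sample for sample_runtime microseconds)
 *	diff_osn_sample_stats(osn_var, &s);
 *
 * After the diff, the s.*_count fields hold only the interference
 * events that arrived during this window.
 */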

/*
 * osnoise_stop_tracing - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_tracing(void)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d\n", smp_processor_id());

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit stop condition on CPU %d\n", smp_processor_id());

		tracer_tracing_off(tr);
	}
	rcu_read_unlock();
}

/*
 * osnoise_has_tracing_on - Check if there is at least one instance on
 */
static __always_inline int osnoise_has_tracing_on(void)
{
	struct osnoise_instance *inst;
	int trace_is_on = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list)
		trace_is_on += tracer_tracing_is_on(inst->tr);
	rcu_read_unlock();

	return trace_is_on;
}

/*
 * notify_new_max_latency - Notify a new max latency via fsnotify interface.
 */
static void notify_new_max_latency(u64 latency)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
			tr->max_latency = latency;
			latency_fsnotify(tr);
		}
	}
	rcu_read_unlock();
}

/*
 * run_osnoise - Sample the time and look for osnoise
 *
 * Used to capture the time, looking for potential osnoise latency repeatedly.
 * Different from hwlat_detector, it is called with preemption and interrupts
 * enabled. This allows irqs, softirqs and threads to run, interfering on the
 * osnoise sampling thread, as they would do with a regular thread.
 */
static int run_osnoise(void)
{
	bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 start, sample, last_sample;
	u64 last_int_count, int_count;
	s64 noise = 0, max_noise = 0;
	s64 total, last_total = 0;
	struct osnoise_sample s;
	bool disable_preemption;
	unsigned int threshold;
	u64 runtime, stop_in;
	u64 sum_noise = 0;
	int hw_count = 0;
	int ret = -1;

	/*
	 * Disabling preemption is only required if IRQs are enabled,
	 * and the option is set on.
	 */
	disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);

	/*
	 * Considers the current thread as the workload.
	 */
	osn_var->pid = current->pid;

	/*
	 * Save the current stats for the diff.
	 */
	save_osn_sample_stats(osn_var, &s);

	/*
	 * If threshold is 0, use the default value of 1 us.
	 */
	threshold = tracing_thresh ? : 1000;

	/*
	 * Apply PREEMPT and IRQ disabled options.
	 */
	if (disable_irq)
		local_irq_disable();

	if (disable_preemption)
		preempt_disable();

	/*
	 * Make sure NMIs see sampling first.
	 */
	osn_var->sampling = true;
	barrier();

	/*
	 * Transform the *_us config to nanoseconds to avoid the
	 * division on the main loop.
	 */
	runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
	stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;

	/*
	 * Start timestamp.
	 */
	start = time_get();

	/*
	 * "previous" loop.
	 */
	last_int_count = set_int_safe_time(osn_var, &last_sample);

	do {
		/*
		 * Get sample!
		 */
		int_count = set_int_safe_time(osn_var, &sample);

		noise = time_sub(sample, last_sample);

		/*
		 * This shouldn't happen.
		 */
		if (noise < 0) {
			osnoise_taint("negative noise!");
			goto out;
		}

		/*
		 * Sample runtime.
		 */
		total = time_sub(sample, start);

		/*
		 * Check for possible overflows.
		 */
		if (total < last_total) {
			osnoise_taint("total overflow!");
			break;
		}

		last_total = total;

		if (noise >= threshold) {
			int interference = int_count - last_int_count;

			if (noise > max_noise)
				max_noise = noise;

			if (!interference)
				hw_count++;

			sum_noise += noise;

			trace_sample_threshold(last_sample, noise, interference);

			if (osnoise_data.stop_tracing)
				if (noise > stop_in)
					osnoise_stop_tracing();
		}

		/*
		 * In some cases, notably when running on a nohz_full CPU with
		 * a stopped tick, PREEMPT_RCU or PREEMPT_LAZY have no way to
		 * account for QSs. This will eventually cause unwarranted
		 * noise as RCU forces preemption as the means of ending the
		 * current grace period. We avoid this by calling
		 * rcu_momentary_eqs(), which performs a zero duration EQS
		 * allowing RCU to end the current grace period. This call
		 * shouldn't be wrapped inside an RCU critical section.
		 *
		 * Normally QSs for other cases are handled through cond_resched().
		 * For simplicity, however, we call rcu_momentary_eqs() for all
		 * configurations here.
		 */
		if (!disable_irq)
			local_irq_disable();

		rcu_momentary_eqs();

		if (!disable_irq)
			local_irq_enable();

		/*
		 * For the non-preemptive kernel config: let threads run, if
		 * they so wish, unless set not to do so.
		 */
		if (!disable_irq && !disable_preemption)
			cond_resched();

		last_sample = sample;
		last_int_count = int_count;

	} while (total < runtime && !kthread_should_stop());

	/*
	 * Finish the above in the view of interrupts.
	 */
	barrier();

	osn_var->sampling = false;

	/*
	 * Make sure sampling data is no longer updated.
	 */
	barrier();

	/*
	 * Return to the preemptive state.
	 */
	if (disable_preemption)
		preempt_enable();

	if (disable_irq)
		local_irq_enable();

	/*
	 * Save noise info.
	 */
	s.noise = time_to_us(sum_noise);
	s.runtime = time_to_us(total);
	s.max_sample = time_to_us(max_noise);
	s.hw_count = hw_count;

	/* Save interference stats info */
	diff_osn_sample_stats(osn_var, &s);

	record_osnoise_sample(&s);

	notify_new_max_latency(max_noise);

	if (osnoise_data.stop_tracing_total)
		if (s.noise > osnoise_data.stop_tracing_total)
			osnoise_stop_tracing();

	return 0;
out:
	return ret;
}

static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;
static struct cpumask kthread_cpumask;

/*
 * osnoise_sleep - sleep until the next period
 */
static void osnoise_sleep(bool skip_period)
{
	u64 interval;
	ktime_t wake_time;

	mutex_lock(&interface_lock);
	if (skip_period)
		interval = osnoise_data.sample_period;
	else
		interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
	mutex_unlock(&interface_lock);

	/*
	 * Differently from hwlat_detector, the osnoise tracer can run
	 * without a pause because preemption is on.
	 */
	if (!interval) {
		/* Let synchronize_rcu_tasks() make progress */
		cond_resched_tasks_rcu_qs();
		return;
	}

	wake_time = ktime_add_us(ktime_get(), interval);
	__set_current_state(TASK_INTERRUPTIBLE);

	while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
		if (kthread_should_stop())
			break;
	}
}
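
/*
 * Worked example (illustrative): with the defaults, sample_period ==
 * sample_runtime == 1000000 us, so after a full run the interval is
 * 1000000 - 1000000 = 0 and osnoise_sleep() just yields through
 * cond_resched_tasks_rcu_qs(). With a 1000000 us period and a 500000 us
 * runtime, the thread sleeps for the remaining 500000 us of the period.
 */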

/*
 * osnoise_migration_pending - checks if the task needs to migrate
 *
 * osnoise/timerlat threads are per-cpu. If there is a pending request to
 * migrate the thread away from the current CPU, something bad has happened.
 * Play the good citizen and leave.
 *
 * Returns 0 if it is safe to continue, 1 otherwise.
 */
static inline int osnoise_migration_pending(void)
{
	if (!current->migration_pending)
		return 0;

	/*
	 * If migration is pending, there is a task waiting for the
	 * tracer to enable migration. The tracer does not allow migration,
	 * thus: taint and leave to unblock the blocked thread.
	 */
	osnoise_taint("migration requested to osnoise threads, leaving.");

	/*
	 * Unset this thread from the threads managed by the interface.
	 * The tracers are responsible for cleaning their env before
	 * exiting.
	 */
	mutex_lock(&interface_lock);
	this_cpu_osn_var()->kthread = NULL;
	cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask);
	mutex_unlock(&interface_lock);

	return 1;
}

/*
 * osnoise_main - The osnoise detection kernel thread
 *
 * Calls run_osnoise() function to measure the osnoise for the configured runtime,
 * every period.
 */
static int osnoise_main(void *data)
{
	unsigned long flags;

	/*
	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
	 *
	 * To work around this limitation, disable migration and remove the
	 * flag.
	 */
	migrate_disable();
	raw_spin_lock_irqsave(&current->pi_lock, flags);
	current->flags &= ~(PF_NO_SETAFFINITY);
	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	while (!kthread_should_stop()) {
		if (osnoise_migration_pending())
			break;

		/* skip a period if tracing is off on all instances */
		if (!osnoise_has_tracing_on()) {
			osnoise_sleep(true);
			continue;
		}

		run_osnoise();
		osnoise_sleep(false);
	}

	migrate_enable();
	return 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat_irq - hrtimer handler for timerlat.
 */
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat;
	struct timerlat_sample s;
	u64 now;
	u64 diff;

	/*
	 * I am not sure if the timer was armed for this CPU. So, get
	 * the timerlat struct from the timer itself, not from this
	 * CPU.
	 */
	tlat = container_of(timer, struct timerlat_variables, timer);

	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));

	/*
	 * Enable the osnoise: events for thread and softirq.
	 */
	tlat->tracing_thread = true;

	osn_var->thread.arrival_time = time_get();

	/*
	 * A hardirq is running: the timer IRQ. It is for sure preempting
	 * a thread, and potentially preempting a softirq.
	 *
	 * At this point, it is not interesting to know the duration of the
	 * preempted thread (and maybe softirq), but how much time they will
	 * delay the beginning of the execution of the timer thread.
	 *
	 * To get the correct (net) delay added by the softirq, its delta_start
	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
	 * start of the softirq will be zeroed, accounting then only the time
	 * after that.
	 *
	 * The thread follows the same principle. However, if a softirq is
	 * running, the thread needs to receive the softirq delta_start. The
	 * reason is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
	 *
	 * The PREEMPT_RT is a special case, though. As softirqs run as threads
	 * on RT, moving the thread is enough.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->softirq.delta_start);

		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
				   &osn_var->irq.delta_start);
	} else {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->irq.delta_start);
	}

	/*
	 * Compare the current time with the expected time.
	 */
	diff = now - tlat->abs_period;

	tlat->count++;
	s.seqnum = tlat->count;
	s.timer_latency = diff;
	s.context = IRQ_CONTEXT;

	record_timerlat_sample(&s);

	if (osnoise_data.stop_tracing) {
		if (time_to_us(diff) >= osnoise_data.stop_tracing) {

			/*
			 * At this point, if stop_tracing is set and <= print_stack,
			 * print_stack is set and would be printed in the thread handler.
			 *
			 * Thus, print the stack trace as it is helpful to define the
			 * root cause of an IRQ latency.
			 */
			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
				timerlat_save_stack(0);
				timerlat_dump_stack(time_to_us(diff));
			}

			osnoise_stop_tracing();
			notify_new_max_latency(diff);

			wake_up_process(tlat->kthread);

			return HRTIMER_NORESTART;
		}
	}

	wake_up_process(tlat->kthread);

	if (osnoise_data.print_stack)
		timerlat_save_stack(0);

	return HRTIMER_NORESTART;
}
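
/*
 * Timeline sketch for the delta_start handover above (illustrative,
 * !PREEMPT_RT, timer IRQ preempting a softirq):
 *
 *	t0: softirq starts	-> softirq.delta_start = t0
 *	t1: timer IRQ fires	-> irq.delta_start = t1
 *	    thread.delta_start	= t0	(inherits the softirq's start)
 *	    softirq.delta_start	= t1	(inherits the IRQ's start)
 *
 * On IRQ and softirq exit, the cond_move_*_delta_start() helpers push
 * these starts forward, so each context is charged only its net delay
 * to the timerlat thread wakeup.
 */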

/*
 * wait_next_period - Wait for the next period for timerlat
 */
static int wait_next_period(struct timerlat_variables *tlat)
{
	ktime_t next_abs_period, now;
	u64 rel_period = osnoise_data.timerlat_period * 1000;

	now = hrtimer_cb_get_time(&tlat->timer);
	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);

	/*
	 * Save the next abs_period.
	 */
	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);

	/*
	 * If the new abs_period is in the past, skip the activation.
	 */
	while (ktime_compare(now, next_abs_period) > 0) {
		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
	}

	set_current_state(TASK_INTERRUPTIBLE);

	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
	schedule();
	return 1;
}
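
/*
 * Example (illustrative): with the default timerlat_period of 1000 us,
 * each call advances the absolute activation time by 1 ms:
 *
 *	abs_period(n + 1) = abs_period(n) + timerlat_period * 1000
 *
 * and arms the CPU-pinned hrtimer at that absolute time. If the thread
 * was delayed past the next activation, the while() loop above skips
 * whole periods rather than firing the timer immediately.
 */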
1846
1847 /*
1848 * timerlat_main- Timerlat main
1849 */
timerlat_main(void * data)1850 static int timerlat_main(void *data)
1851 {
1852 struct osnoise_variables *osn_var = this_cpu_osn_var();
1853 struct timerlat_variables *tlat = this_cpu_tmr_var();
1854 struct timerlat_sample s;
1855 struct sched_param sp;
1856 unsigned long flags;
1857 u64 now, diff;
1858
1859 /*
1860 * Make the thread RT, that is how cyclictest is usually used.
1861 */
1862 sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1863 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1864
1865 /*
1866 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
1867 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
1868 *
1869 * To work around this limitation, disable migration and remove the
1870 * flag.
1871 */
1872 migrate_disable();
1873 raw_spin_lock_irqsave(¤t->pi_lock, flags);
1874 current->flags &= ~(PF_NO_SETAFFINITY);
1875 raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
1876
1877 tlat->count = 0;
1878 tlat->tracing_thread = false;
1879
1880 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1881 tlat->kthread = current;
1882 osn_var->pid = current->pid;
1883 /*
1884 * Anotate the arrival time.
1885 */
1886 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1887
1888 wait_next_period(tlat);
1889
1890 osn_var->sampling = 1;
1891
1892 while (!kthread_should_stop()) {
1893
1894 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1895 diff = now - tlat->abs_period;
1896
1897 s.seqnum = tlat->count;
1898 s.timer_latency = diff;
1899 s.context = THREAD_CONTEXT;
1900
1901 record_timerlat_sample(&s);
1902
1903 notify_new_max_latency(diff);
1904
1905 timerlat_dump_stack(time_to_us(diff));
1906
1907 tlat->tracing_thread = false;
1908 if (osnoise_data.stop_tracing_total)
1909 if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1910 osnoise_stop_tracing();
1911
1912 if (osnoise_migration_pending())
1913 break;
1914
1915 wait_next_period(tlat);
1916 }
1917
1918 hrtimer_cancel(&tlat->timer);
1919 migrate_enable();
1920 return 0;
1921 }
1922 #else /* CONFIG_TIMERLAT_TRACER */
1923 static int timerlat_main(void *data)
1924 {
1925 return 0;
1926 }
1927 #endif /* CONFIG_TIMERLAT_TRACER */
1928
1929 /*
1930 * stop_kthread - stop a workload thread
1931 */
1932 static void stop_kthread(unsigned int cpu)
1933 {
1934 struct task_struct *kthread;
1935
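	/*
	 * Take the kthread pointer atomically, so that concurrent stoppers
	 * (e.g., a CPU hotplug callback racing with the tracer being
	 * disabled) cannot both observe the same non-NULL pointer and stop
	 * the thread twice.
	 */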
1936 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
1937 if (kthread) {
1938 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
1939 !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
1940 kthread_stop(kthread);
1941 } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) {
1942 /*
1943 * This is a user thread waiting on the timerlat_fd. We need
1944 * to close all users, and the best way to guarantee this is
1945			 * by killing the thread. NOTE: this is a purpose-specific file.
1946 */
1947 kill_pid(kthread->thread_pid, SIGKILL, 1);
1948 put_task_struct(kthread);
1949 }
1950 } else {
1951 /* if no workload, just return */
1952 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1953 /*
1954 * This is set in the osnoise tracer case.
1955 */
1956 per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
1957 barrier();
1958 }
1959 }
1960 }
1961
1962 /*
1963 * stop_per_cpu_kthreads - Stop per-cpu threads
1964 *
1965 * Stop the osnoise sampling threads. Use this on unload and at system
1966 * shutdown.
1967 */
1968 static void stop_per_cpu_kthreads(void)
1969 {
1970 int cpu;
1971
1972 cpus_read_lock();
1973
1974 for_each_online_cpu(cpu)
1975 stop_kthread(cpu);
1976
1977 cpus_read_unlock();
1978 }
1979
1980 /*
1981 * start_kthread - Start a workload thread
1982 */
1983 static int start_kthread(unsigned int cpu)
1984 {
1985 struct task_struct *kthread;
1986 void *main = osnoise_main;
1987 char comm[24];
1988
1989 /* Do not start a new thread if it is already running */
1990 if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
1991 return 0;
1992
1993 if (timerlat_enabled()) {
1994 snprintf(comm, 24, "timerlat/%d", cpu);
1995 main = timerlat_main;
1996 } else {
1997 /* if no workload, just return */
1998 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1999 per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
2000 barrier();
2001 return 0;
2002 }
2003 snprintf(comm, 24, "osnoise/%d", cpu);
2004 }
2005
2006 kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
2007
2008 if (IS_ERR(kthread)) {
2009 pr_err(BANNER "could not start sampling thread\n");
2010 return -ENOMEM;
2011 }
2012
2013 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
2014 cpumask_set_cpu(cpu, &kthread_cpumask);
2015
2016 return 0;
2017 }
2018
2019 /*
2020 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads
2021 *
2022 * This starts the kernel threads that will look for osnoise on the
2023 * CPUs allowed via the osnoise/cpus interface.
2024 */
2025 static int start_per_cpu_kthreads(void)
2026 {
2027 struct cpumask *current_mask = &save_cpumask;
2028 int retval = 0;
2029 int cpu;
2030
2031 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
2032 if (timerlat_enabled())
2033 return 0;
2034 }
2035
2036 cpus_read_lock();
2037 /*
2038 * Run only on online CPUs in which osnoise is allowed to run.
2039 */
2040 cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
2041
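	/*
	 * Before starting the new set of threads, reap any kthread left
	 * over from a previous run on a CPU that is still flagged in
	 * kthread_cpumask (e.g., after a cpumask or hotplug change).
	 */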
2042 for_each_possible_cpu(cpu) {
2043 if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
2044 struct task_struct *kthread;
2045
2046 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
2047 if (!WARN_ON(!kthread))
2048 kthread_stop(kthread);
2049 }
2050 }
2051
2052 for_each_cpu(cpu, current_mask) {
2053 retval = start_kthread(cpu);
2054 if (retval) {
2055 cpus_read_unlock();
2056 stop_per_cpu_kthreads();
2057 return retval;
2058 }
2059 }
2060
2061 cpus_read_unlock();
2062
2063 return retval;
2064 }
2065
2066 #ifdef CONFIG_HOTPLUG_CPU
2067 static void osnoise_hotplug_workfn(struct work_struct *dummy)
2068 {
2069 unsigned int cpu = smp_processor_id();
2070
2071 guard(mutex)(&trace_types_lock);
2072
2073 if (!osnoise_has_registered_instances())
2074 return;
2075
2076 guard(mutex)(&interface_lock);
2077 guard(cpus_read_lock)();
2078
2079 if (!cpu_online(cpu))
2080 return;
2081
2082 if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
2083 return;
2084
2085 start_kthread(cpu);
2086 }
2087
2088 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
2089
2090 /*
2091 * osnoise_cpu_init - CPU hotplug online callback function
2092 */
2093 static int osnoise_cpu_init(unsigned int cpu)
2094 {
2095 schedule_work_on(cpu, &osnoise_hotplug_work);
2096 return 0;
2097 }
2098
2099 /*
2100 * osnoise_cpu_die - CPU hotplug offline callback function
2101 */
2102 static int osnoise_cpu_die(unsigned int cpu)
2103 {
2104 stop_kthread(cpu);
2105 return 0;
2106 }
2107
2108 static void osnoise_init_hotplug_support(void)
2109 {
2110 int ret;
2111
2112 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
2113 osnoise_cpu_init, osnoise_cpu_die);
2114 if (ret < 0)
2115		pr_warn(BANNER "Failed to init CPU hotplug support\n");
2116
2117 return;
2118 }
2119 #else /* CONFIG_HOTPLUG_CPU */
2120 static void osnoise_init_hotplug_support(void)
2121 {
2122 return;
2123 }
2124 #endif /* CONFIG_HOTPLUG_CPU */
2125
2126 /*
2127 * seq file functions for the osnoise/options file.
2128 */
2129 static void *s_options_start(struct seq_file *s, loff_t *pos)
2130 {
2131 int option = *pos;
2132
2133 mutex_lock(&interface_lock);
2134
2135 if (option >= OSN_MAX)
2136 return NULL;
2137
2138 return pos;
2139 }
2140
2141 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
2142 {
2143 int option = ++(*pos);
2144
2145 if (option >= OSN_MAX)
2146 return NULL;
2147
2148 return pos;
2149 }
2150
2151 static int s_options_show(struct seq_file *s, void *v)
2152 {
2153 loff_t *pos = v;
2154 int option = *pos;
2155
2156 if (option == OSN_DEFAULTS) {
2157 if (osnoise_options == OSN_DEFAULT_OPTIONS)
2158 seq_printf(s, "%s", osnoise_options_str[option]);
2159 else
2160 seq_printf(s, "NO_%s", osnoise_options_str[option]);
2161 goto out;
2162 }
2163
2164 if (test_bit(option, &osnoise_options))
2165 seq_printf(s, "%s", osnoise_options_str[option]);
2166 else
2167 seq_printf(s, "NO_%s", osnoise_options_str[option]);
2168
2169 out:
2170 if (option != OSN_MAX)
2171 seq_puts(s, " ");
2172
2173 return 0;
2174 }
2175
2176 static void s_options_stop(struct seq_file *s, void *v)
2177 {
2178 seq_puts(s, "\n");
2179 mutex_unlock(&interface_lock);
2180 }
2181
2182 static const struct seq_operations osnoise_options_seq_ops = {
2183 .start = s_options_start,
2184 .next = s_options_next,
2185 .show = s_options_show,
2186 .stop = s_options_stop
2187 };
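/*
 * Reading the osnoise/options file returns all options on a single
 * space-separated line, with a NO_ prefix marking disabled options.
 * With the default settings (only OSNOISE_WORKLOAD set), the expected
 * output is:
 *
 *	DEFAULTS OSNOISE_WORKLOAD NO_PANIC_ON_STOP NO_OSNOISE_PREEMPT_DISABLE NO_OSNOISE_IRQ_DISABLE
 */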
2188
2189 static int osnoise_options_open(struct inode *inode, struct file *file)
2190 {
2191 return seq_open(file, &osnoise_options_seq_ops);
2192 };
2193
2194 /**
2195 * osnoise_options_write - Write function for "options" entry
2196 * @filp: The active open file structure
2197 * @ubuf: The user buffer that contains the value to write
2198 * @cnt: The maximum number of bytes to write to "file"
2199 * @ppos: The current position in @file
2200 *
2201 * Writing the option name sets the option, writing the "NO_"
2202 * prefix in front of the option name disables it.
2203 *
2204 * Writing "DEFAULTS" resets the option values to the default ones.
2205 */
2206 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
2207 size_t cnt, loff_t *ppos)
2208 {
2209 int running, option, enable, retval;
2210 char buf[256], *option_str;
2211
2212 if (cnt >= 256)
2213 return -EINVAL;
2214
2215 if (copy_from_user(buf, ubuf, cnt))
2216 return -EFAULT;
2217
2218 buf[cnt] = 0;
2219
2220 if (strncmp(buf, "NO_", 3)) {
2221 option_str = strstrip(buf);
2222 enable = true;
2223 } else {
2224 option_str = strstrip(&buf[3]);
2225 enable = false;
2226 }
2227
2228 option = match_string(osnoise_options_str, OSN_MAX, option_str);
2229 if (option < 0)
2230 return -EINVAL;
2231
2232 /*
2233 * trace_types_lock is taken to avoid concurrency on start/stop.
2234 */
2235 mutex_lock(&trace_types_lock);
2236 running = osnoise_has_registered_instances();
2237 if (running)
2238 stop_per_cpu_kthreads();
2239
2240 mutex_lock(&interface_lock);
2241 /*
2242	 * Avoid CPU hotplug operations that might read options.
2243 */
2244 cpus_read_lock();
2245
2246 retval = cnt;
2247
2248 if (enable) {
2249 if (option == OSN_DEFAULTS)
2250 osnoise_options = OSN_DEFAULT_OPTIONS;
2251 else
2252 set_bit(option, &osnoise_options);
2253 } else {
2254 if (option == OSN_DEFAULTS)
2255 retval = -EINVAL;
2256 else
2257 clear_bit(option, &osnoise_options);
2258 }
2259
2260 cpus_read_unlock();
2261 mutex_unlock(&interface_lock);
2262
2263 if (running)
2264 start_per_cpu_kthreads();
2265 mutex_unlock(&trace_types_lock);
2266
2267 return retval;
2268 }
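/*
 * Example usage, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	# echo PANIC_ON_STOP > /sys/kernel/tracing/osnoise/options
 *	# echo NO_PANIC_ON_STOP > /sys/kernel/tracing/osnoise/options
 *	# echo DEFAULTS > /sys/kernel/tracing/osnoise/options
 *
 * The first write enables the option, the second disables it, and the
 * last restores the default option values.
 */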
2269
2270 /*
2271 * osnoise_cpus_read - Read function for reading the "cpus" file
2272 * @filp: The active open file structure
2273 * @ubuf: The userspace provided buffer to read value into
2274 * @cnt: The maximum number of bytes to read
2275 * @ppos: The current "file" position
2276 *
2277 * Prints the "cpus" output into the user-provided buffer.
2278 */
2279 static ssize_t
2280 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
2281 loff_t *ppos)
2282 {
2283 char *mask_str __free(kfree) = NULL;
2284 int len;
2285
2286 guard(mutex)(&interface_lock);
2287
2288 len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
2289 mask_str = kmalloc(len, GFP_KERNEL);
2290 if (!mask_str)
2291 return -ENOMEM;
2292
2293 len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2294 if (len >= count)
2295 return -EINVAL;
2296
2297 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2298
2299 return count;
2300 }
2301
2302 /*
2303 * osnoise_cpus_write - Write function for "cpus" entry
2304 * @filp: The active open file structure
2305 * @ubuf: The user buffer that contains the value to write
2306 * @count: The maximum number of bytes to write to "file"
2307 * @ppos: The current position in @file
2308 *
2309 * This function provides a write implementation for the "cpus"
2310 * interface to the osnoise tracer. By default, it lists all CPUs,
2311 * allowing osnoise threads to run on any online CPU of the system.
2312 * Writing a cpulist via this interface restricts osnoise to run only
2313 * on the given set of CPUs. Why not use "tracing_cpumask"?
2314 * Because the user might be interested in tracing what is running on
2315 * other CPUs. For instance, one might run osnoise in one HT CPU
2316 * while observing what is running on the sibling HT CPU.
2317 */
2318 static ssize_t
2319 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
2320 loff_t *ppos)
2321 {
2322 cpumask_var_t osnoise_cpumask_new;
2323 int running, err;
2324 char *buf __free(kfree) = NULL;
2325
2326 if (count < 1)
2327 return 0;
2328
2329 buf = memdup_user_nul(ubuf, count);
2330 if (IS_ERR(buf))
2331 return PTR_ERR(buf);
2332
2333 if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
2334 return -ENOMEM;
2335
2336 err = cpulist_parse(buf, osnoise_cpumask_new);
2337 if (err)
2338 goto err_free;
2339
2340 /*
2341 * trace_types_lock is taken to avoid concurrency on start/stop.
2342 */
2343 mutex_lock(&trace_types_lock);
2344 running = osnoise_has_registered_instances();
2345 if (running)
2346 stop_per_cpu_kthreads();
2347
2348 mutex_lock(&interface_lock);
2349 /*
2350 * osnoise_cpumask is read by CPU hotplug operations.
2351 */
2352 cpus_read_lock();
2353
2354 cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
2355
2356 cpus_read_unlock();
2357 mutex_unlock(&interface_lock);
2358
2359 if (running)
2360 start_per_cpu_kthreads();
2361 mutex_unlock(&trace_types_lock);
2362
2363 free_cpumask_var(osnoise_cpumask_new);
2364 return count;
2365
2366 err_free:
2367 free_cpumask_var(osnoise_cpumask_new);
2368
2369 return err;
2370 }
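/*
 * Example usage, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	# echo 0-3,8 > /sys/kernel/tracing/osnoise/cpus
 *
 * restricts the osnoise/timerlat workload threads to CPUs 0-3 and 8,
 * while tracing itself can still observe all CPUs.
 */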
2371
2372 #ifdef CONFIG_TIMERLAT_TRACER
2373 static int timerlat_fd_open(struct inode *inode, struct file *file)
2374 {
2375 struct osnoise_variables *osn_var;
2376 struct timerlat_variables *tlat;
2377 long cpu = (long) inode->i_cdev;
2378
2379 mutex_lock(&interface_lock);
2380
2381 /*
2382 * This file is accessible only if timerlat is enabled, and
2383 * NO_OSNOISE_WORKLOAD is set.
2384 */
2385 if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
2386 mutex_unlock(&interface_lock);
2387 return -EINVAL;
2388 }
2389
2390 migrate_disable();
2391
2392 osn_var = this_cpu_osn_var();
2393
2394 /*
2395	 * The osn_var->pid enforces single access to this file: nonzero means busy.
2396 */
2397 if (osn_var->pid) {
2398 mutex_unlock(&interface_lock);
2399 migrate_enable();
2400 return -EBUSY;
2401 }
2402
2403 /*
2404	 * The timerlat tracer is a per-cpu tracer. Check whether the user-space
2405	 * task, too, is pinned to a single CPU. The tracer later monitors
2406	 * whether the task migrates, and disables the tracer if it does.
2407	 * However, it is worth doing this basic acceptance test to avoid an
2408	 * obviously wrong setup.
2409 */
2410 if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) {
2411 mutex_unlock(&interface_lock);
2412 migrate_enable();
2413 return -EPERM;
2414 }
2415
2416 /*
2417 * From now on, it is good to go.
2418 */
2419 file->private_data = inode->i_cdev;
2420
2421 get_task_struct(current);
2422
2423 osn_var->kthread = current;
2424 osn_var->pid = current->pid;
2425
2426 /*
2427 * Setup is done.
2428 */
2429 mutex_unlock(&interface_lock);
2430
2431 tlat = this_cpu_tmr_var();
2432 tlat->count = 0;
2433
2434 hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
2435
2436 migrate_enable();
2437 return 0;
2438 };
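/*
 * A minimal sketch of a user-space workload driving this file, following
 * the constraints enforced by timerlat_fd_open() above: the timerlat
 * tracer must be enabled with NO_OSNOISE_WORKLOAD set, and the task must
 * be pinned to the CPU matching the per_cpu/cpu$N/timerlat_fd it opens.
 * Each read() (served by timerlat_fd_read() below) then blocks until the
 * next timerlat activation. The identifiers below are illustrative only:
 *
 *	cpu_set_t set;
 *	char path[64], buf[64];
 *	long cpu = 0;
 *	int fd;
 *
 *	CPU_ZERO(&set);
 *	CPU_SET(cpu, &set);
 *	if (sched_setaffinity(gettid(), sizeof(set), &set))
 *		return 1;
 *
 *	snprintf(path, sizeof(path),
 *		 "/sys/kernel/tracing/osnoise/per_cpu/cpu%ld/timerlat_fd", cpu);
 *	fd = open(path, O_RDONLY);
 *	if (fd < 0)
 *		return 1;
 *
 *	for (;;) {
 *		... run the work to be measured ...
 *		if (read(fd, buf, sizeof(buf)) < 0)
 *			break;
 *	}
 *	close(fd);
 */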
2439
2440 /*
2441 * timerlat_fd_read - Read function for "timerlat_fd" file
2442 * @file: The active open file structure
2443 * @ubuf: The userspace provided buffer to read value into
2444 * @cnt: The maximum number of bytes to read
2445 * @ppos: The current "file" position
2446 *
2447 * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error.
2448 */
2449 static ssize_t
2450 timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
2451 loff_t *ppos)
2452 {
2453 long cpu = (long) file->private_data;
2454 struct osnoise_variables *osn_var;
2455 struct timerlat_variables *tlat;
2456 struct timerlat_sample s;
2457 s64 diff;
2458 u64 now;
2459
2460 migrate_disable();
2461
2462 tlat = this_cpu_tmr_var();
2463
2464 /*
2465 * While in user-space, the thread is migratable. There is nothing
2466 * we can do about it.
2467 * So, if the thread is running on another CPU, stop the machinery.
2468 */
2469 if (cpu == smp_processor_id()) {
2470 if (tlat->uthread_migrate) {
2471 migrate_enable();
2472 return -EINVAL;
2473 }
2474 } else {
2475 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
2476 osnoise_taint("timerlat user thread migrate\n");
2477 osnoise_stop_tracing();
2478 migrate_enable();
2479 return -EINVAL;
2480 }
2481
2482 osn_var = this_cpu_osn_var();
2483
2484 /*
2485	 * The timerlat in user-space runs in a different order:
2486	 * the read() first accounts for the previous occurrence, and then
2487	 * sleeps waiting for the next one.
2488 *
2489 * So, skip if we are entering on read() before the first wakeup
2490 * from timerlat IRQ:
2491 */
2492 if (likely(osn_var->sampling)) {
2493 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2494 diff = now - tlat->abs_period;
2495
2496 /*
2497 * it was not a timer firing, but some other signal?
2498 */
2499 if (diff < 0)
2500 goto out;
2501
2502 s.seqnum = tlat->count;
2503 s.timer_latency = diff;
2504 s.context = THREAD_URET;
2505
2506 record_timerlat_sample(&s);
2507
2508 notify_new_max_latency(diff);
2509
2510 tlat->tracing_thread = false;
2511 if (osnoise_data.stop_tracing_total)
2512 if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
2513 osnoise_stop_tracing();
2514 } else {
2515 tlat->tracing_thread = false;
2516 tlat->kthread = current;
2517
2518		/* Record the current time as the base for the new period */
2519 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
2520
2521 osn_var->sampling = 1;
2522 }
2523
2524 /* wait for the next period */
2525 wait_next_period(tlat);
2526
2527 /* This is the wakeup from this cycle */
2528 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2529 diff = now - tlat->abs_period;
2530
2531 /*
2532 * it was not a timer firing, but some other signal?
2533 */
2534 if (diff < 0)
2535 goto out;
2536
2537 s.seqnum = tlat->count;
2538 s.timer_latency = diff;
2539 s.context = THREAD_CONTEXT;
2540
2541 record_timerlat_sample(&s);
2542
2543 if (osnoise_data.stop_tracing_total) {
2544 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
2545 timerlat_dump_stack(time_to_us(diff));
2546 notify_new_max_latency(diff);
2547 osnoise_stop_tracing();
2548 }
2549 }
2550
2551 out:
2552 migrate_enable();
2553 return 0;
2554 }
2555
2556 static int timerlat_fd_release(struct inode *inode, struct file *file)
2557 {
2558 struct osnoise_variables *osn_var;
2559 struct timerlat_variables *tlat_var;
2560 long cpu = (long) file->private_data;
2561
2562 migrate_disable();
2563 mutex_lock(&interface_lock);
2564
2565 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
2566 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
2567
2568 if (tlat_var->kthread)
2569 hrtimer_cancel(&tlat_var->timer);
2570 memset(tlat_var, 0, sizeof(*tlat_var));
2571
2572 osn_var->sampling = 0;
2573 osn_var->pid = 0;
2574
2575 /*
2576 * We are leaving, not being stopped... see stop_kthread();
2577 */
2578 if (osn_var->kthread) {
2579 put_task_struct(osn_var->kthread);
2580 osn_var->kthread = NULL;
2581 }
2582
2583 mutex_unlock(&interface_lock);
2584 migrate_enable();
2585 return 0;
2586 }
2587 #endif
2588
2589 /*
2590 * osnoise/runtime_us: cannot be greater than the period.
2591 */
2592 static struct trace_min_max_param osnoise_runtime = {
2593 .lock = &interface_lock,
2594 .val = &osnoise_data.sample_runtime,
2595 .max = &osnoise_data.sample_period,
2596 .min = NULL,
2597 };
2598
2599 /*
2600 * osnoise/period_us: cannot be smaller than the runtime.
2601 */
2602 static struct trace_min_max_param osnoise_period = {
2603 .lock = &interface_lock,
2604 .val = &osnoise_data.sample_period,
2605 .max = NULL,
2606 .min = &osnoise_data.sample_runtime,
2607 };
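/*
 * For example, to observe noise during half of each 1s period:
 *
 *	# echo 1000000 > /sys/kernel/tracing/osnoise/period_us
 *	# echo 500000 > /sys/kernel/tracing/osnoise/runtime_us
 *
 * Writes that would make runtime_us exceed period_us are rejected by
 * the min/max handler.
 */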
2608
2609 /*
2610 * osnoise/stop_tracing_us: no limit.
2611 */
2612 static struct trace_min_max_param osnoise_stop_tracing_in = {
2613 .lock = &interface_lock,
2614 .val = &osnoise_data.stop_tracing,
2615 .max = NULL,
2616 .min = NULL,
2617 };
2618
2619 /*
2620 * osnoise/stop_tracing_total_us: no limit.
2621 */
2622 static struct trace_min_max_param osnoise_stop_tracing_total = {
2623 .lock = &interface_lock,
2624 .val = &osnoise_data.stop_tracing_total,
2625 .max = NULL,
2626 .min = NULL,
2627 };
2628
2629 #ifdef CONFIG_TIMERLAT_TRACER
2630 /*
2631 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
2632 * latency is higher than the configured value.
2633 */
2634 static struct trace_min_max_param osnoise_print_stack = {
2635 .lock = &interface_lock,
2636 .val = &osnoise_data.print_stack,
2637 .max = NULL,
2638 .min = NULL,
2639 };
2640
2641 /*
2642 * osnoise/timerlat_period: min 100 us, max 1 s
2643 */
2644 static u64 timerlat_min_period = 100;
2645 static u64 timerlat_max_period = 1000000;
2646 static struct trace_min_max_param timerlat_period = {
2647 .lock = &interface_lock,
2648 .val = &osnoise_data.timerlat_period,
2649 .max = &timerlat_max_period,
2650 .min = &timerlat_min_period,
2651 };
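/*
 * For example:
 *
 *	# echo 500 > /sys/kernel/tracing/osnoise/timerlat_period_us
 *
 * arms the timerlat timer every 500 us; values outside the
 * [100us, 1s] range above are rejected.
 */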
2652
2653 static const struct file_operations timerlat_fd_fops = {
2654 .open = timerlat_fd_open,
2655 .read = timerlat_fd_read,
2656 .release = timerlat_fd_release,
2657 .llseek = generic_file_llseek,
2658 };
2659 #endif
2660
2661 static const struct file_operations cpus_fops = {
2662 .open = tracing_open_generic,
2663 .read = osnoise_cpus_read,
2664 .write = osnoise_cpus_write,
2665 .llseek = generic_file_llseek,
2666 };
2667
2668 static const struct file_operations osnoise_options_fops = {
2669 .open = osnoise_options_open,
2670 .read = seq_read,
2671 .llseek = seq_lseek,
2672 .release = seq_release,
2673 .write = osnoise_options_write
2674 };
2675
2676 #ifdef CONFIG_TIMERLAT_TRACER
2677 #ifdef CONFIG_STACKTRACE
2678 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2679 {
2680 struct dentry *tmp;
2681
2682 tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
2683 &osnoise_print_stack, &trace_min_max_fops);
2684 if (!tmp)
2685 return -ENOMEM;
2686
2687 return 0;
2688 }
2689 #else /* CONFIG_STACKTRACE */
2690 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2691 {
2692 return 0;
2693 }
2694 #endif /* CONFIG_STACKTRACE */
2695
2696 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
2697 {
2698 struct dentry *timerlat_fd;
2699 struct dentry *per_cpu;
2700 struct dentry *cpu_dir;
2701 char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
2702 long cpu;
2703
2704 /*
2705	 * Why not use the tracing instance per_cpu/ dir?
2706	 *
2707	 * Because osnoise/timerlat have a single workload, having
2708	 * multiple files like these would be a waste of memory.
2709 */
2710 per_cpu = tracefs_create_dir("per_cpu", top_dir);
2711 if (!per_cpu)
2712 return -ENOMEM;
2713
2714 for_each_possible_cpu(cpu) {
2715 snprintf(cpu_str, 30, "cpu%ld", cpu);
2716 cpu_dir = tracefs_create_dir(cpu_str, per_cpu);
2717 if (!cpu_dir)
2718 goto out_clean;
2719
2720 timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
2721 cpu_dir, NULL, &timerlat_fd_fops);
2722 if (!timerlat_fd)
2723 goto out_clean;
2724
2725 /* Record the CPU */
2726 d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
2727 }
2728
2729 return 0;
2730
2731 out_clean:
2732 tracefs_remove(per_cpu);
2733 return -ENOMEM;
2734 }
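/*
 * The resulting layout is one file per possible CPU:
 *
 *	osnoise/per_cpu/cpu0/timerlat_fd
 *	osnoise/per_cpu/cpu1/timerlat_fd
 *	...
 */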
2735
2736 /*
2737 * init_timerlat_tracefs - A function to initialize the timerlat interface files
2738 */
2739 static int init_timerlat_tracefs(struct dentry *top_dir)
2740 {
2741 struct dentry *tmp;
2742 int retval;
2743
2744 tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
2745 &timerlat_period, &trace_min_max_fops);
2746 if (!tmp)
2747 return -ENOMEM;
2748
2749 retval = osnoise_create_cpu_timerlat_fd(top_dir);
2750 if (retval)
2751 return retval;
2752
2753 return init_timerlat_stack_tracefs(top_dir);
2754 }
2755 #else /* CONFIG_TIMERLAT_TRACER */
2756 static int init_timerlat_tracefs(struct dentry *top_dir)
2757 {
2758 return 0;
2759 }
2760 #endif /* CONFIG_TIMERLAT_TRACER */
2761
2762 /*
2763 * init_tracefs - A function to initialize the tracefs interface files
2764 *
2765 * This function creates entries in tracefs for "osnoise" and "timerlat".
2766 * It creates these directories in the tracing directory, and within that
2767 * directory the user can change and view the configs.
2768 */
2769 static int init_tracefs(void)
2770 {
2771 struct dentry *top_dir;
2772 struct dentry *tmp;
2773 int ret;
2774
2775 ret = tracing_init_dentry();
2776 if (ret)
2777 return -ENOMEM;
2778
2779 top_dir = tracefs_create_dir("osnoise", NULL);
2780 if (!top_dir)
2781 return 0;
2782
2783 tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
2784 &osnoise_period, &trace_min_max_fops);
2785 if (!tmp)
2786 goto err;
2787
2788 tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
2789 &osnoise_runtime, &trace_min_max_fops);
2790 if (!tmp)
2791 goto err;
2792
2793 tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
2794 &osnoise_stop_tracing_in, &trace_min_max_fops);
2795 if (!tmp)
2796 goto err;
2797
2798 tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
2799 &osnoise_stop_tracing_total, &trace_min_max_fops);
2800 if (!tmp)
2801 goto err;
2802
2803 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
2804 if (!tmp)
2805 goto err;
2806
2807 tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
2808 &osnoise_options_fops);
2809 if (!tmp)
2810 goto err;
2811
2812 ret = init_timerlat_tracefs(top_dir);
2813 if (ret)
2814 goto err;
2815
2816 return 0;
2817
2818 err:
2819 tracefs_remove(top_dir);
2820 return -ENOMEM;
2821 }
2822
2823 static int osnoise_hook_events(void)
2824 {
2825 int retval;
2826
2827 /*
2828 * Trace is already hooked, we are re-enabling from
2829 * a stop_tracing_*.
2830 */
2831 if (trace_osnoise_callback_enabled)
2832 return 0;
2833
2834 retval = hook_irq_events();
2835 if (retval)
2836 return -EINVAL;
2837
2838 retval = hook_softirq_events();
2839 if (retval)
2840 goto out_unhook_irq;
2841
2842 retval = hook_thread_events();
2843 /*
2844 * All fine!
2845 */
2846 if (!retval)
2847 return 0;
2848
2849 unhook_softirq_events();
2850 out_unhook_irq:
2851 unhook_irq_events();
2852 return -EINVAL;
2853 }
2854
2855 static void osnoise_unhook_events(void)
2856 {
2857 unhook_thread_events();
2858 unhook_softirq_events();
2859 unhook_irq_events();
2860 }
2861
2862 /*
2863 * osnoise_workload_start - start the workload and hook to events
2864 */
2865 static int osnoise_workload_start(void)
2866 {
2867 int retval;
2868
2869 /*
2870 * Instances need to be registered after calling workload
2871 * start. Hence, if there is already an instance, the
2872 * workload was already registered. Otherwise, this
2873 * code is on the way to register the first instance,
2874 * and the workload will start.
2875 */
2876 if (osnoise_has_registered_instances())
2877 return 0;
2878
2879 osn_var_reset_all();
2880
2881 retval = osnoise_hook_events();
2882 if (retval)
2883 return retval;
2884
2885 /*
2886 * Make sure that ftrace_nmi_enter/exit() see reset values
2887 * before enabling trace_osnoise_callback_enabled.
2888 */
2889 barrier();
2890 trace_osnoise_callback_enabled = true;
2891
2892 retval = start_per_cpu_kthreads();
2893 if (retval) {
2894 trace_osnoise_callback_enabled = false;
2895 /*
2896 * Make sure that ftrace_nmi_enter/exit() see
2897 * trace_osnoise_callback_enabled as false before continuing.
2898 */
2899 barrier();
2900
2901 osnoise_unhook_events();
2902 return retval;
2903 }
2904
2905 return 0;
2906 }
2907
2908 /*
2909 * osnoise_workload_stop - stop the workload and unhook the events
2910 */
2911 static void osnoise_workload_stop(void)
2912 {
2913 /*
2914 * Instances need to be unregistered before calling
2915 * stop. Hence, if there is a registered instance, more
2916 * than one instance is running, and the workload will not
2917 * yet stop. Otherwise, this code is on the way to disable
2918 * the last instance, and the workload can stop.
2919 */
2920 if (osnoise_has_registered_instances())
2921 return;
2922
2923 /*
2924 * If callbacks were already disabled in a previous stop
2925	 * call, there is no need to disable them again.
2926 *
2927 * For instance, this happens when tracing is stopped via:
2928 * echo 0 > tracing_on
2929 * echo nop > current_tracer.
2930 */
2931 if (!trace_osnoise_callback_enabled)
2932 return;
2933
2934 trace_osnoise_callback_enabled = false;
2935 /*
2936 * Make sure that ftrace_nmi_enter/exit() see
2937 * trace_osnoise_callback_enabled as false before continuing.
2938 */
2939 barrier();
2940
2941 stop_per_cpu_kthreads();
2942
2943 osnoise_unhook_events();
2944 }
2945
2946 static void osnoise_tracer_start(struct trace_array *tr)
2947 {
2948 int retval;
2949
2950 /*
2951 * If the instance is already registered, there is no need to
2952 * register it again.
2953 */
2954 if (osnoise_instance_registered(tr))
2955 return;
2956
2957 retval = osnoise_workload_start();
2958 if (retval)
2959 pr_err(BANNER "Error starting osnoise tracer\n");
2960
2961 osnoise_register_instance(tr);
2962 }
2963
2964 static void osnoise_tracer_stop(struct trace_array *tr)
2965 {
2966 osnoise_unregister_instance(tr);
2967 osnoise_workload_stop();
2968 }
2969
2970 static int osnoise_tracer_init(struct trace_array *tr)
2971 {
2972 /*
2973 * Only allow osnoise tracer if timerlat tracer is not running
2974 * already.
2975 */
2976 if (timerlat_enabled())
2977 return -EBUSY;
2978
2979 tr->max_latency = 0;
2980
2981 osnoise_tracer_start(tr);
2982 return 0;
2983 }
2984
2985 static void osnoise_tracer_reset(struct trace_array *tr)
2986 {
2987 osnoise_tracer_stop(tr);
2988 }
2989
2990 static struct tracer osnoise_tracer __read_mostly = {
2991 .name = "osnoise",
2992 .init = osnoise_tracer_init,
2993 .reset = osnoise_tracer_reset,
2994 .start = osnoise_tracer_start,
2995 .stop = osnoise_tracer_stop,
2996 .print_header = print_osnoise_headers,
2997 .allow_instances = true,
2998 };
2999
3000 #ifdef CONFIG_TIMERLAT_TRACER
3001 static void timerlat_tracer_start(struct trace_array *tr)
3002 {
3003 int retval;
3004
3005 /*
3006 * If the instance is already registered, there is no need to
3007 * register it again.
3008 */
3009 if (osnoise_instance_registered(tr))
3010 return;
3011
3012 retval = osnoise_workload_start();
3013 if (retval)
3014 pr_err(BANNER "Error starting timerlat tracer\n");
3015
3016 osnoise_register_instance(tr);
3017
3018 return;
3019 }
3020
3021 static void timerlat_tracer_stop(struct trace_array *tr)
3022 {
3023 int cpu;
3024
3025 osnoise_unregister_instance(tr);
3026
3027 /*
3028 * Instruct the threads to stop only if this is the last instance.
3029 */
3030 if (!osnoise_has_registered_instances()) {
3031 for_each_online_cpu(cpu)
3032 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
3033 }
3034
3035 osnoise_workload_stop();
3036 }
3037
3038 static int timerlat_tracer_init(struct trace_array *tr)
3039 {
3040 /*
3041 * Only allow timerlat tracer if osnoise tracer is not running already.
3042 */
3043 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
3044 return -EBUSY;
3045
3046 /*
3047 * If this is the first instance, set timerlat_tracer to block
3048 * osnoise tracer start.
3049 */
3050 if (!osnoise_has_registered_instances())
3051 osnoise_data.timerlat_tracer = 1;
3052
3053 tr->max_latency = 0;
3054 timerlat_tracer_start(tr);
3055
3056 return 0;
3057 }
3058
3059 static void timerlat_tracer_reset(struct trace_array *tr)
3060 {
3061 timerlat_tracer_stop(tr);
3062
3063 /*
3064 * If this is the last instance, reset timerlat_tracer allowing
3065 * osnoise to be started.
3066 */
3067 if (!osnoise_has_registered_instances())
3068 osnoise_data.timerlat_tracer = 0;
3069 }
3070
3071 static struct tracer timerlat_tracer __read_mostly = {
3072 .name = "timerlat",
3073 .init = timerlat_tracer_init,
3074 .reset = timerlat_tracer_reset,
3075 .start = timerlat_tracer_start,
3076 .stop = timerlat_tracer_stop,
3077 .print_header = print_timerlat_headers,
3078 .allow_instances = true,
3079 };
3080
3081 __init static int init_timerlat_tracer(void)
3082 {
3083 return register_tracer(&timerlat_tracer);
3084 }
3085 #else /* CONFIG_TIMERLAT_TRACER */
3086 __init static int init_timerlat_tracer(void)
3087 {
3088 return 0;
3089 }
3090 #endif /* CONFIG_TIMERLAT_TRACER */
3091
3092 __init static int init_osnoise_tracer(void)
3093 {
3094 int ret;
3095
3096 mutex_init(&interface_lock);
3097
3098 cpumask_copy(&osnoise_cpumask, cpu_all_mask);
3099
3100 ret = register_tracer(&osnoise_tracer);
3101 if (ret) {
3102 pr_err(BANNER "Error registering osnoise!\n");
3103 return ret;
3104 }
3105
3106 ret = init_timerlat_tracer();
3107 if (ret) {
3108 pr_err(BANNER "Error registering timerlat!\n");
3109 return ret;
3110 }
3111
3112 osnoise_init_hotplug_support();
3113
3114 INIT_LIST_HEAD_RCU(&osnoise_instances);
3115
3116 init_tracefs();
3117
3118 return 0;
3119 }
3120 late_initcall(init_osnoise_tracer);
3121