xref: /linux/kernel/trace/trace_osnoise.c (revision 744fab2d9ff9177a27647c3710e86d43f2efe68c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * OS Noise Tracer: computes the OS Noise suffered by a running thread.
4  * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
5  *
6  * Based on "hwlat_detector" tracer by:
7  *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
8  *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
9  *   With feedback from Clark Williams <williams@redhat.com>
10  *
11  * And also based on the rtsl tracer presented on:
12  *  DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
13  *  scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
14  *  (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
15  *
16  * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
17  */
18 
19 #include <linux/kthread.h>
20 #include <linux/tracefs.h>
21 #include <linux/uaccess.h>
22 #include <linux/cpumask.h>
23 #include <linux/delay.h>
24 #include <linux/sched/clock.h>
25 #include <uapi/linux/sched/types.h>
26 #include <linux/sched.h>
27 #include "trace.h"
28 
29 #ifdef CONFIG_X86_LOCAL_APIC
30 #include <asm/trace/irq_vectors.h>
31 #undef TRACE_INCLUDE_PATH
32 #undef TRACE_INCLUDE_FILE
33 #endif /* CONFIG_X86_LOCAL_APIC */
34 
35 #include <trace/events/irq.h>
36 #include <trace/events/sched.h>
37 
38 #define CREATE_TRACE_POINTS
39 #include <trace/events/osnoise.h>
40 
41 /*
42  * Default values.
43  */
44 #define BANNER			"osnoise: "
45 #define DEFAULT_SAMPLE_PERIOD	1000000			/* 1s */
46 #define DEFAULT_SAMPLE_RUNTIME	1000000			/* 1s */
47 
48 #define DEFAULT_TIMERLAT_PERIOD	1000			/* 1ms */
49 #define DEFAULT_TIMERLAT_PRIO	95			/* FIFO 95 */
50 
51 /*
52  * osnoise/options entries.
53  */
54 enum osnoise_options_index {
55 	OSN_DEFAULTS = 0,
56 	OSN_WORKLOAD,
57 	OSN_PANIC_ON_STOP,
58 	OSN_PREEMPT_DISABLE,
59 	OSN_IRQ_DISABLE,
60 	OSN_MAX
61 };
62 
63 static const char * const osnoise_options_str[OSN_MAX] = {
64 							"DEFAULTS",
65 							"OSNOISE_WORKLOAD",
66 							"PANIC_ON_STOP",
67 							"OSNOISE_PREEMPT_DISABLE",
68 							"OSNOISE_IRQ_DISABLE" };
69 
70 #define OSN_DEFAULT_OPTIONS		0x2
71 static unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
72 
73 /*
74  * trace_array of the enabled osnoise/timerlat instances.
75  */
76 struct osnoise_instance {
77 	struct list_head	list;
78 	struct trace_array	*tr;
79 };
80 
81 static struct list_head osnoise_instances;
82 
osnoise_has_registered_instances(void)83 static bool osnoise_has_registered_instances(void)
84 {
85 	return !!list_first_or_null_rcu(&osnoise_instances,
86 					struct osnoise_instance,
87 					list);
88 }
89 
90 /*
91  * osnoise_instance_registered - check if a tr is already registered
92  */
osnoise_instance_registered(struct trace_array * tr)93 static int osnoise_instance_registered(struct trace_array *tr)
94 {
95 	struct osnoise_instance *inst;
96 	int found = 0;
97 
98 	rcu_read_lock();
99 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
100 		if (inst->tr == tr)
101 			found = 1;
102 	}
103 	rcu_read_unlock();
104 
105 	return found;
106 }
107 
108 /*
109  * osnoise_register_instance - register a new trace instance
110  *
111  * Register a trace_array *tr in the list of instances running
112  * osnoise/timerlat tracers.
113  */
osnoise_register_instance(struct trace_array * tr)114 static int osnoise_register_instance(struct trace_array *tr)
115 {
116 	struct osnoise_instance *inst;
117 
118 	/*
119 	 * register/unregister serialization is provided by trace's
120 	 * trace_types_lock.
121 	 */
122 	lockdep_assert_held(&trace_types_lock);
123 
124 	inst = kmalloc(sizeof(*inst), GFP_KERNEL);
125 	if (!inst)
126 		return -ENOMEM;
127 
128 	INIT_LIST_HEAD_RCU(&inst->list);
129 	inst->tr = tr;
130 	list_add_tail_rcu(&inst->list, &osnoise_instances);
131 
132 	return 0;
133 }
134 
135 /*
136  *  osnoise_unregister_instance - unregister a registered trace instance
137  *
138  * Remove the trace_array *tr from the list of instances running
139  * osnoise/timerlat tracers.
140  */
osnoise_unregister_instance(struct trace_array * tr)141 static void osnoise_unregister_instance(struct trace_array *tr)
142 {
143 	struct osnoise_instance *inst;
144 	int found = 0;
145 
146 	/*
147 	 * register/unregister serialization is provided by trace's
148 	 * trace_types_lock.
149 	 */
150 	list_for_each_entry_rcu(inst, &osnoise_instances, list,
151 				lockdep_is_held(&trace_types_lock)) {
152 		if (inst->tr == tr) {
153 			list_del_rcu(&inst->list);
154 			found = 1;
155 			break;
156 		}
157 	}
158 
159 	if (!found)
160 		return;
161 
162 	kvfree_rcu_mightsleep(inst);
163 }
164 
165 /*
166  * NMI runtime info.
167  */
168 struct osn_nmi {
169 	u64	count;
170 	u64	delta_start;
171 };
172 
173 /*
174  * IRQ runtime info.
175  */
176 struct osn_irq {
177 	u64	count;
178 	u64	arrival_time;
179 	u64	delta_start;
180 };
181 
182 #define IRQ_CONTEXT	0
183 #define THREAD_CONTEXT	1
184 #define THREAD_URET	2
185 /*
186  * sofirq runtime info.
187  */
188 struct osn_softirq {
189 	u64	count;
190 	u64	arrival_time;
191 	u64	delta_start;
192 };
193 
194 /*
195  * thread runtime info.
196  */
197 struct osn_thread {
198 	u64	count;
199 	u64	arrival_time;
200 	u64	delta_start;
201 };
202 
203 /*
204  * Runtime information: this structure saves the runtime information used by
205  * one sampling thread.
206  */
207 struct osnoise_variables {
208 	struct task_struct	*kthread;
209 	bool			sampling;
210 	pid_t			pid;
211 	struct osn_nmi		nmi;
212 	struct osn_irq		irq;
213 	struct osn_softirq	softirq;
214 	struct osn_thread	thread;
215 	local_t			int_counter;
216 };
217 
218 /*
219  * Per-cpu runtime information.
220  */
221 static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);
222 
223 /*
224  * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
225  */
this_cpu_osn_var(void)226 static inline struct osnoise_variables *this_cpu_osn_var(void)
227 {
228 	return this_cpu_ptr(&per_cpu_osnoise_var);
229 }
230 
231 /*
232  * Protect the interface.
233  */
234 static struct mutex interface_lock;
235 
236 #ifdef CONFIG_TIMERLAT_TRACER
237 /*
238  * Runtime information for the timer mode.
239  */
240 struct timerlat_variables {
241 	struct task_struct	*kthread;
242 	struct hrtimer		timer;
243 	u64			rel_period;
244 	u64			abs_period;
245 	bool			tracing_thread;
246 	u64			count;
247 	bool			uthread_migrate;
248 };
249 
250 static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
251 
252 /*
253  * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
254  */
this_cpu_tmr_var(void)255 static inline struct timerlat_variables *this_cpu_tmr_var(void)
256 {
257 	return this_cpu_ptr(&per_cpu_timerlat_var);
258 }
259 
260 /*
261  * tlat_var_reset - Reset the values of the given timerlat_variables
262  */
tlat_var_reset(void)263 static inline void tlat_var_reset(void)
264 {
265 	struct timerlat_variables *tlat_var;
266 	int cpu;
267 
268 	/* Synchronize with the timerlat interfaces */
269 	mutex_lock(&interface_lock);
270 	/*
271 	 * So far, all the values are initialized as 0, so
272 	 * zeroing the structure is perfect.
273 	 */
274 	for_each_cpu(cpu, cpu_online_mask) {
275 		tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
276 		if (tlat_var->kthread)
277 			hrtimer_cancel(&tlat_var->timer);
278 		memset(tlat_var, 0, sizeof(*tlat_var));
279 	}
280 	mutex_unlock(&interface_lock);
281 }
282 #else /* CONFIG_TIMERLAT_TRACER */
283 #define tlat_var_reset()	do {} while (0)
284 #endif /* CONFIG_TIMERLAT_TRACER */
285 
286 /*
287  * osn_var_reset - Reset the values of the given osnoise_variables
288  */
osn_var_reset(void)289 static inline void osn_var_reset(void)
290 {
291 	struct osnoise_variables *osn_var;
292 	int cpu;
293 
294 	/*
295 	 * So far, all the values are initialized as 0, so
296 	 * zeroing the structure is perfect.
297 	 */
298 	for_each_cpu(cpu, cpu_online_mask) {
299 		osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
300 		memset(osn_var, 0, sizeof(*osn_var));
301 	}
302 }
303 
304 /*
305  * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
306  */
osn_var_reset_all(void)307 static inline void osn_var_reset_all(void)
308 {
309 	osn_var_reset();
310 	tlat_var_reset();
311 }
312 
313 /*
314  * Tells NMIs to call back to the osnoise tracer to record timestamps.
315  */
316 bool trace_osnoise_callback_enabled;
317 
318 /*
319  * Tracer data.
320  */
321 static struct osnoise_data {
322 	u64	sample_period;		/* total sampling period */
323 	u64	sample_runtime;		/* active sampling portion of period */
324 	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
325 	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
326 #ifdef CONFIG_TIMERLAT_TRACER
327 	u64	timerlat_period;	/* timerlat period */
328 	u64	print_stack;		/* print IRQ stack if total > */
329 	int	timerlat_tracer;	/* timerlat tracer */
330 #endif
331 	bool	tainted;		/* infor users and developers about a problem */
332 } osnoise_data = {
333 	.sample_period			= DEFAULT_SAMPLE_PERIOD,
334 	.sample_runtime			= DEFAULT_SAMPLE_RUNTIME,
335 	.stop_tracing			= 0,
336 	.stop_tracing_total		= 0,
337 #ifdef CONFIG_TIMERLAT_TRACER
338 	.print_stack			= 0,
339 	.timerlat_period		= DEFAULT_TIMERLAT_PERIOD,
340 	.timerlat_tracer		= 0,
341 #endif
342 };
343 
344 #ifdef CONFIG_TIMERLAT_TRACER
timerlat_enabled(void)345 static inline bool timerlat_enabled(void)
346 {
347 	return osnoise_data.timerlat_tracer;
348 }
349 
timerlat_softirq_exit(struct osnoise_variables * osn_var)350 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
351 {
352 	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
353 	/*
354 	 * If the timerlat is enabled, but the irq handler did
355 	 * not run yet enabling timerlat_tracer, do not trace.
356 	 */
357 	if (!tlat_var->tracing_thread) {
358 		osn_var->softirq.arrival_time = 0;
359 		osn_var->softirq.delta_start = 0;
360 		return 0;
361 	}
362 	return 1;
363 }
364 
timerlat_thread_exit(struct osnoise_variables * osn_var)365 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
366 {
367 	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
368 	/*
369 	 * If the timerlat is enabled, but the irq handler did
370 	 * not run yet enabling timerlat_tracer, do not trace.
371 	 */
372 	if (!tlat_var->tracing_thread) {
373 		osn_var->thread.delta_start = 0;
374 		osn_var->thread.arrival_time = 0;
375 		return 0;
376 	}
377 	return 1;
378 }
379 #else /* CONFIG_TIMERLAT_TRACER */
timerlat_enabled(void)380 static inline bool timerlat_enabled(void)
381 {
382 	return false;
383 }
384 
timerlat_softirq_exit(struct osnoise_variables * osn_var)385 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
386 {
387 	return 1;
388 }
timerlat_thread_exit(struct osnoise_variables * osn_var)389 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
390 {
391 	return 1;
392 }
393 #endif
394 
395 #ifdef CONFIG_PREEMPT_RT
396 /*
397  * Print the osnoise header info.
398  */
print_osnoise_headers(struct seq_file * s)399 static void print_osnoise_headers(struct seq_file *s)
400 {
401 	if (osnoise_data.tainted)
402 		seq_puts(s, "# osnoise is tainted!\n");
403 
404 	seq_puts(s, "#                                _-------=> irqs-off\n");
405 	seq_puts(s, "#                               / _------=> need-resched\n");
406 	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
407 	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
408 	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
409 	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
410 	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
411 
412 	seq_puts(s, "#                              |||||| /          ");
413 	seq_puts(s, "                                     MAX\n");
414 
415 	seq_puts(s, "#                              ||||| /                         ");
416 	seq_puts(s, "                    SINGLE      Interference counters:\n");
417 
418 	seq_puts(s, "#                              |||||||               RUNTIME   ");
419 	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
420 
421 	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
422 	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
423 
424 	seq_puts(s, "#              | |         |   |||||||      |           |      ");
425 	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
426 }
427 #else /* CONFIG_PREEMPT_RT */
print_osnoise_headers(struct seq_file * s)428 static void print_osnoise_headers(struct seq_file *s)
429 {
430 	if (osnoise_data.tainted)
431 		seq_puts(s, "# osnoise is tainted!\n");
432 
433 	seq_puts(s, "#                                _-----=> irqs-off\n");
434 	seq_puts(s, "#                               / _----=> need-resched\n");
435 	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
436 	seq_puts(s, "#                              || / _--=> preempt-depth\n");
437 	seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
438 	seq_puts(s, "                    MAX\n");
439 	seq_puts(s, "#                              |||| /     delay               ");
440 	seq_puts(s, "                    SINGLE      Interference counters:\n");
441 
442 	seq_puts(s, "#                              |||||               RUNTIME   ");
443 	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
444 
445 	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US    ");
446 	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
447 
448 	seq_puts(s, "#              | |         |   |||||      |           |      ");
449 	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
450 }
451 #endif /* CONFIG_PREEMPT_RT */
452 
453 /*
454  * osnoise_taint - report an osnoise error.
455  */
456 #define osnoise_taint(msg) ({							\
457 	struct osnoise_instance *inst;						\
458 	struct trace_buffer *buffer;						\
459 										\
460 	rcu_read_lock();							\
461 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {		\
462 		buffer = inst->tr->array_buffer.buffer;				\
463 		trace_array_printk_buf(buffer, _THIS_IP_, msg);			\
464 	}									\
465 	rcu_read_unlock();							\
466 	osnoise_data.tainted = true;						\
467 })
468 
469 /*
470  * Record an osnoise_sample into the tracer buffer.
471  */
472 static void
__record_osnoise_sample(struct osnoise_sample * sample,struct trace_buffer * buffer)473 __record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
474 {
475 	struct ring_buffer_event *event;
476 	struct osnoise_entry *entry;
477 
478 	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
479 					  tracing_gen_ctx());
480 	if (!event)
481 		return;
482 	entry	= ring_buffer_event_data(event);
483 	entry->runtime		= sample->runtime;
484 	entry->noise		= sample->noise;
485 	entry->max_sample	= sample->max_sample;
486 	entry->hw_count		= sample->hw_count;
487 	entry->nmi_count	= sample->nmi_count;
488 	entry->irq_count	= sample->irq_count;
489 	entry->softirq_count	= sample->softirq_count;
490 	entry->thread_count	= sample->thread_count;
491 
492 	trace_buffer_unlock_commit_nostack(buffer, event);
493 }
494 
495 /*
496  * Record an osnoise_sample on all osnoise instances and fire trace event.
497  */
record_osnoise_sample(struct osnoise_sample * sample)498 static void record_osnoise_sample(struct osnoise_sample *sample)
499 {
500 	struct osnoise_instance *inst;
501 	struct trace_buffer *buffer;
502 
503 	trace_osnoise_sample(sample);
504 
505 	rcu_read_lock();
506 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
507 		buffer = inst->tr->array_buffer.buffer;
508 		__record_osnoise_sample(sample, buffer);
509 	}
510 	rcu_read_unlock();
511 }
512 
513 #ifdef CONFIG_TIMERLAT_TRACER
514 /*
515  * Print the timerlat header info.
516  */
517 #ifdef CONFIG_PREEMPT_RT
print_timerlat_headers(struct seq_file * s)518 static void print_timerlat_headers(struct seq_file *s)
519 {
520 	seq_puts(s, "#                                _-------=> irqs-off\n");
521 	seq_puts(s, "#                               / _------=> need-resched\n");
522 	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
523 	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
524 	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
525 	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
526 	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
527 	seq_puts(s, "#                              |||||| /\n");
528 	seq_puts(s, "#                              |||||||             ACTIVATION\n");
529 	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
530 	seq_puts(s, "       CONTEXT                LATENCY\n");
531 	seq_puts(s, "#              | |         |   |||||||      |         |      ");
532 	seq_puts(s, "            |                       |\n");
533 }
534 #else /* CONFIG_PREEMPT_RT */
print_timerlat_headers(struct seq_file * s)535 static void print_timerlat_headers(struct seq_file *s)
536 {
537 	seq_puts(s, "#                                _-----=> irqs-off\n");
538 	seq_puts(s, "#                               / _----=> need-resched\n");
539 	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
540 	seq_puts(s, "#                              || / _--=> preempt-depth\n");
541 	seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
542 	seq_puts(s, "#                              |||| /     delay\n");
543 	seq_puts(s, "#                              |||||            ACTIVATION\n");
544 	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP   ID      ");
545 	seq_puts(s, "      CONTEXT                 LATENCY\n");
546 	seq_puts(s, "#              | |         |   |||||      |         |      ");
547 	seq_puts(s, "            |                       |\n");
548 }
549 #endif /* CONFIG_PREEMPT_RT */
550 
551 static void
__record_timerlat_sample(struct timerlat_sample * sample,struct trace_buffer * buffer)552 __record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
553 {
554 	struct ring_buffer_event *event;
555 	struct timerlat_entry *entry;
556 
557 	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
558 					  tracing_gen_ctx());
559 	if (!event)
560 		return;
561 	entry	= ring_buffer_event_data(event);
562 	entry->seqnum			= sample->seqnum;
563 	entry->context			= sample->context;
564 	entry->timer_latency		= sample->timer_latency;
565 
566 	trace_buffer_unlock_commit_nostack(buffer, event);
567 }
568 
569 /*
570  * Record an timerlat_sample into the tracer buffer.
571  */
record_timerlat_sample(struct timerlat_sample * sample)572 static void record_timerlat_sample(struct timerlat_sample *sample)
573 {
574 	struct osnoise_instance *inst;
575 	struct trace_buffer *buffer;
576 
577 	trace_timerlat_sample(sample);
578 
579 	rcu_read_lock();
580 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
581 		buffer = inst->tr->array_buffer.buffer;
582 		__record_timerlat_sample(sample, buffer);
583 	}
584 	rcu_read_unlock();
585 }
586 
587 #ifdef CONFIG_STACKTRACE
588 
589 #define	MAX_CALLS	256
590 
591 /*
592  * Stack trace will take place only at IRQ level, so, no need
593  * to control nesting here.
594  */
595 struct trace_stack {
596 	int		stack_size;
597 	int		nr_entries;
598 	unsigned long	calls[MAX_CALLS];
599 };
600 
601 static DEFINE_PER_CPU(struct trace_stack, trace_stack);
602 
603 /*
604  * timerlat_save_stack - save a stack trace without printing
605  *
606  * Save the current stack trace without printing. The
607  * stack will be printed later, after the end of the measurement.
608  */
timerlat_save_stack(int skip)609 static void timerlat_save_stack(int skip)
610 {
611 	unsigned int size, nr_entries;
612 	struct trace_stack *fstack;
613 
614 	fstack = this_cpu_ptr(&trace_stack);
615 
616 	size = ARRAY_SIZE(fstack->calls);
617 
618 	nr_entries = stack_trace_save(fstack->calls, size, skip);
619 
620 	fstack->stack_size = nr_entries * sizeof(unsigned long);
621 	fstack->nr_entries = nr_entries;
622 
623 	return;
624 
625 }
626 
627 static void
__timerlat_dump_stack(struct trace_buffer * buffer,struct trace_stack * fstack,unsigned int size)628 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
629 {
630 	struct ring_buffer_event *event;
631 	struct stack_entry *entry;
632 
633 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
634 					  tracing_gen_ctx());
635 	if (!event)
636 		return;
637 
638 	entry = ring_buffer_event_data(event);
639 
640 	memcpy(&entry->caller, fstack->calls, size);
641 	entry->size = fstack->nr_entries;
642 
643 	trace_buffer_unlock_commit_nostack(buffer, event);
644 }
645 
646 /*
647  * timerlat_dump_stack - dump a stack trace previously saved
648  */
timerlat_dump_stack(u64 latency)649 static void timerlat_dump_stack(u64 latency)
650 {
651 	struct osnoise_instance *inst;
652 	struct trace_buffer *buffer;
653 	struct trace_stack *fstack;
654 	unsigned int size;
655 
656 	/*
657 	 * trace only if latency > print_stack config, if enabled.
658 	 */
659 	if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
660 		return;
661 
662 	preempt_disable_notrace();
663 	fstack = this_cpu_ptr(&trace_stack);
664 	size = fstack->stack_size;
665 
666 	rcu_read_lock();
667 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
668 		buffer = inst->tr->array_buffer.buffer;
669 		__timerlat_dump_stack(buffer, fstack, size);
670 
671 	}
672 	rcu_read_unlock();
673 	preempt_enable_notrace();
674 }
675 #else /* CONFIG_STACKTRACE */
676 #define timerlat_dump_stack(u64 latency) do {} while (0)
677 #define timerlat_save_stack(a) do {} while (0)
678 #endif /* CONFIG_STACKTRACE */
679 #endif /* CONFIG_TIMERLAT_TRACER */
680 
681 /*
682  * Macros to encapsulate the time capturing infrastructure.
683  */
684 #define time_get()	trace_clock_local()
685 #define time_to_us(x)	div_u64(x, 1000)
686 #define time_sub(a, b)	((a) - (b))
687 
688 /*
689  * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
690  *
691  * If an IRQ is preempted by an NMI, its delta_start is pushed forward
692  * to discount the NMI interference.
693  *
694  * See get_int_safe_duration().
695  */
696 static inline void
cond_move_irq_delta_start(struct osnoise_variables * osn_var,u64 duration)697 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
698 {
699 	if (osn_var->irq.delta_start)
700 		osn_var->irq.delta_start += duration;
701 }
702 
703 #ifndef CONFIG_PREEMPT_RT
704 /*
705  * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
706  *
707  * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
708  * forward to discount the interference.
709  *
710  * See get_int_safe_duration().
711  */
712 static inline void
cond_move_softirq_delta_start(struct osnoise_variables * osn_var,u64 duration)713 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
714 {
715 	if (osn_var->softirq.delta_start)
716 		osn_var->softirq.delta_start += duration;
717 }
718 #else /* CONFIG_PREEMPT_RT */
719 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
720 #endif
721 
722 /*
723  * cond_move_thread_delta_start - Forward the delta_start of a running thread
724  *
725  * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start
726  * is pushed forward to discount the interference.
727  *
728  * See get_int_safe_duration().
729  */
730 static inline void
cond_move_thread_delta_start(struct osnoise_variables * osn_var,u64 duration)731 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
732 {
733 	if (osn_var->thread.delta_start)
734 		osn_var->thread.delta_start += duration;
735 }
736 
737 /*
738  * get_int_safe_duration - Get the duration of a window
739  *
740  * The irq, softirq and thread varaibles need to have its duration without
741  * the interference from higher priority interrupts. Instead of keeping a
742  * variable to discount the interrupt interference from these variables, the
743  * starting time of these variables are pushed forward with the interrupt's
744  * duration. In this way, a single variable is used to:
745  *
746  *   - Know if a given window is being measured.
747  *   - Account its duration.
748  *   - Discount the interference.
749  *
750  * To avoid getting inconsistent values, e.g.,:
751  *
752  *	now = time_get()
753  *		--->	interrupt!
754  *			delta_start -= int duration;
755  *		<---
756  *	duration = now - delta_start;
757  *
758  *	result: negative duration if the variable duration before the
759  *	interrupt was smaller than the interrupt execution.
760  *
761  * A counter of interrupts is used. If the counter increased, try
762  * to capture an interference safe duration.
763  */
764 static inline s64
get_int_safe_duration(struct osnoise_variables * osn_var,u64 * delta_start)765 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
766 {
767 	u64 int_counter, now;
768 	s64 duration;
769 
770 	do {
771 		int_counter = local_read(&osn_var->int_counter);
772 		/* synchronize with interrupts */
773 		barrier();
774 
775 		now = time_get();
776 		duration = (now - *delta_start);
777 
778 		/* synchronize with interrupts */
779 		barrier();
780 	} while (int_counter != local_read(&osn_var->int_counter));
781 
782 	/*
783 	 * This is an evidence of race conditions that cause
784 	 * a value to be "discounted" too much.
785 	 */
786 	if (duration < 0)
787 		osnoise_taint("Negative duration!\n");
788 
789 	*delta_start = 0;
790 
791 	return duration;
792 }
793 
794 /*
795  *
796  * set_int_safe_time - Save the current time on *time, aware of interference
797  *
798  * Get the time, taking into consideration a possible interference from
799  * higher priority interrupts.
800  *
801  * See get_int_safe_duration() for an explanation.
802  */
803 static u64
set_int_safe_time(struct osnoise_variables * osn_var,u64 * time)804 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
805 {
806 	u64 int_counter;
807 
808 	do {
809 		int_counter = local_read(&osn_var->int_counter);
810 		/* synchronize with interrupts */
811 		barrier();
812 
813 		*time = time_get();
814 
815 		/* synchronize with interrupts */
816 		barrier();
817 	} while (int_counter != local_read(&osn_var->int_counter));
818 
819 	return int_counter;
820 }
821 
822 #ifdef CONFIG_TIMERLAT_TRACER
823 /*
824  * copy_int_safe_time - Copy *src into *desc aware of interference
825  */
826 static u64
copy_int_safe_time(struct osnoise_variables * osn_var,u64 * dst,u64 * src)827 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
828 {
829 	u64 int_counter;
830 
831 	do {
832 		int_counter = local_read(&osn_var->int_counter);
833 		/* synchronize with interrupts */
834 		barrier();
835 
836 		*dst = *src;
837 
838 		/* synchronize with interrupts */
839 		barrier();
840 	} while (int_counter != local_read(&osn_var->int_counter));
841 
842 	return int_counter;
843 }
844 #endif /* CONFIG_TIMERLAT_TRACER */
845 
846 /*
847  * trace_osnoise_callback - NMI entry/exit callback
848  *
849  * This function is called at the entry and exit NMI code. The bool enter
850  * distinguishes between either case. This function is used to note a NMI
851  * occurrence, compute the noise caused by the NMI, and to remove the noise
852  * it is potentially causing on other interference variables.
853  */
trace_osnoise_callback(bool enter)854 void trace_osnoise_callback(bool enter)
855 {
856 	struct osnoise_variables *osn_var = this_cpu_osn_var();
857 	u64 duration;
858 
859 	if (!osn_var->sampling)
860 		return;
861 
862 	/*
863 	 * Currently trace_clock_local() calls sched_clock() and the
864 	 * generic version is not NMI safe.
865 	 */
866 	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
867 		if (enter) {
868 			osn_var->nmi.delta_start = time_get();
869 			local_inc(&osn_var->int_counter);
870 		} else {
871 			duration = time_get() - osn_var->nmi.delta_start;
872 
873 			trace_nmi_noise(osn_var->nmi.delta_start, duration);
874 
875 			cond_move_irq_delta_start(osn_var, duration);
876 			cond_move_softirq_delta_start(osn_var, duration);
877 			cond_move_thread_delta_start(osn_var, duration);
878 		}
879 	}
880 
881 	if (enter)
882 		osn_var->nmi.count++;
883 }
884 
885 /*
886  * osnoise_trace_irq_entry - Note the starting of an IRQ
887  *
888  * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
889  * it is safe to use a single variable (ons_var->irq) to save the statistics.
890  * The arrival_time is used to report... the arrival time. The delta_start
891  * is used to compute the duration at the IRQ exit handler. See
892  * cond_move_irq_delta_start().
893  */
osnoise_trace_irq_entry(int id)894 void osnoise_trace_irq_entry(int id)
895 {
896 	struct osnoise_variables *osn_var = this_cpu_osn_var();
897 
898 	if (!osn_var->sampling)
899 		return;
900 	/*
901 	 * This value will be used in the report, but not to compute
902 	 * the execution time, so it is safe to get it unsafe.
903 	 */
904 	osn_var->irq.arrival_time = time_get();
905 	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
906 	osn_var->irq.count++;
907 
908 	local_inc(&osn_var->int_counter);
909 }
910 
911 /*
912  * osnoise_irq_exit - Note the end of an IRQ, sava data and trace
913  *
914  * Computes the duration of the IRQ noise, and trace it. Also discounts the
915  * interference from other sources of noise could be currently being accounted.
916  */
osnoise_trace_irq_exit(int id,const char * desc)917 void osnoise_trace_irq_exit(int id, const char *desc)
918 {
919 	struct osnoise_variables *osn_var = this_cpu_osn_var();
920 	s64 duration;
921 
922 	if (!osn_var->sampling)
923 		return;
924 
925 	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
926 	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
927 	osn_var->irq.arrival_time = 0;
928 	cond_move_softirq_delta_start(osn_var, duration);
929 	cond_move_thread_delta_start(osn_var, duration);
930 }
931 
932 /*
933  * trace_irqentry_callback - Callback to the irq:irq_entry traceevent
934  *
935  * Used to note the starting of an IRQ occurece.
936  */
trace_irqentry_callback(void * data,int irq,struct irqaction * action)937 static void trace_irqentry_callback(void *data, int irq,
938 				    struct irqaction *action)
939 {
940 	osnoise_trace_irq_entry(irq);
941 }
942 
943 /*
944  * trace_irqexit_callback - Callback to the irq:irq_exit traceevent
945  *
946  * Used to note the end of an IRQ occurece.
947  */
trace_irqexit_callback(void * data,int irq,struct irqaction * action,int ret)948 static void trace_irqexit_callback(void *data, int irq,
949 				   struct irqaction *action, int ret)
950 {
951 	osnoise_trace_irq_exit(irq, action->name);
952 }
953 
954 /*
955  * arch specific register function.
956  */
osnoise_arch_register(void)957 int __weak osnoise_arch_register(void)
958 {
959 	return 0;
960 }
961 
962 /*
963  * arch specific unregister function.
964  */
osnoise_arch_unregister(void)965 void __weak osnoise_arch_unregister(void)
966 {
967 	return;
968 }
969 
970 /*
971  * hook_irq_events - Hook IRQ handling events
972  *
973  * This function hooks the IRQ related callbacks to the respective trace
974  * events.
975  */
hook_irq_events(void)976 static int hook_irq_events(void)
977 {
978 	int ret;
979 
980 	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
981 	if (ret)
982 		goto out_err;
983 
984 	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
985 	if (ret)
986 		goto out_unregister_entry;
987 
988 	ret = osnoise_arch_register();
989 	if (ret)
990 		goto out_irq_exit;
991 
992 	return 0;
993 
994 out_irq_exit:
995 	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
996 out_unregister_entry:
997 	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
998 out_err:
999 	return -EINVAL;
1000 }
1001 
1002 /*
1003  * unhook_irq_events - Unhook IRQ handling events
1004  *
1005  * This function unhooks the IRQ related callbacks to the respective trace
1006  * events.
1007  */
unhook_irq_events(void)1008 static void unhook_irq_events(void)
1009 {
1010 	osnoise_arch_unregister();
1011 	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
1012 	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
1013 }
1014 
1015 #ifndef CONFIG_PREEMPT_RT
1016 /*
1017  * trace_softirq_entry_callback - Note the starting of a softirq
1018  *
1019  * Save the starting time of a softirq. As softirqs are non-preemptive to
1020  * other softirqs, it is safe to use a single variable (ons_var->softirq)
1021  * to save the statistics. The arrival_time is used to report... the
1022  * arrival time. The delta_start is used to compute the duration at the
1023  * softirq exit handler. See cond_move_softirq_delta_start().
1024  */
trace_softirq_entry_callback(void * data,unsigned int vec_nr)1025 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
1026 {
1027 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1028 
1029 	if (!osn_var->sampling)
1030 		return;
1031 	/*
1032 	 * This value will be used in the report, but not to compute
1033 	 * the execution time, so it is safe to get it unsafe.
1034 	 */
1035 	osn_var->softirq.arrival_time = time_get();
1036 	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
1037 	osn_var->softirq.count++;
1038 
1039 	local_inc(&osn_var->int_counter);
1040 }
1041 
1042 /*
1043  * trace_softirq_exit_callback - Note the end of an softirq
1044  *
1045  * Computes the duration of the softirq noise, and trace it. Also discounts the
1046  * interference from other sources of noise could be currently being accounted.
1047  */
trace_softirq_exit_callback(void * data,unsigned int vec_nr)1048 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
1049 {
1050 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1051 	s64 duration;
1052 
1053 	if (!osn_var->sampling)
1054 		return;
1055 
1056 	if (unlikely(timerlat_enabled()))
1057 		if (!timerlat_softirq_exit(osn_var))
1058 			return;
1059 
1060 	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
1061 	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
1062 	cond_move_thread_delta_start(osn_var, duration);
1063 	osn_var->softirq.arrival_time = 0;
1064 }
1065 
1066 /*
1067  * hook_softirq_events - Hook softirq handling events
1068  *
1069  * This function hooks the softirq related callbacks to the respective trace
1070  * events.
1071  */
hook_softirq_events(void)1072 static int hook_softirq_events(void)
1073 {
1074 	int ret;
1075 
1076 	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1077 	if (ret)
1078 		goto out_err;
1079 
1080 	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
1081 	if (ret)
1082 		goto out_unreg_entry;
1083 
1084 	return 0;
1085 
1086 out_unreg_entry:
1087 	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1088 out_err:
1089 	return -EINVAL;
1090 }
1091 
1092 /*
1093  * unhook_softirq_events - Unhook softirq handling events
1094  *
1095  * This function hooks the softirq related callbacks to the respective trace
1096  * events.
1097  */
unhook_softirq_events(void)1098 static void unhook_softirq_events(void)
1099 {
1100 	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1101 	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
1102 }
1103 #else /* CONFIG_PREEMPT_RT */
1104 /*
1105  * softirq are threads on the PREEMPT_RT mode.
1106  */
hook_softirq_events(void)1107 static int hook_softirq_events(void)
1108 {
1109 	return 0;
1110 }
unhook_softirq_events(void)1111 static void unhook_softirq_events(void)
1112 {
1113 }
1114 #endif
1115 
1116 /*
1117  * thread_entry - Record the starting of a thread noise window
1118  *
1119  * It saves the context switch time for a noisy thread, and increments
1120  * the interference counters.
1121  */
1122 static void
thread_entry(struct osnoise_variables * osn_var,struct task_struct * t)1123 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
1124 {
1125 	if (!osn_var->sampling)
1126 		return;
1127 	/*
1128 	 * The arrival time will be used in the report, but not to compute
1129 	 * the execution time, so it is safe to get it unsafe.
1130 	 */
1131 	osn_var->thread.arrival_time = time_get();
1132 
1133 	set_int_safe_time(osn_var, &osn_var->thread.delta_start);
1134 
1135 	osn_var->thread.count++;
1136 	local_inc(&osn_var->int_counter);
1137 }
1138 
1139 /*
1140  * thread_exit - Report the end of a thread noise window
1141  *
1142  * It computes the total noise from a thread, tracing if needed.
1143  */
1144 static void
thread_exit(struct osnoise_variables * osn_var,struct task_struct * t)1145 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
1146 {
1147 	s64 duration;
1148 
1149 	if (!osn_var->sampling)
1150 		return;
1151 
1152 	if (unlikely(timerlat_enabled()))
1153 		if (!timerlat_thread_exit(osn_var))
1154 			return;
1155 
1156 	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);
1157 
1158 	trace_thread_noise(t, osn_var->thread.arrival_time, duration);
1159 
1160 	osn_var->thread.arrival_time = 0;
1161 }
1162 
1163 #ifdef CONFIG_TIMERLAT_TRACER
1164 /*
1165  * osnoise_stop_exception - Stop tracing and the tracer.
1166  */
osnoise_stop_exception(char * msg,int cpu)1167 static __always_inline void osnoise_stop_exception(char *msg, int cpu)
1168 {
1169 	struct osnoise_instance *inst;
1170 	struct trace_array *tr;
1171 
1172 	rcu_read_lock();
1173 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1174 		tr = inst->tr;
1175 		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1176 				       "stop tracing hit on cpu %d due to exception: %s\n",
1177 				       smp_processor_id(),
1178 				       msg);
1179 
1180 		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
1181 			panic("tracer hit on cpu %d due to exception: %s\n",
1182 			      smp_processor_id(),
1183 			      msg);
1184 
1185 		tracer_tracing_off(tr);
1186 	}
1187 	rcu_read_unlock();
1188 }
1189 
1190 /*
1191  * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler
1192  *
1193  * his function is hooked to the sched:sched_migrate_task trace event, and monitors
1194  * timerlat user-space thread migration.
1195  */
trace_sched_migrate_callback(void * data,struct task_struct * p,int dest_cpu)1196 static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
1197 {
1198 	struct osnoise_variables *osn_var;
1199 	long cpu = task_cpu(p);
1200 
1201 	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
1202 	if (osn_var->pid == p->pid && dest_cpu != cpu) {
1203 		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
1204 		osnoise_taint("timerlat user-thread migrated\n");
1205 		osnoise_stop_exception("timerlat user-thread migrated", cpu);
1206 	}
1207 }
1208 
1209 static bool monitor_enabled;
1210 
register_migration_monitor(void)1211 static int register_migration_monitor(void)
1212 {
1213 	int ret = 0;
1214 
1215 	/*
1216 	 * Timerlat thread migration check is only required when running timerlat in user-space.
1217 	 * Thus, enable callback only if timerlat is set with no workload.
1218 	 */
1219 	if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) {
1220 		if (WARN_ON_ONCE(monitor_enabled))
1221 			return 0;
1222 
1223 		ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
1224 		if (!ret)
1225 			monitor_enabled = true;
1226 	}
1227 
1228 	return ret;
1229 }
1230 
unregister_migration_monitor(void)1231 static void unregister_migration_monitor(void)
1232 {
1233 	if (!monitor_enabled)
1234 		return;
1235 
1236 	unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
1237 	monitor_enabled = false;
1238 }
1239 #else
register_migration_monitor(void)1240 static int register_migration_monitor(void)
1241 {
1242 	return 0;
1243 }
unregister_migration_monitor(void)1244 static void unregister_migration_monitor(void) {}
1245 #endif
1246 /*
1247  * trace_sched_switch - sched:sched_switch trace event handler
1248  *
1249  * This function is hooked to the sched:sched_switch trace event, and it is
1250  * used to record the beginning and to report the end of a thread noise window.
1251  */
1252 static void
trace_sched_switch_callback(void * data,bool preempt,struct task_struct * p,struct task_struct * n,unsigned int prev_state)1253 trace_sched_switch_callback(void *data, bool preempt,
1254 			    struct task_struct *p,
1255 			    struct task_struct *n,
1256 			    unsigned int prev_state)
1257 {
1258 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1259 	int workload = test_bit(OSN_WORKLOAD, &osnoise_options);
1260 
1261 	if ((p->pid != osn_var->pid) || !workload)
1262 		thread_exit(osn_var, p);
1263 
1264 	if ((n->pid != osn_var->pid) || !workload)
1265 		thread_entry(osn_var, n);
1266 }
1267 
1268 /*
1269  * hook_thread_events - Hook the instrumentation for thread noise
1270  *
1271  * Hook the osnoise tracer callbacks to handle the noise from other
1272  * threads on the necessary kernel events.
1273  */
hook_thread_events(void)1274 static int hook_thread_events(void)
1275 {
1276 	int ret;
1277 
1278 	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
1279 	if (ret)
1280 		return -EINVAL;
1281 
1282 	ret = register_migration_monitor();
1283 	if (ret)
1284 		goto out_unreg;
1285 
1286 	return 0;
1287 
1288 out_unreg:
1289 	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1290 	return -EINVAL;
1291 }
1292 
1293 /*
1294  * unhook_thread_events - unhook the instrumentation for thread noise
1295  *
1296  * Unook the osnoise tracer callbacks to handle the noise from other
1297  * threads on the necessary kernel events.
1298  */
unhook_thread_events(void)1299 static void unhook_thread_events(void)
1300 {
1301 	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1302 	unregister_migration_monitor();
1303 }
1304 
1305 /*
1306  * save_osn_sample_stats - Save the osnoise_sample statistics
1307  *
1308  * Save the osnoise_sample statistics before the sampling phase. These
1309  * values will be used later to compute the diff betwneen the statistics
1310  * before and after the osnoise sampling.
1311  */
1312 static void
save_osn_sample_stats(struct osnoise_variables * osn_var,struct osnoise_sample * s)1313 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1314 {
1315 	s->nmi_count = osn_var->nmi.count;
1316 	s->irq_count = osn_var->irq.count;
1317 	s->softirq_count = osn_var->softirq.count;
1318 	s->thread_count = osn_var->thread.count;
1319 }
1320 
1321 /*
1322  * diff_osn_sample_stats - Compute the osnoise_sample statistics
1323  *
1324  * After a sample period, compute the difference on the osnoise_sample
1325  * statistics. The struct osnoise_sample *s contains the statistics saved via
1326  * save_osn_sample_stats() before the osnoise sampling.
1327  */
1328 static void
diff_osn_sample_stats(struct osnoise_variables * osn_var,struct osnoise_sample * s)1329 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1330 {
1331 	s->nmi_count = osn_var->nmi.count - s->nmi_count;
1332 	s->irq_count = osn_var->irq.count - s->irq_count;
1333 	s->softirq_count = osn_var->softirq.count - s->softirq_count;
1334 	s->thread_count = osn_var->thread.count - s->thread_count;
1335 }
1336 
1337 /*
1338  * osnoise_stop_tracing - Stop tracing and the tracer.
1339  */
osnoise_stop_tracing(void)1340 static __always_inline void osnoise_stop_tracing(void)
1341 {
1342 	struct osnoise_instance *inst;
1343 	struct trace_array *tr;
1344 
1345 	rcu_read_lock();
1346 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1347 		tr = inst->tr;
1348 		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1349 				"stop tracing hit on cpu %d\n", smp_processor_id());
1350 
1351 		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
1352 			panic("tracer hit stop condition on CPU %d\n", smp_processor_id());
1353 
1354 		tracer_tracing_off(tr);
1355 	}
1356 	rcu_read_unlock();
1357 }
1358 
1359 /*
1360  * osnoise_has_tracing_on - Check if there is at least one instance on
1361  */
osnoise_has_tracing_on(void)1362 static __always_inline int osnoise_has_tracing_on(void)
1363 {
1364 	struct osnoise_instance *inst;
1365 	int trace_is_on = 0;
1366 
1367 	rcu_read_lock();
1368 	list_for_each_entry_rcu(inst, &osnoise_instances, list)
1369 		trace_is_on += tracer_tracing_is_on(inst->tr);
1370 	rcu_read_unlock();
1371 
1372 	return trace_is_on;
1373 }
1374 
1375 /*
1376  * notify_new_max_latency - Notify a new max latency via fsnotify interface.
1377  */
notify_new_max_latency(u64 latency)1378 static void notify_new_max_latency(u64 latency)
1379 {
1380 	struct osnoise_instance *inst;
1381 	struct trace_array *tr;
1382 
1383 	rcu_read_lock();
1384 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1385 		tr = inst->tr;
1386 		if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
1387 			tr->max_latency = latency;
1388 			latency_fsnotify(tr);
1389 		}
1390 	}
1391 	rcu_read_unlock();
1392 }
1393 
1394 /*
1395  * run_osnoise - Sample the time and look for osnoise
1396  *
1397  * Used to capture the time, looking for potential osnoise latency repeatedly.
1398  * Different from hwlat_detector, it is called with preemption and interrupts
1399  * enabled. This allows irqs, softirqs and threads to run, interfering on the
1400  * osnoise sampling thread, as they would do with a regular thread.
1401  */
run_osnoise(void)1402 static int run_osnoise(void)
1403 {
1404 	bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
1405 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1406 	u64 start, sample, last_sample;
1407 	u64 last_int_count, int_count;
1408 	s64 noise = 0, max_noise = 0;
1409 	s64 total, last_total = 0;
1410 	struct osnoise_sample s;
1411 	bool disable_preemption;
1412 	unsigned int threshold;
1413 	u64 runtime, stop_in;
1414 	u64 sum_noise = 0;
1415 	int hw_count = 0;
1416 	int ret = -1;
1417 
1418 	/*
1419 	 * Disabling preemption is only required if IRQs are enabled,
1420 	 * and the options is set on.
1421 	 */
1422 	disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);
1423 
1424 	/*
1425 	 * Considers the current thread as the workload.
1426 	 */
1427 	osn_var->pid = current->pid;
1428 
1429 	/*
1430 	 * Save the current stats for the diff
1431 	 */
1432 	save_osn_sample_stats(osn_var, &s);
1433 
1434 	/*
1435 	 * if threshold is 0, use the default value of 1 us.
1436 	 */
1437 	threshold = tracing_thresh ? : 1000;
1438 
1439 	/*
1440 	 * Apply PREEMPT and IRQ disabled options.
1441 	 */
1442 	if (disable_irq)
1443 		local_irq_disable();
1444 
1445 	if (disable_preemption)
1446 		preempt_disable();
1447 
1448 	/*
1449 	 * Make sure NMIs see sampling first
1450 	 */
1451 	osn_var->sampling = true;
1452 	barrier();
1453 
1454 	/*
1455 	 * Transform the *_us config to nanoseconds to avoid the
1456 	 * division on the main loop.
1457 	 */
1458 	runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
1459 	stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;
1460 
1461 	/*
1462 	 * Start timestemp
1463 	 */
1464 	start = time_get();
1465 
1466 	/*
1467 	 * "previous" loop.
1468 	 */
1469 	last_int_count = set_int_safe_time(osn_var, &last_sample);
1470 
1471 	do {
1472 		/*
1473 		 * Get sample!
1474 		 */
1475 		int_count = set_int_safe_time(osn_var, &sample);
1476 
1477 		noise = time_sub(sample, last_sample);
1478 
1479 		/*
1480 		 * This shouldn't happen.
1481 		 */
1482 		if (noise < 0) {
1483 			osnoise_taint("negative noise!");
1484 			goto out;
1485 		}
1486 
1487 		/*
1488 		 * Sample runtime.
1489 		 */
1490 		total = time_sub(sample, start);
1491 
1492 		/*
1493 		 * Check for possible overflows.
1494 		 */
1495 		if (total < last_total) {
1496 			osnoise_taint("total overflow!");
1497 			break;
1498 		}
1499 
1500 		last_total = total;
1501 
1502 		if (noise >= threshold) {
1503 			int interference = int_count - last_int_count;
1504 
1505 			if (noise > max_noise)
1506 				max_noise = noise;
1507 
1508 			if (!interference)
1509 				hw_count++;
1510 
1511 			sum_noise += noise;
1512 
1513 			trace_sample_threshold(last_sample, noise, interference);
1514 
1515 			if (osnoise_data.stop_tracing)
1516 				if (noise > stop_in)
1517 					osnoise_stop_tracing();
1518 		}
1519 
1520 		/*
1521 		 * In some cases, notably when running on a nohz_full CPU with
1522 		 * a stopped tick PREEMPT_RCU or PREEMPT_LAZY have no way to
1523 		 * account for QSs. This will eventually cause unwarranted
1524 		 * noise as RCU forces preemption as the means of ending the
1525 		 * current grace period.  We avoid this by calling
1526 		 * rcu_momentary_eqs(), which performs a zero duration EQS
1527 		 * allowing RCU to end the current grace period. This call
1528 		 * shouldn't be wrapped inside an RCU critical section.
1529 		 *
1530 		 * Normally QSs for other cases are handled through cond_resched().
1531 		 * For simplicity, however, we call rcu_momentary_eqs() for all
1532 		 * configurations here.
1533 		 */
1534 		if (!disable_irq)
1535 			local_irq_disable();
1536 
1537 		rcu_momentary_eqs();
1538 
1539 		if (!disable_irq)
1540 			local_irq_enable();
1541 
1542 		/*
1543 		 * For the non-preemptive kernel config: let threads runs, if
1544 		 * they so wish, unless set not do to so.
1545 		 */
1546 		if (!disable_irq && !disable_preemption)
1547 			cond_resched();
1548 
1549 		last_sample = sample;
1550 		last_int_count = int_count;
1551 
1552 	} while (total < runtime && !kthread_should_stop());
1553 
1554 	/*
1555 	 * Finish the above in the view for interrupts.
1556 	 */
1557 	barrier();
1558 
1559 	osn_var->sampling = false;
1560 
1561 	/*
1562 	 * Make sure sampling data is no longer updated.
1563 	 */
1564 	barrier();
1565 
1566 	/*
1567 	 * Return to the preemptive state.
1568 	 */
1569 	if (disable_preemption)
1570 		preempt_enable();
1571 
1572 	if (disable_irq)
1573 		local_irq_enable();
1574 
1575 	/*
1576 	 * Save noise info.
1577 	 */
1578 	s.noise = time_to_us(sum_noise);
1579 	s.runtime = time_to_us(total);
1580 	s.max_sample = time_to_us(max_noise);
1581 	s.hw_count = hw_count;
1582 
1583 	/* Save interference stats info */
1584 	diff_osn_sample_stats(osn_var, &s);
1585 
1586 	record_osnoise_sample(&s);
1587 
1588 	notify_new_max_latency(max_noise);
1589 
1590 	if (osnoise_data.stop_tracing_total)
1591 		if (s.noise > osnoise_data.stop_tracing_total)
1592 			osnoise_stop_tracing();
1593 
1594 	return 0;
1595 out:
1596 	return ret;
1597 }
1598 
1599 static struct cpumask osnoise_cpumask;
1600 static struct cpumask save_cpumask;
1601 static struct cpumask kthread_cpumask;
1602 
1603 /*
1604  * osnoise_sleep - sleep until the next period
1605  */
osnoise_sleep(bool skip_period)1606 static void osnoise_sleep(bool skip_period)
1607 {
1608 	u64 interval;
1609 	ktime_t wake_time;
1610 
1611 	mutex_lock(&interface_lock);
1612 	if (skip_period)
1613 		interval = osnoise_data.sample_period;
1614 	else
1615 		interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
1616 	mutex_unlock(&interface_lock);
1617 
1618 	/*
1619 	 * differently from hwlat_detector, the osnoise tracer can run
1620 	 * without a pause because preemption is on.
1621 	 */
1622 	if (!interval) {
1623 		/* Let synchronize_rcu_tasks() make progress */
1624 		cond_resched_tasks_rcu_qs();
1625 		return;
1626 	}
1627 
1628 	wake_time = ktime_add_us(ktime_get(), interval);
1629 	__set_current_state(TASK_INTERRUPTIBLE);
1630 
1631 	while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
1632 		if (kthread_should_stop())
1633 			break;
1634 	}
1635 }
1636 
1637 /*
1638  * osnoise_migration_pending - checks if the task needs to migrate
1639  *
1640  * osnoise/timerlat threads are per-cpu. If there is a pending request to
1641  * migrate the thread away from the current CPU, something bad has happened.
1642  * Play the good citizen and leave.
1643  *
1644  * Returns 0 if it is safe to continue, 1 otherwise.
1645  */
osnoise_migration_pending(void)1646 static inline int osnoise_migration_pending(void)
1647 {
1648 	if (!current->migration_pending)
1649 		return 0;
1650 
1651 	/*
1652 	 * If migration is pending, there is a task waiting for the
1653 	 * tracer to enable migration. The tracer does not allow migration,
1654 	 * thus: taint and leave to unblock the blocked thread.
1655 	 */
1656 	osnoise_taint("migration requested to osnoise threads, leaving.");
1657 
1658 	/*
1659 	 * Unset this thread from the threads managed by the interface.
1660 	 * The tracers are responsible for cleaning their env before
1661 	 * exiting.
1662 	 */
1663 	mutex_lock(&interface_lock);
1664 	this_cpu_osn_var()->kthread = NULL;
1665 	cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask);
1666 	mutex_unlock(&interface_lock);
1667 
1668 	return 1;
1669 }
1670 
1671 /*
1672  * osnoise_main - The osnoise detection kernel thread
1673  *
1674  * Calls run_osnoise() function to measure the osnoise for the configured runtime,
1675  * every period.
1676  */
osnoise_main(void * data)1677 static int osnoise_main(void *data)
1678 {
1679 	unsigned long flags;
1680 
1681 	/*
1682 	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
1683 	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
1684 	 *
1685 	 * To work around this limitation, disable migration and remove the
1686 	 * flag.
1687 	 */
1688 	migrate_disable();
1689 	raw_spin_lock_irqsave(&current->pi_lock, flags);
1690 	current->flags &= ~(PF_NO_SETAFFINITY);
1691 	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
1692 
1693 	while (!kthread_should_stop()) {
1694 		if (osnoise_migration_pending())
1695 			break;
1696 
1697 		/* skip a period if tracing is off on all instances */
1698 		if (!osnoise_has_tracing_on()) {
1699 			osnoise_sleep(true);
1700 			continue;
1701 		}
1702 
1703 		run_osnoise();
1704 		osnoise_sleep(false);
1705 	}
1706 
1707 	migrate_enable();
1708 	return 0;
1709 }
1710 
1711 #ifdef CONFIG_TIMERLAT_TRACER
1712 /*
1713  * timerlat_irq - hrtimer handler for timerlat.
1714  */
timerlat_irq(struct hrtimer * timer)1715 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
1716 {
1717 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1718 	struct timerlat_variables *tlat;
1719 	struct timerlat_sample s;
1720 	u64 now;
1721 	u64 diff;
1722 
1723 	/*
1724 	 * I am not sure if the timer was armed for this CPU. So, get
1725 	 * the timerlat struct from the timer itself, not from this
1726 	 * CPU.
1727 	 */
1728 	tlat = container_of(timer, struct timerlat_variables, timer);
1729 
1730 	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1731 
1732 	/*
1733 	 * Enable the osnoise: events for thread an softirq.
1734 	 */
1735 	tlat->tracing_thread = true;
1736 
1737 	osn_var->thread.arrival_time = time_get();
1738 
1739 	/*
1740 	 * A hardirq is running: the timer IRQ. It is for sure preempting
1741 	 * a thread, and potentially preempting a softirq.
1742 	 *
1743 	 * At this point, it is not interesting to know the duration of the
1744 	 * preempted thread (and maybe softirq), but how much time they will
1745 	 * delay the beginning of the execution of the timer thread.
1746 	 *
1747 	 * To get the correct (net) delay added by the softirq, its delta_start
1748 	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
1749 	 * start of the sofitrq will be zeroed, accounting then only the time
1750 	 * after that.
1751 	 *
1752 	 * The thread follows the same principle. However, if a softirq is
1753 	 * running, the thread needs to receive the softirq delta_start. The
1754 	 * reason being is that the softirq will be the last to be unfolded,
1755 	 * resseting the thread delay to zero.
1756 	 *
1757 	 * The PREEMPT_RT is a special case, though. As softirqs run as threads
1758 	 * on RT, moving the thread is enough.
1759 	 */
1760 	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
1761 		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1762 				   &osn_var->softirq.delta_start);
1763 
1764 		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
1765 				    &osn_var->irq.delta_start);
1766 	} else {
1767 		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1768 				    &osn_var->irq.delta_start);
1769 	}
1770 
1771 	/*
1772 	 * Compute the current time with the expected time.
1773 	 */
1774 	diff = now - tlat->abs_period;
1775 
1776 	tlat->count++;
1777 	s.seqnum = tlat->count;
1778 	s.timer_latency = diff;
1779 	s.context = IRQ_CONTEXT;
1780 
1781 	record_timerlat_sample(&s);
1782 
1783 	if (osnoise_data.stop_tracing) {
1784 		if (time_to_us(diff) >= osnoise_data.stop_tracing) {
1785 
1786 			/*
1787 			 * At this point, if stop_tracing is set and <= print_stack,
1788 			 * print_stack is set and would be printed in the thread handler.
1789 			 *
1790 			 * Thus, print the stack trace as it is helpful to define the
1791 			 * root cause of an IRQ latency.
1792 			 */
1793 			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
1794 				timerlat_save_stack(0);
1795 				timerlat_dump_stack(time_to_us(diff));
1796 			}
1797 
1798 			osnoise_stop_tracing();
1799 			notify_new_max_latency(diff);
1800 
1801 			wake_up_process(tlat->kthread);
1802 
1803 			return HRTIMER_NORESTART;
1804 		}
1805 	}
1806 
1807 	wake_up_process(tlat->kthread);
1808 
1809 	if (osnoise_data.print_stack)
1810 		timerlat_save_stack(0);
1811 
1812 	return HRTIMER_NORESTART;
1813 }
1814 
1815 /*
1816  * wait_next_period - Wait for the next period for timerlat
1817  */
wait_next_period(struct timerlat_variables * tlat)1818 static int wait_next_period(struct timerlat_variables *tlat)
1819 {
1820 	ktime_t next_abs_period, now;
1821 	u64 rel_period = osnoise_data.timerlat_period * 1000;
1822 
1823 	now = hrtimer_cb_get_time(&tlat->timer);
1824 	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1825 
1826 	/*
1827 	 * Save the next abs_period.
1828 	 */
1829 	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1830 
1831 	/*
1832 	 * If the new abs_period is in the past, skip the activation.
1833 	 */
1834 	while (ktime_compare(now, next_abs_period) > 0) {
1835 		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1836 		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1837 	}
1838 
1839 	set_current_state(TASK_INTERRUPTIBLE);
1840 
1841 	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
1842 	schedule();
1843 	return 1;
1844 }
1845 
1846 /*
1847  * timerlat_main- Timerlat main
1848  */
timerlat_main(void * data)1849 static int timerlat_main(void *data)
1850 {
1851 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1852 	struct timerlat_variables *tlat = this_cpu_tmr_var();
1853 	struct timerlat_sample s;
1854 	struct sched_param sp;
1855 	unsigned long flags;
1856 	u64 now, diff;
1857 
1858 	/*
1859 	 * Make the thread RT, that is how cyclictest is usually used.
1860 	 */
1861 	sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1862 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1863 
1864 	/*
1865 	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
1866 	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
1867 	 *
1868 	 * To work around this limitation, disable migration and remove the
1869 	 * flag.
1870 	 */
1871 	migrate_disable();
1872 	raw_spin_lock_irqsave(&current->pi_lock, flags);
1873 	current->flags &= ~(PF_NO_SETAFFINITY);
1874 	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
1875 
1876 	tlat->count = 0;
1877 	tlat->tracing_thread = false;
1878 
1879 	hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1880 	tlat->kthread = current;
1881 	osn_var->pid = current->pid;
1882 	/*
1883 	 * Anotate the arrival time.
1884 	 */
1885 	tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1886 
1887 	wait_next_period(tlat);
1888 
1889 	osn_var->sampling = 1;
1890 
1891 	while (!kthread_should_stop()) {
1892 
1893 		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1894 		diff = now - tlat->abs_period;
1895 
1896 		s.seqnum = tlat->count;
1897 		s.timer_latency = diff;
1898 		s.context = THREAD_CONTEXT;
1899 
1900 		record_timerlat_sample(&s);
1901 
1902 		notify_new_max_latency(diff);
1903 
1904 		timerlat_dump_stack(time_to_us(diff));
1905 
1906 		tlat->tracing_thread = false;
1907 		if (osnoise_data.stop_tracing_total)
1908 			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1909 				osnoise_stop_tracing();
1910 
1911 		if (osnoise_migration_pending())
1912 			break;
1913 
1914 		wait_next_period(tlat);
1915 	}
1916 
1917 	hrtimer_cancel(&tlat->timer);
1918 	migrate_enable();
1919 	return 0;
1920 }
1921 #else /* CONFIG_TIMERLAT_TRACER */
timerlat_main(void * data)1922 static int timerlat_main(void *data)
1923 {
1924 	return 0;
1925 }
1926 #endif /* CONFIG_TIMERLAT_TRACER */
1927 
1928 /*
1929  * stop_kthread - stop a workload thread
1930  */
stop_kthread(unsigned int cpu)1931 static void stop_kthread(unsigned int cpu)
1932 {
1933 	struct task_struct *kthread;
1934 
1935 	kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
1936 	if (kthread) {
1937 		if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
1938 		    !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
1939 			kthread_stop(kthread);
1940 		} else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) {
1941 			/*
1942 			 * This is a user thread waiting on the timerlat_fd. We need
1943 			 * to close all users, and the best way to guarantee this is
1944 			 * by killing the thread. NOTE: this is a purpose specific file.
1945 			 */
1946 			kill_pid(kthread->thread_pid, SIGKILL, 1);
1947 			put_task_struct(kthread);
1948 		}
1949 	} else {
1950 		/* if no workload, just return */
1951 		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1952 			/*
1953 			 * This is set in the osnoise tracer case.
1954 			 */
1955 			per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
1956 			barrier();
1957 		}
1958 	}
1959 }
1960 
1961 /*
1962  * stop_per_cpu_kthread - Stop per-cpu threads
1963  *
1964  * Stop the osnoise sampling htread. Use this on unload and at system
1965  * shutdown.
1966  */
stop_per_cpu_kthreads(void)1967 static void stop_per_cpu_kthreads(void)
1968 {
1969 	int cpu;
1970 
1971 	cpus_read_lock();
1972 
1973 	for_each_online_cpu(cpu)
1974 		stop_kthread(cpu);
1975 
1976 	cpus_read_unlock();
1977 }
1978 
1979 /*
1980  * start_kthread - Start a workload tread
1981  */
start_kthread(unsigned int cpu)1982 static int start_kthread(unsigned int cpu)
1983 {
1984 	struct task_struct *kthread;
1985 	void *main = osnoise_main;
1986 	char comm[24];
1987 
1988 	/* Do not start a new thread if it is already running */
1989 	if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
1990 		return 0;
1991 
1992 	if (timerlat_enabled()) {
1993 		snprintf(comm, 24, "timerlat/%d", cpu);
1994 		main = timerlat_main;
1995 	} else {
1996 		/* if no workload, just return */
1997 		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1998 			per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
1999 			barrier();
2000 			return 0;
2001 		}
2002 		snprintf(comm, 24, "osnoise/%d", cpu);
2003 	}
2004 
2005 	kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
2006 
2007 	if (IS_ERR(kthread)) {
2008 		pr_err(BANNER "could not start sampling thread\n");
2009 		return -ENOMEM;
2010 	}
2011 
2012 	per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
2013 	cpumask_set_cpu(cpu, &kthread_cpumask);
2014 
2015 	return 0;
2016 }
2017 
2018 /*
2019  * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads
2020  *
2021  * This starts the kernel thread that will look for osnoise on many
2022  * cpus.
2023  */
start_per_cpu_kthreads(void)2024 static int start_per_cpu_kthreads(void)
2025 {
2026 	struct cpumask *current_mask = &save_cpumask;
2027 	int retval = 0;
2028 	int cpu;
2029 
2030 	if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
2031 		if (timerlat_enabled())
2032 			return 0;
2033 	}
2034 
2035 	cpus_read_lock();
2036 	/*
2037 	 * Run only on online CPUs in which osnoise is allowed to run.
2038 	 */
2039 	cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
2040 
2041 	for_each_possible_cpu(cpu) {
2042 		if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
2043 			struct task_struct *kthread;
2044 
2045 			kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
2046 			if (!WARN_ON(!kthread))
2047 				kthread_stop(kthread);
2048 		}
2049 	}
2050 
2051 	for_each_cpu(cpu, current_mask) {
2052 		retval = start_kthread(cpu);
2053 		if (retval) {
2054 			cpus_read_unlock();
2055 			stop_per_cpu_kthreads();
2056 			return retval;
2057 		}
2058 	}
2059 
2060 	cpus_read_unlock();
2061 
2062 	return retval;
2063 }
2064 
2065 #ifdef CONFIG_HOTPLUG_CPU
osnoise_hotplug_workfn(struct work_struct * dummy)2066 static void osnoise_hotplug_workfn(struct work_struct *dummy)
2067 {
2068 	unsigned int cpu = smp_processor_id();
2069 
2070 	guard(mutex)(&trace_types_lock);
2071 
2072 	if (!osnoise_has_registered_instances())
2073 		return;
2074 
2075 	guard(mutex)(&interface_lock);
2076 	guard(cpus_read_lock)();
2077 
2078 	if (!cpu_online(cpu))
2079 		return;
2080 
2081 	if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
2082 		return;
2083 
2084 	start_kthread(cpu);
2085 }
2086 
2087 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
2088 
2089 /*
2090  * osnoise_cpu_init - CPU hotplug online callback function
2091  */
osnoise_cpu_init(unsigned int cpu)2092 static int osnoise_cpu_init(unsigned int cpu)
2093 {
2094 	schedule_work_on(cpu, &osnoise_hotplug_work);
2095 	return 0;
2096 }
2097 
2098 /*
2099  * osnoise_cpu_die - CPU hotplug offline callback function
2100  */
osnoise_cpu_die(unsigned int cpu)2101 static int osnoise_cpu_die(unsigned int cpu)
2102 {
2103 	stop_kthread(cpu);
2104 	return 0;
2105 }
2106 
osnoise_init_hotplug_support(void)2107 static void osnoise_init_hotplug_support(void)
2108 {
2109 	int ret;
2110 
2111 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
2112 				osnoise_cpu_init, osnoise_cpu_die);
2113 	if (ret < 0)
2114 		pr_warn(BANNER "Error to init cpu hotplug support\n");
2115 
2116 	return;
2117 }
2118 #else /* CONFIG_HOTPLUG_CPU */
osnoise_init_hotplug_support(void)2119 static void osnoise_init_hotplug_support(void)
2120 {
2121 	return;
2122 }
2123 #endif /* CONFIG_HOTPLUG_CPU */
2124 
2125 /*
2126  * seq file functions for the osnoise/options file.
2127  */
s_options_start(struct seq_file * s,loff_t * pos)2128 static void *s_options_start(struct seq_file *s, loff_t *pos)
2129 {
2130 	int option = *pos;
2131 
2132 	mutex_lock(&interface_lock);
2133 
2134 	if (option >= OSN_MAX)
2135 		return NULL;
2136 
2137 	return pos;
2138 }
2139 
s_options_next(struct seq_file * s,void * v,loff_t * pos)2140 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
2141 {
2142 	int option = ++(*pos);
2143 
2144 	if (option >= OSN_MAX)
2145 		return NULL;
2146 
2147 	return pos;
2148 }
2149 
s_options_show(struct seq_file * s,void * v)2150 static int s_options_show(struct seq_file *s, void *v)
2151 {
2152 	loff_t *pos = v;
2153 	int option = *pos;
2154 
2155 	if (option == OSN_DEFAULTS) {
2156 		if (osnoise_options == OSN_DEFAULT_OPTIONS)
2157 			seq_printf(s, "%s", osnoise_options_str[option]);
2158 		else
2159 			seq_printf(s, "NO_%s", osnoise_options_str[option]);
2160 		goto out;
2161 	}
2162 
2163 	if (test_bit(option, &osnoise_options))
2164 		seq_printf(s, "%s", osnoise_options_str[option]);
2165 	else
2166 		seq_printf(s, "NO_%s", osnoise_options_str[option]);
2167 
2168 out:
2169 	if (option != OSN_MAX)
2170 		seq_puts(s, " ");
2171 
2172 	return 0;
2173 }
2174 
s_options_stop(struct seq_file * s,void * v)2175 static void s_options_stop(struct seq_file *s, void *v)
2176 {
2177 	seq_puts(s, "\n");
2178 	mutex_unlock(&interface_lock);
2179 }
2180 
2181 static const struct seq_operations osnoise_options_seq_ops = {
2182 	.start		= s_options_start,
2183 	.next		= s_options_next,
2184 	.show		= s_options_show,
2185 	.stop		= s_options_stop
2186 };
2187 
osnoise_options_open(struct inode * inode,struct file * file)2188 static int osnoise_options_open(struct inode *inode, struct file *file)
2189 {
2190 	return seq_open(file, &osnoise_options_seq_ops);
2191 };
2192 
2193 /**
2194  * osnoise_options_write - Write function for "options" entry
2195  * @filp: The active open file structure
2196  * @ubuf: The user buffer that contains the value to write
2197  * @cnt: The maximum number of bytes to write to "file"
2198  * @ppos: The current position in @file
2199  *
2200  * Writing the option name sets the option, writing the "NO_"
2201  * prefix in front of the option name disables it.
2202  *
2203  * Writing "DEFAULTS" resets the option values to the default ones.
2204  */
osnoise_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)2205 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
2206 				     size_t cnt, loff_t *ppos)
2207 {
2208 	int running, option, enable, retval;
2209 	char buf[256], *option_str;
2210 
2211 	if (cnt >= 256)
2212 		return -EINVAL;
2213 
2214 	if (copy_from_user(buf, ubuf, cnt))
2215 		return -EFAULT;
2216 
2217 	buf[cnt] = 0;
2218 
2219 	if (strncmp(buf, "NO_", 3)) {
2220 		option_str = strstrip(buf);
2221 		enable = true;
2222 	} else {
2223 		option_str = strstrip(&buf[3]);
2224 		enable = false;
2225 	}
2226 
2227 	option = match_string(osnoise_options_str, OSN_MAX, option_str);
2228 	if (option < 0)
2229 		return -EINVAL;
2230 
2231 	/*
2232 	 * trace_types_lock is taken to avoid concurrency on start/stop.
2233 	 */
2234 	mutex_lock(&trace_types_lock);
2235 	running = osnoise_has_registered_instances();
2236 	if (running)
2237 		stop_per_cpu_kthreads();
2238 
2239 	mutex_lock(&interface_lock);
2240 	/*
2241 	 * avoid CPU hotplug operations that might read options.
2242 	 */
2243 	cpus_read_lock();
2244 
2245 	retval = cnt;
2246 
2247 	if (enable) {
2248 		if (option == OSN_DEFAULTS)
2249 			osnoise_options = OSN_DEFAULT_OPTIONS;
2250 		else
2251 			set_bit(option, &osnoise_options);
2252 	} else {
2253 		if (option == OSN_DEFAULTS)
2254 			retval = -EINVAL;
2255 		else
2256 			clear_bit(option, &osnoise_options);
2257 	}
2258 
2259 	cpus_read_unlock();
2260 	mutex_unlock(&interface_lock);
2261 
2262 	if (running)
2263 		start_per_cpu_kthreads();
2264 	mutex_unlock(&trace_types_lock);
2265 
2266 	return retval;
2267 }
2268 
2269 /*
2270  * osnoise_cpus_read - Read function for reading the "cpus" file
2271  * @filp: The active open file structure
2272  * @ubuf: The userspace provided buffer to read value into
2273  * @cnt: The maximum number of bytes to read
2274  * @ppos: The current "file" position
2275  *
2276  * Prints the "cpus" output into the user-provided buffer.
2277  */
2278 static ssize_t
osnoise_cpus_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)2279 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
2280 		  loff_t *ppos)
2281 {
2282 	char *mask_str __free(kfree) = NULL;
2283 	int len;
2284 
2285 	guard(mutex)(&interface_lock);
2286 
2287 	len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
2288 	mask_str = kmalloc(len, GFP_KERNEL);
2289 	if (!mask_str)
2290 		return -ENOMEM;
2291 
2292 	len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2293 	if (len >= count)
2294 		return -EINVAL;
2295 
2296 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2297 
2298 	return count;
2299 }
2300 
2301 /*
2302  * osnoise_cpus_write - Write function for "cpus" entry
2303  * @filp: The active open file structure
2304  * @ubuf: The user buffer that contains the value to write
2305  * @cnt: The maximum number of bytes to write to "file"
2306  * @ppos: The current position in @file
2307  *
2308  * This function provides a write implementation for the "cpus"
2309  * interface to the osnoise trace. By default, it lists all  CPUs,
2310  * in this way, allowing osnoise threads to run on any online CPU
2311  * of the system. It serves to restrict the execution of osnoise to the
2312  * set of CPUs writing via this interface. Why not use "tracing_cpumask"?
2313  * Because the user might be interested in tracing what is running on
2314  * other CPUs. For instance, one might run osnoise in one HT CPU
2315  * while observing what is running on the sibling HT CPU.
2316  */
2317 static ssize_t
osnoise_cpus_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)2318 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
2319 		   loff_t *ppos)
2320 {
2321 	cpumask_var_t osnoise_cpumask_new;
2322 	int running, err;
2323 	char buf[256];
2324 
2325 	if (count >= 256)
2326 		return -EINVAL;
2327 
2328 	if (copy_from_user(buf, ubuf, count))
2329 		return -EFAULT;
2330 
2331 	if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
2332 		return -ENOMEM;
2333 
2334 	err = cpulist_parse(buf, osnoise_cpumask_new);
2335 	if (err)
2336 		goto err_free;
2337 
2338 	/*
2339 	 * trace_types_lock is taken to avoid concurrency on start/stop.
2340 	 */
2341 	mutex_lock(&trace_types_lock);
2342 	running = osnoise_has_registered_instances();
2343 	if (running)
2344 		stop_per_cpu_kthreads();
2345 
2346 	mutex_lock(&interface_lock);
2347 	/*
2348 	 * osnoise_cpumask is read by CPU hotplug operations.
2349 	 */
2350 	cpus_read_lock();
2351 
2352 	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
2353 
2354 	cpus_read_unlock();
2355 	mutex_unlock(&interface_lock);
2356 
2357 	if (running)
2358 		start_per_cpu_kthreads();
2359 	mutex_unlock(&trace_types_lock);
2360 
2361 	free_cpumask_var(osnoise_cpumask_new);
2362 	return count;
2363 
2364 err_free:
2365 	free_cpumask_var(osnoise_cpumask_new);
2366 
2367 	return err;
2368 }
2369 
2370 #ifdef CONFIG_TIMERLAT_TRACER
timerlat_fd_open(struct inode * inode,struct file * file)2371 static int timerlat_fd_open(struct inode *inode, struct file *file)
2372 {
2373 	struct osnoise_variables *osn_var;
2374 	struct timerlat_variables *tlat;
2375 	long cpu = (long) inode->i_cdev;
2376 
2377 	mutex_lock(&interface_lock);
2378 
2379 	/*
2380 	 * This file is accessible only if timerlat is enabled, and
2381 	 * NO_OSNOISE_WORKLOAD is set.
2382 	 */
2383 	if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
2384 		mutex_unlock(&interface_lock);
2385 		return -EINVAL;
2386 	}
2387 
2388 	migrate_disable();
2389 
2390 	osn_var = this_cpu_osn_var();
2391 
2392 	/*
2393 	 * The osn_var->pid holds the single access to this file.
2394 	 */
2395 	if (osn_var->pid) {
2396 		mutex_unlock(&interface_lock);
2397 		migrate_enable();
2398 		return -EBUSY;
2399 	}
2400 
2401 	/*
2402 	 * timerlat tracer is a per-cpu tracer. Check if the user-space too
2403 	 * is pinned to a single CPU. The tracer laters monitor if the task
2404 	 * migrates and then disables tracer if it does. However, it is
2405 	 * worth doing this basic acceptance test to avoid obviusly wrong
2406 	 * setup.
2407 	 */
2408 	if (current->nr_cpus_allowed > 1 ||  cpu != smp_processor_id()) {
2409 		mutex_unlock(&interface_lock);
2410 		migrate_enable();
2411 		return -EPERM;
2412 	}
2413 
2414 	/*
2415 	 * From now on, it is good to go.
2416 	 */
2417 	file->private_data = inode->i_cdev;
2418 
2419 	get_task_struct(current);
2420 
2421 	osn_var->kthread = current;
2422 	osn_var->pid = current->pid;
2423 
2424 	/*
2425 	 * Setup is done.
2426 	 */
2427 	mutex_unlock(&interface_lock);
2428 
2429 	tlat = this_cpu_tmr_var();
2430 	tlat->count = 0;
2431 
2432 	hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
2433 
2434 	migrate_enable();
2435 	return 0;
2436 };
2437 
2438 /*
2439  * timerlat_fd_read - Read function for "timerlat_fd" file
2440  * @file: The active open file structure
2441  * @ubuf: The userspace provided buffer to read value into
2442  * @cnt: The maximum number of bytes to read
2443  * @ppos: The current "file" position
2444  *
2445  * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error.
2446  */
2447 static ssize_t
timerlat_fd_read(struct file * file,char __user * ubuf,size_t count,loff_t * ppos)2448 timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
2449 		  loff_t *ppos)
2450 {
2451 	long cpu = (long) file->private_data;
2452 	struct osnoise_variables *osn_var;
2453 	struct timerlat_variables *tlat;
2454 	struct timerlat_sample s;
2455 	s64 diff;
2456 	u64 now;
2457 
2458 	migrate_disable();
2459 
2460 	tlat = this_cpu_tmr_var();
2461 
2462 	/*
2463 	 * While in user-space, the thread is migratable. There is nothing
2464 	 * we can do about it.
2465 	 * So, if the thread is running on another CPU, stop the machinery.
2466 	 */
2467 	if (cpu == smp_processor_id()) {
2468 		if (tlat->uthread_migrate) {
2469 			migrate_enable();
2470 			return -EINVAL;
2471 		}
2472 	} else {
2473 		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
2474 		osnoise_taint("timerlat user thread migrate\n");
2475 		osnoise_stop_tracing();
2476 		migrate_enable();
2477 		return -EINVAL;
2478 	}
2479 
2480 	osn_var = this_cpu_osn_var();
2481 
2482 	/*
2483 	 * The timerlat in user-space runs in a different order:
2484 	 * the read() starts from the execution of the previous occurrence,
2485 	 * sleeping for the next occurrence.
2486 	 *
2487 	 * So, skip if we are entering on read() before the first wakeup
2488 	 * from timerlat IRQ:
2489 	 */
2490 	if (likely(osn_var->sampling)) {
2491 		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2492 		diff = now - tlat->abs_period;
2493 
2494 		/*
2495 		 * it was not a timer firing, but some other signal?
2496 		 */
2497 		if (diff < 0)
2498 			goto out;
2499 
2500 		s.seqnum = tlat->count;
2501 		s.timer_latency = diff;
2502 		s.context = THREAD_URET;
2503 
2504 		record_timerlat_sample(&s);
2505 
2506 		notify_new_max_latency(diff);
2507 
2508 		tlat->tracing_thread = false;
2509 		if (osnoise_data.stop_tracing_total)
2510 			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
2511 				osnoise_stop_tracing();
2512 	} else {
2513 		tlat->tracing_thread = false;
2514 		tlat->kthread = current;
2515 
2516 		/* Annotate now to drift new period */
2517 		tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
2518 
2519 		osn_var->sampling = 1;
2520 	}
2521 
2522 	/* wait for the next period */
2523 	wait_next_period(tlat);
2524 
2525 	/* This is the wakeup from this cycle */
2526 	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2527 	diff = now - tlat->abs_period;
2528 
2529 	/*
2530 	 * it was not a timer firing, but some other signal?
2531 	 */
2532 	if (diff < 0)
2533 		goto out;
2534 
2535 	s.seqnum = tlat->count;
2536 	s.timer_latency = diff;
2537 	s.context = THREAD_CONTEXT;
2538 
2539 	record_timerlat_sample(&s);
2540 
2541 	if (osnoise_data.stop_tracing_total) {
2542 		if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
2543 			timerlat_dump_stack(time_to_us(diff));
2544 			notify_new_max_latency(diff);
2545 			osnoise_stop_tracing();
2546 		}
2547 	}
2548 
2549 out:
2550 	migrate_enable();
2551 	return 0;
2552 }
2553 
timerlat_fd_release(struct inode * inode,struct file * file)2554 static int timerlat_fd_release(struct inode *inode, struct file *file)
2555 {
2556 	struct osnoise_variables *osn_var;
2557 	struct timerlat_variables *tlat_var;
2558 	long cpu = (long) file->private_data;
2559 
2560 	migrate_disable();
2561 	mutex_lock(&interface_lock);
2562 
2563 	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
2564 	tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
2565 
2566 	if (tlat_var->kthread)
2567 		hrtimer_cancel(&tlat_var->timer);
2568 	memset(tlat_var, 0, sizeof(*tlat_var));
2569 
2570 	osn_var->sampling = 0;
2571 	osn_var->pid = 0;
2572 
2573 	/*
2574 	 * We are leaving, not being stopped... see stop_kthread();
2575 	 */
2576 	if (osn_var->kthread) {
2577 		put_task_struct(osn_var->kthread);
2578 		osn_var->kthread = NULL;
2579 	}
2580 
2581 	mutex_unlock(&interface_lock);
2582 	migrate_enable();
2583 	return 0;
2584 }
2585 #endif
2586 
2587 /*
2588  * osnoise/runtime_us: cannot be greater than the period.
2589  */
2590 static struct trace_min_max_param osnoise_runtime = {
2591 	.lock	= &interface_lock,
2592 	.val	= &osnoise_data.sample_runtime,
2593 	.max	= &osnoise_data.sample_period,
2594 	.min	= NULL,
2595 };
2596 
2597 /*
2598  * osnoise/period_us: cannot be smaller than the runtime.
2599  */
2600 static struct trace_min_max_param osnoise_period = {
2601 	.lock	= &interface_lock,
2602 	.val	= &osnoise_data.sample_period,
2603 	.max	= NULL,
2604 	.min	= &osnoise_data.sample_runtime,
2605 };
2606 
2607 /*
2608  * osnoise/stop_tracing_us: no limit.
2609  */
2610 static struct trace_min_max_param osnoise_stop_tracing_in = {
2611 	.lock	= &interface_lock,
2612 	.val	= &osnoise_data.stop_tracing,
2613 	.max	= NULL,
2614 	.min	= NULL,
2615 };
2616 
2617 /*
2618  * osnoise/stop_tracing_total_us: no limit.
2619  */
2620 static struct trace_min_max_param osnoise_stop_tracing_total = {
2621 	.lock	= &interface_lock,
2622 	.val	= &osnoise_data.stop_tracing_total,
2623 	.max	= NULL,
2624 	.min	= NULL,
2625 };
2626 
2627 #ifdef CONFIG_TIMERLAT_TRACER
2628 /*
2629  * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
2630  * latency is higher than val.
2631  */
2632 static struct trace_min_max_param osnoise_print_stack = {
2633 	.lock	= &interface_lock,
2634 	.val	= &osnoise_data.print_stack,
2635 	.max	= NULL,
2636 	.min	= NULL,
2637 };
2638 
2639 /*
2640  * osnoise/timerlat_period: min 100 us, max 1 s
2641  */
2642 static u64 timerlat_min_period = 100;
2643 static u64 timerlat_max_period = 1000000;
2644 static struct trace_min_max_param timerlat_period = {
2645 	.lock	= &interface_lock,
2646 	.val	= &osnoise_data.timerlat_period,
2647 	.max	= &timerlat_max_period,
2648 	.min	= &timerlat_min_period,
2649 };
2650 
2651 static const struct file_operations timerlat_fd_fops = {
2652 	.open		= timerlat_fd_open,
2653 	.read		= timerlat_fd_read,
2654 	.release	= timerlat_fd_release,
2655 	.llseek		= generic_file_llseek,
2656 };
2657 #endif
2658 
2659 static const struct file_operations cpus_fops = {
2660 	.open		= tracing_open_generic,
2661 	.read		= osnoise_cpus_read,
2662 	.write		= osnoise_cpus_write,
2663 	.llseek		= generic_file_llseek,
2664 };
2665 
2666 static const struct file_operations osnoise_options_fops = {
2667 	.open		= osnoise_options_open,
2668 	.read		= seq_read,
2669 	.llseek		= seq_lseek,
2670 	.release	= seq_release,
2671 	.write		= osnoise_options_write
2672 };
2673 
2674 #ifdef CONFIG_TIMERLAT_TRACER
2675 #ifdef CONFIG_STACKTRACE
init_timerlat_stack_tracefs(struct dentry * top_dir)2676 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2677 {
2678 	struct dentry *tmp;
2679 
2680 	tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
2681 				  &osnoise_print_stack, &trace_min_max_fops);
2682 	if (!tmp)
2683 		return -ENOMEM;
2684 
2685 	return 0;
2686 }
2687 #else /* CONFIG_STACKTRACE */
init_timerlat_stack_tracefs(struct dentry * top_dir)2688 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2689 {
2690 	return 0;
2691 }
2692 #endif /* CONFIG_STACKTRACE */
2693 
osnoise_create_cpu_timerlat_fd(struct dentry * top_dir)2694 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
2695 {
2696 	struct dentry *timerlat_fd;
2697 	struct dentry *per_cpu;
2698 	struct dentry *cpu_dir;
2699 	char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
2700 	long cpu;
2701 
2702 	/*
2703 	 * Why not using tracing instance per_cpu/ dir?
2704 	 *
2705 	 * Because osnoise/timerlat have a single workload, having
2706 	 * multiple files like these are wast of memory.
2707 	 */
2708 	per_cpu = tracefs_create_dir("per_cpu", top_dir);
2709 	if (!per_cpu)
2710 		return -ENOMEM;
2711 
2712 	for_each_possible_cpu(cpu) {
2713 		snprintf(cpu_str, 30, "cpu%ld", cpu);
2714 		cpu_dir = tracefs_create_dir(cpu_str, per_cpu);
2715 		if (!cpu_dir)
2716 			goto out_clean;
2717 
2718 		timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
2719 						cpu_dir, NULL, &timerlat_fd_fops);
2720 		if (!timerlat_fd)
2721 			goto out_clean;
2722 
2723 		/* Record the CPU */
2724 		d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
2725 	}
2726 
2727 	return 0;
2728 
2729 out_clean:
2730 	tracefs_remove(per_cpu);
2731 	return -ENOMEM;
2732 }
2733 
2734 /*
2735  * init_timerlat_tracefs - A function to initialize the timerlat interface files
2736  */
init_timerlat_tracefs(struct dentry * top_dir)2737 static int init_timerlat_tracefs(struct dentry *top_dir)
2738 {
2739 	struct dentry *tmp;
2740 	int retval;
2741 
2742 	tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
2743 				  &timerlat_period, &trace_min_max_fops);
2744 	if (!tmp)
2745 		return -ENOMEM;
2746 
2747 	retval = osnoise_create_cpu_timerlat_fd(top_dir);
2748 	if (retval)
2749 		return retval;
2750 
2751 	return init_timerlat_stack_tracefs(top_dir);
2752 }
2753 #else /* CONFIG_TIMERLAT_TRACER */
init_timerlat_tracefs(struct dentry * top_dir)2754 static int init_timerlat_tracefs(struct dentry *top_dir)
2755 {
2756 	return 0;
2757 }
2758 #endif /* CONFIG_TIMERLAT_TRACER */
2759 
2760 /*
2761  * init_tracefs - A function to initialize the tracefs interface files
2762  *
2763  * This function creates entries in tracefs for "osnoise" and "timerlat".
2764  * It creates these directories in the tracing directory, and within that
2765  * directory the use can change and view the configs.
2766  */
init_tracefs(void)2767 static int init_tracefs(void)
2768 {
2769 	struct dentry *top_dir;
2770 	struct dentry *tmp;
2771 	int ret;
2772 
2773 	ret = tracing_init_dentry();
2774 	if (ret)
2775 		return -ENOMEM;
2776 
2777 	top_dir = tracefs_create_dir("osnoise", NULL);
2778 	if (!top_dir)
2779 		return 0;
2780 
2781 	tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
2782 				  &osnoise_period, &trace_min_max_fops);
2783 	if (!tmp)
2784 		goto err;
2785 
2786 	tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
2787 				  &osnoise_runtime, &trace_min_max_fops);
2788 	if (!tmp)
2789 		goto err;
2790 
2791 	tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
2792 				  &osnoise_stop_tracing_in, &trace_min_max_fops);
2793 	if (!tmp)
2794 		goto err;
2795 
2796 	tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
2797 				  &osnoise_stop_tracing_total, &trace_min_max_fops);
2798 	if (!tmp)
2799 		goto err;
2800 
2801 	tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
2802 	if (!tmp)
2803 		goto err;
2804 
2805 	tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
2806 				&osnoise_options_fops);
2807 	if (!tmp)
2808 		goto err;
2809 
2810 	ret = init_timerlat_tracefs(top_dir);
2811 	if (ret)
2812 		goto err;
2813 
2814 	return 0;
2815 
2816 err:
2817 	tracefs_remove(top_dir);
2818 	return -ENOMEM;
2819 }
2820 
osnoise_hook_events(void)2821 static int osnoise_hook_events(void)
2822 {
2823 	int retval;
2824 
2825 	/*
2826 	 * Trace is already hooked, we are re-enabling from
2827 	 * a stop_tracing_*.
2828 	 */
2829 	if (trace_osnoise_callback_enabled)
2830 		return 0;
2831 
2832 	retval = hook_irq_events();
2833 	if (retval)
2834 		return -EINVAL;
2835 
2836 	retval = hook_softirq_events();
2837 	if (retval)
2838 		goto out_unhook_irq;
2839 
2840 	retval = hook_thread_events();
2841 	/*
2842 	 * All fine!
2843 	 */
2844 	if (!retval)
2845 		return 0;
2846 
2847 	unhook_softirq_events();
2848 out_unhook_irq:
2849 	unhook_irq_events();
2850 	return -EINVAL;
2851 }
2852 
osnoise_unhook_events(void)2853 static void osnoise_unhook_events(void)
2854 {
2855 	unhook_thread_events();
2856 	unhook_softirq_events();
2857 	unhook_irq_events();
2858 }
2859 
2860 /*
2861  * osnoise_workload_start - start the workload and hook to events
2862  */
osnoise_workload_start(void)2863 static int osnoise_workload_start(void)
2864 {
2865 	int retval;
2866 
2867 	/*
2868 	 * Instances need to be registered after calling workload
2869 	 * start. Hence, if there is already an instance, the
2870 	 * workload was already registered. Otherwise, this
2871 	 * code is on the way to register the first instance,
2872 	 * and the workload will start.
2873 	 */
2874 	if (osnoise_has_registered_instances())
2875 		return 0;
2876 
2877 	osn_var_reset_all();
2878 
2879 	retval = osnoise_hook_events();
2880 	if (retval)
2881 		return retval;
2882 
2883 	/*
2884 	 * Make sure that ftrace_nmi_enter/exit() see reset values
2885 	 * before enabling trace_osnoise_callback_enabled.
2886 	 */
2887 	barrier();
2888 	trace_osnoise_callback_enabled = true;
2889 
2890 	retval = start_per_cpu_kthreads();
2891 	if (retval) {
2892 		trace_osnoise_callback_enabled = false;
2893 		/*
2894 		 * Make sure that ftrace_nmi_enter/exit() see
2895 		 * trace_osnoise_callback_enabled as false before continuing.
2896 		 */
2897 		barrier();
2898 
2899 		osnoise_unhook_events();
2900 		return retval;
2901 	}
2902 
2903 	return 0;
2904 }
2905 
2906 /*
2907  * osnoise_workload_stop - stop the workload and unhook the events
2908  */
osnoise_workload_stop(void)2909 static void osnoise_workload_stop(void)
2910 {
2911 	/*
2912 	 * Instances need to be unregistered before calling
2913 	 * stop. Hence, if there is a registered instance, more
2914 	 * than one instance is running, and the workload will not
2915 	 * yet stop. Otherwise, this code is on the way to disable
2916 	 * the last instance, and the workload can stop.
2917 	 */
2918 	if (osnoise_has_registered_instances())
2919 		return;
2920 
2921 	/*
2922 	 * If callbacks were already disabled in a previous stop
2923 	 * call, there is no need to disable then again.
2924 	 *
2925 	 * For instance, this happens when tracing is stopped via:
2926 	 * echo 0 > tracing_on
2927 	 * echo nop > current_tracer.
2928 	 */
2929 	if (!trace_osnoise_callback_enabled)
2930 		return;
2931 
2932 	trace_osnoise_callback_enabled = false;
2933 	/*
2934 	 * Make sure that ftrace_nmi_enter/exit() see
2935 	 * trace_osnoise_callback_enabled as false before continuing.
2936 	 */
2937 	barrier();
2938 
2939 	stop_per_cpu_kthreads();
2940 
2941 	osnoise_unhook_events();
2942 }
2943 
osnoise_tracer_start(struct trace_array * tr)2944 static void osnoise_tracer_start(struct trace_array *tr)
2945 {
2946 	int retval;
2947 
2948 	/*
2949 	 * If the instance is already registered, there is no need to
2950 	 * register it again.
2951 	 */
2952 	if (osnoise_instance_registered(tr))
2953 		return;
2954 
2955 	retval = osnoise_workload_start();
2956 	if (retval)
2957 		pr_err(BANNER "Error starting osnoise tracer\n");
2958 
2959 	osnoise_register_instance(tr);
2960 }
2961 
osnoise_tracer_stop(struct trace_array * tr)2962 static void osnoise_tracer_stop(struct trace_array *tr)
2963 {
2964 	osnoise_unregister_instance(tr);
2965 	osnoise_workload_stop();
2966 }
2967 
osnoise_tracer_init(struct trace_array * tr)2968 static int osnoise_tracer_init(struct trace_array *tr)
2969 {
2970 	/*
2971 	 * Only allow osnoise tracer if timerlat tracer is not running
2972 	 * already.
2973 	 */
2974 	if (timerlat_enabled())
2975 		return -EBUSY;
2976 
2977 	tr->max_latency = 0;
2978 
2979 	osnoise_tracer_start(tr);
2980 	return 0;
2981 }
2982 
osnoise_tracer_reset(struct trace_array * tr)2983 static void osnoise_tracer_reset(struct trace_array *tr)
2984 {
2985 	osnoise_tracer_stop(tr);
2986 }
2987 
2988 static struct tracer osnoise_tracer __read_mostly = {
2989 	.name		= "osnoise",
2990 	.init		= osnoise_tracer_init,
2991 	.reset		= osnoise_tracer_reset,
2992 	.start		= osnoise_tracer_start,
2993 	.stop		= osnoise_tracer_stop,
2994 	.print_header	= print_osnoise_headers,
2995 	.allow_instances = true,
2996 };
2997 
2998 #ifdef CONFIG_TIMERLAT_TRACER
timerlat_tracer_start(struct trace_array * tr)2999 static void timerlat_tracer_start(struct trace_array *tr)
3000 {
3001 	int retval;
3002 
3003 	/*
3004 	 * If the instance is already registered, there is no need to
3005 	 * register it again.
3006 	 */
3007 	if (osnoise_instance_registered(tr))
3008 		return;
3009 
3010 	retval = osnoise_workload_start();
3011 	if (retval)
3012 		pr_err(BANNER "Error starting timerlat tracer\n");
3013 
3014 	osnoise_register_instance(tr);
3015 
3016 	return;
3017 }
3018 
timerlat_tracer_stop(struct trace_array * tr)3019 static void timerlat_tracer_stop(struct trace_array *tr)
3020 {
3021 	int cpu;
3022 
3023 	osnoise_unregister_instance(tr);
3024 
3025 	/*
3026 	 * Instruct the threads to stop only if this is the last instance.
3027 	 */
3028 	if (!osnoise_has_registered_instances()) {
3029 		for_each_online_cpu(cpu)
3030 			per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
3031 	}
3032 
3033 	osnoise_workload_stop();
3034 }
3035 
timerlat_tracer_init(struct trace_array * tr)3036 static int timerlat_tracer_init(struct trace_array *tr)
3037 {
3038 	/*
3039 	 * Only allow timerlat tracer if osnoise tracer is not running already.
3040 	 */
3041 	if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
3042 		return -EBUSY;
3043 
3044 	/*
3045 	 * If this is the first instance, set timerlat_tracer to block
3046 	 * osnoise tracer start.
3047 	 */
3048 	if (!osnoise_has_registered_instances())
3049 		osnoise_data.timerlat_tracer = 1;
3050 
3051 	tr->max_latency = 0;
3052 	timerlat_tracer_start(tr);
3053 
3054 	return 0;
3055 }
3056 
timerlat_tracer_reset(struct trace_array * tr)3057 static void timerlat_tracer_reset(struct trace_array *tr)
3058 {
3059 	timerlat_tracer_stop(tr);
3060 
3061 	/*
3062 	 * If this is the last instance, reset timerlat_tracer allowing
3063 	 * osnoise to be started.
3064 	 */
3065 	if (!osnoise_has_registered_instances())
3066 		osnoise_data.timerlat_tracer = 0;
3067 }
3068 
3069 static struct tracer timerlat_tracer __read_mostly = {
3070 	.name		= "timerlat",
3071 	.init		= timerlat_tracer_init,
3072 	.reset		= timerlat_tracer_reset,
3073 	.start		= timerlat_tracer_start,
3074 	.stop		= timerlat_tracer_stop,
3075 	.print_header	= print_timerlat_headers,
3076 	.allow_instances = true,
3077 };
3078 
init_timerlat_tracer(void)3079 __init static int init_timerlat_tracer(void)
3080 {
3081 	return register_tracer(&timerlat_tracer);
3082 }
3083 #else /* CONFIG_TIMERLAT_TRACER */
init_timerlat_tracer(void)3084 __init static int init_timerlat_tracer(void)
3085 {
3086 	return 0;
3087 }
3088 #endif /* CONFIG_TIMERLAT_TRACER */
3089 
init_osnoise_tracer(void)3090 __init static int init_osnoise_tracer(void)
3091 {
3092 	int ret;
3093 
3094 	mutex_init(&interface_lock);
3095 
3096 	cpumask_copy(&osnoise_cpumask, cpu_all_mask);
3097 
3098 	ret = register_tracer(&osnoise_tracer);
3099 	if (ret) {
3100 		pr_err(BANNER "Error registering osnoise!\n");
3101 		return ret;
3102 	}
3103 
3104 	ret = init_timerlat_tracer();
3105 	if (ret) {
3106 		pr_err(BANNER "Error registering timerlat!\n");
3107 		return ret;
3108 	}
3109 
3110 	osnoise_init_hotplug_support();
3111 
3112 	INIT_LIST_HEAD_RCU(&osnoise_instances);
3113 
3114 	init_tracefs();
3115 
3116 	return 0;
3117 }
3118 late_initcall(init_osnoise_tracer);
3119