xref: /linux/kernel/trace/trace_osnoise.c (revision 5472d60c129f75282d94ae5ad072ee6dfb7c7246)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * OS Noise Tracer: computes the OS Noise suffered by a running thread.
4  * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
5  *
6  * Based on "hwlat_detector" tracer by:
7  *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
8  *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
9  *   With feedback from Clark Williams <williams@redhat.com>
10  *
11  * And also based on the rtsl tracer presented in:
12  *  DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
13  *  scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
14  *  (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
15  *
16  * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
17  */
18 
19 #include <linux/kthread.h>
20 #include <linux/tracefs.h>
21 #include <linux/uaccess.h>
22 #include <linux/cpumask.h>
23 #include <linux/delay.h>
24 #include <linux/sched/clock.h>
25 #include <uapi/linux/sched/types.h>
26 #include <linux/sched.h>
27 #include <linux/string.h>
28 #include "trace.h"
29 
30 #ifdef CONFIG_X86_LOCAL_APIC
31 #include <asm/trace/irq_vectors.h>
32 #undef TRACE_INCLUDE_PATH
33 #undef TRACE_INCLUDE_FILE
34 #endif /* CONFIG_X86_LOCAL_APIC */
35 
36 #include <trace/events/irq.h>
37 #include <trace/events/sched.h>
38 
39 #define CREATE_TRACE_POINTS
40 #include <trace/events/osnoise.h>
41 
42 /*
43  * Default values.
44  */
45 #define BANNER			"osnoise: "
46 #define DEFAULT_SAMPLE_PERIOD	1000000			/* 1s */
47 #define DEFAULT_SAMPLE_RUNTIME	1000000			/* 1s */
48 
49 #define DEFAULT_TIMERLAT_PERIOD	1000			/* 1ms */
50 #define DEFAULT_TIMERLAT_PRIO	95			/* FIFO 95 */
51 
52 /*
53  * osnoise/options entries.
54  */
55 enum osnoise_options_index {
56 	OSN_DEFAULTS = 0,
57 	OSN_WORKLOAD,
58 	OSN_PANIC_ON_STOP,
59 	OSN_PREEMPT_DISABLE,
60 	OSN_IRQ_DISABLE,
61 	OSN_MAX
62 };
63 
64 static const char * const osnoise_options_str[OSN_MAX] = {
65 							"DEFAULTS",
66 							"OSNOISE_WORKLOAD",
67 							"PANIC_ON_STOP",
68 							"OSNOISE_PREEMPT_DISABLE",
69 							"OSNOISE_IRQ_DISABLE" };
70 
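/*
 * OSN_WORKLOAD is bit 1 in the enum above, so OSN_DEFAULT_OPTIONS (0x2)
 * corresponds to BIT(OSN_WORKLOAD): only OSNOISE_WORKLOAD is set by
 * default.
 */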
71 #define OSN_DEFAULT_OPTIONS		0x2
72 static unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
73 
74 /*
75  * trace_array of the enabled osnoise/timerlat instances.
76  */
77 struct osnoise_instance {
78 	struct list_head	list;
79 	struct trace_array	*tr;
80 };
81 
82 static struct list_head osnoise_instances;
83 
84 static bool osnoise_has_registered_instances(void)
85 {
86 	return !!list_first_or_null_rcu(&osnoise_instances,
87 					struct osnoise_instance,
88 					list);
89 }
90 
91 /*
92  * osnoise_instance_registered - check if a tr is already registered
93  */
94 static int osnoise_instance_registered(struct trace_array *tr)
95 {
96 	struct osnoise_instance *inst;
97 	int found = 0;
98 
99 	rcu_read_lock();
100 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
101 		if (inst->tr == tr)
102 			found = 1;
103 	}
104 	rcu_read_unlock();
105 
106 	return found;
107 }
108 
109 /*
110  * osnoise_register_instance - register a new trace instance
111  *
112  * Register a trace_array *tr in the list of instances running
113  * osnoise/timerlat tracers.
114  */
115 static int osnoise_register_instance(struct trace_array *tr)
116 {
117 	struct osnoise_instance *inst;
118 
119 	/*
120 	 * register/unregister serialization is provided by trace's
121 	 * trace_types_lock.
122 	 */
123 	lockdep_assert_held(&trace_types_lock);
124 
125 	inst = kmalloc(sizeof(*inst), GFP_KERNEL);
126 	if (!inst)
127 		return -ENOMEM;
128 
129 	INIT_LIST_HEAD_RCU(&inst->list);
130 	inst->tr = tr;
131 	list_add_tail_rcu(&inst->list, &osnoise_instances);
132 
133 	return 0;
134 }
135 
136 /*
137  * osnoise_unregister_instance - unregister a registered trace instance
138  *
139  * Remove the trace_array *tr from the list of instances running
140  * osnoise/timerlat tracers.
141  */
142 static void osnoise_unregister_instance(struct trace_array *tr)
143 {
144 	struct osnoise_instance *inst;
145 	int found = 0;
146 
147 	/*
148 	 * register/unregister serialization is provided by trace's
149 	 * trace_types_lock.
150 	 */
151 	list_for_each_entry_rcu(inst, &osnoise_instances, list,
152 				lockdep_is_held(&trace_types_lock)) {
153 		if (inst->tr == tr) {
154 			list_del_rcu(&inst->list);
155 			found = 1;
156 			break;
157 		}
158 	}
159 
160 	if (!found)
161 		return;
162 
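	/*
	 * Readers iterating osnoise_instances under rcu_read_lock() may
	 * still hold a reference to inst; defer the actual free until a
	 * grace period has elapsed.
	 */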
163 	kvfree_rcu_mightsleep(inst);
164 }
165 
166 /*
167  * NMI runtime info.
168  */
169 struct osn_nmi {
170 	u64	count;
171 	u64	delta_start;
172 };
173 
174 /*
175  * IRQ runtime info.
176  */
177 struct osn_irq {
178 	u64	count;
179 	u64	arrival_time;
180 	u64	delta_start;
181 };
182 
183 #define IRQ_CONTEXT	0
184 #define THREAD_CONTEXT	1
185 #define THREAD_URET	2
186 /*
187  * softirq runtime info.
188  */
189 struct osn_softirq {
190 	u64	count;
191 	u64	arrival_time;
192 	u64	delta_start;
193 };
194 
195 /*
196  * thread runtime info.
197  */
198 struct osn_thread {
199 	u64	count;
200 	u64	arrival_time;
201 	u64	delta_start;
202 };
203 
204 /*
205  * Runtime information: this structure saves the runtime information used by
206  * one sampling thread.
207  */
208 struct osnoise_variables {
209 	struct task_struct	*kthread;
210 	bool			sampling;
211 	pid_t			pid;
212 	struct osn_nmi		nmi;
213 	struct osn_irq		irq;
214 	struct osn_softirq	softirq;
215 	struct osn_thread	thread;
216 	local_t			int_counter;
217 };
218 
219 /*
220  * Per-cpu runtime information.
221  */
222 static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);
223 
224 /*
225  * this_cpu_osn_var - Return the per-cpu osnoise_variables of the current CPU
226  */
227 static inline struct osnoise_variables *this_cpu_osn_var(void)
228 {
229 	return this_cpu_ptr(&per_cpu_osnoise_var);
230 }
231 
232 /*
233  * Protect the interface.
234  */
235 static struct mutex interface_lock;
236 
237 #ifdef CONFIG_TIMERLAT_TRACER
238 /*
239  * Runtime information for the timer mode.
240  */
241 struct timerlat_variables {
242 	struct task_struct	*kthread;
243 	struct hrtimer		timer;
244 	u64			rel_period;
245 	u64			abs_period;
246 	bool			tracing_thread;
247 	u64			count;
248 	bool			uthread_migrate;
249 };
250 
251 static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
252 
253 /*
254  * this_cpu_tmr_var - Return the per-cpu timerlat_variables of the current CPU
255  */
256 static inline struct timerlat_variables *this_cpu_tmr_var(void)
257 {
258 	return this_cpu_ptr(&per_cpu_timerlat_var);
259 }
260 
261 /*
262  * tlat_var_reset - Reset the values of the per-cpu timerlat_variables
263  */
264 static inline void tlat_var_reset(void)
265 {
266 	struct timerlat_variables *tlat_var;
267 	int cpu;
268 
269 	/* Synchronize with the timerlat interfaces */
270 	mutex_lock(&interface_lock);
271 	/*
272 	 * So far, all the values are initialized as 0, so
273 	 * zeroing the structure is perfect.
274 	 */
275 	for_each_online_cpu(cpu) {
276 		tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
277 		if (tlat_var->kthread)
278 			hrtimer_cancel(&tlat_var->timer);
279 		memset(tlat_var, 0, sizeof(*tlat_var));
280 	}
281 	mutex_unlock(&interface_lock);
282 }
283 #else /* CONFIG_TIMERLAT_TRACER */
284 #define tlat_var_reset()	do {} while (0)
285 #endif /* CONFIG_TIMERLAT_TRACER */
286 
287 /*
288  * osn_var_reset - Reset the values of the per-cpu osnoise_variables
289  */
290 static inline void osn_var_reset(void)
291 {
292 	struct osnoise_variables *osn_var;
293 	int cpu;
294 
295 	/*
296 	 * So far, all the values are initialized as 0, so
297 	 * zeroing the structure is perfect.
298 	 */
299 	for_each_online_cpu(cpu) {
300 		osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
301 		memset(osn_var, 0, sizeof(*osn_var));
302 	}
303 }
304 
305 /*
306  * osn_var_reset_all - Reset the values of all per-cpu osnoise_variables
307  */
308 static inline void osn_var_reset_all(void)
309 {
310 	osn_var_reset();
311 	tlat_var_reset();
312 }
313 
314 /*
315  * Tells NMIs to call back to the osnoise tracer to record timestamps.
316  */
317 bool trace_osnoise_callback_enabled;
318 
319 /*
320  * Tracer data.
321  */
322 static struct osnoise_data {
323 	u64	sample_period;		/* total sampling period */
324 	u64	sample_runtime;		/* active sampling portion of period */
325 	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
326 	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
327 #ifdef CONFIG_TIMERLAT_TRACER
328 	u64	timerlat_period;	/* timerlat period */
329 	u64	print_stack;		/* print IRQ stack if total > */
330 	int	timerlat_tracer;	/* timerlat tracer */
331 #endif
332 	bool	tainted;		/* inform users and developers about a problem */
333 } osnoise_data = {
334 	.sample_period			= DEFAULT_SAMPLE_PERIOD,
335 	.sample_runtime			= DEFAULT_SAMPLE_RUNTIME,
336 	.stop_tracing			= 0,
337 	.stop_tracing_total		= 0,
338 #ifdef CONFIG_TIMERLAT_TRACER
339 	.print_stack			= 0,
340 	.timerlat_period		= DEFAULT_TIMERLAT_PERIOD,
341 	.timerlat_tracer		= 0,
342 #endif
343 };
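
/*
 * These fields back the tracefs files in the osnoise/ directory (e.g.,
 * period_us, runtime_us, stop_tracing_us, stop_tracing_total_us and, for
 * timerlat, timerlat_period_us and print_stack), all handled in
 * microseconds. A hypothetical session with the defaults:
 *
 *	# cat /sys/kernel/tracing/osnoise/period_us
 *	1000000
 *	# echo 500000 > /sys/kernel/tracing/osnoise/runtime_us
 */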
344 
345 #ifdef CONFIG_TIMERLAT_TRACER
346 static inline bool timerlat_enabled(void)
347 {
348 	return osnoise_data.timerlat_tracer;
349 }
350 
351 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
352 {
353 	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
354 	/*
355 	 * If timerlat is enabled, but the irq handler has not
356 	 * run yet to enable tracing (tracing_thread), do not trace.
357 	 */
358 	if (!tlat_var->tracing_thread) {
359 		osn_var->softirq.arrival_time = 0;
360 		osn_var->softirq.delta_start = 0;
361 		return 0;
362 	}
363 	return 1;
364 }
365 
366 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
367 {
368 	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
369 	/*
370 	 * If timerlat is enabled, but the irq handler has not
371 	 * run yet to enable tracing (tracing_thread), do not trace.
372 	 */
373 	if (!tlat_var->tracing_thread) {
374 		osn_var->thread.delta_start = 0;
375 		osn_var->thread.arrival_time = 0;
376 		return 0;
377 	}
378 	return 1;
379 }
380 #else /* CONFIG_TIMERLAT_TRACER */
381 static inline bool timerlat_enabled(void)
382 {
383 	return false;
384 }
385 
386 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
387 {
388 	return 1;
389 }
390 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
391 {
392 	return 1;
393 }
394 #endif
395 
396 #ifdef CONFIG_PREEMPT_RT
397 /*
398  * Print the osnoise header info.
399  */
400 static void print_osnoise_headers(struct seq_file *s)
401 {
402 	if (osnoise_data.tainted)
403 		seq_puts(s, "# osnoise is tainted!\n");
404 
405 	seq_puts(s, "#                                _-------=> irqs-off\n");
406 	seq_puts(s, "#                               / _------=> need-resched\n");
407 	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
408 	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
409 	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
410 	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
411 	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
412 
413 	seq_puts(s, "#                              |||||| /          ");
414 	seq_puts(s, "                                     MAX\n");
415 
416 	seq_puts(s, "#                              ||||| /                         ");
417 	seq_puts(s, "                    SINGLE      Interference counters:\n");
418 
419 	seq_puts(s, "#                              |||||||               RUNTIME   ");
420 	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
421 
422 	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
423 	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
424 
425 	seq_puts(s, "#              | |         |   |||||||      |           |      ");
426 	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
427 }
428 #else /* CONFIG_PREEMPT_RT */
429 static void print_osnoise_headers(struct seq_file *s)
430 {
431 	if (osnoise_data.tainted)
432 		seq_puts(s, "# osnoise is tainted!\n");
433 
434 	seq_puts(s, "#                                _-----=> irqs-off\n");
435 	seq_puts(s, "#                               / _----=> need-resched\n");
436 	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
437 	seq_puts(s, "#                              || / _--=> preempt-depth\n");
438 	seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
439 	seq_puts(s, "                    MAX\n");
440 	seq_puts(s, "#                              |||| /     delay               ");
441 	seq_puts(s, "                    SINGLE      Interference counters:\n");
442 
443 	seq_puts(s, "#                              |||||               RUNTIME   ");
444 	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
445 
446 	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US    ");
447 	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
448 
449 	seq_puts(s, "#              | |         |   |||||      |           |      ");
450 	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
451 }
452 #endif /* CONFIG_PREEMPT_RT */
453 
454 /*
455  * osnoise_taint - report an osnoise error.
456  */
457 #define osnoise_taint(msg) ({							\
458 	struct osnoise_instance *inst;						\
459 	struct trace_buffer *buffer;						\
460 										\
461 	rcu_read_lock();							\
462 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {		\
463 		buffer = inst->tr->array_buffer.buffer;				\
464 		trace_array_printk_buf(buffer, _THIS_IP_, msg);			\
465 	}									\
466 	rcu_read_unlock();							\
467 	osnoise_data.tainted = true;						\
468 })
469 
470 /*
471  * Record an osnoise_sample into the tracer buffer.
472  */
473 static void
474 __record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
475 {
476 	struct ring_buffer_event *event;
477 	struct osnoise_entry *entry;
478 
479 	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
480 					  tracing_gen_ctx());
481 	if (!event)
482 		return;
483 	entry	= ring_buffer_event_data(event);
484 	entry->runtime		= sample->runtime;
485 	entry->noise		= sample->noise;
486 	entry->max_sample	= sample->max_sample;
487 	entry->hw_count		= sample->hw_count;
488 	entry->nmi_count	= sample->nmi_count;
489 	entry->irq_count	= sample->irq_count;
490 	entry->softirq_count	= sample->softirq_count;
491 	entry->thread_count	= sample->thread_count;
492 
493 	trace_buffer_unlock_commit_nostack(buffer, event);
494 }
495 
496 /*
497  * Record an osnoise_sample on all osnoise instances and fire trace event.
498  */
499 static void record_osnoise_sample(struct osnoise_sample *sample)
500 {
501 	struct osnoise_instance *inst;
502 	struct trace_buffer *buffer;
503 
504 	trace_osnoise_sample(sample);
505 
506 	rcu_read_lock();
507 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
508 		buffer = inst->tr->array_buffer.buffer;
509 		__record_osnoise_sample(sample, buffer);
510 	}
511 	rcu_read_unlock();
512 }
513 
514 #ifdef CONFIG_TIMERLAT_TRACER
515 /*
516  * Print the timerlat header info.
517  */
518 #ifdef CONFIG_PREEMPT_RT
519 static void print_timerlat_headers(struct seq_file *s)
520 {
521 	seq_puts(s, "#                                _-------=> irqs-off\n");
522 	seq_puts(s, "#                               / _------=> need-resched\n");
523 	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
524 	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
525 	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
526 	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
527 	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
528 	seq_puts(s, "#                              |||||| /\n");
529 	seq_puts(s, "#                              |||||||             ACTIVATION\n");
530 	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
531 	seq_puts(s, "       CONTEXT                LATENCY\n");
532 	seq_puts(s, "#              | |         |   |||||||      |         |      ");
533 	seq_puts(s, "            |                       |\n");
534 }
535 #else /* CONFIG_PREEMPT_RT */
536 static void print_timerlat_headers(struct seq_file *s)
537 {
538 	seq_puts(s, "#                                _-----=> irqs-off\n");
539 	seq_puts(s, "#                               / _----=> need-resched\n");
540 	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
541 	seq_puts(s, "#                              || / _--=> preempt-depth\n");
542 	seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
543 	seq_puts(s, "#                              |||| /     delay\n");
544 	seq_puts(s, "#                              |||||            ACTIVATION\n");
545 	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP   ID      ");
546 	seq_puts(s, "      CONTEXT                 LATENCY\n");
547 	seq_puts(s, "#              | |         |   |||||      |         |      ");
548 	seq_puts(s, "            |                       |\n");
549 }
550 #endif /* CONFIG_PREEMPT_RT */
551 
552 static void
553 __record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
554 {
555 	struct ring_buffer_event *event;
556 	struct timerlat_entry *entry;
557 
558 	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
559 					  tracing_gen_ctx());
560 	if (!event)
561 		return;
562 	entry	= ring_buffer_event_data(event);
563 	entry->seqnum			= sample->seqnum;
564 	entry->context			= sample->context;
565 	entry->timer_latency		= sample->timer_latency;
566 
567 	trace_buffer_unlock_commit_nostack(buffer, event);
568 }
569 
570 /*
571  * Record a timerlat_sample into the tracer buffer.
572  */
573 static void record_timerlat_sample(struct timerlat_sample *sample)
574 {
575 	struct osnoise_instance *inst;
576 	struct trace_buffer *buffer;
577 
578 	trace_timerlat_sample(sample);
579 
580 	rcu_read_lock();
581 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
582 		buffer = inst->tr->array_buffer.buffer;
583 		__record_timerlat_sample(sample, buffer);
584 	}
585 	rcu_read_unlock();
586 }
587 
588 #ifdef CONFIG_STACKTRACE
589 
590 #define	MAX_CALLS	256
591 
592 /*
593  * Stack trace will take place only at IRQ level, so there is no
594  * need to control nesting here.
595  */
596 struct trace_stack {
597 	int		stack_size;
598 	int		nr_entries;
599 	unsigned long	calls[MAX_CALLS];
600 };
601 
602 static DEFINE_PER_CPU(struct trace_stack, trace_stack);
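
/*
 * On a 64-bit kernel, each per-cpu trace_stack above costs MAX_CALLS *
 * sizeof(unsigned long) = 256 * 8 = 2 KiB for the calls array alone.
 */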
603 
604 /*
605  * timerlat_save_stack - save a stack trace without printing
606  *
607  * Save the current stack trace without printing. The
608  * stack will be printed later, after the end of the measurement.
609  */
610 static void timerlat_save_stack(int skip)
611 {
612 	unsigned int size, nr_entries;
613 	struct trace_stack *fstack;
614 
615 	fstack = this_cpu_ptr(&trace_stack);
616 
617 	size = ARRAY_SIZE(fstack->calls);
618 
619 	nr_entries = stack_trace_save(fstack->calls, size, skip);
620 
621 	fstack->stack_size = nr_entries * sizeof(unsigned long);
622 	fstack->nr_entries = nr_entries;
623 
626 }
627 
628 static void
629 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
630 {
631 	struct ring_buffer_event *event;
632 	struct stack_entry *entry;
633 
634 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
635 					  tracing_gen_ctx());
636 	if (!event)
637 		return;
638 
639 	entry = ring_buffer_event_data(event);
640 
641 	entry->size = fstack->nr_entries;
642 	memcpy(&entry->caller, fstack->calls, size);
643 
644 	trace_buffer_unlock_commit_nostack(buffer, event);
645 }
646 
647 /*
648  * timerlat_dump_stack - dump a stack trace previously saved
649  */
650 static void timerlat_dump_stack(u64 latency)
651 {
652 	struct osnoise_instance *inst;
653 	struct trace_buffer *buffer;
654 	struct trace_stack *fstack;
655 	unsigned int size;
656 
657 	/*
658 	 * trace only if latency > print_stack config, if enabled.
659 	 */
660 	if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
661 		return;
662 
663 	preempt_disable_notrace();
664 	fstack = this_cpu_ptr(&trace_stack);
665 	size = fstack->stack_size;
666 
667 	rcu_read_lock();
668 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
669 		buffer = inst->tr->array_buffer.buffer;
670 		__timerlat_dump_stack(buffer, fstack, size);
671 
672 	}
673 	rcu_read_unlock();
674 	preempt_enable_notrace();
675 }
676 #else /* CONFIG_STACKTRACE */
677 #define timerlat_dump_stack(latency) do {} while (0)
678 #define timerlat_save_stack(a) do {} while (0)
679 #endif /* CONFIG_STACKTRACE */
680 #endif /* CONFIG_TIMERLAT_TRACER */
681 
682 /*
683  * Macros to encapsulate the time capturing infrastructure.
684  */
685 #define time_get()	trace_clock_local()
686 #define time_to_us(x)	div_u64(x, 1000)
687 #define time_sub(a, b)	((a) - (b))
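
/*
 * A typical use of the helpers above, as done by the exit handlers below
 * (with delta_start previously recorded via set_int_safe_time()):
 *
 *	duration = time_sub(time_get(), delta_start);
 *	duration_us = time_to_us(duration);
 */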
688 
689 /*
690  * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
691  *
692  * If an IRQ is preempted by an NMI, its delta_start is pushed forward
693  * to discount the NMI interference.
694  *
695  * See get_int_safe_duration().
696  */
697 static inline void
698 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
699 {
700 	if (osn_var->irq.delta_start)
701 		osn_var->irq.delta_start += duration;
702 }
703 
704 #ifndef CONFIG_PREEMPT_RT
705 /*
706  * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
707  *
708  * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
709  * forward to discount the interference.
710  *
711  * See get_int_safe_duration().
712  */
713 static inline void
714 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
715 {
716 	if (osn_var->softirq.delta_start)
717 		osn_var->softirq.delta_start += duration;
718 }
719 #else /* CONFIG_PREEMPT_RT */
720 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
721 #endif
722 
723 /*
724  * cond_move_thread_delta_start - Forward the delta_start of a running thread
725  *
726  * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start
727  * is pushed forward to discount the interference.
728  *
729  * See get_int_safe_duration().
730  */
731 static inline void
732 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
733 {
734 	if (osn_var->thread.delta_start)
735 		osn_var->thread.delta_start += duration;
736 }
737 
738 /*
739  * get_int_safe_duration - Get the duration of a window
740  *
741  * The irq, softirq and thread variables need to have their duration without
742  * the interference from higher priority interrupts. Instead of keeping a
743  * variable to discount the interrupt interference from these variables, the
744  * starting time of these variables are pushed forward with the interrupt's
745  * duration. In this way, a single variable is used to:
746  *
747  *   - Know if a given window is being measured.
748  *   - Account its duration.
749  *   - Discount the interference.
750  *
751  * To avoid getting inconsistent values, e.g.,:
752  *
753  *	now = time_get()
754  *		--->	interrupt!
755  *			delta_start -= int duration;
756  *		<---
757  *	duration = now - delta_start;
758  *
759  *	result: negative duration if the variable duration before the
760  *	interrupt was smaller than the interrupt execution.
761  *
762  * A counter of interrupts is used. If the counter increased, try
763  * to capture an interference safe duration.
764  */
765 static inline s64
766 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
767 {
768 	u64 int_counter, now;
769 	s64 duration;
770 
771 	do {
772 		int_counter = local_read(&osn_var->int_counter);
773 		/* synchronize with interrupts */
774 		barrier();
775 
776 		now = time_get();
777 		duration = (now - *delta_start);
778 
779 		/* synchronize with interrupts */
780 		barrier();
781 	} while (int_counter != local_read(&osn_var->int_counter));
782 
783 	/*
784 	 * This is evidence of a race condition that caused
785 	 * a value to be "discounted" too much.
786 	 */
787 	if (duration < 0)
788 		osnoise_taint("Negative duration!\n");
789 
790 	*delta_start = 0;
791 
792 	return duration;
793 }
794 
795 /*
797  * set_int_safe_time - Save the current time on *time, aware of interference
798  *
799  * Get the time, taking into consideration a possible interference from
800  * higher priority interrupts.
801  *
802  * See get_int_safe_duration() for an explanation.
803  */
804 static u64
805 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
806 {
807 	u64 int_counter;
808 
809 	do {
810 		int_counter = local_read(&osn_var->int_counter);
811 		/* synchronize with interrupts */
812 		barrier();
813 
814 		*time = time_get();
815 
816 		/* synchronize with interrupts */
817 		barrier();
818 	} while (int_counter != local_read(&osn_var->int_counter));
819 
820 	return int_counter;
821 }
822 
823 #ifdef CONFIG_TIMERLAT_TRACER
824 /*
825  * copy_int_safe_time - Copy *src into *dst, aware of interference
826  */
827 static u64
828 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
829 {
830 	u64 int_counter;
831 
832 	do {
833 		int_counter = local_read(&osn_var->int_counter);
834 		/* synchronize with interrupts */
835 		barrier();
836 
837 		*dst = *src;
838 
839 		/* synchronize with interrupts */
840 		barrier();
841 	} while (int_counter != local_read(&osn_var->int_counter));
842 
843 	return int_counter;
844 }
845 #endif /* CONFIG_TIMERLAT_TRACER */
846 
847 /*
848  * trace_osnoise_callback - NMI entry/exit callback
849  *
850  * This function is called at the NMI entry and exit code. The bool enter
851  * distinguishes between the two cases. This function is used to note an NMI
852  * occurrence, compute the noise caused by the NMI, and to remove the noise
853  * it is potentially causing on other interference variables.
854  */
855 void trace_osnoise_callback(bool enter)
856 {
857 	struct osnoise_variables *osn_var = this_cpu_osn_var();
858 	u64 duration;
859 
860 	if (!osn_var->sampling)
861 		return;
862 
863 	/*
864 	 * Currently trace_clock_local() calls sched_clock() and the
865 	 * generic version is not NMI safe.
866 	 */
867 	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
868 		if (enter) {
869 			osn_var->nmi.delta_start = time_get();
870 			local_inc(&osn_var->int_counter);
871 		} else {
872 			duration = time_get() - osn_var->nmi.delta_start;
873 
874 			trace_nmi_noise(osn_var->nmi.delta_start, duration);
875 
876 			cond_move_irq_delta_start(osn_var, duration);
877 			cond_move_softirq_delta_start(osn_var, duration);
878 			cond_move_thread_delta_start(osn_var, duration);
879 		}
880 	}
881 
882 	if (enter)
883 		osn_var->nmi.count++;
884 }
885 
886 /*
887  * osnoise_trace_irq_entry - Note the starting of an IRQ
888  *
889  * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
890  * it is safe to use a single variable (osn_var->irq) to save the statistics.
891  * The arrival_time is used to report... the arrival time. The delta_start
892  * is used to compute the duration at the IRQ exit handler. See
893  * cond_move_irq_delta_start().
894  */
895 void osnoise_trace_irq_entry(int id)
896 {
897 	struct osnoise_variables *osn_var = this_cpu_osn_var();
898 
899 	if (!osn_var->sampling)
900 		return;
901 	/*
902 	 * This value will be used in the report, but not to compute
903 	 * the execution time, so it is safe to get it unsafe.
904 	 */
905 	osn_var->irq.arrival_time = time_get();
906 	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
907 	osn_var->irq.count++;
908 
909 	local_inc(&osn_var->int_counter);
910 }
911 
912 /*
913  * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
914  *
915  * Computes the duration of the IRQ noise and traces it. It also discounts the
916  * interference from other sources of noise that could currently be accounted.
917  */
918 void osnoise_trace_irq_exit(int id, const char *desc)
919 {
920 	struct osnoise_variables *osn_var = this_cpu_osn_var();
921 	s64 duration;
922 
923 	if (!osn_var->sampling)
924 		return;
925 
926 	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
927 	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
928 	osn_var->irq.arrival_time = 0;
929 	cond_move_softirq_delta_start(osn_var, duration);
930 	cond_move_thread_delta_start(osn_var, duration);
931 }
932 
933 /*
934  * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
935  *
936  * Used to note the start of an IRQ occurrence.
937  */
938 static void trace_irqentry_callback(void *data, int irq,
939 				    struct irqaction *action)
940 {
941 	osnoise_trace_irq_entry(irq);
942 }
943 
944 /*
945  * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
946  *
947  * Used to note the end of an IRQ occurrence.
948  */
949 static void trace_irqexit_callback(void *data, int irq,
950 				   struct irqaction *action, int ret)
951 {
952 	osnoise_trace_irq_exit(irq, action->name);
953 }
954 
955 /*
956  * arch specific register function.
957  */
958 int __weak osnoise_arch_register(void)
959 {
960 	return 0;
961 }
962 
963 /*
964  * arch specific unregister function.
965  */
966 void __weak osnoise_arch_unregister(void)
967 {
968 	return;
969 }
970 
971 /*
972  * hook_irq_events - Hook IRQ handling events
973  *
974  * This function hooks the IRQ related callbacks to the respective trace
975  * events.
976  */
977 static int hook_irq_events(void)
978 {
979 	int ret;
980 
981 	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
982 	if (ret)
983 		goto out_err;
984 
985 	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
986 	if (ret)
987 		goto out_unregister_entry;
988 
989 	ret = osnoise_arch_register();
990 	if (ret)
991 		goto out_irq_exit;
992 
993 	return 0;
994 
995 out_irq_exit:
996 	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
997 out_unregister_entry:
998 	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
999 out_err:
1000 	return -EINVAL;
1001 }
1002 
1003 /*
1004  * unhook_irq_events - Unhook IRQ handling events
1005  *
1006  * This function unhooks the IRQ related callbacks to the respective trace
1007  * events.
1008  */
1009 static void unhook_irq_events(void)
1010 {
1011 	osnoise_arch_unregister();
1012 	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
1013 	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
1014 }
1015 
1016 #ifndef CONFIG_PREEMPT_RT
1017 /*
1018  * trace_softirq_entry_callback - Note the starting of a softirq
1019  *
1020  * Save the starting time of a softirq. As softirqs are non-preemptive to
1021  * other softirqs, it is safe to use a single variable (osn_var->softirq)
1022  * to save the statistics. The arrival_time is used to report... the
1023  * arrival time. The delta_start is used to compute the duration at the
1024  * softirq exit handler. See cond_move_softirq_delta_start().
1025  */
1026 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
1027 {
1028 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1029 
1030 	if (!osn_var->sampling)
1031 		return;
1032 	/*
1033 	 * This value will be used in the report, but not to compute
1034 	 * the execution time, so it is safe to get it unsafe.
1035 	 */
1036 	osn_var->softirq.arrival_time = time_get();
1037 	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
1038 	osn_var->softirq.count++;
1039 
1040 	local_inc(&osn_var->int_counter);
1041 }
1042 
1043 /*
1044  * trace_softirq_exit_callback - Note the end of a softirq
1045  *
1046  * Computes the duration of the softirq noise and traces it. It also discounts the
1047  * interference from other sources of noise that could currently be accounted.
1048  */
1049 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
1050 {
1051 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1052 	s64 duration;
1053 
1054 	if (!osn_var->sampling)
1055 		return;
1056 
1057 	if (unlikely(timerlat_enabled()))
1058 		if (!timerlat_softirq_exit(osn_var))
1059 			return;
1060 
1061 	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
1062 	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
1063 	cond_move_thread_delta_start(osn_var, duration);
1064 	osn_var->softirq.arrival_time = 0;
1065 }
1066 
1067 /*
1068  * hook_softirq_events - Hook softirq handling events
1069  *
1070  * This function hooks the softirq related callbacks to the respective trace
1071  * events.
1072  */
1073 static int hook_softirq_events(void)
1074 {
1075 	int ret;
1076 
1077 	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1078 	if (ret)
1079 		goto out_err;
1080 
1081 	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
1082 	if (ret)
1083 		goto out_unreg_entry;
1084 
1085 	return 0;
1086 
1087 out_unreg_entry:
1088 	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1089 out_err:
1090 	return -EINVAL;
1091 }
1092 
1093 /*
1094  * unhook_softirq_events - Unhook softirq handling events
1095  *
1096  * This function unhooks the softirq related callbacks from the respective trace
1097  * events.
1098  */
1099 static void unhook_softirq_events(void)
1100 {
1101 	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1102 	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
1103 }
1104 #else /* CONFIG_PREEMPT_RT */
1105 /*
1106  * softirqs run as threads on PREEMPT_RT.
1107  */
1108 static int hook_softirq_events(void)
1109 {
1110 	return 0;
1111 }
1112 static void unhook_softirq_events(void)
1113 {
1114 }
1115 #endif
1116 
1117 /*
1118  * thread_entry - Record the starting of a thread noise window
1119  *
1120  * It saves the context switch time for a noisy thread, and increments
1121  * the interference counters.
1122  */
1123 static void
1124 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
1125 {
1126 	if (!osn_var->sampling)
1127 		return;
1128 	/*
1129 	 * The arrival time will be used in the report, but not to compute
1130 	 * the execution time, so it is safe to get it unsafe.
1131 	 */
1132 	osn_var->thread.arrival_time = time_get();
1133 
1134 	set_int_safe_time(osn_var, &osn_var->thread.delta_start);
1135 
1136 	osn_var->thread.count++;
1137 	local_inc(&osn_var->int_counter);
1138 }
1139 
1140 /*
1141  * thread_exit - Report the end of a thread noise window
1142  *
1143  * It computes the total noise from a thread, tracing if needed.
1144  */
1145 static void
1146 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
1147 {
1148 	s64 duration;
1149 
1150 	if (!osn_var->sampling)
1151 		return;
1152 
1153 	if (unlikely(timerlat_enabled()))
1154 		if (!timerlat_thread_exit(osn_var))
1155 			return;
1156 
1157 	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);
1158 
1159 	trace_thread_noise(t, osn_var->thread.arrival_time, duration);
1160 
1161 	osn_var->thread.arrival_time = 0;
1162 }
1163 
1164 #ifdef CONFIG_TIMERLAT_TRACER
1165 /*
1166  * osnoise_stop_exception - Stop tracing and the tracer in response to an exception.
1167  */
1168 static __always_inline void osnoise_stop_exception(char *msg, int cpu)
1169 {
1170 	struct osnoise_instance *inst;
1171 	struct trace_array *tr;
1172 
1173 	rcu_read_lock();
1174 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1175 		tr = inst->tr;
1176 		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1177 				       "stop tracing hit on cpu %d due to exception: %s\n",
1178 				       smp_processor_id(),
1179 				       msg);
1180 
1181 		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
1182 			panic("tracer hit on cpu %d due to exception: %s\n",
1183 			      smp_processor_id(),
1184 			      msg);
1185 
1186 		tracer_tracing_off(tr);
1187 	}
1188 	rcu_read_unlock();
1189 }
1190 
1191 /*
1192  * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler
1193  *
1194  * This function is hooked to the sched:sched_migrate_task trace event, and it monitors
1195  * timerlat user-space thread migration.
1196  */
1197 static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
1198 {
1199 	struct osnoise_variables *osn_var;
1200 	long cpu = task_cpu(p);
1201 
1202 	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
1203 	if (osn_var->pid == p->pid && dest_cpu != cpu) {
1204 		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
1205 		osnoise_taint("timerlat user-thread migrated\n");
1206 		osnoise_stop_exception("timerlat user-thread migrated", cpu);
1207 	}
1208 }
1209 
1210 static bool monitor_enabled;
1211 
1212 static int register_migration_monitor(void)
1213 {
1214 	int ret = 0;
1215 
1216 	/*
1217 	 * Timerlat thread migration check is only required when running timerlat in user-space.
1218 	 * Thus, enable callback only if timerlat is set with no workload.
1219 	 */
1220 	if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) {
1221 		if (WARN_ON_ONCE(monitor_enabled))
1222 			return 0;
1223 
1224 		ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
1225 		if (!ret)
1226 			monitor_enabled = true;
1227 	}
1228 
1229 	return ret;
1230 }
1231 
1232 static void unregister_migration_monitor(void)
1233 {
1234 	if (!monitor_enabled)
1235 		return;
1236 
1237 	unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
1238 	monitor_enabled = false;
1239 }
1240 #else
1241 static int register_migration_monitor(void)
1242 {
1243 	return 0;
1244 }
1245 static void unregister_migration_monitor(void) {}
1246 #endif
1247 /*
1248  * trace_sched_switch_callback - sched:sched_switch trace event handler
1249  *
1250  * This function is hooked to the sched:sched_switch trace event, and it is
1251  * used to record the beginning and to report the end of a thread noise window.
1252  */
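
/*
 * In practice: with the OSNOISE_WORKLOAD option set, the sampling
 * kthread (osn_var->pid) is itself not accounted as thread noise, while
 * any other task switching in or out is; with the option unset
 * (user-space workload), every context switch is recorded.
 */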
1253 static void
1254 trace_sched_switch_callback(void *data, bool preempt,
1255 			    struct task_struct *p,
1256 			    struct task_struct *n,
1257 			    unsigned int prev_state)
1258 {
1259 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1260 	int workload = test_bit(OSN_WORKLOAD, &osnoise_options);
1261 
1262 	if ((p->pid != osn_var->pid) || !workload)
1263 		thread_exit(osn_var, p);
1264 
1265 	if ((n->pid != osn_var->pid) || !workload)
1266 		thread_entry(osn_var, n);
1267 }
1268 
1269 /*
1270  * hook_thread_events - Hook the instrumentation for thread noise
1271  *
1272  * Hook the osnoise tracer callbacks to handle the noise from other
1273  * threads on the necessary kernel events.
1274  */
1275 static int hook_thread_events(void)
1276 {
1277 	int ret;
1278 
1279 	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
1280 	if (ret)
1281 		return -EINVAL;
1282 
1283 	ret = register_migration_monitor();
1284 	if (ret)
1285 		goto out_unreg;
1286 
1287 	return 0;
1288 
1289 out_unreg:
1290 	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1291 	return -EINVAL;
1292 }
1293 
1294 /*
1295  * unhook_thread_events - unhook the instrumentation for thread noise
1296  *
1297  * Unhook the osnoise tracer callbacks that handle the noise from other
1298  * threads on the necessary kernel events.
1299  */
1300 static void unhook_thread_events(void)
1301 {
1302 	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1303 	unregister_migration_monitor();
1304 }
1305 
1306 /*
1307  * save_osn_sample_stats - Save the osnoise_sample statistics
1308  *
1309  * Save the osnoise_sample statistics before the sampling phase. These
1310  * values will be used later to compute the diff between the statistics
1311  * before and after the osnoise sampling.
1312  */
1313 static void
1314 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1315 {
1316 	s->nmi_count = osn_var->nmi.count;
1317 	s->irq_count = osn_var->irq.count;
1318 	s->softirq_count = osn_var->softirq.count;
1319 	s->thread_count = osn_var->thread.count;
1320 }
1321 
1322 /*
1323  * diff_osn_sample_stats - Compute the osnoise_sample statistics
1324  *
1325  * After a sample period, compute the difference on the osnoise_sample
1326  * statistics. The struct osnoise_sample *s contains the statistics saved via
1327  * save_osn_sample_stats() before the osnoise sampling.
1328  */
1329 static void
1330 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1331 {
1332 	s->nmi_count = osn_var->nmi.count - s->nmi_count;
1333 	s->irq_count = osn_var->irq.count - s->irq_count;
1334 	s->softirq_count = osn_var->softirq.count - s->softirq_count;
1335 	s->thread_count = osn_var->thread.count - s->thread_count;
1336 }
1337 
1338 /*
1339  * osnoise_stop_tracing - Stop tracing and the tracer.
1340  */
1341 static __always_inline void osnoise_stop_tracing(void)
1342 {
1343 	struct osnoise_instance *inst;
1344 	struct trace_array *tr;
1345 
1346 	rcu_read_lock();
1347 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1348 		tr = inst->tr;
1349 		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1350 				"stop tracing hit on cpu %d\n", smp_processor_id());
1351 
1352 		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
1353 			panic("tracer hit stop condition on CPU %d\n", smp_processor_id());
1354 
1355 		tracer_tracing_off(tr);
1356 	}
1357 	rcu_read_unlock();
1358 }
1359 
1360 /*
1361  * osnoise_has_tracing_on - Check if there is at least one instance on
1362  */
1363 static __always_inline int osnoise_has_tracing_on(void)
1364 {
1365 	struct osnoise_instance *inst;
1366 	int trace_is_on = 0;
1367 
1368 	rcu_read_lock();
1369 	list_for_each_entry_rcu(inst, &osnoise_instances, list)
1370 		trace_is_on += tracer_tracing_is_on(inst->tr);
1371 	rcu_read_unlock();
1372 
1373 	return trace_is_on;
1374 }
1375 
1376 /*
1377  * notify_new_max_latency - Notify a new max latency via fsnotify interface.
1378  */
1379 static void notify_new_max_latency(u64 latency)
1380 {
1381 	struct osnoise_instance *inst;
1382 	struct trace_array *tr;
1383 
1384 	rcu_read_lock();
1385 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1386 		tr = inst->tr;
1387 		if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
1388 			tr->max_latency = latency;
1389 			latency_fsnotify(tr);
1390 		}
1391 	}
1392 	rcu_read_unlock();
1393 }
1394 
1395 /*
1396  * run_osnoise - Sample the time and look for osnoise
1397  *
1398  * Used to capture the time, looking for potential osnoise latency repeatedly.
1399  * Different from hwlat_detector, it is called with preemption and interrupts
1400  * enabled. This allows irqs, softirqs and threads to run, interfering on the
1401  * osnoise sampling thread, as they would do with a regular thread.
1402  */
1403 static int run_osnoise(void)
1404 {
1405 	bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
1406 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1407 	u64 start, sample, last_sample;
1408 	u64 last_int_count, int_count;
1409 	s64 noise = 0, max_noise = 0;
1410 	s64 total, last_total = 0;
1411 	struct osnoise_sample s;
1412 	bool disable_preemption;
1413 	unsigned int threshold;
1414 	u64 runtime, stop_in;
1415 	u64 sum_noise = 0;
1416 	int hw_count = 0;
1417 	int ret = -1;
1418 
1419 	/*
1420 	 * Disabling preemption is only required if IRQs are enabled,
1421 	 * and the option is set.
1422 	 */
1423 	disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);
1424 
1425 	/*
1426 	 * Considers the current thread as the workload.
1427 	 */
1428 	osn_var->pid = current->pid;
1429 
1430 	/*
1431 	 * Save the current stats for the diff
1432 	 */
1433 	save_osn_sample_stats(osn_var, &s);
1434 
1435 	/*
1436 	 * if threshold is 0, use the default value of 1 us.
1437 	 */
1438 	threshold = tracing_thresh ? : 1000;
1439 
1440 	/*
1441 	 * Apply PREEMPT and IRQ disabled options.
1442 	 */
1443 	if (disable_irq)
1444 		local_irq_disable();
1445 
1446 	if (disable_preemption)
1447 		preempt_disable();
1448 
1449 	/*
1450 	 * Make sure NMIs see sampling first
1451 	 */
1452 	osn_var->sampling = true;
1453 	barrier();
1454 
1455 	/*
1456 	 * Transform the *_us config to nanoseconds to avoid the
1457 	 * division on the main loop.
1458 	 */
1459 	runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
1460 	stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;
1461 
1462 	/*
1463 	 * Start timestamp.
1464 	 */
1465 	start = time_get();
1466 
1467 	/*
1468 	 * "previous" loop.
1469 	 */
1470 	last_int_count = set_int_safe_time(osn_var, &last_sample);
1471 
1472 	do {
1473 		/*
1474 		 * Get sample!
1475 		 */
1476 		int_count = set_int_safe_time(osn_var, &sample);
1477 
1478 		noise = time_sub(sample, last_sample);
1479 
1480 		/*
1481 		 * This shouldn't happen.
1482 		 */
1483 		if (noise < 0) {
1484 			osnoise_taint("negative noise!");
1485 			goto out;
1486 		}
1487 
1488 		/*
1489 		 * Sample runtime.
1490 		 */
1491 		total = time_sub(sample, start);
1492 
1493 		/*
1494 		 * Check for possible overflows.
1495 		 */
1496 		if (total < last_total) {
1497 			osnoise_taint("total overflow!");
1498 			break;
1499 		}
1500 
1501 		last_total = total;
1502 
1503 		if (noise >= threshold) {
1504 			int interference = int_count - last_int_count;
1505 
1506 			if (noise > max_noise)
1507 				max_noise = noise;
1508 
1509 			if (!interference)
1510 				hw_count++;
1511 
1512 			sum_noise += noise;
1513 
1514 			trace_sample_threshold(last_sample, noise, interference);
1515 
1516 			if (osnoise_data.stop_tracing)
1517 				if (noise > stop_in)
1518 					osnoise_stop_tracing();
1519 		}
1520 
1521 		/*
1522 		 * In some cases, notably when running on a nohz_full CPU with
1523 		 * a stopped tick, PREEMPT_RCU or PREEMPT_LAZY have no way to
1524 		 * account for QSs. This will eventually cause unwarranted
1525 		 * noise as RCU forces preemption as the means of ending the
1526 		 * current grace period.  We avoid this by calling
1527 		 * rcu_momentary_eqs(), which performs a zero duration EQS
1528 		 * allowing RCU to end the current grace period. This call
1529 		 * shouldn't be wrapped inside an RCU critical section.
1530 		 *
1531 		 * Normally QSs for other cases are handled through cond_resched().
1532 		 * For simplicity, however, we call rcu_momentary_eqs() for all
1533 		 * configurations here.
1534 		 */
1535 		if (!disable_irq)
1536 			local_irq_disable();
1537 
1538 		rcu_momentary_eqs();
1539 
1540 		if (!disable_irq)
1541 			local_irq_enable();
1542 
1543 		/*
1544 		 * For the non-preemptive kernel config: let threads run, if
1545 		 * they so wish, unless set not to do so.
1546 		 */
1547 		if (!disable_irq && !disable_preemption)
1548 			cond_resched();
1549 
1550 		last_sample = sample;
1551 		last_int_count = int_count;
1552 
1553 	} while (total < runtime && !kthread_should_stop());
1554 
1555 	/*
1556 	 * Finish the above from the viewpoint of interrupts.
1557 	 */
1558 	barrier();
1559 
1560 	osn_var->sampling = false;
1561 
1562 	/*
1563 	 * Make sure sampling data is no longer updated.
1564 	 */
1565 	barrier();
1566 
1567 	/*
1568 	 * Return to the preemptive state.
1569 	 */
1570 	if (disable_preemption)
1571 		preempt_enable();
1572 
1573 	if (disable_irq)
1574 		local_irq_enable();
1575 
1576 	/*
1577 	 * Save noise info.
1578 	 */
1579 	s.noise = time_to_us(sum_noise);
1580 	s.runtime = time_to_us(total);
1581 	s.max_sample = time_to_us(max_noise);
1582 	s.hw_count = hw_count;
1583 
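	/*
	 * The "% OF CPU AVAILABLE" column of the report is derived from
	 * these values as (runtime - noise) / runtime: e.g., 63 us of
	 * noise over a 1000000 us runtime leaves 99.99370% of the CPU.
	 */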
1584 	/* Save interference stats info */
1585 	diff_osn_sample_stats(osn_var, &s);
1586 
1587 	record_osnoise_sample(&s);
1588 
1589 	notify_new_max_latency(max_noise);
1590 
1591 	if (osnoise_data.stop_tracing_total)
1592 		if (s.noise > osnoise_data.stop_tracing_total)
1593 			osnoise_stop_tracing();
1594 
1595 	return 0;
1596 out:
1597 	return ret;
1598 }
1599 
1600 static struct cpumask osnoise_cpumask;
1601 static struct cpumask save_cpumask;
1602 static struct cpumask kthread_cpumask;
1603 
1604 /*
1605  * osnoise_sleep - sleep until the next period
1606  */
1607 static void osnoise_sleep(bool skip_period)
1608 {
1609 	u64 interval;
1610 	ktime_t wake_time;
1611 
1612 	mutex_lock(&interface_lock);
1613 	if (skip_period)
1614 		interval = osnoise_data.sample_period;
1615 	else
1616 		interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
1617 	mutex_unlock(&interface_lock);
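	/*
	 * Note that with the defaults, sample_period == sample_runtime
	 * (1 s each), so the computed interval is 0.
	 */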
1618 
1619 	/*
1620 	 * Differently from hwlat_detector, the osnoise tracer can run
1621 	 * without a pause because preemption is on.
1622 	 */
1623 	if (!interval) {
1624 		/* Let synchronize_rcu_tasks() make progress */
1625 		cond_resched_tasks_rcu_qs();
1626 		return;
1627 	}
1628 
1629 	wake_time = ktime_add_us(ktime_get(), interval);
1630 	__set_current_state(TASK_INTERRUPTIBLE);
1631 
1632 	while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
1633 		if (kthread_should_stop())
1634 			break;
1635 	}
1636 }
1637 
1638 /*
1639  * osnoise_migration_pending - checks if the task needs to migrate
1640  *
1641  * osnoise/timerlat threads are per-cpu. If there is a pending request to
1642  * migrate the thread away from the current CPU, something bad has happened.
1643  * Play the good citizen and leave.
1644  *
1645  * Returns 0 if it is safe to continue, 1 otherwise.
1646  */
1647 static inline int osnoise_migration_pending(void)
1648 {
1649 	if (!current->migration_pending)
1650 		return 0;
1651 
1652 	/*
1653 	 * If migration is pending, there is a task waiting for the
1654 	 * tracer to enable migration. The tracer does not allow migration,
1655 	 * thus: taint and leave to unblock the blocked thread.
1656 	 */
1657 	osnoise_taint("migration requested to osnoise threads, leaving.");
1658 
1659 	/*
1660 	 * Unset this thread from the threads managed by the interface.
1661 	 * The tracers are responsible for cleaning their env before
1662 	 * exiting.
1663 	 */
1664 	mutex_lock(&interface_lock);
1665 	this_cpu_osn_var()->kthread = NULL;
1666 	cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask);
1667 	mutex_unlock(&interface_lock);
1668 
1669 	return 1;
1670 }
1671 
1672 /*
1673  * osnoise_main - The osnoise detection kernel thread
1674  *
1675  * Calls run_osnoise() function to measure the osnoise for the configured runtime,
1676  * every period.
1677  */
1678 static int osnoise_main(void *data)
1679 {
1680 	unsigned long flags;
1681 
1682 	/*
1683 	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
1684 	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY threads.
1685 	 *
1686 	 * To work around this limitation, disable migration and remove the
1687 	 * flag.
1688 	 */
1689 	migrate_disable();
1690 	raw_spin_lock_irqsave(&current->pi_lock, flags);
1691 	current->flags &= ~(PF_NO_SETAFFINITY);
1692 	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
1693 
1694 	while (!kthread_should_stop()) {
1695 		if (osnoise_migration_pending())
1696 			break;
1697 
1698 		/* skip a period if tracing is off on all instances */
1699 		if (!osnoise_has_tracing_on()) {
1700 			osnoise_sleep(true);
1701 			continue;
1702 		}
1703 
1704 		run_osnoise();
1705 		osnoise_sleep(false);
1706 	}
1707 
1708 	migrate_enable();
1709 	return 0;
1710 }
1711 
1712 #ifdef CONFIG_TIMERLAT_TRACER
1713 /*
1714  * timerlat_irq - hrtimer handler for timerlat.
1715  */
1716 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
1717 {
1718 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1719 	struct timerlat_variables *tlat;
1720 	struct timerlat_sample s;
1721 	u64 now;
1722 	u64 diff;
1723 
1724 	/*
1725 	 * I am not sure if the timer was armed for this CPU. So, get
1726 	 * the timerlat struct from the timer itself, not from this
1727 	 * CPU.
1728 	 */
1729 	tlat = container_of(timer, struct timerlat_variables, timer);
1730 
1731 	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1732 
1733 	/*
1734 	 * Enable the osnoise: events for thread and softirq.
1735 	 */
1736 	tlat->tracing_thread = true;
1737 
1738 	osn_var->thread.arrival_time = time_get();
1739 
1740 	/*
1741 	 * A hardirq is running: the timer IRQ. It is for sure preempting
1742 	 * a thread, and potentially preempting a softirq.
1743 	 *
1744 	 * At this point, it is not interesting to know the duration of the
1745 	 * preempted thread (and maybe softirq), but how much time they will
1746 	 * delay the beginning of the execution of the timer thread.
1747 	 *
1748 	 * To get the correct (net) delay added by the softirq, its delta_start
1749 	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
1750 	 * start of the softirq will be zeroed, accounting then only the time
1751 	 * after that.
1752 	 *
1753 	 * The thread follows the same principle. However, if a softirq is
1754 	 * running, the thread needs to receive the softirq delta_start. The
1755 	 * reason is that the softirq will be the last to be unfolded,
1756 	 * resetting the thread delay to zero.
1757 	 *
1758 	 * The PREEMPT_RT is a special case, though. As softirqs run as threads
1759 	 * on RT, moving the thread is enough.
1760 	 */
1761 	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
1762 		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1763 				   &osn_var->softirq.delta_start);
1764 
1765 		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
1766 				    &osn_var->irq.delta_start);
1767 	} else {
1768 		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1769 				    &osn_var->irq.delta_start);
1770 	}
1771 
1772 	/*
1773 	 * Compare the current time with the expected time.
1774 	 */
1775 	diff = now - tlat->abs_period;
1776 
1777 	tlat->count++;
1778 	s.seqnum = tlat->count;
1779 	s.timer_latency = diff;
1780 	s.context = IRQ_CONTEXT;
1781 
1782 	record_timerlat_sample(&s);
1783 
1784 	if (osnoise_data.stop_tracing) {
1785 		if (time_to_us(diff) >= osnoise_data.stop_tracing) {
1786 
1787 			/*
1788 			 * At this point, if stop_tracing is set and stop_tracing <=
1789 			 * print_stack, print_stack is also set, and the stack would
1790 			 * normally be printed in the thread handler. Since tracing
1791 			 * stops here, print the stack trace now, as it is helpful to
1792 			 * pinpoint the root cause of an IRQ latency.
1793 			 */
1794 			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
1795 				timerlat_save_stack(0);
1796 				timerlat_dump_stack(time_to_us(diff));
1797 			}
1798 
1799 			osnoise_stop_tracing();
1800 			notify_new_max_latency(diff);
1801 
1802 			wake_up_process(tlat->kthread);
1803 
1804 			return HRTIMER_NORESTART;
1805 		}
1806 	}
1807 
1808 	wake_up_process(tlat->kthread);
1809 
1810 	if (osnoise_data.print_stack)
1811 		timerlat_save_stack(0);
1812 
1813 	return HRTIMER_NORESTART;
1814 }
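/*
 * Editor's note, a worked example of the delta_start handover above
 * (illustrative only): suppose a softirq is preempted by the timer IRQ.
 * The thread inherits the softirq's delta_start, and the softirq inherits
 * the IRQ's delta_start. When the IRQ returns, the softirq noise is
 * accounted only from the IRQ's start, and when the softirq finishes, the
 * thread delay is accounted only from that point on: each level charges
 * the timerlat thread only with the time it actually added after the
 * timer fired.
 */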
1815 
1816 /*
1817  * wait_next_period - Wait for the next period for timerlat
1818  */
wait_next_period(struct timerlat_variables * tlat)1819 static int wait_next_period(struct timerlat_variables *tlat)
1820 {
1821 	ktime_t next_abs_period, now;
1822 	u64 rel_period = osnoise_data.timerlat_period * 1000;
1823 
1824 	now = hrtimer_cb_get_time(&tlat->timer);
1825 	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1826 
1827 	/*
1828 	 * Save the next abs_period.
1829 	 */
1830 	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1831 
1832 	/*
1833 	 * If the new abs_period is in the past, skip the activation.
1834 	 */
1835 	while (ktime_compare(now, next_abs_period) > 0) {
1836 		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1837 		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1838 	}
1839 
1840 	set_current_state(TASK_INTERRUPTIBLE);
1841 
1842 	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
1843 	schedule();
1844 	return 1;
1845 }
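/*
 * Editor's illustration of the arithmetic above (not in the original
 * source): with timerlat_period = 1000 us, rel_period is 1000000 ns. If
 * the previous abs_period was t ns, the timer is re-armed at the absolute
 * time t + 1000000 ns. If the thread was delayed past that instant, the
 * while loop keeps adding whole periods until next_abs_period lands in
 * the future, skipping the missed activations.
 */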
1846 
1847 /*
1848  * timerlat_main - The timerlat sampling kernel thread
1849  */
timerlat_main(void * data)1850 static int timerlat_main(void *data)
1851 {
1852 	struct osnoise_variables *osn_var = this_cpu_osn_var();
1853 	struct timerlat_variables *tlat = this_cpu_tmr_var();
1854 	struct timerlat_sample s;
1855 	struct sched_param sp;
1856 	unsigned long flags;
1857 	u64 now, diff;
1858 
1859 	/*
1860 	 * Make the thread RT; that is how cyclictest is usually run.
1861 	 */
1862 	sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1863 	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1864 
1865 	/*
1866 	 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
1867 	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY threads.
1868 	 *
1869 	 * To work around this limitation, disable migration and remove the
1870 	 * flag.
1871 	 */
1872 	migrate_disable();
1873 	raw_spin_lock_irqsave(&current->pi_lock, flags);
1874 	current->flags &= ~(PF_NO_SETAFFINITY);
1875 	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
1876 
1877 	tlat->count = 0;
1878 	tlat->tracing_thread = false;
1879 
1880 	hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1881 	tlat->kthread = current;
1882 	osn_var->pid = current->pid;
1883 	/*
1884 	 * Annotate the arrival time as the base for the first period.
1885 	 */
1886 	tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1887 
1888 	wait_next_period(tlat);
1889 
1890 	osn_var->sampling = 1;
1891 
1892 	while (!kthread_should_stop()) {
1893 
1894 		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1895 		diff = now - tlat->abs_period;
1896 
1897 		s.seqnum = tlat->count;
1898 		s.timer_latency = diff;
1899 		s.context = THREAD_CONTEXT;
1900 
1901 		record_timerlat_sample(&s);
1902 
1903 		notify_new_max_latency(diff);
1904 
1905 		timerlat_dump_stack(time_to_us(diff));
1906 
1907 		tlat->tracing_thread = false;
1908 		if (osnoise_data.stop_tracing_total)
1909 			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1910 				osnoise_stop_tracing();
1911 
1912 		if (osnoise_migration_pending())
1913 			break;
1914 
1915 		wait_next_period(tlat);
1916 	}
1917 
1918 	hrtimer_cancel(&tlat->timer);
1919 	migrate_enable();
1920 	return 0;
1921 }
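/*
 * Example usage (editor's sketch, assuming tracefs is mounted at
 * /sys/kernel/tracing): enabling the timerlat tracer with the in-kernel
 * workload dispatched by timerlat_main():
 *
 *	# cd /sys/kernel/tracing
 *	# echo timerlat > current_tracer
 *	# cat trace
 *
 * Each activation produces an IRQ_CONTEXT sample from timerlat_irq() and
 * a THREAD_CONTEXT sample from the loop above.
 */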
1922 #else /* CONFIG_TIMERLAT_TRACER */
timerlat_main(void * data)1923 static int timerlat_main(void *data)
1924 {
1925 	return 0;
1926 }
1927 #endif /* CONFIG_TIMERLAT_TRACER */
1928 
1929 /*
1930  * stop_kthread - stop a workload thread
1931  */
stop_kthread(unsigned int cpu)1932 static void stop_kthread(unsigned int cpu)
1933 {
1934 	struct task_struct *kthread;
1935 
1936 	kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
1937 	if (kthread) {
1938 		if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
1939 		    !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
1940 			kthread_stop(kthread);
1941 		} else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) {
1942 			/*
1943 			 * This is a user thread waiting on the timerlat_fd. We need
1944 			 * to close all users, and the best way to guarantee this is
1945 			 * by killing the thread. NOTE: this is a purpose-specific file.
1946 			 */
1947 			kill_pid(kthread->thread_pid, SIGKILL, 1);
1948 			put_task_struct(kthread);
1949 		}
1950 	} else {
1951 		/* if no workload, just return */
1952 		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1953 			/*
1954 			 * This is set in the osnoise tracer case.
1955 			 */
1956 			per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
1957 			barrier();
1958 		}
1959 	}
1960 }
1961 
1962 /*
1963  * stop_per_cpu_kthreads - Stop per-cpu threads
1964  *
1965  * Stop the osnoise sampling threads. Use this on unload and at system
1966  * shutdown.
1967  */
stop_per_cpu_kthreads(void)1968 static void stop_per_cpu_kthreads(void)
1969 {
1970 	int cpu;
1971 
1972 	cpus_read_lock();
1973 
1974 	for_each_online_cpu(cpu)
1975 		stop_kthread(cpu);
1976 
1977 	cpus_read_unlock();
1978 }
1979 
1980 /*
1981  * start_kthread - Start a workload thread
1982  */
start_kthread(unsigned int cpu)1983 static int start_kthread(unsigned int cpu)
1984 {
1985 	struct task_struct *kthread;
1986 	void *main = osnoise_main;
1987 	char comm[24];
1988 
1989 	/* Do not start a new thread if it is already running */
1990 	if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
1991 		return 0;
1992 
1993 	if (timerlat_enabled()) {
1994 		snprintf(comm, 24, "timerlat/%d", cpu);
1995 		main = timerlat_main;
1996 	} else {
1997 		/* if no workload, just return */
1998 		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1999 			per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
2000 			barrier();
2001 			return 0;
2002 		}
2003 		snprintf(comm, 24, "osnoise/%d", cpu);
2004 	}
2005 
2006 	kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
2007 
2008 	if (IS_ERR(kthread)) {
2009 		pr_err(BANNER "could not start sampling thread\n");
2010 		return -ENOMEM;
2011 	}
2012 
2013 	per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
2014 	cpumask_set_cpu(cpu, &kthread_cpumask);
2015 
2016 	return 0;
2017 }
2018 
2019 /*
2020  * start_per_cpu_kthreads - Kick off the per-cpu osnoise sampling kthreads
2021  *
2022  * This starts the kernel threads that will look for osnoise on the
2023  * allowed cpus.
2024  */
start_per_cpu_kthreads(void)2025 static int start_per_cpu_kthreads(void)
2026 {
2027 	struct cpumask *current_mask = &save_cpumask;
2028 	int retval = 0;
2029 	int cpu;
2030 
2031 	if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
2032 		if (timerlat_enabled())
2033 			return 0;
2034 	}
2035 
2036 	cpus_read_lock();
2037 	/*
2038 	 * Run only on online CPUs in which osnoise is allowed to run.
2039 	 */
2040 	cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
2041 
2042 	for_each_possible_cpu(cpu) {
2043 		if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
2044 			struct task_struct *kthread;
2045 
2046 			kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
2047 			if (!WARN_ON(!kthread))
2048 				kthread_stop(kthread);
2049 		}
2050 	}
2051 
2052 	for_each_cpu(cpu, current_mask) {
2053 		retval = start_kthread(cpu);
2054 		if (retval) {
2055 			cpus_read_unlock();
2056 			stop_per_cpu_kthreads();
2057 			return retval;
2058 		}
2059 	}
2060 
2061 	cpus_read_unlock();
2062 
2063 	return retval;
2064 }
2065 
2066 #ifdef CONFIG_HOTPLUG_CPU
osnoise_hotplug_workfn(struct work_struct * dummy)2067 static void osnoise_hotplug_workfn(struct work_struct *dummy)
2068 {
2069 	unsigned int cpu = smp_processor_id();
2070 
2071 	guard(mutex)(&trace_types_lock);
2072 
2073 	if (!osnoise_has_registered_instances())
2074 		return;
2075 
2076 	guard(mutex)(&interface_lock);
2077 	guard(cpus_read_lock)();
2078 
2079 	if (!cpu_online(cpu))
2080 		return;
2081 
2082 	if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
2083 		return;
2084 
2085 	start_kthread(cpu);
2086 }
2087 
2088 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
2089 
2090 /*
2091  * osnoise_cpu_init - CPU hotplug online callback function
2092  */
osnoise_cpu_init(unsigned int cpu)2093 static int osnoise_cpu_init(unsigned int cpu)
2094 {
2095 	schedule_work_on(cpu, &osnoise_hotplug_work);
2096 	return 0;
2097 }
2098 
2099 /*
2100  * osnoise_cpu_die - CPU hotplug offline callback function
2101  */
osnoise_cpu_die(unsigned int cpu)2102 static int osnoise_cpu_die(unsigned int cpu)
2103 {
2104 	stop_kthread(cpu);
2105 	return 0;
2106 }
2107 
osnoise_init_hotplug_support(void)2108 static void osnoise_init_hotplug_support(void)
2109 {
2110 	int ret;
2111 
2112 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
2113 				osnoise_cpu_init, osnoise_cpu_die);
2114 	if (ret < 0)
2115 		pr_warn(BANNER "Error initializing CPU hotplug support\n");
2116 
2117 	return;
2118 }
2119 #else /* CONFIG_HOTPLUG_CPU */
osnoise_init_hotplug_support(void)2120 static void osnoise_init_hotplug_support(void)
2121 {
2122 	return;
2123 }
2124 #endif /* CONFIG_HOTPLUG_CPU */
2125 
2126 /*
2127  * seq file functions for the osnoise/options file.
2128  */
s_options_start(struct seq_file * s,loff_t * pos)2129 static void *s_options_start(struct seq_file *s, loff_t *pos)
2130 {
2131 	int option = *pos;
2132 
2133 	mutex_lock(&interface_lock);
2134 
2135 	if (option >= OSN_MAX)
2136 		return NULL;
2137 
2138 	return pos;
2139 }
2140 
s_options_next(struct seq_file * s,void * v,loff_t * pos)2141 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
2142 {
2143 	int option = ++(*pos);
2144 
2145 	if (option >= OSN_MAX)
2146 		return NULL;
2147 
2148 	return pos;
2149 }
2150 
s_options_show(struct seq_file * s,void * v)2151 static int s_options_show(struct seq_file *s, void *v)
2152 {
2153 	loff_t *pos = v;
2154 	int option = *pos;
2155 
2156 	if (option == OSN_DEFAULTS) {
2157 		if (osnoise_options == OSN_DEFAULT_OPTIONS)
2158 			seq_printf(s, "%s", osnoise_options_str[option]);
2159 		else
2160 			seq_printf(s, "NO_%s", osnoise_options_str[option]);
2161 		goto out;
2162 	}
2163 
2164 	if (test_bit(option, &osnoise_options))
2165 		seq_printf(s, "%s", osnoise_options_str[option]);
2166 	else
2167 		seq_printf(s, "NO_%s", osnoise_options_str[option]);
2168 
2169 out:
2170 	if (option != OSN_MAX)
2171 		seq_puts(s, " ");
2172 
2173 	return 0;
2174 }
2175 
s_options_stop(struct seq_file * s,void * v)2176 static void s_options_stop(struct seq_file *s, void *v)
2177 {
2178 	seq_puts(s, "\n");
2179 	mutex_unlock(&interface_lock);
2180 }
2181 
2182 static const struct seq_operations osnoise_options_seq_ops = {
2183 	.start		= s_options_start,
2184 	.next		= s_options_next,
2185 	.show		= s_options_show,
2186 	.stop		= s_options_stop
2187 };
2188 
osnoise_options_open(struct inode * inode,struct file * file)2189 static int osnoise_options_open(struct inode *inode, struct file *file)
2190 {
2191 	return seq_open(file, &osnoise_options_seq_ops);
2192 };
2193 
2194 /**
2195  * osnoise_options_write - Write function for "options" entry
2196  * @filp: The active open file structure
2197  * @ubuf: The user buffer that contains the value to write
2198  * @cnt: The maximum number of bytes to write to "file"
2199  * @ppos: The current position in @file
2200  *
2201  * Writing the option name sets the option; writing the "NO_"
2202  * prefix in front of the option name disables it.
2203  *
2204  * Writing "DEFAULTS" resets the option values to the default ones.
2205  */
osnoise_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)2206 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
2207 				     size_t cnt, loff_t *ppos)
2208 {
2209 	int running, option, enable, retval;
2210 	char buf[256], *option_str;
2211 
2212 	if (cnt >= 256)
2213 		return -EINVAL;
2214 
2215 	if (copy_from_user(buf, ubuf, cnt))
2216 		return -EFAULT;
2217 
2218 	buf[cnt] = 0;
2219 
2220 	if (strncmp(buf, "NO_", 3)) {
2221 		option_str = strstrip(buf);
2222 		enable = true;
2223 	} else {
2224 		option_str = strstrip(&buf[3]);
2225 		enable = false;
2226 	}
2227 
2228 	option = match_string(osnoise_options_str, OSN_MAX, option_str);
2229 	if (option < 0)
2230 		return -EINVAL;
2231 
2232 	/*
2233 	 * trace_types_lock is taken to avoid concurrency on start/stop.
2234 	 */
2235 	mutex_lock(&trace_types_lock);
2236 	running = osnoise_has_registered_instances();
2237 	if (running)
2238 		stop_per_cpu_kthreads();
2239 
2240 	mutex_lock(&interface_lock);
2241 	/*
2242 	 * avoid CPU hotplug operations that might read options.
2243 	 */
2244 	cpus_read_lock();
2245 
2246 	retval = cnt;
2247 
2248 	if (enable) {
2249 		if (option == OSN_DEFAULTS)
2250 			osnoise_options = OSN_DEFAULT_OPTIONS;
2251 		else
2252 			set_bit(option, &osnoise_options);
2253 	} else {
2254 		if (option == OSN_DEFAULTS)
2255 			retval = -EINVAL;
2256 		else
2257 			clear_bit(option, &osnoise_options);
2258 	}
2259 
2260 	cpus_read_unlock();
2261 	mutex_unlock(&interface_lock);
2262 
2263 	if (running)
2264 		start_per_cpu_kthreads();
2265 	mutex_unlock(&trace_types_lock);
2266 
2267 	return retval;
2268 }
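/*
 * Example usage (editor's sketch): the parser above accepts an option name
 * to set it and the same name with a "NO_" prefix to clear it. For
 * instance, to disable the in-kernel workload (see OSN_WORKLOAD) and then
 * restore the defaults:
 *
 *	# echo NO_OSNOISE_WORKLOAD > /sys/kernel/tracing/osnoise/options
 *	# echo DEFAULTS > /sys/kernel/tracing/osnoise/options
 */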
2269 
2270 /*
2271  * osnoise_cpus_read - Read function for the "cpus" file
2272  * @filp: The active open file structure
2273  * @ubuf: The userspace provided buffer to read value into
2274  * @cnt: The maximum number of bytes to read
2275  * @ppos: The current "file" position
2276  *
2277  * Prints the "cpus" output into the user-provided buffer.
2278  */
2279 static ssize_t
osnoise_cpus_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)2280 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
2281 		  loff_t *ppos)
2282 {
2283 	char *mask_str __free(kfree) = NULL;
2284 	int len;
2285 
2286 	guard(mutex)(&interface_lock);
2287 
2288 	len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
2289 	mask_str = kmalloc(len, GFP_KERNEL);
2290 	if (!mask_str)
2291 		return -ENOMEM;
2292 
2293 	len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2294 	if (len >= count)
2295 		return -EINVAL;
2296 
2297 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2298 
2299 	return count;
2300 }
2301 
2302 /*
2303  * osnoise_cpus_write - Write function for "cpus" entry
2304  * @filp: The active open file structure
2305  * @ubuf: The user buffer that contains the value to write
2306  * @count: The maximum number of bytes to write to "file"
2307  * @ppos: The current position in @file
2308  *
2309  * This function provides a write implementation for the "cpus"
2310  * interface to the osnoise tracer. By default, the mask lists all CPUs,
2311  * allowing osnoise threads to run on any online CPU of the system.
2312  * Writing to this interface restricts the execution of osnoise to the
2313  * given set of CPUs. Why not use "tracing_cpumask"? Because the user
2314  * might be interested in tracing what is running on other CPUs. For
2315  * instance, one might run osnoise in one HT CPU while observing what
2316  * is running on the sibling HT CPU.
2317  */
2318 static ssize_t
osnoise_cpus_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)2319 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
2320 		   loff_t *ppos)
2321 {
2322 	cpumask_var_t osnoise_cpumask_new;
2323 	int running, err;
2324 	char *buf __free(kfree) = NULL;
2325 
2326 	if (count < 1)
2327 		return 0;
2328 
2329 	buf = memdup_user_nul(ubuf, count);
2330 	if (IS_ERR(buf))
2331 		return PTR_ERR(buf);
2332 
2333 	if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
2334 		return -ENOMEM;
2335 
2336 	err = cpulist_parse(buf, osnoise_cpumask_new);
2337 	if (err)
2338 		goto err_free;
2339 
2340 	/*
2341 	 * trace_types_lock is taken to avoid concurrency on start/stop.
2342 	 */
2343 	mutex_lock(&trace_types_lock);
2344 	running = osnoise_has_registered_instances();
2345 	if (running)
2346 		stop_per_cpu_kthreads();
2347 
2348 	mutex_lock(&interface_lock);
2349 	/*
2350 	 * osnoise_cpumask is read by CPU hotplug operations.
2351 	 */
2352 	cpus_read_lock();
2353 
2354 	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
2355 
2356 	cpus_read_unlock();
2357 	mutex_unlock(&interface_lock);
2358 
2359 	if (running)
2360 		start_per_cpu_kthreads();
2361 	mutex_unlock(&trace_types_lock);
2362 
2363 	free_cpumask_var(osnoise_cpumask_new);
2364 	return count;
2365 
2366 err_free:
2367 	free_cpumask_var(osnoise_cpumask_new);
2368 
2369 	return err;
2370 }
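/*
 * Example usage (editor's sketch): the buffer is parsed with
 * cpulist_parse(), so the usual cpulist format applies:
 *
 *	# echo 0-3,8 > /sys/kernel/tracing/osnoise/cpus
 *	# cat /sys/kernel/tracing/osnoise/cpus
 *	0-3,8
 */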
2371 
2372 #ifdef CONFIG_TIMERLAT_TRACER
timerlat_fd_open(struct inode * inode,struct file * file)2373 static int timerlat_fd_open(struct inode *inode, struct file *file)
2374 {
2375 	struct osnoise_variables *osn_var;
2376 	struct timerlat_variables *tlat;
2377 	long cpu = (long) inode->i_cdev;
2378 
2379 	mutex_lock(&interface_lock);
2380 
2381 	/*
2382 	 * This file is accessible only if timerlat is enabled, and
2383 	 * NO_OSNOISE_WORKLOAD is set.
2384 	 */
2385 	if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
2386 		mutex_unlock(&interface_lock);
2387 		return -EINVAL;
2388 	}
2389 
2390 	migrate_disable();
2391 
2392 	osn_var = this_cpu_osn_var();
2393 
2394 	/*
2395 	 * The osn_var->pid holds the single access to this file.
2396 	 */
2397 	if (osn_var->pid) {
2398 		mutex_unlock(&interface_lock);
2399 		migrate_enable();
2400 		return -EBUSY;
2401 	}
2402 
2403 	/*
2404 	 * The timerlat tracer is a per-cpu tracer. Check if the user-space
2405 	 * task, too, is pinned to a single CPU. The tracer later monitors
2406 	 * whether the task migrates, and disables the tracer if it does.
2407 	 * Still, it is worth doing this basic acceptance test to reject an
2408 	 * obviously wrong setup.
2409 	 */
2410 	if (current->nr_cpus_allowed > 1 ||  cpu != smp_processor_id()) {
2411 		mutex_unlock(&interface_lock);
2412 		migrate_enable();
2413 		return -EPERM;
2414 	}
2415 
2416 	/*
2417 	 * From now on, it is good to go.
2418 	 */
2419 	file->private_data = inode->i_cdev;
2420 
2421 	get_task_struct(current);
2422 
2423 	osn_var->kthread = current;
2424 	osn_var->pid = current->pid;
2425 
2426 	/*
2427 	 * Setup is done.
2428 	 */
2429 	mutex_unlock(&interface_lock);
2430 
2431 	tlat = this_cpu_tmr_var();
2432 	tlat->count = 0;
2433 
2434 	hrtimer_setup(&tlat->timer, timerlat_irq, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
2435 
2436 	migrate_enable();
2437 	return 0;
2438 };
2439 
2440 /*
2441  * timerlat_fd_read - Read function for "timerlat_fd" file
2442  * @file: The active open file structure
2443  * @ubuf: The userspace provided buffer to read value into
2444  * @cnt: The maximum number of bytes to read
2445  * @ppos: The current "file" position
2446  *
2447  * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error.
2448  */
2449 static ssize_t
timerlat_fd_read(struct file * file,char __user * ubuf,size_t count,loff_t * ppos)2450 timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
2451 		  loff_t *ppos)
2452 {
2453 	long cpu = (long) file->private_data;
2454 	struct osnoise_variables *osn_var;
2455 	struct timerlat_variables *tlat;
2456 	struct timerlat_sample s;
2457 	s64 diff;
2458 	u64 now;
2459 
2460 	migrate_disable();
2461 
2462 	tlat = this_cpu_tmr_var();
2463 
2464 	/*
2465 	 * While in user-space, the thread is migratable. There is nothing
2466 	 * we can do about it.
2467 	 * So, if the thread is running on another CPU, stop the machinery.
2468 	 */
2469 	if (cpu == smp_processor_id()) {
2470 		if (tlat->uthread_migrate) {
2471 			migrate_enable();
2472 			return -EINVAL;
2473 		}
2474 	} else {
2475 		per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
2476 		osnoise_taint("timerlat user thread migrate\n");
2477 		osnoise_stop_tracing();
2478 		migrate_enable();
2479 		return -EINVAL;
2480 	}
2481 
2482 	osn_var = this_cpu_osn_var();
2483 
2484 	/*
2485 	 * The timerlat in user-space runs in a different order:
2486 	 * the read() starts by accounting the previous activation and then
2487 	 * sleeps until the next one.
2488 	 *
2489 	 * So, skip the accounting if we enter read() before the first
2490 	 * wakeup from the timerlat IRQ:
2491 	 */
2492 	if (likely(osn_var->sampling)) {
2493 		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2494 		diff = now - tlat->abs_period;
2495 
2496 		/*
2497 		 * it was not a timer firing, but some other signal?
2498 		 */
2499 		if (diff < 0)
2500 			goto out;
2501 
2502 		s.seqnum = tlat->count;
2503 		s.timer_latency = diff;
2504 		s.context = THREAD_URET;
2505 
2506 		record_timerlat_sample(&s);
2507 
2508 		notify_new_max_latency(diff);
2509 
2510 		tlat->tracing_thread = false;
2511 		if (osnoise_data.stop_tracing_total)
2512 			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
2513 				osnoise_stop_tracing();
2514 	} else {
2515 		tlat->tracing_thread = false;
2516 		tlat->kthread = current;
2517 
2518 		/* Annotate the current time as the base for the new period */
2519 		tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
2520 
2521 		osn_var->sampling = 1;
2522 	}
2523 
2524 	/* wait for the next period */
2525 	wait_next_period(tlat);
2526 
2527 	/* This is the wakeup from this cycle */
2528 	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2529 	diff = now - tlat->abs_period;
2530 
2531 	/*
2532 	 * it was not a timer firing, but some other signal?
2533 	 */
2534 	if (diff < 0)
2535 		goto out;
2536 
2537 	s.seqnum = tlat->count;
2538 	s.timer_latency = diff;
2539 	s.context = THREAD_CONTEXT;
2540 
2541 	record_timerlat_sample(&s);
2542 
2543 	if (osnoise_data.stop_tracing_total) {
2544 		if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
2545 			timerlat_dump_stack(time_to_us(diff));
2546 			notify_new_max_latency(diff);
2547 			osnoise_stop_tracing();
2548 		}
2549 	}
2550 
2551 out:
2552 	migrate_enable();
2553 	return 0;
2554 }
2555 
timerlat_fd_release(struct inode * inode,struct file * file)2556 static int timerlat_fd_release(struct inode *inode, struct file *file)
2557 {
2558 	struct osnoise_variables *osn_var;
2559 	struct timerlat_variables *tlat_var;
2560 	long cpu = (long) file->private_data;
2561 
2562 	migrate_disable();
2563 	mutex_lock(&interface_lock);
2564 
2565 	osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
2566 	tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
2567 
2568 	if (tlat_var->kthread)
2569 		hrtimer_cancel(&tlat_var->timer);
2570 	memset(tlat_var, 0, sizeof(*tlat_var));
2571 
2572 	osn_var->sampling = 0;
2573 	osn_var->pid = 0;
2574 
2575 	/*
2576 	 * We are leaving, not being stopped... see stop_kthread();
2577 	 */
2578 	if (osn_var->kthread) {
2579 		put_task_struct(osn_var->kthread);
2580 		osn_var->kthread = NULL;
2581 	}
2582 
2583 	mutex_unlock(&interface_lock);
2584 	migrate_enable();
2585 	return 0;
2586 }
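/*
 * Editor's sketch of a minimal user-space timerlat workload (not part of
 * the original source; error handling omitted). It assumes timerlat is the
 * current tracer and NO_OSNOISE_WORKLOAD is set. timerlat_fd_open()
 * demands that the task is pinned to the CPU matching the per-cpu file,
 * so the affinity must be set before open():
 *
 *	cpu_set_t set;
 *	char buf[1];
 *	int fd;
 *
 *	CPU_ZERO(&set);
 *	CPU_SET(0, &set);
 *	sched_setaffinity(0, sizeof(set), &set);
 *
 *	fd = open("/sys/kernel/tracing/osnoise/per_cpu/cpu0/timerlat_fd",
 *		  O_RDONLY);
 *	for (;;)
 *		read(fd, buf, 1);	/* returns after each timer activation */
 */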
2587 #endif
2588 
2589 /*
2590  * osnoise/runtime_us: cannot be greater than the period.
2591  */
2592 static struct trace_min_max_param osnoise_runtime = {
2593 	.lock	= &interface_lock,
2594 	.val	= &osnoise_data.sample_runtime,
2595 	.max	= &osnoise_data.sample_period,
2596 	.min	= NULL,
2597 };
2598 
2599 /*
2600  * osnoise/period_us: cannot be smaller than the runtime.
2601  */
2602 static struct trace_min_max_param osnoise_period = {
2603 	.lock	= &interface_lock,
2604 	.val	= &osnoise_data.sample_period,
2605 	.max	= NULL,
2606 	.min	= &osnoise_data.sample_runtime,
2607 };
2608 
2609 /*
2610  * osnoise/stop_tracing_us: no limit.
2611  */
2612 static struct trace_min_max_param osnoise_stop_tracing_in = {
2613 	.lock	= &interface_lock,
2614 	.val	= &osnoise_data.stop_tracing,
2615 	.max	= NULL,
2616 	.min	= NULL,
2617 };
2618 
2619 /*
2620  * osnoise/stop_tracing_total_us: no limit.
2621  */
2622 static struct trace_min_max_param osnoise_stop_tracing_total = {
2623 	.lock	= &interface_lock,
2624 	.val	= &osnoise_data.stop_tracing_total,
2625 	.max	= NULL,
2626 	.min	= NULL,
2627 };
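/*
 * Example usage (editor's sketch): the two no-limit knobs above stop the
 * trace when a latency threshold, in microseconds, is crossed:
 *
 *	# echo 50 > /sys/kernel/tracing/osnoise/stop_tracing_us
 *	# echo 150 > /sys/kernel/tracing/osnoise/stop_tracing_total_us
 */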
2628 
2629 #ifdef CONFIG_TIMERLAT_TRACER
2630 /*
2631  * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
2632  * latency is higher than val.
2633  */
2634 static struct trace_min_max_param osnoise_print_stack = {
2635 	.lock	= &interface_lock,
2636 	.val	= &osnoise_data.print_stack,
2637 	.max	= NULL,
2638 	.min	= NULL,
2639 };
2640 
2641 /*
2642  * osnoise/timerlat_period: min 100 us, max 1 s
2643  */
2644 static u64 timerlat_min_period = 100;
2645 static u64 timerlat_max_period = 1000000;
2646 static struct trace_min_max_param timerlat_period = {
2647 	.lock	= &interface_lock,
2648 	.val	= &osnoise_data.timerlat_period,
2649 	.max	= &timerlat_max_period,
2650 	.min	= &timerlat_min_period,
2651 };
2652 
2653 static const struct file_operations timerlat_fd_fops = {
2654 	.open		= timerlat_fd_open,
2655 	.read		= timerlat_fd_read,
2656 	.release	= timerlat_fd_release,
2657 	.llseek		= generic_file_llseek,
2658 };
2659 #endif
2660 
2661 static const struct file_operations cpus_fops = {
2662 	.open		= tracing_open_generic,
2663 	.read		= osnoise_cpus_read,
2664 	.write		= osnoise_cpus_write,
2665 	.llseek		= generic_file_llseek,
2666 };
2667 
2668 static const struct file_operations osnoise_options_fops = {
2669 	.open		= osnoise_options_open,
2670 	.read		= seq_read,
2671 	.llseek		= seq_lseek,
2672 	.release	= seq_release,
2673 	.write		= osnoise_options_write
2674 };
2675 
2676 #ifdef CONFIG_TIMERLAT_TRACER
2677 #ifdef CONFIG_STACKTRACE
init_timerlat_stack_tracefs(struct dentry * top_dir)2678 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2679 {
2680 	struct dentry *tmp;
2681 
2682 	tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
2683 				  &osnoise_print_stack, &trace_min_max_fops);
2684 	if (!tmp)
2685 		return -ENOMEM;
2686 
2687 	return 0;
2688 }
2689 #else /* CONFIG_STACKTRACE */
init_timerlat_stack_tracefs(struct dentry * top_dir)2690 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2691 {
2692 	return 0;
2693 }
2694 #endif /* CONFIG_STACKTRACE */
2695 
osnoise_create_cpu_timerlat_fd(struct dentry * top_dir)2696 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
2697 {
2698 	struct dentry *timerlat_fd;
2699 	struct dentry *per_cpu;
2700 	struct dentry *cpu_dir;
2701 	char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
2702 	long cpu;
2703 
2704 	/*
2705 	 * Why not use the tracing instance per_cpu/ dir?
2706 	 *
2707 	 * Because osnoise/timerlat have a single workload, and having
2708 	 * multiple files like these would be a waste of memory.
2709 	 */
2710 	per_cpu = tracefs_create_dir("per_cpu", top_dir);
2711 	if (!per_cpu)
2712 		return -ENOMEM;
2713 
2714 	for_each_possible_cpu(cpu) {
2715 		snprintf(cpu_str, 30, "cpu%ld", cpu);
2716 		cpu_dir = tracefs_create_dir(cpu_str, per_cpu);
2717 		if (!cpu_dir)
2718 			goto out_clean;
2719 
2720 		timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
2721 						cpu_dir, NULL, &timerlat_fd_fops);
2722 		if (!timerlat_fd)
2723 			goto out_clean;
2724 
2725 		/* Record the CPU */
2726 		d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
2727 	}
2728 
2729 	return 0;
2730 
2731 out_clean:
2732 	tracefs_remove(per_cpu);
2733 	return -ENOMEM;
2734 }
2735 
2736 /*
2737  * init_timerlat_tracefs - A function to initialize the timerlat interface files
2738  */
init_timerlat_tracefs(struct dentry * top_dir)2739 static int init_timerlat_tracefs(struct dentry *top_dir)
2740 {
2741 	struct dentry *tmp;
2742 	int retval;
2743 
2744 	tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
2745 				  &timerlat_period, &trace_min_max_fops);
2746 	if (!tmp)
2747 		return -ENOMEM;
2748 
2749 	retval = osnoise_create_cpu_timerlat_fd(top_dir);
2750 	if (retval)
2751 		return retval;
2752 
2753 	return init_timerlat_stack_tracefs(top_dir);
2754 }
2755 #else /* CONFIG_TIMERLAT_TRACER */
init_timerlat_tracefs(struct dentry * top_dir)2756 static int init_timerlat_tracefs(struct dentry *top_dir)
2757 {
2758 	return 0;
2759 }
2760 #endif /* CONFIG_TIMERLAT_TRACER */
2761 
2762 /*
2763  * init_tracefs - A function to initialize the tracefs interface files
2764  *
2765  * This function creates entries in tracefs for "osnoise" and "timerlat".
2766  * It creates these directories in the tracing directory, and within that
2767  * directory the user can change and view the configs.
2768  */
init_tracefs(void)2769 static int init_tracefs(void)
2770 {
2771 	struct dentry *top_dir;
2772 	struct dentry *tmp;
2773 	int ret;
2774 
2775 	ret = tracing_init_dentry();
2776 	if (ret)
2777 		return -ENOMEM;
2778 
2779 	top_dir = tracefs_create_dir("osnoise", NULL);
2780 	if (!top_dir)
2781 		return 0;
2782 
2783 	tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
2784 				  &osnoise_period, &trace_min_max_fops);
2785 	if (!tmp)
2786 		goto err;
2787 
2788 	tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
2789 				  &osnoise_runtime, &trace_min_max_fops);
2790 	if (!tmp)
2791 		goto err;
2792 
2793 	tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
2794 				  &osnoise_stop_tracing_in, &trace_min_max_fops);
2795 	if (!tmp)
2796 		goto err;
2797 
2798 	tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
2799 				  &osnoise_stop_tracing_total, &trace_min_max_fops);
2800 	if (!tmp)
2801 		goto err;
2802 
2803 	tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
2804 	if (!tmp)
2805 		goto err;
2806 
2807 	tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
2808 				&osnoise_options_fops);
2809 	if (!tmp)
2810 		goto err;
2811 
2812 	ret = init_timerlat_tracefs(top_dir);
2813 	if (ret)
2814 		goto err;
2815 
2816 	return 0;
2817 
2818 err:
2819 	tracefs_remove(top_dir);
2820 	return -ENOMEM;
2821 }
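/*
 * Editor's summary of the resulting tracefs layout (the timerlat entries
 * exist only with CONFIG_TIMERLAT_TRACER; print_stack also needs
 * CONFIG_STACKTRACE):
 *
 *	osnoise/
 *	|-- period_us
 *	|-- runtime_us
 *	|-- stop_tracing_us
 *	|-- stop_tracing_total_us
 *	|-- cpus
 *	|-- options
 *	|-- timerlat_period_us
 *	|-- print_stack
 *	`-- per_cpu/cpu$N/timerlat_fd
 */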
2822 
osnoise_hook_events(void)2823 static int osnoise_hook_events(void)
2824 {
2825 	int retval;
2826 
2827 	/*
2828 	 * Trace is already hooked, we are re-enabling from
2829 	 * a stop_tracing_*.
2830 	 */
2831 	if (trace_osnoise_callback_enabled)
2832 		return 0;
2833 
2834 	retval = hook_irq_events();
2835 	if (retval)
2836 		return -EINVAL;
2837 
2838 	retval = hook_softirq_events();
2839 	if (retval)
2840 		goto out_unhook_irq;
2841 
2842 	retval = hook_thread_events();
2843 	/*
2844 	 * All fine!
2845 	 */
2846 	if (!retval)
2847 		return 0;
2848 
2849 	unhook_softirq_events();
2850 out_unhook_irq:
2851 	unhook_irq_events();
2852 	return -EINVAL;
2853 }
2854 
osnoise_unhook_events(void)2855 static void osnoise_unhook_events(void)
2856 {
2857 	unhook_thread_events();
2858 	unhook_softirq_events();
2859 	unhook_irq_events();
2860 }
2861 
2862 /*
2863  * osnoise_workload_start - start the workload and hook to events
2864  */
osnoise_workload_start(void)2865 static int osnoise_workload_start(void)
2866 {
2867 	int retval;
2868 
2869 	/*
2870 	 * Instances need to be registered after calling workload
2871 	 * start. Hence, if there is already an instance, the
2872 	 * workload was already started. Otherwise, this
2873 	 * code is on the way to register the first instance,
2874 	 * and the workload will start.
2875 	 */
2876 	if (osnoise_has_registered_instances())
2877 		return 0;
2878 
2879 	osn_var_reset_all();
2880 
2881 	retval = osnoise_hook_events();
2882 	if (retval)
2883 		return retval;
2884 
2885 	/*
2886 	 * Make sure that ftrace_nmi_enter/exit() see reset values
2887 	 * before enabling trace_osnoise_callback_enabled.
2888 	 */
2889 	barrier();
2890 	trace_osnoise_callback_enabled = true;
2891 
2892 	retval = start_per_cpu_kthreads();
2893 	if (retval) {
2894 		trace_osnoise_callback_enabled = false;
2895 		/*
2896 		 * Make sure that ftrace_nmi_enter/exit() see
2897 		 * trace_osnoise_callback_enabled as false before continuing.
2898 		 */
2899 		barrier();
2900 
2901 		osnoise_unhook_events();
2902 		return retval;
2903 	}
2904 
2905 	return 0;
2906 }
2907 
2908 /*
2909  * osnoise_workload_stop - stop the workload and unhook the events
2910  */
osnoise_workload_stop(void)2911 static void osnoise_workload_stop(void)
2912 {
2913 	/*
2914 	 * Instances need to be unregistered before calling
2915 	 * stop. Hence, if there is still a registered instance, another
2916 	 * instance is still running, and the workload must not
2917 	 * stop yet. Otherwise, this code is on the way to disable
2918 	 * the last instance, and the workload can stop.
2919 	 */
2920 	if (osnoise_has_registered_instances())
2921 		return;
2922 
2923 	/*
2924 	 * If callbacks were already disabled in a previous stop
2925 	 * call, there is no need to disable them again.
2926 	 *
2927 	 * For instance, this happens when tracing is stopped via:
2928 	 * echo 0 > tracing_on
2929 	 * echo nop > current_tracer.
2930 	 */
2931 	if (!trace_osnoise_callback_enabled)
2932 		return;
2933 
2934 	trace_osnoise_callback_enabled = false;
2935 	/*
2936 	 * Make sure that ftrace_nmi_enter/exit() see
2937 	 * trace_osnoise_callback_enabled as false before continuing.
2938 	 */
2939 	barrier();
2940 
2941 	stop_per_cpu_kthreads();
2942 
2943 	osnoise_unhook_events();
2944 }
2945 
osnoise_tracer_start(struct trace_array * tr)2946 static void osnoise_tracer_start(struct trace_array *tr)
2947 {
2948 	int retval;
2949 
2950 	/*
2951 	 * If the instance is already registered, there is no need to
2952 	 * register it again.
2953 	 */
2954 	if (osnoise_instance_registered(tr))
2955 		return;
2956 
2957 	retval = osnoise_workload_start();
2958 	if (retval)
2959 		pr_err(BANNER "Error starting osnoise tracer\n");
2960 
2961 	osnoise_register_instance(tr);
2962 }
2963 
osnoise_tracer_stop(struct trace_array * tr)2964 static void osnoise_tracer_stop(struct trace_array *tr)
2965 {
2966 	osnoise_unregister_instance(tr);
2967 	osnoise_workload_stop();
2968 }
2969 
osnoise_tracer_init(struct trace_array * tr)2970 static int osnoise_tracer_init(struct trace_array *tr)
2971 {
2972 	/*
2973 	 * Only allow osnoise tracer if timerlat tracer is not running
2974 	 * already.
2975 	 */
2976 	if (timerlat_enabled())
2977 		return -EBUSY;
2978 
2979 	tr->max_latency = 0;
2980 
2981 	osnoise_tracer_start(tr);
2982 	return 0;
2983 }
2984 
osnoise_tracer_reset(struct trace_array * tr)2985 static void osnoise_tracer_reset(struct trace_array *tr)
2986 {
2987 	osnoise_tracer_stop(tr);
2988 }
2989 
2990 static struct tracer osnoise_tracer __read_mostly = {
2991 	.name		= "osnoise",
2992 	.init		= osnoise_tracer_init,
2993 	.reset		= osnoise_tracer_reset,
2994 	.start		= osnoise_tracer_start,
2995 	.stop		= osnoise_tracer_stop,
2996 	.print_header	= print_osnoise_headers,
2997 	.allow_instances = true,
2998 };
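/*
 * Example usage (editor's sketch): the tracer registered above is selected
 * via the current_tracer interface:
 *
 *	# echo osnoise > /sys/kernel/tracing/current_tracer
 *	# cat /sys/kernel/tracing/trace
 */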
2999 
3000 #ifdef CONFIG_TIMERLAT_TRACER
timerlat_tracer_start(struct trace_array * tr)3001 static void timerlat_tracer_start(struct trace_array *tr)
3002 {
3003 	int retval;
3004 
3005 	/*
3006 	 * If the instance is already registered, there is no need to
3007 	 * register it again.
3008 	 */
3009 	if (osnoise_instance_registered(tr))
3010 		return;
3011 
3012 	retval = osnoise_workload_start();
3013 	if (retval)
3014 		pr_err(BANNER "Error starting timerlat tracer\n");
3015 
3016 	osnoise_register_instance(tr);
3017 
3018 	return;
3019 }
3020 
timerlat_tracer_stop(struct trace_array * tr)3021 static void timerlat_tracer_stop(struct trace_array *tr)
3022 {
3023 	int cpu;
3024 
3025 	osnoise_unregister_instance(tr);
3026 
3027 	/*
3028 	 * Instruct the threads to stop only if this is the last instance.
3029 	 */
3030 	if (!osnoise_has_registered_instances()) {
3031 		for_each_online_cpu(cpu)
3032 			per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
3033 	}
3034 
3035 	osnoise_workload_stop();
3036 }
3037 
timerlat_tracer_init(struct trace_array * tr)3038 static int timerlat_tracer_init(struct trace_array *tr)
3039 {
3040 	/*
3041 	 * Only allow timerlat tracer if osnoise tracer is not running already.
3042 	 */
3043 	if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
3044 		return -EBUSY;
3045 
3046 	/*
3047 	 * If this is the first instance, set timerlat_tracer to block
3048 	 * osnoise tracer start.
3049 	 */
3050 	if (!osnoise_has_registered_instances())
3051 		osnoise_data.timerlat_tracer = 1;
3052 
3053 	tr->max_latency = 0;
3054 	timerlat_tracer_start(tr);
3055 
3056 	return 0;
3057 }
3058 
timerlat_tracer_reset(struct trace_array * tr)3059 static void timerlat_tracer_reset(struct trace_array *tr)
3060 {
3061 	timerlat_tracer_stop(tr);
3062 
3063 	/*
3064 	 * If this is the last instance, reset timerlat_tracer allowing
3065 	 * osnoise to be started.
3066 	 */
3067 	if (!osnoise_has_registered_instances())
3068 		osnoise_data.timerlat_tracer = 0;
3069 }
3070 
3071 static struct tracer timerlat_tracer __read_mostly = {
3072 	.name		= "timerlat",
3073 	.init		= timerlat_tracer_init,
3074 	.reset		= timerlat_tracer_reset,
3075 	.start		= timerlat_tracer_start,
3076 	.stop		= timerlat_tracer_stop,
3077 	.print_header	= print_timerlat_headers,
3078 	.allow_instances = true,
3079 };
3080 
init_timerlat_tracer(void)3081 __init static int init_timerlat_tracer(void)
3082 {
3083 	return register_tracer(&timerlat_tracer);
3084 }
3085 #else /* CONFIG_TIMERLAT_TRACER */
init_timerlat_tracer(void)3086 __init static int init_timerlat_tracer(void)
3087 {
3088 	return 0;
3089 }
3090 #endif /* CONFIG_TIMERLAT_TRACER */
3091 
init_osnoise_tracer(void)3092 __init static int init_osnoise_tracer(void)
3093 {
3094 	int ret;
3095 
3096 	mutex_init(&interface_lock);
3097 
3098 	cpumask_copy(&osnoise_cpumask, cpu_all_mask);
3099 
3100 	ret = register_tracer(&osnoise_tracer);
3101 	if (ret) {
3102 		pr_err(BANNER "Error registering osnoise!\n");
3103 		return ret;
3104 	}
3105 
3106 	ret = init_timerlat_tracer();
3107 	if (ret) {
3108 		pr_err(BANNER "Error registering timerlat!\n");
3109 		return ret;
3110 	}
3111 
3112 	osnoise_init_hotplug_support();
3113 
3114 	INIT_LIST_HEAD_RCU(&osnoise_instances);
3115 
3116 	init_tracefs();
3117 
3118 	return 0;
3119 }
3120 late_initcall(init_osnoise_tracer);
3121